]> Git Repo - qemu.git/blame - fpu/softfloat.c
tests/qom-proplist: check class properties iterator
[qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 *
 * The order of the enumerators is therefore significant: the NaN
 * classes must stay last. The enum is packed so that it occupies as
 * little space as possible inside FloatParts.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
183
247d1f21
RH
184/* Simple helpers for checking if, or what kind of, NaN we have */
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
a90119b5
AB
200/*
201 * Structure holding all of the decomposed parts of a float. The
202 * exponent is unbiased and the fraction is normalized. All
203 * calculations are done with a 64 bit fraction and then rounded as
204 * appropriate for the final format.
205 *
206 * Thanks to the packed FloatClass a decent compiler should be able to
207 * fit the whole structure into registers and avoid using the stack
208 * for parameter passing.
209 */
210
211typedef struct {
212 uint64_t frac;
213 int32_t exp;
214 FloatClass cls;
215 bool sign;
216} FloatParts;
217
/*
 * Position of the binary point within the 64-bit decomposed fraction.
 * The implicit (integer) bit of a normalized fraction sits at this bit,
 * and the bit above it is used to detect carry-out from additions.
 */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * Macro arguments are parenthesized so that expression arguments expand
 * safely.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1

/* IEEE half precision: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same field layout as float16_params, with the ARM alternative flag set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* IEEE single precision: 8 exponent bits, 23 fraction bits. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE double precision: 11 exponent bits, 52 fraction bits. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
276
6fff2167
AB
277/* Unpack a float to parts, but do not canonicalize. */
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305/* Pack a float from parts, but do not canonicalize. */
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
0664335a
RH
328/*----------------------------------------------------------------------------
329| Functions and definitions to determine: (1) whether tininess for underflow
330| is detected before or after rounding by default, (2) what (if anything)
331| happens when exceptions are raised, (3) how signaling NaNs are distinguished
332| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
333| are propagated from function inputs to output. These details are target-
334| specific.
335*----------------------------------------------------------------------------*/
336#include "softfloat-specialize.h"
337
6fff2167
AB
338/* Canonicalize EXP and FRAC, setting CLS. */
339static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
341{
ca3a3d5a 342 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
94933df0 346 part.frac <<= parm->frac_shift;
298b468e
RH
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
6fff2167
AB
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371/* Round and uncanonicalize a floating-point number by parts. There
372 * are FRAC_SHIFT bits that may require rounding at the bottom of the
373 * fraction; these bits will be removed. The exponent will be biased
374 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
375 */
376
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
429 frac >>= frac_shift;
430
ca3a3d5a
AB
431 if (parm->arm_althp) {
432 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
433 if (unlikely(exp > exp_max)) {
434 /* Overflow. Return the maximum normal. */
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461 /* Need to recompute round-to-even. */
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
ca3a3d5a 490 assert(!parm->arm_althp);
6fff2167
AB
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
ca3a3d5a 497 assert(!parm->arm_althp);
6fff2167 498 exp = exp_max;
94933df0 499 frac >>= parm->frac_shift;
6fff2167
AB
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
6fed16b2
AB
512/* Explicit FloatFmt version */
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
516 return canonicalize(float16_unpack_raw(f), params, s);
517}
518
6fff2167
AB
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
6fed16b2
AB
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
6fed16b2 532 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
537 return canonicalize(float32_unpack_raw(f), &float32_params, s);
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
0bcfbcbe 542 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
547 return canonicalize(float64_unpack_raw(f), &float64_params, s);
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
0bcfbcbe 552 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
553}
554
dbe4d53a
AB
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 560 a = parts_silence_nan(a, s);
dbe4d53a
AB
561 /* fall through */
562 case float_class_qnan:
563 if (s->default_nan_mode) {
f7e598e2 564 return parts_default_nan(s);
dbe4d53a
AB
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
6fff2167
AB
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
f7e598e2 581 return parts_default_nan(s);
6fff2167 582 } else {
4f251cfd 583 if (pickNaN(a.cls, b.cls,
6fff2167
AB
584 a.frac > b.frac ||
585 (a.frac == b.frac && a.sign < b.sign))) {
586 a = b;
587 }
0bcfbcbe
RH
588 if (is_snan(a.cls)) {
589 return parts_silence_nan(a, s);
590 }
6fff2167
AB
591 }
592 return a;
593}
594
d446830a
AB
595static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
596 bool inf_zero, float_status *s)
597{
1839189b
PM
598 int which;
599
d446830a
AB
600 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
601 s->float_exception_flags |= float_flag_invalid;
602 }
603
3bd2dec1 604 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 605
d446830a 606 if (s->default_nan_mode) {
1839189b
PM
607 /* Note that this check is after pickNaNMulAdd so that function
608 * has an opportunity to set the Invalid flag.
609 */
f7e598e2 610 which = 3;
1839189b 611 }
d446830a 612
1839189b
PM
613 switch (which) {
614 case 0:
615 break;
616 case 1:
617 a = b;
618 break;
619 case 2:
620 a = c;
621 break;
622 case 3:
f7e598e2 623 return parts_default_nan(s);
1839189b
PM
624 default:
625 g_assert_not_reached();
d446830a 626 }
1839189b 627
0bcfbcbe
RH
628 if (is_snan(a.cls)) {
629 return parts_silence_nan(a, s);
630 }
d446830a
AB
631 return a;
632}
633
6fff2167
AB
634/*
635 * Returns the result of adding or subtracting the values of the
636 * floating-point values `a' and `b'. The operation is performed
637 * according to the IEC/IEEE Standard for Binary Floating-Point
638 * Arithmetic.
639 */
640
641static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
642 float_status *s)
643{
644 bool a_sign = a.sign;
645 bool b_sign = b.sign ^ subtract;
646
647 if (a_sign != b_sign) {
648 /* Subtraction */
649
650 if (a.cls == float_class_normal && b.cls == float_class_normal) {
651 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
652 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
653 a.frac = a.frac - b.frac;
654 } else {
655 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
656 a.frac = b.frac - a.frac;
657 a.exp = b.exp;
658 a_sign ^= 1;
659 }
660
661 if (a.frac == 0) {
662 a.cls = float_class_zero;
663 a.sign = s->float_rounding_mode == float_round_down;
664 } else {
665 int shift = clz64(a.frac) - 1;
666 a.frac = a.frac << shift;
667 a.exp = a.exp - shift;
668 a.sign = a_sign;
669 }
670 return a;
671 }
672 if (is_nan(a.cls) || is_nan(b.cls)) {
673 return pick_nan(a, b, s);
674 }
675 if (a.cls == float_class_inf) {
676 if (b.cls == float_class_inf) {
677 float_raise(float_flag_invalid, s);
f7e598e2 678 return parts_default_nan(s);
6fff2167
AB
679 }
680 return a;
681 }
682 if (a.cls == float_class_zero && b.cls == float_class_zero) {
683 a.sign = s->float_rounding_mode == float_round_down;
684 return a;
685 }
686 if (a.cls == float_class_zero || b.cls == float_class_inf) {
687 b.sign = a_sign ^ 1;
688 return b;
689 }
690 if (b.cls == float_class_zero) {
691 return a;
692 }
693 } else {
694 /* Addition */
695 if (a.cls == float_class_normal && b.cls == float_class_normal) {
696 if (a.exp > b.exp) {
697 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
698 } else if (a.exp < b.exp) {
699 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
700 a.exp = b.exp;
701 }
702 a.frac += b.frac;
703 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 704 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
705 a.exp += 1;
706 }
707 return a;
708 }
709 if (is_nan(a.cls) || is_nan(b.cls)) {
710 return pick_nan(a, b, s);
711 }
712 if (a.cls == float_class_inf || b.cls == float_class_zero) {
713 return a;
714 }
715 if (b.cls == float_class_inf || a.cls == float_class_zero) {
716 b.sign = b_sign;
717 return b;
718 }
719 }
720 g_assert_not_reached();
721}
722
723/*
724 * Returns the result of adding or subtracting the floating-point
725 * values `a' and `b'. The operation is performed according to the
726 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
727 */
728
729float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
730 float_status *status)
731{
732 FloatParts pa = float16_unpack_canonical(a, status);
733 FloatParts pb = float16_unpack_canonical(b, status);
734 FloatParts pr = addsub_floats(pa, pb, false, status);
735
736 return float16_round_pack_canonical(pr, status);
737}
738
739float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
740 float_status *status)
741{
742 FloatParts pa = float32_unpack_canonical(a, status);
743 FloatParts pb = float32_unpack_canonical(b, status);
744 FloatParts pr = addsub_floats(pa, pb, false, status);
745
746 return float32_round_pack_canonical(pr, status);
747}
748
749float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
750 float_status *status)
751{
752 FloatParts pa = float64_unpack_canonical(a, status);
753 FloatParts pb = float64_unpack_canonical(b, status);
754 FloatParts pr = addsub_floats(pa, pb, false, status);
755
756 return float64_round_pack_canonical(pr, status);
757}
758
759float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
760 float_status *status)
761{
762 FloatParts pa = float16_unpack_canonical(a, status);
763 FloatParts pb = float16_unpack_canonical(b, status);
764 FloatParts pr = addsub_floats(pa, pb, true, status);
765
766 return float16_round_pack_canonical(pr, status);
767}
768
769float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
770 float_status *status)
771{
772 FloatParts pa = float32_unpack_canonical(a, status);
773 FloatParts pb = float32_unpack_canonical(b, status);
774 FloatParts pr = addsub_floats(pa, pb, true, status);
775
776 return float32_round_pack_canonical(pr, status);
777}
778
779float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
780 float_status *status)
781{
782 FloatParts pa = float64_unpack_canonical(a, status);
783 FloatParts pb = float64_unpack_canonical(b, status);
784 FloatParts pr = addsub_floats(pa, pb, true, status);
785
786 return float64_round_pack_canonical(pr, status);
787}
788
74d707e2
AB
789/*
790 * Returns the result of multiplying the floating-point values `a' and
791 * `b'. The operation is performed according to the IEC/IEEE Standard
792 * for Binary Floating-Point Arithmetic.
793 */
794
795static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
796{
797 bool sign = a.sign ^ b.sign;
798
799 if (a.cls == float_class_normal && b.cls == float_class_normal) {
800 uint64_t hi, lo;
801 int exp = a.exp + b.exp;
802
803 mul64To128(a.frac, b.frac, &hi, &lo);
804 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
805 if (lo & DECOMPOSED_OVERFLOW_BIT) {
806 shift64RightJamming(lo, 1, &lo);
807 exp += 1;
808 }
809
810 /* Re-use a */
811 a.exp = exp;
812 a.sign = sign;
813 a.frac = lo;
814 return a;
815 }
816 /* handle all the NaN cases */
817 if (is_nan(a.cls) || is_nan(b.cls)) {
818 return pick_nan(a, b, s);
819 }
820 /* Inf * Zero == NaN */
821 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
822 (a.cls == float_class_zero && b.cls == float_class_inf)) {
823 s->float_exception_flags |= float_flag_invalid;
f7e598e2 824 return parts_default_nan(s);
74d707e2
AB
825 }
826 /* Multiply by 0 or Inf */
827 if (a.cls == float_class_inf || a.cls == float_class_zero) {
828 a.sign = sign;
829 return a;
830 }
831 if (b.cls == float_class_inf || b.cls == float_class_zero) {
832 b.sign = sign;
833 return b;
834 }
835 g_assert_not_reached();
836}
837
838float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
839 float_status *status)
840{
841 FloatParts pa = float16_unpack_canonical(a, status);
842 FloatParts pb = float16_unpack_canonical(b, status);
843 FloatParts pr = mul_floats(pa, pb, status);
844
845 return float16_round_pack_canonical(pr, status);
846}
847
848float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
849 float_status *status)
850{
851 FloatParts pa = float32_unpack_canonical(a, status);
852 FloatParts pb = float32_unpack_canonical(b, status);
853 FloatParts pr = mul_floats(pa, pb, status);
854
855 return float32_round_pack_canonical(pr, status);
856}
857
858float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
859 float_status *status)
860{
861 FloatParts pa = float64_unpack_canonical(a, status);
862 FloatParts pb = float64_unpack_canonical(b, status);
863 FloatParts pr = mul_floats(pa, pb, status);
864
865 return float64_round_pack_canonical(pr, status);
866}
867
d446830a
AB
868/*
869 * Returns the result of multiplying the floating-point values `a' and
870 * `b' then adding 'c', with no intermediate rounding step after the
871 * multiplication. The operation is performed according to the
872 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
873 * The flags argument allows the caller to select negation of the
874 * addend, the intermediate product, or the final result. (The
875 * difference between this and having the caller do a separate
876 * negation is that negating externally will flip the sign bit on
877 * NaNs.)
878 */
879
880static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
881 int flags, float_status *s)
882{
883 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
884 ((1 << float_class_inf) | (1 << float_class_zero));
885 bool p_sign;
886 bool sign_flip = flags & float_muladd_negate_result;
887 FloatClass p_class;
888 uint64_t hi, lo;
889 int p_exp;
890
891 /* It is implementation-defined whether the cases of (0,inf,qnan)
892 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
893 * they return if they do), so we have to hand this information
894 * off to the target-specific pick-a-NaN routine.
895 */
896 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
897 return pick_nan_muladd(a, b, c, inf_zero, s);
898 }
899
900 if (inf_zero) {
901 s->float_exception_flags |= float_flag_invalid;
f7e598e2 902 return parts_default_nan(s);
d446830a
AB
903 }
904
905 if (flags & float_muladd_negate_c) {
906 c.sign ^= 1;
907 }
908
909 p_sign = a.sign ^ b.sign;
910
911 if (flags & float_muladd_negate_product) {
912 p_sign ^= 1;
913 }
914
915 if (a.cls == float_class_inf || b.cls == float_class_inf) {
916 p_class = float_class_inf;
917 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
918 p_class = float_class_zero;
919 } else {
920 p_class = float_class_normal;
921 }
922
923 if (c.cls == float_class_inf) {
924 if (p_class == float_class_inf && p_sign != c.sign) {
925 s->float_exception_flags |= float_flag_invalid;
f7e598e2 926 return parts_default_nan(s);
d446830a
AB
927 } else {
928 a.cls = float_class_inf;
929 a.sign = c.sign ^ sign_flip;
f7e598e2 930 return a;
d446830a 931 }
d446830a
AB
932 }
933
934 if (p_class == float_class_inf) {
935 a.cls = float_class_inf;
936 a.sign = p_sign ^ sign_flip;
937 return a;
938 }
939
940 if (p_class == float_class_zero) {
941 if (c.cls == float_class_zero) {
942 if (p_sign != c.sign) {
943 p_sign = s->float_rounding_mode == float_round_down;
944 }
945 c.sign = p_sign;
946 } else if (flags & float_muladd_halve_result) {
947 c.exp -= 1;
948 }
949 c.sign ^= sign_flip;
950 return c;
951 }
952
953 /* a & b should be normals now... */
954 assert(a.cls == float_class_normal &&
955 b.cls == float_class_normal);
956
957 p_exp = a.exp + b.exp;
958
959 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
960 * result.
961 */
962 mul64To128(a.frac, b.frac, &hi, &lo);
963 /* binary point now at bit 124 */
964
965 /* check for overflow */
966 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
967 shift128RightJamming(hi, lo, 1, &hi, &lo);
968 p_exp += 1;
969 }
970
971 /* + add/sub */
972 if (c.cls == float_class_zero) {
973 /* move binary point back to 62 */
974 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
975 } else {
976 int exp_diff = p_exp - c.exp;
977 if (p_sign == c.sign) {
978 /* Addition */
979 if (exp_diff <= 0) {
980 shift128RightJamming(hi, lo,
981 DECOMPOSED_BINARY_POINT - exp_diff,
982 &hi, &lo);
983 lo += c.frac;
984 p_exp = c.exp;
985 } else {
986 uint64_t c_hi, c_lo;
987 /* shift c to the same binary point as the product (124) */
988 c_hi = c.frac >> 2;
989 c_lo = 0;
990 shift128RightJamming(c_hi, c_lo,
991 exp_diff,
992 &c_hi, &c_lo);
993 add128(hi, lo, c_hi, c_lo, &hi, &lo);
994 /* move binary point back to 62 */
995 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
996 }
997
998 if (lo & DECOMPOSED_OVERFLOW_BIT) {
999 shift64RightJamming(lo, 1, &lo);
1000 p_exp += 1;
1001 }
1002
1003 } else {
1004 /* Subtraction */
1005 uint64_t c_hi, c_lo;
1006 /* make C binary point match product at bit 124 */
1007 c_hi = c.frac >> 2;
1008 c_lo = 0;
1009
1010 if (exp_diff <= 0) {
1011 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1012 if (exp_diff == 0
1013 &&
1014 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1015 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1016 } else {
1017 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1018 p_sign ^= 1;
1019 p_exp = c.exp;
1020 }
1021 } else {
1022 shift128RightJamming(c_hi, c_lo,
1023 exp_diff,
1024 &c_hi, &c_lo);
1025 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1026 }
1027
1028 if (hi == 0 && lo == 0) {
1029 a.cls = float_class_zero;
1030 a.sign = s->float_rounding_mode == float_round_down;
1031 a.sign ^= sign_flip;
1032 return a;
1033 } else {
1034 int shift;
1035 if (hi != 0) {
1036 shift = clz64(hi);
1037 } else {
1038 shift = clz64(lo) + 64;
1039 }
1040 /* Normalizing to a binary point of 124 is the
1041 correct adjust for the exponent. However since we're
1042 shifting, we might as well put the binary point back
1043 at 62 where we really want it. Therefore shift as
1044 if we're leaving 1 bit at the top of the word, but
1045 adjust the exponent as if we're leaving 3 bits. */
1046 shift -= 1;
1047 if (shift >= 64) {
1048 lo = lo << (shift - 64);
1049 } else {
1050 hi = (hi << shift) | (lo >> (64 - shift));
1051 lo = hi | ((lo << shift) != 0);
1052 }
1053 p_exp -= shift - 2;
1054 }
1055 }
1056 }
1057
1058 if (flags & float_muladd_halve_result) {
1059 p_exp -= 1;
1060 }
1061
1062 /* finally prepare our result */
1063 a.cls = float_class_normal;
1064 a.sign = p_sign ^ sign_flip;
1065 a.exp = p_exp;
1066 a.frac = lo;
1067
1068 return a;
1069}
1070
1071float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1072 int flags, float_status *status)
1073{
1074 FloatParts pa = float16_unpack_canonical(a, status);
1075 FloatParts pb = float16_unpack_canonical(b, status);
1076 FloatParts pc = float16_unpack_canonical(c, status);
1077 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1078
1079 return float16_round_pack_canonical(pr, status);
1080}
1081
1082float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1083 int flags, float_status *status)
1084{
1085 FloatParts pa = float32_unpack_canonical(a, status);
1086 FloatParts pb = float32_unpack_canonical(b, status);
1087 FloatParts pc = float32_unpack_canonical(c, status);
1088 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1089
1090 return float32_round_pack_canonical(pr, status);
1091}
1092
1093float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1094 int flags, float_status *status)
1095{
1096 FloatParts pa = float64_unpack_canonical(a, status);
1097 FloatParts pb = float64_unpack_canonical(b, status);
1098 FloatParts pc = float64_unpack_canonical(c, status);
1099 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1100
1101 return float64_round_pack_canonical(pr, status);
1102}
1103
cf07323d
AB
1104/*
1105 * Returns the result of dividing the floating-point value `a' by the
1106 * corresponding value `b'. The operation is performed according to
1107 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1108 */
1109
1110static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1111{
1112 bool sign = a.sign ^ b.sign;
1113
1114 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1115 uint64_t temp_lo, temp_hi;
1116 int exp = a.exp - b.exp;
1117 if (a.frac < b.frac) {
1118 exp -= 1;
1119 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1120 &temp_hi, &temp_lo);
1121 } else {
1122 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1123 &temp_hi, &temp_lo);
1124 }
1125 /* LSB of quot is set if inexact which roundandpack will use
1126 * to set flags. Yet again we re-use a for the result */
1127 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1128 a.sign = sign;
1129 a.exp = exp;
1130 return a;
1131 }
1132 /* handle all the NaN cases */
1133 if (is_nan(a.cls) || is_nan(b.cls)) {
1134 return pick_nan(a, b, s);
1135 }
1136 /* 0/0 or Inf/Inf */
1137 if (a.cls == b.cls
1138 &&
1139 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1140 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1141 return parts_default_nan(s);
cf07323d 1142 }
9cb4e398
AB
1143 /* Inf / x or 0 / x */
1144 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1145 a.sign = sign;
1146 return a;
1147 }
cf07323d
AB
1148 /* Div 0 => Inf */
1149 if (b.cls == float_class_zero) {
1150 s->float_exception_flags |= float_flag_divbyzero;
1151 a.cls = float_class_inf;
1152 a.sign = sign;
1153 return a;
1154 }
cf07323d
AB
1155 /* Div by Inf */
1156 if (b.cls == float_class_inf) {
1157 a.cls = float_class_zero;
1158 a.sign = sign;
1159 return a;
1160 }
1161 g_assert_not_reached();
1162}
1163
1164float16 float16_div(float16 a, float16 b, float_status *status)
1165{
1166 FloatParts pa = float16_unpack_canonical(a, status);
1167 FloatParts pb = float16_unpack_canonical(b, status);
1168 FloatParts pr = div_floats(pa, pb, status);
1169
1170 return float16_round_pack_canonical(pr, status);
1171}
1172
1173float32 float32_div(float32 a, float32 b, float_status *status)
1174{
1175 FloatParts pa = float32_unpack_canonical(a, status);
1176 FloatParts pb = float32_unpack_canonical(b, status);
1177 FloatParts pr = div_floats(pa, pb, status);
1178
1179 return float32_round_pack_canonical(pr, status);
1180}
1181
1182float64 float64_div(float64 a, float64 b, float_status *status)
1183{
1184 FloatParts pa = float64_unpack_canonical(a, status);
1185 FloatParts pb = float64_unpack_canonical(b, status);
1186 FloatParts pr = div_floats(pa, pb, status);
1187
1188 return float64_round_pack_canonical(pr, status);
1189}
1190
6fed16b2
AB
1191/*
1192 * Float to Float conversions
1193 *
1194 * Returns the result of converting one float format to another. The
1195 * conversion is performed according to the IEC/IEEE Standard for
1196 * Binary Floating-Point Arithmetic.
1197 *
1198 * The float_to_float helper only needs to take care of raising
1199 * invalid exceptions and handling the conversion on NaNs.
1200 */
1201
1202static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1203 float_status *s)
1204{
1205 if (dstf->arm_althp) {
1206 switch (a.cls) {
1207 case float_class_qnan:
1208 case float_class_snan:
1209 /* There is no NaN in the destination format. Raise Invalid
1210 * and return a zero with the sign of the input NaN.
1211 */
1212 s->float_exception_flags |= float_flag_invalid;
1213 a.cls = float_class_zero;
1214 a.frac = 0;
1215 a.exp = 0;
1216 break;
1217
1218 case float_class_inf:
1219 /* There is no Inf in the destination format. Raise Invalid
1220 * and return the maximum normal with the correct sign.
1221 */
1222 s->float_exception_flags |= float_flag_invalid;
1223 a.cls = float_class_normal;
1224 a.exp = dstf->exp_max;
1225 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1226 break;
1227
1228 default:
1229 break;
1230 }
1231 } else if (is_nan(a.cls)) {
1232 if (is_snan(a.cls)) {
1233 s->float_exception_flags |= float_flag_invalid;
1234 a = parts_silence_nan(a, s);
1235 }
1236 if (s->default_nan_mode) {
1237 return parts_default_nan(s);
1238 }
1239 }
1240 return a;
1241}
1242
1243float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1244{
1245 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1246 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1247 FloatParts pr = float_to_float(p, &float32_params, s);
1248 return float32_round_pack_canonical(pr, s);
1249}
1250
1251float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1252{
1253 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1254 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1255 FloatParts pr = float_to_float(p, &float64_params, s);
1256 return float64_round_pack_canonical(pr, s);
1257}
1258
1259float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1260{
1261 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1262 FloatParts p = float32_unpack_canonical(a, s);
1263 FloatParts pr = float_to_float(p, fmt16, s);
1264 return float16a_round_pack_canonical(pr, s, fmt16);
1265}
1266
1267float64 float32_to_float64(float32 a, float_status *s)
1268{
1269 FloatParts p = float32_unpack_canonical(a, s);
1270 FloatParts pr = float_to_float(p, &float64_params, s);
1271 return float64_round_pack_canonical(pr, s);
1272}
1273
1274float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1275{
1276 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1277 FloatParts p = float64_unpack_canonical(a, s);
1278 FloatParts pr = float_to_float(p, fmt16, s);
1279 return float16a_round_pack_canonical(pr, s, fmt16);
1280}
1281
1282float32 float64_to_float32(float64 a, float_status *s)
1283{
1284 FloatParts p = float64_unpack_canonical(a, s);
1285 FloatParts pr = float_to_float(p, &float32_params, s);
1286 return float32_round_pack_canonical(pr, s);
1287}
1288
dbe4d53a
AB
1289/*
1290 * Rounds the floating-point value `a' to an integer, and returns the
1291 * result as a floating-point value. The operation is performed
1292 * according to the IEC/IEEE Standard for Binary Floating-Point
1293 * Arithmetic.
1294 */
1295
2f6c74be
RH
1296static FloatParts round_to_int(FloatParts a, int rmode,
1297 int scale, float_status *s)
dbe4d53a 1298{
2f6c74be
RH
1299 switch (a.cls) {
1300 case float_class_qnan:
1301 case float_class_snan:
dbe4d53a 1302 return return_nan(a, s);
dbe4d53a 1303
dbe4d53a
AB
1304 case float_class_zero:
1305 case float_class_inf:
dbe4d53a
AB
1306 /* already "integral" */
1307 break;
2f6c74be 1308
dbe4d53a 1309 case float_class_normal:
2f6c74be
RH
1310 scale = MIN(MAX(scale, -0x10000), 0x10000);
1311 a.exp += scale;
1312
dbe4d53a
AB
1313 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1314 /* already integral */
1315 break;
1316 }
1317 if (a.exp < 0) {
1318 bool one;
1319 /* all fractional */
1320 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1321 switch (rmode) {
dbe4d53a
AB
1322 case float_round_nearest_even:
1323 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1324 break;
1325 case float_round_ties_away:
1326 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1327 break;
1328 case float_round_to_zero:
1329 one = false;
1330 break;
1331 case float_round_up:
1332 one = !a.sign;
1333 break;
1334 case float_round_down:
1335 one = a.sign;
1336 break;
1337 default:
1338 g_assert_not_reached();
1339 }
1340
1341 if (one) {
1342 a.frac = DECOMPOSED_IMPLICIT_BIT;
1343 a.exp = 0;
1344 } else {
1345 a.cls = float_class_zero;
1346 }
1347 } else {
1348 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1349 uint64_t frac_lsbm1 = frac_lsb >> 1;
1350 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1351 uint64_t rnd_mask = rnd_even_mask >> 1;
1352 uint64_t inc;
1353
2f6c74be 1354 switch (rmode) {
dbe4d53a
AB
1355 case float_round_nearest_even:
1356 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1357 break;
1358 case float_round_ties_away:
1359 inc = frac_lsbm1;
1360 break;
1361 case float_round_to_zero:
1362 inc = 0;
1363 break;
1364 case float_round_up:
1365 inc = a.sign ? 0 : rnd_mask;
1366 break;
1367 case float_round_down:
1368 inc = a.sign ? rnd_mask : 0;
1369 break;
1370 default:
1371 g_assert_not_reached();
1372 }
1373
1374 if (a.frac & rnd_mask) {
1375 s->float_exception_flags |= float_flag_inexact;
1376 a.frac += inc;
1377 a.frac &= ~rnd_mask;
1378 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1379 a.frac >>= 1;
1380 a.exp++;
1381 }
1382 }
1383 }
1384 break;
1385 default:
1386 g_assert_not_reached();
1387 }
1388 return a;
1389}
1390
1391float16 float16_round_to_int(float16 a, float_status *s)
1392{
1393 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 1394 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1395 return float16_round_pack_canonical(pr, s);
1396}
1397
1398float32 float32_round_to_int(float32 a, float_status *s)
1399{
1400 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 1401 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1402 return float32_round_pack_canonical(pr, s);
1403}
1404
1405float64 float64_round_to_int(float64 a, float_status *s)
1406{
1407 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 1408 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1409 return float64_round_pack_canonical(pr, s);
1410}
1411
1412float64 float64_trunc_to_int(float64 a, float_status *s)
1413{
1414 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 1415 FloatParts pr = round_to_int(pa, float_round_to_zero, 0, s);
dbe4d53a
AB
1416 return float64_round_pack_canonical(pr, s);
1417}
1418
ab52f973
AB
1419/*
1420 * Returns the result of converting the floating-point value `a' to
1421 * the two's complement integer format. The conversion is performed
1422 * according to the IEC/IEEE Standard for Binary Floating-Point
1423 * Arithmetic---which means in particular that the conversion is
1424 * rounded according to the current rounding mode. If `a' is a NaN,
1425 * the largest positive integer is returned. Otherwise, if the
1426 * conversion overflows, the largest integer with the same sign as `a'
1427 * is returned.
1428*/
1429
2f6c74be 1430static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
1431 int64_t min, int64_t max,
1432 float_status *s)
1433{
1434 uint64_t r;
1435 int orig_flags = get_float_exception_flags(s);
2f6c74be 1436 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
1437
1438 switch (p.cls) {
1439 case float_class_snan:
1440 case float_class_qnan:
801bc563 1441 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1442 return max;
1443 case float_class_inf:
801bc563 1444 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1445 return p.sign ? min : max;
1446 case float_class_zero:
1447 return 0;
1448 case float_class_normal:
1449 if (p.exp < DECOMPOSED_BINARY_POINT) {
1450 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1451 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1452 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1453 } else {
1454 r = UINT64_MAX;
1455 }
1456 if (p.sign) {
33358375 1457 if (r <= -(uint64_t) min) {
ab52f973
AB
1458 return -r;
1459 } else {
1460 s->float_exception_flags = orig_flags | float_flag_invalid;
1461 return min;
1462 }
1463 } else {
33358375 1464 if (r <= max) {
ab52f973
AB
1465 return r;
1466 } else {
1467 s->float_exception_flags = orig_flags | float_flag_invalid;
1468 return max;
1469 }
1470 }
1471 default:
1472 g_assert_not_reached();
1473 }
1474}
1475
2f6c74be
RH
1476int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
1477 float_status *s)
1478{
1479 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1480 rmode, scale, INT16_MIN, INT16_MAX, s);
1481}
1482
1483int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
1484 float_status *s)
1485{
1486 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1487 rmode, scale, INT32_MIN, INT32_MAX, s);
1488}
1489
1490int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
1491 float_status *s)
1492{
1493 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1494 rmode, scale, INT64_MIN, INT64_MAX, s);
1495}
1496
1497int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
1498 float_status *s)
1499{
1500 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1501 rmode, scale, INT16_MIN, INT16_MAX, s);
1502}
1503
1504int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
1505 float_status *s)
1506{
1507 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1508 rmode, scale, INT32_MIN, INT32_MAX, s);
1509}
1510
1511int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
1512 float_status *s)
1513{
1514 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1515 rmode, scale, INT64_MIN, INT64_MAX, s);
1516}
1517
1518int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
1519 float_status *s)
1520{
1521 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1522 rmode, scale, INT16_MIN, INT16_MAX, s);
1523}
1524
1525int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
1526 float_status *s)
1527{
1528 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1529 rmode, scale, INT32_MIN, INT32_MAX, s);
1530}
1531
1532int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
1533 float_status *s)
1534{
1535 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1536 rmode, scale, INT64_MIN, INT64_MAX, s);
1537}
1538
1539int16_t float16_to_int16(float16 a, float_status *s)
1540{
1541 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1542}
1543
1544int32_t float16_to_int32(float16 a, float_status *s)
1545{
1546 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1547}
1548
1549int64_t float16_to_int64(float16 a, float_status *s)
1550{
1551 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1552}
1553
1554int16_t float32_to_int16(float32 a, float_status *s)
1555{
1556 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1557}
1558
1559int32_t float32_to_int32(float32 a, float_status *s)
1560{
1561 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1562}
1563
1564int64_t float32_to_int64(float32 a, float_status *s)
1565{
1566 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1567}
1568
1569int16_t float64_to_int16(float64 a, float_status *s)
1570{
1571 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1572}
1573
1574int32_t float64_to_int32(float64 a, float_status *s)
1575{
1576 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1577}
1578
1579int64_t float64_to_int64(float64 a, float_status *s)
1580{
1581 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1582}
1583
1584int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
1585{
1586 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
1587}
1588
1589int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
1590{
1591 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
1592}
1593
1594int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
1595{
1596 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
1597}
1598
2f6c74be
RH
1599int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
1600{
1601 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
1602}
ab52f973 1603
2f6c74be
RH
1604int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
1605{
1606 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
1607}
1608
1609int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
1610{
1611 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
1612}
1613
1614int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
1615{
1616 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
1617}
ab52f973 1618
2f6c74be
RH
1619int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
1620{
1621 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
1622}
ab52f973 1623
2f6c74be
RH
1624int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
1625{
1626 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
1627}
ab52f973
AB
1628
1629/*
1630 * Returns the result of converting the floating-point value `a' to
1631 * the unsigned integer format. The conversion is performed according
1632 * to the IEC/IEEE Standard for Binary Floating-Point
1633 * Arithmetic---which means in particular that the conversion is
1634 * rounded according to the current rounding mode. If `a' is a NaN,
1635 * the largest unsigned integer is returned. Otherwise, if the
1636 * conversion overflows, the largest unsigned integer is returned. If
1637 * `a' is negative, the result is rounded and zero is returned;
1638 * values that do not round to zero will raise the invalid exception
1639 * flag.
1640 */
1641
2f6c74be
RH
1642static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
1643 uint64_t max, float_status *s)
ab52f973
AB
1644{
1645 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
1646 FloatParts p = round_to_int(in, rmode, scale, s);
1647 uint64_t r;
ab52f973
AB
1648
1649 switch (p.cls) {
1650 case float_class_snan:
1651 case float_class_qnan:
1652 s->float_exception_flags = orig_flags | float_flag_invalid;
1653 return max;
1654 case float_class_inf:
801bc563 1655 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1656 return p.sign ? 0 : max;
1657 case float_class_zero:
1658 return 0;
1659 case float_class_normal:
ab52f973
AB
1660 if (p.sign) {
1661 s->float_exception_flags = orig_flags | float_flag_invalid;
1662 return 0;
1663 }
1664
1665 if (p.exp < DECOMPOSED_BINARY_POINT) {
1666 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1667 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1668 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1669 } else {
1670 s->float_exception_flags = orig_flags | float_flag_invalid;
1671 return max;
1672 }
1673
1674 /* For uint64 this will never trip, but if p.exp is too large
1675 * to shift a decomposed fraction we shall have exited via the
1676 * 3rd leg above.
1677 */
1678 if (r > max) {
1679 s->float_exception_flags = orig_flags | float_flag_invalid;
1680 return max;
ab52f973 1681 }
2f6c74be 1682 return r;
ab52f973
AB
1683 default:
1684 g_assert_not_reached();
1685 }
1686}
1687
2f6c74be
RH
1688uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
1689 float_status *s)
1690{
1691 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1692 rmode, scale, UINT16_MAX, s);
1693}
1694
1695uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
1696 float_status *s)
1697{
1698 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1699 rmode, scale, UINT32_MAX, s);
1700}
1701
1702uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
1703 float_status *s)
1704{
1705 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1706 rmode, scale, UINT64_MAX, s);
1707}
1708
1709uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
1710 float_status *s)
1711{
1712 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1713 rmode, scale, UINT16_MAX, s);
1714}
1715
1716uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
1717 float_status *s)
1718{
1719 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1720 rmode, scale, UINT32_MAX, s);
1721}
1722
1723uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
1724 float_status *s)
1725{
1726 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1727 rmode, scale, UINT64_MAX, s);
1728}
1729
1730uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
1731 float_status *s)
1732{
1733 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1734 rmode, scale, UINT16_MAX, s);
1735}
1736
1737uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
1738 float_status *s)
1739{
1740 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1741 rmode, scale, UINT32_MAX, s);
1742}
1743
1744uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
1745 float_status *s)
1746{
1747 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1748 rmode, scale, UINT64_MAX, s);
1749}
1750
1751uint16_t float16_to_uint16(float16 a, float_status *s)
1752{
1753 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1754}
1755
1756uint32_t float16_to_uint32(float16 a, float_status *s)
1757{
1758 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1759}
1760
1761uint64_t float16_to_uint64(float16 a, float_status *s)
1762{
1763 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1764}
1765
1766uint16_t float32_to_uint16(float32 a, float_status *s)
1767{
1768 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1769}
1770
1771uint32_t float32_to_uint32(float32 a, float_status *s)
1772{
1773 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1774}
1775
1776uint64_t float32_to_uint64(float32 a, float_status *s)
1777{
1778 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1779}
1780
1781uint16_t float64_to_uint16(float64 a, float_status *s)
1782{
1783 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1784}
1785
1786uint32_t float64_to_uint32(float64 a, float_status *s)
1787{
1788 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1789}
1790
1791uint64_t float64_to_uint64(float64 a, float_status *s)
1792{
1793 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1794}
1795
1796uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
1797{
1798 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1799}
1800
1801uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
1802{
1803 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1804}
1805
1806uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
1807{
1808 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1809}
1810
1811uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
1812{
1813 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1814}
1815
1816uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
1817{
1818 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1819}
1820
1821uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
1822{
1823 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1824}
1825
1826uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
1827{
1828 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1829}
1830
1831uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
1832{
1833 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1834}
1835
1836uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
1837{
1838 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1839}
ab52f973 1840
c02e1fb8
AB
1841/*
1842 * Integer to float conversions
1843 *
1844 * Returns the result of converting the two's complement integer `a'
1845 * to the floating-point format. The conversion is performed according
1846 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1847 */
1848
2abdfe24 1849static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 1850{
2abdfe24
RH
1851 FloatParts r = { .sign = false };
1852
c02e1fb8
AB
1853 if (a == 0) {
1854 r.cls = float_class_zero;
c02e1fb8 1855 } else {
2abdfe24
RH
1856 uint64_t f = a;
1857 int shift;
1858
1859 r.cls = float_class_normal;
c02e1fb8 1860 if (a < 0) {
2abdfe24 1861 f = -f;
c02e1fb8 1862 r.sign = true;
c02e1fb8 1863 }
2abdfe24
RH
1864 shift = clz64(f) - 1;
1865 scale = MIN(MAX(scale, -0x10000), 0x10000);
1866
1867 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1868 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
1869 }
1870
1871 return r;
1872}
1873
2abdfe24 1874float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1875{
2abdfe24 1876 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1877 return float16_round_pack_canonical(pa, status);
1878}
1879
2abdfe24
RH
1880float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
1881{
1882 return int64_to_float16_scalbn(a, scale, status);
1883}
1884
1885float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
1886{
1887 return int64_to_float16_scalbn(a, scale, status);
1888}
1889
1890float16 int64_to_float16(int64_t a, float_status *status)
1891{
1892 return int64_to_float16_scalbn(a, 0, status);
1893}
1894
c02e1fb8
AB
1895float16 int32_to_float16(int32_t a, float_status *status)
1896{
2abdfe24 1897 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1898}
1899
1900float16 int16_to_float16(int16_t a, float_status *status)
1901{
2abdfe24 1902 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1903}
1904
2abdfe24 1905float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1906{
2abdfe24 1907 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1908 return float32_round_pack_canonical(pa, status);
1909}
1910
2abdfe24
RH
1911float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
1912{
1913 return int64_to_float32_scalbn(a, scale, status);
1914}
1915
1916float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
1917{
1918 return int64_to_float32_scalbn(a, scale, status);
1919}
1920
1921float32 int64_to_float32(int64_t a, float_status *status)
1922{
1923 return int64_to_float32_scalbn(a, 0, status);
1924}
1925
c02e1fb8
AB
1926float32 int32_to_float32(int32_t a, float_status *status)
1927{
2abdfe24 1928 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1929}
1930
1931float32 int16_to_float32(int16_t a, float_status *status)
1932{
2abdfe24 1933 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1934}
1935
2abdfe24 1936float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1937{
2abdfe24 1938 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1939 return float64_round_pack_canonical(pa, status);
1940}
1941
2abdfe24
RH
1942float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
1943{
1944 return int64_to_float64_scalbn(a, scale, status);
1945}
1946
1947float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
1948{
1949 return int64_to_float64_scalbn(a, scale, status);
1950}
1951
1952float64 int64_to_float64(int64_t a, float_status *status)
1953{
1954 return int64_to_float64_scalbn(a, 0, status);
1955}
1956
c02e1fb8
AB
1957float64 int32_to_float64(int32_t a, float_status *status)
1958{
2abdfe24 1959 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1960}
1961
1962float64 int16_to_float64(int16_t a, float_status *status)
1963{
2abdfe24 1964 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1965}
1966
1967
1968/*
1969 * Unsigned Integer to float conversions
1970 *
1971 * Returns the result of converting the unsigned integer `a' to the
1972 * floating-point format. The conversion is performed according to the
1973 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1974 */
1975
2abdfe24 1976static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 1977{
2abdfe24 1978 FloatParts r = { .sign = false };
c02e1fb8
AB
1979
1980 if (a == 0) {
1981 r.cls = float_class_zero;
1982 } else {
2abdfe24 1983 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 1984 r.cls = float_class_normal;
2abdfe24
RH
1985 if ((int64_t)a < 0) {
1986 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
1987 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
1988 r.frac = a;
1989 } else {
2abdfe24
RH
1990 int shift = clz64(a) - 1;
1991 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1992 r.frac = a << shift;
c02e1fb8
AB
1993 }
1994 }
1995
1996 return r;
1997}
1998
2abdfe24 1999float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2000{
2abdfe24 2001 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2002 return float16_round_pack_canonical(pa, status);
2003}
2004
2abdfe24
RH
2005float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2006{
2007 return uint64_to_float16_scalbn(a, scale, status);
2008}
2009
2010float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2011{
2012 return uint64_to_float16_scalbn(a, scale, status);
2013}
2014
2015float16 uint64_to_float16(uint64_t a, float_status *status)
2016{
2017 return uint64_to_float16_scalbn(a, 0, status);
2018}
2019
c02e1fb8
AB
2020float16 uint32_to_float16(uint32_t a, float_status *status)
2021{
2abdfe24 2022 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2023}
2024
2025float16 uint16_to_float16(uint16_t a, float_status *status)
2026{
2abdfe24 2027 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2028}
2029
2abdfe24 2030float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2031{
2abdfe24 2032 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2033 return float32_round_pack_canonical(pa, status);
2034}
2035
2abdfe24
RH
2036float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2037{
2038 return uint64_to_float32_scalbn(a, scale, status);
2039}
2040
2041float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2042{
2043 return uint64_to_float32_scalbn(a, scale, status);
2044}
2045
2046float32 uint64_to_float32(uint64_t a, float_status *status)
2047{
2048 return uint64_to_float32_scalbn(a, 0, status);
2049}
2050
c02e1fb8
AB
2051float32 uint32_to_float32(uint32_t a, float_status *status)
2052{
2abdfe24 2053 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2054}
2055
2056float32 uint16_to_float32(uint16_t a, float_status *status)
2057{
2abdfe24 2058 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2059}
2060
2abdfe24 2061float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2062{
2abdfe24 2063 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2064 return float64_round_pack_canonical(pa, status);
2065}
2066
2abdfe24
RH
2067float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2068{
2069 return uint64_to_float64_scalbn(a, scale, status);
2070}
2071
2072float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2073{
2074 return uint64_to_float64_scalbn(a, scale, status);
2075}
2076
2077float64 uint64_to_float64(uint64_t a, float_status *status)
2078{
2079 return uint64_to_float64_scalbn(a, 0, status);
2080}
2081
c02e1fb8
AB
2082float64 uint32_to_float64(uint32_t a, float_status *status)
2083{
2abdfe24 2084 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2085}
2086
2087float64 uint16_to_float64(uint16_t a, float_status *status)
2088{
2abdfe24 2089 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2090}
2091
89360067
AB
2092/* Float Min/Max */
2093/* min() and max() functions. These can't be implemented as
2094 * 'compare and pick one input' because that would mishandle
2095 * NaNs and +0 vs -0.
2096 *
2097 * minnum() and maxnum() functions. These are similar to the min()
2098 * and max() functions but if one of the arguments is a QNaN and
2099 * the other is numerical then the numerical argument is returned.
2100 * SNaNs will get quietened before being returned.
2101 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
2102 * and maxNum() operations. min() and max() are the typical min/max
2103 * semantics provided by many CPUs which predate that specification.
2104 *
2105 * minnummag() and maxnummag() functions correspond to minNumMag()
2106 * and maxNumMag() from IEEE 754-2008.
2107 */
2108static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2109 bool ieee, bool ismag, float_status *s)
2110{
2111 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2112 if (ieee) {
2113 /* Takes two floating-point values `a' and `b', one of
2114 * which is a NaN, and returns the appropriate NaN
2115 * result. If either `a' or `b' is a signaling NaN,
2116 * the invalid exception is raised.
2117 */
2118 if (is_snan(a.cls) || is_snan(b.cls)) {
2119 return pick_nan(a, b, s);
2120 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2121 return b;
2122 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2123 return a;
2124 }
2125 }
2126 return pick_nan(a, b, s);
2127 } else {
2128 int a_exp, b_exp;
89360067
AB
2129
2130 switch (a.cls) {
2131 case float_class_normal:
2132 a_exp = a.exp;
2133 break;
2134 case float_class_inf:
2135 a_exp = INT_MAX;
2136 break;
2137 case float_class_zero:
2138 a_exp = INT_MIN;
2139 break;
2140 default:
2141 g_assert_not_reached();
2142 break;
2143 }
2144 switch (b.cls) {
2145 case float_class_normal:
2146 b_exp = b.exp;
2147 break;
2148 case float_class_inf:
2149 b_exp = INT_MAX;
2150 break;
2151 case float_class_zero:
2152 b_exp = INT_MIN;
2153 break;
2154 default:
2155 g_assert_not_reached();
2156 break;
2157 }
2158
6245327a
EC
2159 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2160 bool a_less = a_exp < b_exp;
2161 if (a_exp == b_exp) {
2162 a_less = a.frac < b.frac;
2163 }
2164 return a_less ^ ismin ? b : a;
89360067
AB
2165 }
2166
6245327a 2167 if (a.sign == b.sign) {
89360067
AB
2168 bool a_less = a_exp < b_exp;
2169 if (a_exp == b_exp) {
2170 a_less = a.frac < b.frac;
2171 }
6245327a 2172 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2173 } else {
6245327a 2174 return a.sign ^ ismin ? b : a;
89360067
AB
2175 }
2176 }
2177}
2178
2179#define MINMAX(sz, name, ismin, isiee, ismag) \
2180float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2181 float_status *s) \
2182{ \
2183 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2184 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2185 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2186 \
2187 return float ## sz ## _round_pack_canonical(pr, s); \
2188}
2189
2190MINMAX(16, min, true, false, false)
2191MINMAX(16, minnum, true, true, false)
2192MINMAX(16, minnummag, true, true, true)
2193MINMAX(16, max, false, false, false)
2194MINMAX(16, maxnum, false, true, false)
2195MINMAX(16, maxnummag, false, true, true)
2196
2197MINMAX(32, min, true, false, false)
2198MINMAX(32, minnum, true, true, false)
2199MINMAX(32, minnummag, true, true, true)
2200MINMAX(32, max, false, false, false)
2201MINMAX(32, maxnum, false, true, false)
2202MINMAX(32, maxnummag, false, true, true)
2203
2204MINMAX(64, min, true, false, false)
2205MINMAX(64, minnum, true, true, false)
2206MINMAX(64, minnummag, true, true, true)
2207MINMAX(64, max, false, false, false)
2208MINMAX(64, maxnum, false, true, false)
2209MINMAX(64, maxnummag, false, true, true)
2210
2211#undef MINMAX
2212
0c4c9092
AB
2213/* Floating point compare */
2214static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2215 float_status *s)
2216{
2217 if (is_nan(a.cls) || is_nan(b.cls)) {
2218 if (!is_quiet ||
2219 a.cls == float_class_snan ||
2220 b.cls == float_class_snan) {
2221 s->float_exception_flags |= float_flag_invalid;
2222 }
2223 return float_relation_unordered;
2224 }
2225
2226 if (a.cls == float_class_zero) {
2227 if (b.cls == float_class_zero) {
2228 return float_relation_equal;
2229 }
2230 return b.sign ? float_relation_greater : float_relation_less;
2231 } else if (b.cls == float_class_zero) {
2232 return a.sign ? float_relation_less : float_relation_greater;
2233 }
2234
2235 /* The only really important thing about infinity is its sign. If
2236 * both are infinities the sign marks the smallest of the two.
2237 */
2238 if (a.cls == float_class_inf) {
2239 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2240 return float_relation_equal;
2241 }
2242 return a.sign ? float_relation_less : float_relation_greater;
2243 } else if (b.cls == float_class_inf) {
2244 return b.sign ? float_relation_greater : float_relation_less;
2245 }
2246
2247 if (a.sign != b.sign) {
2248 return a.sign ? float_relation_less : float_relation_greater;
2249 }
2250
2251 if (a.exp == b.exp) {
2252 if (a.frac == b.frac) {
2253 return float_relation_equal;
2254 }
2255 if (a.sign) {
2256 return a.frac > b.frac ?
2257 float_relation_less : float_relation_greater;
2258 } else {
2259 return a.frac > b.frac ?
2260 float_relation_greater : float_relation_less;
2261 }
2262 } else {
2263 if (a.sign) {
2264 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2265 } else {
2266 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2267 }
2268 }
2269}
2270
2271#define COMPARE(sz) \
2272int float ## sz ## _compare(float ## sz a, float ## sz b, \
2273 float_status *s) \
2274{ \
2275 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2276 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2277 return compare_floats(pa, pb, false, s); \
2278} \
2279int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
2280 float_status *s) \
2281{ \
2282 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2283 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2284 return compare_floats(pa, pb, true, s); \
2285}
2286
2287COMPARE(16)
2288COMPARE(32)
2289COMPARE(64)
2290
2291#undef COMPARE
2292
0bfc9f19
AB
2293/* Multiply A by 2 raised to the power N. */
2294static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
2295{
2296 if (unlikely(is_nan(a.cls))) {
2297 return return_nan(a, s);
2298 }
2299 if (a.cls == float_class_normal) {
ce8d4082
RH
2300 /* The largest float type (even though not supported by FloatParts)
2301 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
2302 * still allows rounding to infinity, without allowing overflow
2303 * within the int32_t that backs FloatParts.exp.
2304 */
2305 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
2306 a.exp += n;
2307 }
2308 return a;
2309}
2310
2311float16 float16_scalbn(float16 a, int n, float_status *status)
2312{
2313 FloatParts pa = float16_unpack_canonical(a, status);
2314 FloatParts pr = scalbn_decomposed(pa, n, status);
2315 return float16_round_pack_canonical(pr, status);
2316}
2317
2318float32 float32_scalbn(float32 a, int n, float_status *status)
2319{
2320 FloatParts pa = float32_unpack_canonical(a, status);
2321 FloatParts pr = scalbn_decomposed(pa, n, status);
2322 return float32_round_pack_canonical(pr, status);
2323}
2324
2325float64 float64_scalbn(float64 a, int n, float_status *status)
2326{
2327 FloatParts pa = float64_unpack_canonical(a, status);
2328 FloatParts pr = scalbn_decomposed(pa, n, status);
2329 return float64_round_pack_canonical(pr, status);
2330}
2331
c13bb2da
AB
2332/*
2333 * Square Root
2334 *
2335 * The old softfloat code did an approximation step before zeroing in
2336 * on the final result. However for simplicity we just compute the
2337 * square root by iterating down from the implicit bit to enough extra
2338 * bits to ensure we get a correctly rounded result.
2339 *
2340 * This does mean however the calculation is slower than before,
2341 * especially for 64 bit floats.
2342 */
2343
2344static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2345{
2346 uint64_t a_frac, r_frac, s_frac;
2347 int bit, last_bit;
2348
2349 if (is_nan(a.cls)) {
2350 return return_nan(a, s);
2351 }
2352 if (a.cls == float_class_zero) {
2353 return a; /* sqrt(+-0) = +-0 */
2354 }
2355 if (a.sign) {
2356 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2357 return parts_default_nan(s);
c13bb2da
AB
2358 }
2359 if (a.cls == float_class_inf) {
2360 return a; /* sqrt(+inf) = +inf */
2361 }
2362
2363 assert(a.cls == float_class_normal);
2364
2365 /* We need two overflow bits at the top. Adding room for that is a
2366 * right shift. If the exponent is odd, we can discard the low bit
2367 * by multiplying the fraction by 2; that's a left shift. Combine
2368 * those and we shift right if the exponent is even.
2369 */
2370 a_frac = a.frac;
2371 if (!(a.exp & 1)) {
2372 a_frac >>= 1;
2373 }
2374 a.exp >>= 1;
2375
2376 /* Bit-by-bit computation of sqrt. */
2377 r_frac = 0;
2378 s_frac = 0;
2379
2380 /* Iterate from implicit bit down to the 3 extra bits to compute a
2381 * properly rounded result. Remember we've inserted one more bit
2382 * at the top, so these positions are one less.
2383 */
2384 bit = DECOMPOSED_BINARY_POINT - 1;
2385 last_bit = MAX(p->frac_shift - 4, 0);
2386 do {
2387 uint64_t q = 1ULL << bit;
2388 uint64_t t_frac = s_frac + q;
2389 if (t_frac <= a_frac) {
2390 s_frac = t_frac + q;
2391 a_frac -= t_frac;
2392 r_frac += q;
2393 }
2394 a_frac <<= 1;
2395 } while (--bit >= last_bit);
2396
2397 /* Undo the right shift done above. If there is any remaining
2398 * fraction, the result is inexact. Set the sticky bit.
2399 */
2400 a.frac = (r_frac << 1) + (a_frac != 0);
2401
2402 return a;
2403}
2404
2405float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
2406{
2407 FloatParts pa = float16_unpack_canonical(a, status);
2408 FloatParts pr = sqrt_float(pa, status, &float16_params);
2409 return float16_round_pack_canonical(pr, status);
2410}
2411
2412float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
2413{
2414 FloatParts pa = float32_unpack_canonical(a, status);
2415 FloatParts pr = sqrt_float(pa, status, &float32_params);
2416 return float32_round_pack_canonical(pr, status);
2417}
2418
2419float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2420{
2421 FloatParts pa = float64_unpack_canonical(a, status);
2422 FloatParts pr = sqrt_float(pa, status, &float64_params);
2423 return float64_round_pack_canonical(pr, status);
2424}
2425
0218a16e
RH
2426/*----------------------------------------------------------------------------
2427| The pattern for a default generated NaN.
2428*----------------------------------------------------------------------------*/
2429
2430float16 float16_default_nan(float_status *status)
2431{
2432 FloatParts p = parts_default_nan(status);
2433 p.frac >>= float16_params.frac_shift;
2434 return float16_pack_raw(p);
2435}
2436
2437float32 float32_default_nan(float_status *status)
2438{
2439 FloatParts p = parts_default_nan(status);
2440 p.frac >>= float32_params.frac_shift;
2441 return float32_pack_raw(p);
2442}
2443
2444float64 float64_default_nan(float_status *status)
2445{
2446 FloatParts p = parts_default_nan(status);
2447 p.frac >>= float64_params.frac_shift;
2448 return float64_pack_raw(p);
2449}
2450
2451float128 float128_default_nan(float_status *status)
2452{
2453 FloatParts p = parts_default_nan(status);
2454 float128 r;
2455
2456 /* Extrapolate from the choices made by parts_default_nan to fill
2457 * in the quad-floating format. If the low bit is set, assume we
2458 * want to set all non-snan bits.
2459 */
2460 r.low = -(p.frac & 1);
2461 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
2462 r.high |= LIT64(0x7FFF000000000000);
2463 r.high |= (uint64_t)p.sign << 63;
2464
2465 return r;
2466}
c13bb2da 2467
158142c2 2468/*----------------------------------------------------------------------------
377ed926
RH
2469| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
2470*----------------------------------------------------------------------------*/
2471
2472float16 float16_silence_nan(float16 a, float_status *status)
2473{
2474 FloatParts p = float16_unpack_raw(a);
2475 p.frac <<= float16_params.frac_shift;
2476 p = parts_silence_nan(p, status);
2477 p.frac >>= float16_params.frac_shift;
2478 return float16_pack_raw(p);
2479}
2480
2481float32 float32_silence_nan(float32 a, float_status *status)
2482{
2483 FloatParts p = float32_unpack_raw(a);
2484 p.frac <<= float32_params.frac_shift;
2485 p = parts_silence_nan(p, status);
2486 p.frac >>= float32_params.frac_shift;
2487 return float32_pack_raw(p);
2488}
2489
2490float64 float64_silence_nan(float64 a, float_status *status)
2491{
2492 FloatParts p = float64_unpack_raw(a);
2493 p.frac <<= float64_params.frac_shift;
2494 p = parts_silence_nan(p, status);
2495 p.frac >>= float64_params.frac_shift;
2496 return float64_pack_raw(p);
2497}
2498
2499/*----------------------------------------------------------------------------
158142c2
FB
2500| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2501| and 7, and returns the properly rounded 32-bit integer corresponding to the
2502| input. If `zSign' is 1, the input is negated before being converted to an
2503| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2504| is simply rounded to an integer, with the inexact exception raised if the
2505| input cannot be represented exactly as an integer. However, if the fixed-
2506| point input is too large, the invalid exception is raised and the largest
2507| positive or negative integer is returned.
2508*----------------------------------------------------------------------------*/
2509
f4014512 2510static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2511{
8f506c70 2512 int8_t roundingMode;
158142c2 2513 flag roundNearestEven;
8f506c70 2514 int8_t roundIncrement, roundBits;
760e1416 2515 int32_t z;
158142c2 2516
a2f2d288 2517 roundingMode = status->float_rounding_mode;
158142c2 2518 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2519 switch (roundingMode) {
2520 case float_round_nearest_even:
f9288a76 2521 case float_round_ties_away:
dc355b76
PM
2522 roundIncrement = 0x40;
2523 break;
2524 case float_round_to_zero:
2525 roundIncrement = 0;
2526 break;
2527 case float_round_up:
2528 roundIncrement = zSign ? 0 : 0x7f;
2529 break;
2530 case float_round_down:
2531 roundIncrement = zSign ? 0x7f : 0;
2532 break;
2533 default:
2534 abort();
158142c2
FB
2535 }
2536 roundBits = absZ & 0x7F;
2537 absZ = ( absZ + roundIncrement )>>7;
2538 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2539 z = absZ;
2540 if ( zSign ) z = - z;
2541 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2542 float_raise(float_flag_invalid, status);
bb98fe42 2543 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2544 }
a2f2d288
PM
2545 if (roundBits) {
2546 status->float_exception_flags |= float_flag_inexact;
2547 }
158142c2
FB
2548 return z;
2549
2550}
2551
2552/*----------------------------------------------------------------------------
2553| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2554| `absZ1', with binary point between bits 63 and 64 (between the input words),
2555| and returns the properly rounded 64-bit integer corresponding to the input.
2556| If `zSign' is 1, the input is negated before being converted to an integer.
2557| Ordinarily, the fixed-point input is simply rounded to an integer, with
2558| the inexact exception raised if the input cannot be represented exactly as
2559| an integer. However, if the fixed-point input is too large, the invalid
2560| exception is raised and the largest positive or negative integer is
2561| returned.
2562*----------------------------------------------------------------------------*/
2563
f42c2224 2564static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2565 float_status *status)
158142c2 2566{
8f506c70 2567 int8_t roundingMode;
158142c2 2568 flag roundNearestEven, increment;
760e1416 2569 int64_t z;
158142c2 2570
a2f2d288 2571 roundingMode = status->float_rounding_mode;
158142c2 2572 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2573 switch (roundingMode) {
2574 case float_round_nearest_even:
f9288a76 2575 case float_round_ties_away:
dc355b76
PM
2576 increment = ((int64_t) absZ1 < 0);
2577 break;
2578 case float_round_to_zero:
2579 increment = 0;
2580 break;
2581 case float_round_up:
2582 increment = !zSign && absZ1;
2583 break;
2584 case float_round_down:
2585 increment = zSign && absZ1;
2586 break;
2587 default:
2588 abort();
158142c2
FB
2589 }
2590 if ( increment ) {
2591 ++absZ0;
2592 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2593 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2594 }
2595 z = absZ0;
2596 if ( zSign ) z = - z;
2597 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2598 overflow:
ff32e16e 2599 float_raise(float_flag_invalid, status);
158142c2 2600 return
bb98fe42 2601 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2602 : LIT64( 0x7FFFFFFFFFFFFFFF );
2603 }
a2f2d288
PM
2604 if (absZ1) {
2605 status->float_exception_flags |= float_flag_inexact;
2606 }
158142c2
FB
2607 return z;
2608
2609}
2610
fb3ea83a
TM
2611/*----------------------------------------------------------------------------
2612| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2613| `absZ1', with binary point between bits 63 and 64 (between the input words),
2614| and returns the properly rounded 64-bit unsigned integer corresponding to the
2615| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2616| with the inexact exception raised if the input cannot be represented exactly
2617| as an integer. However, if the fixed-point input is too large, the invalid
2618| exception is raised and the largest unsigned integer is returned.
2619*----------------------------------------------------------------------------*/
2620
f42c2224 2621static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2622 uint64_t absZ1, float_status *status)
fb3ea83a 2623{
8f506c70 2624 int8_t roundingMode;
fb3ea83a
TM
2625 flag roundNearestEven, increment;
2626
a2f2d288 2627 roundingMode = status->float_rounding_mode;
fb3ea83a 2628 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2629 switch (roundingMode) {
2630 case float_round_nearest_even:
f9288a76 2631 case float_round_ties_away:
dc355b76
PM
2632 increment = ((int64_t)absZ1 < 0);
2633 break;
2634 case float_round_to_zero:
2635 increment = 0;
2636 break;
2637 case float_round_up:
2638 increment = !zSign && absZ1;
2639 break;
2640 case float_round_down:
2641 increment = zSign && absZ1;
2642 break;
2643 default:
2644 abort();
fb3ea83a
TM
2645 }
2646 if (increment) {
2647 ++absZ0;
2648 if (absZ0 == 0) {
ff32e16e 2649 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2650 return LIT64(0xFFFFFFFFFFFFFFFF);
2651 }
2652 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2653 }
2654
2655 if (zSign && absZ0) {
ff32e16e 2656 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2657 return 0;
2658 }
2659
2660 if (absZ1) {
a2f2d288 2661 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2662 }
2663 return absZ0;
2664}
2665
37d18660
PM
2666/*----------------------------------------------------------------------------
2667| If `a' is denormal and we are in flush-to-zero mode then set the
2668| input-denormal exception and return zero. Otherwise just return the value.
2669*----------------------------------------------------------------------------*/
e5a41ffa 2670float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2671{
a2f2d288 2672 if (status->flush_inputs_to_zero) {
37d18660 2673 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2674 float_raise(float_flag_input_denormal, status);
37d18660
PM
2675 return make_float32(float32_val(a) & 0x80000000);
2676 }
2677 }
2678 return a;
2679}
2680
158142c2
FB
2681/*----------------------------------------------------------------------------
2682| Normalizes the subnormal single-precision floating-point value represented
2683| by the denormalized significand `aSig'. The normalized exponent and
2684| significand are stored at the locations pointed to by `zExpPtr' and
2685| `zSigPtr', respectively.
2686*----------------------------------------------------------------------------*/
2687
2688static void
0c48262d 2689 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
158142c2 2690{
8f506c70 2691 int8_t shiftCount;
158142c2
FB
2692
2693 shiftCount = countLeadingZeros32( aSig ) - 8;
2694 *zSigPtr = aSig<<shiftCount;
2695 *zExpPtr = 1 - shiftCount;
2696
2697}
2698
158142c2
FB
2699/*----------------------------------------------------------------------------
2700| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2701| and significand `zSig', and returns the proper single-precision floating-
2702| point value corresponding to the abstract input. Ordinarily, the abstract
2703| value is simply rounded and packed into the single-precision format, with
2704| the inexact exception raised if the abstract input cannot be represented
2705| exactly. However, if the abstract value is too large, the overflow and
2706| inexact exceptions are raised and an infinity or maximal finite value is
2707| returned. If the abstract value is too small, the input value is rounded to
2708| a subnormal number, and the underflow and inexact exceptions are raised if
2709| the abstract input cannot be represented exactly as a subnormal single-
2710| precision floating-point number.
2711| The input significand `zSig' has its binary point between bits 30
2712| and 29, which is 7 bits to the left of the usual location. This shifted
2713| significand must be normalized or smaller. If `zSig' is not normalized,
2714| `zExp' must be 0; in that case, the result returned is a subnormal number,
2715| and it must not require rounding. In the usual case that `zSig' is
2716| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2717| The handling of underflow and overflow follows the IEC/IEEE Standard for
2718| Binary Floating-Point Arithmetic.
2719*----------------------------------------------------------------------------*/
2720
0c48262d 2721static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2722 float_status *status)
158142c2 2723{
8f506c70 2724 int8_t roundingMode;
158142c2 2725 flag roundNearestEven;
8f506c70 2726 int8_t roundIncrement, roundBits;
158142c2
FB
2727 flag isTiny;
2728
a2f2d288 2729 roundingMode = status->float_rounding_mode;
158142c2 2730 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2731 switch (roundingMode) {
2732 case float_round_nearest_even:
f9288a76 2733 case float_round_ties_away:
dc355b76
PM
2734 roundIncrement = 0x40;
2735 break;
2736 case float_round_to_zero:
2737 roundIncrement = 0;
2738 break;
2739 case float_round_up:
2740 roundIncrement = zSign ? 0 : 0x7f;
2741 break;
2742 case float_round_down:
2743 roundIncrement = zSign ? 0x7f : 0;
2744 break;
2745 default:
2746 abort();
2747 break;
158142c2
FB
2748 }
2749 roundBits = zSig & 0x7F;
bb98fe42 2750 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2751 if ( ( 0xFD < zExp )
2752 || ( ( zExp == 0xFD )
bb98fe42 2753 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2754 ) {
ff32e16e 2755 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2756 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2757 }
2758 if ( zExp < 0 ) {
a2f2d288 2759 if (status->flush_to_zero) {
ff32e16e 2760 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2761 return packFloat32(zSign, 0, 0);
2762 }
158142c2 2763 isTiny =
a2f2d288
PM
2764 (status->float_detect_tininess
2765 == float_tininess_before_rounding)
158142c2
FB
2766 || ( zExp < -1 )
2767 || ( zSig + roundIncrement < 0x80000000 );
2768 shift32RightJamming( zSig, - zExp, &zSig );
2769 zExp = 0;
2770 roundBits = zSig & 0x7F;
ff32e16e
PM
2771 if (isTiny && roundBits) {
2772 float_raise(float_flag_underflow, status);
2773 }
158142c2
FB
2774 }
2775 }
a2f2d288
PM
2776 if (roundBits) {
2777 status->float_exception_flags |= float_flag_inexact;
2778 }
158142c2
FB
2779 zSig = ( zSig + roundIncrement )>>7;
2780 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2781 if ( zSig == 0 ) zExp = 0;
2782 return packFloat32( zSign, zExp, zSig );
2783
2784}
2785
2786/*----------------------------------------------------------------------------
2787| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2788| and significand `zSig', and returns the proper single-precision floating-
2789| point value corresponding to the abstract input. This routine is just like
2790| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2791| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2792| floating-point exponent.
2793*----------------------------------------------------------------------------*/
2794
2795static float32
0c48262d 2796 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2797 float_status *status)
158142c2 2798{
8f506c70 2799 int8_t shiftCount;
158142c2
FB
2800
2801 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2802 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2803 status);
158142c2
FB
2804
2805}
2806
37d18660
PM
2807/*----------------------------------------------------------------------------
2808| If `a' is denormal and we are in flush-to-zero mode then set the
2809| input-denormal exception and return zero. Otherwise just return the value.
2810*----------------------------------------------------------------------------*/
e5a41ffa 2811float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2812{
a2f2d288 2813 if (status->flush_inputs_to_zero) {
37d18660 2814 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2815 float_raise(float_flag_input_denormal, status);
37d18660
PM
2816 return make_float64(float64_val(a) & (1ULL << 63));
2817 }
2818 }
2819 return a;
2820}
2821
158142c2
FB
2822/*----------------------------------------------------------------------------
2823| Normalizes the subnormal double-precision floating-point value represented
2824| by the denormalized significand `aSig'. The normalized exponent and
2825| significand are stored at the locations pointed to by `zExpPtr' and
2826| `zSigPtr', respectively.
2827*----------------------------------------------------------------------------*/
2828
2829static void
0c48262d 2830 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
158142c2 2831{
8f506c70 2832 int8_t shiftCount;
158142c2
FB
2833
2834 shiftCount = countLeadingZeros64( aSig ) - 11;
2835 *zSigPtr = aSig<<shiftCount;
2836 *zExpPtr = 1 - shiftCount;
2837
2838}
2839
2840/*----------------------------------------------------------------------------
2841| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2842| double-precision floating-point value, returning the result. After being
2843| shifted into the proper positions, the three fields are simply added
2844| together to form the result. This means that any integer portion of `zSig'
2845| will be added into the exponent. Since a properly normalized significand
2846| will have an integer portion equal to 1, the `zExp' input should be 1 less
2847| than the desired result exponent whenever `zSig' is a complete, normalized
2848| significand.
2849*----------------------------------------------------------------------------*/
2850
0c48262d 2851static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2852{
2853
f090c9d4 2854 return make_float64(
bb98fe42 2855 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2856
2857}
2858
2859/*----------------------------------------------------------------------------
2860| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2861| and significand `zSig', and returns the proper double-precision floating-
2862| point value corresponding to the abstract input. Ordinarily, the abstract
2863| value is simply rounded and packed into the double-precision format, with
2864| the inexact exception raised if the abstract input cannot be represented
2865| exactly. However, if the abstract value is too large, the overflow and
2866| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2867| returned. If the abstract value is too small, the input value is rounded to
2868| a subnormal number, and the underflow and inexact exceptions are raised if
2869| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2870| precision floating-point number.
2871| The input significand `zSig' has its binary point between bits 62
2872| and 61, which is 10 bits to the left of the usual location. This shifted
2873| significand must be normalized or smaller. If `zSig' is not normalized,
2874| `zExp' must be 0; in that case, the result returned is a subnormal number,
2875| and it must not require rounding. In the usual case that `zSig' is
2876| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2877| The handling of underflow and overflow follows the IEC/IEEE Standard for
2878| Binary Floating-Point Arithmetic.
2879*----------------------------------------------------------------------------*/
2880
0c48262d 2881static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2882 float_status *status)
158142c2 2883{
8f506c70 2884 int8_t roundingMode;
158142c2 2885 flag roundNearestEven;
0c48262d 2886 int roundIncrement, roundBits;
158142c2
FB
2887 flag isTiny;
2888
a2f2d288 2889 roundingMode = status->float_rounding_mode;
158142c2 2890 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2891 switch (roundingMode) {
2892 case float_round_nearest_even:
f9288a76 2893 case float_round_ties_away:
dc355b76
PM
2894 roundIncrement = 0x200;
2895 break;
2896 case float_round_to_zero:
2897 roundIncrement = 0;
2898 break;
2899 case float_round_up:
2900 roundIncrement = zSign ? 0 : 0x3ff;
2901 break;
2902 case float_round_down:
2903 roundIncrement = zSign ? 0x3ff : 0;
2904 break;
9ee6f678
BR
2905 case float_round_to_odd:
2906 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2907 break;
dc355b76
PM
2908 default:
2909 abort();
158142c2
FB
2910 }
2911 roundBits = zSig & 0x3FF;
bb98fe42 2912 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2913 if ( ( 0x7FD < zExp )
2914 || ( ( zExp == 0x7FD )
bb98fe42 2915 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2916 ) {
9ee6f678
BR
2917 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2918 roundIncrement != 0;
ff32e16e 2919 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2920 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2921 }
2922 if ( zExp < 0 ) {
a2f2d288 2923 if (status->flush_to_zero) {
ff32e16e 2924 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2925 return packFloat64(zSign, 0, 0);
2926 }
158142c2 2927 isTiny =
a2f2d288
PM
2928 (status->float_detect_tininess
2929 == float_tininess_before_rounding)
158142c2
FB
2930 || ( zExp < -1 )
2931 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2932 shift64RightJamming( zSig, - zExp, &zSig );
2933 zExp = 0;
2934 roundBits = zSig & 0x3FF;
ff32e16e
PM
2935 if (isTiny && roundBits) {
2936 float_raise(float_flag_underflow, status);
2937 }
9ee6f678
BR
2938 if (roundingMode == float_round_to_odd) {
2939 /*
2940 * For round-to-odd case, the roundIncrement depends on
2941 * zSig which just changed.
2942 */
2943 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2944 }
158142c2
FB
2945 }
2946 }
a2f2d288
PM
2947 if (roundBits) {
2948 status->float_exception_flags |= float_flag_inexact;
2949 }
158142c2
FB
2950 zSig = ( zSig + roundIncrement )>>10;
2951 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2952 if ( zSig == 0 ) zExp = 0;
2953 return packFloat64( zSign, zExp, zSig );
2954
2955}
2956
2957/*----------------------------------------------------------------------------
2958| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2959| and significand `zSig', and returns the proper double-precision floating-
2960| point value corresponding to the abstract input. This routine is just like
2961| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2962| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2963| floating-point exponent.
2964*----------------------------------------------------------------------------*/
2965
2966static float64
0c48262d 2967 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2968 float_status *status)
158142c2 2969{
8f506c70 2970 int8_t shiftCount;
158142c2
FB
2971
2972 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2973 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2974 status);
158142c2
FB
2975
2976}
2977
158142c2
FB
2978/*----------------------------------------------------------------------------
2979| Normalizes the subnormal extended double-precision floating-point value
2980| represented by the denormalized significand `aSig'. The normalized exponent
2981| and significand are stored at the locations pointed to by `zExpPtr' and
2982| `zSigPtr', respectively.
2983*----------------------------------------------------------------------------*/
2984
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Move the leading one all the way up to bit 63. */
    int8_t shift = countLeadingZeros64(aSig);

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
2994
2995/*----------------------------------------------------------------------------
2996| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2997| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2998| and returns the proper extended double-precision floating-point value
2999| corresponding to the abstract input. Ordinarily, the abstract value is
3000| rounded and packed into the extended double-precision format, with the
3001| inexact exception raised if the abstract input cannot be represented
3002| exactly. However, if the abstract value is too large, the overflow and
3003| inexact exceptions are raised and an infinity or maximal finite value is
3004| returned. If the abstract value is too small, the input value is rounded to
3005| a subnormal number, and the underflow and inexact exceptions are raised if
3006| the abstract input cannot be represented exactly as a subnormal extended
3007| double-precision floating-point number.
3008| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3009| number of bits as single or double precision, respectively. Otherwise, the
3010| result is rounded to the full precision of the extended double-precision
3011| format.
3012| The input significand must be normalized or smaller. If the input
3013| significand is not normalized, `zExp' must be 0; in that case, the result
3014| returned is a subnormal number, and it must not require rounding. The
3015| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3016| Floating-Point Arithmetic.
3017*----------------------------------------------------------------------------*/
3018
88857aca
LV
3019floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3020 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3021 float_status *status)
158142c2 3022{
8f506c70 3023 int8_t roundingMode;
158142c2 3024 flag roundNearestEven, increment, isTiny;
f42c2224 3025 int64_t roundIncrement, roundMask, roundBits;
158142c2 3026
a2f2d288 3027 roundingMode = status->float_rounding_mode;
158142c2
FB
3028 roundNearestEven = ( roundingMode == float_round_nearest_even );
3029 if ( roundingPrecision == 80 ) goto precision80;
3030 if ( roundingPrecision == 64 ) {
3031 roundIncrement = LIT64( 0x0000000000000400 );
3032 roundMask = LIT64( 0x00000000000007FF );
3033 }
3034 else if ( roundingPrecision == 32 ) {
3035 roundIncrement = LIT64( 0x0000008000000000 );
3036 roundMask = LIT64( 0x000000FFFFFFFFFF );
3037 }
3038 else {
3039 goto precision80;
3040 }
3041 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3042 switch (roundingMode) {
3043 case float_round_nearest_even:
f9288a76 3044 case float_round_ties_away:
dc355b76
PM
3045 break;
3046 case float_round_to_zero:
3047 roundIncrement = 0;
3048 break;
3049 case float_round_up:
3050 roundIncrement = zSign ? 0 : roundMask;
3051 break;
3052 case float_round_down:
3053 roundIncrement = zSign ? roundMask : 0;
3054 break;
3055 default:
3056 abort();
158142c2
FB
3057 }
3058 roundBits = zSig0 & roundMask;
bb98fe42 3059 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3060 if ( ( 0x7FFE < zExp )
3061 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3062 ) {
3063 goto overflow;
3064 }
3065 if ( zExp <= 0 ) {
a2f2d288 3066 if (status->flush_to_zero) {
ff32e16e 3067 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3068 return packFloatx80(zSign, 0, 0);
3069 }
158142c2 3070 isTiny =
a2f2d288
PM
3071 (status->float_detect_tininess
3072 == float_tininess_before_rounding)
158142c2
FB
3073 || ( zExp < 0 )
3074 || ( zSig0 <= zSig0 + roundIncrement );
3075 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3076 zExp = 0;
3077 roundBits = zSig0 & roundMask;
ff32e16e
PM
3078 if (isTiny && roundBits) {
3079 float_raise(float_flag_underflow, status);
3080 }
a2f2d288
PM
3081 if (roundBits) {
3082 status->float_exception_flags |= float_flag_inexact;
3083 }
158142c2 3084 zSig0 += roundIncrement;
bb98fe42 3085 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3086 roundIncrement = roundMask + 1;
3087 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3088 roundMask |= roundIncrement;
3089 }
3090 zSig0 &= ~ roundMask;
3091 return packFloatx80( zSign, zExp, zSig0 );
3092 }
3093 }
a2f2d288
PM
3094 if (roundBits) {
3095 status->float_exception_flags |= float_flag_inexact;
3096 }
158142c2
FB
3097 zSig0 += roundIncrement;
3098 if ( zSig0 < roundIncrement ) {
3099 ++zExp;
3100 zSig0 = LIT64( 0x8000000000000000 );
3101 }
3102 roundIncrement = roundMask + 1;
3103 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3104 roundMask |= roundIncrement;
3105 }
3106 zSig0 &= ~ roundMask;
3107 if ( zSig0 == 0 ) zExp = 0;
3108 return packFloatx80( zSign, zExp, zSig0 );
3109 precision80:
dc355b76
PM
3110 switch (roundingMode) {
3111 case float_round_nearest_even:
f9288a76 3112 case float_round_ties_away:
dc355b76
PM
3113 increment = ((int64_t)zSig1 < 0);
3114 break;
3115 case float_round_to_zero:
3116 increment = 0;
3117 break;
3118 case float_round_up:
3119 increment = !zSign && zSig1;
3120 break;
3121 case float_round_down:
3122 increment = zSign && zSig1;
3123 break;
3124 default:
3125 abort();
158142c2 3126 }
bb98fe42 3127 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3128 if ( ( 0x7FFE < zExp )
3129 || ( ( zExp == 0x7FFE )
3130 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3131 && increment
3132 )
3133 ) {
3134 roundMask = 0;
3135 overflow:
ff32e16e 3136 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3137 if ( ( roundingMode == float_round_to_zero )
3138 || ( zSign && ( roundingMode == float_round_up ) )
3139 || ( ! zSign && ( roundingMode == float_round_down ) )
3140 ) {
3141 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3142 }
0f605c88
LV
3143 return packFloatx80(zSign,
3144 floatx80_infinity_high,
3145 floatx80_infinity_low);
158142c2
FB
3146 }
3147 if ( zExp <= 0 ) {
3148 isTiny =
a2f2d288
PM
3149 (status->float_detect_tininess
3150 == float_tininess_before_rounding)
158142c2
FB
3151 || ( zExp < 0 )
3152 || ! increment
3153 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3154 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3155 zExp = 0;
ff32e16e
PM
3156 if (isTiny && zSig1) {
3157 float_raise(float_flag_underflow, status);
3158 }
a2f2d288
PM
3159 if (zSig1) {
3160 status->float_exception_flags |= float_flag_inexact;
3161 }
dc355b76
PM
3162 switch (roundingMode) {
3163 case float_round_nearest_even:
f9288a76 3164 case float_round_ties_away:
dc355b76
PM
3165 increment = ((int64_t)zSig1 < 0);
3166 break;
3167 case float_round_to_zero:
3168 increment = 0;
3169 break;
3170 case float_round_up:
3171 increment = !zSign && zSig1;
3172 break;
3173 case float_round_down:
3174 increment = zSign && zSig1;
3175 break;
3176 default:
3177 abort();
158142c2
FB
3178 }
3179 if ( increment ) {
3180 ++zSig0;
3181 zSig0 &=
bb98fe42
AF
3182 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3183 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3184 }
3185 return packFloatx80( zSign, zExp, zSig0 );
3186 }
3187 }
a2f2d288
PM
3188 if (zSig1) {
3189 status->float_exception_flags |= float_flag_inexact;
3190 }
158142c2
FB
3191 if ( increment ) {
3192 ++zSig0;
3193 if ( zSig0 == 0 ) {
3194 ++zExp;
3195 zSig0 = LIT64( 0x8000000000000000 );
3196 }
3197 else {
bb98fe42 3198 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3199 }
3200 }
3201 else {
3202 if ( zSig0 == 0 ) zExp = 0;
3203 }
3204 return packFloatx80( zSign, zExp, zSig0 );
3205
3206}
3207
3208/*----------------------------------------------------------------------------
3209| Takes an abstract floating-point value having sign `zSign', exponent
3210| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
3211| and returns the proper extended double-precision floating-point value
3212| corresponding to the abstract input. This routine is just like
3213| `roundAndPackFloatx80' except that the input significand does not have to be
3214| normalized.
3215*----------------------------------------------------------------------------*/
3216
88857aca
LV
3217floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
3218 flag zSign, int32_t zExp,
3219 uint64_t zSig0, uint64_t zSig1,
3220 float_status *status)
158142c2 3221{
8f506c70 3222 int8_t shiftCount;
158142c2
FB
3223
3224 if ( zSig0 == 0 ) {
3225 zSig0 = zSig1;
3226 zSig1 = 0;
3227 zExp -= 64;
3228 }
3229 shiftCount = countLeadingZeros64( zSig0 );
3230 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3231 zExp -= shiftCount;
ff32e16e
PM
3232 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
3233 zSig0, zSig1, status);
158142c2
FB
3234
3235}
3236
158142c2
FB
3237/*----------------------------------------------------------------------------
3238| Returns the least-significant 64 fraction bits of the quadruple-precision
3239| floating-point value `a'.
3240*----------------------------------------------------------------------------*/
3241
a49db98d 3242static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
3243{
3244
3245 return a.low;
3246
3247}
3248
3249/*----------------------------------------------------------------------------
3250| Returns the most-significant 48 fraction bits of the quadruple-precision
3251| floating-point value `a'.
3252*----------------------------------------------------------------------------*/
3253
a49db98d 3254static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
3255{
3256
3257 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
3258
3259}
3260
3261/*----------------------------------------------------------------------------
3262| Returns the exponent bits of the quadruple-precision floating-point value
3263| `a'.
3264*----------------------------------------------------------------------------*/
3265
f4014512 3266static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
3267{
3268
3269 return ( a.high>>48 ) & 0x7FFF;
3270
3271}
3272
3273/*----------------------------------------------------------------------------
3274| Returns the sign bit of the quadruple-precision floating-point value `a'.
3275*----------------------------------------------------------------------------*/
3276
a49db98d 3277static inline flag extractFloat128Sign( float128 a )
158142c2
FB
3278{
3279
3280 return a.high>>63;
3281
3282}
3283
3284/*----------------------------------------------------------------------------
3285| Normalizes the subnormal quadruple-precision floating-point value
3286| represented by the denormalized significand formed by the concatenation of
3287| `aSig0' and `aSig1'. The normalized exponent is stored at the location
3288| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
3289| significand are stored at the location pointed to by `zSig0Ptr', and the
3290| least significant 64 bits of the normalized significand are stored at the
3291| location pointed to by `zSig1Ptr'.
3292*----------------------------------------------------------------------------*/
3293
static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t shiftCount;

    if (aSig0 != 0) {
        /* Leading one is in the high word: one short 128-bit shift. */
        shiftCount = countLeadingZeros64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shiftCount;
        return;
    }

    /* High word is empty: the low word supplies all significant bits. */
    shiftCount = countLeadingZeros64(aSig1) - 15;
    if (shiftCount >= 0) {
        *zSig0Ptr = aSig1 << shiftCount;
        *zSig1Ptr = 0;
    } else {
        /* Fewer than 15 leading zeros: the value straddles both words. */
        *zSig0Ptr = aSig1 >> (-shiftCount);
        *zSig1Ptr = aSig1 << (shiftCount & 63);
    }
    *zExpPtr = -shiftCount - 63;
}
3324
3325/*----------------------------------------------------------------------------
3326| Packs the sign `zSign', the exponent `zExp', and the significand formed
3327| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
3328| floating-point value, returning the result. After being shifted into the
3329| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
3330| added together to form the most significant 64 bits of the result. This
3331| means that any integer portion of `zSig0' will be added into the exponent.
3332| Since a properly normalized significand will have an integer portion equal
3333| to 1, the `zExp' input should be 1 less than the desired result exponent
3334| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
3335| significand.
3336*----------------------------------------------------------------------------*/
3337
a49db98d 3338static inline float128
f4014512 3339 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
3340{
3341 float128 z;
3342
3343 z.low = zSig1;
bb98fe42 3344 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
3345 return z;
3346
3347}
3348
3349/*----------------------------------------------------------------------------
3350| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3351| and extended significand formed by the concatenation of `zSig0', `zSig1',
3352| and `zSig2', and returns the proper quadruple-precision floating-point value
3353| corresponding to the abstract input. Ordinarily, the abstract value is
3354| simply rounded and packed into the quadruple-precision format, with the
3355| inexact exception raised if the abstract input cannot be represented
3356| exactly. However, if the abstract value is too large, the overflow and
3357| inexact exceptions are raised and an infinity or maximal finite value is
3358| returned. If the abstract value is too small, the input value is rounded to
3359| a subnormal number, and the underflow and inexact exceptions are raised if
3360| the abstract input cannot be represented exactly as a subnormal quadruple-
3361| precision floating-point number.
3362| The input significand must be normalized or smaller. If the input
3363| significand is not normalized, `zExp' must be 0; in that case, the result
3364| returned is a subnormal number, and it must not require rounding. In the
3365| usual case that the input significand is normalized, `zExp' must be 1 less
3366| than the ``true'' floating-point exponent. The handling of underflow and
3367| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3368*----------------------------------------------------------------------------*/
3369
f4014512 3370static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3371 uint64_t zSig0, uint64_t zSig1,
3372 uint64_t zSig2, float_status *status)
158142c2 3373{
8f506c70 3374 int8_t roundingMode;
158142c2
FB
3375 flag roundNearestEven, increment, isTiny;
3376
a2f2d288 3377 roundingMode = status->float_rounding_mode;
158142c2 3378 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3379 switch (roundingMode) {
3380 case float_round_nearest_even:
f9288a76 3381 case float_round_ties_away:
dc355b76
PM
3382 increment = ((int64_t)zSig2 < 0);
3383 break;
3384 case float_round_to_zero:
3385 increment = 0;
3386 break;
3387 case float_round_up:
3388 increment = !zSign && zSig2;
3389 break;
3390 case float_round_down:
3391 increment = zSign && zSig2;
3392 break;
9ee6f678
BR
3393 case float_round_to_odd:
3394 increment = !(zSig1 & 0x1) && zSig2;
3395 break;
dc355b76
PM
3396 default:
3397 abort();
158142c2 3398 }
bb98fe42 3399 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
3400 if ( ( 0x7FFD < zExp )
3401 || ( ( zExp == 0x7FFD )
3402 && eq128(
3403 LIT64( 0x0001FFFFFFFFFFFF ),
3404 LIT64( 0xFFFFFFFFFFFFFFFF ),
3405 zSig0,
3406 zSig1
3407 )
3408 && increment
3409 )
3410 ) {
ff32e16e 3411 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3412 if ( ( roundingMode == float_round_to_zero )
3413 || ( zSign && ( roundingMode == float_round_up ) )
3414 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3415 || (roundingMode == float_round_to_odd)
158142c2
FB
3416 ) {
3417 return
3418 packFloat128(
3419 zSign,
3420 0x7FFE,
3421 LIT64( 0x0000FFFFFFFFFFFF ),
3422 LIT64( 0xFFFFFFFFFFFFFFFF )
3423 );
3424 }
3425 return packFloat128( zSign, 0x7FFF, 0, 0 );
3426 }
3427 if ( zExp < 0 ) {
a2f2d288 3428 if (status->flush_to_zero) {
ff32e16e 3429 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3430 return packFloat128(zSign, 0, 0, 0);
3431 }
158142c2 3432 isTiny =
a2f2d288
PM
3433 (status->float_detect_tininess
3434 == float_tininess_before_rounding)
158142c2
FB
3435 || ( zExp < -1 )
3436 || ! increment
3437 || lt128(
3438 zSig0,
3439 zSig1,
3440 LIT64( 0x0001FFFFFFFFFFFF ),
3441 LIT64( 0xFFFFFFFFFFFFFFFF )
3442 );
3443 shift128ExtraRightJamming(
3444 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3445 zExp = 0;
ff32e16e
PM
3446 if (isTiny && zSig2) {
3447 float_raise(float_flag_underflow, status);
3448 }
dc355b76
PM
3449 switch (roundingMode) {
3450 case float_round_nearest_even:
f9288a76 3451 case float_round_ties_away:
dc355b76
PM
3452 increment = ((int64_t)zSig2 < 0);
3453 break;
3454 case float_round_to_zero:
3455 increment = 0;
3456 break;
3457 case float_round_up:
3458 increment = !zSign && zSig2;
3459 break;
3460 case float_round_down:
3461 increment = zSign && zSig2;
3462 break;
9ee6f678
BR
3463 case float_round_to_odd:
3464 increment = !(zSig1 & 0x1) && zSig2;
3465 break;
dc355b76
PM
3466 default:
3467 abort();
158142c2
FB
3468 }
3469 }
3470 }
a2f2d288
PM
3471 if (zSig2) {
3472 status->float_exception_flags |= float_flag_inexact;
3473 }
158142c2
FB
3474 if ( increment ) {
3475 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3476 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3477 }
3478 else {
3479 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3480 }
3481 return packFloat128( zSign, zExp, zSig0, zSig1 );
3482
3483}
3484
3485/*----------------------------------------------------------------------------
3486| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3487| and significand formed by the concatenation of `zSig0' and `zSig1', and
3488| returns the proper quadruple-precision floating-point value corresponding
3489| to the abstract input. This routine is just like `roundAndPackFloat128'
3490| except that the input significand has fewer bits and does not have to be
3491| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3492| point exponent.
3493*----------------------------------------------------------------------------*/
3494
f4014512 3495static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3496 uint64_t zSig0, uint64_t zSig1,
3497 float_status *status)
158142c2 3498{
8f506c70 3499 int8_t shiftCount;
bb98fe42 3500 uint64_t zSig2;
158142c2
FB
3501
3502 if ( zSig0 == 0 ) {
3503 zSig0 = zSig1;
3504 zSig1 = 0;
3505 zExp -= 64;
3506 }
3507 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3508 if ( 0 <= shiftCount ) {
3509 zSig2 = 0;
3510 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3511 }
3512 else {
3513 shift128ExtraRightJamming(
3514 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3515 }
3516 zExp -= shiftCount;
ff32e16e 3517 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3518
3519}
3520
158142c2 3521
158142c2
FB
3522/*----------------------------------------------------------------------------
3523| Returns the result of converting the 32-bit two's complement integer `a'
3524| to the extended double-precision floating-point format. The conversion
3525| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3526| Arithmetic.
3527*----------------------------------------------------------------------------*/
3528
e5a41ffa 3529floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3530{
3531 flag zSign;
3a87d009 3532 uint32_t absA;
8f506c70 3533 int8_t shiftCount;
bb98fe42 3534 uint64_t zSig;
158142c2
FB
3535
3536 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3537 zSign = ( a < 0 );
3538 absA = zSign ? - a : a;
3539 shiftCount = countLeadingZeros32( absA ) + 32;
3540 zSig = absA;
3541 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3542
3543}
3544
158142c2
FB
3545/*----------------------------------------------------------------------------
3546| Returns the result of converting the 32-bit two's complement integer `a' to
3547| the quadruple-precision floating-point format. The conversion is performed
3548| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3549*----------------------------------------------------------------------------*/
3550
e5a41ffa 3551float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3552{
3553 flag zSign;
3a87d009 3554 uint32_t absA;
8f506c70 3555 int8_t shiftCount;
bb98fe42 3556 uint64_t zSig0;
158142c2
FB
3557
3558 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3559 zSign = ( a < 0 );
3560 absA = zSign ? - a : a;
3561 shiftCount = countLeadingZeros32( absA ) + 17;
3562 zSig0 = absA;
3563 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3564
3565}
3566
158142c2
FB
3567/*----------------------------------------------------------------------------
3568| Returns the result of converting the 64-bit two's complement integer `a'
3569| to the extended double-precision floating-point format. The conversion
3570| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3571| Arithmetic.
3572*----------------------------------------------------------------------------*/
3573
e5a41ffa 3574floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3575{
3576 flag zSign;
182f42fd 3577 uint64_t absA;
8f506c70 3578 int8_t shiftCount;
158142c2
FB
3579
3580 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3581 zSign = ( a < 0 );
3582 absA = zSign ? - a : a;
3583 shiftCount = countLeadingZeros64( absA );
3584 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3585
3586}
3587
158142c2
FB
3588/*----------------------------------------------------------------------------
3589| Returns the result of converting the 64-bit two's complement integer `a' to
3590| the quadruple-precision floating-point format. The conversion is performed
3591| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3592*----------------------------------------------------------------------------*/
3593
e5a41ffa 3594float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3595{
3596 flag zSign;
182f42fd 3597 uint64_t absA;
8f506c70 3598 int8_t shiftCount;
f4014512 3599 int32_t zExp;
bb98fe42 3600 uint64_t zSig0, zSig1;
158142c2
FB
3601
3602 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3603 zSign = ( a < 0 );
3604 absA = zSign ? - a : a;
3605 shiftCount = countLeadingZeros64( absA ) + 49;
3606 zExp = 0x406E - shiftCount;
3607 if ( 64 <= shiftCount ) {
3608 zSig1 = 0;
3609 zSig0 = absA;
3610 shiftCount -= 64;
3611 }
3612 else {
3613 zSig1 = absA;
3614 zSig0 = 0;
3615 }
3616 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3617 return packFloat128( zSign, zExp, zSig0, zSig1 );
3618
3619}
3620
6bb8e0f1
PM
3621/*----------------------------------------------------------------------------
3622| Returns the result of converting the 64-bit unsigned integer `a'
3623| to the quadruple-precision floating-point format. The conversion is performed
3624| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3625*----------------------------------------------------------------------------*/
3626
e5a41ffa 3627float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3628{
3629 if (a == 0) {
3630 return float128_zero;
3631 }
6603d506 3632 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3633}
3634
158142c2
FB
3635/*----------------------------------------------------------------------------
3636| Returns the result of converting the single-precision floating-point value
3637| `a' to the extended double-precision floating-point format. The conversion
3638| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3639| Arithmetic.
3640*----------------------------------------------------------------------------*/
3641
e5a41ffa 3642floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3643{
3644 flag aSign;
0c48262d 3645 int aExp;
bb98fe42 3646 uint32_t aSig;
158142c2 3647
ff32e16e 3648 a = float32_squash_input_denormal(a, status);
158142c2
FB
3649 aSig = extractFloat32Frac( a );
3650 aExp = extractFloat32Exp( a );
3651 aSign = extractFloat32Sign( a );
3652 if ( aExp == 0xFF ) {
ff32e16e
PM
3653 if (aSig) {
3654 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3655 }
0f605c88
LV
3656 return packFloatx80(aSign,
3657 floatx80_infinity_high,
3658 floatx80_infinity_low);
158142c2
FB
3659 }
3660 if ( aExp == 0 ) {
3661 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3662 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3663 }
3664 aSig |= 0x00800000;
bb98fe42 3665 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3666
3667}
3668
158142c2
FB
3669/*----------------------------------------------------------------------------
3670| Returns the result of converting the single-precision floating-point value
3671| `a' to the quadruple-precision floating-point format. The conversion is
3672| performed according to the IEC/IEEE Standard for Binary Floating-Point
3673| Arithmetic.
3674*----------------------------------------------------------------------------*/
3675
e5a41ffa 3676float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3677{
3678 flag aSign;
0c48262d 3679 int aExp;
bb98fe42 3680 uint32_t aSig;
158142c2 3681
ff32e16e 3682 a = float32_squash_input_denormal(a, status);
158142c2
FB
3683 aSig = extractFloat32Frac( a );
3684 aExp = extractFloat32Exp( a );
3685 aSign = extractFloat32Sign( a );
3686 if ( aExp == 0xFF ) {
ff32e16e
PM
3687 if (aSig) {
3688 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3689 }
158142c2
FB
3690 return packFloat128( aSign, 0x7FFF, 0, 0 );
3691 }
3692 if ( aExp == 0 ) {
3693 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3694 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3695 --aExp;
3696 }
bb98fe42 3697 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3698
3699}
3700
158142c2
FB
3701/*----------------------------------------------------------------------------
3702| Returns the remainder of the single-precision floating-point value `a'
3703| with respect to the corresponding value `b'. The operation is performed
3704| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3705*----------------------------------------------------------------------------*/
3706
e5a41ffa 3707float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3708{
ed086f3d 3709 flag aSign, zSign;
0c48262d 3710 int aExp, bExp, expDiff;
bb98fe42
AF
3711 uint32_t aSig, bSig;
3712 uint32_t q;
3713 uint64_t aSig64, bSig64, q64;
3714 uint32_t alternateASig;
3715 int32_t sigMean;
ff32e16e
PM
3716 a = float32_squash_input_denormal(a, status);
3717 b = float32_squash_input_denormal(b, status);
158142c2
FB
3718
3719 aSig = extractFloat32Frac( a );
3720 aExp = extractFloat32Exp( a );
3721 aSign = extractFloat32Sign( a );
3722 bSig = extractFloat32Frac( b );
3723 bExp = extractFloat32Exp( b );
158142c2
FB
3724 if ( aExp == 0xFF ) {
3725 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3726 return propagateFloat32NaN(a, b, status);
158142c2 3727 }
ff32e16e 3728 float_raise(float_flag_invalid, status);
af39bc8c 3729 return float32_default_nan(status);
158142c2
FB
3730 }
3731 if ( bExp == 0xFF ) {
ff32e16e
PM
3732 if (bSig) {
3733 return propagateFloat32NaN(a, b, status);
3734 }
158142c2
FB
3735 return a;
3736 }
3737 if ( bExp == 0 ) {
3738 if ( bSig == 0 ) {
ff32e16e 3739 float_raise(float_flag_invalid, status);
af39bc8c 3740 return float32_default_nan(status);
158142c2
FB
3741 }
3742 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3743 }
3744 if ( aExp == 0 ) {
3745 if ( aSig == 0 ) return a;
3746 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3747 }
3748 expDiff = aExp - bExp;
3749 aSig |= 0x00800000;
3750 bSig |= 0x00800000;
3751 if ( expDiff < 32 ) {
3752 aSig <<= 8;
3753 bSig <<= 8;
3754 if ( expDiff < 0 ) {
3755 if ( expDiff < -1 ) return a;
3756 aSig >>= 1;
3757 }
3758 q = ( bSig <= aSig );
3759 if ( q ) aSig -= bSig;
3760 if ( 0 < expDiff ) {
bb98fe42 3761 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3762 q >>= 32 - expDiff;
3763 bSig >>= 2;
3764 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3765 }
3766 else {
3767 aSig >>= 2;
3768 bSig >>= 2;
3769 }
3770 }
3771 else {
3772 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3773 aSig64 = ( (uint64_t) aSig )<<40;
3774 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3775 expDiff -= 64;
3776 while ( 0 < expDiff ) {
3777 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3778 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3779 aSig64 = - ( ( bSig * q64 )<<38 );
3780 expDiff -= 62;
3781 }
3782 expDiff += 64;
3783 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3784 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3785 q = q64>>( 64 - expDiff );
3786 bSig <<= 6;
3787 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3788 }
3789 do {
3790 alternateASig = aSig;
3791 ++q;
3792 aSig -= bSig;
bb98fe42 3793 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3794 sigMean = aSig + alternateASig;
3795 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3796 aSig = alternateASig;
3797 }
bb98fe42 3798 zSign = ( (int32_t) aSig < 0 );
158142c2 3799 if ( zSign ) aSig = - aSig;
ff32e16e 3800 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3801}
3802
369be8f6 3803
158142c2 3804
8229c991
AJ
3805/*----------------------------------------------------------------------------
3806| Returns the binary exponential of the single-precision floating-point value
3807| `a'. The operation is performed according to the IEC/IEEE Standard for
3808| Binary Floating-Point Arithmetic.
3809|
3810| Uses the following identities:
3811|
3812| 1. -------------------------------------------------------------------------
3813|      x    x*ln(2)
3814|     2  = e
3815|
3816| 2. -------------------------------------------------------------------------
3817|                      2     3     4     5           n
3818|      x        x     x     x     x     x           x
3819|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3820|               1!    2!    3!    4!    5!          n!
3821*----------------------------------------------------------------------------*/
3822
3823static const float64 float32_exp2_coefficients[15] =
3824{
d5138cf4
PM
3825 const_float64( 0x3ff0000000000000ll ), /* 1 */
3826 const_float64( 0x3fe0000000000000ll ), /* 2 */
3827 const_float64( 0x3fc5555555555555ll ), /* 3 */
3828 const_float64( 0x3fa5555555555555ll ), /* 4 */
3829 const_float64( 0x3f81111111111111ll ), /* 5 */
3830 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3831 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3832 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3833 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3834 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3835 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3836 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3837 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3838 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3839 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3840};
3841
e5a41ffa 3842float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3843{
3844 flag aSign;
0c48262d 3845 int aExp;
bb98fe42 3846 uint32_t aSig;
8229c991
AJ
3847 float64 r, x, xn;
3848 int i;
ff32e16e 3849 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3850
3851 aSig = extractFloat32Frac( a );
3852 aExp = extractFloat32Exp( a );
3853 aSign = extractFloat32Sign( a );
3854
3855 if ( aExp == 0xFF) {
ff32e16e
PM
3856 if (aSig) {
3857 return propagateFloat32NaN(a, float32_zero, status);
3858 }
8229c991
AJ
3859 return (aSign) ? float32_zero : a;
3860 }
3861 if (aExp == 0) {
3862 if (aSig == 0) return float32_one;
3863 }
3864
ff32e16e 3865 float_raise(float_flag_inexact, status);
8229c991
AJ
3866
3867 /* ******************************* */
3868 /* using float64 for approximation */
3869 /* ******************************* */
ff32e16e
PM
3870 x = float32_to_float64(a, status);
3871 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3872
3873 xn = x;
3874 r = float64_one;
3875 for (i = 0 ; i < 15 ; i++) {
3876 float64 f;
3877
ff32e16e
PM
3878 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3879 r = float64_add(r, f, status);
8229c991 3880
ff32e16e 3881 xn = float64_mul(xn, x, status);
8229c991
AJ
3882 }
3883
3884 return float64_to_float32(r, status);
3885}
3886
374dfc33
AJ
3887/*----------------------------------------------------------------------------
3888| Returns the binary log of the single-precision floating-point value `a'.
3889| The operation is performed according to the IEC/IEEE Standard for Binary
3890| Floating-Point Arithmetic.
3891*----------------------------------------------------------------------------*/
e5a41ffa 3892float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3893{
3894 flag aSign, zSign;
0c48262d 3895 int aExp;
bb98fe42 3896 uint32_t aSig, zSig, i;
374dfc33 3897
ff32e16e 3898 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3899 aSig = extractFloat32Frac( a );
3900 aExp = extractFloat32Exp( a );
3901 aSign = extractFloat32Sign( a );
3902
3903 if ( aExp == 0 ) {
3904 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3905 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3906 }
3907 if ( aSign ) {
ff32e16e 3908 float_raise(float_flag_invalid, status);
af39bc8c 3909 return float32_default_nan(status);
374dfc33
AJ
3910 }
3911 if ( aExp == 0xFF ) {
ff32e16e
PM
3912 if (aSig) {
3913 return propagateFloat32NaN(a, float32_zero, status);
3914 }
374dfc33
AJ
3915 return a;
3916 }
3917
3918 aExp -= 0x7F;
3919 aSig |= 0x00800000;
3920 zSign = aExp < 0;
3921 zSig = aExp << 23;
3922
3923 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3924 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3925 if ( aSig & 0x01000000 ) {
3926 aSig >>= 1;
3927 zSig |= i;
3928 }
3929 }
3930
3931 if ( zSign )
3932 zSig = -zSig;
3933
ff32e16e 3934 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3935}
3936
158142c2
FB
3937/*----------------------------------------------------------------------------
3938| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3939| the corresponding value `b', and 0 otherwise. The invalid exception is
3940| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3941| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3942*----------------------------------------------------------------------------*/
3943
e5a41ffa 3944int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3945{
b689362d 3946 uint32_t av, bv;
ff32e16e
PM
3947 a = float32_squash_input_denormal(a, status);
3948 b = float32_squash_input_denormal(b, status);
158142c2
FB
3949
3950 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3951 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3952 ) {
ff32e16e 3953 float_raise(float_flag_invalid, status);
158142c2
FB
3954 return 0;
3955 }
b689362d
AJ
3956 av = float32_val(a);
3957 bv = float32_val(b);
3958 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3959}
3960
3961/*----------------------------------------------------------------------------
3962| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3963| or equal to the corresponding value `b', and 0 otherwise. The invalid
3964| exception is raised if either operand is a NaN. The comparison is performed
3965| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3966*----------------------------------------------------------------------------*/
3967
e5a41ffa 3968int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3969{
3970 flag aSign, bSign;
bb98fe42 3971 uint32_t av, bv;
ff32e16e
PM
3972 a = float32_squash_input_denormal(a, status);
3973 b = float32_squash_input_denormal(b, status);
158142c2
FB
3974
3975 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3976 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3977 ) {
ff32e16e 3978 float_raise(float_flag_invalid, status);
158142c2
FB
3979 return 0;
3980 }
3981 aSign = extractFloat32Sign( a );
3982 bSign = extractFloat32Sign( b );
f090c9d4
PB
3983 av = float32_val(a);
3984 bv = float32_val(b);
bb98fe42 3985 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3986 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3987
3988}
3989
3990/*----------------------------------------------------------------------------
3991| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3992| the corresponding value `b', and 0 otherwise. The invalid exception is
3993| raised if either operand is a NaN. The comparison is performed according
3994| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3995*----------------------------------------------------------------------------*/
3996
e5a41ffa 3997int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3998{
3999 flag aSign, bSign;
bb98fe42 4000 uint32_t av, bv;
ff32e16e
PM
4001 a = float32_squash_input_denormal(a, status);
4002 b = float32_squash_input_denormal(b, status);
158142c2
FB
4003
4004 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4005 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4006 ) {
ff32e16e 4007 float_raise(float_flag_invalid, status);
158142c2
FB
4008 return 0;
4009 }
4010 aSign = extractFloat32Sign( a );
4011 bSign = extractFloat32Sign( b );
f090c9d4
PB
4012 av = float32_val(a);
4013 bv = float32_val(b);
bb98fe42 4014 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4015 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4016
4017}
4018
67b7861d
AJ
4019/*----------------------------------------------------------------------------
4020| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4021| be compared, and 0 otherwise. The invalid exception is raised if either
4022| operand is a NaN. The comparison is performed according to the IEC/IEEE
4023| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4024*----------------------------------------------------------------------------*/
4025
e5a41ffa 4026int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4027{
ff32e16e
PM
4028 a = float32_squash_input_denormal(a, status);
4029 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4030
4031 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4032 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4033 ) {
ff32e16e 4034 float_raise(float_flag_invalid, status);
67b7861d
AJ
4035 return 1;
4036 }
4037 return 0;
4038}
b689362d 4039
158142c2
FB
4040/*----------------------------------------------------------------------------
4041| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4042| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4043| exception. The comparison is performed according to the IEC/IEEE Standard
4044| for Binary Floating-Point Arithmetic.
158142c2
FB
4045*----------------------------------------------------------------------------*/
4046
e5a41ffa 4047int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4048{
ff32e16e
PM
4049 a = float32_squash_input_denormal(a, status);
4050 b = float32_squash_input_denormal(b, status);
158142c2
FB
4051
4052 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4053 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4054 ) {
af39bc8c
AM
4055 if (float32_is_signaling_nan(a, status)
4056 || float32_is_signaling_nan(b, status)) {
ff32e16e 4057 float_raise(float_flag_invalid, status);
b689362d 4058 }
158142c2
FB
4059 return 0;
4060 }
b689362d
AJ
4061 return ( float32_val(a) == float32_val(b) ) ||
4062 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4063}
4064
4065/*----------------------------------------------------------------------------
4066| Returns 1 if the single-precision floating-point value `a' is less than or
4067| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4068| cause an exception. Otherwise, the comparison is performed according to the
4069| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4070*----------------------------------------------------------------------------*/
4071
e5a41ffa 4072int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4073{
4074 flag aSign, bSign;
bb98fe42 4075 uint32_t av, bv;
ff32e16e
PM
4076 a = float32_squash_input_denormal(a, status);
4077 b = float32_squash_input_denormal(b, status);
158142c2
FB
4078
4079 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4080 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4081 ) {
af39bc8c
AM
4082 if (float32_is_signaling_nan(a, status)
4083 || float32_is_signaling_nan(b, status)) {
ff32e16e 4084 float_raise(float_flag_invalid, status);
158142c2
FB
4085 }
4086 return 0;
4087 }
4088 aSign = extractFloat32Sign( a );
4089 bSign = extractFloat32Sign( b );
f090c9d4
PB
4090 av = float32_val(a);
4091 bv = float32_val(b);
bb98fe42 4092 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4093 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4094
4095}
4096
4097/*----------------------------------------------------------------------------
4098| Returns 1 if the single-precision floating-point value `a' is less than
4099| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4100| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4101| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4102*----------------------------------------------------------------------------*/
4103
ab52f973 4104int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4105{
ab52f973
AB
4106 flag aSign, bSign;
4107 uint32_t av, bv;
4108 a = float32_squash_input_denormal(a, status);
4109 b = float32_squash_input_denormal(b, status);
158142c2 4110
ab52f973
AB
4111 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4112 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4113 ) {
4114 if (float32_is_signaling_nan(a, status)
4115 || float32_is_signaling_nan(b, status)) {
ff32e16e 4116 float_raise(float_flag_invalid, status);
158142c2 4117 }
ab52f973 4118 return 0;
158142c2 4119 }
ab52f973
AB
4120 aSign = extractFloat32Sign( a );
4121 bSign = extractFloat32Sign( b );
4122 av = float32_val(a);
4123 bv = float32_val(b);
4124 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4125 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4126
4127}
4128
4129/*----------------------------------------------------------------------------
ab52f973
AB
4130| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4131| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4132| comparison is performed according to the IEC/IEEE Standard for Binary
4133| Floating-Point Arithmetic.
158142c2
FB
4134*----------------------------------------------------------------------------*/
4135
ab52f973 4136int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4137{
ab52f973
AB
4138 a = float32_squash_input_denormal(a, status);
4139 b = float32_squash_input_denormal(b, status);
158142c2 4140
ab52f973
AB
4141 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4142 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4143 ) {
4144 if (float32_is_signaling_nan(a, status)
4145 || float32_is_signaling_nan(b, status)) {
4146 float_raise(float_flag_invalid, status);
158142c2 4147 }
ab52f973 4148 return 1;
158142c2 4149 }
ab52f973 4150 return 0;
158142c2
FB
4151}
4152
210cbd49
AB
4153/*----------------------------------------------------------------------------
4154| If `a' is denormal and we are in flush-to-zero mode then set the
4155| input-denormal exception and return zero. Otherwise just return the value.
4156*----------------------------------------------------------------------------*/
4157float16 float16_squash_input_denormal(float16 a, float_status *status)
4158{
4159 if (status->flush_inputs_to_zero) {
4160 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4161 float_raise(float_flag_input_denormal, status);
4162 return make_float16(float16_val(a) & 0x8000);
4163 }
4164 }
4165 return a;
4166}
4167
158142c2
FB
4168/*----------------------------------------------------------------------------
4169| Returns the result of converting the double-precision floating-point value
4170| `a' to the extended double-precision floating-point format. The conversion
4171| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4172| Arithmetic.
4173*----------------------------------------------------------------------------*/
4174
e5a41ffa 4175floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4176{
4177 flag aSign;
0c48262d 4178 int aExp;
bb98fe42 4179 uint64_t aSig;
158142c2 4180
ff32e16e 4181 a = float64_squash_input_denormal(a, status);
158142c2
FB
4182 aSig = extractFloat64Frac( a );
4183 aExp = extractFloat64Exp( a );
4184 aSign = extractFloat64Sign( a );
4185 if ( aExp == 0x7FF ) {
ff32e16e
PM
4186 if (aSig) {
4187 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4188 }
0f605c88
LV
4189 return packFloatx80(aSign,
4190 floatx80_infinity_high,
4191 floatx80_infinity_low);
158142c2
FB
4192 }
4193 if ( aExp == 0 ) {
4194 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4195 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4196 }
4197 return
4198 packFloatx80(
4199 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4200
4201}
4202
158142c2
FB
4203/*----------------------------------------------------------------------------
4204| Returns the result of converting the double-precision floating-point value
4205| `a' to the quadruple-precision floating-point format. The conversion is
4206| performed according to the IEC/IEEE Standard for Binary Floating-Point
4207| Arithmetic.
4208*----------------------------------------------------------------------------*/
4209
e5a41ffa 4210float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4211{
4212 flag aSign;
0c48262d 4213 int aExp;
bb98fe42 4214 uint64_t aSig, zSig0, zSig1;
158142c2 4215
ff32e16e 4216 a = float64_squash_input_denormal(a, status);
158142c2
FB
4217 aSig = extractFloat64Frac( a );
4218 aExp = extractFloat64Exp( a );
4219 aSign = extractFloat64Sign( a );
4220 if ( aExp == 0x7FF ) {
ff32e16e
PM
4221 if (aSig) {
4222 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4223 }
158142c2
FB
4224 return packFloat128( aSign, 0x7FFF, 0, 0 );
4225 }
4226 if ( aExp == 0 ) {
4227 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4228 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4229 --aExp;
4230 }
4231 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4232 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4233
4234}
4235
158142c2
FB
4236
4237/*----------------------------------------------------------------------------
4238| Returns the remainder of the double-precision floating-point value `a'
4239| with respect to the corresponding value `b'. The operation is performed
4240| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4241*----------------------------------------------------------------------------*/
4242
e5a41ffa 4243float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4244{
ed086f3d 4245 flag aSign, zSign;
0c48262d 4246 int aExp, bExp, expDiff;
bb98fe42
AF
4247 uint64_t aSig, bSig;
4248 uint64_t q, alternateASig;
4249 int64_t sigMean;
158142c2 4250
ff32e16e
PM
4251 a = float64_squash_input_denormal(a, status);
4252 b = float64_squash_input_denormal(b, status);
158142c2
FB
4253 aSig = extractFloat64Frac( a );
4254 aExp = extractFloat64Exp( a );
4255 aSign = extractFloat64Sign( a );
4256 bSig = extractFloat64Frac( b );
4257 bExp = extractFloat64Exp( b );
158142c2
FB
4258 if ( aExp == 0x7FF ) {
4259 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4260 return propagateFloat64NaN(a, b, status);
158142c2 4261 }
ff32e16e 4262 float_raise(float_flag_invalid, status);
af39bc8c 4263 return float64_default_nan(status);
158142c2
FB
4264 }
4265 if ( bExp == 0x7FF ) {
ff32e16e
PM
4266 if (bSig) {
4267 return propagateFloat64NaN(a, b, status);
4268 }
158142c2
FB
4269 return a;
4270 }
4271 if ( bExp == 0 ) {
4272 if ( bSig == 0 ) {
ff32e16e 4273 float_raise(float_flag_invalid, status);
af39bc8c 4274 return float64_default_nan(status);
158142c2
FB
4275 }
4276 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4277 }
4278 if ( aExp == 0 ) {
4279 if ( aSig == 0 ) return a;
4280 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4281 }
4282 expDiff = aExp - bExp;
4283 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4284 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4285 if ( expDiff < 0 ) {
4286 if ( expDiff < -1 ) return a;
4287 aSig >>= 1;
4288 }
4289 q = ( bSig <= aSig );
4290 if ( q ) aSig -= bSig;
4291 expDiff -= 64;
4292 while ( 0 < expDiff ) {
4293 q = estimateDiv128To64( aSig, 0, bSig );
4294 q = ( 2 < q ) ? q - 2 : 0;
4295 aSig = - ( ( bSig>>2 ) * q );
4296 expDiff -= 62;
4297 }
4298 expDiff += 64;
4299 if ( 0 < expDiff ) {
4300 q = estimateDiv128To64( aSig, 0, bSig );
4301 q = ( 2 < q ) ? q - 2 : 0;
4302 q >>= 64 - expDiff;
4303 bSig >>= 2;
4304 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4305 }
4306 else {
4307 aSig >>= 2;
4308 bSig >>= 2;
4309 }
4310 do {
4311 alternateASig = aSig;
4312 ++q;
4313 aSig -= bSig;
bb98fe42 4314 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4315 sigMean = aSig + alternateASig;
4316 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4317 aSig = alternateASig;
4318 }
bb98fe42 4319 zSign = ( (int64_t) aSig < 0 );
158142c2 4320 if ( zSign ) aSig = - aSig;
ff32e16e 4321 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4322
4323}
4324
374dfc33
AJ
4325/*----------------------------------------------------------------------------
4326| Returns the binary log of the double-precision floating-point value `a'.
4327| The operation is performed according to the IEC/IEEE Standard for Binary
4328| Floating-Point Arithmetic.
4329*----------------------------------------------------------------------------*/
e5a41ffa 4330float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4331{
4332 flag aSign, zSign;
0c48262d 4333 int aExp;
bb98fe42 4334 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4335 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4336
4337 aSig = extractFloat64Frac( a );
4338 aExp = extractFloat64Exp( a );
4339 aSign = extractFloat64Sign( a );
4340
4341 if ( aExp == 0 ) {
4342 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4343 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4344 }
4345 if ( aSign ) {
ff32e16e 4346 float_raise(float_flag_invalid, status);
af39bc8c 4347 return float64_default_nan(status);
374dfc33
AJ
4348 }
4349 if ( aExp == 0x7FF ) {
ff32e16e
PM
4350 if (aSig) {
4351 return propagateFloat64NaN(a, float64_zero, status);
4352 }
374dfc33
AJ
4353 return a;
4354 }
4355
4356 aExp -= 0x3FF;
4357 aSig |= LIT64( 0x0010000000000000 );
4358 zSign = aExp < 0;
bb98fe42 4359 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4360 for (i = 1LL << 51; i > 0; i >>= 1) {
4361 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4362 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4363 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4364 aSig >>= 1;
4365 zSig |= i;
4366 }
4367 }
4368
4369 if ( zSign )
4370 zSig = -zSig;
ff32e16e 4371 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4372}
4373
158142c2
FB
4374/*----------------------------------------------------------------------------
4375| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4376| corresponding value `b', and 0 otherwise. The invalid exception is raised
4377| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4378| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4379*----------------------------------------------------------------------------*/
4380
e5a41ffa 4381int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4382{
bb98fe42 4383 uint64_t av, bv;
ff32e16e
PM
4384 a = float64_squash_input_denormal(a, status);
4385 b = float64_squash_input_denormal(b, status);
158142c2
FB
4386
4387 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4388 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4389 ) {
ff32e16e 4390 float_raise(float_flag_invalid, status);
158142c2
FB
4391 return 0;
4392 }
f090c9d4 4393 av = float64_val(a);
a1b91bb4 4394 bv = float64_val(b);
bb98fe42 4395 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4396
4397}
4398
4399/*----------------------------------------------------------------------------
4400| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4401| equal to the corresponding value `b', and 0 otherwise. The invalid
4402| exception is raised if either operand is a NaN. The comparison is performed
4403| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4404*----------------------------------------------------------------------------*/
4405
e5a41ffa 4406int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4407{
4408 flag aSign, bSign;
bb98fe42 4409 uint64_t av, bv;
ff32e16e
PM
4410 a = float64_squash_input_denormal(a, status);
4411 b = float64_squash_input_denormal(b, status);
158142c2
FB
4412
4413 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4414 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4415 ) {
ff32e16e 4416 float_raise(float_flag_invalid, status);
158142c2
FB
4417 return 0;
4418 }
4419 aSign = extractFloat64Sign( a );
4420 bSign = extractFloat64Sign( b );
f090c9d4 4421 av = float64_val(a);
a1b91bb4 4422 bv = float64_val(b);
bb98fe42 4423 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4424 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4425
4426}
4427
4428/*----------------------------------------------------------------------------
4429| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4430| the corresponding value `b', and 0 otherwise. The invalid exception is
4431| raised if either operand is a NaN. The comparison is performed according
4432| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4433*----------------------------------------------------------------------------*/
4434
e5a41ffa 4435int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4436{
4437 flag aSign, bSign;
bb98fe42 4438 uint64_t av, bv;
158142c2 4439
ff32e16e
PM
4440 a = float64_squash_input_denormal(a, status);
4441 b = float64_squash_input_denormal(b, status);
158142c2
FB
4442 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4443 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4444 ) {
ff32e16e 4445 float_raise(float_flag_invalid, status);
158142c2
FB
4446 return 0;
4447 }
4448 aSign = extractFloat64Sign( a );
4449 bSign = extractFloat64Sign( b );
f090c9d4 4450 av = float64_val(a);
a1b91bb4 4451 bv = float64_val(b);
bb98fe42 4452 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4453 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4454
4455}
4456
67b7861d
AJ
4457/*----------------------------------------------------------------------------
4458| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4459| be compared, and 0 otherwise. The invalid exception is raised if either
4460| operand is a NaN. The comparison is performed according to the IEC/IEEE
4461| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4462*----------------------------------------------------------------------------*/
4463
e5a41ffa 4464int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4465{
ff32e16e
PM
4466 a = float64_squash_input_denormal(a, status);
4467 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4468
4469 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4470 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4471 ) {
ff32e16e 4472 float_raise(float_flag_invalid, status);
67b7861d
AJ
4473 return 1;
4474 }
4475 return 0;
4476}
4477
158142c2
FB
4478/*----------------------------------------------------------------------------
4479| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4480| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4481| exception. The comparison is performed according to the IEC/IEEE Standard
4482| for Binary Floating-Point Arithmetic.
158142c2
FB
4483*----------------------------------------------------------------------------*/
4484
e5a41ffa 4485int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4486{
bb98fe42 4487 uint64_t av, bv;
ff32e16e
PM
4488 a = float64_squash_input_denormal(a, status);
4489 b = float64_squash_input_denormal(b, status);
158142c2
FB
4490
4491 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4492 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4493 ) {
af39bc8c
AM
4494 if (float64_is_signaling_nan(a, status)
4495 || float64_is_signaling_nan(b, status)) {
ff32e16e 4496 float_raise(float_flag_invalid, status);
b689362d 4497 }
158142c2
FB
4498 return 0;
4499 }
f090c9d4 4500 av = float64_val(a);
a1b91bb4 4501 bv = float64_val(b);
bb98fe42 4502 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4503
4504}
4505
4506/*----------------------------------------------------------------------------
4507| Returns 1 if the double-precision floating-point value `a' is less than or
4508| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4509| cause an exception. Otherwise, the comparison is performed according to the
4510| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4511*----------------------------------------------------------------------------*/
4512
e5a41ffa 4513int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4514{
4515 flag aSign, bSign;
bb98fe42 4516 uint64_t av, bv;
ff32e16e
PM
4517 a = float64_squash_input_denormal(a, status);
4518 b = float64_squash_input_denormal(b, status);
158142c2
FB
4519
4520 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4521 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4522 ) {
af39bc8c
AM
4523 if (float64_is_signaling_nan(a, status)
4524 || float64_is_signaling_nan(b, status)) {
ff32e16e 4525 float_raise(float_flag_invalid, status);
158142c2
FB
4526 }
4527 return 0;
4528 }
4529 aSign = extractFloat64Sign( a );
4530 bSign = extractFloat64Sign( b );
f090c9d4 4531 av = float64_val(a);
a1b91bb4 4532 bv = float64_val(b);
bb98fe42 4533 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4534 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4535
4536}
4537
4538/*----------------------------------------------------------------------------
4539| Returns 1 if the double-precision floating-point value `a' is less than
4540| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4541| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4542| Standard for Binary Floating-Point Arithmetic.
4543*----------------------------------------------------------------------------*/
4544
e5a41ffa 4545int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4546{
4547 flag aSign, bSign;
bb98fe42 4548 uint64_t av, bv;
ff32e16e
PM
4549 a = float64_squash_input_denormal(a, status);
4550 b = float64_squash_input_denormal(b, status);
158142c2
FB
4551
4552 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4553 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4554 ) {
af39bc8c
AM
4555 if (float64_is_signaling_nan(a, status)
4556 || float64_is_signaling_nan(b, status)) {
ff32e16e 4557 float_raise(float_flag_invalid, status);
158142c2
FB
4558 }
4559 return 0;
4560 }
4561 aSign = extractFloat64Sign( a );
4562 bSign = extractFloat64Sign( b );
f090c9d4 4563 av = float64_val(a);
a1b91bb4 4564 bv = float64_val(b);
bb98fe42 4565 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4566 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4567
4568}
4569
67b7861d
AJ
4570/*----------------------------------------------------------------------------
4571| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4572| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4573| comparison is performed according to the IEC/IEEE Standard for Binary
4574| Floating-Point Arithmetic.
4575*----------------------------------------------------------------------------*/
4576
e5a41ffa 4577int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4578{
ff32e16e
PM
4579 a = float64_squash_input_denormal(a, status);
4580 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4581
4582 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4583 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4584 ) {
af39bc8c
AM
4585 if (float64_is_signaling_nan(a, status)
4586 || float64_is_signaling_nan(b, status)) {
ff32e16e 4587 float_raise(float_flag_invalid, status);
67b7861d
AJ
4588 }
4589 return 1;
4590 }
4591 return 0;
4592}
4593
158142c2
FB
4594/*----------------------------------------------------------------------------
4595| Returns the result of converting the extended double-precision floating-
4596| point value `a' to the 32-bit two's complement integer format. The
4597| conversion is performed according to the IEC/IEEE Standard for Binary
4598| Floating-Point Arithmetic---which means in particular that the conversion
4599| is rounded according to the current rounding mode. If `a' is a NaN, the
4600| largest positive integer is returned. Otherwise, if the conversion
4601| overflows, the largest integer with the same sign as `a' is returned.
4602*----------------------------------------------------------------------------*/
4603
f4014512 4604int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4605{
4606 flag aSign;
f4014512 4607 int32_t aExp, shiftCount;
bb98fe42 4608 uint64_t aSig;
158142c2 4609
d1eb8f2a
AD
4610 if (floatx80_invalid_encoding(a)) {
4611 float_raise(float_flag_invalid, status);
4612 return 1 << 31;
4613 }
158142c2
FB
4614 aSig = extractFloatx80Frac( a );
4615 aExp = extractFloatx80Exp( a );
4616 aSign = extractFloatx80Sign( a );
bb98fe42 4617 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4618 shiftCount = 0x4037 - aExp;
4619 if ( shiftCount <= 0 ) shiftCount = 1;
4620 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4621 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4622
4623}
4624
4625/*----------------------------------------------------------------------------
4626| Returns the result of converting the extended double-precision floating-
4627| point value `a' to the 32-bit two's complement integer format. The
4628| conversion is performed according to the IEC/IEEE Standard for Binary
4629| Floating-Point Arithmetic, except that the conversion is always rounded
4630| toward zero. If `a' is a NaN, the largest positive integer is returned.
4631| Otherwise, if the conversion overflows, the largest integer with the same
4632| sign as `a' is returned.
4633*----------------------------------------------------------------------------*/
4634
f4014512 4635int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4636{
4637 flag aSign;
f4014512 4638 int32_t aExp, shiftCount;
bb98fe42 4639 uint64_t aSig, savedASig;
b3a6a2e0 4640 int32_t z;
158142c2 4641
d1eb8f2a
AD
4642 if (floatx80_invalid_encoding(a)) {
4643 float_raise(float_flag_invalid, status);
4644 return 1 << 31;
4645 }
158142c2
FB
4646 aSig = extractFloatx80Frac( a );
4647 aExp = extractFloatx80Exp( a );
4648 aSign = extractFloatx80Sign( a );
4649 if ( 0x401E < aExp ) {
bb98fe42 4650 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4651 goto invalid;
4652 }
4653 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4654 if (aExp || aSig) {
4655 status->float_exception_flags |= float_flag_inexact;
4656 }
158142c2
FB
4657 return 0;
4658 }
4659 shiftCount = 0x403E - aExp;
4660 savedASig = aSig;
4661 aSig >>= shiftCount;
4662 z = aSig;
4663 if ( aSign ) z = - z;
4664 if ( ( z < 0 ) ^ aSign ) {
4665 invalid:
ff32e16e 4666 float_raise(float_flag_invalid, status);
bb98fe42 4667 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4668 }
4669 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4670 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4671 }
4672 return z;
4673
4674}
4675
4676/*----------------------------------------------------------------------------
4677| Returns the result of converting the extended double-precision floating-
4678| point value `a' to the 64-bit two's complement integer format. The
4679| conversion is performed according to the IEC/IEEE Standard for Binary
4680| Floating-Point Arithmetic---which means in particular that the conversion
4681| is rounded according to the current rounding mode. If `a' is a NaN,
4682| the largest positive integer is returned. Otherwise, if the conversion
4683| overflows, the largest integer with the same sign as `a' is returned.
4684*----------------------------------------------------------------------------*/
4685
f42c2224 4686int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4687{
4688 flag aSign;
f4014512 4689 int32_t aExp, shiftCount;
bb98fe42 4690 uint64_t aSig, aSigExtra;
158142c2 4691
d1eb8f2a
AD
4692 if (floatx80_invalid_encoding(a)) {
4693 float_raise(float_flag_invalid, status);
4694 return 1ULL << 63;
4695 }
158142c2
FB
4696 aSig = extractFloatx80Frac( a );
4697 aExp = extractFloatx80Exp( a );
4698 aSign = extractFloatx80Sign( a );
4699 shiftCount = 0x403E - aExp;
4700 if ( shiftCount <= 0 ) {
4701 if ( shiftCount ) {
ff32e16e 4702 float_raise(float_flag_invalid, status);
0f605c88 4703 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4704 return LIT64( 0x7FFFFFFFFFFFFFFF );
4705 }
bb98fe42 4706 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4707 }
4708 aSigExtra = 0;
4709 }
4710 else {
4711 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4712 }
ff32e16e 4713 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4714
4715}
4716
4717/*----------------------------------------------------------------------------
4718| Returns the result of converting the extended double-precision floating-
4719| point value `a' to the 64-bit two's complement integer format. The
4720| conversion is performed according to the IEC/IEEE Standard for Binary
4721| Floating-Point Arithmetic, except that the conversion is always rounded
4722| toward zero. If `a' is a NaN, the largest positive integer is returned.
4723| Otherwise, if the conversion overflows, the largest integer with the same
4724| sign as `a' is returned.
4725*----------------------------------------------------------------------------*/
4726
f42c2224 4727int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4728{
4729 flag aSign;
f4014512 4730 int32_t aExp, shiftCount;
bb98fe42 4731 uint64_t aSig;
f42c2224 4732 int64_t z;
158142c2 4733
d1eb8f2a
AD
4734 if (floatx80_invalid_encoding(a)) {
4735 float_raise(float_flag_invalid, status);
4736 return 1ULL << 63;
4737 }
158142c2
FB
4738 aSig = extractFloatx80Frac( a );
4739 aExp = extractFloatx80Exp( a );
4740 aSign = extractFloatx80Sign( a );
4741 shiftCount = aExp - 0x403E;
4742 if ( 0 <= shiftCount ) {
4743 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4744 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4745 float_raise(float_flag_invalid, status);
158142c2
FB
4746 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4747 return LIT64( 0x7FFFFFFFFFFFFFFF );
4748 }
4749 }
bb98fe42 4750 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4751 }
4752 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4753 if (aExp | aSig) {
4754 status->float_exception_flags |= float_flag_inexact;
4755 }
158142c2
FB
4756 return 0;
4757 }
4758 z = aSig>>( - shiftCount );
bb98fe42 4759 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4760 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4761 }
4762 if ( aSign ) z = - z;
4763 return z;
4764
4765}
4766
4767/*----------------------------------------------------------------------------
4768| Returns the result of converting the extended double-precision floating-
4769| point value `a' to the single-precision floating-point format. The
4770| conversion is performed according to the IEC/IEEE Standard for Binary
4771| Floating-Point Arithmetic.
4772*----------------------------------------------------------------------------*/
4773
e5a41ffa 4774float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4775{
4776 flag aSign;
f4014512 4777 int32_t aExp;
bb98fe42 4778 uint64_t aSig;
158142c2 4779
d1eb8f2a
AD
4780 if (floatx80_invalid_encoding(a)) {
4781 float_raise(float_flag_invalid, status);
4782 return float32_default_nan(status);
4783 }
158142c2
FB
4784 aSig = extractFloatx80Frac( a );
4785 aExp = extractFloatx80Exp( a );
4786 aSign = extractFloatx80Sign( a );
4787 if ( aExp == 0x7FFF ) {
bb98fe42 4788 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4789 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4790 }
4791 return packFloat32( aSign, 0xFF, 0 );
4792 }
4793 shift64RightJamming( aSig, 33, &aSig );
4794 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4795 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4796
4797}
4798
4799/*----------------------------------------------------------------------------
4800| Returns the result of converting the extended double-precision floating-
4801| point value `a' to the double-precision floating-point format. The
4802| conversion is performed according to the IEC/IEEE Standard for Binary
4803| Floating-Point Arithmetic.
4804*----------------------------------------------------------------------------*/
4805
e5a41ffa 4806float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4807{
4808 flag aSign;
f4014512 4809 int32_t aExp;
bb98fe42 4810 uint64_t aSig, zSig;
158142c2 4811
d1eb8f2a
AD
4812 if (floatx80_invalid_encoding(a)) {
4813 float_raise(float_flag_invalid, status);
4814 return float64_default_nan(status);
4815 }
158142c2
FB
4816 aSig = extractFloatx80Frac( a );
4817 aExp = extractFloatx80Exp( a );
4818 aSign = extractFloatx80Sign( a );
4819 if ( aExp == 0x7FFF ) {
bb98fe42 4820 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4821 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4822 }
4823 return packFloat64( aSign, 0x7FF, 0 );
4824 }
4825 shift64RightJamming( aSig, 1, &zSig );
4826 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4827 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4828
4829}
4830
158142c2
FB
4831/*----------------------------------------------------------------------------
4832| Returns the result of converting the extended double-precision floating-
4833| point value `a' to the quadruple-precision floating-point format. The
4834| conversion is performed according to the IEC/IEEE Standard for Binary
4835| Floating-Point Arithmetic.
4836*----------------------------------------------------------------------------*/
4837
e5a41ffa 4838float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4839{
4840 flag aSign;
0c48262d 4841 int aExp;
bb98fe42 4842 uint64_t aSig, zSig0, zSig1;
158142c2 4843
d1eb8f2a
AD
4844 if (floatx80_invalid_encoding(a)) {
4845 float_raise(float_flag_invalid, status);
4846 return float128_default_nan(status);
4847 }
158142c2
FB
4848 aSig = extractFloatx80Frac( a );
4849 aExp = extractFloatx80Exp( a );
4850 aSign = extractFloatx80Sign( a );
bb98fe42 4851 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4852 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4853 }
4854 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4855 return packFloat128( aSign, aExp, zSig0, zSig1 );
4856
4857}
4858
0f721292
LV
4859/*----------------------------------------------------------------------------
4860| Rounds the extended double-precision floating-point value `a'
4861| to the precision provided by floatx80_rounding_precision and returns the
4862| result as an extended double-precision floating-point value.
4863| The operation is performed according to the IEC/IEEE Standard for Binary
4864| Floating-Point Arithmetic.
4865*----------------------------------------------------------------------------*/
4866
4867floatx80 floatx80_round(floatx80 a, float_status *status)
4868{
4869 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4870 extractFloatx80Sign(a),
4871 extractFloatx80Exp(a),
4872 extractFloatx80Frac(a), 0, status);
4873}
4874
158142c2
FB
4875/*----------------------------------------------------------------------------
4876| Rounds the extended double-precision floating-point value `a' to an integer,
4877| and returns the result as an extended quadruple-precision floating-point
4878| value. The operation is performed according to the IEC/IEEE Standard for
4879| Binary Floating-Point Arithmetic.
4880*----------------------------------------------------------------------------*/
4881
e5a41ffa 4882floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4883{
4884 flag aSign;
f4014512 4885 int32_t aExp;
bb98fe42 4886 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4887 floatx80 z;
4888
d1eb8f2a
AD
4889 if (floatx80_invalid_encoding(a)) {
4890 float_raise(float_flag_invalid, status);
4891 return floatx80_default_nan(status);
4892 }
158142c2
FB
4893 aExp = extractFloatx80Exp( a );
4894 if ( 0x403E <= aExp ) {
bb98fe42 4895 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4896 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4897 }
4898 return a;
4899 }
4900 if ( aExp < 0x3FFF ) {
4901 if ( ( aExp == 0 )
bb98fe42 4902 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4903 return a;
4904 }
a2f2d288 4905 status->float_exception_flags |= float_flag_inexact;
158142c2 4906 aSign = extractFloatx80Sign( a );
a2f2d288 4907 switch (status->float_rounding_mode) {
158142c2 4908 case float_round_nearest_even:
bb98fe42 4909 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4910 ) {
4911 return
4912 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4913 }
4914 break;
f9288a76
PM
4915 case float_round_ties_away:
4916 if (aExp == 0x3FFE) {
4917 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4918 }
4919 break;
158142c2
FB
4920 case float_round_down:
4921 return
4922 aSign ?
4923 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4924 : packFloatx80( 0, 0, 0 );
4925 case float_round_up:
4926 return
4927 aSign ? packFloatx80( 1, 0, 0 )
4928 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4929 }
4930 return packFloatx80( aSign, 0, 0 );
4931 }
4932 lastBitMask = 1;
4933 lastBitMask <<= 0x403E - aExp;
4934 roundBitsMask = lastBitMask - 1;
4935 z = a;
a2f2d288 4936 switch (status->float_rounding_mode) {
dc355b76 4937 case float_round_nearest_even:
158142c2 4938 z.low += lastBitMask>>1;
dc355b76
PM
4939 if ((z.low & roundBitsMask) == 0) {
4940 z.low &= ~lastBitMask;
4941 }
4942 break;
f9288a76
PM
4943 case float_round_ties_away:
4944 z.low += lastBitMask >> 1;
4945 break;
dc355b76
PM
4946 case float_round_to_zero:
4947 break;
4948 case float_round_up:
4949 if (!extractFloatx80Sign(z)) {
4950 z.low += roundBitsMask;
4951 }
4952 break;
4953 case float_round_down:
4954 if (extractFloatx80Sign(z)) {
158142c2
FB
4955 z.low += roundBitsMask;
4956 }
dc355b76
PM
4957 break;
4958 default:
4959 abort();
158142c2
FB
4960 }
4961 z.low &= ~ roundBitsMask;
4962 if ( z.low == 0 ) {
4963 ++z.high;
4964 z.low = LIT64( 0x8000000000000000 );
4965 }
a2f2d288
PM
4966 if (z.low != a.low) {
4967 status->float_exception_flags |= float_flag_inexact;
4968 }
158142c2
FB
4969 return z;
4970
4971}
4972
4973/*----------------------------------------------------------------------------
4974| Returns the result of adding the absolute values of the extended double-
4975| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4976| negated before being returned. `zSign' is ignored if the result is a NaN.
4977| The addition is performed according to the IEC/IEEE Standard for Binary
4978| Floating-Point Arithmetic.
4979*----------------------------------------------------------------------------*/
4980
e5a41ffa
PM
4981static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4982 float_status *status)
158142c2 4983{
f4014512 4984 int32_t aExp, bExp, zExp;
bb98fe42 4985 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4986 int32_t expDiff;
158142c2
FB
4987
4988 aSig = extractFloatx80Frac( a );
4989 aExp = extractFloatx80Exp( a );
4990 bSig = extractFloatx80Frac( b );
4991 bExp = extractFloatx80Exp( b );
4992 expDiff = aExp - bExp;
4993 if ( 0 < expDiff ) {
4994 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4995 if ((uint64_t)(aSig << 1)) {
4996 return propagateFloatx80NaN(a, b, status);
4997 }
158142c2
FB
4998 return a;
4999 }
5000 if ( bExp == 0 ) --expDiff;
5001 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5002 zExp = aExp;
5003 }
5004 else if ( expDiff < 0 ) {
5005 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5006 if ((uint64_t)(bSig << 1)) {
5007 return propagateFloatx80NaN(a, b, status);
5008 }
0f605c88
LV
5009 return packFloatx80(zSign,
5010 floatx80_infinity_high,
5011 floatx80_infinity_low);
158142c2
FB
5012 }
5013 if ( aExp == 0 ) ++expDiff;
5014 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5015 zExp = bExp;
5016 }
5017 else {
5018 if ( aExp == 0x7FFF ) {
bb98fe42 5019 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5020 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5021 }
5022 return a;
5023 }
5024 zSig1 = 0;
5025 zSig0 = aSig + bSig;
5026 if ( aExp == 0 ) {
5027 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5028 goto roundAndPack;
5029 }
5030 zExp = aExp;
5031 goto shiftRight1;
5032 }
5033 zSig0 = aSig + bSig;
bb98fe42 5034 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5035 shiftRight1:
5036 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5037 zSig0 |= LIT64( 0x8000000000000000 );
5038 ++zExp;
5039 roundAndPack:
a2f2d288 5040 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5041 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5042}
5043
5044/*----------------------------------------------------------------------------
5045| Returns the result of subtracting the absolute values of the extended
5046| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5047| difference is negated before being returned. `zSign' is ignored if the
5048| result is a NaN. The subtraction is performed according to the IEC/IEEE
5049| Standard for Binary Floating-Point Arithmetic.
5050*----------------------------------------------------------------------------*/
5051
e5a41ffa
PM
5052static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5053 float_status *status)
158142c2 5054{
f4014512 5055 int32_t aExp, bExp, zExp;
bb98fe42 5056 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5057 int32_t expDiff;
158142c2
FB
5058
5059 aSig = extractFloatx80Frac( a );
5060 aExp = extractFloatx80Exp( a );
5061 bSig = extractFloatx80Frac( b );
5062 bExp = extractFloatx80Exp( b );
5063 expDiff = aExp - bExp;
5064 if ( 0 < expDiff ) goto aExpBigger;
5065 if ( expDiff < 0 ) goto bExpBigger;
5066 if ( aExp == 0x7FFF ) {
bb98fe42 5067 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5068 return propagateFloatx80NaN(a, b, status);
158142c2 5069 }
ff32e16e 5070 float_raise(float_flag_invalid, status);
af39bc8c 5071 return floatx80_default_nan(status);
158142c2
FB
5072 }
5073 if ( aExp == 0 ) {
5074 aExp = 1;
5075 bExp = 1;
5076 }
5077 zSig1 = 0;
5078 if ( bSig < aSig ) goto aBigger;
5079 if ( aSig < bSig ) goto bBigger;
a2f2d288 5080 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5081 bExpBigger:
5082 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5083 if ((uint64_t)(bSig << 1)) {
5084 return propagateFloatx80NaN(a, b, status);
5085 }
0f605c88
LV
5086 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5087 floatx80_infinity_low);
158142c2
FB
5088 }
5089 if ( aExp == 0 ) ++expDiff;
5090 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5091 bBigger:
5092 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5093 zExp = bExp;
5094 zSign ^= 1;
5095 goto normalizeRoundAndPack;
5096 aExpBigger:
5097 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5098 if ((uint64_t)(aSig << 1)) {
5099 return propagateFloatx80NaN(a, b, status);
5100 }
158142c2
FB
5101 return a;
5102 }
5103 if ( bExp == 0 ) --expDiff;
5104 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5105 aBigger:
5106 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5107 zExp = aExp;
5108 normalizeRoundAndPack:
a2f2d288 5109 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5110 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5111}
5112
5113/*----------------------------------------------------------------------------
5114| Returns the result of adding the extended double-precision floating-point
5115| values `a' and `b'. The operation is performed according to the IEC/IEEE
5116| Standard for Binary Floating-Point Arithmetic.
5117*----------------------------------------------------------------------------*/
5118
e5a41ffa 5119floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5120{
5121 flag aSign, bSign;
5122
d1eb8f2a
AD
5123 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5124 float_raise(float_flag_invalid, status);
5125 return floatx80_default_nan(status);
5126 }
158142c2
FB
5127 aSign = extractFloatx80Sign( a );
5128 bSign = extractFloatx80Sign( b );
5129 if ( aSign == bSign ) {
ff32e16e 5130 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5131 }
5132 else {
ff32e16e 5133 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5134 }
5135
5136}
5137
5138/*----------------------------------------------------------------------------
5139| Returns the result of subtracting the extended double-precision floating-
5140| point values `a' and `b'. The operation is performed according to the
5141| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5142*----------------------------------------------------------------------------*/
5143
e5a41ffa 5144floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5145{
5146 flag aSign, bSign;
5147
d1eb8f2a
AD
5148 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5149 float_raise(float_flag_invalid, status);
5150 return floatx80_default_nan(status);
5151 }
158142c2
FB
5152 aSign = extractFloatx80Sign( a );
5153 bSign = extractFloatx80Sign( b );
5154 if ( aSign == bSign ) {
ff32e16e 5155 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5156 }
5157 else {
ff32e16e 5158 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5159 }
5160
5161}
5162
5163/*----------------------------------------------------------------------------
5164| Returns the result of multiplying the extended double-precision floating-
5165| point values `a' and `b'. The operation is performed according to the
5166| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5167*----------------------------------------------------------------------------*/
5168
e5a41ffa 5169floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5170{
5171 flag aSign, bSign, zSign;
f4014512 5172 int32_t aExp, bExp, zExp;
bb98fe42 5173 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5174
d1eb8f2a
AD
5175 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5176 float_raise(float_flag_invalid, status);
5177 return floatx80_default_nan(status);
5178 }
158142c2
FB
5179 aSig = extractFloatx80Frac( a );
5180 aExp = extractFloatx80Exp( a );
5181 aSign = extractFloatx80Sign( a );
5182 bSig = extractFloatx80Frac( b );
5183 bExp = extractFloatx80Exp( b );
5184 bSign = extractFloatx80Sign( b );
5185 zSign = aSign ^ bSign;
5186 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5187 if ( (uint64_t) ( aSig<<1 )
5188 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5189 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5190 }
5191 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5192 return packFloatx80(zSign, floatx80_infinity_high,
5193 floatx80_infinity_low);
158142c2
FB
5194 }
5195 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5196 if ((uint64_t)(bSig << 1)) {
5197 return propagateFloatx80NaN(a, b, status);
5198 }
158142c2
FB
5199 if ( ( aExp | aSig ) == 0 ) {
5200 invalid:
ff32e16e 5201 float_raise(float_flag_invalid, status);
af39bc8c 5202 return floatx80_default_nan(status);
158142c2 5203 }
0f605c88
LV
5204 return packFloatx80(zSign, floatx80_infinity_high,
5205 floatx80_infinity_low);
158142c2
FB
5206 }
5207 if ( aExp == 0 ) {
5208 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5209 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5210 }
5211 if ( bExp == 0 ) {
5212 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5213 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5214 }
5215 zExp = aExp + bExp - 0x3FFE;
5216 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5217 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5218 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5219 --zExp;
5220 }
a2f2d288 5221 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5222 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5223}
5224
5225/*----------------------------------------------------------------------------
5226| Returns the result of dividing the extended double-precision floating-point
5227| value `a' by the corresponding value `b'. The operation is performed
5228| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5229*----------------------------------------------------------------------------*/
5230
e5a41ffa 5231floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5232{
5233 flag aSign, bSign, zSign;
f4014512 5234 int32_t aExp, bExp, zExp;
bb98fe42
AF
5235 uint64_t aSig, bSig, zSig0, zSig1;
5236 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5237
d1eb8f2a
AD
5238 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5239 float_raise(float_flag_invalid, status);
5240 return floatx80_default_nan(status);
5241 }
158142c2
FB
5242 aSig = extractFloatx80Frac( a );
5243 aExp = extractFloatx80Exp( a );
5244 aSign = extractFloatx80Sign( a );
5245 bSig = extractFloatx80Frac( b );
5246 bExp = extractFloatx80Exp( b );
5247 bSign = extractFloatx80Sign( b );
5248 zSign = aSign ^ bSign;
5249 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5250 if ((uint64_t)(aSig << 1)) {
5251 return propagateFloatx80NaN(a, b, status);
5252 }
158142c2 5253 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5254 if ((uint64_t)(bSig << 1)) {
5255 return propagateFloatx80NaN(a, b, status);
5256 }
158142c2
FB
5257 goto invalid;
5258 }
0f605c88
LV
5259 return packFloatx80(zSign, floatx80_infinity_high,
5260 floatx80_infinity_low);
158142c2
FB
5261 }
5262 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5263 if ((uint64_t)(bSig << 1)) {
5264 return propagateFloatx80NaN(a, b, status);
5265 }
158142c2
FB
5266 return packFloatx80( zSign, 0, 0 );
5267 }
5268 if ( bExp == 0 ) {
5269 if ( bSig == 0 ) {
5270 if ( ( aExp | aSig ) == 0 ) {
5271 invalid:
ff32e16e 5272 float_raise(float_flag_invalid, status);
af39bc8c 5273 return floatx80_default_nan(status);
158142c2 5274 }
ff32e16e 5275 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5276 return packFloatx80(zSign, floatx80_infinity_high,
5277 floatx80_infinity_low);
158142c2
FB
5278 }
5279 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5280 }
5281 if ( aExp == 0 ) {
5282 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5283 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5284 }
5285 zExp = aExp - bExp + 0x3FFE;
5286 rem1 = 0;
5287 if ( bSig <= aSig ) {
5288 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5289 ++zExp;
5290 }
5291 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5292 mul64To128( bSig, zSig0, &term0, &term1 );
5293 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5294 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5295 --zSig0;
5296 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5297 }
5298 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5299 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5300 mul64To128( bSig, zSig1, &term1, &term2 );
5301 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5302 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5303 --zSig1;
5304 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5305 }
5306 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5307 }
a2f2d288 5308 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5309 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5310}
5311
5312/*----------------------------------------------------------------------------
5313| Returns the remainder of the extended double-precision floating-point value
5314| `a' with respect to the corresponding value `b'. The operation is performed
5315| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5316*----------------------------------------------------------------------------*/
5317
e5a41ffa 5318floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5319{
ed086f3d 5320 flag aSign, zSign;
f4014512 5321 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5322 uint64_t aSig0, aSig1, bSig;
5323 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5324
d1eb8f2a
AD
5325 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5326 float_raise(float_flag_invalid, status);
5327 return floatx80_default_nan(status);
5328 }
158142c2
FB
5329 aSig0 = extractFloatx80Frac( a );
5330 aExp = extractFloatx80Exp( a );
5331 aSign = extractFloatx80Sign( a );
5332 bSig = extractFloatx80Frac( b );
5333 bExp = extractFloatx80Exp( b );
158142c2 5334 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5335 if ( (uint64_t) ( aSig0<<1 )
5336 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5337 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5338 }
5339 goto invalid;
5340 }
5341 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5342 if ((uint64_t)(bSig << 1)) {
5343 return propagateFloatx80NaN(a, b, status);
5344 }
158142c2
FB
5345 return a;
5346 }
5347 if ( bExp == 0 ) {
5348 if ( bSig == 0 ) {
5349 invalid:
ff32e16e 5350 float_raise(float_flag_invalid, status);
af39bc8c 5351 return floatx80_default_nan(status);
158142c2
FB
5352 }
5353 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5354 }
5355 if ( aExp == 0 ) {
bb98fe42 5356 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5357 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5358 }
5359 bSig |= LIT64( 0x8000000000000000 );
5360 zSign = aSign;
5361 expDiff = aExp - bExp;
5362 aSig1 = 0;
5363 if ( expDiff < 0 ) {
5364 if ( expDiff < -1 ) return a;
5365 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5366 expDiff = 0;
5367 }
5368 q = ( bSig <= aSig0 );
5369 if ( q ) aSig0 -= bSig;
5370 expDiff -= 64;
5371 while ( 0 < expDiff ) {
5372 q = estimateDiv128To64( aSig0, aSig1, bSig );
5373 q = ( 2 < q ) ? q - 2 : 0;
5374 mul64To128( bSig, q, &term0, &term1 );
5375 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5376 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5377 expDiff -= 62;
5378 }
5379 expDiff += 64;
5380 if ( 0 < expDiff ) {
5381 q = estimateDiv128To64( aSig0, aSig1, bSig );
5382 q = ( 2 < q ) ? q - 2 : 0;
5383 q >>= 64 - expDiff;
5384 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5385 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5386 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5387 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5388 ++q;
5389 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5390 }
5391 }
5392 else {
5393 term1 = 0;
5394 term0 = bSig;
5395 }
5396 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5397 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5398 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5399 && ( q & 1 ) )
5400 ) {
5401 aSig0 = alternateASig0;
5402 aSig1 = alternateASig1;
5403 zSign = ! zSign;
5404 }
5405 return
5406 normalizeRoundAndPackFloatx80(
ff32e16e 5407 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5408
5409}
5410
5411/*----------------------------------------------------------------------------
5412| Returns the square root of the extended double-precision floating-point
5413| value `a'. The operation is performed according to the IEC/IEEE Standard
5414| for Binary Floating-Point Arithmetic.
5415*----------------------------------------------------------------------------*/
5416
e5a41ffa 5417floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5418{
5419 flag aSign;
f4014512 5420 int32_t aExp, zExp;
bb98fe42
AF
5421 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5422 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5423
d1eb8f2a
AD
5424 if (floatx80_invalid_encoding(a)) {
5425 float_raise(float_flag_invalid, status);
5426 return floatx80_default_nan(status);
5427 }
158142c2
FB
5428 aSig0 = extractFloatx80Frac( a );
5429 aExp = extractFloatx80Exp( a );
5430 aSign = extractFloatx80Sign( a );
5431 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5432 if ((uint64_t)(aSig0 << 1)) {
5433 return propagateFloatx80NaN(a, a, status);
5434 }
158142c2
FB
5435 if ( ! aSign ) return a;
5436 goto invalid;
5437 }
5438 if ( aSign ) {
5439 if ( ( aExp | aSig0 ) == 0 ) return a;
5440 invalid:
ff32e16e 5441 float_raise(float_flag_invalid, status);
af39bc8c 5442 return floatx80_default_nan(status);
158142c2
FB
5443 }
5444 if ( aExp == 0 ) {
5445 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5446 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5447 }
5448 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5449 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5450 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5451 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5452 doubleZSig0 = zSig0<<1;
5453 mul64To128( zSig0, zSig0, &term0, &term1 );
5454 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5455 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5456 --zSig0;
5457 doubleZSig0 -= 2;
5458 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5459 }
5460 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5461 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5462 if ( zSig1 == 0 ) zSig1 = 1;
5463 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5464 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5465 mul64To128( zSig1, zSig1, &term2, &term3 );
5466 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5467 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5468 --zSig1;
5469 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5470 term3 |= 1;
5471 term2 |= doubleZSig0;
5472 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5473 }
5474 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5475 }
5476 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5477 zSig0 |= doubleZSig0;
a2f2d288
PM
5478 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5479 0, zExp, zSig0, zSig1, status);
158142c2
FB
5480}
5481
5482/*----------------------------------------------------------------------------
b689362d
AJ
5483| Returns 1 if the extended double-precision floating-point value `a' is equal
5484| to the corresponding value `b', and 0 otherwise. The invalid exception is
5485| raised if either operand is a NaN. Otherwise, the comparison is performed
5486| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5487*----------------------------------------------------------------------------*/
5488
e5a41ffa 5489int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5490{
5491
d1eb8f2a
AD
5492 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5493 || (extractFloatx80Exp(a) == 0x7FFF
5494 && (uint64_t) (extractFloatx80Frac(a) << 1))
5495 || (extractFloatx80Exp(b) == 0x7FFF
5496 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5497 ) {
ff32e16e 5498 float_raise(float_flag_invalid, status);
158142c2
FB
5499 return 0;
5500 }
5501 return
5502 ( a.low == b.low )
5503 && ( ( a.high == b.high )
5504 || ( ( a.low == 0 )
bb98fe42 5505 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5506 );
5507
5508}
5509
5510/*----------------------------------------------------------------------------
5511| Returns 1 if the extended double-precision floating-point value `a' is
5512| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5513| invalid exception is raised if either operand is a NaN. The comparison is
5514| performed according to the IEC/IEEE Standard for Binary Floating-Point
5515| Arithmetic.
158142c2
FB
5516*----------------------------------------------------------------------------*/
5517
e5a41ffa 5518int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5519{
5520 flag aSign, bSign;
5521
d1eb8f2a
AD
5522 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5523 || (extractFloatx80Exp(a) == 0x7FFF
5524 && (uint64_t) (extractFloatx80Frac(a) << 1))
5525 || (extractFloatx80Exp(b) == 0x7FFF
5526 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5527 ) {
ff32e16e 5528 float_raise(float_flag_invalid, status);
158142c2
FB
5529 return 0;
5530 }
5531 aSign = extractFloatx80Sign( a );
5532 bSign = extractFloatx80Sign( b );
5533 if ( aSign != bSign ) {
5534 return
5535 aSign
bb98fe42 5536 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5537 == 0 );
5538 }
5539 return
5540 aSign ? le128( b.high, b.low, a.high, a.low )
5541 : le128( a.high, a.low, b.high, b.low );
5542
5543}
5544
5545/*----------------------------------------------------------------------------
5546| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5547| less than the corresponding value `b', and 0 otherwise. The invalid
5548| exception is raised if either operand is a NaN. The comparison is performed
5549| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5550*----------------------------------------------------------------------------*/
5551
e5a41ffa 5552int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5553{
5554 flag aSign, bSign;
5555
d1eb8f2a
AD
5556 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5557 || (extractFloatx80Exp(a) == 0x7FFF
5558 && (uint64_t) (extractFloatx80Frac(a) << 1))
5559 || (extractFloatx80Exp(b) == 0x7FFF
5560 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5561 ) {
ff32e16e 5562 float_raise(float_flag_invalid, status);
158142c2
FB
5563 return 0;
5564 }
5565 aSign = extractFloatx80Sign( a );
5566 bSign = extractFloatx80Sign( b );
5567 if ( aSign != bSign ) {
5568 return
5569 aSign
bb98fe42 5570 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5571 != 0 );
5572 }
5573 return
5574 aSign ? lt128( b.high, b.low, a.high, a.low )
5575 : lt128( a.high, a.low, b.high, b.low );
5576
5577}
5578
67b7861d
AJ
5579/*----------------------------------------------------------------------------
5580| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5581| cannot be compared, and 0 otherwise. The invalid exception is raised if
5582| either operand is a NaN. The comparison is performed according to the
5583| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5584*----------------------------------------------------------------------------*/
e5a41ffa 5585int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5586{
d1eb8f2a
AD
5587 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5588 || (extractFloatx80Exp(a) == 0x7FFF
5589 && (uint64_t) (extractFloatx80Frac(a) << 1))
5590 || (extractFloatx80Exp(b) == 0x7FFF
5591 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5592 ) {
ff32e16e 5593 float_raise(float_flag_invalid, status);
67b7861d
AJ
5594 return 1;
5595 }
5596 return 0;
5597}
5598
158142c2 5599/*----------------------------------------------------------------------------
b689362d 5600| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5601| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5602| cause an exception. The comparison is performed according to the IEC/IEEE
5603| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5604*----------------------------------------------------------------------------*/
5605
e5a41ffa 5606int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5607{
5608
d1eb8f2a
AD
5609 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5610 float_raise(float_flag_invalid, status);
5611 return 0;
5612 }
158142c2 5613 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5614 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5615 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5616 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5617 ) {
af39bc8c
AM
5618 if (floatx80_is_signaling_nan(a, status)
5619 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5620 float_raise(float_flag_invalid, status);
b689362d 5621 }
158142c2
FB
5622 return 0;
5623 }
5624 return
5625 ( a.low == b.low )
5626 && ( ( a.high == b.high )
5627 || ( ( a.low == 0 )
bb98fe42 5628 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5629 );
5630
5631}
5632
5633/*----------------------------------------------------------------------------
5634| Returns 1 if the extended double-precision floating-point value `a' is less
5635| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5636| do not cause an exception. Otherwise, the comparison is performed according
5637| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5638*----------------------------------------------------------------------------*/
5639
e5a41ffa 5640int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5641{
5642 flag aSign, bSign;
5643
d1eb8f2a
AD
5644 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5645 float_raise(float_flag_invalid, status);
5646 return 0;
5647 }
158142c2 5648 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5649 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5650 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5651 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5652 ) {
af39bc8c
AM
5653 if (floatx80_is_signaling_nan(a, status)
5654 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5655 float_raise(float_flag_invalid, status);
158142c2
FB
5656 }
5657 return 0;
5658 }
5659 aSign = extractFloatx80Sign( a );
5660 bSign = extractFloatx80Sign( b );
5661 if ( aSign != bSign ) {
5662 return
5663 aSign
bb98fe42 5664 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5665 == 0 );
5666 }
5667 return
5668 aSign ? le128( b.high, b.low, a.high, a.low )
5669 : le128( a.high, a.low, b.high, b.low );
5670
5671}
5672
5673/*----------------------------------------------------------------------------
5674| Returns 1 if the extended double-precision floating-point value `a' is less
5675| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5676| an exception. Otherwise, the comparison is performed according to the
5677| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5678*----------------------------------------------------------------------------*/
5679
e5a41ffa 5680int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5681{
5682 flag aSign, bSign;
5683
d1eb8f2a
AD
5684 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5685 float_raise(float_flag_invalid, status);
5686 return 0;
5687 }
158142c2 5688 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5689 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5690 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5691 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5692 ) {
af39bc8c
AM
5693 if (floatx80_is_signaling_nan(a, status)
5694 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5695 float_raise(float_flag_invalid, status);
158142c2
FB
5696 }
5697 return 0;
5698 }
5699 aSign = extractFloatx80Sign( a );
5700 bSign = extractFloatx80Sign( b );
5701 if ( aSign != bSign ) {
5702 return
5703 aSign
bb98fe42 5704 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5705 != 0 );
5706 }
5707 return
5708 aSign ? lt128( b.high, b.low, a.high, a.low )
5709 : lt128( a.high, a.low, b.high, b.low );
5710
5711}
5712
67b7861d
AJ
5713/*----------------------------------------------------------------------------
5714| Returns 1 if the extended double-precision floating-point values `a' and `b'
5715| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5716| The comparison is performed according to the IEC/IEEE Standard for Binary
5717| Floating-Point Arithmetic.
5718*----------------------------------------------------------------------------*/
e5a41ffa 5719int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5720{
d1eb8f2a
AD
5721 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5722 float_raise(float_flag_invalid, status);
5723 return 1;
5724 }
67b7861d
AJ
5725 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5726 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5727 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5728 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5729 ) {
af39bc8c
AM
5730 if (floatx80_is_signaling_nan(a, status)
5731 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5732 float_raise(float_flag_invalid, status);
67b7861d
AJ
5733 }
5734 return 1;
5735 }
5736 return 0;
5737}
5738
158142c2
FB
5739/*----------------------------------------------------------------------------
5740| Returns the result of converting the quadruple-precision floating-point
5741| value `a' to the 32-bit two's complement integer format. The conversion
5742| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5743| Arithmetic---which means in particular that the conversion is rounded
5744| according to the current rounding mode. If `a' is a NaN, the largest
5745| positive integer is returned. Otherwise, if the conversion overflows, the
5746| largest integer with the same sign as `a' is returned.
5747*----------------------------------------------------------------------------*/
5748
f4014512 5749int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5750{
5751 flag aSign;
f4014512 5752 int32_t aExp, shiftCount;
bb98fe42 5753 uint64_t aSig0, aSig1;
158142c2
FB
5754
5755 aSig1 = extractFloat128Frac1( a );
5756 aSig0 = extractFloat128Frac0( a );
5757 aExp = extractFloat128Exp( a );
5758 aSign = extractFloat128Sign( a );
5759 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5760 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5761 aSig0 |= ( aSig1 != 0 );
5762 shiftCount = 0x4028 - aExp;
5763 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5764 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5765
5766}
5767
5768/*----------------------------------------------------------------------------
5769| Returns the result of converting the quadruple-precision floating-point
5770| value `a' to the 32-bit two's complement integer format. The conversion
5771| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5772| Arithmetic, except that the conversion is always rounded toward zero. If
5773| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5774| conversion overflows, the largest integer with the same sign as `a' is
5775| returned.
5776*----------------------------------------------------------------------------*/
5777
f4014512 5778int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5779{
5780 flag aSign;
f4014512 5781 int32_t aExp, shiftCount;
bb98fe42 5782 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5783 int32_t z;
158142c2
FB
5784
5785 aSig1 = extractFloat128Frac1( a );
5786 aSig0 = extractFloat128Frac0( a );
5787 aExp = extractFloat128Exp( a );
5788 aSign = extractFloat128Sign( a );
5789 aSig0 |= ( aSig1 != 0 );
5790 if ( 0x401E < aExp ) {
5791 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5792 goto invalid;
5793 }
5794 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5795 if (aExp || aSig0) {
5796 status->float_exception_flags |= float_flag_inexact;
5797 }
158142c2
FB
5798 return 0;
5799 }
5800 aSig0 |= LIT64( 0x0001000000000000 );
5801 shiftCount = 0x402F - aExp;
5802 savedASig = aSig0;
5803 aSig0 >>= shiftCount;
5804 z = aSig0;
5805 if ( aSign ) z = - z;
5806 if ( ( z < 0 ) ^ aSign ) {
5807 invalid:
ff32e16e 5808 float_raise(float_flag_invalid, status);
bb98fe42 5809 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5810 }
5811 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5812 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5813 }
5814 return z;
5815
5816}
5817
5818/*----------------------------------------------------------------------------
5819| Returns the result of converting the quadruple-precision floating-point
5820| value `a' to the 64-bit two's complement integer format. The conversion
5821| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5822| Arithmetic---which means in particular that the conversion is rounded
5823| according to the current rounding mode. If `a' is a NaN, the largest
5824| positive integer is returned. Otherwise, if the conversion overflows, the
5825| largest integer with the same sign as `a' is returned.
5826*----------------------------------------------------------------------------*/
5827
f42c2224 5828int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5829{
5830 flag aSign;
f4014512 5831 int32_t aExp, shiftCount;
bb98fe42 5832 uint64_t aSig0, aSig1;
158142c2
FB
5833
5834 aSig1 = extractFloat128Frac1( a );
5835 aSig0 = extractFloat128Frac0( a );
5836 aExp = extractFloat128Exp( a );
5837 aSign = extractFloat128Sign( a );
5838 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5839 shiftCount = 0x402F - aExp;
5840 if ( shiftCount <= 0 ) {
5841 if ( 0x403E < aExp ) {
ff32e16e 5842 float_raise(float_flag_invalid, status);
158142c2
FB
5843 if ( ! aSign
5844 || ( ( aExp == 0x7FFF )
5845 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5846 )
5847 ) {
5848 return LIT64( 0x7FFFFFFFFFFFFFFF );
5849 }
bb98fe42 5850 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5851 }
5852 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5853 }
5854 else {
5855 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5856 }
ff32e16e 5857 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5858
5859}
5860
5861/*----------------------------------------------------------------------------
5862| Returns the result of converting the quadruple-precision floating-point
5863| value `a' to the 64-bit two's complement integer format. The conversion
5864| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5865| Arithmetic, except that the conversion is always rounded toward zero.
5866| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5867| the conversion overflows, the largest integer with the same sign as `a' is
5868| returned.
5869*----------------------------------------------------------------------------*/
5870
f42c2224 5871int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5872{
5873 flag aSign;
f4014512 5874 int32_t aExp, shiftCount;
bb98fe42 5875 uint64_t aSig0, aSig1;
f42c2224 5876 int64_t z;
158142c2
FB
5877
5878 aSig1 = extractFloat128Frac1( a );
5879 aSig0 = extractFloat128Frac0( a );
5880 aExp = extractFloat128Exp( a );
5881 aSign = extractFloat128Sign( a );
5882 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5883 shiftCount = aExp - 0x402F;
5884 if ( 0 < shiftCount ) {
5885 if ( 0x403E <= aExp ) {
5886 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5887 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5888 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5889 if (aSig1) {
5890 status->float_exception_flags |= float_flag_inexact;
5891 }
158142c2
FB
5892 }
5893 else {
ff32e16e 5894 float_raise(float_flag_invalid, status);
158142c2
FB
5895 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5896 return LIT64( 0x7FFFFFFFFFFFFFFF );
5897 }
5898 }
bb98fe42 5899 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5900 }
5901 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5902 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5903 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5904 }
5905 }
5906 else {
5907 if ( aExp < 0x3FFF ) {
5908 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5909 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5910 }
5911 return 0;
5912 }
5913 z = aSig0>>( - shiftCount );
5914 if ( aSig1
bb98fe42 5915 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5916 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5917 }
5918 }
5919 if ( aSign ) z = - z;
5920 return z;
5921
5922}
5923
2e6d8568
BR
5924/*----------------------------------------------------------------------------
5925| Returns the result of converting the quadruple-precision floating-point value
5926| `a' to the 64-bit unsigned integer format. The conversion is
5927| performed according to the IEC/IEEE Standard for Binary Floating-Point
5928| Arithmetic---which means in particular that the conversion is rounded
5929| according to the current rounding mode. If `a' is a NaN, the largest
5930| positive integer is returned. If the conversion overflows, the
5931| largest unsigned integer is returned. If 'a' is negative, the value is
5932| rounded and zero is returned; negative values that do not round to zero
5933| will raise the inexact exception.
5934*----------------------------------------------------------------------------*/
5935
5936uint64_t float128_to_uint64(float128 a, float_status *status)
5937{
5938 flag aSign;
5939 int aExp;
5940 int shiftCount;
5941 uint64_t aSig0, aSig1;
5942
5943 aSig0 = extractFloat128Frac0(a);
5944 aSig1 = extractFloat128Frac1(a);
5945 aExp = extractFloat128Exp(a);
5946 aSign = extractFloat128Sign(a);
5947 if (aSign && (aExp > 0x3FFE)) {
5948 float_raise(float_flag_invalid, status);
5949 if (float128_is_any_nan(a)) {
5950 return LIT64(0xFFFFFFFFFFFFFFFF);
5951 } else {
5952 return 0;
5953 }
5954 }
5955 if (aExp) {
5956 aSig0 |= LIT64(0x0001000000000000);
5957 }
5958 shiftCount = 0x402F - aExp;
5959 if (shiftCount <= 0) {
5960 if (0x403E < aExp) {
5961 float_raise(float_flag_invalid, status);
5962 return LIT64(0xFFFFFFFFFFFFFFFF);
5963 }
5964 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5965 } else {
5966 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5967 }
5968 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5969}
5970
5971uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5972{
5973 uint64_t v;
5974 signed char current_rounding_mode = status->float_rounding_mode;
5975
5976 set_float_rounding_mode(float_round_to_zero, status);
5977 v = float128_to_uint64(a, status);
5978 set_float_rounding_mode(current_rounding_mode, status);
5979
5980 return v;
5981}
5982
158142c2
FB
5983/*----------------------------------------------------------------------------
5984| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5985| value `a' to the 32-bit unsigned integer format. The conversion
5986| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5987| Arithmetic except that the conversion is always rounded toward zero.
5988| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5989| if the conversion overflows, the largest unsigned integer is returned.
5990| If 'a' is negative, the value is rounded and zero is returned; negative
5991| values that do not round to zero will raise the inexact exception.
5992*----------------------------------------------------------------------------*/
5993
5994uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5995{
5996 uint64_t v;
5997 uint32_t res;
5998 int old_exc_flags = get_float_exception_flags(status);
5999
6000 v = float128_to_uint64_round_to_zero(a, status);
6001 if (v > 0xffffffff) {
6002 res = 0xffffffff;
6003 } else {
6004 return v;
6005 }
6006 set_float_exception_flags(old_exc_flags, status);
6007 float_raise(float_flag_invalid, status);
6008 return res;
6009}
6010
6011/*----------------------------------------------------------------------------
6012| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6013| value `a' to the single-precision floating-point format. The conversion
6014| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6015| Arithmetic.
6016*----------------------------------------------------------------------------*/
6017
e5a41ffa 6018float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6019{
6020 flag aSign;
f4014512 6021 int32_t aExp;
bb98fe42
AF
6022 uint64_t aSig0, aSig1;
6023 uint32_t zSig;
158142c2
FB
6024
6025 aSig1 = extractFloat128Frac1( a );
6026 aSig0 = extractFloat128Frac0( a );
6027 aExp = extractFloat128Exp( a );
6028 aSign = extractFloat128Sign( a );
6029 if ( aExp == 0x7FFF ) {
6030 if ( aSig0 | aSig1 ) {
ff32e16e 6031 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6032 }
6033 return packFloat32( aSign, 0xFF, 0 );
6034 }
6035 aSig0 |= ( aSig1 != 0 );
6036 shift64RightJamming( aSig0, 18, &aSig0 );
6037 zSig = aSig0;
6038 if ( aExp || zSig ) {
6039 zSig |= 0x40000000;
6040 aExp -= 0x3F81;
6041 }
ff32e16e 6042 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6043
6044}
6045
6046/*----------------------------------------------------------------------------
6047| Returns the result of converting the quadruple-precision floating-point
6048| value `a' to the double-precision floating-point format. The conversion
6049| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6050| Arithmetic.
6051*----------------------------------------------------------------------------*/
6052
e5a41ffa 6053float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6054{
6055 flag aSign;
f4014512 6056 int32_t aExp;
bb98fe42 6057 uint64_t aSig0, aSig1;
158142c2
FB
6058
6059 aSig1 = extractFloat128Frac1( a );
6060 aSig0 = extractFloat128Frac0( a );
6061 aExp = extractFloat128Exp( a );
6062 aSign = extractFloat128Sign( a );
6063 if ( aExp == 0x7FFF ) {
6064 if ( aSig0 | aSig1 ) {
ff32e16e 6065 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6066 }
6067 return packFloat64( aSign, 0x7FF, 0 );
6068 }
6069 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6070 aSig0 |= ( aSig1 != 0 );
6071 if ( aExp || aSig0 ) {
6072 aSig0 |= LIT64( 0x4000000000000000 );
6073 aExp -= 0x3C01;
6074 }
ff32e16e 6075 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6076
6077}
6078
158142c2
FB
6079/*----------------------------------------------------------------------------
6080| Returns the result of converting the quadruple-precision floating-point
6081| value `a' to the extended double-precision floating-point format. The
6082| conversion is performed according to the IEC/IEEE Standard for Binary
6083| Floating-Point Arithmetic.
6084*----------------------------------------------------------------------------*/
6085
e5a41ffa 6086floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6087{
6088 flag aSign;
f4014512 6089 int32_t aExp;
bb98fe42 6090 uint64_t aSig0, aSig1;
158142c2
FB
6091
6092 aSig1 = extractFloat128Frac1( a );
6093 aSig0 = extractFloat128Frac0( a );
6094 aExp = extractFloat128Exp( a );
6095 aSign = extractFloat128Sign( a );
6096 if ( aExp == 0x7FFF ) {
6097 if ( aSig0 | aSig1 ) {
ff32e16e 6098 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6099 }
0f605c88
LV
6100 return packFloatx80(aSign, floatx80_infinity_high,
6101 floatx80_infinity_low);
158142c2
FB
6102 }
6103 if ( aExp == 0 ) {
6104 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6105 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6106 }
6107 else {
6108 aSig0 |= LIT64( 0x0001000000000000 );
6109 }
6110 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6111 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6112
6113}
6114
158142c2
FB
6115/*----------------------------------------------------------------------------
6116| Rounds the quadruple-precision floating-point value `a' to an integer, and
6117| returns the result as a quadruple-precision floating-point value. The
6118| operation is performed according to the IEC/IEEE Standard for Binary
6119| Floating-Point Arithmetic.
6120*----------------------------------------------------------------------------*/
6121
e5a41ffa 6122float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6123{
6124 flag aSign;
f4014512 6125 int32_t aExp;
bb98fe42 6126 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6127 float128 z;
6128
6129 aExp = extractFloat128Exp( a );
6130 if ( 0x402F <= aExp ) {
6131 if ( 0x406F <= aExp ) {
6132 if ( ( aExp == 0x7FFF )
6133 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6134 ) {
ff32e16e 6135 return propagateFloat128NaN(a, a, status);
158142c2
FB
6136 }
6137 return a;
6138 }
6139 lastBitMask = 1;
6140 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6141 roundBitsMask = lastBitMask - 1;
6142 z = a;
a2f2d288 6143 switch (status->float_rounding_mode) {
dc355b76 6144 case float_round_nearest_even:
158142c2
FB
6145 if ( lastBitMask ) {
6146 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6147 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6148 }
6149 else {
bb98fe42 6150 if ( (int64_t) z.low < 0 ) {
158142c2 6151 ++z.high;
bb98fe42 6152 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6153 }
6154 }
dc355b76 6155 break;
f9288a76
PM
6156 case float_round_ties_away:
6157 if (lastBitMask) {
6158 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6159 } else {
6160 if ((int64_t) z.low < 0) {
6161 ++z.high;
6162 }
6163 }
6164 break;
dc355b76
PM
6165 case float_round_to_zero:
6166 break;
6167 case float_round_up:
6168 if (!extractFloat128Sign(z)) {
6169 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6170 }
6171 break;
6172 case float_round_down:
6173 if (extractFloat128Sign(z)) {
6174 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6175 }
dc355b76
PM
6176 break;
6177 default:
6178 abort();
158142c2
FB
6179 }
6180 z.low &= ~ roundBitsMask;
6181 }
6182 else {
6183 if ( aExp < 0x3FFF ) {
bb98fe42 6184 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6185 status->float_exception_flags |= float_flag_inexact;
158142c2 6186 aSign = extractFloat128Sign( a );
a2f2d288 6187 switch (status->float_rounding_mode) {
158142c2
FB
6188 case float_round_nearest_even:
6189 if ( ( aExp == 0x3FFE )
6190 && ( extractFloat128Frac0( a )
6191 | extractFloat128Frac1( a ) )
6192 ) {
6193 return packFloat128( aSign, 0x3FFF, 0, 0 );
6194 }
6195 break;
f9288a76
PM
6196 case float_round_ties_away:
6197 if (aExp == 0x3FFE) {
6198 return packFloat128(aSign, 0x3FFF, 0, 0);
6199 }
6200 break;
158142c2
FB
6201 case float_round_down:
6202 return
6203 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6204 : packFloat128( 0, 0, 0, 0 );
6205 case float_round_up:
6206 return
6207 aSign ? packFloat128( 1, 0, 0, 0 )
6208 : packFloat128( 0, 0x3FFF, 0, 0 );
6209 }
6210 return packFloat128( aSign, 0, 0, 0 );
6211 }
6212 lastBitMask = 1;
6213 lastBitMask <<= 0x402F - aExp;
6214 roundBitsMask = lastBitMask - 1;
6215 z.low = 0;
6216 z.high = a.high;
a2f2d288 6217 switch (status->float_rounding_mode) {
dc355b76 6218 case float_round_nearest_even:
158142c2
FB
6219 z.high += lastBitMask>>1;
6220 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6221 z.high &= ~ lastBitMask;
6222 }
dc355b76 6223 break;
f9288a76
PM
6224 case float_round_ties_away:
6225 z.high += lastBitMask>>1;
6226 break;
dc355b76
PM
6227 case float_round_to_zero:
6228 break;
6229 case float_round_up:
6230 if (!extractFloat128Sign(z)) {
158142c2
FB
6231 z.high |= ( a.low != 0 );
6232 z.high += roundBitsMask;
6233 }
dc355b76
PM
6234 break;
6235 case float_round_down:
6236 if (extractFloat128Sign(z)) {
6237 z.high |= (a.low != 0);
6238 z.high += roundBitsMask;
6239 }
6240 break;
6241 default:
6242 abort();
158142c2
FB
6243 }
6244 z.high &= ~ roundBitsMask;
6245 }
6246 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6247 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6248 }
6249 return z;
6250
6251}
6252
6253/*----------------------------------------------------------------------------
6254| Returns the result of adding the absolute values of the quadruple-precision
6255| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6256| before being returned. `zSign' is ignored if the result is a NaN.
6257| The addition is performed according to the IEC/IEEE Standard for Binary
6258| Floating-Point Arithmetic.
6259*----------------------------------------------------------------------------*/
6260
e5a41ffa
PM
6261static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6262 float_status *status)
158142c2 6263{
f4014512 6264 int32_t aExp, bExp, zExp;
bb98fe42 6265 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6266 int32_t expDiff;
158142c2
FB
6267
6268 aSig1 = extractFloat128Frac1( a );
6269 aSig0 = extractFloat128Frac0( a );
6270 aExp = extractFloat128Exp( a );
6271 bSig1 = extractFloat128Frac1( b );
6272 bSig0 = extractFloat128Frac0( b );
6273 bExp = extractFloat128Exp( b );
6274 expDiff = aExp - bExp;
6275 if ( 0 < expDiff ) {
6276 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6277 if (aSig0 | aSig1) {
6278 return propagateFloat128NaN(a, b, status);
6279 }
158142c2
FB
6280 return a;
6281 }
6282 if ( bExp == 0 ) {
6283 --expDiff;
6284 }
6285 else {
6286 bSig0 |= LIT64( 0x0001000000000000 );
6287 }
6288 shift128ExtraRightJamming(
6289 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6290 zExp = aExp;
6291 }
6292 else if ( expDiff < 0 ) {
6293 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6294 if (bSig0 | bSig1) {
6295 return propagateFloat128NaN(a, b, status);
6296 }
158142c2
FB
6297 return packFloat128( zSign, 0x7FFF, 0, 0 );
6298 }
6299 if ( aExp == 0 ) {
6300 ++expDiff;
6301 }
6302 else {
6303 aSig0 |= LIT64( 0x0001000000000000 );
6304 }
6305 shift128ExtraRightJamming(
6306 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6307 zExp = bExp;
6308 }
6309 else {
6310 if ( aExp == 0x7FFF ) {
6311 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6312 return propagateFloat128NaN(a, b, status);
158142c2
FB
6313 }
6314 return a;
6315 }
6316 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6317 if ( aExp == 0 ) {
a2f2d288 6318 if (status->flush_to_zero) {
e6afc87f 6319 if (zSig0 | zSig1) {
ff32e16e 6320 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6321 }
6322 return packFloat128(zSign, 0, 0, 0);
6323 }
fe76d976
PB
6324 return packFloat128( zSign, 0, zSig0, zSig1 );
6325 }
158142c2
FB
6326 zSig2 = 0;
6327 zSig0 |= LIT64( 0x0002000000000000 );
6328 zExp = aExp;
6329 goto shiftRight1;
6330 }
6331 aSig0 |= LIT64( 0x0001000000000000 );
6332 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6333 --zExp;
6334 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6335 ++zExp;
6336 shiftRight1:
6337 shift128ExtraRightJamming(
6338 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6339 roundAndPack:
ff32e16e 6340 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6341
6342}
6343
6344/*----------------------------------------------------------------------------
6345| Returns the result of subtracting the absolute values of the quadruple-
6346| precision floating-point values `a' and `b'. If `zSign' is 1, the
6347| difference is negated before being returned. `zSign' is ignored if the
6348| result is a NaN. The subtraction is performed according to the IEC/IEEE
6349| Standard for Binary Floating-Point Arithmetic.
6350*----------------------------------------------------------------------------*/
6351
e5a41ffa
PM
6352static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6353 float_status *status)
158142c2 6354{
f4014512 6355 int32_t aExp, bExp, zExp;
bb98fe42 6356 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6357 int32_t expDiff;
158142c2
FB
6358
6359 aSig1 = extractFloat128Frac1( a );
6360 aSig0 = extractFloat128Frac0( a );
6361 aExp = extractFloat128Exp( a );
6362 bSig1 = extractFloat128Frac1( b );
6363 bSig0 = extractFloat128Frac0( b );
6364 bExp = extractFloat128Exp( b );
6365 expDiff = aExp - bExp;
6366 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6367 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6368 if ( 0 < expDiff ) goto aExpBigger;
6369 if ( expDiff < 0 ) goto bExpBigger;
6370 if ( aExp == 0x7FFF ) {
6371 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6372 return propagateFloat128NaN(a, b, status);
158142c2 6373 }
ff32e16e 6374 float_raise(float_flag_invalid, status);
af39bc8c 6375 return float128_default_nan(status);
158142c2
FB
6376 }
6377 if ( aExp == 0 ) {
6378 aExp = 1;
6379 bExp = 1;
6380 }
6381 if ( bSig0 < aSig0 ) goto aBigger;
6382 if ( aSig0 < bSig0 ) goto bBigger;
6383 if ( bSig1 < aSig1 ) goto aBigger;
6384 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6385 return packFloat128(status->float_rounding_mode == float_round_down,
6386 0, 0, 0);
158142c2
FB
6387 bExpBigger:
6388 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6389 if (bSig0 | bSig1) {
6390 return propagateFloat128NaN(a, b, status);
6391 }
158142c2
FB
6392 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6393 }
6394 if ( aExp == 0 ) {
6395 ++expDiff;
6396 }
6397 else {
6398 aSig0 |= LIT64( 0x4000000000000000 );
6399 }
6400 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6401 bSig0 |= LIT64( 0x4000000000000000 );
6402 bBigger:
6403 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6404 zExp = bExp;
6405 zSign ^= 1;
6406 goto normalizeRoundAndPack;
6407 aExpBigger:
6408 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6409 if (aSig0 | aSig1) {
6410 return propagateFloat128NaN(a, b, status);
6411 }
158142c2
FB
6412 return a;
6413 }
6414 if ( bExp == 0 ) {
6415 --expDiff;
6416 }
6417 else {
6418 bSig0 |= LIT64( 0x4000000000000000 );
6419 }
6420 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6421 aSig0 |= LIT64( 0x4000000000000000 );
6422 aBigger:
6423 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6424 zExp = aExp;
6425 normalizeRoundAndPack:
6426 --zExp;
ff32e16e
PM
6427 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6428 status);
158142c2
FB
6429
6430}
6431
6432/*----------------------------------------------------------------------------
6433| Returns the result of adding the quadruple-precision floating-point values
6434| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6435| for Binary Floating-Point Arithmetic.
6436*----------------------------------------------------------------------------*/
6437
e5a41ffa 6438float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6439{
6440 flag aSign, bSign;
6441
6442 aSign = extractFloat128Sign( a );
6443 bSign = extractFloat128Sign( b );
6444 if ( aSign == bSign ) {
ff32e16e 6445 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6446 }
6447 else {
ff32e16e 6448 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6449 }
6450
6451}
6452
6453/*----------------------------------------------------------------------------
6454| Returns the result of subtracting the quadruple-precision floating-point
6455| values `a' and `b'. The operation is performed according to the IEC/IEEE
6456| Standard for Binary Floating-Point Arithmetic.
6457*----------------------------------------------------------------------------*/
6458
e5a41ffa 6459float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6460{
6461 flag aSign, bSign;
6462
6463 aSign = extractFloat128Sign( a );
6464 bSign = extractFloat128Sign( b );
6465 if ( aSign == bSign ) {
ff32e16e 6466 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6467 }
6468 else {
ff32e16e 6469 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6470 }
6471
6472}
6473
6474/*----------------------------------------------------------------------------
6475| Returns the result of multiplying the quadruple-precision floating-point
6476| values `a' and `b'. The operation is performed according to the IEC/IEEE
6477| Standard for Binary Floating-Point Arithmetic.
6478*----------------------------------------------------------------------------*/
6479
e5a41ffa 6480float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6481{
6482 flag aSign, bSign, zSign;
f4014512 6483 int32_t aExp, bExp, zExp;
bb98fe42 6484 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6485
6486 aSig1 = extractFloat128Frac1( a );
6487 aSig0 = extractFloat128Frac0( a );
6488 aExp = extractFloat128Exp( a );
6489 aSign = extractFloat128Sign( a );
6490 bSig1 = extractFloat128Frac1( b );
6491 bSig0 = extractFloat128Frac0( b );
6492 bExp = extractFloat128Exp( b );
6493 bSign = extractFloat128Sign( b );
6494 zSign = aSign ^ bSign;
6495 if ( aExp == 0x7FFF ) {
6496 if ( ( aSig0 | aSig1 )
6497 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6498 return propagateFloat128NaN(a, b, status);
158142c2
FB
6499 }
6500 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6501 return packFloat128( zSign, 0x7FFF, 0, 0 );
6502 }
6503 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6504 if (bSig0 | bSig1) {
6505 return propagateFloat128NaN(a, b, status);
6506 }
158142c2
FB
6507 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6508 invalid:
ff32e16e 6509 float_raise(float_flag_invalid, status);
af39bc8c 6510 return float128_default_nan(status);
158142c2
FB
6511 }
6512 return packFloat128( zSign, 0x7FFF, 0, 0 );
6513 }
6514 if ( aExp == 0 ) {
6515 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6516 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6517 }
6518 if ( bExp == 0 ) {
6519 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6520 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6521 }
6522 zExp = aExp + bExp - 0x4000;
6523 aSig0 |= LIT64( 0x0001000000000000 );
6524 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6525 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6526 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6527 zSig2 |= ( zSig3 != 0 );
6528 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6529 shift128ExtraRightJamming(
6530 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6531 ++zExp;
6532 }
ff32e16e 6533 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6534
6535}
6536
6537/*----------------------------------------------------------------------------
6538| Returns the result of dividing the quadruple-precision floating-point value
6539| `a' by the corresponding value `b'. The operation is performed according to
6540| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6541*----------------------------------------------------------------------------*/
6542
e5a41ffa 6543float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6544{
6545 flag aSign, bSign, zSign;
f4014512 6546 int32_t aExp, bExp, zExp;
bb98fe42
AF
6547 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6548 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6549
6550 aSig1 = extractFloat128Frac1( a );
6551 aSig0 = extractFloat128Frac0( a );
6552 aExp = extractFloat128Exp( a );
6553 aSign = extractFloat128Sign( a );
6554 bSig1 = extractFloat128Frac1( b );
6555 bSig0 = extractFloat128Frac0( b );
6556 bExp = extractFloat128Exp( b );
6557 bSign = extractFloat128Sign( b );
6558 zSign = aSign ^ bSign;
6559 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6560 if (aSig0 | aSig1) {
6561 return propagateFloat128NaN(a, b, status);
6562 }
158142c2 6563 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6564 if (bSig0 | bSig1) {
6565 return propagateFloat128NaN(a, b, status);
6566 }
158142c2
FB
6567 goto invalid;
6568 }
6569 return packFloat128( zSign, 0x7FFF, 0, 0 );
6570 }
6571 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6572 if (bSig0 | bSig1) {
6573 return propagateFloat128NaN(a, b, status);
6574 }
158142c2
FB
6575 return packFloat128( zSign, 0, 0, 0 );
6576 }
6577 if ( bExp == 0 ) {
6578 if ( ( bSig0 | bSig1 ) == 0 ) {
6579 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6580 invalid:
ff32e16e 6581 float_raise(float_flag_invalid, status);
af39bc8c 6582 return float128_default_nan(status);
158142c2 6583 }
ff32e16e 6584 float_raise(float_flag_divbyzero, status);
158142c2
FB
6585 return packFloat128( zSign, 0x7FFF, 0, 0 );
6586 }
6587 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6588 }
6589 if ( aExp == 0 ) {
6590 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6591 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6592 }
6593 zExp = aExp - bExp + 0x3FFD;
6594 shortShift128Left(
6595 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6596 shortShift128Left(
6597 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6598 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6599 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6600 ++zExp;
6601 }
6602 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6603 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6604 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6605 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6606 --zSig0;
6607 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6608 }
6609 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6610 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6611 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6612 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6613 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6614 --zSig1;
6615 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6616 }
6617 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6618 }
6619 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6620 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6621
6622}
6623
6624/*----------------------------------------------------------------------------
6625| Returns the remainder of the quadruple-precision floating-point value `a'
6626| with respect to the corresponding value `b'. The operation is performed
6627| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6628*----------------------------------------------------------------------------*/
6629
/*
 * float128_rem: remainder of `a' with respect to `b'.
 *
 * a, b    quadruple-precision operands.
 * status  floating-point status; float_raise() is called on it with
 *         float_flag_invalid on the `invalid' path below.
 *
 * Special cases handled before the main computation:
 *   - a NaN operand is passed to propagateFloat128NaN();
 *   - `a' infinite, or `b' zero: invalid, the default NaN is returned;
 *   - `b' infinite, or `a' zero: `a' is returned unchanged;
 *   - exponent of `a' more than one below that of `b': `a' is returned
 *     unchanged.
 * Otherwise the result is built by normalizeRoundAndPackFloat128() from the
 * reduced significand, with exponent argument `bExp - 4' and sign
 * `aSign ^ zSign'.
 */
e5a41ffa 6630float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6631{
ed086f3d 6632 flag aSign, zSign;
f4014512 6633 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6634 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6635 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6636 int64_t sigMean0;
158142c2
FB
6637
/* Split both operands into fraction words, exponent and (for `a') sign. */
6638 aSig1 = extractFloat128Frac1( a );
6639 aSig0 = extractFloat128Frac0( a );
6640 aExp = extractFloat128Exp( a );
6641 aSign = extractFloat128Sign( a );
6642 bSig1 = extractFloat128Frac1( b );
6643 bSig0 = extractFloat128Frac0( b );
6644 bExp = extractFloat128Exp( b );
158142c2
FB
/* `a' is NaN or infinity: propagate any NaN, otherwise the operation is
 * invalid. */
6645 if ( aExp == 0x7FFF ) {
6646 if ( ( aSig0 | aSig1 )
6647 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6648 return propagateFloat128NaN(a, b, status);
158142c2
FB
6649 }
6650 goto invalid;
6651 }
/* `b' is NaN or infinity: propagate a NaN; with an infinite `b' the
 * remainder is `a' itself. */
6652 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6653 if (bSig0 | bSig1) {
6654 return propagateFloat128NaN(a, b, status);
6655 }
158142c2
FB
6656 return a;
6657 }
/* `b' is zero (invalid) or subnormal (normalize it). */
6658 if ( bExp == 0 ) {
6659 if ( ( bSig0 | bSig1 ) == 0 ) {
6660 invalid:
ff32e16e 6661 float_raise(float_flag_invalid, status);
af39bc8c 6662 return float128_default_nan(status);
158142c2
FB
6663 }
6664 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6665 }
/* `a' is zero (returned as is) or subnormal (normalize it). */
6666 if ( aExp == 0 ) {
6667 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6668 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6669 }
6670 expDiff = aExp - bExp;
6671 if ( expDiff < -1 ) return a;
/* Set the leading significand bit of both operands and shift them left;
 * `a' is shifted one bit less when its exponent is the smaller one. */
6672 shortShift128Left(
6673 aSig0 | LIT64( 0x0001000000000000 ),
6674 aSig1,
6675 15 - ( expDiff < 0 ),
6676 &aSig0,
6677 &aSig1
6678 );
6679 shortShift128Left(
6680 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
/* First quotient bit: subtract `b' once if it does not exceed `a'. */
6681 q = le128( bSig0, bSig1, aSig0, aSig1 );
6682 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6683 expDiff -= 64;
/* Reduce `a' 61 exponent steps per iteration.  The quotient estimate is
 * lowered by 4 (clamped at 0) before use - NOTE(review): presumably so it
 * never exceeds the true quotient; confirm against estimateDiv128To64(). */
6684 while ( 0 < expDiff ) {
6685 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6686 q = ( 4 < q ) ? q - 4 : 0;
6687 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6688 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6689 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6690 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6691 expDiff -= 61;
6692 }
/* Final partial step: the estimate is scaled down by the exponent steps
 * not needed, and both significands are realigned before subtracting. */
6693 if ( -64 < expDiff ) {
6694 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6695 q = ( 4 < q ) ? q - 4 : 0;
6696 q >>= - expDiff;
6697 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6698 expDiff += 52;
6699 if ( expDiff < 0 ) {
6700 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6701 }
6702 else {
6703 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6704 }
6705 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6706 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6707 }
6708 else {
6709 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6710 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6711 }
/* Keep subtracting `b' until the remainder goes negative, remembering the
 * last non-negative value in alternateASig0/1 and counting steps in q. */
6712 do {
6713 alternateASig0 = aSig0;
6714 alternateASig1 = aSig1;
6715 ++q;
6716 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6717 } while ( 0 <= (int64_t) aSig0 );
/* sigMean0:sigMean1 is the sum of the negative and the last non-negative
 * remainder.  The non-negative one is chosen when that sum is negative,
 * or when it is exactly zero and q is odd. */
158142c2 6718 add128(
bb98fe42 6719 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6720 if ( ( sigMean0 < 0 )
6721 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6722 aSig0 = alternateASig0;
6723 aSig1 = alternateASig1;
6724 }
/* A negative remainder is negated and its sign folded into the result. */
bb98fe42 6725 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6726 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6727 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6728 status);
158142c2
FB
6729}
6730
6731/*----------------------------------------------------------------------------
6732| Returns the square root of the quadruple-precision floating-point value `a'.
6733| The operation is performed according to the IEC/IEEE Standard for Binary
6734| Floating-Point Arithmetic.
6735*----------------------------------------------------------------------------*/
6736
/*
 * float128_sqrt: square root of the quadruple-precision value `a'.
 *
 * a       operand.
 * status  floating-point status; float_raise() is called on it with
 *         float_flag_invalid on the `invalid' path below.
 *
 * Special cases handled before the main computation:
 *   - a NaN is passed to propagateFloat128NaN(a, a, status);
 *   - positive infinity and negative zero are returned unchanged;
 *   - any other negative input (including negative infinity) is invalid and
 *     yields the default NaN;
 *   - a zero fraction with zero exponent yields packFloat128( 0, 0, 0, 0 ).
 * The result is always packed with sign 0 by roundAndPackFloat128().
 */
e5a41ffa 6737float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6738{
6739 flag aSign;
f4014512 6740 int32_t aExp, zExp;
bb98fe42
AF
6741 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6742 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6743
6744 aSig1 = extractFloat128Frac1( a );
6745 aSig0 = extractFloat128Frac0( a );
6746 aExp = extractFloat128Exp( a );
6747 aSign = extractFloat128Sign( a );
/* NaN or infinity. */
6748 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6749 if (aSig0 | aSig1) {
6750 return propagateFloat128NaN(a, a, status);
6751 }
158142c2
FB
6752 if ( ! aSign ) return a;
6753 goto invalid;
6754 }
/* Negative input: only negative zero has a result. */
6755 if ( aSign ) {
6756 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6757 invalid:
ff32e16e 6758 float_raise(float_flag_invalid, status);
af39bc8c 6759 return float128_default_nan(status);
158142c2
FB
6760 }
/* Zero or subnormal input. */
6761 if ( aExp == 0 ) {
6762 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6763 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6764 }
/* Result exponent: half of the unbiased input exponent, rebiased. */
6765 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6766 aSig0 |= LIT64( 0x0001000000000000 );
/* Initial estimate from estimateSqrt32(), then refined with one call to
 * estimateDiv128To64().  The left shift depends on the parity of aExp. */
6767 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6768 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6769 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6770 doubleZSig0 = zSig0<<1;
/* Remainder a - zSig0^2; step zSig0 down while it is negative. */
6771 mul64To128( zSig0, zSig0, &term0, &term1 );
6772 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6773 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6774 --zSig0;
6775 doubleZSig0 -= 2;
6776 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6777 }
/* Lower 64 result bits.  The estimate is corrected against the exact
 * remainder only when its low 13 bits are at most 5. */
6778 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6779 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6780 if ( zSig1 == 0 ) zSig1 = 1;
6781 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6782 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6783 mul64To128( zSig1, zSig1, &term2, &term3 );
6784 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6785 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6786 --zSig1;
6787 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6788 term3 |= 1;
6789 term2 |= doubleZSig0;
6790 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6791 }
/* A nonzero remainder is recorded as a sticky bit. */
6792 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6793 }
6794 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6795 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6796
6797}
6798
6799/*----------------------------------------------------------------------------
6800| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6801| the corresponding value `b', and 0 otherwise. The invalid exception is
6802| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6803| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6804*----------------------------------------------------------------------------*/
6805
e5a41ffa 6806int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6807{
6808
6809 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6810 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6811 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6812 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6813 ) {
ff32e16e 6814 float_raise(float_flag_invalid, status);
158142c2
FB
6815 return 0;
6816 }
6817 return
6818 ( a.low == b.low )
6819 && ( ( a.high == b.high )
6820 || ( ( a.low == 0 )
bb98fe42 6821 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6822 );
6823
6824}
6825
6826/*----------------------------------------------------------------------------
6827| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6828| or equal to the corresponding value `b', and 0 otherwise. The invalid
6829| exception is raised if either operand is a NaN. The comparison is performed
6830| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6831*----------------------------------------------------------------------------*/
6832
e5a41ffa 6833int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6834{
6835 flag aSign, bSign;
6836
6837 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6838 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6839 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6840 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6841 ) {
ff32e16e 6842 float_raise(float_flag_invalid, status);
158142c2
FB
6843 return 0;
6844 }
6845 aSign = extractFloat128Sign( a );
6846 bSign = extractFloat128Sign( b );
6847 if ( aSign != bSign ) {
6848 return
6849 aSign
bb98fe42 6850 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6851 == 0 );
6852 }
6853 return
6854 aSign ? le128( b.high, b.low, a.high, a.low )
6855 : le128( a.high, a.low, b.high, b.low );
6856
6857}
6858
6859/*----------------------------------------------------------------------------
6860| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6861| the corresponding value `b', and 0 otherwise. The invalid exception is
6862| raised if either operand is a NaN. The comparison is performed according
6863| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6864*----------------------------------------------------------------------------*/
6865
e5a41ffa 6866int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6867{
6868 flag aSign, bSign;
6869
6870 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6871 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6872 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6873 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6874 ) {
ff32e16e 6875 float_raise(float_flag_invalid, status);
158142c2
FB
6876 return 0;
6877 }
6878 aSign = extractFloat128Sign( a );
6879 bSign = extractFloat128Sign( b );
6880 if ( aSign != bSign ) {
6881 return
6882 aSign
bb98fe42 6883 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6884 != 0 );
6885 }
6886 return
6887 aSign ? lt128( b.high, b.low, a.high, a.low )
6888 : lt128( a.high, a.low, b.high, b.low );
6889
6890}
6891
67b7861d
AJ
6892/*----------------------------------------------------------------------------
6893| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6894| be compared, and 0 otherwise. The invalid exception is raised if either
6895| operand is a NaN. The comparison is performed according to the IEC/IEEE
6896| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6897*----------------------------------------------------------------------------*/
6898
e5a41ffa 6899int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6900{
6901 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6902 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6903 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6904 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6905 ) {
ff32e16e 6906 float_raise(float_flag_invalid, status);
67b7861d
AJ
6907 return 1;
6908 }
6909 return 0;
6910}
6911
158142c2
FB
6912/*----------------------------------------------------------------------------
6913| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6914| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6915| exception. The comparison is performed according to the IEC/IEEE Standard
6916| for Binary Floating-Point Arithmetic.
158142c2
FB
6917*----------------------------------------------------------------------------*/
6918
e5a41ffa 6919int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6920{
6921
6922 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6923 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6924 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6925 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6926 ) {
af39bc8c
AM
6927 if (float128_is_signaling_nan(a, status)
6928 || float128_is_signaling_nan(b, status)) {
ff32e16e 6929 float_raise(float_flag_invalid, status);
b689362d 6930 }
158142c2
FB
6931 return 0;
6932 }
6933 return
6934 ( a.low == b.low )
6935 && ( ( a.high == b.high )
6936 || ( ( a.low == 0 )
bb98fe42 6937 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6938 );
6939
6940}
6941
6942/*----------------------------------------------------------------------------
6943| Returns 1 if the quadruple-precision floating-point value `a' is less than
6944| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6945| cause an exception. Otherwise, the comparison is performed according to the
6946| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6947*----------------------------------------------------------------------------*/
6948
e5a41ffa 6949int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6950{
6951 flag aSign, bSign;
6952
6953 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6954 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6955 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6956 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6957 ) {
af39bc8c
AM
6958 if (float128_is_signaling_nan(a, status)
6959 || float128_is_signaling_nan(b, status)) {
ff32e16e 6960 float_raise(float_flag_invalid, status);
158142c2
FB
6961 }
6962 return 0;
6963 }
6964 aSign = extractFloat128Sign( a );
6965 bSign = extractFloat128Sign( b );
6966 if ( aSign != bSign ) {
6967 return
6968 aSign
bb98fe42 6969 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6970 == 0 );
6971 }
6972 return
6973 aSign ? le128( b.high, b.low, a.high, a.low )
6974 : le128( a.high, a.low, b.high, b.low );
6975
6976}
6977
6978/*----------------------------------------------------------------------------
6979| Returns 1 if the quadruple-precision floating-point value `a' is less than
6980| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6981| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6982| Standard for Binary Floating-Point Arithmetic.
6983*----------------------------------------------------------------------------*/
6984
e5a41ffa 6985int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6986{
6987 flag aSign, bSign;
6988
6989 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6990 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6991 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6992 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6993 ) {
af39bc8c
AM
6994 if (float128_is_signaling_nan(a, status)
6995 || float128_is_signaling_nan(b, status)) {
ff32e16e 6996 float_raise(float_flag_invalid, status);
158142c2
FB
6997 }
6998 return 0;
6999 }
7000 aSign = extractFloat128Sign( a );
7001 bSign = extractFloat128Sign( b );
7002 if ( aSign != bSign ) {
7003 return
7004 aSign
bb98fe42 7005 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7006 != 0 );
7007 }
7008 return
7009 aSign ? lt128( b.high, b.low, a.high, a.low )
7010 : lt128( a.high, a.low, b.high, b.low );
7011
7012}
7013
67b7861d
AJ
7014/*----------------------------------------------------------------------------
7015| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7016| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7017| comparison is performed according to the IEC/IEEE Standard for Binary
7018| Floating-Point Arithmetic.
7019*----------------------------------------------------------------------------*/
7020
e5a41ffa 7021int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7022{
7023 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7024 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7025 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7026 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7027 ) {
af39bc8c
AM
7028 if (float128_is_signaling_nan(a, status)
7029 || float128_is_signaling_nan(b, status)) {
ff32e16e 7030 float_raise(float_flag_invalid, status);
67b7861d
AJ
7031 }
7032 return 1;
7033 }
7034 return 0;
7035}
7036
e5a41ffa
PM
7037static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7038 int is_quiet, float_status *status)
f6714d36
AJ
7039{
7040 flag aSign, bSign;
7041
d1eb8f2a
AD
7042 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7043 float_raise(float_flag_invalid, status);
7044 return float_relation_unordered;
7045 }
f6714d36
AJ
7046 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7047 ( extractFloatx80Frac( a )<<1 ) ) ||
7048 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7049 ( extractFloatx80Frac( b )<<1 ) )) {
7050 if (!is_quiet ||
af39bc8c
AM
7051 floatx80_is_signaling_nan(a, status) ||
7052 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7053 float_raise(float_flag_invalid, status);
f6714d36
AJ
7054 }
7055 return float_relation_unordered;
7056 }
7057 aSign = extractFloatx80Sign( a );
7058 bSign = extractFloatx80Sign( b );
7059 if ( aSign != bSign ) {
7060
7061 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7062 ( ( a.low | b.low ) == 0 ) ) {
7063 /* zero case */
7064 return float_relation_equal;
7065 } else {
7066 return 1 - (2 * aSign);
7067 }
7068 } else {
7069 if (a.low == b.low && a.high == b.high) {
7070 return float_relation_equal;
7071 } else {
7072 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7073 }
7074 }
7075}
7076
e5a41ffa 7077int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7078{
ff32e16e 7079 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7080}
7081
e5a41ffa 7082int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7083{
ff32e16e 7084 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7085}
7086
e5a41ffa
PM
7087static inline int float128_compare_internal(float128 a, float128 b,
7088 int is_quiet, float_status *status)
1f587329
BS
7089{
7090 flag aSign, bSign;
7091
7092 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7093 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7094 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7095 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7096 if (!is_quiet ||
af39bc8c
AM
7097 float128_is_signaling_nan(a, status) ||
7098 float128_is_signaling_nan(b, status)) {
ff32e16e 7099 float_raise(float_flag_invalid, status);
1f587329
BS
7100 }
7101 return float_relation_unordered;
7102 }
7103 aSign = extractFloat128Sign( a );
7104 bSign = extractFloat128Sign( b );
7105 if ( aSign != bSign ) {
7106 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7107 /* zero case */
7108 return float_relation_equal;
7109 } else {
7110 return 1 - (2 * aSign);
7111 }
7112 } else {
7113 if (a.low == b.low && a.high == b.high) {
7114 return float_relation_equal;
7115 } else {
7116 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7117 }
7118 }
7119}
7120
e5a41ffa 7121int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7122{
ff32e16e 7123 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7124}
7125
e5a41ffa 7126int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7127{
ff32e16e 7128 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7129}
7130
e5a41ffa 7131floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7132{
7133 flag aSign;
326b9e98 7134 int32_t aExp;
bb98fe42 7135 uint64_t aSig;
9ee6e8bb 7136
d1eb8f2a
AD
7137 if (floatx80_invalid_encoding(a)) {
7138 float_raise(float_flag_invalid, status);
7139 return floatx80_default_nan(status);
7140 }
9ee6e8bb
PB
7141 aSig = extractFloatx80Frac( a );
7142 aExp = extractFloatx80Exp( a );
7143 aSign = extractFloatx80Sign( a );
7144
326b9e98
AJ
7145 if ( aExp == 0x7FFF ) {
7146 if ( aSig<<1 ) {
ff32e16e 7147 return propagateFloatx80NaN(a, a, status);
326b9e98 7148 }
9ee6e8bb
PB
7149 return a;
7150 }
326b9e98 7151
3c85c37f
PM
7152 if (aExp == 0) {
7153 if (aSig == 0) {
7154 return a;
7155 }
7156 aExp++;
7157 }
69397542 7158
326b9e98
AJ
7159 if (n > 0x10000) {
7160 n = 0x10000;
7161 } else if (n < -0x10000) {
7162 n = -0x10000;
7163 }
7164
9ee6e8bb 7165 aExp += n;
a2f2d288
PM
7166 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7167 aSign, aExp, aSig, 0, status);
9ee6e8bb 7168}
9ee6e8bb 7169
e5a41ffa 7170float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7171{
7172 flag aSign;
326b9e98 7173 int32_t aExp;
bb98fe42 7174 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7175
7176 aSig1 = extractFloat128Frac1( a );
7177 aSig0 = extractFloat128Frac0( a );
7178 aExp = extractFloat128Exp( a );
7179 aSign = extractFloat128Sign( a );
7180 if ( aExp == 0x7FFF ) {
326b9e98 7181 if ( aSig0 | aSig1 ) {
ff32e16e 7182 return propagateFloat128NaN(a, a, status);
326b9e98 7183 }
9ee6e8bb
PB
7184 return a;
7185 }
3c85c37f 7186 if (aExp != 0) {
69397542 7187 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7188 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7189 return a;
3c85c37f
PM
7190 } else {
7191 aExp++;
7192 }
69397542 7193
326b9e98
AJ
7194 if (n > 0x10000) {
7195 n = 0x10000;
7196 } else if (n < -0x10000) {
7197 n = -0x10000;
7198 }
7199
69397542
PB
7200 aExp += n - 1;
7201 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7202 , status);
9ee6e8bb
PB
7203
7204}
This page took 1.757984 seconds and 4 git commands to generate.