]> Git Repo - qemu.git/blame - fpu/softfloat.c
tests: make docker.py check for persistent configs
[qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
/* The float32 variants never use fpclassify, regardless of host. */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

/* The float64 variants use fpclassify only on x86_64 hosts. */
#if !defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
/* isinf() is selected only on x86_64 and aarch64 hosts. */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   0
#else
# define QEMU_HARDFLOAT_USE_ISINF   1
#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
/* Tell the builder why hardfloat is being switched off under -ffast-math. */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
  IEEE implementation
#endif

/*
 * Hardfloat is disabled for PPC targets and for -ffast-math builds.
 * When it is enabled, the softfloat entry points are additionally marked
 * noinline.
 */
#if !defined(TARGET_PPC) && !defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#else
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
342/* Note: @fast_test and @post can be NULL */
343static inline float32
344float32_gen2(float32 xa, float32 xb, float_status *s,
345 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
346 f32_check_fn pre, f32_check_fn post,
347 f32_check_fn fast_test, soft_f32_op2_fn fast_op)
348{
349 union_float32 ua, ub, ur;
350
351 ua.s = xa;
352 ub.s = xb;
353
354 if (unlikely(!can_use_fpu(s))) {
355 goto soft;
356 }
357
358 float32_input_flush2(&ua.s, &ub.s, s);
359 if (unlikely(!pre(ua, ub))) {
360 goto soft;
361 }
362 if (fast_test && fast_test(ua, ub)) {
363 return fast_op(ua.s, ub.s, s);
364 }
365
366 ur.h = hard(ua.h, ub.h);
367 if (unlikely(f32_is_inf(ur))) {
368 s->float_exception_flags |= float_flag_overflow;
369 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
370 if (post == NULL || post(ua, ub)) {
371 goto soft;
372 }
373 }
374 return ur.s;
375
376 soft:
377 return soft(ua.s, ub.s, s);
378}
379
380static inline float64
381float64_gen2(float64 xa, float64 xb, float_status *s,
382 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
383 f64_check_fn pre, f64_check_fn post,
384 f64_check_fn fast_test, soft_f64_op2_fn fast_op)
385{
386 union_float64 ua, ub, ur;
387
388 ua.s = xa;
389 ub.s = xb;
390
391 if (unlikely(!can_use_fpu(s))) {
392 goto soft;
393 }
394
395 float64_input_flush2(&ua.s, &ub.s, s);
396 if (unlikely(!pre(ua, ub))) {
397 goto soft;
398 }
399 if (fast_test && fast_test(ua, ub)) {
400 return fast_op(ua.s, ub.s, s);
401 }
402
403 ur.h = hard(ua.h, ub.h);
404 if (unlikely(f64_is_inf(ur))) {
405 s->float_exception_flags |= float_flag_overflow;
406 } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
407 if (post == NULL || post(ua, ub)) {
408 goto soft;
409 }
410 }
411 return ur.s;
412
413 soft:
414 return soft(ua.s, ub.s, s);
415}
416
bb4d4bb3
PM
417/*----------------------------------------------------------------------------
418| Returns the fraction bits of the half-precision floating-point value `a'.
419*----------------------------------------------------------------------------*/
420
a49db98d 421static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
422{
423 return float16_val(a) & 0x3ff;
424}
425
426/*----------------------------------------------------------------------------
427| Returns the exponent bits of the half-precision floating-point value `a'.
428*----------------------------------------------------------------------------*/
429
0c48262d 430static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
431{
432 return (float16_val(a) >> 10) & 0x1f;
433}
434
d97544c9
AB
435/*----------------------------------------------------------------------------
436| Returns the fraction bits of the single-precision floating-point value `a'.
437*----------------------------------------------------------------------------*/
438
439static inline uint32_t extractFloat32Frac(float32 a)
440{
441 return float32_val(a) & 0x007FFFFF;
442}
443
444/*----------------------------------------------------------------------------
445| Returns the exponent bits of the single-precision floating-point value `a'.
446*----------------------------------------------------------------------------*/
447
448static inline int extractFloat32Exp(float32 a)
449{
450 return (float32_val(a) >> 23) & 0xFF;
451}
452
453/*----------------------------------------------------------------------------
454| Returns the sign bit of the single-precision floating-point value `a'.
455*----------------------------------------------------------------------------*/
456
457static inline flag extractFloat32Sign(float32 a)
458{
459 return float32_val(a) >> 31;
460}
461
462/*----------------------------------------------------------------------------
463| Returns the fraction bits of the double-precision floating-point value `a'.
464*----------------------------------------------------------------------------*/
465
466static inline uint64_t extractFloat64Frac(float64 a)
467{
468 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
469}
470
471/*----------------------------------------------------------------------------
472| Returns the exponent bits of the double-precision floating-point value `a'.
473*----------------------------------------------------------------------------*/
474
475static inline int extractFloat64Exp(float64 a)
476{
477 return (float64_val(a) >> 52) & 0x7FF;
478}
479
480/*----------------------------------------------------------------------------
481| Returns the sign bit of the double-precision floating-point value `a'.
482*----------------------------------------------------------------------------*/
483
484static inline flag extractFloat64Sign(float64 a)
485{
486 return float64_val(a) >> 63;
487}
488
a90119b5
AB
489/*
490 * Classify a floating point number. Everything above float_class_qnan
491 * is a NaN so cls >= float_class_qnan is any NaN.
492 */
493
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,   /* not yet canonicalized */
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,   /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
502
247d1f21
RH
503/* Simple helpers for checking if, or what kind of, NaN we have */
504static inline __attribute__((unused)) bool is_nan(FloatClass c)
505{
506 return unlikely(c >= float_class_qnan);
507}
508
509static inline __attribute__((unused)) bool is_snan(FloatClass c)
510{
511 return c == float_class_snan;
512}
513
514static inline __attribute__((unused)) bool is_qnan(FloatClass c)
515{
516 return c == float_class_qnan;
517}
518
a90119b5
AB
519/*
520 * Structure holding all of the decomposed parts of a float. The
521 * exponent is unbiased and the fraction is normalized. All
522 * calculations are done with a 64 bit fraction and then rounded as
523 * appropriate for the final format.
524 *
525 * Thanks to the packed FloatClass a decent compiler should be able to
526 * fit the whole structure into registers and avoid using the stack
527 * for parameter passing.
528 */
529
530typedef struct {
531 uint64_t frac;
532 int32_t exp;
533 FloatClass cls;
534 bool sign;
535} FloatParts;
536
/* Bit position of the binary point within the 64-bit decomposed fraction. */
#define DECOMPOSED_BINARY_POINT    (64 - 2)
/* The implicit leading one sits at the binary point ... */
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
/* ... and a carry out of it lands one bit higher. */
#define DECOMPOSED_OVERFLOW_BIT    (1ull << (DECOMPOSED_BINARY_POINT + 1))
540
541/* Structure holding all of the relevant parameters for a format.
542 * exp_size: the size of the exponent field
543 * exp_bias: the offset applied to the exponent field
544 * exp_max: the maximum normalised exponent
545 * frac_size: the size of the fraction field
546 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
547 * The following are computed based on the size of the fraction
548 * frac_lsb: least significant bit of fraction
ca3a3d5a 549 * frac_lsbm1: the bit below the least significant bit (for rounding)
a90119b5 550 * round_mask/roundeven_mask: masks used for rounding
ca3a3d5a
AB
551 * The following optional modifiers are available:
552 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
553 */
typedef struct {
    int exp_size;               /* size of the exponent field */
    int exp_bias;               /* offset applied to the exponent field */
    int exp_max;                /* maximum normalised exponent */
    int frac_size;              /* size of the fraction field */
    int frac_shift;             /* shift to DECOMPOSED_BINARY_POINT */
    uint64_t frac_lsb;          /* least significant bit of fraction */
    uint64_t frac_lsbm1;        /* bit below the lsb (for rounding) */
    uint64_t round_mask;        /* mask used for rounding */
    uint64_t roundeven_mask;    /* mask used for round-to-even */
    bool arm_althp;             /* handle ARM Alternative Half Precision */
} FloatFmt;
566
567/* Expand fields based on the size of exponent and fraction */
/*
 * Expand to the designated initializers of a FloatFmt for a format with
 * E exponent bits and F fraction bits.  Everything except arm_althp is
 * derived from those two numbers and DECOMPOSED_BINARY_POINT.
 *
 * Both arguments are parenthesized at every use so that an expression
 * argument (e.g. "20 + 3") produces the same result as its literal value.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
578
579static const FloatFmt float16_params = {
580 FLOAT_PARAMS(5, 10)
581};
582
6fed16b2
AB
583static const FloatFmt float16_params_ahp = {
584 FLOAT_PARAMS(5, 10),
585 .arm_althp = true
586};
587
a90119b5
AB
588static const FloatFmt float32_params = {
589 FLOAT_PARAMS(8, 23)
590};
591
592static const FloatFmt float64_params = {
593 FLOAT_PARAMS(11, 52)
594};
595
6fff2167
AB
596/* Unpack a float to parts, but do not canonicalize. */
597static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
598{
599 const int sign_pos = fmt.frac_size + fmt.exp_size;
600
601 return (FloatParts) {
602 .cls = float_class_unclassified,
603 .sign = extract64(raw, sign_pos, 1),
604 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
605 .frac = extract64(raw, 0, fmt.frac_size),
606 };
607}
608
609static inline FloatParts float16_unpack_raw(float16 f)
610{
611 return unpack_raw(float16_params, f);
612}
613
614static inline FloatParts float32_unpack_raw(float32 f)
615{
616 return unpack_raw(float32_params, f);
617}
618
619static inline FloatParts float64_unpack_raw(float64 f)
620{
621 return unpack_raw(float64_params, f);
622}
623
624/* Pack a float from parts, but do not canonicalize. */
625static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
626{
627 const int sign_pos = fmt.frac_size + fmt.exp_size;
628 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
629 return deposit64(ret, sign_pos, 1, p.sign);
630}
631
632static inline float16 float16_pack_raw(FloatParts p)
633{
634 return make_float16(pack_raw(float16_params, p));
635}
636
637static inline float32 float32_pack_raw(FloatParts p)
638{
639 return make_float32(pack_raw(float32_params, p));
640}
641
642static inline float64 float64_pack_raw(FloatParts p)
643{
644 return make_float64(pack_raw(float64_params, p));
645}
646
0664335a
RH
647/*----------------------------------------------------------------------------
648| Functions and definitions to determine: (1) whether tininess for underflow
649| is detected before or after rounding by default, (2) what (if anything)
650| happens when exceptions are raised, (3) how signaling NaNs are distinguished
651| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
652| are propagated from function inputs to output. These details are target-
653| specific.
654*----------------------------------------------------------------------------*/
655#include "softfloat-specialize.h"
656
6fff2167 657/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
658static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
659 float_status *status)
6fff2167 660{
ca3a3d5a 661 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
662 if (part.frac == 0) {
663 part.cls = float_class_inf;
664 } else {
94933df0 665 part.frac <<= parm->frac_shift;
298b468e
RH
666 part.cls = (parts_is_snan_frac(part.frac, status)
667 ? float_class_snan : float_class_qnan);
6fff2167
AB
668 }
669 } else if (part.exp == 0) {
670 if (likely(part.frac == 0)) {
671 part.cls = float_class_zero;
672 } else if (status->flush_inputs_to_zero) {
673 float_raise(float_flag_input_denormal, status);
674 part.cls = float_class_zero;
675 part.frac = 0;
676 } else {
677 int shift = clz64(part.frac) - 1;
678 part.cls = float_class_normal;
679 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
680 part.frac <<= shift;
681 }
682 } else {
683 part.cls = float_class_normal;
684 part.exp -= parm->exp_bias;
685 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
686 }
687 return part;
688}
689
690/* Round and uncanonicalize a floating-point number by parts. There
691 * are FRAC_SHIFT bits that may require rounding at the bottom of the
692 * fraction; these bits will be removed. The exponent will be biased
693 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
694 */
695
696static FloatParts round_canonical(FloatParts p, float_status *s,
697 const FloatFmt *parm)
698{
699 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
700 const uint64_t round_mask = parm->round_mask;
701 const uint64_t roundeven_mask = parm->roundeven_mask;
702 const int exp_max = parm->exp_max;
703 const int frac_shift = parm->frac_shift;
704 uint64_t frac, inc;
705 int exp, flags = 0;
706 bool overflow_norm;
707
708 frac = p.frac;
709 exp = p.exp;
710
711 switch (p.cls) {
712 case float_class_normal:
713 switch (s->float_rounding_mode) {
714 case float_round_nearest_even:
715 overflow_norm = false;
716 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
717 break;
718 case float_round_ties_away:
719 overflow_norm = false;
720 inc = frac_lsbm1;
721 break;
722 case float_round_to_zero:
723 overflow_norm = true;
724 inc = 0;
725 break;
726 case float_round_up:
727 inc = p.sign ? 0 : round_mask;
728 overflow_norm = p.sign;
729 break;
730 case float_round_down:
731 inc = p.sign ? round_mask : 0;
732 overflow_norm = !p.sign;
733 break;
734 default:
735 g_assert_not_reached();
736 }
737
738 exp += parm->exp_bias;
739 if (likely(exp > 0)) {
740 if (frac & round_mask) {
741 flags |= float_flag_inexact;
742 frac += inc;
743 if (frac & DECOMPOSED_OVERFLOW_BIT) {
744 frac >>= 1;
745 exp++;
746 }
747 }
748 frac >>= frac_shift;
749
ca3a3d5a
AB
750 if (parm->arm_althp) {
751 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
752 if (unlikely(exp > exp_max)) {
753 /* Overflow. Return the maximum normal. */
754 flags = float_flag_invalid;
755 exp = exp_max;
756 frac = -1;
757 }
758 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
759 flags |= float_flag_overflow | float_flag_inexact;
760 if (overflow_norm) {
761 exp = exp_max - 1;
762 frac = -1;
763 } else {
764 p.cls = float_class_inf;
765 goto do_inf;
766 }
767 }
768 } else if (s->flush_to_zero) {
769 flags |= float_flag_output_denormal;
770 p.cls = float_class_zero;
771 goto do_zero;
772 } else {
773 bool is_tiny = (s->float_detect_tininess
774 == float_tininess_before_rounding)
775 || (exp < 0)
776 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
777
778 shift64RightJamming(frac, 1 - exp, &frac);
779 if (frac & round_mask) {
780 /* Need to recompute round-to-even. */
781 if (s->float_rounding_mode == float_round_nearest_even) {
782 inc = ((frac & roundeven_mask) != frac_lsbm1
783 ? frac_lsbm1 : 0);
784 }
785 flags |= float_flag_inexact;
786 frac += inc;
787 }
788
789 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
790 frac >>= frac_shift;
791
792 if (is_tiny && (flags & float_flag_inexact)) {
793 flags |= float_flag_underflow;
794 }
795 if (exp == 0 && frac == 0) {
796 p.cls = float_class_zero;
797 }
798 }
799 break;
800
801 case float_class_zero:
802 do_zero:
803 exp = 0;
804 frac = 0;
805 break;
806
807 case float_class_inf:
808 do_inf:
ca3a3d5a 809 assert(!parm->arm_althp);
6fff2167
AB
810 exp = exp_max;
811 frac = 0;
812 break;
813
814 case float_class_qnan:
815 case float_class_snan:
ca3a3d5a 816 assert(!parm->arm_althp);
6fff2167 817 exp = exp_max;
94933df0 818 frac >>= parm->frac_shift;
6fff2167
AB
819 break;
820
821 default:
822 g_assert_not_reached();
823 }
824
825 float_raise(flags, s);
826 p.exp = exp;
827 p.frac = frac;
828 return p;
829}
830
6fed16b2
AB
831/* Explicit FloatFmt version */
832static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
833 const FloatFmt *params)
834{
f9943c7f 835 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
836}
837
6fff2167
AB
838static FloatParts float16_unpack_canonical(float16 f, float_status *s)
839{
6fed16b2
AB
840 return float16a_unpack_canonical(f, s, &float16_params);
841}
842
843static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
844 const FloatFmt *params)
845{
846 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
847}
848
849static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
850{
6fed16b2 851 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
852}
853
854static FloatParts float32_unpack_canonical(float32 f, float_status *s)
855{
f9943c7f 856 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
857}
858
859static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
860{
0bcfbcbe 861 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
862}
863
864static FloatParts float64_unpack_canonical(float64 f, float_status *s)
865{
f9943c7f 866 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
867}
868
869static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
870{
0bcfbcbe 871 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
872}
873
dbe4d53a
AB
874static FloatParts return_nan(FloatParts a, float_status *s)
875{
876 switch (a.cls) {
877 case float_class_snan:
878 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 879 a = parts_silence_nan(a, s);
dbe4d53a
AB
880 /* fall through */
881 case float_class_qnan:
882 if (s->default_nan_mode) {
f7e598e2 883 return parts_default_nan(s);
dbe4d53a
AB
884 }
885 break;
886
887 default:
888 g_assert_not_reached();
889 }
890 return a;
891}
892
6fff2167
AB
893static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
894{
895 if (is_snan(a.cls) || is_snan(b.cls)) {
896 s->float_exception_flags |= float_flag_invalid;
897 }
898
899 if (s->default_nan_mode) {
f7e598e2 900 return parts_default_nan(s);
6fff2167 901 } else {
4f251cfd 902 if (pickNaN(a.cls, b.cls,
6fff2167
AB
903 a.frac > b.frac ||
904 (a.frac == b.frac && a.sign < b.sign))) {
905 a = b;
906 }
0bcfbcbe
RH
907 if (is_snan(a.cls)) {
908 return parts_silence_nan(a, s);
909 }
6fff2167
AB
910 }
911 return a;
912}
913
d446830a
AB
914static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
915 bool inf_zero, float_status *s)
916{
1839189b
PM
917 int which;
918
d446830a
AB
919 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
920 s->float_exception_flags |= float_flag_invalid;
921 }
922
3bd2dec1 923 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 924
d446830a 925 if (s->default_nan_mode) {
1839189b
PM
926 /* Note that this check is after pickNaNMulAdd so that function
927 * has an opportunity to set the Invalid flag.
928 */
f7e598e2 929 which = 3;
1839189b 930 }
d446830a 931
1839189b
PM
932 switch (which) {
933 case 0:
934 break;
935 case 1:
936 a = b;
937 break;
938 case 2:
939 a = c;
940 break;
941 case 3:
f7e598e2 942 return parts_default_nan(s);
1839189b
PM
943 default:
944 g_assert_not_reached();
d446830a 945 }
1839189b 946
0bcfbcbe
RH
947 if (is_snan(a.cls)) {
948 return parts_silence_nan(a, s);
949 }
d446830a
AB
950 return a;
951}
952
6fff2167
AB
953/*
954 * Returns the result of adding or subtracting the values of the
955 * floating-point values `a' and `b'. The operation is performed
956 * according to the IEC/IEEE Standard for Binary Floating-Point
957 * Arithmetic.
958 */
959
960static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
961 float_status *s)
962{
963 bool a_sign = a.sign;
964 bool b_sign = b.sign ^ subtract;
965
966 if (a_sign != b_sign) {
967 /* Subtraction */
968
969 if (a.cls == float_class_normal && b.cls == float_class_normal) {
970 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
971 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
972 a.frac = a.frac - b.frac;
973 } else {
974 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
975 a.frac = b.frac - a.frac;
976 a.exp = b.exp;
977 a_sign ^= 1;
978 }
979
980 if (a.frac == 0) {
981 a.cls = float_class_zero;
982 a.sign = s->float_rounding_mode == float_round_down;
983 } else {
984 int shift = clz64(a.frac) - 1;
985 a.frac = a.frac << shift;
986 a.exp = a.exp - shift;
987 a.sign = a_sign;
988 }
989 return a;
990 }
991 if (is_nan(a.cls) || is_nan(b.cls)) {
992 return pick_nan(a, b, s);
993 }
994 if (a.cls == float_class_inf) {
995 if (b.cls == float_class_inf) {
996 float_raise(float_flag_invalid, s);
f7e598e2 997 return parts_default_nan(s);
6fff2167
AB
998 }
999 return a;
1000 }
1001 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1002 a.sign = s->float_rounding_mode == float_round_down;
1003 return a;
1004 }
1005 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1006 b.sign = a_sign ^ 1;
1007 return b;
1008 }
1009 if (b.cls == float_class_zero) {
1010 return a;
1011 }
1012 } else {
1013 /* Addition */
1014 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1015 if (a.exp > b.exp) {
1016 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1017 } else if (a.exp < b.exp) {
1018 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1019 a.exp = b.exp;
1020 }
1021 a.frac += b.frac;
1022 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1023 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1024 a.exp += 1;
1025 }
1026 return a;
1027 }
1028 if (is_nan(a.cls) || is_nan(b.cls)) {
1029 return pick_nan(a, b, s);
1030 }
1031 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1032 return a;
1033 }
1034 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1035 b.sign = b_sign;
1036 return b;
1037 }
1038 }
1039 g_assert_not_reached();
1040}
1041
1042/*
1043 * Returns the result of adding or subtracting the floating-point
1044 * values `a' and `b'. The operation is performed according to the
1045 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1046 */
1047
97ff87c0 1048float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1049{
1050 FloatParts pa = float16_unpack_canonical(a, status);
1051 FloatParts pb = float16_unpack_canonical(b, status);
1052 FloatParts pr = addsub_floats(pa, pb, false, status);
1053
1054 return float16_round_pack_canonical(pr, status);
1055}
1056
1b615d48
EC
1057float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1058{
1059 FloatParts pa = float16_unpack_canonical(a, status);
1060 FloatParts pb = float16_unpack_canonical(b, status);
1061 FloatParts pr = addsub_floats(pa, pb, true, status);
1062
1063 return float16_round_pack_canonical(pr, status);
1064}
1065
1066static float32 QEMU_SOFTFLOAT_ATTR
1067soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1068{
1069 FloatParts pa = float32_unpack_canonical(a, status);
1070 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1071 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1072
1073 return float32_round_pack_canonical(pr, status);
1074}
1075
1b615d48
EC
1076static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1077{
1078 return soft_f32_addsub(a, b, false, status);
1079}
1080
1081static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1082{
1083 return soft_f32_addsub(a, b, true, status);
1084}
1085
1086static float64 QEMU_SOFTFLOAT_ATTR
1087soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1088{
1089 FloatParts pa = float64_unpack_canonical(a, status);
1090 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1091 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1092
1093 return float64_round_pack_canonical(pr, status);
1094}
1095
1b615d48 1096static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1097{
1b615d48
EC
1098 return soft_f64_addsub(a, b, false, status);
1099}
6fff2167 1100
1b615d48
EC
1101static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1102{
1103 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1104}
1105
/* Host single-precision addition; the hard op behind float32_add(). */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1110
1b615d48
EC
/* Host single-precision subtraction; the hard op behind float32_sub(). */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1115
/* Host double-precision addition; the hard op behind float64_add(). */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1120
1b615d48
EC
/* Host double-precision subtraction; the hard op behind float64_sub(). */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1125
1126static bool f32_addsub_post(union_float32 a, union_float32 b)
1127{
1128 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1129 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1130 }
1131 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1132}
1133
1134static bool f64_addsub_post(union_float64 a, union_float64 b)
1135{
1136 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1137 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1138 } else {
1139 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1140 }
1141}
1142
1143static float32 float32_addsub(float32 a, float32 b, float_status *s,
1144 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1145{
1146 return float32_gen2(a, b, s, hard, soft,
1147 f32_is_zon2, f32_addsub_post, NULL, NULL);
1148}
1149
1150static float64 float64_addsub(float64 a, float64 b, float_status *s,
1151 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1152{
1153 return float64_gen2(a, b, s, hard, soft,
1154 f64_is_zon2, f64_addsub_post, NULL, NULL);
1155}
1156
1157float32 QEMU_FLATTEN
1158float32_add(float32 a, float32 b, float_status *s)
1159{
1160 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1161}
1162
1163float32 QEMU_FLATTEN
1164float32_sub(float32 a, float32 b, float_status *s)
1165{
1166 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1167}
1168
1169float64 QEMU_FLATTEN
1170float64_add(float64 a, float64 b, float_status *s)
1171{
1172 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1173}
1174
1175float64 QEMU_FLATTEN
1176float64_sub(float64 a, float64 b, float_status *s)
1177{
1178 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1179}
1180
74d707e2
AB
1181/*
1182 * Returns the result of multiplying the floating-point values `a' and
1183 * `b'. The operation is performed according to the IEC/IEEE Standard
1184 * for Binary Floating-Point Arithmetic.
1185 */
1186
1187static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1188{
1189 bool sign = a.sign ^ b.sign;
1190
1191 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1192 uint64_t hi, lo;
1193 int exp = a.exp + b.exp;
1194
1195 mul64To128(a.frac, b.frac, &hi, &lo);
1196 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1197 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1198 shift64RightJamming(lo, 1, &lo);
1199 exp += 1;
1200 }
1201
1202 /* Re-use a */
1203 a.exp = exp;
1204 a.sign = sign;
1205 a.frac = lo;
1206 return a;
1207 }
1208 /* handle all the NaN cases */
1209 if (is_nan(a.cls) || is_nan(b.cls)) {
1210 return pick_nan(a, b, s);
1211 }
1212 /* Inf * Zero == NaN */
1213 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1214 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1215 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1216 return parts_default_nan(s);
74d707e2
AB
1217 }
1218 /* Multiply by 0 or Inf */
1219 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1220 a.sign = sign;
1221 return a;
1222 }
1223 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1224 b.sign = sign;
1225 return b;
1226 }
1227 g_assert_not_reached();
1228}
1229
97ff87c0 1230float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1231{
1232 FloatParts pa = float16_unpack_canonical(a, status);
1233 FloatParts pb = float16_unpack_canonical(b, status);
1234 FloatParts pr = mul_floats(pa, pb, status);
1235
1236 return float16_round_pack_canonical(pr, status);
1237}
1238
2dfabc86
EC
1239static float32 QEMU_SOFTFLOAT_ATTR
1240soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1241{
1242 FloatParts pa = float32_unpack_canonical(a, status);
1243 FloatParts pb = float32_unpack_canonical(b, status);
1244 FloatParts pr = mul_floats(pa, pb, status);
1245
1246 return float32_round_pack_canonical(pr, status);
1247}
1248
2dfabc86
EC
1249static float64 QEMU_SOFTFLOAT_ATTR
1250soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1251{
1252 FloatParts pa = float64_unpack_canonical(a, status);
1253 FloatParts pb = float64_unpack_canonical(b, status);
1254 FloatParts pr = mul_floats(pa, pb, status);
1255
1256 return float64_round_pack_canonical(pr, status);
1257}
1258
2dfabc86
EC
/* Host single-precision multiplication; the hard op behind float32_mul(). */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1263
/* Host double-precision multiplication; the hard op behind float64_mul(). */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1268
1269static bool f32_mul_fast_test(union_float32 a, union_float32 b)
1270{
1271 return float32_is_zero(a.s) || float32_is_zero(b.s);
1272}
1273
1274static bool f64_mul_fast_test(union_float64 a, union_float64 b)
1275{
1276 return float64_is_zero(a.s) || float64_is_zero(b.s);
1277}
1278
1279static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
1280{
1281 bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
1282
1283 return float32_set_sign(float32_zero, signbit);
1284}
1285
1286static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
1287{
1288 bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
1289
1290 return float64_set_sign(float64_zero, signbit);
1291}
1292
1293float32 QEMU_FLATTEN
1294float32_mul(float32 a, float32 b, float_status *s)
1295{
1296 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1297 f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
1298}
1299
1300float64 QEMU_FLATTEN
1301float64_mul(float64 a, float64 b, float_status *s)
1302{
1303 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1304 f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
1305}
1306
d446830a
AB
1307/*
1308 * Returns the result of multiplying the floating-point values `a' and
1309 * `b' then adding 'c', with no intermediate rounding step after the
1310 * multiplication. The operation is performed according to the
1311 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1312 * The flags argument allows the caller to select negation of the
1313 * addend, the intermediate product, or the final result. (The
1314 * difference between this and having the caller do a separate
1315 * negation is that negating externally will flip the sign bit on
1316 * NaNs.)
1317 */
1318
1319static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1320 int flags, float_status *s)
1321{
1322 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1323 ((1 << float_class_inf) | (1 << float_class_zero));
1324 bool p_sign;
1325 bool sign_flip = flags & float_muladd_negate_result;
1326 FloatClass p_class;
1327 uint64_t hi, lo;
1328 int p_exp;
1329
1330 /* It is implementation-defined whether the cases of (0,inf,qnan)
1331 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1332 * they return if they do), so we have to hand this information
1333 * off to the target-specific pick-a-NaN routine.
1334 */
1335 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1336 return pick_nan_muladd(a, b, c, inf_zero, s);
1337 }
1338
1339 if (inf_zero) {
1340 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1341 return parts_default_nan(s);
d446830a
AB
1342 }
1343
1344 if (flags & float_muladd_negate_c) {
1345 c.sign ^= 1;
1346 }
1347
1348 p_sign = a.sign ^ b.sign;
1349
1350 if (flags & float_muladd_negate_product) {
1351 p_sign ^= 1;
1352 }
1353
1354 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1355 p_class = float_class_inf;
1356 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1357 p_class = float_class_zero;
1358 } else {
1359 p_class = float_class_normal;
1360 }
1361
1362 if (c.cls == float_class_inf) {
1363 if (p_class == float_class_inf && p_sign != c.sign) {
1364 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1365 return parts_default_nan(s);
d446830a
AB
1366 } else {
1367 a.cls = float_class_inf;
1368 a.sign = c.sign ^ sign_flip;
f7e598e2 1369 return a;
d446830a 1370 }
d446830a
AB
1371 }
1372
1373 if (p_class == float_class_inf) {
1374 a.cls = float_class_inf;
1375 a.sign = p_sign ^ sign_flip;
1376 return a;
1377 }
1378
1379 if (p_class == float_class_zero) {
1380 if (c.cls == float_class_zero) {
1381 if (p_sign != c.sign) {
1382 p_sign = s->float_rounding_mode == float_round_down;
1383 }
1384 c.sign = p_sign;
1385 } else if (flags & float_muladd_halve_result) {
1386 c.exp -= 1;
1387 }
1388 c.sign ^= sign_flip;
1389 return c;
1390 }
1391
1392 /* a & b should be normals now... */
1393 assert(a.cls == float_class_normal &&
1394 b.cls == float_class_normal);
1395
1396 p_exp = a.exp + b.exp;
1397
1398 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1399 * result.
1400 */
1401 mul64To128(a.frac, b.frac, &hi, &lo);
1402 /* binary point now at bit 124 */
1403
1404 /* check for overflow */
1405 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1406 shift128RightJamming(hi, lo, 1, &hi, &lo);
1407 p_exp += 1;
1408 }
1409
1410 /* + add/sub */
1411 if (c.cls == float_class_zero) {
1412 /* move binary point back to 62 */
1413 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1414 } else {
1415 int exp_diff = p_exp - c.exp;
1416 if (p_sign == c.sign) {
1417 /* Addition */
1418 if (exp_diff <= 0) {
1419 shift128RightJamming(hi, lo,
1420 DECOMPOSED_BINARY_POINT - exp_diff,
1421 &hi, &lo);
1422 lo += c.frac;
1423 p_exp = c.exp;
1424 } else {
1425 uint64_t c_hi, c_lo;
1426 /* shift c to the same binary point as the product (124) */
1427 c_hi = c.frac >> 2;
1428 c_lo = 0;
1429 shift128RightJamming(c_hi, c_lo,
1430 exp_diff,
1431 &c_hi, &c_lo);
1432 add128(hi, lo, c_hi, c_lo, &hi, &lo);
1433 /* move binary point back to 62 */
1434 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1435 }
1436
1437 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1438 shift64RightJamming(lo, 1, &lo);
1439 p_exp += 1;
1440 }
1441
1442 } else {
1443 /* Subtraction */
1444 uint64_t c_hi, c_lo;
1445 /* make C binary point match product at bit 124 */
1446 c_hi = c.frac >> 2;
1447 c_lo = 0;
1448
1449 if (exp_diff <= 0) {
1450 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1451 if (exp_diff == 0
1452 &&
1453 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1454 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1455 } else {
1456 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1457 p_sign ^= 1;
1458 p_exp = c.exp;
1459 }
1460 } else {
1461 shift128RightJamming(c_hi, c_lo,
1462 exp_diff,
1463 &c_hi, &c_lo);
1464 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1465 }
1466
1467 if (hi == 0 && lo == 0) {
1468 a.cls = float_class_zero;
1469 a.sign = s->float_rounding_mode == float_round_down;
1470 a.sign ^= sign_flip;
1471 return a;
1472 } else {
1473 int shift;
1474 if (hi != 0) {
1475 shift = clz64(hi);
1476 } else {
1477 shift = clz64(lo) + 64;
1478 }
1479 /* Normalizing to a binary point of 124 is the
1480 correct adjust for the exponent. However since we're
1481 shifting, we might as well put the binary point back
1482 at 62 where we really want it. Therefore shift as
1483 if we're leaving 1 bit at the top of the word, but
1484 adjust the exponent as if we're leaving 3 bits. */
1485 shift -= 1;
1486 if (shift >= 64) {
1487 lo = lo << (shift - 64);
1488 } else {
1489 hi = (hi << shift) | (lo >> (64 - shift));
1490 lo = hi | ((lo << shift) != 0);
1491 }
1492 p_exp -= shift - 2;
1493 }
1494 }
1495 }
1496
1497 if (flags & float_muladd_halve_result) {
1498 p_exp -= 1;
1499 }
1500
1501 /* finally prepare our result */
1502 a.cls = float_class_normal;
1503 a.sign = p_sign ^ sign_flip;
1504 a.exp = p_exp;
1505 a.frac = lo;
1506
1507 return a;
1508}
1509
97ff87c0 1510float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1511 int flags, float_status *status)
1512{
1513 FloatParts pa = float16_unpack_canonical(a, status);
1514 FloatParts pb = float16_unpack_canonical(b, status);
1515 FloatParts pc = float16_unpack_canonical(c, status);
1516 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1517
1518 return float16_round_pack_canonical(pr, status);
1519}
1520
ccf770ba
EC
1521static float32 QEMU_SOFTFLOAT_ATTR
1522soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1523 float_status *status)
d446830a
AB
1524{
1525 FloatParts pa = float32_unpack_canonical(a, status);
1526 FloatParts pb = float32_unpack_canonical(b, status);
1527 FloatParts pc = float32_unpack_canonical(c, status);
1528 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1529
1530 return float32_round_pack_canonical(pr, status);
1531}
1532
ccf770ba
EC
1533static float64 QEMU_SOFTFLOAT_ATTR
1534soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1535 float_status *status)
d446830a
AB
1536{
1537 FloatParts pa = float64_unpack_canonical(a, status);
1538 FloatParts pb = float64_unpack_canonical(b, status);
1539 FloatParts pc = float64_unpack_canonical(c, status);
1540 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1541
1542 return float64_round_pack_canonical(pr, status);
1543}
1544
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host FMA
 * and take the softfloat path.  Nothing in this part of the file
 * writes it.
 */
static bool force_soft_fma = false;
1546
ccf770ba
EC
1547float32 QEMU_FLATTEN
1548float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1549{
1550 union_float32 ua, ub, uc, ur;
1551
1552 ua.s = xa;
1553 ub.s = xb;
1554 uc.s = xc;
1555
1556 if (unlikely(!can_use_fpu(s))) {
1557 goto soft;
1558 }
1559 if (unlikely(flags & float_muladd_halve_result)) {
1560 goto soft;
1561 }
1562
1563 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1564 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1565 goto soft;
1566 }
f6b3b108
EC
1567
1568 if (unlikely(force_soft_fma)) {
1569 goto soft;
1570 }
1571
ccf770ba
EC
1572 /*
1573 * When (a || b) == 0, there's no need to check for under/over flow,
1574 * since we know the addend is (normal || 0) and the product is 0.
1575 */
1576 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1577 union_float32 up;
1578 bool prod_sign;
1579
1580 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1581 prod_sign ^= !!(flags & float_muladd_negate_product);
1582 up.s = float32_set_sign(float32_zero, prod_sign);
1583
1584 if (flags & float_muladd_negate_c) {
1585 uc.h = -uc.h;
1586 }
1587 ur.h = up.h + uc.h;
1588 } else {
1589 if (flags & float_muladd_negate_product) {
1590 ua.h = -ua.h;
1591 }
1592 if (flags & float_muladd_negate_c) {
1593 uc.h = -uc.h;
1594 }
1595
1596 ur.h = fmaf(ua.h, ub.h, uc.h);
1597
1598 if (unlikely(f32_is_inf(ur))) {
1599 s->float_exception_flags |= float_flag_overflow;
1600 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1601 goto soft;
1602 }
1603 }
1604 if (flags & float_muladd_negate_result) {
1605 return float32_chs(ur.s);
1606 }
1607 return ur.s;
1608
1609 soft:
1610 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1611}
1612
1613float64 QEMU_FLATTEN
1614float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1615{
1616 union_float64 ua, ub, uc, ur;
1617
1618 ua.s = xa;
1619 ub.s = xb;
1620 uc.s = xc;
1621
1622 if (unlikely(!can_use_fpu(s))) {
1623 goto soft;
1624 }
1625 if (unlikely(flags & float_muladd_halve_result)) {
1626 goto soft;
1627 }
1628
1629 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1630 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1631 goto soft;
1632 }
f6b3b108
EC
1633
1634 if (unlikely(force_soft_fma)) {
1635 goto soft;
1636 }
1637
ccf770ba
EC
1638 /*
1639 * When (a || b) == 0, there's no need to check for under/over flow,
1640 * since we know the addend is (normal || 0) and the product is 0.
1641 */
1642 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1643 union_float64 up;
1644 bool prod_sign;
1645
1646 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1647 prod_sign ^= !!(flags & float_muladd_negate_product);
1648 up.s = float64_set_sign(float64_zero, prod_sign);
1649
1650 if (flags & float_muladd_negate_c) {
1651 uc.h = -uc.h;
1652 }
1653 ur.h = up.h + uc.h;
1654 } else {
1655 if (flags & float_muladd_negate_product) {
1656 ua.h = -ua.h;
1657 }
1658 if (flags & float_muladd_negate_c) {
1659 uc.h = -uc.h;
1660 }
1661
1662 ur.h = fma(ua.h, ub.h, uc.h);
1663
1664 if (unlikely(f64_is_inf(ur))) {
1665 s->float_exception_flags |= float_flag_overflow;
1666 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1667 goto soft;
1668 }
1669 }
1670 if (flags & float_muladd_negate_result) {
1671 return float64_chs(ur.s);
1672 }
1673 return ur.s;
1674
1675 soft:
1676 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1677}
1678
cf07323d
AB
1679/*
1680 * Returns the result of dividing the floating-point value `a' by the
1681 * corresponding value `b'. The operation is performed according to
1682 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1683 */
1684
1685static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1686{
1687 bool sign = a.sign ^ b.sign;
1688
1689 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1690 uint64_t n0, n1, q, r;
cf07323d 1691 int exp = a.exp - b.exp;
5dfbc9e4
RH
1692
1693 /*
1694 * We want a 2*N / N-bit division to produce exactly an N-bit
1695 * result, so that we do not lose any precision and so that we
1696 * do not have to renormalize afterward. If A.frac < B.frac,
1697 * then division would produce an (N-1)-bit result; shift A left
1698 * by one to produce the an N-bit result, and decrement the
1699 * exponent to match.
1700 *
1701 * The udiv_qrnnd algorithm that we're using requires normalization,
1702 * i.e. the msb of the denominator must be set. Since we know that
1703 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1704 * by one (more), and the remainder must be shifted right by one.
1705 */
cf07323d
AB
1706 if (a.frac < b.frac) {
1707 exp -= 1;
5dfbc9e4 1708 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1709 } else {
5dfbc9e4 1710 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1711 }
5dfbc9e4
RH
1712 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1713
1714 /*
1715 * Set lsb if there is a remainder, to set inexact.
1716 * As mentioned above, to find the actual value of the remainder we
1717 * would need to shift right, but (1) we are only concerned about
1718 * non-zero-ness, and (2) the remainder will always be even because
1719 * both inputs to the division primitive are even.
1720 */
1721 a.frac = q | (r != 0);
cf07323d
AB
1722 a.sign = sign;
1723 a.exp = exp;
1724 return a;
1725 }
1726 /* handle all the NaN cases */
1727 if (is_nan(a.cls) || is_nan(b.cls)) {
1728 return pick_nan(a, b, s);
1729 }
1730 /* 0/0 or Inf/Inf */
1731 if (a.cls == b.cls
1732 &&
1733 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1734 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1735 return parts_default_nan(s);
cf07323d 1736 }
9cb4e398
AB
1737 /* Inf / x or 0 / x */
1738 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1739 a.sign = sign;
1740 return a;
1741 }
cf07323d
AB
1742 /* Div 0 => Inf */
1743 if (b.cls == float_class_zero) {
1744 s->float_exception_flags |= float_flag_divbyzero;
1745 a.cls = float_class_inf;
1746 a.sign = sign;
1747 return a;
1748 }
cf07323d
AB
1749 /* Div by Inf */
1750 if (b.cls == float_class_inf) {
1751 a.cls = float_class_zero;
1752 a.sign = sign;
1753 return a;
1754 }
1755 g_assert_not_reached();
1756}
1757
1758float16 float16_div(float16 a, float16 b, float_status *status)
1759{
1760 FloatParts pa = float16_unpack_canonical(a, status);
1761 FloatParts pb = float16_unpack_canonical(b, status);
1762 FloatParts pr = div_floats(pa, pb, status);
1763
1764 return float16_round_pack_canonical(pr, status);
1765}
1766
4a629561
EC
1767static float32 QEMU_SOFTFLOAT_ATTR
1768soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1769{
1770 FloatParts pa = float32_unpack_canonical(a, status);
1771 FloatParts pb = float32_unpack_canonical(b, status);
1772 FloatParts pr = div_floats(pa, pb, status);
1773
1774 return float32_round_pack_canonical(pr, status);
1775}
1776
4a629561
EC
1777static float64 QEMU_SOFTFLOAT_ATTR
1778soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1779{
1780 FloatParts pa = float64_unpack_canonical(a, status);
1781 FloatParts pb = float64_unpack_canonical(b, status);
1782 FloatParts pr = div_floats(pa, pb, status);
1783
1784 return float64_round_pack_canonical(pr, status);
1785}
1786
4a629561
EC
/* Host single-precision division; the hard op behind float32_div(). */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;

    return quotient;
}
1791
/* Host double-precision division; the hard op behind float64_div(). */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;

    return quotient;
}
1796
1797static bool f32_div_pre(union_float32 a, union_float32 b)
1798{
1799 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1800 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1801 fpclassify(b.h) == FP_NORMAL;
1802 }
1803 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1804}
1805
1806static bool f64_div_pre(union_float64 a, union_float64 b)
1807{
1808 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1809 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1810 fpclassify(b.h) == FP_NORMAL;
1811 }
1812 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1813}
1814
1815static bool f32_div_post(union_float32 a, union_float32 b)
1816{
1817 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1818 return fpclassify(a.h) != FP_ZERO;
1819 }
1820 return !float32_is_zero(a.s);
1821}
1822
1823static bool f64_div_post(union_float64 a, union_float64 b)
1824{
1825 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1826 return fpclassify(a.h) != FP_ZERO;
1827 }
1828 return !float64_is_zero(a.s);
1829}
1830
1831float32 QEMU_FLATTEN
1832float32_div(float32 a, float32 b, float_status *s)
1833{
1834 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1835 f32_div_pre, f32_div_post, NULL, NULL);
1836}
1837
1838float64 QEMU_FLATTEN
1839float64_div(float64 a, float64 b, float_status *s)
1840{
1841 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1842 f64_div_pre, f64_div_post, NULL, NULL);
1843}
1844
6fed16b2
AB
1845/*
1846 * Float to Float conversions
1847 *
1848 * Returns the result of converting one float format to another. The
1849 * conversion is performed according to the IEC/IEEE Standard for
1850 * Binary Floating-Point Arithmetic.
1851 *
1852 * The float_to_float helper only needs to take care of raising
1853 * invalid exceptions and handling the conversion on NaNs.
1854 */
1855
1856static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1857 float_status *s)
1858{
1859 if (dstf->arm_althp) {
1860 switch (a.cls) {
1861 case float_class_qnan:
1862 case float_class_snan:
1863 /* There is no NaN in the destination format. Raise Invalid
1864 * and return a zero with the sign of the input NaN.
1865 */
1866 s->float_exception_flags |= float_flag_invalid;
1867 a.cls = float_class_zero;
1868 a.frac = 0;
1869 a.exp = 0;
1870 break;
1871
1872 case float_class_inf:
1873 /* There is no Inf in the destination format. Raise Invalid
1874 * and return the maximum normal with the correct sign.
1875 */
1876 s->float_exception_flags |= float_flag_invalid;
1877 a.cls = float_class_normal;
1878 a.exp = dstf->exp_max;
1879 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1880 break;
1881
1882 default:
1883 break;
1884 }
1885 } else if (is_nan(a.cls)) {
1886 if (is_snan(a.cls)) {
1887 s->float_exception_flags |= float_flag_invalid;
1888 a = parts_silence_nan(a, s);
1889 }
1890 if (s->default_nan_mode) {
1891 return parts_default_nan(s);
1892 }
1893 }
1894 return a;
1895}
1896
1897float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1898{
1899 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1900 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1901 FloatParts pr = float_to_float(p, &float32_params, s);
1902 return float32_round_pack_canonical(pr, s);
1903}
1904
1905float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1906{
1907 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1908 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1909 FloatParts pr = float_to_float(p, &float64_params, s);
1910 return float64_round_pack_canonical(pr, s);
1911}
1912
1913float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1914{
1915 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1916 FloatParts p = float32_unpack_canonical(a, s);
1917 FloatParts pr = float_to_float(p, fmt16, s);
1918 return float16a_round_pack_canonical(pr, s, fmt16);
1919}
1920
1921float64 float32_to_float64(float32 a, float_status *s)
1922{
1923 FloatParts p = float32_unpack_canonical(a, s);
1924 FloatParts pr = float_to_float(p, &float64_params, s);
1925 return float64_round_pack_canonical(pr, s);
1926}
1927
1928float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1929{
1930 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1931 FloatParts p = float64_unpack_canonical(a, s);
1932 FloatParts pr = float_to_float(p, fmt16, s);
1933 return float16a_round_pack_canonical(pr, s, fmt16);
1934}
1935
1936float32 float64_to_float32(float64 a, float_status *s)
1937{
1938 FloatParts p = float64_unpack_canonical(a, s);
1939 FloatParts pr = float_to_float(p, &float32_params, s);
1940 return float32_round_pack_canonical(pr, s);
1941}
1942
dbe4d53a
AB
1943/*
1944 * Rounds the floating-point value `a' to an integer, and returns the
1945 * result as a floating-point value. The operation is performed
1946 * according to the IEC/IEEE Standard for Binary Floating-Point
1947 * Arithmetic.
1948 */
1949
2f6c74be
RH
1950static FloatParts round_to_int(FloatParts a, int rmode,
1951 int scale, float_status *s)
dbe4d53a 1952{
2f6c74be
RH
1953 switch (a.cls) {
1954 case float_class_qnan:
1955 case float_class_snan:
dbe4d53a 1956 return return_nan(a, s);
dbe4d53a 1957
dbe4d53a
AB
1958 case float_class_zero:
1959 case float_class_inf:
dbe4d53a
AB
1960 /* already "integral" */
1961 break;
2f6c74be 1962
dbe4d53a 1963 case float_class_normal:
2f6c74be
RH
1964 scale = MIN(MAX(scale, -0x10000), 0x10000);
1965 a.exp += scale;
1966
dbe4d53a
AB
1967 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1968 /* already integral */
1969 break;
1970 }
1971 if (a.exp < 0) {
1972 bool one;
1973 /* all fractional */
1974 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1975 switch (rmode) {
dbe4d53a
AB
1976 case float_round_nearest_even:
1977 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1978 break;
1979 case float_round_ties_away:
1980 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1981 break;
1982 case float_round_to_zero:
1983 one = false;
1984 break;
1985 case float_round_up:
1986 one = !a.sign;
1987 break;
1988 case float_round_down:
1989 one = a.sign;
1990 break;
1991 default:
1992 g_assert_not_reached();
1993 }
1994
1995 if (one) {
1996 a.frac = DECOMPOSED_IMPLICIT_BIT;
1997 a.exp = 0;
1998 } else {
1999 a.cls = float_class_zero;
2000 }
2001 } else {
2002 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2003 uint64_t frac_lsbm1 = frac_lsb >> 1;
2004 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2005 uint64_t rnd_mask = rnd_even_mask >> 1;
2006 uint64_t inc;
2007
2f6c74be 2008 switch (rmode) {
dbe4d53a
AB
2009 case float_round_nearest_even:
2010 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2011 break;
2012 case float_round_ties_away:
2013 inc = frac_lsbm1;
2014 break;
2015 case float_round_to_zero:
2016 inc = 0;
2017 break;
2018 case float_round_up:
2019 inc = a.sign ? 0 : rnd_mask;
2020 break;
2021 case float_round_down:
2022 inc = a.sign ? rnd_mask : 0;
2023 break;
2024 default:
2025 g_assert_not_reached();
2026 }
2027
2028 if (a.frac & rnd_mask) {
2029 s->float_exception_flags |= float_flag_inexact;
2030 a.frac += inc;
2031 a.frac &= ~rnd_mask;
2032 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2033 a.frac >>= 1;
2034 a.exp++;
2035 }
2036 }
2037 }
2038 break;
2039 default:
2040 g_assert_not_reached();
2041 }
2042 return a;
2043}
2044
2045float16 float16_round_to_int(float16 a, float_status *s)
2046{
2047 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2048 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2049 return float16_round_pack_canonical(pr, s);
2050}
2051
2052float32 float32_round_to_int(float32 a, float_status *s)
2053{
2054 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2055 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2056 return float32_round_pack_canonical(pr, s);
2057}
2058
2059float64 float64_round_to_int(float64 a, float_status *s)
2060{
2061 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2062 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2063 return float64_round_pack_canonical(pr, s);
2064}
2065
ab52f973
AB
2066/*
2067 * Returns the result of converting the floating-point value `a' to
2068 * the two's complement integer format. The conversion is performed
2069 * according to the IEC/IEEE Standard for Binary Floating-Point
2070 * Arithmetic---which means in particular that the conversion is
2071 * rounded according to the current rounding mode. If `a' is a NaN,
2072 * the largest positive integer is returned. Otherwise, if the
2073 * conversion overflows, the largest integer with the same sign as `a'
2074 * is returned.
2075*/
2076
2f6c74be 2077static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
2078 int64_t min, int64_t max,
2079 float_status *s)
2080{
2081 uint64_t r;
2082 int orig_flags = get_float_exception_flags(s);
2f6c74be 2083 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2084
2085 switch (p.cls) {
2086 case float_class_snan:
2087 case float_class_qnan:
801bc563 2088 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2089 return max;
2090 case float_class_inf:
801bc563 2091 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2092 return p.sign ? min : max;
2093 case float_class_zero:
2094 return 0;
2095 case float_class_normal:
2096 if (p.exp < DECOMPOSED_BINARY_POINT) {
2097 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2098 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2099 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2100 } else {
2101 r = UINT64_MAX;
2102 }
2103 if (p.sign) {
33358375 2104 if (r <= -(uint64_t) min) {
ab52f973
AB
2105 return -r;
2106 } else {
2107 s->float_exception_flags = orig_flags | float_flag_invalid;
2108 return min;
2109 }
2110 } else {
33358375 2111 if (r <= max) {
ab52f973
AB
2112 return r;
2113 } else {
2114 s->float_exception_flags = orig_flags | float_flag_invalid;
2115 return max;
2116 }
2117 }
2118 default:
2119 g_assert_not_reached();
2120 }
2121}
2122
2f6c74be
RH
2123int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2124 float_status *s)
2125{
2126 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2127 rmode, scale, INT16_MIN, INT16_MAX, s);
2128}
2129
2130int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2131 float_status *s)
2132{
2133 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2134 rmode, scale, INT32_MIN, INT32_MAX, s);
2135}
2136
2137int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2138 float_status *s)
2139{
2140 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2141 rmode, scale, INT64_MIN, INT64_MAX, s);
2142}
2143
2144int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2145 float_status *s)
2146{
2147 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2148 rmode, scale, INT16_MIN, INT16_MAX, s);
2149}
2150
2151int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2152 float_status *s)
2153{
2154 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2155 rmode, scale, INT32_MIN, INT32_MAX, s);
2156}
2157
2158int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2159 float_status *s)
2160{
2161 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2162 rmode, scale, INT64_MIN, INT64_MAX, s);
2163}
2164
2165int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2166 float_status *s)
2167{
2168 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2169 rmode, scale, INT16_MIN, INT16_MAX, s);
2170}
2171
2172int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2173 float_status *s)
2174{
2175 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2176 rmode, scale, INT32_MIN, INT32_MAX, s);
2177}
2178
2179int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2180 float_status *s)
2181{
2182 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2183 rmode, scale, INT64_MIN, INT64_MAX, s);
2184}
2185
2186int16_t float16_to_int16(float16 a, float_status *s)
2187{
2188 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2189}
2190
2191int32_t float16_to_int32(float16 a, float_status *s)
2192{
2193 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2194}
2195
2196int64_t float16_to_int64(float16 a, float_status *s)
2197{
2198 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2199}
2200
2201int16_t float32_to_int16(float32 a, float_status *s)
2202{
2203 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2204}
2205
2206int32_t float32_to_int32(float32 a, float_status *s)
2207{
2208 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2209}
2210
2211int64_t float32_to_int64(float32 a, float_status *s)
2212{
2213 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2214}
2215
2216int16_t float64_to_int16(float64 a, float_status *s)
2217{
2218 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2219}
2220
2221int32_t float64_to_int32(float64 a, float_status *s)
2222{
2223 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2224}
2225
2226int64_t float64_to_int64(float64 a, float_status *s)
2227{
2228 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2229}
2230
2231int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2232{
2233 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2234}
2235
2236int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2237{
2238 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2239}
2240
2241int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2242{
2243 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2244}
2245
2f6c74be
RH
2246int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2247{
2248 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2249}
ab52f973 2250
2f6c74be
RH
2251int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2252{
2253 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2254}
2255
2256int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2257{
2258 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2259}
2260
2261int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2262{
2263 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2264}
ab52f973 2265
2f6c74be
RH
2266int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2267{
2268 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2269}
ab52f973 2270
2f6c74be
RH
2271int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2272{
2273 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2274}
ab52f973
AB
2275
2276/*
2277 * Returns the result of converting the floating-point value `a' to
2278 * the unsigned integer format. The conversion is performed according
2279 * to the IEC/IEEE Standard for Binary Floating-Point
2280 * Arithmetic---which means in particular that the conversion is
2281 * rounded according to the current rounding mode. If `a' is a NaN,
2282 * the largest unsigned integer is returned. Otherwise, if the
2283 * conversion overflows, the largest unsigned integer is returned. If
2284 * the 'a' is negative, the result is rounded and zero is returned;
2285 * values that do not round to zero will raise the inexact exception
2286 * flag.
2287 */
2288
2f6c74be
RH
2289static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2290 uint64_t max, float_status *s)
ab52f973
AB
2291{
2292 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2293 FloatParts p = round_to_int(in, rmode, scale, s);
2294 uint64_t r;
ab52f973
AB
2295
2296 switch (p.cls) {
2297 case float_class_snan:
2298 case float_class_qnan:
2299 s->float_exception_flags = orig_flags | float_flag_invalid;
2300 return max;
2301 case float_class_inf:
801bc563 2302 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2303 return p.sign ? 0 : max;
2304 case float_class_zero:
2305 return 0;
2306 case float_class_normal:
ab52f973
AB
2307 if (p.sign) {
2308 s->float_exception_flags = orig_flags | float_flag_invalid;
2309 return 0;
2310 }
2311
2312 if (p.exp < DECOMPOSED_BINARY_POINT) {
2313 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2314 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2315 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2316 } else {
2317 s->float_exception_flags = orig_flags | float_flag_invalid;
2318 return max;
2319 }
2320
2321 /* For uint64 this will never trip, but if p.exp is too large
2322 * to shift a decomposed fraction we shall have exited via the
2323 * 3rd leg above.
2324 */
2325 if (r > max) {
2326 s->float_exception_flags = orig_flags | float_flag_invalid;
2327 return max;
ab52f973 2328 }
2f6c74be 2329 return r;
ab52f973
AB
2330 default:
2331 g_assert_not_reached();
2332 }
2333}
2334
2f6c74be
RH
2335uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2336 float_status *s)
2337{
2338 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2339 rmode, scale, UINT16_MAX, s);
2340}
2341
2342uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2343 float_status *s)
2344{
2345 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2346 rmode, scale, UINT32_MAX, s);
2347}
2348
2349uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2350 float_status *s)
2351{
2352 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2353 rmode, scale, UINT64_MAX, s);
2354}
2355
2356uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2357 float_status *s)
2358{
2359 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2360 rmode, scale, UINT16_MAX, s);
2361}
2362
2363uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2364 float_status *s)
2365{
2366 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2367 rmode, scale, UINT32_MAX, s);
2368}
2369
2370uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2371 float_status *s)
2372{
2373 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2374 rmode, scale, UINT64_MAX, s);
2375}
2376
2377uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2378 float_status *s)
2379{
2380 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2381 rmode, scale, UINT16_MAX, s);
2382}
2383
2384uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2385 float_status *s)
2386{
2387 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2388 rmode, scale, UINT32_MAX, s);
2389}
2390
2391uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2392 float_status *s)
2393{
2394 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2395 rmode, scale, UINT64_MAX, s);
2396}
2397
2398uint16_t float16_to_uint16(float16 a, float_status *s)
2399{
2400 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2401}
2402
2403uint32_t float16_to_uint32(float16 a, float_status *s)
2404{
2405 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2406}
2407
2408uint64_t float16_to_uint64(float16 a, float_status *s)
2409{
2410 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2411}
2412
2413uint16_t float32_to_uint16(float32 a, float_status *s)
2414{
2415 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2416}
2417
2418uint32_t float32_to_uint32(float32 a, float_status *s)
2419{
2420 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2421}
2422
2423uint64_t float32_to_uint64(float32 a, float_status *s)
2424{
2425 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2426}
2427
2428uint16_t float64_to_uint16(float64 a, float_status *s)
2429{
2430 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2431}
2432
2433uint32_t float64_to_uint32(float64 a, float_status *s)
2434{
2435 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2436}
2437
2438uint64_t float64_to_uint64(float64 a, float_status *s)
2439{
2440 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2441}
2442
2443uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2444{
2445 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2446}
2447
2448uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2449{
2450 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2451}
2452
2453uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2454{
2455 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2456}
2457
2458uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2459{
2460 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2461}
2462
2463uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2464{
2465 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2466}
2467
2468uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2469{
2470 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2471}
2472
2473uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2474{
2475 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2476}
2477
2478uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2479{
2480 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2481}
2482
2483uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2484{
2485 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2486}
ab52f973 2487
c02e1fb8
AB
2488/*
2489 * Integer to float conversions
2490 *
2491 * Returns the result of converting the two's complement integer `a'
2492 * to the floating-point format. The conversion is performed according
2493 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2494 */
2495
2abdfe24 2496static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2497{
2abdfe24
RH
2498 FloatParts r = { .sign = false };
2499
c02e1fb8
AB
2500 if (a == 0) {
2501 r.cls = float_class_zero;
c02e1fb8 2502 } else {
2abdfe24
RH
2503 uint64_t f = a;
2504 int shift;
2505
2506 r.cls = float_class_normal;
c02e1fb8 2507 if (a < 0) {
2abdfe24 2508 f = -f;
c02e1fb8 2509 r.sign = true;
c02e1fb8 2510 }
2abdfe24
RH
2511 shift = clz64(f) - 1;
2512 scale = MIN(MAX(scale, -0x10000), 0x10000);
2513
2514 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2515 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2516 }
2517
2518 return r;
2519}
2520
2abdfe24 2521float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2522{
2abdfe24 2523 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2524 return float16_round_pack_canonical(pa, status);
2525}
2526
2abdfe24
RH
2527float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2528{
2529 return int64_to_float16_scalbn(a, scale, status);
2530}
2531
2532float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2533{
2534 return int64_to_float16_scalbn(a, scale, status);
2535}
2536
2537float16 int64_to_float16(int64_t a, float_status *status)
2538{
2539 return int64_to_float16_scalbn(a, 0, status);
2540}
2541
c02e1fb8
AB
2542float16 int32_to_float16(int32_t a, float_status *status)
2543{
2abdfe24 2544 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2545}
2546
2547float16 int16_to_float16(int16_t a, float_status *status)
2548{
2abdfe24 2549 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2550}
2551
2abdfe24 2552float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2553{
2abdfe24 2554 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2555 return float32_round_pack_canonical(pa, status);
2556}
2557
2abdfe24
RH
2558float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2559{
2560 return int64_to_float32_scalbn(a, scale, status);
2561}
2562
2563float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2564{
2565 return int64_to_float32_scalbn(a, scale, status);
2566}
2567
2568float32 int64_to_float32(int64_t a, float_status *status)
2569{
2570 return int64_to_float32_scalbn(a, 0, status);
2571}
2572
c02e1fb8
AB
2573float32 int32_to_float32(int32_t a, float_status *status)
2574{
2abdfe24 2575 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2576}
2577
2578float32 int16_to_float32(int16_t a, float_status *status)
2579{
2abdfe24 2580 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2581}
2582
2abdfe24 2583float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2584{
2abdfe24 2585 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2586 return float64_round_pack_canonical(pa, status);
2587}
2588
2abdfe24
RH
2589float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2590{
2591 return int64_to_float64_scalbn(a, scale, status);
2592}
2593
2594float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2595{
2596 return int64_to_float64_scalbn(a, scale, status);
2597}
2598
2599float64 int64_to_float64(int64_t a, float_status *status)
2600{
2601 return int64_to_float64_scalbn(a, 0, status);
2602}
2603
c02e1fb8
AB
2604float64 int32_to_float64(int32_t a, float_status *status)
2605{
2abdfe24 2606 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2607}
2608
2609float64 int16_to_float64(int16_t a, float_status *status)
2610{
2abdfe24 2611 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2612}
2613
2614
2615/*
2616 * Unsigned Integer to float conversions
2617 *
2618 * Returns the result of converting the unsigned integer `a' to the
2619 * floating-point format. The conversion is performed according to the
2620 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2621 */
2622
2abdfe24 2623static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2624{
2abdfe24 2625 FloatParts r = { .sign = false };
c02e1fb8
AB
2626
2627 if (a == 0) {
2628 r.cls = float_class_zero;
2629 } else {
2abdfe24 2630 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2631 r.cls = float_class_normal;
2abdfe24
RH
2632 if ((int64_t)a < 0) {
2633 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2634 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2635 r.frac = a;
2636 } else {
2abdfe24
RH
2637 int shift = clz64(a) - 1;
2638 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2639 r.frac = a << shift;
c02e1fb8
AB
2640 }
2641 }
2642
2643 return r;
2644}
2645
2abdfe24 2646float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2647{
2abdfe24 2648 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2649 return float16_round_pack_canonical(pa, status);
2650}
2651
2abdfe24
RH
2652float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2653{
2654 return uint64_to_float16_scalbn(a, scale, status);
2655}
2656
2657float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2658{
2659 return uint64_to_float16_scalbn(a, scale, status);
2660}
2661
2662float16 uint64_to_float16(uint64_t a, float_status *status)
2663{
2664 return uint64_to_float16_scalbn(a, 0, status);
2665}
2666
c02e1fb8
AB
2667float16 uint32_to_float16(uint32_t a, float_status *status)
2668{
2abdfe24 2669 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2670}
2671
2672float16 uint16_to_float16(uint16_t a, float_status *status)
2673{
2abdfe24 2674 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2675}
2676
2abdfe24 2677float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2678{
2abdfe24 2679 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2680 return float32_round_pack_canonical(pa, status);
2681}
2682
2abdfe24
RH
2683float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2684{
2685 return uint64_to_float32_scalbn(a, scale, status);
2686}
2687
2688float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2689{
2690 return uint64_to_float32_scalbn(a, scale, status);
2691}
2692
2693float32 uint64_to_float32(uint64_t a, float_status *status)
2694{
2695 return uint64_to_float32_scalbn(a, 0, status);
2696}
2697
c02e1fb8
AB
2698float32 uint32_to_float32(uint32_t a, float_status *status)
2699{
2abdfe24 2700 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2701}
2702
2703float32 uint16_to_float32(uint16_t a, float_status *status)
2704{
2abdfe24 2705 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2706}
2707
2abdfe24 2708float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2709{
2abdfe24 2710 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2711 return float64_round_pack_canonical(pa, status);
2712}
2713
2abdfe24
RH
2714float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2715{
2716 return uint64_to_float64_scalbn(a, scale, status);
2717}
2718
2719float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2720{
2721 return uint64_to_float64_scalbn(a, scale, status);
2722}
2723
2724float64 uint64_to_float64(uint64_t a, float_status *status)
2725{
2726 return uint64_to_float64_scalbn(a, 0, status);
2727}
2728
c02e1fb8
AB
2729float64 uint32_to_float64(uint32_t a, float_status *status)
2730{
2abdfe24 2731 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2732}
2733
2734float64 uint16_to_float64(uint16_t a, float_status *status)
2735{
2abdfe24 2736 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2737}
2738
89360067
AB
2739/* Float Min/Max */
2740/* min() and max() functions. These can't be implemented as
2741 * 'compare and pick one input' because that would mishandle
2742 * NaNs and +0 vs -0.
2743 *
2744 * minnum() and maxnum() functions. These are similar to the min()
2745 * and max() functions but if one of the arguments is a QNaN and
2746 * the other is numerical then the numerical argument is returned.
2747 * SNaNs will get quietened before being returned.
2748 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2749 * and maxNum() operations. min() and max() are the typical min/max
2750 * semantics provided by many CPUs which predate that specification.
2751 *
2752 * minnummag() and maxnummag() functions correspond to minNumMag()
2753 * and minNumMag() from the IEEE-754 2008.
2754 */
2755static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2756 bool ieee, bool ismag, float_status *s)
2757{
2758 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2759 if (ieee) {
2760 /* Takes two floating-point values `a' and `b', one of
2761 * which is a NaN, and returns the appropriate NaN
2762 * result. If either `a' or `b' is a signaling NaN,
2763 * the invalid exception is raised.
2764 */
2765 if (is_snan(a.cls) || is_snan(b.cls)) {
2766 return pick_nan(a, b, s);
2767 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2768 return b;
2769 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2770 return a;
2771 }
2772 }
2773 return pick_nan(a, b, s);
2774 } else {
2775 int a_exp, b_exp;
89360067
AB
2776
2777 switch (a.cls) {
2778 case float_class_normal:
2779 a_exp = a.exp;
2780 break;
2781 case float_class_inf:
2782 a_exp = INT_MAX;
2783 break;
2784 case float_class_zero:
2785 a_exp = INT_MIN;
2786 break;
2787 default:
2788 g_assert_not_reached();
2789 break;
2790 }
2791 switch (b.cls) {
2792 case float_class_normal:
2793 b_exp = b.exp;
2794 break;
2795 case float_class_inf:
2796 b_exp = INT_MAX;
2797 break;
2798 case float_class_zero:
2799 b_exp = INT_MIN;
2800 break;
2801 default:
2802 g_assert_not_reached();
2803 break;
2804 }
2805
6245327a
EC
2806 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2807 bool a_less = a_exp < b_exp;
2808 if (a_exp == b_exp) {
2809 a_less = a.frac < b.frac;
2810 }
2811 return a_less ^ ismin ? b : a;
89360067
AB
2812 }
2813
6245327a 2814 if (a.sign == b.sign) {
89360067
AB
2815 bool a_less = a_exp < b_exp;
2816 if (a_exp == b_exp) {
2817 a_less = a.frac < b.frac;
2818 }
6245327a 2819 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2820 } else {
6245327a 2821 return a.sign ^ ismin ? b : a;
89360067
AB
2822 }
2823 }
2824}
2825
2826#define MINMAX(sz, name, ismin, isiee, ismag) \
2827float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2828 float_status *s) \
2829{ \
2830 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2831 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2832 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2833 \
2834 return float ## sz ## _round_pack_canonical(pr, s); \
2835}
2836
2837MINMAX(16, min, true, false, false)
2838MINMAX(16, minnum, true, true, false)
2839MINMAX(16, minnummag, true, true, true)
2840MINMAX(16, max, false, false, false)
2841MINMAX(16, maxnum, false, true, false)
2842MINMAX(16, maxnummag, false, true, true)
2843
2844MINMAX(32, min, true, false, false)
2845MINMAX(32, minnum, true, true, false)
2846MINMAX(32, minnummag, true, true, true)
2847MINMAX(32, max, false, false, false)
2848MINMAX(32, maxnum, false, true, false)
2849MINMAX(32, maxnummag, false, true, true)
2850
2851MINMAX(64, min, true, false, false)
2852MINMAX(64, minnum, true, true, false)
2853MINMAX(64, minnummag, true, true, true)
2854MINMAX(64, max, false, false, false)
2855MINMAX(64, maxnum, false, true, false)
2856MINMAX(64, maxnummag, false, true, true)
2857
2858#undef MINMAX
2859
0c4c9092
AB
2860/* Floating point compare */
2861static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2862 float_status *s)
2863{
2864 if (is_nan(a.cls) || is_nan(b.cls)) {
2865 if (!is_quiet ||
2866 a.cls == float_class_snan ||
2867 b.cls == float_class_snan) {
2868 s->float_exception_flags |= float_flag_invalid;
2869 }
2870 return float_relation_unordered;
2871 }
2872
2873 if (a.cls == float_class_zero) {
2874 if (b.cls == float_class_zero) {
2875 return float_relation_equal;
2876 }
2877 return b.sign ? float_relation_greater : float_relation_less;
2878 } else if (b.cls == float_class_zero) {
2879 return a.sign ? float_relation_less : float_relation_greater;
2880 }
2881
2882 /* The only really important thing about infinity is its sign. If
2883 * both are infinities the sign marks the smallest of the two.
2884 */
2885 if (a.cls == float_class_inf) {
2886 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2887 return float_relation_equal;
2888 }
2889 return a.sign ? float_relation_less : float_relation_greater;
2890 } else if (b.cls == float_class_inf) {
2891 return b.sign ? float_relation_greater : float_relation_less;
2892 }
2893
2894 if (a.sign != b.sign) {
2895 return a.sign ? float_relation_less : float_relation_greater;
2896 }
2897
2898 if (a.exp == b.exp) {
2899 if (a.frac == b.frac) {
2900 return float_relation_equal;
2901 }
2902 if (a.sign) {
2903 return a.frac > b.frac ?
2904 float_relation_less : float_relation_greater;
2905 } else {
2906 return a.frac > b.frac ?
2907 float_relation_greater : float_relation_less;
2908 }
2909 } else {
2910 if (a.sign) {
2911 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2912 } else {
2913 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2914 }
2915 }
2916}
2917
d9fe9db9
EC
2918#define COMPARE(name, attr, sz) \
2919static int attr \
2920name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
2921{ \
2922 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2923 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 2924 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
2925}
2926
d9fe9db9
EC
2927COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
2928COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
2929COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
2930
2931#undef COMPARE
2932
d9fe9db9
EC
2933int float16_compare(float16 a, float16 b, float_status *s)
2934{
2935 return soft_f16_compare(a, b, false, s);
2936}
2937
2938int float16_compare_quiet(float16 a, float16 b, float_status *s)
2939{
2940 return soft_f16_compare(a, b, true, s);
2941}
2942
2943static int QEMU_FLATTEN
2944f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2945{
2946 union_float32 ua, ub;
2947
2948 ua.s = xa;
2949 ub.s = xb;
2950
2951 if (QEMU_NO_HARDFLOAT) {
2952 goto soft;
2953 }
2954
2955 float32_input_flush2(&ua.s, &ub.s, s);
2956 if (isgreaterequal(ua.h, ub.h)) {
2957 if (isgreater(ua.h, ub.h)) {
2958 return float_relation_greater;
2959 }
2960 return float_relation_equal;
2961 }
2962 if (likely(isless(ua.h, ub.h))) {
2963 return float_relation_less;
2964 }
2965 /* The only condition remaining is unordered.
2966 * Fall through to set flags.
2967 */
2968 soft:
2969 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2970}
2971
2972int float32_compare(float32 a, float32 b, float_status *s)
2973{
2974 return f32_compare(a, b, false, s);
2975}
2976
2977int float32_compare_quiet(float32 a, float32 b, float_status *s)
2978{
2979 return f32_compare(a, b, true, s);
2980}
2981
2982static int QEMU_FLATTEN
2983f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
2984{
2985 union_float64 ua, ub;
2986
2987 ua.s = xa;
2988 ub.s = xb;
2989
2990 if (QEMU_NO_HARDFLOAT) {
2991 goto soft;
2992 }
2993
2994 float64_input_flush2(&ua.s, &ub.s, s);
2995 if (isgreaterequal(ua.h, ub.h)) {
2996 if (isgreater(ua.h, ub.h)) {
2997 return float_relation_greater;
2998 }
2999 return float_relation_equal;
3000 }
3001 if (likely(isless(ua.h, ub.h))) {
3002 return float_relation_less;
3003 }
3004 /* The only condition remaining is unordered.
3005 * Fall through to set flags.
3006 */
3007 soft:
3008 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3009}
3010
3011int float64_compare(float64 a, float64 b, float_status *s)
3012{
3013 return f64_compare(a, b, false, s);
3014}
3015
3016int float64_compare_quiet(float64 a, float64 b, float_status *s)
3017{
3018 return f64_compare(a, b, true, s);
3019}
3020
0bfc9f19
AB
3021/* Multiply A by 2 raised to the power N. */
3022static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3023{
3024 if (unlikely(is_nan(a.cls))) {
3025 return return_nan(a, s);
3026 }
3027 if (a.cls == float_class_normal) {
ce8d4082
RH
3028 /* The largest float type (even though not supported by FloatParts)
3029 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3030 * still allows rounding to infinity, without allowing overflow
3031 * within the int32_t that backs FloatParts.exp.
3032 */
3033 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3034 a.exp += n;
3035 }
3036 return a;
3037}
3038
3039float16 float16_scalbn(float16 a, int n, float_status *status)
3040{
3041 FloatParts pa = float16_unpack_canonical(a, status);
3042 FloatParts pr = scalbn_decomposed(pa, n, status);
3043 return float16_round_pack_canonical(pr, status);
3044}
3045
3046float32 float32_scalbn(float32 a, int n, float_status *status)
3047{
3048 FloatParts pa = float32_unpack_canonical(a, status);
3049 FloatParts pr = scalbn_decomposed(pa, n, status);
3050 return float32_round_pack_canonical(pr, status);
3051}
3052
3053float64 float64_scalbn(float64 a, int n, float_status *status)
3054{
3055 FloatParts pa = float64_unpack_canonical(a, status);
3056 FloatParts pr = scalbn_decomposed(pa, n, status);
3057 return float64_round_pack_canonical(pr, status);
3058}
3059
c13bb2da
AB
3060/*
3061 * Square Root
3062 *
3063 * The old softfloat code did an approximation step before zeroing in
3064 * on the final result. However for simpleness we just compute the
3065 * square root by iterating down from the implicit bit to enough extra
3066 * bits to ensure we get a correctly rounded result.
3067 *
3068 * This does mean however the calculation is slower than before,
3069 * especially for 64 bit floats.
3070 */
3071
3072static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3073{
3074 uint64_t a_frac, r_frac, s_frac;
3075 int bit, last_bit;
3076
3077 if (is_nan(a.cls)) {
3078 return return_nan(a, s);
3079 }
3080 if (a.cls == float_class_zero) {
3081 return a; /* sqrt(+-0) = +-0 */
3082 }
3083 if (a.sign) {
3084 s->float_exception_flags |= float_flag_invalid;
f7e598e2 3085 return parts_default_nan(s);
c13bb2da
AB
3086 }
3087 if (a.cls == float_class_inf) {
3088 return a; /* sqrt(+inf) = +inf */
3089 }
3090
3091 assert(a.cls == float_class_normal);
3092
3093 /* We need two overflow bits at the top. Adding room for that is a
3094 * right shift. If the exponent is odd, we can discard the low bit
3095 * by multiplying the fraction by 2; that's a left shift. Combine
3096 * those and we shift right if the exponent is even.
3097 */
3098 a_frac = a.frac;
3099 if (!(a.exp & 1)) {
3100 a_frac >>= 1;
3101 }
3102 a.exp >>= 1;
3103
3104 /* Bit-by-bit computation of sqrt. */
3105 r_frac = 0;
3106 s_frac = 0;
3107
3108 /* Iterate from implicit bit down to the 3 extra bits to compute a
3109 * properly rounded result. Remember we've inserted one more bit
3110 * at the top, so these positions are one less.
3111 */
3112 bit = DECOMPOSED_BINARY_POINT - 1;
3113 last_bit = MAX(p->frac_shift - 4, 0);
3114 do {
3115 uint64_t q = 1ULL << bit;
3116 uint64_t t_frac = s_frac + q;
3117 if (t_frac <= a_frac) {
3118 s_frac = t_frac + q;
3119 a_frac -= t_frac;
3120 r_frac += q;
3121 }
3122 a_frac <<= 1;
3123 } while (--bit >= last_bit);
3124
3125 /* Undo the right shift done above. If there is any remaining
3126 * fraction, the result is inexact. Set the sticky bit.
3127 */
3128 a.frac = (r_frac << 1) + (a_frac != 0);
3129
3130 return a;
3131}
3132
97ff87c0 3133float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3134{
3135 FloatParts pa = float16_unpack_canonical(a, status);
3136 FloatParts pr = sqrt_float(pa, status, &float16_params);
3137 return float16_round_pack_canonical(pr, status);
3138}
3139
f131bae8
EC
3140static float32 QEMU_SOFTFLOAT_ATTR
3141soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3142{
3143 FloatParts pa = float32_unpack_canonical(a, status);
3144 FloatParts pr = sqrt_float(pa, status, &float32_params);
3145 return float32_round_pack_canonical(pr, status);
3146}
3147
f131bae8
EC
3148static float64 QEMU_SOFTFLOAT_ATTR
3149soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3150{
3151 FloatParts pa = float64_unpack_canonical(a, status);
3152 FloatParts pr = sqrt_float(pa, status, &float64_params);
3153 return float64_round_pack_canonical(pr, status);
3154}
3155
f131bae8
EC
3156float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3157{
3158 union_float32 ua, ur;
3159
3160 ua.s = xa;
3161 if (unlikely(!can_use_fpu(s))) {
3162 goto soft;
3163 }
3164
3165 float32_input_flush1(&ua.s, s);
3166 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3167 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3168 fpclassify(ua.h) == FP_ZERO) ||
3169 signbit(ua.h))) {
3170 goto soft;
3171 }
3172 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3173 float32_is_neg(ua.s))) {
3174 goto soft;
3175 }
3176 ur.h = sqrtf(ua.h);
3177 return ur.s;
3178
3179 soft:
3180 return soft_f32_sqrt(ua.s, s);
3181}
3182
3183float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3184{
3185 union_float64 ua, ur;
3186
3187 ua.s = xa;
3188 if (unlikely(!can_use_fpu(s))) {
3189 goto soft;
3190 }
3191
3192 float64_input_flush1(&ua.s, s);
3193 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3194 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3195 fpclassify(ua.h) == FP_ZERO) ||
3196 signbit(ua.h))) {
3197 goto soft;
3198 }
3199 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3200 float64_is_neg(ua.s))) {
3201 goto soft;
3202 }
3203 ur.h = sqrt(ua.h);
3204 return ur.s;
3205
3206 soft:
3207 return soft_f64_sqrt(ua.s, s);
3208}
3209
0218a16e
RH
3210/*----------------------------------------------------------------------------
3211| The pattern for a default generated NaN.
3212*----------------------------------------------------------------------------*/
3213
3214float16 float16_default_nan(float_status *status)
3215{
3216 FloatParts p = parts_default_nan(status);
3217 p.frac >>= float16_params.frac_shift;
3218 return float16_pack_raw(p);
3219}
3220
3221float32 float32_default_nan(float_status *status)
3222{
3223 FloatParts p = parts_default_nan(status);
3224 p.frac >>= float32_params.frac_shift;
3225 return float32_pack_raw(p);
3226}
3227
3228float64 float64_default_nan(float_status *status)
3229{
3230 FloatParts p = parts_default_nan(status);
3231 p.frac >>= float64_params.frac_shift;
3232 return float64_pack_raw(p);
3233}
3234
3235float128 float128_default_nan(float_status *status)
3236{
3237 FloatParts p = parts_default_nan(status);
3238 float128 r;
3239
3240 /* Extrapolate from the choices made by parts_default_nan to fill
3241 * in the quad-floating format. If the low bit is set, assume we
3242 * want to set all non-snan bits.
3243 */
3244 r.low = -(p.frac & 1);
3245 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
3246 r.high |= LIT64(0x7FFF000000000000);
3247 r.high |= (uint64_t)p.sign << 63;
3248
3249 return r;
3250}
c13bb2da 3251
158142c2 3252/*----------------------------------------------------------------------------
377ed926
RH
3253| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3254*----------------------------------------------------------------------------*/
3255
3256float16 float16_silence_nan(float16 a, float_status *status)
3257{
3258 FloatParts p = float16_unpack_raw(a);
3259 p.frac <<= float16_params.frac_shift;
3260 p = parts_silence_nan(p, status);
3261 p.frac >>= float16_params.frac_shift;
3262 return float16_pack_raw(p);
3263}
3264
3265float32 float32_silence_nan(float32 a, float_status *status)
3266{
3267 FloatParts p = float32_unpack_raw(a);
3268 p.frac <<= float32_params.frac_shift;
3269 p = parts_silence_nan(p, status);
3270 p.frac >>= float32_params.frac_shift;
3271 return float32_pack_raw(p);
3272}
3273
3274float64 float64_silence_nan(float64 a, float_status *status)
3275{
3276 FloatParts p = float64_unpack_raw(a);
3277 p.frac <<= float64_params.frac_shift;
3278 p = parts_silence_nan(p, status);
3279 p.frac >>= float64_params.frac_shift;
3280 return float64_pack_raw(p);
3281}
3282
3283/*----------------------------------------------------------------------------
158142c2
FB
3284| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3285| and 7, and returns the properly rounded 32-bit integer corresponding to the
3286| input. If `zSign' is 1, the input is negated before being converted to an
3287| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3288| is simply rounded to an integer, with the inexact exception raised if the
3289| input cannot be represented exactly as an integer. However, if the fixed-
3290| point input is too large, the invalid exception is raised and the largest
3291| positive or negative integer is returned.
3292*----------------------------------------------------------------------------*/
3293
f4014512 3294static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 3295{
8f506c70 3296 int8_t roundingMode;
158142c2 3297 flag roundNearestEven;
8f506c70 3298 int8_t roundIncrement, roundBits;
760e1416 3299 int32_t z;
158142c2 3300
a2f2d288 3301 roundingMode = status->float_rounding_mode;
158142c2 3302 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3303 switch (roundingMode) {
3304 case float_round_nearest_even:
f9288a76 3305 case float_round_ties_away:
dc355b76
PM
3306 roundIncrement = 0x40;
3307 break;
3308 case float_round_to_zero:
3309 roundIncrement = 0;
3310 break;
3311 case float_round_up:
3312 roundIncrement = zSign ? 0 : 0x7f;
3313 break;
3314 case float_round_down:
3315 roundIncrement = zSign ? 0x7f : 0;
3316 break;
3317 default:
3318 abort();
158142c2
FB
3319 }
3320 roundBits = absZ & 0x7F;
3321 absZ = ( absZ + roundIncrement )>>7;
3322 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3323 z = absZ;
3324 if ( zSign ) z = - z;
3325 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3326 float_raise(float_flag_invalid, status);
bb98fe42 3327 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 3328 }
a2f2d288
PM
3329 if (roundBits) {
3330 status->float_exception_flags |= float_flag_inexact;
3331 }
158142c2
FB
3332 return z;
3333
3334}
3335
3336/*----------------------------------------------------------------------------
3337| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3338| `absZ1', with binary point between bits 63 and 64 (between the input words),
3339| and returns the properly rounded 64-bit integer corresponding to the input.
3340| If `zSign' is 1, the input is negated before being converted to an integer.
3341| Ordinarily, the fixed-point input is simply rounded to an integer, with
3342| the inexact exception raised if the input cannot be represented exactly as
3343| an integer. However, if the fixed-point input is too large, the invalid
3344| exception is raised and the largest positive or negative integer is
3345| returned.
3346*----------------------------------------------------------------------------*/
3347
f42c2224 3348static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3349 float_status *status)
158142c2 3350{
8f506c70 3351 int8_t roundingMode;
158142c2 3352 flag roundNearestEven, increment;
760e1416 3353 int64_t z;
158142c2 3354
a2f2d288 3355 roundingMode = status->float_rounding_mode;
158142c2 3356 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3357 switch (roundingMode) {
3358 case float_round_nearest_even:
f9288a76 3359 case float_round_ties_away:
dc355b76
PM
3360 increment = ((int64_t) absZ1 < 0);
3361 break;
3362 case float_round_to_zero:
3363 increment = 0;
3364 break;
3365 case float_round_up:
3366 increment = !zSign && absZ1;
3367 break;
3368 case float_round_down:
3369 increment = zSign && absZ1;
3370 break;
3371 default:
3372 abort();
158142c2
FB
3373 }
3374 if ( increment ) {
3375 ++absZ0;
3376 if ( absZ0 == 0 ) goto overflow;
bb98fe42 3377 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3378 }
3379 z = absZ0;
3380 if ( zSign ) z = - z;
3381 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3382 overflow:
ff32e16e 3383 float_raise(float_flag_invalid, status);
158142c2 3384 return
bb98fe42 3385 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
3386 : LIT64( 0x7FFFFFFFFFFFFFFF );
3387 }
a2f2d288
PM
3388 if (absZ1) {
3389 status->float_exception_flags |= float_flag_inexact;
3390 }
158142c2
FB
3391 return z;
3392
3393}
3394
fb3ea83a
TM
3395/*----------------------------------------------------------------------------
3396| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3397| `absZ1', with binary point between bits 63 and 64 (between the input words),
3398| and returns the properly rounded 64-bit unsigned integer corresponding to the
3399| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3400| with the inexact exception raised if the input cannot be represented exactly
3401| as an integer. However, if the fixed-point input is too large, the invalid
3402| exception is raised and the largest unsigned integer is returned.
3403*----------------------------------------------------------------------------*/
3404
f42c2224 3405static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 3406 uint64_t absZ1, float_status *status)
fb3ea83a 3407{
8f506c70 3408 int8_t roundingMode;
fb3ea83a
TM
3409 flag roundNearestEven, increment;
3410
a2f2d288 3411 roundingMode = status->float_rounding_mode;
fb3ea83a 3412 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3413 switch (roundingMode) {
3414 case float_round_nearest_even:
f9288a76 3415 case float_round_ties_away:
dc355b76
PM
3416 increment = ((int64_t)absZ1 < 0);
3417 break;
3418 case float_round_to_zero:
3419 increment = 0;
3420 break;
3421 case float_round_up:
3422 increment = !zSign && absZ1;
3423 break;
3424 case float_round_down:
3425 increment = zSign && absZ1;
3426 break;
3427 default:
3428 abort();
fb3ea83a
TM
3429 }
3430 if (increment) {
3431 ++absZ0;
3432 if (absZ0 == 0) {
ff32e16e 3433 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3434 return LIT64(0xFFFFFFFFFFFFFFFF);
3435 }
3436 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
3437 }
3438
3439 if (zSign && absZ0) {
ff32e16e 3440 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3441 return 0;
3442 }
3443
3444 if (absZ1) {
a2f2d288 3445 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3446 }
3447 return absZ0;
3448}
3449
37d18660
PM
3450/*----------------------------------------------------------------------------
3451| If `a' is denormal and we are in flush-to-zero mode then set the
3452| input-denormal exception and return zero. Otherwise just return the value.
3453*----------------------------------------------------------------------------*/
e5a41ffa 3454float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 3455{
a2f2d288 3456 if (status->flush_inputs_to_zero) {
37d18660 3457 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 3458 float_raise(float_flag_input_denormal, status);
37d18660
PM
3459 return make_float32(float32_val(a) & 0x80000000);
3460 }
3461 }
3462 return a;
3463}
3464
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift derived from the leading-zero count, less 8. */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3482
158142c2
FB
3483/*----------------------------------------------------------------------------
3484| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3485| and significand `zSig', and returns the proper single-precision floating-
3486| point value corresponding to the abstract input. Ordinarily, the abstract
3487| value is simply rounded and packed into the single-precision format, with
3488| the inexact exception raised if the abstract input cannot be represented
3489| exactly. However, if the abstract value is too large, the overflow and
3490| inexact exceptions are raised and an infinity or maximal finite value is
3491| returned. If the abstract value is too small, the input value is rounded to
3492| a subnormal number, and the underflow and inexact exceptions are raised if
3493| the abstract input cannot be represented exactly as a subnormal single-
3494| precision floating-point number.
3495| The input significand `zSig' has its binary point between bits 30
3496| and 29, which is 7 bits to the left of the usual location. This shifted
3497| significand must be normalized or smaller. If `zSig' is not normalized,
3498| `zExp' must be 0; in that case, the result returned is a subnormal number,
3499| and it must not require rounding. In the usual case that `zSig' is
3500| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3501| The handling of underflow and overflow follows the IEC/IEEE Standard for
3502| Binary Floating-Point Arithmetic.
3503*----------------------------------------------------------------------------*/
3504
0c48262d 3505static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3506 float_status *status)
158142c2 3507{
8f506c70 3508 int8_t roundingMode;
158142c2 3509 flag roundNearestEven;
8f506c70 3510 int8_t roundIncrement, roundBits;
158142c2
FB
3511 flag isTiny;
3512
a2f2d288 3513 roundingMode = status->float_rounding_mode;
158142c2 3514 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3515 switch (roundingMode) {
3516 case float_round_nearest_even:
f9288a76 3517 case float_round_ties_away:
dc355b76
PM
3518 roundIncrement = 0x40;
3519 break;
3520 case float_round_to_zero:
3521 roundIncrement = 0;
3522 break;
3523 case float_round_up:
3524 roundIncrement = zSign ? 0 : 0x7f;
3525 break;
3526 case float_round_down:
3527 roundIncrement = zSign ? 0x7f : 0;
3528 break;
3529 default:
3530 abort();
3531 break;
158142c2
FB
3532 }
3533 roundBits = zSig & 0x7F;
bb98fe42 3534 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3535 if ( ( 0xFD < zExp )
3536 || ( ( zExp == 0xFD )
bb98fe42 3537 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3538 ) {
ff32e16e 3539 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 3540 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
3541 }
3542 if ( zExp < 0 ) {
a2f2d288 3543 if (status->flush_to_zero) {
ff32e16e 3544 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3545 return packFloat32(zSign, 0, 0);
3546 }
158142c2 3547 isTiny =
a2f2d288
PM
3548 (status->float_detect_tininess
3549 == float_tininess_before_rounding)
158142c2
FB
3550 || ( zExp < -1 )
3551 || ( zSig + roundIncrement < 0x80000000 );
3552 shift32RightJamming( zSig, - zExp, &zSig );
3553 zExp = 0;
3554 roundBits = zSig & 0x7F;
ff32e16e
PM
3555 if (isTiny && roundBits) {
3556 float_raise(float_flag_underflow, status);
3557 }
158142c2
FB
3558 }
3559 }
a2f2d288
PM
3560 if (roundBits) {
3561 status->float_exception_flags |= float_flag_inexact;
3562 }
158142c2
FB
3563 zSig = ( zSig + roundIncrement )>>7;
3564 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3565 if ( zSig == 0 ) zExp = 0;
3566 return packFloat32( zSign, zExp, zSig );
3567
3568}
3569
3570/*----------------------------------------------------------------------------
3571| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3572| and significand `zSig', and returns the proper single-precision floating-
3573| point value corresponding to the abstract input. This routine is just like
3574| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3575| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3576| floating-point exponent.
3577*----------------------------------------------------------------------------*/
3578
3579static float32
0c48262d 3580 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3581 float_status *status)
158142c2 3582{
8f506c70 3583 int8_t shiftCount;
158142c2 3584
0019d5c3 3585 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
3586 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3587 status);
158142c2
FB
3588
3589}
3590
37d18660
PM
3591/*----------------------------------------------------------------------------
3592| If `a' is denormal and we are in flush-to-zero mode then set the
3593| input-denormal exception and return zero. Otherwise just return the value.
3594*----------------------------------------------------------------------------*/
e5a41ffa 3595float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 3596{
a2f2d288 3597 if (status->flush_inputs_to_zero) {
37d18660 3598 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 3599 float_raise(float_flag_input_denormal, status);
37d18660
PM
3600 return make_float64(float64_val(a) & (1ULL << 63));
3601 }
3602 }
3603 return a;
3604}
3605
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift derived from the leading-zero count, less 11. */
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3623
3624/*----------------------------------------------------------------------------
3625| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3626| double-precision floating-point value, returning the result. After being
3627| shifted into the proper positions, the three fields are simply added
3628| together to form the result. This means that any integer portion of `zSig'
3629| will be added into the exponent. Since a properly normalized significand
3630| will have an integer portion equal to 1, the `zExp' input should be 1 less
3631| than the desired result exponent whenever `zSig' is a complete, normalized
3632| significand.
3633*----------------------------------------------------------------------------*/
3634
0c48262d 3635static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
3636{
3637
f090c9d4 3638 return make_float64(
bb98fe42 3639 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
3640
3641}
3642
3643/*----------------------------------------------------------------------------
3644| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3645| and significand `zSig', and returns the proper double-precision floating-
3646| point value corresponding to the abstract input. Ordinarily, the abstract
3647| value is simply rounded and packed into the double-precision format, with
3648| the inexact exception raised if the abstract input cannot be represented
3649| exactly. However, if the abstract value is too large, the overflow and
3650| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
3651| returned. If the abstract value is too small, the input value is rounded to
3652| a subnormal number, and the underflow and inexact exceptions are raised if
3653| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
3654| precision floating-point number.
3655| The input significand `zSig' has its binary point between bits 62
3656| and 61, which is 10 bits to the left of the usual location. This shifted
3657| significand must be normalized or smaller. If `zSig' is not normalized,
3658| `zExp' must be 0; in that case, the result returned is a subnormal number,
3659| and it must not require rounding. In the usual case that `zSig' is
3660| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3661| The handling of underflow and overflow follows the IEC/IEEE Standard for
3662| Binary Floating-Point Arithmetic.
3663*----------------------------------------------------------------------------*/
3664
0c48262d 3665static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3666 float_status *status)
158142c2 3667{
8f506c70 3668 int8_t roundingMode;
158142c2 3669 flag roundNearestEven;
0c48262d 3670 int roundIncrement, roundBits;
158142c2
FB
3671 flag isTiny;
3672
a2f2d288 3673 roundingMode = status->float_rounding_mode;
158142c2 3674 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3675 switch (roundingMode) {
3676 case float_round_nearest_even:
f9288a76 3677 case float_round_ties_away:
dc355b76
PM
3678 roundIncrement = 0x200;
3679 break;
3680 case float_round_to_zero:
3681 roundIncrement = 0;
3682 break;
3683 case float_round_up:
3684 roundIncrement = zSign ? 0 : 0x3ff;
3685 break;
3686 case float_round_down:
3687 roundIncrement = zSign ? 0x3ff : 0;
3688 break;
9ee6f678
BR
3689 case float_round_to_odd:
3690 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3691 break;
dc355b76
PM
3692 default:
3693 abort();
158142c2
FB
3694 }
3695 roundBits = zSig & 0x3FF;
bb98fe42 3696 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
3697 if ( ( 0x7FD < zExp )
3698 || ( ( zExp == 0x7FD )
bb98fe42 3699 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3700 ) {
9ee6f678
BR
3701 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3702 roundIncrement != 0;
ff32e16e 3703 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 3704 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
3705 }
3706 if ( zExp < 0 ) {
a2f2d288 3707 if (status->flush_to_zero) {
ff32e16e 3708 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3709 return packFloat64(zSign, 0, 0);
3710 }
158142c2 3711 isTiny =
a2f2d288
PM
3712 (status->float_detect_tininess
3713 == float_tininess_before_rounding)
158142c2
FB
3714 || ( zExp < -1 )
3715 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
3716 shift64RightJamming( zSig, - zExp, &zSig );
3717 zExp = 0;
3718 roundBits = zSig & 0x3FF;
ff32e16e
PM
3719 if (isTiny && roundBits) {
3720 float_raise(float_flag_underflow, status);
3721 }
9ee6f678
BR
3722 if (roundingMode == float_round_to_odd) {
3723 /*
3724 * For round-to-odd case, the roundIncrement depends on
3725 * zSig which just changed.
3726 */
3727 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3728 }
158142c2
FB
3729 }
3730 }
a2f2d288
PM
3731 if (roundBits) {
3732 status->float_exception_flags |= float_flag_inexact;
3733 }
158142c2
FB
3734 zSig = ( zSig + roundIncrement )>>10;
3735 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3736 if ( zSig == 0 ) zExp = 0;
3737 return packFloat64( zSign, zExp, zSig );
3738
3739}
3740
3741/*----------------------------------------------------------------------------
3742| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3743| and significand `zSig', and returns the proper double-precision floating-
3744| point value corresponding to the abstract input. This routine is just like
3745| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3746| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3747| floating-point exponent.
3748*----------------------------------------------------------------------------*/
3749
3750static float64
0c48262d 3751 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3752 float_status *status)
158142c2 3753{
8f506c70 3754 int8_t shiftCount;
158142c2 3755
0019d5c3 3756 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
3757 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3758 status);
158142c2
FB
3759
3760}
3761
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Left shift by the full leading-zero count. */
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3778
3779/*----------------------------------------------------------------------------
3780| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3781| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3782| and returns the proper extended double-precision floating-point value
3783| corresponding to the abstract input. Ordinarily, the abstract value is
3784| rounded and packed into the extended double-precision format, with the
3785| inexact exception raised if the abstract input cannot be represented
3786| exactly. However, if the abstract value is too large, the overflow and
3787| inexact exceptions are raised and an infinity or maximal finite value is
3788| returned. If the abstract value is too small, the input value is rounded to
3789| a subnormal number, and the underflow and inexact exceptions are raised if
3790| the abstract input cannot be represented exactly as a subnormal extended
3791| double-precision floating-point number.
3792| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3793| number of bits as single or double precision, respectively. Otherwise, the
3794| result is rounded to the full precision of the extended double-precision
3795| format.
3796| The input significand must be normalized or smaller. If the input
3797| significand is not normalized, `zExp' must be 0; in that case, the result
3798| returned is a subnormal number, and it must not require rounding. The
3799| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3800| Floating-Point Arithmetic.
3801*----------------------------------------------------------------------------*/
3802
88857aca
LV
/*
 * Round the abstract value (zSign, zExp, zSig0:zSig1) to `roundingPrecision'
 * bits of significand and pack it as a floatx80.  The rounding mode, the
 * tininess-detection mode and the flush-to-zero setting are read from
 * `status'; exception flags are accumulated into `status'.
 *
 * Only the nearest-even, ties-away, to-zero, up and down rounding modes are
 * handled; any other mode reaches abort().
 */
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    flag roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Anything other than 32 or 64 is treated as full 80-bit precision. */
    if ( roundingPrecision == 80 ) goto precision80;
    if ( roundingPrecision == 64 ) {
        /* Discard the low 11 bits of zSig0; the increment is half an ulp. */
        roundIncrement = LIT64( 0x0000000000000400 );
        roundMask = LIT64( 0x00000000000007FF );
    }
    else if ( roundingPrecision == 32 ) {
        /* Discard the low 40 bits of zSig0; the increment is half an ulp. */
        roundIncrement = LIT64( 0x0000008000000000 );
        roundMask = LIT64( 0x000000FFFFFFFFFF );
    }
    else {
        goto precision80;
    }
    /* Reduced precision: fold any nonzero zSig1 into zSig0 as a sticky bit. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Keep the half-ulp increment chosen above. */
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        /* Directed modes add (ulp - 1) only when rounding away from zero. */
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /*
     * The unsigned comparison is true when zExp <= 0 or zExp >= 0x7FFE,
     * i.e. whenever overflow or a subnormal result is possible.
     */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            /*
             * Jumps into the 80-bit path below.  roundMask still holds the
             * reduced-precision mask, so ~roundMask there is the largest
             * significand representable at this precision.
             */
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tiny if tininess is detected before rounding, if the exponent
             * is strictly negative, or if adding the increment does not
             * carry out of the 64-bit significand.
             */
            isTiny =
                   (status->float_detect_tininess
                    == float_tininess_before_rounding)
                || ( zExp < 0 )
                || ( zSig0 <= zSig0 + roundIncrement );
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            /* Recompute the discarded bits after the denormalizing shift. */
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                status->float_exception_flags |= float_flag_inexact;
            }
            zSig0 += roundIncrement;
            /* The increment set bit 63: the exponent field becomes 1. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Exact tie under nearest-even: also clear the lowest kept bit. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        status->float_exception_flags |= float_flag_inexact;
    }
    zSig0 += roundIncrement;
    /* The addition wrapped: renormalize to 1.0 x 2^(zExp + 1). */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = LIT64( 0x8000000000000000 );
    }
    roundIncrement = roundMask + 1;
    /* Exact tie under nearest-even: also clear the lowest kept bit. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full precision: zSig1 holds the bits below the 64-bit significand. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the top bit of zSig1 (half an ulp) is set. */
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    /* Same range test as above: zExp <= 0 or zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
                  && increment
                )
           ) {
            /* With a zero mask, ~roundMask below is an all-ones significand. */
            roundMask = 0;
 overflow:
            /* Also the target of the reduced-precision `goto overflow'. */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that round toward zero for this sign return max finite. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /* Note: status->flush_to_zero is not consulted on this path. */
            isTiny =
                   (status->float_detect_tininess
                    == float_tininess_before_rounding)
                || ( zExp < 0 )
                || ! increment
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                status->float_exception_flags |= float_flag_inexact;
            }
            /* Re-derive the rounding decision from the shifted zSig1. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: clear the low bit. */
                zSig0 &=
                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
                /* The increment set bit 63: the exponent field becomes 1. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        status->float_exception_flags |= float_flag_inexact;
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Significand wrapped: renormalize to 1.0 x 2^(zExp + 1). */
            ++zExp;
            zSig0 = LIT64( 0x8000000000000000 );
        }
        else {
            /* Exact tie under nearest-even: clear the low bit. */
            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
3991
3992/*----------------------------------------------------------------------------
3993| Takes an abstract floating-point value having sign `zSign', exponent
3994| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
3995| and returns the proper extended double-precision floating-point value
3996| corresponding to the abstract input. This routine is just like
3997| `roundAndPackFloatx80' except that the input significand does not have to be
3998| normalized.
3999*----------------------------------------------------------------------------*/
4000
88857aca
LV
4001floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4002 flag zSign, int32_t zExp,
4003 uint64_t zSig0, uint64_t zSig1,
4004 float_status *status)
158142c2 4005{
8f506c70 4006 int8_t shiftCount;
158142c2
FB
4007
4008 if ( zSig0 == 0 ) {
4009 zSig0 = zSig1;
4010 zSig1 = 0;
4011 zExp -= 64;
4012 }
0019d5c3 4013 shiftCount = clz64(zSig0);
158142c2
FB
4014 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4015 zExp -= shiftCount;
ff32e16e
PM
4016 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4017 zSig0, zSig1, status);
158142c2
FB
4018
4019}
4020
158142c2
FB
4021/*----------------------------------------------------------------------------
4022| Returns the least-significant 64 fraction bits of the quadruple-precision
4023| floating-point value `a'.
4024*----------------------------------------------------------------------------*/
4025
a49db98d 4026static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4027{
4028
4029 return a.low;
4030
4031}
4032
4033/*----------------------------------------------------------------------------
4034| Returns the most-significant 48 fraction bits of the quadruple-precision
4035| floating-point value `a'.
4036*----------------------------------------------------------------------------*/
4037
a49db98d 4038static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4039{
4040
4041 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
4042
4043}
4044
4045/*----------------------------------------------------------------------------
4046| Returns the exponent bits of the quadruple-precision floating-point value
4047| `a'.
4048*----------------------------------------------------------------------------*/
4049
f4014512 4050static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4051{
4052
4053 return ( a.high>>48 ) & 0x7FFF;
4054
4055}
4056
4057/*----------------------------------------------------------------------------
4058| Returns the sign bit of the quadruple-precision floating-point value `a'.
4059*----------------------------------------------------------------------------*/
4060
a49db98d 4061static inline flag extractFloat128Sign( float128 a )
158142c2
FB
4062{
4063
4064 return a.high>>63;
4065
4066}
4067
4068/*----------------------------------------------------------------------------
4069| Normalizes the subnormal quadruple-precision floating-point value
4070| represented by the denormalized significand formed by the concatenation of
4071| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4072| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4073| significand are stored at the location pointed to by `zSig0Ptr', and the
4074| least significant 64 bits of the normalized significand are stored at the
4075| location pointed to by `zSig1Ptr'.
4076*----------------------------------------------------------------------------*/
4077
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* The leading 1 is in the high word: one short 128-bit left shift. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /*
     * The high word is empty, so only the low word carries significand
     * bits.  `shift' is the distance relative to a whole-word (64-bit) move
     * and may be negative when the low word has fewer than 15 leading zeros.
     */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Split the low word across both output words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4108
4109/*----------------------------------------------------------------------------
4110| Packs the sign `zSign', the exponent `zExp', and the significand formed
4111| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4112| floating-point value, returning the result. After being shifted into the
4113| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
| added together to form the most significant 64 bits of the result.  This
4115| means that any integer portion of `zSig0' will be added into the exponent.
4116| Since a properly normalized significand will have an integer portion equal
4117| to 1, the `zExp' input should be 1 less than the desired result exponent
4118| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4119| significand.
4120*----------------------------------------------------------------------------*/
4121
a49db98d 4122static inline float128
f4014512 4123 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
4124{
4125 float128 z;
4126
4127 z.low = zSig1;
bb98fe42 4128 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
4129 return z;
4130
4131}
4132
4133/*----------------------------------------------------------------------------
4134| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4135| and extended significand formed by the concatenation of `zSig0', `zSig1',
4136| and `zSig2', and returns the proper quadruple-precision floating-point value
4137| corresponding to the abstract input. Ordinarily, the abstract value is
4138| simply rounded and packed into the quadruple-precision format, with the
4139| inexact exception raised if the abstract input cannot be represented
4140| exactly. However, if the abstract value is too large, the overflow and
4141| inexact exceptions are raised and an infinity or maximal finite value is
4142| returned. If the abstract value is too small, the input value is rounded to
4143| a subnormal number, and the underflow and inexact exceptions are raised if
4144| the abstract input cannot be represented exactly as a subnormal quadruple-
4145| precision floating-point number.
4146| The input significand must be normalized or smaller. If the input
4147| significand is not normalized, `zExp' must be 0; in that case, the result
4148| returned is a subnormal number, and it must not require rounding. In the
4149| usual case that the input significand is normalized, `zExp' must be 1 less
4150| than the ``true'' floating-point exponent. The handling of underflow and
4151| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4152*----------------------------------------------------------------------------*/
4153
/*
 * Round the abstract value (zSign, zExp, zSig0:zSig1 plus the extra word
 * zSig2) and pack it as a float128.  The rounding mode, tininess-detection
 * mode and flush-to-zero setting are read from `status'; exception flags are
 * accumulated into `status'.  Rounding modes not listed in the switch
 * statements reach abort().
 */
static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    flag roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* zSig2 holds the bits below the result's least significant bit. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the top bit of zSig2 (half an ulp) is set. */
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Inexact and low bit clear: the increment makes the low bit 1. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* The unsigned comparison is true when zExp >= 0x7FFD or zExp < 0. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         LIT64( 0x0001FFFFFFFFFFFF ),
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Modes that do not round away from zero for this sign (and
             * round-to-odd) return the largest finite value, not infinity.
             */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        LIT64( 0x0000FFFFFFFFFFFF ),
                        LIT64( 0xFFFFFFFFFFFFFFFF )
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /*
             * Tiny if tininess is detected before rounding, if the exponent
             * is below -1, if no increment is pending, or if the significand
             * is below the all-ones pattern that an increment would carry
             * out of.
             */
            isTiny =
                   (status->float_detect_tininess
                    == float_tininess_before_rounding)
                || ( zExp < -1 )
                || ! increment
                || lt128(
                       zSig0,
                       zSig1,
                       LIT64( 0x0001FFFFFFFFFFFF ),
                       LIT64( 0xFFFFFFFFFFFFFFFF )
                   );
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Re-derive the rounding decision from the shifted zSig2. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        status->float_exception_flags |= float_flag_inexact;
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /*
         * zSig2 + zSig2 == 0 with an increment pending means zSig2 is
         * exactly the half-ulp pattern: under nearest-even, clear the low
         * bit so the tie lands on an even significand.
         */
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
    }
    else {
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4268
4269/*----------------------------------------------------------------------------
4270| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4271| and significand formed by the concatenation of `zSig0' and `zSig1', and
4272| returns the proper quadruple-precision floating-point value corresponding
4273| to the abstract input. This routine is just like `roundAndPackFloat128'
4274| except that the input significand has fewer bits and does not have to be
4275| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4276| point exponent.
4277*----------------------------------------------------------------------------*/
4278
f4014512 4279static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4280 uint64_t zSig0, uint64_t zSig1,
4281 float_status *status)
158142c2 4282{
8f506c70 4283 int8_t shiftCount;
bb98fe42 4284 uint64_t zSig2;
158142c2
FB
4285
4286 if ( zSig0 == 0 ) {
4287 zSig0 = zSig1;
4288 zSig1 = 0;
4289 zExp -= 64;
4290 }
0019d5c3 4291 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4292 if ( 0 <= shiftCount ) {
4293 zSig2 = 0;
4294 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4295 }
4296 else {
4297 shift128ExtraRightJamming(
4298 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4299 }
4300 zExp -= shiftCount;
ff32e16e 4301 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4302
4303}
4304
158142c2 4305
158142c2
FB
4306/*----------------------------------------------------------------------------
4307| Returns the result of converting the 32-bit two's complement integer `a'
4308| to the extended double-precision floating-point format. The conversion
4309| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4310| Arithmetic.
4311*----------------------------------------------------------------------------*/
4312
e5a41ffa 4313floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
4314{
4315 flag zSign;
3a87d009 4316 uint32_t absA;
8f506c70 4317 int8_t shiftCount;
bb98fe42 4318 uint64_t zSig;
158142c2
FB
4319
4320 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4321 zSign = ( a < 0 );
4322 absA = zSign ? - a : a;
0019d5c3 4323 shiftCount = clz32(absA) + 32;
158142c2
FB
4324 zSig = absA;
4325 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4326
4327}
4328
158142c2
FB
4329/*----------------------------------------------------------------------------
4330| Returns the result of converting the 32-bit two's complement integer `a' to
4331| the quadruple-precision floating-point format. The conversion is performed
4332| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4333*----------------------------------------------------------------------------*/
4334
e5a41ffa 4335float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
4336{
4337 flag zSign;
3a87d009 4338 uint32_t absA;
8f506c70 4339 int8_t shiftCount;
bb98fe42 4340 uint64_t zSig0;
158142c2
FB
4341
4342 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4343 zSign = ( a < 0 );
4344 absA = zSign ? - a : a;
0019d5c3 4345 shiftCount = clz32(absA) + 17;
158142c2
FB
4346 zSig0 = absA;
4347 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4348
4349}
4350
158142c2
FB
4351/*----------------------------------------------------------------------------
4352| Returns the result of converting the 64-bit two's complement integer `a'
4353| to the extended double-precision floating-point format. The conversion
4354| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4355| Arithmetic.
4356*----------------------------------------------------------------------------*/
4357
e5a41ffa 4358floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
4359{
4360 flag zSign;
182f42fd 4361 uint64_t absA;
8f506c70 4362 int8_t shiftCount;
158142c2
FB
4363
4364 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4365 zSign = ( a < 0 );
4366 absA = zSign ? - a : a;
0019d5c3 4367 shiftCount = clz64(absA);
158142c2
FB
4368 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4369
4370}
4371
158142c2
FB
4372/*----------------------------------------------------------------------------
4373| Returns the result of converting the 64-bit two's complement integer `a' to
4374| the quadruple-precision floating-point format. The conversion is performed
4375| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4376*----------------------------------------------------------------------------*/
4377
e5a41ffa 4378float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
4379{
4380 flag zSign;
182f42fd 4381 uint64_t absA;
8f506c70 4382 int8_t shiftCount;
f4014512 4383 int32_t zExp;
bb98fe42 4384 uint64_t zSig0, zSig1;
158142c2
FB
4385
4386 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4387 zSign = ( a < 0 );
4388 absA = zSign ? - a : a;
0019d5c3 4389 shiftCount = clz64(absA) + 49;
158142c2
FB
4390 zExp = 0x406E - shiftCount;
4391 if ( 64 <= shiftCount ) {
4392 zSig1 = 0;
4393 zSig0 = absA;
4394 shiftCount -= 64;
4395 }
4396 else {
4397 zSig1 = absA;
4398 zSig0 = 0;
4399 }
4400 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4401 return packFloat128( zSign, zExp, zSig0, zSig1 );
4402
4403}
4404
6bb8e0f1
PM
4405/*----------------------------------------------------------------------------
4406| Returns the result of converting the 64-bit unsigned integer `a'
4407| to the quadruple-precision floating-point format. The conversion is performed
4408| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4409*----------------------------------------------------------------------------*/
4410
e5a41ffa 4411float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4412{
4413 if (a == 0) {
4414 return float128_zero;
4415 }
6603d506 4416 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4417}
4418
158142c2
FB
4419/*----------------------------------------------------------------------------
4420| Returns the result of converting the single-precision floating-point value
4421| `a' to the extended double-precision floating-point format. The conversion
4422| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4423| Arithmetic.
4424*----------------------------------------------------------------------------*/
4425
e5a41ffa 4426floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
4427{
4428 flag aSign;
0c48262d 4429 int aExp;
bb98fe42 4430 uint32_t aSig;
158142c2 4431
ff32e16e 4432 a = float32_squash_input_denormal(a, status);
158142c2
FB
4433 aSig = extractFloat32Frac( a );
4434 aExp = extractFloat32Exp( a );
4435 aSign = extractFloat32Sign( a );
4436 if ( aExp == 0xFF ) {
ff32e16e
PM
4437 if (aSig) {
4438 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
4439 }
0f605c88
LV
4440 return packFloatx80(aSign,
4441 floatx80_infinity_high,
4442 floatx80_infinity_low);
158142c2
FB
4443 }
4444 if ( aExp == 0 ) {
4445 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4446 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4447 }
4448 aSig |= 0x00800000;
bb98fe42 4449 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4450
4451}
4452
158142c2
FB
4453/*----------------------------------------------------------------------------
4454| Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
4456| performed according to the IEC/IEEE Standard for Binary Floating-Point
4457| Arithmetic.
4458*----------------------------------------------------------------------------*/
4459
e5a41ffa 4460float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
4461{
4462 flag aSign;
0c48262d 4463 int aExp;
bb98fe42 4464 uint32_t aSig;
158142c2 4465
ff32e16e 4466 a = float32_squash_input_denormal(a, status);
158142c2
FB
4467 aSig = extractFloat32Frac( a );
4468 aExp = extractFloat32Exp( a );
4469 aSign = extractFloat32Sign( a );
4470 if ( aExp == 0xFF ) {
ff32e16e
PM
4471 if (aSig) {
4472 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4473 }
158142c2
FB
4474 return packFloat128( aSign, 0x7FFF, 0, 0 );
4475 }
4476 if ( aExp == 0 ) {
4477 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4478 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4479 --aExp;
4480 }
bb98fe42 4481 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4482
4483}
4484
158142c2
FB
4485/*----------------------------------------------------------------------------
4486| Returns the remainder of the single-precision floating-point value `a'
4487| with respect to the corresponding value `b'. The operation is performed
4488| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4489*----------------------------------------------------------------------------*/
4490
/*
 * Compute the remainder of `a' with respect to `b'.  Special operands are
 * handled first; the general case works on the two significands in fixed
 * point and selects the candidate remainder closest to zero.
 */
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    flag aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    if ( aExp == 0xFF ) {
        /* a is NaN, or a is infinite and b is NaN: propagate the NaN. */
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        /* a is infinite: invalid operation. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        /* b is infinite and a is finite: the remainder is a itself. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* b is zero: invalid operation. */
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* a is zero: returned unchanged. */
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Set bit 23 in both significands. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: 32-bit significands with a 64-bit divide. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* a's exponent is at least 2 below b's: a is returned as is. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /*
         * Large exponent gap: reduce in steps of 62 exponent bits using an
         * estimated quotient.  The estimate is lowered by 2 (clamped at 0)
         * before use.
         */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /*
     * Keep subtracting bSig until the remainder goes negative;
     * alternateASig holds the last non-negative remainder.
     */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    /*
     * Pick whichever candidate is closer to zero.  On an exact tie
     * (sigMean == 0) the non-negative candidate is used when q is odd.
     */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* A negative remainder flips the result sign relative to a. */
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
4586
369be8f6 4587
158142c2 4588
8229c991
AJ
4589/*----------------------------------------------------------------------------
4590| Returns the binary exponential of the single-precision floating-point value
4591| `a'. The operation is performed according to the IEC/IEEE Standard for
4592| Binary Floating-Point Arithmetic.
4593|
4594| Uses the following identities:
4595|
4596| 1. -------------------------------------------------------------------------
4597| x x*ln(2)
4598| 2 = e
4599|
4600| 2. -------------------------------------------------------------------------
4601| 2 3 4 5 n
4602| x x x x x x x
4603| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4604| 1! 2! 3! 4! 5! n!
4605*----------------------------------------------------------------------------*/
4606
/*
 * Coefficients of the power series in identity 2 above, stored as raw
 * float64 bit patterns.  Entry n - 1 multiplies x^n and is intended to be
 * 1/n!; the trailing comment on each line gives n.
 */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* n =  1 */
    const_float64( 0x3fe0000000000000ll ), /* n =  2 */
    const_float64( 0x3fc5555555555555ll ), /* n =  3 */
    const_float64( 0x3fa5555555555555ll ), /* n =  4 */
    const_float64( 0x3f81111111111111ll ), /* n =  5 */
    const_float64( 0x3f56c16c16c16c17ll ), /* n =  6 */
    const_float64( 0x3f2a01a01a01a01all ), /* n =  7 */
    const_float64( 0x3efa01a01a01a01all ), /* n =  8 */
    const_float64( 0x3ec71de3a556c734ll ), /* n =  9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* n = 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* n = 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* n = 12 */
    const_float64( 0x3de6124613a86d09ll ), /* n = 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* n = 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* n = 15 */
};
4625
e5a41ffa 4626float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
4627{
4628 flag aSign;
0c48262d 4629 int aExp;
bb98fe42 4630 uint32_t aSig;
8229c991
AJ
4631 float64 r, x, xn;
4632 int i;
ff32e16e 4633 a = float32_squash_input_denormal(a, status);
8229c991
AJ
4634
4635 aSig = extractFloat32Frac( a );
4636 aExp = extractFloat32Exp( a );
4637 aSign = extractFloat32Sign( a );
4638
4639 if ( aExp == 0xFF) {
ff32e16e
PM
4640 if (aSig) {
4641 return propagateFloat32NaN(a, float32_zero, status);
4642 }
8229c991
AJ
4643 return (aSign) ? float32_zero : a;
4644 }
4645 if (aExp == 0) {
4646 if (aSig == 0) return float32_one;
4647 }
4648
ff32e16e 4649 float_raise(float_flag_inexact, status);
8229c991
AJ
4650
4651 /* ******************************* */
4652 /* using float64 for approximation */
4653 /* ******************************* */
ff32e16e
PM
4654 x = float32_to_float64(a, status);
4655 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
4656
4657 xn = x;
4658 r = float64_one;
4659 for (i = 0 ; i < 15 ; i++) {
4660 float64 f;
4661
ff32e16e
PM
4662 f = float64_mul(xn, float32_exp2_coefficients[i], status);
4663 r = float64_add(r, f, status);
8229c991 4664
ff32e16e 4665 xn = float64_mul(xn, x, status);
8229c991
AJ
4666 }
4667
4668 return float64_to_float32(r, status);
4669}
4670
374dfc33
AJ
4671/*----------------------------------------------------------------------------
4672| Returns the binary log of the single-precision floating-point value `a'.
4673| The operation is performed according to the IEC/IEEE Standard for Binary
4674| Floating-Point Arithmetic.
4675*----------------------------------------------------------------------------*/
e5a41ffa 4676float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
4677{
4678 flag aSign, zSign;
0c48262d 4679 int aExp;
bb98fe42 4680 uint32_t aSig, zSig, i;
374dfc33 4681
ff32e16e 4682 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
4683 aSig = extractFloat32Frac( a );
4684 aExp = extractFloat32Exp( a );
4685 aSign = extractFloat32Sign( a );
4686
4687 if ( aExp == 0 ) {
4688 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
4689 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4690 }
4691 if ( aSign ) {
ff32e16e 4692 float_raise(float_flag_invalid, status);
af39bc8c 4693 return float32_default_nan(status);
374dfc33
AJ
4694 }
4695 if ( aExp == 0xFF ) {
ff32e16e
PM
4696 if (aSig) {
4697 return propagateFloat32NaN(a, float32_zero, status);
4698 }
374dfc33
AJ
4699 return a;
4700 }
4701
4702 aExp -= 0x7F;
4703 aSig |= 0x00800000;
4704 zSign = aExp < 0;
4705 zSig = aExp << 23;
4706
4707 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 4708 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
4709 if ( aSig & 0x01000000 ) {
4710 aSig >>= 1;
4711 zSig |= i;
4712 }
4713 }
4714
4715 if ( zSign )
4716 zSig = -zSig;
4717
ff32e16e 4718 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
4719}
4720
158142c2
FB
4721/*----------------------------------------------------------------------------
4722| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
4723| the corresponding value `b', and 0 otherwise. The invalid exception is
4724| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4725| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4726*----------------------------------------------------------------------------*/
4727
e5a41ffa 4728int float32_eq(float32 a, float32 b, float_status *status)
158142c2 4729{
b689362d 4730 uint32_t av, bv;
ff32e16e
PM
4731 a = float32_squash_input_denormal(a, status);
4732 b = float32_squash_input_denormal(b, status);
158142c2
FB
4733
4734 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4735 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4736 ) {
ff32e16e 4737 float_raise(float_flag_invalid, status);
158142c2
FB
4738 return 0;
4739 }
b689362d
AJ
4740 av = float32_val(a);
4741 bv = float32_val(b);
4742 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4743}
4744
4745/*----------------------------------------------------------------------------
4746| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4747| or equal to the corresponding value `b', and 0 otherwise. The invalid
4748| exception is raised if either operand is a NaN. The comparison is performed
4749| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4750*----------------------------------------------------------------------------*/
4751
e5a41ffa 4752int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
4753{
4754 flag aSign, bSign;
bb98fe42 4755 uint32_t av, bv;
ff32e16e
PM
4756 a = float32_squash_input_denormal(a, status);
4757 b = float32_squash_input_denormal(b, status);
158142c2
FB
4758
4759 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4760 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4761 ) {
ff32e16e 4762 float_raise(float_flag_invalid, status);
158142c2
FB
4763 return 0;
4764 }
4765 aSign = extractFloat32Sign( a );
4766 bSign = extractFloat32Sign( b );
f090c9d4
PB
4767 av = float32_val(a);
4768 bv = float32_val(b);
bb98fe42 4769 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4770 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4771
4772}
4773
4774/*----------------------------------------------------------------------------
4775| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4776| the corresponding value `b', and 0 otherwise. The invalid exception is
4777| raised if either operand is a NaN. The comparison is performed according
4778| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4779*----------------------------------------------------------------------------*/
4780
e5a41ffa 4781int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4782{
4783 flag aSign, bSign;
bb98fe42 4784 uint32_t av, bv;
ff32e16e
PM
4785 a = float32_squash_input_denormal(a, status);
4786 b = float32_squash_input_denormal(b, status);
158142c2
FB
4787
4788 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4789 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4790 ) {
ff32e16e 4791 float_raise(float_flag_invalid, status);
158142c2
FB
4792 return 0;
4793 }
4794 aSign = extractFloat32Sign( a );
4795 bSign = extractFloat32Sign( b );
f090c9d4
PB
4796 av = float32_val(a);
4797 bv = float32_val(b);
bb98fe42 4798 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4799 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4800
4801}
4802
67b7861d
AJ
4803/*----------------------------------------------------------------------------
4804| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4805| be compared, and 0 otherwise. The invalid exception is raised if either
4806| operand is a NaN. The comparison is performed according to the IEC/IEEE
4807| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4808*----------------------------------------------------------------------------*/
4809
e5a41ffa 4810int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4811{
ff32e16e
PM
4812 a = float32_squash_input_denormal(a, status);
4813 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4814
4815 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4816 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4817 ) {
ff32e16e 4818 float_raise(float_flag_invalid, status);
67b7861d
AJ
4819 return 1;
4820 }
4821 return 0;
4822}
b689362d 4823
158142c2
FB
4824/*----------------------------------------------------------------------------
4825| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4826| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4827| exception. The comparison is performed according to the IEC/IEEE Standard
4828| for Binary Floating-Point Arithmetic.
158142c2
FB
4829*----------------------------------------------------------------------------*/
4830
e5a41ffa 4831int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4832{
ff32e16e
PM
4833 a = float32_squash_input_denormal(a, status);
4834 b = float32_squash_input_denormal(b, status);
158142c2
FB
4835
4836 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4837 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4838 ) {
af39bc8c
AM
4839 if (float32_is_signaling_nan(a, status)
4840 || float32_is_signaling_nan(b, status)) {
ff32e16e 4841 float_raise(float_flag_invalid, status);
b689362d 4842 }
158142c2
FB
4843 return 0;
4844 }
b689362d
AJ
4845 return ( float32_val(a) == float32_val(b) ) ||
4846 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4847}
4848
4849/*----------------------------------------------------------------------------
4850| Returns 1 if the single-precision floating-point value `a' is less than or
4851| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4852| cause an exception. Otherwise, the comparison is performed according to the
4853| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4854*----------------------------------------------------------------------------*/
4855
e5a41ffa 4856int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4857{
4858 flag aSign, bSign;
bb98fe42 4859 uint32_t av, bv;
ff32e16e
PM
4860 a = float32_squash_input_denormal(a, status);
4861 b = float32_squash_input_denormal(b, status);
158142c2
FB
4862
4863 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4864 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4865 ) {
af39bc8c
AM
4866 if (float32_is_signaling_nan(a, status)
4867 || float32_is_signaling_nan(b, status)) {
ff32e16e 4868 float_raise(float_flag_invalid, status);
158142c2
FB
4869 }
4870 return 0;
4871 }
4872 aSign = extractFloat32Sign( a );
4873 bSign = extractFloat32Sign( b );
f090c9d4
PB
4874 av = float32_val(a);
4875 bv = float32_val(b);
bb98fe42 4876 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4877 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4878
4879}
4880
4881/*----------------------------------------------------------------------------
4882| Returns 1 if the single-precision floating-point value `a' is less than
4883| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4884| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4885| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4886*----------------------------------------------------------------------------*/
4887
ab52f973 4888int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4889{
ab52f973
AB
4890 flag aSign, bSign;
4891 uint32_t av, bv;
4892 a = float32_squash_input_denormal(a, status);
4893 b = float32_squash_input_denormal(b, status);
158142c2 4894
ab52f973
AB
4895 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4896 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4897 ) {
4898 if (float32_is_signaling_nan(a, status)
4899 || float32_is_signaling_nan(b, status)) {
ff32e16e 4900 float_raise(float_flag_invalid, status);
158142c2 4901 }
ab52f973 4902 return 0;
158142c2 4903 }
ab52f973
AB
4904 aSign = extractFloat32Sign( a );
4905 bSign = extractFloat32Sign( b );
4906 av = float32_val(a);
4907 bv = float32_val(b);
4908 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4909 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4910
4911}
4912
4913/*----------------------------------------------------------------------------
ab52f973
AB
4914| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4915| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4916| comparison is performed according to the IEC/IEEE Standard for Binary
4917| Floating-Point Arithmetic.
158142c2
FB
4918*----------------------------------------------------------------------------*/
4919
ab52f973 4920int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4921{
ab52f973
AB
4922 a = float32_squash_input_denormal(a, status);
4923 b = float32_squash_input_denormal(b, status);
158142c2 4924
ab52f973
AB
4925 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4926 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4927 ) {
4928 if (float32_is_signaling_nan(a, status)
4929 || float32_is_signaling_nan(b, status)) {
4930 float_raise(float_flag_invalid, status);
158142c2 4931 }
ab52f973 4932 return 1;
158142c2 4933 }
ab52f973 4934 return 0;
158142c2
FB
4935}
4936
210cbd49
AB
4937/*----------------------------------------------------------------------------
4938| If `a' is denormal and we are in flush-to-zero mode then set the
4939| input-denormal exception and return zero. Otherwise just return the value.
4940*----------------------------------------------------------------------------*/
4941float16 float16_squash_input_denormal(float16 a, float_status *status)
4942{
4943 if (status->flush_inputs_to_zero) {
4944 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4945 float_raise(float_flag_input_denormal, status);
4946 return make_float16(float16_val(a) & 0x8000);
4947 }
4948 }
4949 return a;
4950}
4951
158142c2
FB
4952/*----------------------------------------------------------------------------
4953| Returns the result of converting the double-precision floating-point value
4954| `a' to the extended double-precision floating-point format. The conversion
4955| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4956| Arithmetic.
4957*----------------------------------------------------------------------------*/
4958
e5a41ffa 4959floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4960{
4961 flag aSign;
0c48262d 4962 int aExp;
bb98fe42 4963 uint64_t aSig;
158142c2 4964
ff32e16e 4965 a = float64_squash_input_denormal(a, status);
158142c2
FB
4966 aSig = extractFloat64Frac( a );
4967 aExp = extractFloat64Exp( a );
4968 aSign = extractFloat64Sign( a );
4969 if ( aExp == 0x7FF ) {
ff32e16e
PM
4970 if (aSig) {
4971 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4972 }
0f605c88
LV
4973 return packFloatx80(aSign,
4974 floatx80_infinity_high,
4975 floatx80_infinity_low);
158142c2
FB
4976 }
4977 if ( aExp == 0 ) {
4978 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4979 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4980 }
4981 return
4982 packFloatx80(
4983 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4984
4985}
4986
158142c2
FB
4987/*----------------------------------------------------------------------------
4988| Returns the result of converting the double-precision floating-point value
4989| `a' to the quadruple-precision floating-point format. The conversion is
4990| performed according to the IEC/IEEE Standard for Binary Floating-Point
4991| Arithmetic.
4992*----------------------------------------------------------------------------*/
4993
e5a41ffa 4994float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4995{
4996 flag aSign;
0c48262d 4997 int aExp;
bb98fe42 4998 uint64_t aSig, zSig0, zSig1;
158142c2 4999
ff32e16e 5000 a = float64_squash_input_denormal(a, status);
158142c2
FB
5001 aSig = extractFloat64Frac( a );
5002 aExp = extractFloat64Exp( a );
5003 aSign = extractFloat64Sign( a );
5004 if ( aExp == 0x7FF ) {
ff32e16e
PM
5005 if (aSig) {
5006 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5007 }
158142c2
FB
5008 return packFloat128( aSign, 0x7FFF, 0, 0 );
5009 }
5010 if ( aExp == 0 ) {
5011 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5012 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5013 --aExp;
5014 }
5015 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5016 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5017
5018}
5019
158142c2
FB
5020
5021/*----------------------------------------------------------------------------
5022| Returns the remainder of the double-precision floating-point value `a'
5023| with respect to the corresponding value `b'. The operation is performed
5024| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5025*----------------------------------------------------------------------------*/
5026
e5a41ffa 5027float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5028{
ed086f3d 5029 flag aSign, zSign;
0c48262d 5030 int aExp, bExp, expDiff;
bb98fe42
AF
5031 uint64_t aSig, bSig;
5032 uint64_t q, alternateASig;
5033 int64_t sigMean;
158142c2 5034
ff32e16e
PM
5035 a = float64_squash_input_denormal(a, status);
5036 b = float64_squash_input_denormal(b, status);
158142c2
FB
5037 aSig = extractFloat64Frac( a );
5038 aExp = extractFloat64Exp( a );
5039 aSign = extractFloat64Sign( a );
5040 bSig = extractFloat64Frac( b );
5041 bExp = extractFloat64Exp( b );
158142c2
FB
5042 if ( aExp == 0x7FF ) {
5043 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5044 return propagateFloat64NaN(a, b, status);
158142c2 5045 }
ff32e16e 5046 float_raise(float_flag_invalid, status);
af39bc8c 5047 return float64_default_nan(status);
158142c2
FB
5048 }
5049 if ( bExp == 0x7FF ) {
ff32e16e
PM
5050 if (bSig) {
5051 return propagateFloat64NaN(a, b, status);
5052 }
158142c2
FB
5053 return a;
5054 }
5055 if ( bExp == 0 ) {
5056 if ( bSig == 0 ) {
ff32e16e 5057 float_raise(float_flag_invalid, status);
af39bc8c 5058 return float64_default_nan(status);
158142c2
FB
5059 }
5060 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5061 }
5062 if ( aExp == 0 ) {
5063 if ( aSig == 0 ) return a;
5064 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5065 }
5066 expDiff = aExp - bExp;
5067 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
5068 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
5069 if ( expDiff < 0 ) {
5070 if ( expDiff < -1 ) return a;
5071 aSig >>= 1;
5072 }
5073 q = ( bSig <= aSig );
5074 if ( q ) aSig -= bSig;
5075 expDiff -= 64;
5076 while ( 0 < expDiff ) {
5077 q = estimateDiv128To64( aSig, 0, bSig );
5078 q = ( 2 < q ) ? q - 2 : 0;
5079 aSig = - ( ( bSig>>2 ) * q );
5080 expDiff -= 62;
5081 }
5082 expDiff += 64;
5083 if ( 0 < expDiff ) {
5084 q = estimateDiv128To64( aSig, 0, bSig );
5085 q = ( 2 < q ) ? q - 2 : 0;
5086 q >>= 64 - expDiff;
5087 bSig >>= 2;
5088 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5089 }
5090 else {
5091 aSig >>= 2;
5092 bSig >>= 2;
5093 }
5094 do {
5095 alternateASig = aSig;
5096 ++q;
5097 aSig -= bSig;
bb98fe42 5098 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5099 sigMean = aSig + alternateASig;
5100 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5101 aSig = alternateASig;
5102 }
bb98fe42 5103 zSign = ( (int64_t) aSig < 0 );
158142c2 5104 if ( zSign ) aSig = - aSig;
ff32e16e 5105 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5106
5107}
5108
374dfc33
AJ
5109/*----------------------------------------------------------------------------
5110| Returns the binary log of the double-precision floating-point value `a'.
5111| The operation is performed according to the IEC/IEEE Standard for Binary
5112| Floating-Point Arithmetic.
5113*----------------------------------------------------------------------------*/
e5a41ffa 5114float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
5115{
5116 flag aSign, zSign;
0c48262d 5117 int aExp;
bb98fe42 5118 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5119 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5120
5121 aSig = extractFloat64Frac( a );
5122 aExp = extractFloat64Exp( a );
5123 aSign = extractFloat64Sign( a );
5124
5125 if ( aExp == 0 ) {
5126 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5127 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5128 }
5129 if ( aSign ) {
ff32e16e 5130 float_raise(float_flag_invalid, status);
af39bc8c 5131 return float64_default_nan(status);
374dfc33
AJ
5132 }
5133 if ( aExp == 0x7FF ) {
ff32e16e
PM
5134 if (aSig) {
5135 return propagateFloat64NaN(a, float64_zero, status);
5136 }
374dfc33
AJ
5137 return a;
5138 }
5139
5140 aExp -= 0x3FF;
5141 aSig |= LIT64( 0x0010000000000000 );
5142 zSign = aExp < 0;
bb98fe42 5143 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5144 for (i = 1LL << 51; i > 0; i >>= 1) {
5145 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5146 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5147 if ( aSig & LIT64( 0x0020000000000000 ) ) {
5148 aSig >>= 1;
5149 zSig |= i;
5150 }
5151 }
5152
5153 if ( zSign )
5154 zSig = -zSig;
ff32e16e 5155 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5156}
5157
158142c2
FB
5158/*----------------------------------------------------------------------------
5159| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
5160| corresponding value `b', and 0 otherwise. The invalid exception is raised
5161| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
5162| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5163*----------------------------------------------------------------------------*/
5164
e5a41ffa 5165int float64_eq(float64 a, float64 b, float_status *status)
158142c2 5166{
bb98fe42 5167 uint64_t av, bv;
ff32e16e
PM
5168 a = float64_squash_input_denormal(a, status);
5169 b = float64_squash_input_denormal(b, status);
158142c2
FB
5170
5171 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5172 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5173 ) {
ff32e16e 5174 float_raise(float_flag_invalid, status);
158142c2
FB
5175 return 0;
5176 }
f090c9d4 5177 av = float64_val(a);
a1b91bb4 5178 bv = float64_val(b);
bb98fe42 5179 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5180
5181}
5182
5183/*----------------------------------------------------------------------------
5184| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
5185| equal to the corresponding value `b', and 0 otherwise. The invalid
5186| exception is raised if either operand is a NaN. The comparison is performed
5187| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5188*----------------------------------------------------------------------------*/
5189
e5a41ffa 5190int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
5191{
5192 flag aSign, bSign;
bb98fe42 5193 uint64_t av, bv;
ff32e16e
PM
5194 a = float64_squash_input_denormal(a, status);
5195 b = float64_squash_input_denormal(b, status);
158142c2
FB
5196
5197 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5198 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5199 ) {
ff32e16e 5200 float_raise(float_flag_invalid, status);
158142c2
FB
5201 return 0;
5202 }
5203 aSign = extractFloat64Sign( a );
5204 bSign = extractFloat64Sign( b );
f090c9d4 5205 av = float64_val(a);
a1b91bb4 5206 bv = float64_val(b);
bb98fe42 5207 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5208 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5209
5210}
5211
5212/*----------------------------------------------------------------------------
5213| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
5214| the corresponding value `b', and 0 otherwise. The invalid exception is
5215| raised if either operand is a NaN. The comparison is performed according
5216| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5217*----------------------------------------------------------------------------*/
5218
e5a41ffa 5219int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
5220{
5221 flag aSign, bSign;
bb98fe42 5222 uint64_t av, bv;
158142c2 5223
ff32e16e
PM
5224 a = float64_squash_input_denormal(a, status);
5225 b = float64_squash_input_denormal(b, status);
158142c2
FB
5226 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5227 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5228 ) {
ff32e16e 5229 float_raise(float_flag_invalid, status);
158142c2
FB
5230 return 0;
5231 }
5232 aSign = extractFloat64Sign( a );
5233 bSign = extractFloat64Sign( b );
f090c9d4 5234 av = float64_val(a);
a1b91bb4 5235 bv = float64_val(b);
bb98fe42 5236 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5237 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5238
5239}
5240
67b7861d
AJ
5241/*----------------------------------------------------------------------------
5242| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
5243| be compared, and 0 otherwise. The invalid exception is raised if either
5244| operand is a NaN. The comparison is performed according to the IEC/IEEE
5245| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
5246*----------------------------------------------------------------------------*/
5247
e5a41ffa 5248int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 5249{
ff32e16e
PM
5250 a = float64_squash_input_denormal(a, status);
5251 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5252
5253 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5254 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5255 ) {
ff32e16e 5256 float_raise(float_flag_invalid, status);
67b7861d
AJ
5257 return 1;
5258 }
5259 return 0;
5260}
5261
158142c2
FB
5262/*----------------------------------------------------------------------------
5263| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
5264| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5265| exception.The comparison is performed according to the IEC/IEEE Standard
5266| for Binary Floating-Point Arithmetic.
158142c2
FB
5267*----------------------------------------------------------------------------*/
5268
e5a41ffa 5269int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 5270{
bb98fe42 5271 uint64_t av, bv;
ff32e16e
PM
5272 a = float64_squash_input_denormal(a, status);
5273 b = float64_squash_input_denormal(b, status);
158142c2
FB
5274
5275 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5276 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5277 ) {
af39bc8c
AM
5278 if (float64_is_signaling_nan(a, status)
5279 || float64_is_signaling_nan(b, status)) {
ff32e16e 5280 float_raise(float_flag_invalid, status);
b689362d 5281 }
158142c2
FB
5282 return 0;
5283 }
f090c9d4 5284 av = float64_val(a);
a1b91bb4 5285 bv = float64_val(b);
bb98fe42 5286 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5287
5288}
5289
5290/*----------------------------------------------------------------------------
5291| Returns 1 if the double-precision floating-point value `a' is less than or
5292| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5293| cause an exception. Otherwise, the comparison is performed according to the
5294| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5295*----------------------------------------------------------------------------*/
5296
e5a41ffa 5297int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5298{
5299 flag aSign, bSign;
bb98fe42 5300 uint64_t av, bv;
ff32e16e
PM
5301 a = float64_squash_input_denormal(a, status);
5302 b = float64_squash_input_denormal(b, status);
158142c2
FB
5303
5304 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5305 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5306 ) {
af39bc8c
AM
5307 if (float64_is_signaling_nan(a, status)
5308 || float64_is_signaling_nan(b, status)) {
ff32e16e 5309 float_raise(float_flag_invalid, status);
158142c2
FB
5310 }
5311 return 0;
5312 }
5313 aSign = extractFloat64Sign( a );
5314 bSign = extractFloat64Sign( b );
f090c9d4 5315 av = float64_val(a);
a1b91bb4 5316 bv = float64_val(b);
bb98fe42 5317 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5318 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5319
5320}
5321
5322/*----------------------------------------------------------------------------
5323| Returns 1 if the double-precision floating-point value `a' is less than
5324| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5325| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5326| Standard for Binary Floating-Point Arithmetic.
5327*----------------------------------------------------------------------------*/
5328
e5a41ffa 5329int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5330{
5331 flag aSign, bSign;
bb98fe42 5332 uint64_t av, bv;
ff32e16e
PM
5333 a = float64_squash_input_denormal(a, status);
5334 b = float64_squash_input_denormal(b, status);
158142c2
FB
5335
5336 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5337 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5338 ) {
af39bc8c
AM
5339 if (float64_is_signaling_nan(a, status)
5340 || float64_is_signaling_nan(b, status)) {
ff32e16e 5341 float_raise(float_flag_invalid, status);
158142c2
FB
5342 }
5343 return 0;
5344 }
5345 aSign = extractFloat64Sign( a );
5346 bSign = extractFloat64Sign( b );
f090c9d4 5347 av = float64_val(a);
a1b91bb4 5348 bv = float64_val(b);
bb98fe42 5349 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5350 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5351
5352}
5353
67b7861d
AJ
5354/*----------------------------------------------------------------------------
5355| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5356| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5357| comparison is performed according to the IEC/IEEE Standard for Binary
5358| Floating-Point Arithmetic.
5359*----------------------------------------------------------------------------*/
5360
e5a41ffa 5361int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 5362{
ff32e16e
PM
5363 a = float64_squash_input_denormal(a, status);
5364 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5365
5366 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5367 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5368 ) {
af39bc8c
AM
5369 if (float64_is_signaling_nan(a, status)
5370 || float64_is_signaling_nan(b, status)) {
ff32e16e 5371 float_raise(float_flag_invalid, status);
67b7861d
AJ
5372 }
5373 return 1;
5374 }
5375 return 0;
5376}
5377
158142c2
FB
5378/*----------------------------------------------------------------------------
5379| Returns the result of converting the extended double-precision floating-
5380| point value `a' to the 32-bit two's complement integer format. The
5381| conversion is performed according to the IEC/IEEE Standard for Binary
5382| Floating-Point Arithmetic---which means in particular that the conversion
5383| is rounded according to the current rounding mode. If `a' is a NaN, the
5384| largest positive integer is returned. Otherwise, if the conversion
5385| overflows, the largest integer with the same sign as `a' is returned.
5386*----------------------------------------------------------------------------*/
5387
f4014512 5388int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
5389{
5390 flag aSign;
f4014512 5391 int32_t aExp, shiftCount;
bb98fe42 5392 uint64_t aSig;
158142c2 5393
d1eb8f2a
AD
5394 if (floatx80_invalid_encoding(a)) {
5395 float_raise(float_flag_invalid, status);
5396 return 1 << 31;
5397 }
158142c2
FB
5398 aSig = extractFloatx80Frac( a );
5399 aExp = extractFloatx80Exp( a );
5400 aSign = extractFloatx80Sign( a );
bb98fe42 5401 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5402 shiftCount = 0x4037 - aExp;
5403 if ( shiftCount <= 0 ) shiftCount = 1;
5404 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5405 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5406
5407}
5408
5409/*----------------------------------------------------------------------------
5410| Returns the result of converting the extended double-precision floating-
5411| point value `a' to the 32-bit two's complement integer format. The
5412| conversion is performed according to the IEC/IEEE Standard for Binary
5413| Floating-Point Arithmetic, except that the conversion is always rounded
5414| toward zero. If `a' is a NaN, the largest positive integer is returned.
5415| Otherwise, if the conversion overflows, the largest integer with the same
5416| sign as `a' is returned.
5417*----------------------------------------------------------------------------*/
5418
f4014512 5419int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5420{
5421 flag aSign;
f4014512 5422 int32_t aExp, shiftCount;
bb98fe42 5423 uint64_t aSig, savedASig;
b3a6a2e0 5424 int32_t z;
158142c2 5425
d1eb8f2a
AD
5426 if (floatx80_invalid_encoding(a)) {
5427 float_raise(float_flag_invalid, status);
5428 return 1 << 31;
5429 }
158142c2
FB
5430 aSig = extractFloatx80Frac( a );
5431 aExp = extractFloatx80Exp( a );
5432 aSign = extractFloatx80Sign( a );
5433 if ( 0x401E < aExp ) {
bb98fe42 5434 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5435 goto invalid;
5436 }
5437 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5438 if (aExp || aSig) {
5439 status->float_exception_flags |= float_flag_inexact;
5440 }
158142c2
FB
5441 return 0;
5442 }
5443 shiftCount = 0x403E - aExp;
5444 savedASig = aSig;
5445 aSig >>= shiftCount;
5446 z = aSig;
5447 if ( aSign ) z = - z;
5448 if ( ( z < 0 ) ^ aSign ) {
5449 invalid:
ff32e16e 5450 float_raise(float_flag_invalid, status);
bb98fe42 5451 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5452 }
5453 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5454 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5455 }
5456 return z;
5457
5458}
5459
5460/*----------------------------------------------------------------------------
5461| Returns the result of converting the extended double-precision floating-
5462| point value `a' to the 64-bit two's complement integer format. The
5463| conversion is performed according to the IEC/IEEE Standard for Binary
5464| Floating-Point Arithmetic---which means in particular that the conversion
5465| is rounded according to the current rounding mode. If `a' is a NaN,
5466| the largest positive integer is returned. Otherwise, if the conversion
5467| overflows, the largest integer with the same sign as `a' is returned.
5468*----------------------------------------------------------------------------*/
5469
f42c2224 5470int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
5471{
5472 flag aSign;
f4014512 5473 int32_t aExp, shiftCount;
bb98fe42 5474 uint64_t aSig, aSigExtra;
158142c2 5475
d1eb8f2a
AD
5476 if (floatx80_invalid_encoding(a)) {
5477 float_raise(float_flag_invalid, status);
5478 return 1ULL << 63;
5479 }
158142c2
FB
5480 aSig = extractFloatx80Frac( a );
5481 aExp = extractFloatx80Exp( a );
5482 aSign = extractFloatx80Sign( a );
5483 shiftCount = 0x403E - aExp;
5484 if ( shiftCount <= 0 ) {
5485 if ( shiftCount ) {
ff32e16e 5486 float_raise(float_flag_invalid, status);
0f605c88 5487 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
5488 return LIT64( 0x7FFFFFFFFFFFFFFF );
5489 }
bb98fe42 5490 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5491 }
5492 aSigExtra = 0;
5493 }
5494 else {
5495 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5496 }
ff32e16e 5497 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5498
5499}
5500
5501/*----------------------------------------------------------------------------
5502| Returns the result of converting the extended double-precision floating-
5503| point value `a' to the 64-bit two's complement integer format. The
5504| conversion is performed according to the IEC/IEEE Standard for Binary
5505| Floating-Point Arithmetic, except that the conversion is always rounded
5506| toward zero. If `a' is a NaN, the largest positive integer is returned.
5507| Otherwise, if the conversion overflows, the largest integer with the same
5508| sign as `a' is returned.
5509*----------------------------------------------------------------------------*/
5510
f42c2224 5511int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5512{
5513 flag aSign;
f4014512 5514 int32_t aExp, shiftCount;
bb98fe42 5515 uint64_t aSig;
f42c2224 5516 int64_t z;
158142c2 5517
d1eb8f2a
AD
5518 if (floatx80_invalid_encoding(a)) {
5519 float_raise(float_flag_invalid, status);
5520 return 1ULL << 63;
5521 }
158142c2
FB
5522 aSig = extractFloatx80Frac( a );
5523 aExp = extractFloatx80Exp( a );
5524 aSign = extractFloatx80Sign( a );
5525 shiftCount = aExp - 0x403E;
5526 if ( 0 <= shiftCount ) {
5527 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
5528 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5529 float_raise(float_flag_invalid, status);
158142c2
FB
5530 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5531 return LIT64( 0x7FFFFFFFFFFFFFFF );
5532 }
5533 }
bb98fe42 5534 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5535 }
5536 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5537 if (aExp | aSig) {
5538 status->float_exception_flags |= float_flag_inexact;
5539 }
158142c2
FB
5540 return 0;
5541 }
5542 z = aSig>>( - shiftCount );
bb98fe42 5543 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5544 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5545 }
5546 if ( aSign ) z = - z;
5547 return z;
5548
5549}
5550
5551/*----------------------------------------------------------------------------
5552| Returns the result of converting the extended double-precision floating-
5553| point value `a' to the single-precision floating-point format. The
5554| conversion is performed according to the IEC/IEEE Standard for Binary
5555| Floating-Point Arithmetic.
5556*----------------------------------------------------------------------------*/
5557
e5a41ffa 5558float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
5559{
5560 flag aSign;
f4014512 5561 int32_t aExp;
bb98fe42 5562 uint64_t aSig;
158142c2 5563
d1eb8f2a
AD
5564 if (floatx80_invalid_encoding(a)) {
5565 float_raise(float_flag_invalid, status);
5566 return float32_default_nan(status);
5567 }
158142c2
FB
5568 aSig = extractFloatx80Frac( a );
5569 aExp = extractFloatx80Exp( a );
5570 aSign = extractFloatx80Sign( a );
5571 if ( aExp == 0x7FFF ) {
bb98fe42 5572 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5573 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5574 }
5575 return packFloat32( aSign, 0xFF, 0 );
5576 }
5577 shift64RightJamming( aSig, 33, &aSig );
5578 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5579 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5580
5581}
5582
5583/*----------------------------------------------------------------------------
5584| Returns the result of converting the extended double-precision floating-
5585| point value `a' to the double-precision floating-point format. The
5586| conversion is performed according to the IEC/IEEE Standard for Binary
5587| Floating-Point Arithmetic.
5588*----------------------------------------------------------------------------*/
5589
e5a41ffa 5590float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
5591{
5592 flag aSign;
f4014512 5593 int32_t aExp;
bb98fe42 5594 uint64_t aSig, zSig;
158142c2 5595
d1eb8f2a
AD
5596 if (floatx80_invalid_encoding(a)) {
5597 float_raise(float_flag_invalid, status);
5598 return float64_default_nan(status);
5599 }
158142c2
FB
5600 aSig = extractFloatx80Frac( a );
5601 aExp = extractFloatx80Exp( a );
5602 aSign = extractFloatx80Sign( a );
5603 if ( aExp == 0x7FFF ) {
bb98fe42 5604 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5605 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5606 }
5607 return packFloat64( aSign, 0x7FF, 0 );
5608 }
5609 shift64RightJamming( aSig, 1, &zSig );
5610 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5611 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5612
5613}
5614
158142c2
FB
5615/*----------------------------------------------------------------------------
5616| Returns the result of converting the extended double-precision floating-
5617| point value `a' to the quadruple-precision floating-point format. The
5618| conversion is performed according to the IEC/IEEE Standard for Binary
5619| Floating-Point Arithmetic.
5620*----------------------------------------------------------------------------*/
5621
e5a41ffa 5622float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
5623{
5624 flag aSign;
0c48262d 5625 int aExp;
bb98fe42 5626 uint64_t aSig, zSig0, zSig1;
158142c2 5627
d1eb8f2a
AD
5628 if (floatx80_invalid_encoding(a)) {
5629 float_raise(float_flag_invalid, status);
5630 return float128_default_nan(status);
5631 }
158142c2
FB
5632 aSig = extractFloatx80Frac( a );
5633 aExp = extractFloatx80Exp( a );
5634 aSign = extractFloatx80Sign( a );
bb98fe42 5635 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5636 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5637 }
5638 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5639 return packFloat128( aSign, aExp, zSig0, zSig1 );
5640
5641}
5642
0f721292
LV
5643/*----------------------------------------------------------------------------
5644| Rounds the extended double-precision floating-point value `a'
5645| to the precision provided by floatx80_rounding_precision and returns the
5646| result as an extended double-precision floating-point value.
5647| The operation is performed according to the IEC/IEEE Standard for Binary
5648| Floating-Point Arithmetic.
5649*----------------------------------------------------------------------------*/
5650
5651floatx80 floatx80_round(floatx80 a, float_status *status)
5652{
5653 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5654 extractFloatx80Sign(a),
5655 extractFloatx80Exp(a),
5656 extractFloatx80Frac(a), 0, status);
5657}
5658
158142c2
FB
5659/*----------------------------------------------------------------------------
5660| Rounds the extended double-precision floating-point value `a' to an integer,
5661| and returns the result as an extended double-precision floating-point
5662| value. The operation is performed according to the IEC/IEEE Standard for
5663| Binary Floating-Point Arithmetic.
5664*----------------------------------------------------------------------------*/
5665
e5a41ffa 5666floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
5667{
5668 flag aSign;
f4014512 5669 int32_t aExp;
bb98fe42 5670 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5671 floatx80 z;
5672
d1eb8f2a
AD
5673 if (floatx80_invalid_encoding(a)) {
5674 float_raise(float_flag_invalid, status);
5675 return floatx80_default_nan(status);
5676 }
158142c2
FB
5677 aExp = extractFloatx80Exp( a );
5678 if ( 0x403E <= aExp ) {
bb98fe42 5679 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5680 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5681 }
5682 return a;
5683 }
5684 if ( aExp < 0x3FFF ) {
5685 if ( ( aExp == 0 )
bb98fe42 5686 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
5687 return a;
5688 }
a2f2d288 5689 status->float_exception_flags |= float_flag_inexact;
158142c2 5690 aSign = extractFloatx80Sign( a );
a2f2d288 5691 switch (status->float_rounding_mode) {
158142c2 5692 case float_round_nearest_even:
bb98fe42 5693 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5694 ) {
5695 return
5696 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5697 }
5698 break;
f9288a76
PM
5699 case float_round_ties_away:
5700 if (aExp == 0x3FFE) {
5701 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5702 }
5703 break;
158142c2
FB
5704 case float_round_down:
5705 return
5706 aSign ?
5707 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5708 : packFloatx80( 0, 0, 0 );
5709 case float_round_up:
5710 return
5711 aSign ? packFloatx80( 1, 0, 0 )
5712 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5713 }
5714 return packFloatx80( aSign, 0, 0 );
5715 }
5716 lastBitMask = 1;
5717 lastBitMask <<= 0x403E - aExp;
5718 roundBitsMask = lastBitMask - 1;
5719 z = a;
a2f2d288 5720 switch (status->float_rounding_mode) {
dc355b76 5721 case float_round_nearest_even:
158142c2 5722 z.low += lastBitMask>>1;
dc355b76
PM
5723 if ((z.low & roundBitsMask) == 0) {
5724 z.low &= ~lastBitMask;
5725 }
5726 break;
f9288a76
PM
5727 case float_round_ties_away:
5728 z.low += lastBitMask >> 1;
5729 break;
dc355b76
PM
5730 case float_round_to_zero:
5731 break;
5732 case float_round_up:
5733 if (!extractFloatx80Sign(z)) {
5734 z.low += roundBitsMask;
5735 }
5736 break;
5737 case float_round_down:
5738 if (extractFloatx80Sign(z)) {
158142c2
FB
5739 z.low += roundBitsMask;
5740 }
dc355b76
PM
5741 break;
5742 default:
5743 abort();
158142c2
FB
5744 }
5745 z.low &= ~ roundBitsMask;
5746 if ( z.low == 0 ) {
5747 ++z.high;
5748 z.low = LIT64( 0x8000000000000000 );
5749 }
a2f2d288
PM
5750 if (z.low != a.low) {
5751 status->float_exception_flags |= float_flag_inexact;
5752 }
158142c2
FB
5753 return z;
5754
5755}
5756
5757/*----------------------------------------------------------------------------
5758| Returns the result of adding the absolute values of the extended double-
5759| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5760| negated before being returned. `zSign' is ignored if the result is a NaN.
5761| The addition is performed according to the IEC/IEEE Standard for Binary
5762| Floating-Point Arithmetic.
5763*----------------------------------------------------------------------------*/
5764
e5a41ffa
PM
5765static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5766 float_status *status)
158142c2 5767{
f4014512 5768 int32_t aExp, bExp, zExp;
bb98fe42 5769 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5770 int32_t expDiff;
158142c2
FB
5771
5772 aSig = extractFloatx80Frac( a );
5773 aExp = extractFloatx80Exp( a );
5774 bSig = extractFloatx80Frac( b );
5775 bExp = extractFloatx80Exp( b );
5776 expDiff = aExp - bExp;
5777 if ( 0 < expDiff ) {
5778 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5779 if ((uint64_t)(aSig << 1)) {
5780 return propagateFloatx80NaN(a, b, status);
5781 }
158142c2
FB
5782 return a;
5783 }
5784 if ( bExp == 0 ) --expDiff;
5785 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5786 zExp = aExp;
5787 }
5788 else if ( expDiff < 0 ) {
5789 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5790 if ((uint64_t)(bSig << 1)) {
5791 return propagateFloatx80NaN(a, b, status);
5792 }
0f605c88
LV
5793 return packFloatx80(zSign,
5794 floatx80_infinity_high,
5795 floatx80_infinity_low);
158142c2
FB
5796 }
5797 if ( aExp == 0 ) ++expDiff;
5798 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5799 zExp = bExp;
5800 }
5801 else {
5802 if ( aExp == 0x7FFF ) {
bb98fe42 5803 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5804 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5805 }
5806 return a;
5807 }
5808 zSig1 = 0;
5809 zSig0 = aSig + bSig;
5810 if ( aExp == 0 ) {
5811 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5812 goto roundAndPack;
5813 }
5814 zExp = aExp;
5815 goto shiftRight1;
5816 }
5817 zSig0 = aSig + bSig;
bb98fe42 5818 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5819 shiftRight1:
5820 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5821 zSig0 |= LIT64( 0x8000000000000000 );
5822 ++zExp;
5823 roundAndPack:
a2f2d288 5824 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5825 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5826}
5827
5828/*----------------------------------------------------------------------------
5829| Returns the result of subtracting the absolute values of the extended
5830| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5831| difference is negated before being returned. `zSign' is ignored if the
5832| result is a NaN. The subtraction is performed according to the IEC/IEEE
5833| Standard for Binary Floating-Point Arithmetic.
5834*----------------------------------------------------------------------------*/
5835
e5a41ffa
PM
5836static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5837 float_status *status)
158142c2 5838{
f4014512 5839 int32_t aExp, bExp, zExp;
bb98fe42 5840 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5841 int32_t expDiff;
158142c2
FB
5842
5843 aSig = extractFloatx80Frac( a );
5844 aExp = extractFloatx80Exp( a );
5845 bSig = extractFloatx80Frac( b );
5846 bExp = extractFloatx80Exp( b );
5847 expDiff = aExp - bExp;
5848 if ( 0 < expDiff ) goto aExpBigger;
5849 if ( expDiff < 0 ) goto bExpBigger;
5850 if ( aExp == 0x7FFF ) {
bb98fe42 5851 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5852 return propagateFloatx80NaN(a, b, status);
158142c2 5853 }
ff32e16e 5854 float_raise(float_flag_invalid, status);
af39bc8c 5855 return floatx80_default_nan(status);
158142c2
FB
5856 }
5857 if ( aExp == 0 ) {
5858 aExp = 1;
5859 bExp = 1;
5860 }
5861 zSig1 = 0;
5862 if ( bSig < aSig ) goto aBigger;
5863 if ( aSig < bSig ) goto bBigger;
a2f2d288 5864 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5865 bExpBigger:
5866 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5867 if ((uint64_t)(bSig << 1)) {
5868 return propagateFloatx80NaN(a, b, status);
5869 }
0f605c88
LV
5870 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5871 floatx80_infinity_low);
158142c2
FB
5872 }
5873 if ( aExp == 0 ) ++expDiff;
5874 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5875 bBigger:
5876 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5877 zExp = bExp;
5878 zSign ^= 1;
5879 goto normalizeRoundAndPack;
5880 aExpBigger:
5881 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5882 if ((uint64_t)(aSig << 1)) {
5883 return propagateFloatx80NaN(a, b, status);
5884 }
158142c2
FB
5885 return a;
5886 }
5887 if ( bExp == 0 ) --expDiff;
5888 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5889 aBigger:
5890 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5891 zExp = aExp;
5892 normalizeRoundAndPack:
a2f2d288 5893 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5894 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5895}
5896
5897/*----------------------------------------------------------------------------
5898| Returns the result of adding the extended double-precision floating-point
5899| values `a' and `b'. The operation is performed according to the IEC/IEEE
5900| Standard for Binary Floating-Point Arithmetic.
5901*----------------------------------------------------------------------------*/
5902
e5a41ffa 5903floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5904{
5905 flag aSign, bSign;
5906
d1eb8f2a
AD
5907 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5908 float_raise(float_flag_invalid, status);
5909 return floatx80_default_nan(status);
5910 }
158142c2
FB
5911 aSign = extractFloatx80Sign( a );
5912 bSign = extractFloatx80Sign( b );
5913 if ( aSign == bSign ) {
ff32e16e 5914 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5915 }
5916 else {
ff32e16e 5917 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5918 }
5919
5920}
5921
5922/*----------------------------------------------------------------------------
5923| Returns the result of subtracting the extended double-precision floating-
5924| point values `a' and `b'. The operation is performed according to the
5925| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5926*----------------------------------------------------------------------------*/
5927
e5a41ffa 5928floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5929{
5930 flag aSign, bSign;
5931
d1eb8f2a
AD
5932 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5933 float_raise(float_flag_invalid, status);
5934 return floatx80_default_nan(status);
5935 }
158142c2
FB
5936 aSign = extractFloatx80Sign( a );
5937 bSign = extractFloatx80Sign( b );
5938 if ( aSign == bSign ) {
ff32e16e 5939 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5940 }
5941 else {
ff32e16e 5942 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5943 }
5944
5945}
5946
5947/*----------------------------------------------------------------------------
5948| Returns the result of multiplying the extended double-precision floating-
5949| point values `a' and `b'. The operation is performed according to the
5950| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5951*----------------------------------------------------------------------------*/
5952
e5a41ffa 5953floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5954{
5955 flag aSign, bSign, zSign;
f4014512 5956 int32_t aExp, bExp, zExp;
bb98fe42 5957 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5958
d1eb8f2a
AD
5959 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5960 float_raise(float_flag_invalid, status);
5961 return floatx80_default_nan(status);
5962 }
158142c2
FB
5963 aSig = extractFloatx80Frac( a );
5964 aExp = extractFloatx80Exp( a );
5965 aSign = extractFloatx80Sign( a );
5966 bSig = extractFloatx80Frac( b );
5967 bExp = extractFloatx80Exp( b );
5968 bSign = extractFloatx80Sign( b );
5969 zSign = aSign ^ bSign;
5970 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5971 if ( (uint64_t) ( aSig<<1 )
5972 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5973 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5974 }
5975 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5976 return packFloatx80(zSign, floatx80_infinity_high,
5977 floatx80_infinity_low);
158142c2
FB
5978 }
5979 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5980 if ((uint64_t)(bSig << 1)) {
5981 return propagateFloatx80NaN(a, b, status);
5982 }
158142c2
FB
5983 if ( ( aExp | aSig ) == 0 ) {
5984 invalid:
ff32e16e 5985 float_raise(float_flag_invalid, status);
af39bc8c 5986 return floatx80_default_nan(status);
158142c2 5987 }
0f605c88
LV
5988 return packFloatx80(zSign, floatx80_infinity_high,
5989 floatx80_infinity_low);
158142c2
FB
5990 }
5991 if ( aExp == 0 ) {
5992 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5993 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5994 }
5995 if ( bExp == 0 ) {
5996 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5997 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5998 }
5999 zExp = aExp + bExp - 0x3FFE;
6000 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6001 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6002 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6003 --zExp;
6004 }
a2f2d288 6005 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6006 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6007}
6008
6009/*----------------------------------------------------------------------------
6010| Returns the result of dividing the extended double-precision floating-point
6011| value `a' by the corresponding value `b'. The operation is performed
6012| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6013*----------------------------------------------------------------------------*/
6014
e5a41ffa 6015floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6016{
6017 flag aSign, bSign, zSign;
f4014512 6018 int32_t aExp, bExp, zExp;
bb98fe42
AF
6019 uint64_t aSig, bSig, zSig0, zSig1;
6020 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6021
d1eb8f2a
AD
6022 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6023 float_raise(float_flag_invalid, status);
6024 return floatx80_default_nan(status);
6025 }
158142c2
FB
6026 aSig = extractFloatx80Frac( a );
6027 aExp = extractFloatx80Exp( a );
6028 aSign = extractFloatx80Sign( a );
6029 bSig = extractFloatx80Frac( b );
6030 bExp = extractFloatx80Exp( b );
6031 bSign = extractFloatx80Sign( b );
6032 zSign = aSign ^ bSign;
6033 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6034 if ((uint64_t)(aSig << 1)) {
6035 return propagateFloatx80NaN(a, b, status);
6036 }
158142c2 6037 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6038 if ((uint64_t)(bSig << 1)) {
6039 return propagateFloatx80NaN(a, b, status);
6040 }
158142c2
FB
6041 goto invalid;
6042 }
0f605c88
LV
6043 return packFloatx80(zSign, floatx80_infinity_high,
6044 floatx80_infinity_low);
158142c2
FB
6045 }
6046 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6047 if ((uint64_t)(bSig << 1)) {
6048 return propagateFloatx80NaN(a, b, status);
6049 }
158142c2
FB
6050 return packFloatx80( zSign, 0, 0 );
6051 }
6052 if ( bExp == 0 ) {
6053 if ( bSig == 0 ) {
6054 if ( ( aExp | aSig ) == 0 ) {
6055 invalid:
ff32e16e 6056 float_raise(float_flag_invalid, status);
af39bc8c 6057 return floatx80_default_nan(status);
158142c2 6058 }
ff32e16e 6059 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6060 return packFloatx80(zSign, floatx80_infinity_high,
6061 floatx80_infinity_low);
158142c2
FB
6062 }
6063 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6064 }
6065 if ( aExp == 0 ) {
6066 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6067 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6068 }
6069 zExp = aExp - bExp + 0x3FFE;
6070 rem1 = 0;
6071 if ( bSig <= aSig ) {
6072 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6073 ++zExp;
6074 }
6075 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6076 mul64To128( bSig, zSig0, &term0, &term1 );
6077 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6078 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6079 --zSig0;
6080 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6081 }
6082 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6083 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6084 mul64To128( bSig, zSig1, &term1, &term2 );
6085 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6086 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6087 --zSig1;
6088 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6089 }
6090 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6091 }
a2f2d288 6092 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6093 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6094}
6095
6096/*----------------------------------------------------------------------------
6097| Returns the remainder of the extended double-precision floating-point value
6098| `a' with respect to the corresponding value `b'. The operation is performed
6099| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6100*----------------------------------------------------------------------------*/
6101
e5a41ffa 6102floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 6103{
ed086f3d 6104 flag aSign, zSign;
f4014512 6105 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6106 uint64_t aSig0, aSig1, bSig;
6107 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6108
d1eb8f2a
AD
6109 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6110 float_raise(float_flag_invalid, status);
6111 return floatx80_default_nan(status);
6112 }
158142c2
FB
6113 aSig0 = extractFloatx80Frac( a );
6114 aExp = extractFloatx80Exp( a );
6115 aSign = extractFloatx80Sign( a );
6116 bSig = extractFloatx80Frac( b );
6117 bExp = extractFloatx80Exp( b );
158142c2 6118 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6119 if ( (uint64_t) ( aSig0<<1 )
6120 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6121 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6122 }
6123 goto invalid;
6124 }
6125 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6126 if ((uint64_t)(bSig << 1)) {
6127 return propagateFloatx80NaN(a, b, status);
6128 }
158142c2
FB
6129 return a;
6130 }
6131 if ( bExp == 0 ) {
6132 if ( bSig == 0 ) {
6133 invalid:
ff32e16e 6134 float_raise(float_flag_invalid, status);
af39bc8c 6135 return floatx80_default_nan(status);
158142c2
FB
6136 }
6137 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6138 }
6139 if ( aExp == 0 ) {
bb98fe42 6140 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
6141 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6142 }
6143 bSig |= LIT64( 0x8000000000000000 );
6144 zSign = aSign;
6145 expDiff = aExp - bExp;
6146 aSig1 = 0;
6147 if ( expDiff < 0 ) {
6148 if ( expDiff < -1 ) return a;
6149 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6150 expDiff = 0;
6151 }
6152 q = ( bSig <= aSig0 );
6153 if ( q ) aSig0 -= bSig;
6154 expDiff -= 64;
6155 while ( 0 < expDiff ) {
6156 q = estimateDiv128To64( aSig0, aSig1, bSig );
6157 q = ( 2 < q ) ? q - 2 : 0;
6158 mul64To128( bSig, q, &term0, &term1 );
6159 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6160 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6161 expDiff -= 62;
6162 }
6163 expDiff += 64;
6164 if ( 0 < expDiff ) {
6165 q = estimateDiv128To64( aSig0, aSig1, bSig );
6166 q = ( 2 < q ) ? q - 2 : 0;
6167 q >>= 64 - expDiff;
6168 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6169 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6170 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6171 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6172 ++q;
6173 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6174 }
6175 }
6176 else {
6177 term1 = 0;
6178 term0 = bSig;
6179 }
6180 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6181 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6182 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6183 && ( q & 1 ) )
6184 ) {
6185 aSig0 = alternateASig0;
6186 aSig1 = alternateASig1;
6187 zSign = ! zSign;
6188 }
6189 return
6190 normalizeRoundAndPackFloatx80(
ff32e16e 6191 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6192
6193}
6194
6195/*----------------------------------------------------------------------------
6196| Returns the square root of the extended double-precision floating-point
6197| value `a'. The operation is performed according to the IEC/IEEE Standard
6198| for Binary Floating-Point Arithmetic.
6199*----------------------------------------------------------------------------*/
6200
e5a41ffa 6201floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
6202{
6203 flag aSign;
f4014512 6204 int32_t aExp, zExp;
bb98fe42
AF
6205 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6206 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6207
d1eb8f2a
AD
6208 if (floatx80_invalid_encoding(a)) {
6209 float_raise(float_flag_invalid, status);
6210 return floatx80_default_nan(status);
6211 }
158142c2
FB
6212 aSig0 = extractFloatx80Frac( a );
6213 aExp = extractFloatx80Exp( a );
6214 aSign = extractFloatx80Sign( a );
6215 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6216 if ((uint64_t)(aSig0 << 1)) {
6217 return propagateFloatx80NaN(a, a, status);
6218 }
158142c2
FB
6219 if ( ! aSign ) return a;
6220 goto invalid;
6221 }
6222 if ( aSign ) {
6223 if ( ( aExp | aSig0 ) == 0 ) return a;
6224 invalid:
ff32e16e 6225 float_raise(float_flag_invalid, status);
af39bc8c 6226 return floatx80_default_nan(status);
158142c2
FB
6227 }
6228 if ( aExp == 0 ) {
6229 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6230 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6231 }
6232 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6233 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6234 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6235 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6236 doubleZSig0 = zSig0<<1;
6237 mul64To128( zSig0, zSig0, &term0, &term1 );
6238 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6239 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6240 --zSig0;
6241 doubleZSig0 -= 2;
6242 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6243 }
6244 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6245 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
6246 if ( zSig1 == 0 ) zSig1 = 1;
6247 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6248 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6249 mul64To128( zSig1, zSig1, &term2, &term3 );
6250 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6251 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6252 --zSig1;
6253 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6254 term3 |= 1;
6255 term2 |= doubleZSig0;
6256 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6257 }
6258 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6259 }
6260 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6261 zSig0 |= doubleZSig0;
a2f2d288
PM
6262 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6263 0, zExp, zSig0, zSig1, status);
158142c2
FB
6264}
6265
6266/*----------------------------------------------------------------------------
b689362d
AJ
6267| Returns 1 if the extended double-precision floating-point value `a' is equal
6268| to the corresponding value `b', and 0 otherwise. The invalid exception is
6269| raised if either operand is a NaN. Otherwise, the comparison is performed
6270| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6271*----------------------------------------------------------------------------*/
6272
e5a41ffa 6273int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6274{
6275
d1eb8f2a
AD
6276 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6277 || (extractFloatx80Exp(a) == 0x7FFF
6278 && (uint64_t) (extractFloatx80Frac(a) << 1))
6279 || (extractFloatx80Exp(b) == 0x7FFF
6280 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6281 ) {
ff32e16e 6282 float_raise(float_flag_invalid, status);
158142c2
FB
6283 return 0;
6284 }
6285 return
6286 ( a.low == b.low )
6287 && ( ( a.high == b.high )
6288 || ( ( a.low == 0 )
bb98fe42 6289 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6290 );
6291
6292}
6293
6294/*----------------------------------------------------------------------------
6295| Returns 1 if the extended double-precision floating-point value `a' is
6296| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
6297| invalid exception is raised if either operand is a NaN. The comparison is
6298| performed according to the IEC/IEEE Standard for Binary Floating-Point
6299| Arithmetic.
158142c2
FB
6300*----------------------------------------------------------------------------*/
6301
e5a41ffa 6302int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6303{
6304 flag aSign, bSign;
6305
d1eb8f2a
AD
6306 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6307 || (extractFloatx80Exp(a) == 0x7FFF
6308 && (uint64_t) (extractFloatx80Frac(a) << 1))
6309 || (extractFloatx80Exp(b) == 0x7FFF
6310 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6311 ) {
ff32e16e 6312 float_raise(float_flag_invalid, status);
158142c2
FB
6313 return 0;
6314 }
6315 aSign = extractFloatx80Sign( a );
6316 bSign = extractFloatx80Sign( b );
6317 if ( aSign != bSign ) {
6318 return
6319 aSign
bb98fe42 6320 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6321 == 0 );
6322 }
6323 return
6324 aSign ? le128( b.high, b.low, a.high, a.low )
6325 : le128( a.high, a.low, b.high, b.low );
6326
6327}
6328
6329/*----------------------------------------------------------------------------
6330| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6331| less than the corresponding value `b', and 0 otherwise. The invalid
6332| exception is raised if either operand is a NaN. The comparison is performed
6333| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6334*----------------------------------------------------------------------------*/
6335
e5a41ffa 6336int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6337{
6338 flag aSign, bSign;
6339
d1eb8f2a
AD
6340 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6341 || (extractFloatx80Exp(a) == 0x7FFF
6342 && (uint64_t) (extractFloatx80Frac(a) << 1))
6343 || (extractFloatx80Exp(b) == 0x7FFF
6344 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6345 ) {
ff32e16e 6346 float_raise(float_flag_invalid, status);
158142c2
FB
6347 return 0;
6348 }
6349 aSign = extractFloatx80Sign( a );
6350 bSign = extractFloatx80Sign( b );
6351 if ( aSign != bSign ) {
6352 return
6353 aSign
bb98fe42 6354 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6355 != 0 );
6356 }
6357 return
6358 aSign ? lt128( b.high, b.low, a.high, a.low )
6359 : lt128( a.high, a.low, b.high, b.low );
6360
6361}
6362
67b7861d
AJ
6363/*----------------------------------------------------------------------------
6364| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
6365| cannot be compared, and 0 otherwise. The invalid exception is raised if
6366| either operand is a NaN. The comparison is performed according to the
6367| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 6368*----------------------------------------------------------------------------*/
e5a41ffa 6369int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 6370{
d1eb8f2a
AD
6371 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6372 || (extractFloatx80Exp(a) == 0x7FFF
6373 && (uint64_t) (extractFloatx80Frac(a) << 1))
6374 || (extractFloatx80Exp(b) == 0x7FFF
6375 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 6376 ) {
ff32e16e 6377 float_raise(float_flag_invalid, status);
67b7861d
AJ
6378 return 1;
6379 }
6380 return 0;
6381}
6382
158142c2 6383/*----------------------------------------------------------------------------
b689362d 6384| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6385| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6386| cause an exception. The comparison is performed according to the IEC/IEEE
6387| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6388*----------------------------------------------------------------------------*/
6389
e5a41ffa 6390int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6391{
6392
d1eb8f2a
AD
6393 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6394 float_raise(float_flag_invalid, status);
6395 return 0;
6396 }
158142c2 6397 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6398 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6399 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6400 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6401 ) {
af39bc8c
AM
6402 if (floatx80_is_signaling_nan(a, status)
6403 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6404 float_raise(float_flag_invalid, status);
b689362d 6405 }
158142c2
FB
6406 return 0;
6407 }
6408 return
6409 ( a.low == b.low )
6410 && ( ( a.high == b.high )
6411 || ( ( a.low == 0 )
bb98fe42 6412 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6413 );
6414
6415}
6416
6417/*----------------------------------------------------------------------------
6418| Returns 1 if the extended double-precision floating-point value `a' is less
6419| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6420| do not cause an exception. Otherwise, the comparison is performed according
6421| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6422*----------------------------------------------------------------------------*/
6423
e5a41ffa 6424int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6425{
6426 flag aSign, bSign;
6427
d1eb8f2a
AD
6428 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6429 float_raise(float_flag_invalid, status);
6430 return 0;
6431 }
158142c2 6432 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6433 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6434 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6435 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6436 ) {
af39bc8c
AM
6437 if (floatx80_is_signaling_nan(a, status)
6438 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6439 float_raise(float_flag_invalid, status);
158142c2
FB
6440 }
6441 return 0;
6442 }
6443 aSign = extractFloatx80Sign( a );
6444 bSign = extractFloatx80Sign( b );
6445 if ( aSign != bSign ) {
6446 return
6447 aSign
bb98fe42 6448 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6449 == 0 );
6450 }
6451 return
6452 aSign ? le128( b.high, b.low, a.high, a.low )
6453 : le128( a.high, a.low, b.high, b.low );
6454
6455}
6456
6457/*----------------------------------------------------------------------------
6458| Returns 1 if the extended double-precision floating-point value `a' is less
6459| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6460| an exception. Otherwise, the comparison is performed according to the
6461| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6462*----------------------------------------------------------------------------*/
6463
e5a41ffa 6464int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6465{
6466 flag aSign, bSign;
6467
d1eb8f2a
AD
6468 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6469 float_raise(float_flag_invalid, status);
6470 return 0;
6471 }
158142c2 6472 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6473 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6474 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6475 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6476 ) {
af39bc8c
AM
6477 if (floatx80_is_signaling_nan(a, status)
6478 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6479 float_raise(float_flag_invalid, status);
158142c2
FB
6480 }
6481 return 0;
6482 }
6483 aSign = extractFloatx80Sign( a );
6484 bSign = extractFloatx80Sign( b );
6485 if ( aSign != bSign ) {
6486 return
6487 aSign
bb98fe42 6488 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6489 != 0 );
6490 }
6491 return
6492 aSign ? lt128( b.high, b.low, a.high, a.low )
6493 : lt128( a.high, a.low, b.high, b.low );
6494
6495}
6496
67b7861d
AJ
6497/*----------------------------------------------------------------------------
6498| Returns 1 if the extended double-precision floating-point values `a' and `b'
6499| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6500| The comparison is performed according to the IEC/IEEE Standard for Binary
6501| Floating-Point Arithmetic.
6502*----------------------------------------------------------------------------*/
e5a41ffa 6503int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 6504{
d1eb8f2a
AD
6505 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6506 float_raise(float_flag_invalid, status);
6507 return 1;
6508 }
67b7861d
AJ
6509 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
6510 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
6511 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
6512 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
6513 ) {
af39bc8c
AM
6514 if (floatx80_is_signaling_nan(a, status)
6515 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6516 float_raise(float_flag_invalid, status);
67b7861d
AJ
6517 }
6518 return 1;
6519 }
6520 return 0;
6521}
6522
158142c2
FB
6523/*----------------------------------------------------------------------------
6524| Returns the result of converting the quadruple-precision floating-point
6525| value `a' to the 32-bit two's complement integer format. The conversion
6526| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6527| Arithmetic---which means in particular that the conversion is rounded
6528| according to the current rounding mode. If `a' is a NaN, the largest
6529| positive integer is returned. Otherwise, if the conversion overflows, the
6530| largest integer with the same sign as `a' is returned.
6531*----------------------------------------------------------------------------*/
6532
f4014512 6533int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
6534{
6535 flag aSign;
f4014512 6536 int32_t aExp, shiftCount;
bb98fe42 6537 uint64_t aSig0, aSig1;
158142c2
FB
6538
6539 aSig1 = extractFloat128Frac1( a );
6540 aSig0 = extractFloat128Frac0( a );
6541 aExp = extractFloat128Exp( a );
6542 aSign = extractFloat128Sign( a );
6543 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6544 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6545 aSig0 |= ( aSig1 != 0 );
6546 shiftCount = 0x4028 - aExp;
6547 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6548 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6549
6550}
6551
6552/*----------------------------------------------------------------------------
6553| Returns the result of converting the quadruple-precision floating-point
6554| value `a' to the 32-bit two's complement integer format. The conversion
6555| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6556| Arithmetic, except that the conversion is always rounded toward zero. If
6557| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6558| conversion overflows, the largest integer with the same sign as `a' is
6559| returned.
6560*----------------------------------------------------------------------------*/
6561
f4014512 6562int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
6563{
6564 flag aSign;
f4014512 6565 int32_t aExp, shiftCount;
bb98fe42 6566 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6567 int32_t z;
158142c2
FB
6568
6569 aSig1 = extractFloat128Frac1( a );
6570 aSig0 = extractFloat128Frac0( a );
6571 aExp = extractFloat128Exp( a );
6572 aSign = extractFloat128Sign( a );
6573 aSig0 |= ( aSig1 != 0 );
6574 if ( 0x401E < aExp ) {
6575 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6576 goto invalid;
6577 }
6578 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6579 if (aExp || aSig0) {
6580 status->float_exception_flags |= float_flag_inexact;
6581 }
158142c2
FB
6582 return 0;
6583 }
6584 aSig0 |= LIT64( 0x0001000000000000 );
6585 shiftCount = 0x402F - aExp;
6586 savedASig = aSig0;
6587 aSig0 >>= shiftCount;
6588 z = aSig0;
6589 if ( aSign ) z = - z;
6590 if ( ( z < 0 ) ^ aSign ) {
6591 invalid:
ff32e16e 6592 float_raise(float_flag_invalid, status);
bb98fe42 6593 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
6594 }
6595 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6596 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6597 }
6598 return z;
6599
6600}
6601
6602/*----------------------------------------------------------------------------
6603| Returns the result of converting the quadruple-precision floating-point
6604| value `a' to the 64-bit two's complement integer format. The conversion
6605| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6606| Arithmetic---which means in particular that the conversion is rounded
6607| according to the current rounding mode. If `a' is a NaN, the largest
6608| positive integer is returned. Otherwise, if the conversion overflows, the
6609| largest integer with the same sign as `a' is returned.
6610*----------------------------------------------------------------------------*/
6611
f42c2224 6612int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
6613{
6614 flag aSign;
f4014512 6615 int32_t aExp, shiftCount;
bb98fe42 6616 uint64_t aSig0, aSig1;
158142c2
FB
6617
6618 aSig1 = extractFloat128Frac1( a );
6619 aSig0 = extractFloat128Frac0( a );
6620 aExp = extractFloat128Exp( a );
6621 aSign = extractFloat128Sign( a );
6622 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6623 shiftCount = 0x402F - aExp;
6624 if ( shiftCount <= 0 ) {
6625 if ( 0x403E < aExp ) {
ff32e16e 6626 float_raise(float_flag_invalid, status);
158142c2
FB
6627 if ( ! aSign
6628 || ( ( aExp == 0x7FFF )
6629 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
6630 )
6631 ) {
6632 return LIT64( 0x7FFFFFFFFFFFFFFF );
6633 }
bb98fe42 6634 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6635 }
6636 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6637 }
6638 else {
6639 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6640 }
ff32e16e 6641 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6642
6643}
6644
6645/*----------------------------------------------------------------------------
6646| Returns the result of converting the quadruple-precision floating-point
6647| value `a' to the 64-bit two's complement integer format. The conversion
6648| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6649| Arithmetic, except that the conversion is always rounded toward zero.
6650| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6651| the conversion overflows, the largest integer with the same sign as `a' is
6652| returned.
6653*----------------------------------------------------------------------------*/
6654
f42c2224 6655int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
6656{
6657 flag aSign;
f4014512 6658 int32_t aExp, shiftCount;
bb98fe42 6659 uint64_t aSig0, aSig1;
f42c2224 6660 int64_t z;
158142c2
FB
6661
6662 aSig1 = extractFloat128Frac1( a );
6663 aSig0 = extractFloat128Frac0( a );
6664 aExp = extractFloat128Exp( a );
6665 aSign = extractFloat128Sign( a );
6666 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6667 shiftCount = aExp - 0x402F;
6668 if ( 0 < shiftCount ) {
6669 if ( 0x403E <= aExp ) {
6670 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6671 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6672 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
6673 if (aSig1) {
6674 status->float_exception_flags |= float_flag_inexact;
6675 }
158142c2
FB
6676 }
6677 else {
ff32e16e 6678 float_raise(float_flag_invalid, status);
158142c2
FB
6679 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6680 return LIT64( 0x7FFFFFFFFFFFFFFF );
6681 }
6682 }
bb98fe42 6683 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6684 }
6685 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6686 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6687 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6688 }
6689 }
6690 else {
6691 if ( aExp < 0x3FFF ) {
6692 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6693 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6694 }
6695 return 0;
6696 }
6697 z = aSig0>>( - shiftCount );
6698 if ( aSig1
bb98fe42 6699 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6700 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6701 }
6702 }
6703 if ( aSign ) z = - z;
6704 return z;
6705
6706}
6707
2e6d8568
BR
6708/*----------------------------------------------------------------------------
6709| Returns the result of converting the quadruple-precision floating-point value
6710| `a' to the 64-bit unsigned integer format. The conversion is
6711| performed according to the IEC/IEEE Standard for Binary Floating-Point
6712| Arithmetic---which means in particular that the conversion is rounded
6713| according to the current rounding mode. If `a' is a NaN, the largest
6714| positive integer is returned. If the conversion overflows, the
6715| largest unsigned integer is returned. If 'a' is negative, the value is
6716| rounded and zero is returned; negative values that do not round to zero
6717| will raise the inexact exception.
6718*----------------------------------------------------------------------------*/
6719
6720uint64_t float128_to_uint64(float128 a, float_status *status)
6721{
6722 flag aSign;
6723 int aExp;
6724 int shiftCount;
6725 uint64_t aSig0, aSig1;
6726
6727 aSig0 = extractFloat128Frac0(a);
6728 aSig1 = extractFloat128Frac1(a);
6729 aExp = extractFloat128Exp(a);
6730 aSign = extractFloat128Sign(a);
6731 if (aSign && (aExp > 0x3FFE)) {
6732 float_raise(float_flag_invalid, status);
6733 if (float128_is_any_nan(a)) {
6734 return LIT64(0xFFFFFFFFFFFFFFFF);
6735 } else {
6736 return 0;
6737 }
6738 }
6739 if (aExp) {
6740 aSig0 |= LIT64(0x0001000000000000);
6741 }
6742 shiftCount = 0x402F - aExp;
6743 if (shiftCount <= 0) {
6744 if (0x403E < aExp) {
6745 float_raise(float_flag_invalid, status);
6746 return LIT64(0xFFFFFFFFFFFFFFFF);
6747 }
6748 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6749 } else {
6750 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6751 }
6752 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6753}
6754
6755uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6756{
6757 uint64_t v;
6758 signed char current_rounding_mode = status->float_rounding_mode;
6759
6760 set_float_rounding_mode(float_round_to_zero, status);
6761 v = float128_to_uint64(a, status);
6762 set_float_rounding_mode(current_rounding_mode, status);
6763
6764 return v;
6765}
6766
158142c2
FB
6767/*----------------------------------------------------------------------------
6768| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6769| value `a' to the 32-bit unsigned integer format. The conversion
6770| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6771| Arithmetic except that the conversion is always rounded toward zero.
6772| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6773| if the conversion overflows, the largest unsigned integer is returned.
6774| If 'a' is negative, the value is rounded and zero is returned; negative
6775| values that do not round to zero will raise the inexact exception.
6776*----------------------------------------------------------------------------*/
6777
6778uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6779{
6780 uint64_t v;
6781 uint32_t res;
6782 int old_exc_flags = get_float_exception_flags(status);
6783
6784 v = float128_to_uint64_round_to_zero(a, status);
6785 if (v > 0xffffffff) {
6786 res = 0xffffffff;
6787 } else {
6788 return v;
6789 }
6790 set_float_exception_flags(old_exc_flags, status);
6791 float_raise(float_flag_invalid, status);
6792 return res;
6793}
6794
6795/*----------------------------------------------------------------------------
6796| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6797| value `a' to the single-precision floating-point format. The conversion
6798| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6799| Arithmetic.
6800*----------------------------------------------------------------------------*/
6801
e5a41ffa 6802float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6803{
6804 flag aSign;
f4014512 6805 int32_t aExp;
bb98fe42
AF
6806 uint64_t aSig0, aSig1;
6807 uint32_t zSig;
158142c2
FB
6808
6809 aSig1 = extractFloat128Frac1( a );
6810 aSig0 = extractFloat128Frac0( a );
6811 aExp = extractFloat128Exp( a );
6812 aSign = extractFloat128Sign( a );
6813 if ( aExp == 0x7FFF ) {
6814 if ( aSig0 | aSig1 ) {
ff32e16e 6815 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6816 }
6817 return packFloat32( aSign, 0xFF, 0 );
6818 }
6819 aSig0 |= ( aSig1 != 0 );
6820 shift64RightJamming( aSig0, 18, &aSig0 );
6821 zSig = aSig0;
6822 if ( aExp || zSig ) {
6823 zSig |= 0x40000000;
6824 aExp -= 0x3F81;
6825 }
ff32e16e 6826 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6827
6828}
6829
6830/*----------------------------------------------------------------------------
6831| Returns the result of converting the quadruple-precision floating-point
6832| value `a' to the double-precision floating-point format. The conversion
6833| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6834| Arithmetic.
6835*----------------------------------------------------------------------------*/
6836
e5a41ffa 6837float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6838{
6839 flag aSign;
f4014512 6840 int32_t aExp;
bb98fe42 6841 uint64_t aSig0, aSig1;
158142c2
FB
6842
6843 aSig1 = extractFloat128Frac1( a );
6844 aSig0 = extractFloat128Frac0( a );
6845 aExp = extractFloat128Exp( a );
6846 aSign = extractFloat128Sign( a );
6847 if ( aExp == 0x7FFF ) {
6848 if ( aSig0 | aSig1 ) {
ff32e16e 6849 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6850 }
6851 return packFloat64( aSign, 0x7FF, 0 );
6852 }
6853 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6854 aSig0 |= ( aSig1 != 0 );
6855 if ( aExp || aSig0 ) {
6856 aSig0 |= LIT64( 0x4000000000000000 );
6857 aExp -= 0x3C01;
6858 }
ff32e16e 6859 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6860
6861}
6862
158142c2
FB
6863/*----------------------------------------------------------------------------
6864| Returns the result of converting the quadruple-precision floating-point
6865| value `a' to the extended double-precision floating-point format. The
6866| conversion is performed according to the IEC/IEEE Standard for Binary
6867| Floating-Point Arithmetic.
6868*----------------------------------------------------------------------------*/
6869
e5a41ffa 6870floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6871{
6872 flag aSign;
f4014512 6873 int32_t aExp;
bb98fe42 6874 uint64_t aSig0, aSig1;
158142c2
FB
6875
6876 aSig1 = extractFloat128Frac1( a );
6877 aSig0 = extractFloat128Frac0( a );
6878 aExp = extractFloat128Exp( a );
6879 aSign = extractFloat128Sign( a );
6880 if ( aExp == 0x7FFF ) {
6881 if ( aSig0 | aSig1 ) {
ff32e16e 6882 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6883 }
0f605c88
LV
6884 return packFloatx80(aSign, floatx80_infinity_high,
6885 floatx80_infinity_low);
158142c2
FB
6886 }
6887 if ( aExp == 0 ) {
6888 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6889 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6890 }
6891 else {
6892 aSig0 |= LIT64( 0x0001000000000000 );
6893 }
6894 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6895 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6896
6897}
6898
158142c2
FB
6899/*----------------------------------------------------------------------------
6900| Rounds the quadruple-precision floating-point value `a' to an integer, and
6901| returns the result as a quadruple-precision floating-point value. The
6902| operation is performed according to the IEC/IEEE Standard for Binary
6903| Floating-Point Arithmetic.
6904*----------------------------------------------------------------------------*/
6905
e5a41ffa 6906float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6907{
6908 flag aSign;
f4014512 6909 int32_t aExp;
bb98fe42 6910 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6911 float128 z;
6912
6913 aExp = extractFloat128Exp( a );
6914 if ( 0x402F <= aExp ) {
6915 if ( 0x406F <= aExp ) {
6916 if ( ( aExp == 0x7FFF )
6917 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6918 ) {
ff32e16e 6919 return propagateFloat128NaN(a, a, status);
158142c2
FB
6920 }
6921 return a;
6922 }
6923 lastBitMask = 1;
6924 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6925 roundBitsMask = lastBitMask - 1;
6926 z = a;
a2f2d288 6927 switch (status->float_rounding_mode) {
dc355b76 6928 case float_round_nearest_even:
158142c2
FB
6929 if ( lastBitMask ) {
6930 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6931 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6932 }
6933 else {
bb98fe42 6934 if ( (int64_t) z.low < 0 ) {
158142c2 6935 ++z.high;
bb98fe42 6936 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6937 }
6938 }
dc355b76 6939 break;
f9288a76
PM
6940 case float_round_ties_away:
6941 if (lastBitMask) {
6942 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6943 } else {
6944 if ((int64_t) z.low < 0) {
6945 ++z.high;
6946 }
6947 }
6948 break;
dc355b76
PM
6949 case float_round_to_zero:
6950 break;
6951 case float_round_up:
6952 if (!extractFloat128Sign(z)) {
6953 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6954 }
6955 break;
6956 case float_round_down:
6957 if (extractFloat128Sign(z)) {
6958 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6959 }
dc355b76
PM
6960 break;
6961 default:
6962 abort();
158142c2
FB
6963 }
6964 z.low &= ~ roundBitsMask;
6965 }
6966 else {
6967 if ( aExp < 0x3FFF ) {
bb98fe42 6968 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6969 status->float_exception_flags |= float_flag_inexact;
158142c2 6970 aSign = extractFloat128Sign( a );
a2f2d288 6971 switch (status->float_rounding_mode) {
158142c2
FB
6972 case float_round_nearest_even:
6973 if ( ( aExp == 0x3FFE )
6974 && ( extractFloat128Frac0( a )
6975 | extractFloat128Frac1( a ) )
6976 ) {
6977 return packFloat128( aSign, 0x3FFF, 0, 0 );
6978 }
6979 break;
f9288a76
PM
6980 case float_round_ties_away:
6981 if (aExp == 0x3FFE) {
6982 return packFloat128(aSign, 0x3FFF, 0, 0);
6983 }
6984 break;
158142c2
FB
6985 case float_round_down:
6986 return
6987 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6988 : packFloat128( 0, 0, 0, 0 );
6989 case float_round_up:
6990 return
6991 aSign ? packFloat128( 1, 0, 0, 0 )
6992 : packFloat128( 0, 0x3FFF, 0, 0 );
6993 }
6994 return packFloat128( aSign, 0, 0, 0 );
6995 }
6996 lastBitMask = 1;
6997 lastBitMask <<= 0x402F - aExp;
6998 roundBitsMask = lastBitMask - 1;
6999 z.low = 0;
7000 z.high = a.high;
a2f2d288 7001 switch (status->float_rounding_mode) {
dc355b76 7002 case float_round_nearest_even:
158142c2
FB
7003 z.high += lastBitMask>>1;
7004 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7005 z.high &= ~ lastBitMask;
7006 }
dc355b76 7007 break;
f9288a76
PM
7008 case float_round_ties_away:
7009 z.high += lastBitMask>>1;
7010 break;
dc355b76
PM
7011 case float_round_to_zero:
7012 break;
7013 case float_round_up:
7014 if (!extractFloat128Sign(z)) {
158142c2
FB
7015 z.high |= ( a.low != 0 );
7016 z.high += roundBitsMask;
7017 }
dc355b76
PM
7018 break;
7019 case float_round_down:
7020 if (extractFloat128Sign(z)) {
7021 z.high |= (a.low != 0);
7022 z.high += roundBitsMask;
7023 }
7024 break;
7025 default:
7026 abort();
158142c2
FB
7027 }
7028 z.high &= ~ roundBitsMask;
7029 }
7030 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 7031 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
7032 }
7033 return z;
7034
7035}
7036
7037/*----------------------------------------------------------------------------
7038| Returns the result of adding the absolute values of the quadruple-precision
7039| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7040| before being returned. `zSign' is ignored if the result is a NaN.
7041| The addition is performed according to the IEC/IEEE Standard for Binary
7042| Floating-Point Arithmetic.
7043*----------------------------------------------------------------------------*/
7044
e5a41ffa
PM
7045static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
7046 float_status *status)
158142c2 7047{
f4014512 7048 int32_t aExp, bExp, zExp;
bb98fe42 7049 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7050 int32_t expDiff;
158142c2
FB
7051
7052 aSig1 = extractFloat128Frac1( a );
7053 aSig0 = extractFloat128Frac0( a );
7054 aExp = extractFloat128Exp( a );
7055 bSig1 = extractFloat128Frac1( b );
7056 bSig0 = extractFloat128Frac0( b );
7057 bExp = extractFloat128Exp( b );
7058 expDiff = aExp - bExp;
7059 if ( 0 < expDiff ) {
7060 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7061 if (aSig0 | aSig1) {
7062 return propagateFloat128NaN(a, b, status);
7063 }
158142c2
FB
7064 return a;
7065 }
7066 if ( bExp == 0 ) {
7067 --expDiff;
7068 }
7069 else {
7070 bSig0 |= LIT64( 0x0001000000000000 );
7071 }
7072 shift128ExtraRightJamming(
7073 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7074 zExp = aExp;
7075 }
7076 else if ( expDiff < 0 ) {
7077 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7078 if (bSig0 | bSig1) {
7079 return propagateFloat128NaN(a, b, status);
7080 }
158142c2
FB
7081 return packFloat128( zSign, 0x7FFF, 0, 0 );
7082 }
7083 if ( aExp == 0 ) {
7084 ++expDiff;
7085 }
7086 else {
7087 aSig0 |= LIT64( 0x0001000000000000 );
7088 }
7089 shift128ExtraRightJamming(
7090 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7091 zExp = bExp;
7092 }
7093 else {
7094 if ( aExp == 0x7FFF ) {
7095 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7096 return propagateFloat128NaN(a, b, status);
158142c2
FB
7097 }
7098 return a;
7099 }
7100 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7101 if ( aExp == 0 ) {
a2f2d288 7102 if (status->flush_to_zero) {
e6afc87f 7103 if (zSig0 | zSig1) {
ff32e16e 7104 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7105 }
7106 return packFloat128(zSign, 0, 0, 0);
7107 }
fe76d976
PB
7108 return packFloat128( zSign, 0, zSig0, zSig1 );
7109 }
158142c2
FB
7110 zSig2 = 0;
7111 zSig0 |= LIT64( 0x0002000000000000 );
7112 zExp = aExp;
7113 goto shiftRight1;
7114 }
7115 aSig0 |= LIT64( 0x0001000000000000 );
7116 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7117 --zExp;
7118 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
7119 ++zExp;
7120 shiftRight1:
7121 shift128ExtraRightJamming(
7122 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7123 roundAndPack:
ff32e16e 7124 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7125
7126}
7127
7128/*----------------------------------------------------------------------------
7129| Returns the result of subtracting the absolute values of the quadruple-
7130| precision floating-point values `a' and `b'. If `zSign' is 1, the
7131| difference is negated before being returned. `zSign' is ignored if the
7132| result is a NaN. The subtraction is performed according to the IEC/IEEE
7133| Standard for Binary Floating-Point Arithmetic.
7134*----------------------------------------------------------------------------*/
7135
e5a41ffa
PM
7136static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
7137 float_status *status)
158142c2 7138{
f4014512 7139 int32_t aExp, bExp, zExp;
bb98fe42 7140 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7141 int32_t expDiff;
158142c2
FB
7142
7143 aSig1 = extractFloat128Frac1( a );
7144 aSig0 = extractFloat128Frac0( a );
7145 aExp = extractFloat128Exp( a );
7146 bSig1 = extractFloat128Frac1( b );
7147 bSig0 = extractFloat128Frac0( b );
7148 bExp = extractFloat128Exp( b );
7149 expDiff = aExp - bExp;
7150 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7151 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7152 if ( 0 < expDiff ) goto aExpBigger;
7153 if ( expDiff < 0 ) goto bExpBigger;
7154 if ( aExp == 0x7FFF ) {
7155 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7156 return propagateFloat128NaN(a, b, status);
158142c2 7157 }
ff32e16e 7158 float_raise(float_flag_invalid, status);
af39bc8c 7159 return float128_default_nan(status);
158142c2
FB
7160 }
7161 if ( aExp == 0 ) {
7162 aExp = 1;
7163 bExp = 1;
7164 }
7165 if ( bSig0 < aSig0 ) goto aBigger;
7166 if ( aSig0 < bSig0 ) goto bBigger;
7167 if ( bSig1 < aSig1 ) goto aBigger;
7168 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7169 return packFloat128(status->float_rounding_mode == float_round_down,
7170 0, 0, 0);
158142c2
FB
7171 bExpBigger:
7172 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7173 if (bSig0 | bSig1) {
7174 return propagateFloat128NaN(a, b, status);
7175 }
158142c2
FB
7176 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7177 }
7178 if ( aExp == 0 ) {
7179 ++expDiff;
7180 }
7181 else {
7182 aSig0 |= LIT64( 0x4000000000000000 );
7183 }
7184 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7185 bSig0 |= LIT64( 0x4000000000000000 );
7186 bBigger:
7187 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7188 zExp = bExp;
7189 zSign ^= 1;
7190 goto normalizeRoundAndPack;
7191 aExpBigger:
7192 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7193 if (aSig0 | aSig1) {
7194 return propagateFloat128NaN(a, b, status);
7195 }
158142c2
FB
7196 return a;
7197 }
7198 if ( bExp == 0 ) {
7199 --expDiff;
7200 }
7201 else {
7202 bSig0 |= LIT64( 0x4000000000000000 );
7203 }
7204 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
7205 aSig0 |= LIT64( 0x4000000000000000 );
7206 aBigger:
7207 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7208 zExp = aExp;
7209 normalizeRoundAndPack:
7210 --zExp;
ff32e16e
PM
7211 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7212 status);
158142c2
FB
7213
7214}
7215
7216/*----------------------------------------------------------------------------
7217| Returns the result of adding the quadruple-precision floating-point values
7218| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7219| for Binary Floating-Point Arithmetic.
7220*----------------------------------------------------------------------------*/
7221
e5a41ffa 7222float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
7223{
7224 flag aSign, bSign;
7225
7226 aSign = extractFloat128Sign( a );
7227 bSign = extractFloat128Sign( b );
7228 if ( aSign == bSign ) {
ff32e16e 7229 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7230 }
7231 else {
ff32e16e 7232 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7233 }
7234
7235}
7236
7237/*----------------------------------------------------------------------------
7238| Returns the result of subtracting the quadruple-precision floating-point
7239| values `a' and `b'. The operation is performed according to the IEC/IEEE
7240| Standard for Binary Floating-Point Arithmetic.
7241*----------------------------------------------------------------------------*/
7242
e5a41ffa 7243float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
7244{
7245 flag aSign, bSign;
7246
7247 aSign = extractFloat128Sign( a );
7248 bSign = extractFloat128Sign( b );
7249 if ( aSign == bSign ) {
ff32e16e 7250 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7251 }
7252 else {
ff32e16e 7253 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7254 }
7255
7256}
7257
7258/*----------------------------------------------------------------------------
7259| Returns the result of multiplying the quadruple-precision floating-point
7260| values `a' and `b'. The operation is performed according to the IEC/IEEE
7261| Standard for Binary Floating-Point Arithmetic.
7262*----------------------------------------------------------------------------*/
7263
e5a41ffa 7264float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
7265{
7266 flag aSign, bSign, zSign;
f4014512 7267 int32_t aExp, bExp, zExp;
bb98fe42 7268 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7269
7270 aSig1 = extractFloat128Frac1( a );
7271 aSig0 = extractFloat128Frac0( a );
7272 aExp = extractFloat128Exp( a );
7273 aSign = extractFloat128Sign( a );
7274 bSig1 = extractFloat128Frac1( b );
7275 bSig0 = extractFloat128Frac0( b );
7276 bExp = extractFloat128Exp( b );
7277 bSign = extractFloat128Sign( b );
7278 zSign = aSign ^ bSign;
7279 if ( aExp == 0x7FFF ) {
7280 if ( ( aSig0 | aSig1 )
7281 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7282 return propagateFloat128NaN(a, b, status);
158142c2
FB
7283 }
7284 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7285 return packFloat128( zSign, 0x7FFF, 0, 0 );
7286 }
7287 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7288 if (bSig0 | bSig1) {
7289 return propagateFloat128NaN(a, b, status);
7290 }
158142c2
FB
7291 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7292 invalid:
ff32e16e 7293 float_raise(float_flag_invalid, status);
af39bc8c 7294 return float128_default_nan(status);
158142c2
FB
7295 }
7296 return packFloat128( zSign, 0x7FFF, 0, 0 );
7297 }
7298 if ( aExp == 0 ) {
7299 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7300 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7301 }
7302 if ( bExp == 0 ) {
7303 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7304 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7305 }
7306 zExp = aExp + bExp - 0x4000;
7307 aSig0 |= LIT64( 0x0001000000000000 );
7308 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7309 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7310 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7311 zSig2 |= ( zSig3 != 0 );
7312 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
7313 shift128ExtraRightJamming(
7314 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7315 ++zExp;
7316 }
ff32e16e 7317 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7318
7319}
7320
7321/*----------------------------------------------------------------------------
7322| Returns the result of dividing the quadruple-precision floating-point value
7323| `a' by the corresponding value `b'. The operation is performed according to
7324| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7325*----------------------------------------------------------------------------*/
7326
e5a41ffa 7327float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
7328{
7329 flag aSign, bSign, zSign;
f4014512 7330 int32_t aExp, bExp, zExp;
bb98fe42
AF
7331 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7332 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7333
7334 aSig1 = extractFloat128Frac1( a );
7335 aSig0 = extractFloat128Frac0( a );
7336 aExp = extractFloat128Exp( a );
7337 aSign = extractFloat128Sign( a );
7338 bSig1 = extractFloat128Frac1( b );
7339 bSig0 = extractFloat128Frac0( b );
7340 bExp = extractFloat128Exp( b );
7341 bSign = extractFloat128Sign( b );
7342 zSign = aSign ^ bSign;
7343 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7344 if (aSig0 | aSig1) {
7345 return propagateFloat128NaN(a, b, status);
7346 }
158142c2 7347 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7348 if (bSig0 | bSig1) {
7349 return propagateFloat128NaN(a, b, status);
7350 }
158142c2
FB
7351 goto invalid;
7352 }
7353 return packFloat128( zSign, 0x7FFF, 0, 0 );
7354 }
7355 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7356 if (bSig0 | bSig1) {
7357 return propagateFloat128NaN(a, b, status);
7358 }
158142c2
FB
7359 return packFloat128( zSign, 0, 0, 0 );
7360 }
7361 if ( bExp == 0 ) {
7362 if ( ( bSig0 | bSig1 ) == 0 ) {
7363 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7364 invalid:
ff32e16e 7365 float_raise(float_flag_invalid, status);
af39bc8c 7366 return float128_default_nan(status);
158142c2 7367 }
ff32e16e 7368 float_raise(float_flag_divbyzero, status);
158142c2
FB
7369 return packFloat128( zSign, 0x7FFF, 0, 0 );
7370 }
7371 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7372 }
7373 if ( aExp == 0 ) {
7374 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7375 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7376 }
7377 zExp = aExp - bExp + 0x3FFD;
7378 shortShift128Left(
7379 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
7380 shortShift128Left(
7381 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
7382 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7383 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7384 ++zExp;
7385 }
7386 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7387 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7388 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7389 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7390 --zSig0;
7391 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7392 }
7393 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7394 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7395 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7396 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7397 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7398 --zSig1;
7399 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7400 }
7401 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7402 }
7403 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7404 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7405
7406}
7407
7408/*----------------------------------------------------------------------------
7409| Returns the remainder of the quadruple-precision floating-point value `a'
7410| with respect to the corresponding value `b'. The operation is performed
7411| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7412*----------------------------------------------------------------------------*/
7413
e5a41ffa 7414float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7415{
ed086f3d 7416 flag aSign, zSign;
f4014512 7417 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7418 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7419 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7420 int64_t sigMean0;
158142c2
FB
7421
7422 aSig1 = extractFloat128Frac1( a );
7423 aSig0 = extractFloat128Frac0( a );
7424 aExp = extractFloat128Exp( a );
7425 aSign = extractFloat128Sign( a );
7426 bSig1 = extractFloat128Frac1( b );
7427 bSig0 = extractFloat128Frac0( b );
7428 bExp = extractFloat128Exp( b );
158142c2
FB
7429 if ( aExp == 0x7FFF ) {
7430 if ( ( aSig0 | aSig1 )
7431 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7432 return propagateFloat128NaN(a, b, status);
158142c2
FB
7433 }
7434 goto invalid;
7435 }
7436 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7437 if (bSig0 | bSig1) {
7438 return propagateFloat128NaN(a, b, status);
7439 }
158142c2
FB
7440 return a;
7441 }
7442 if ( bExp == 0 ) {
7443 if ( ( bSig0 | bSig1 ) == 0 ) {
7444 invalid:
ff32e16e 7445 float_raise(float_flag_invalid, status);
af39bc8c 7446 return float128_default_nan(status);
158142c2
FB
7447 }
7448 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7449 }
7450 if ( aExp == 0 ) {
7451 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7452 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7453 }
7454 expDiff = aExp - bExp;
7455 if ( expDiff < -1 ) return a;
7456 shortShift128Left(
7457 aSig0 | LIT64( 0x0001000000000000 ),
7458 aSig1,
7459 15 - ( expDiff < 0 ),
7460 &aSig0,
7461 &aSig1
7462 );
7463 shortShift128Left(
7464 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
7465 q = le128( bSig0, bSig1, aSig0, aSig1 );
7466 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7467 expDiff -= 64;
7468 while ( 0 < expDiff ) {
7469 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7470 q = ( 4 < q ) ? q - 4 : 0;
7471 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7472 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7473 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7474 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7475 expDiff -= 61;
7476 }
7477 if ( -64 < expDiff ) {
7478 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7479 q = ( 4 < q ) ? q - 4 : 0;
7480 q >>= - expDiff;
7481 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7482 expDiff += 52;
7483 if ( expDiff < 0 ) {
7484 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7485 }
7486 else {
7487 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7488 }
7489 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7490 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7491 }
7492 else {
7493 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7494 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7495 }
7496 do {
7497 alternateASig0 = aSig0;
7498 alternateASig1 = aSig1;
7499 ++q;
7500 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7501 } while ( 0 <= (int64_t) aSig0 );
158142c2 7502 add128(
bb98fe42 7503 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7504 if ( ( sigMean0 < 0 )
7505 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7506 aSig0 = alternateASig0;
7507 aSig1 = alternateASig1;
7508 }
bb98fe42 7509 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7510 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7511 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7512 status);
158142c2
FB
7513}
7514
7515/*----------------------------------------------------------------------------
7516| Returns the square root of the quadruple-precision floating-point value `a'.
7517| The operation is performed according to the IEC/IEEE Standard for Binary
7518| Floating-Point Arithmetic.
7519*----------------------------------------------------------------------------*/
7520
e5a41ffa 7521float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
7522{
7523 flag aSign;
f4014512 7524 int32_t aExp, zExp;
bb98fe42
AF
7525 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7526 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7527
7528 aSig1 = extractFloat128Frac1( a );
7529 aSig0 = extractFloat128Frac0( a );
7530 aExp = extractFloat128Exp( a );
7531 aSign = extractFloat128Sign( a );
7532 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7533 if (aSig0 | aSig1) {
7534 return propagateFloat128NaN(a, a, status);
7535 }
158142c2
FB
7536 if ( ! aSign ) return a;
7537 goto invalid;
7538 }
7539 if ( aSign ) {
7540 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7541 invalid:
ff32e16e 7542 float_raise(float_flag_invalid, status);
af39bc8c 7543 return float128_default_nan(status);
158142c2
FB
7544 }
7545 if ( aExp == 0 ) {
7546 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7547 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7548 }
7549 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7550 aSig0 |= LIT64( 0x0001000000000000 );
7551 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7552 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7553 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7554 doubleZSig0 = zSig0<<1;
7555 mul64To128( zSig0, zSig0, &term0, &term1 );
7556 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7557 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7558 --zSig0;
7559 doubleZSig0 -= 2;
7560 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7561 }
7562 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7563 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7564 if ( zSig1 == 0 ) zSig1 = 1;
7565 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7566 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7567 mul64To128( zSig1, zSig1, &term2, &term3 );
7568 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7569 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7570 --zSig1;
7571 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7572 term3 |= 1;
7573 term2 |= doubleZSig0;
7574 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7575 }
7576 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7577 }
7578 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7579 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7580
7581}
7582
7583/*----------------------------------------------------------------------------
7584| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
7585| the corresponding value `b', and 0 otherwise. The invalid exception is
7586| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
7587| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7588*----------------------------------------------------------------------------*/
7589
e5a41ffa 7590int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
7591{
7592
7593 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7594 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7595 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7596 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7597 ) {
ff32e16e 7598 float_raise(float_flag_invalid, status);
158142c2
FB
7599 return 0;
7600 }
7601 return
7602 ( a.low == b.low )
7603 && ( ( a.high == b.high )
7604 || ( ( a.low == 0 )
bb98fe42 7605 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7606 );
7607
7608}
7609
7610/*----------------------------------------------------------------------------
7611| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7612| or equal to the corresponding value `b', and 0 otherwise. The invalid
7613| exception is raised if either operand is a NaN. The comparison is performed
7614| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7615*----------------------------------------------------------------------------*/
7616
e5a41ffa 7617int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
7618{
7619 flag aSign, bSign;
7620
7621 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7622 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7623 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7624 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7625 ) {
ff32e16e 7626 float_raise(float_flag_invalid, status);
158142c2
FB
7627 return 0;
7628 }
7629 aSign = extractFloat128Sign( a );
7630 bSign = extractFloat128Sign( b );
7631 if ( aSign != bSign ) {
7632 return
7633 aSign
bb98fe42 7634 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7635 == 0 );
7636 }
7637 return
7638 aSign ? le128( b.high, b.low, a.high, a.low )
7639 : le128( a.high, a.low, b.high, b.low );
7640
7641}
7642
7643/*----------------------------------------------------------------------------
7644| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7645| the corresponding value `b', and 0 otherwise. The invalid exception is
7646| raised if either operand is a NaN. The comparison is performed according
7647| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7648*----------------------------------------------------------------------------*/
7649
e5a41ffa 7650int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
7651{
7652 flag aSign, bSign;
7653
7654 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7655 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7656 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7657 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7658 ) {
ff32e16e 7659 float_raise(float_flag_invalid, status);
158142c2
FB
7660 return 0;
7661 }
7662 aSign = extractFloat128Sign( a );
7663 bSign = extractFloat128Sign( b );
7664 if ( aSign != bSign ) {
7665 return
7666 aSign
bb98fe42 7667 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7668 != 0 );
7669 }
7670 return
7671 aSign ? lt128( b.high, b.low, a.high, a.low )
7672 : lt128( a.high, a.low, b.high, b.low );
7673
7674}
7675
67b7861d
AJ
7676/*----------------------------------------------------------------------------
7677| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
7678| be compared, and 0 otherwise. The invalid exception is raised if either
7679| operand is a NaN. The comparison is performed according to the IEC/IEEE
7680| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
7681*----------------------------------------------------------------------------*/
7682
e5a41ffa 7683int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
7684{
7685 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7686 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7687 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7688 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7689 ) {
ff32e16e 7690 float_raise(float_flag_invalid, status);
67b7861d
AJ
7691 return 1;
7692 }
7693 return 0;
7694}
7695
158142c2
FB
7696/*----------------------------------------------------------------------------
7697| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7698| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7699| exception. The comparison is performed according to the IEC/IEEE Standard
7700| for Binary Floating-Point Arithmetic.
158142c2
FB
7701*----------------------------------------------------------------------------*/
7702
e5a41ffa 7703int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7704{
7705
7706 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7707 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7708 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7709 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7710 ) {
af39bc8c
AM
7711 if (float128_is_signaling_nan(a, status)
7712 || float128_is_signaling_nan(b, status)) {
ff32e16e 7713 float_raise(float_flag_invalid, status);
b689362d 7714 }
158142c2
FB
7715 return 0;
7716 }
7717 return
7718 ( a.low == b.low )
7719 && ( ( a.high == b.high )
7720 || ( ( a.low == 0 )
bb98fe42 7721 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7722 );
7723
7724}
7725
7726/*----------------------------------------------------------------------------
7727| Returns 1 if the quadruple-precision floating-point value `a' is less than
7728| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7729| cause an exception. Otherwise, the comparison is performed according to the
7730| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7731*----------------------------------------------------------------------------*/
7732
e5a41ffa 7733int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7734{
7735 flag aSign, bSign;
7736
7737 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7738 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7739 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7740 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7741 ) {
af39bc8c
AM
7742 if (float128_is_signaling_nan(a, status)
7743 || float128_is_signaling_nan(b, status)) {
ff32e16e 7744 float_raise(float_flag_invalid, status);
158142c2
FB
7745 }
7746 return 0;
7747 }
7748 aSign = extractFloat128Sign( a );
7749 bSign = extractFloat128Sign( b );
7750 if ( aSign != bSign ) {
7751 return
7752 aSign
bb98fe42 7753 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7754 == 0 );
7755 }
7756 return
7757 aSign ? le128( b.high, b.low, a.high, a.low )
7758 : le128( a.high, a.low, b.high, b.low );
7759
7760}
7761
7762/*----------------------------------------------------------------------------
7763| Returns 1 if the quadruple-precision floating-point value `a' is less than
7764| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7765| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7766| Standard for Binary Floating-Point Arithmetic.
7767*----------------------------------------------------------------------------*/
7768
e5a41ffa 7769int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7770{
7771 flag aSign, bSign;
7772
7773 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7774 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7775 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7776 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7777 ) {
af39bc8c
AM
7778 if (float128_is_signaling_nan(a, status)
7779 || float128_is_signaling_nan(b, status)) {
ff32e16e 7780 float_raise(float_flag_invalid, status);
158142c2
FB
7781 }
7782 return 0;
7783 }
7784 aSign = extractFloat128Sign( a );
7785 bSign = extractFloat128Sign( b );
7786 if ( aSign != bSign ) {
7787 return
7788 aSign
bb98fe42 7789 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7790 != 0 );
7791 }
7792 return
7793 aSign ? lt128( b.high, b.low, a.high, a.low )
7794 : lt128( a.high, a.low, b.high, b.low );
7795
7796}
7797
67b7861d
AJ
7798/*----------------------------------------------------------------------------
7799| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7800| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7801| comparison is performed according to the IEC/IEEE Standard for Binary
7802| Floating-Point Arithmetic.
7803*----------------------------------------------------------------------------*/
7804
e5a41ffa 7805int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7806{
7807 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7808 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7809 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7810 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7811 ) {
af39bc8c
AM
7812 if (float128_is_signaling_nan(a, status)
7813 || float128_is_signaling_nan(b, status)) {
ff32e16e 7814 float_raise(float_flag_invalid, status);
67b7861d
AJ
7815 }
7816 return 1;
7817 }
7818 return 0;
7819}
7820
e5a41ffa
PM
7821static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7822 int is_quiet, float_status *status)
f6714d36
AJ
7823{
7824 flag aSign, bSign;
7825
d1eb8f2a
AD
7826 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7827 float_raise(float_flag_invalid, status);
7828 return float_relation_unordered;
7829 }
f6714d36
AJ
7830 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7831 ( extractFloatx80Frac( a )<<1 ) ) ||
7832 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7833 ( extractFloatx80Frac( b )<<1 ) )) {
7834 if (!is_quiet ||
af39bc8c
AM
7835 floatx80_is_signaling_nan(a, status) ||
7836 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7837 float_raise(float_flag_invalid, status);
f6714d36
AJ
7838 }
7839 return float_relation_unordered;
7840 }
7841 aSign = extractFloatx80Sign( a );
7842 bSign = extractFloatx80Sign( b );
7843 if ( aSign != bSign ) {
7844
7845 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7846 ( ( a.low | b.low ) == 0 ) ) {
7847 /* zero case */
7848 return float_relation_equal;
7849 } else {
7850 return 1 - (2 * aSign);
7851 }
7852 } else {
7853 if (a.low == b.low && a.high == b.high) {
7854 return float_relation_equal;
7855 } else {
7856 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7857 }
7858 }
7859}
7860
e5a41ffa 7861int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7862{
ff32e16e 7863 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7864}
7865
e5a41ffa 7866int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7867{
ff32e16e 7868 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7869}
7870
e5a41ffa
PM
7871static inline int float128_compare_internal(float128 a, float128 b,
7872 int is_quiet, float_status *status)
1f587329
BS
7873{
7874 flag aSign, bSign;
7875
7876 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7877 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7878 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7879 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7880 if (!is_quiet ||
af39bc8c
AM
7881 float128_is_signaling_nan(a, status) ||
7882 float128_is_signaling_nan(b, status)) {
ff32e16e 7883 float_raise(float_flag_invalid, status);
1f587329
BS
7884 }
7885 return float_relation_unordered;
7886 }
7887 aSign = extractFloat128Sign( a );
7888 bSign = extractFloat128Sign( b );
7889 if ( aSign != bSign ) {
7890 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7891 /* zero case */
7892 return float_relation_equal;
7893 } else {
7894 return 1 - (2 * aSign);
7895 }
7896 } else {
7897 if (a.low == b.low && a.high == b.high) {
7898 return float_relation_equal;
7899 } else {
7900 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7901 }
7902 }
7903}
7904
e5a41ffa 7905int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7906{
ff32e16e 7907 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7908}
7909
e5a41ffa 7910int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7911{
ff32e16e 7912 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7913}
7914
e5a41ffa 7915floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7916{
7917 flag aSign;
326b9e98 7918 int32_t aExp;
bb98fe42 7919 uint64_t aSig;
9ee6e8bb 7920
d1eb8f2a
AD
7921 if (floatx80_invalid_encoding(a)) {
7922 float_raise(float_flag_invalid, status);
7923 return floatx80_default_nan(status);
7924 }
9ee6e8bb
PB
7925 aSig = extractFloatx80Frac( a );
7926 aExp = extractFloatx80Exp( a );
7927 aSign = extractFloatx80Sign( a );
7928
326b9e98
AJ
7929 if ( aExp == 0x7FFF ) {
7930 if ( aSig<<1 ) {
ff32e16e 7931 return propagateFloatx80NaN(a, a, status);
326b9e98 7932 }
9ee6e8bb
PB
7933 return a;
7934 }
326b9e98 7935
3c85c37f
PM
7936 if (aExp == 0) {
7937 if (aSig == 0) {
7938 return a;
7939 }
7940 aExp++;
7941 }
69397542 7942
326b9e98
AJ
7943 if (n > 0x10000) {
7944 n = 0x10000;
7945 } else if (n < -0x10000) {
7946 n = -0x10000;
7947 }
7948
9ee6e8bb 7949 aExp += n;
a2f2d288
PM
7950 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7951 aSign, aExp, aSig, 0, status);
9ee6e8bb 7952}
9ee6e8bb 7953
e5a41ffa 7954float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7955{
7956 flag aSign;
326b9e98 7957 int32_t aExp;
bb98fe42 7958 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7959
7960 aSig1 = extractFloat128Frac1( a );
7961 aSig0 = extractFloat128Frac0( a );
7962 aExp = extractFloat128Exp( a );
7963 aSign = extractFloat128Sign( a );
7964 if ( aExp == 0x7FFF ) {
326b9e98 7965 if ( aSig0 | aSig1 ) {
ff32e16e 7966 return propagateFloat128NaN(a, a, status);
326b9e98 7967 }
9ee6e8bb
PB
7968 return a;
7969 }
3c85c37f 7970 if (aExp != 0) {
69397542 7971 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7972 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7973 return a;
3c85c37f
PM
7974 } else {
7975 aExp++;
7976 }
69397542 7977
326b9e98
AJ
7978 if (n > 0x10000) {
7979 n = 0x10000;
7980 } else if (n < -0x10000) {
7981 n = -0x10000;
7982 }
7983
69397542
PB
7984 aExp += n - 1;
7985 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7986 , status);
9ee6e8bb
PB
7987
7988}
f6b3b108
EC
7989
7990static void __attribute__((constructor)) softfloat_init(void)
7991{
7992 union_float64 ua, ub, uc, ur;
7993
7994 if (QEMU_NO_HARDFLOAT) {
7995 return;
7996 }
7997 /*
7998 * Test that the host's FMA is not obviously broken. For example,
7999 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
8000 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
8001 */
8002 ua.s = 0x0020000000000001ULL;
8003 ub.s = 0x3ca0000000000000ULL;
8004 uc.s = 0x0020000000000000ULL;
8005 ur.h = fma(ua.h, ub.h, uc.h);
8006 if (ur.s != 0x0020000000000001ULL) {
8007 force_soft_fma = true;
8008 }
8009}
This page took 1.815844 seconds and 4 git commands to generate.