]> Git Repo - qemu.git/blame - include/fpu/softfloat-macros.h
works with less than base ISA qemu-system-riscv32 -M virt -bios none -kernel output...
[qemu.git] / include / fpu / softfloat-macros.h
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support macros
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
16017c48
PM
11 *
12 * Any future contributions to this file after December 1st 2014 will be
13 * taken to be licensed under the Softfloat-2a license unless specifically
14 * indicated otherwise.
8d725fac 15 */
158142c2 16
a7d1ac78
PM
17/*
18===============================================================================
158142c2 19This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
a7d1ac78 20Arithmetic Package, Release 2a.
158142c2
FB
21
22Written by John R. Hauser. This work was made possible in part by the
23International Computer Science Institute, located at Suite 600, 1947 Center
24Street, Berkeley, California 94704. Funding was partially provided by the
25National Science Foundation under grant MIP-9311980. The original version
26of this code was written as part of a project to build a fixed-point vector
27processor in collaboration with the University of California at Berkeley,
28overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 29is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
30arithmetic/SoftFloat.html'.
31
a7d1ac78
PM
32THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
33has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
34TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
35PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
36AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
37
38Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
39(1) they include prominent notice that the work is derivative, and (2) they
40include prominent notice akin to these four paragraphs for those parts of
41this code that are retained.
158142c2 42
a7d1ac78
PM
43===============================================================================
44*/
158142c2 45
16017c48
PM
46/* BSD licensing:
47 * Copyright (c) 2006, Fabrice Bellard
48 * All rights reserved.
49 *
50 * Redistribution and use in source and binary forms, with or without
51 * modification, are permitted provided that the following conditions are met:
52 *
53 * 1. Redistributions of source code must retain the above copyright notice,
54 * this list of conditions and the following disclaimer.
55 *
56 * 2. Redistributions in binary form must reproduce the above copyright notice,
57 * this list of conditions and the following disclaimer in the documentation
58 * and/or other materials provided with the distribution.
59 *
60 * 3. Neither the name of the copyright holder nor the names of its contributors
61 * may be used to endorse or promote products derived from this software without
62 * specific prior written permission.
63 *
64 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
65 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
67 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
68 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
69 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
70 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
71 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
72 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
73 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
74 * THE POSSIBILITY OF SUCH DAMAGE.
75 */
76
f91005e1
MA
77#ifndef FPU_SOFTFLOAT_MACROS_H
78#define FPU_SOFTFLOAT_MACROS_H
79
5937fb63 80#include "fpu/softfloat-types.h"
cb3ad036 81#include "qemu/host-utils.h"
ec150c7e 82
463e45dc
RH
83/**
84 * shl_double: double-word merging left shift
85 * @l: left or most-significant word
86 * @r: right or least-significant word
87 * @c: shift count
88 *
89 * Shift @l left by @c bits, shifting in bits from @r.
90 */
91static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
92{
93#if defined(__x86_64__)
94 asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
95 return l;
96#else
97 return c ? (l << c) | (r >> (64 - c)) : l;
98#endif
99}
100
101/**
102 * shr_double: double-word merging right shift
103 * @l: left or most-significant word
104 * @r: right or least-significant word
105 * @c: shift count
106 *
107 * Shift @r right by @c bits, shifting in bits from @l.
108 */
109static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
110{
111#if defined(__x86_64__)
112 asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
113 return r;
114#else
115 return c ? (r >> c) | (l << (64 - c)) : r;
116#endif
117}
118
158142c2
FB
119/*----------------------------------------------------------------------------
120| Shifts `a' right by the number of bits given in `count'. If any nonzero
121| bits are shifted off, they are ``jammed'' into the least significant bit of
122| the result by setting the least significant bit to 1. The value of `count'
123| can be arbitrarily large; in particular, if `count' is greater than 32, the
124| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
125| The result is stored in the location pointed to by `zPtr'.
126*----------------------------------------------------------------------------*/
127
07d792d2 128static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
158142c2 129{
bb98fe42 130 uint32_t z;
158142c2
FB
131
132 if ( count == 0 ) {
133 z = a;
134 }
135 else if ( count < 32 ) {
136 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
137 }
138 else {
139 z = ( a != 0 );
140 }
141 *zPtr = z;
142
143}
144
145/*----------------------------------------------------------------------------
146| Shifts `a' right by the number of bits given in `count'. If any nonzero
147| bits are shifted off, they are ``jammed'' into the least significant bit of
148| the result by setting the least significant bit to 1. The value of `count'
149| can be arbitrarily large; in particular, if `count' is greater than 64, the
150| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
151| The result is stored in the location pointed to by `zPtr'.
152*----------------------------------------------------------------------------*/
153
07d792d2 154static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
158142c2 155{
bb98fe42 156 uint64_t z;
158142c2
FB
157
158 if ( count == 0 ) {
159 z = a;
160 }
161 else if ( count < 64 ) {
162 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
163 }
164 else {
165 z = ( a != 0 );
166 }
167 *zPtr = z;
168
169}
170
171/*----------------------------------------------------------------------------
172| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
173| _plus_ the number of bits given in `count'. The shifted result is at most
174| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
175| bits shifted off form a second 64-bit result as follows: The _last_ bit
176| shifted off is the most-significant bit of the extra result, and the other
177| 63 bits of the extra result are all zero if and only if _all_but_the_last_
178| bits shifted off were all zero. This extra result is stored in the location
179| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
a7d1ac78
PM
180| (This routine makes more sense if `a0' and `a1' are considered to form a
181| fixed-point value with binary point between `a0' and `a1'. This fixed-point
182| value is shifted right by the number of bits given in `count', and the
183| integer part of the result is returned at the location pointed to by
158142c2
FB
184| `z0Ptr'. The fractional part of the result may be slightly corrupted as
185| described above, and is returned at the location pointed to by `z1Ptr'.)
186*----------------------------------------------------------------------------*/
187
a49db98d 188static inline void
158142c2 189 shift64ExtraRightJamming(
07d792d2 190 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 191{
bb98fe42 192 uint64_t z0, z1;
8f506c70 193 int8_t negCount = ( - count ) & 63;
158142c2
FB
194
195 if ( count == 0 ) {
196 z1 = a1;
197 z0 = a0;
198 }
199 else if ( count < 64 ) {
200 z1 = ( a0<<negCount ) | ( a1 != 0 );
201 z0 = a0>>count;
202 }
203 else {
204 if ( count == 64 ) {
205 z1 = a0 | ( a1 != 0 );
206 }
207 else {
208 z1 = ( ( a0 | a1 ) != 0 );
209 }
210 z0 = 0;
211 }
212 *z1Ptr = z1;
213 *z0Ptr = z0;
214
215}
216
217/*----------------------------------------------------------------------------
218| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
219| number of bits given in `count'. Any bits shifted off are lost. The value
220| of `count' can be arbitrarily large; in particular, if `count' is greater
221| than 128, the result will be 0. The result is broken into two 64-bit pieces
222| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
223*----------------------------------------------------------------------------*/
224
a49db98d 225static inline void
158142c2 226 shift128Right(
07d792d2 227 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 228{
bb98fe42 229 uint64_t z0, z1;
8f506c70 230 int8_t negCount = ( - count ) & 63;
158142c2
FB
231
232 if ( count == 0 ) {
233 z1 = a1;
234 z0 = a0;
235 }
236 else if ( count < 64 ) {
237 z1 = ( a0<<negCount ) | ( a1>>count );
238 z0 = a0>>count;
239 }
240 else {
4039736e 241 z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
158142c2
FB
242 z0 = 0;
243 }
244 *z1Ptr = z1;
245 *z0Ptr = z0;
246
247}
248
249/*----------------------------------------------------------------------------
250| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
251| number of bits given in `count'. If any nonzero bits are shifted off, they
252| are ``jammed'' into the least significant bit of the result by setting the
253| least significant bit to 1. The value of `count' can be arbitrarily large;
254| in particular, if `count' is greater than 128, the result will be either
255| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
256| nonzero. The result is broken into two 64-bit pieces which are stored at
257| the locations pointed to by `z0Ptr' and `z1Ptr'.
258*----------------------------------------------------------------------------*/
259
a49db98d 260static inline void
158142c2 261 shift128RightJamming(
07d792d2 262 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 263{
bb98fe42 264 uint64_t z0, z1;
8f506c70 265 int8_t negCount = ( - count ) & 63;
158142c2
FB
266
267 if ( count == 0 ) {
268 z1 = a1;
269 z0 = a0;
270 }
271 else if ( count < 64 ) {
272 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
273 z0 = a0>>count;
274 }
275 else {
276 if ( count == 64 ) {
277 z1 = a0 | ( a1 != 0 );
278 }
279 else if ( count < 128 ) {
280 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
281 }
282 else {
283 z1 = ( ( a0 | a1 ) != 0 );
284 }
285 z0 = 0;
286 }
287 *z1Ptr = z1;
288 *z0Ptr = z0;
289
290}
291
292/*----------------------------------------------------------------------------
293| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
294| by 64 _plus_ the number of bits given in `count'. The shifted result is
295| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
296| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
297| off form a third 64-bit result as follows: The _last_ bit shifted off is
298| the most-significant bit of the extra result, and the other 63 bits of the
299| extra result are all zero if and only if _all_but_the_last_ bits shifted off
300| were all zero. This extra result is stored in the location pointed to by
301| `z2Ptr'. The value of `count' can be arbitrarily large.
302| (This routine makes more sense if `a0', `a1', and `a2' are considered
303| to form a fixed-point value with binary point between `a1' and `a2'. This
304| fixed-point value is shifted right by the number of bits given in `count',
305| and the integer part of the result is returned at the locations pointed to
306| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
307| corrupted as described above, and is returned at the location pointed to by
308| `z2Ptr'.)
309*----------------------------------------------------------------------------*/
310
a49db98d 311static inline void
158142c2 312 shift128ExtraRightJamming(
bb98fe42
AF
313 uint64_t a0,
314 uint64_t a1,
315 uint64_t a2,
07d792d2 316 int count,
bb98fe42
AF
317 uint64_t *z0Ptr,
318 uint64_t *z1Ptr,
319 uint64_t *z2Ptr
158142c2
FB
320 )
321{
bb98fe42 322 uint64_t z0, z1, z2;
8f506c70 323 int8_t negCount = ( - count ) & 63;
158142c2
FB
324
325 if ( count == 0 ) {
326 z2 = a2;
327 z1 = a1;
328 z0 = a0;
329 }
330 else {
331 if ( count < 64 ) {
332 z2 = a1<<negCount;
333 z1 = ( a0<<negCount ) | ( a1>>count );
334 z0 = a0>>count;
335 }
336 else {
337 if ( count == 64 ) {
338 z2 = a1;
339 z1 = a0;
340 }
341 else {
342 a2 |= a1;
343 if ( count < 128 ) {
344 z2 = a0<<negCount;
345 z1 = a0>>( count & 63 );
346 }
347 else {
348 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
349 z1 = 0;
350 }
351 }
352 z0 = 0;
353 }
354 z2 |= ( a2 != 0 );
355 }
356 *z2Ptr = z2;
357 *z1Ptr = z1;
358 *z0Ptr = z0;
359
360}
361
362/*----------------------------------------------------------------------------
363| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
364| number of bits given in `count'. Any bits shifted off are lost. The value
365| of `count' must be less than 64. The result is broken into two 64-bit
366| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
367*----------------------------------------------------------------------------*/
368
5dfbc9e4
RH
369static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
370 uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 371{
5dfbc9e4
RH
372 *z1Ptr = a1 << count;
373 *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
374}
158142c2 375
5dfbc9e4
RH
376/*----------------------------------------------------------------------------
377| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
378| number of bits given in `count'. Any bits shifted off are lost. The value
379| of `count' may be greater than 64. The result is broken into two 64-bit
380| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
381*----------------------------------------------------------------------------*/
158142c2 382
5dfbc9e4
RH
383static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
384 uint64_t *z0Ptr, uint64_t *z1Ptr)
385{
386 if (count < 64) {
387 *z1Ptr = a1 << count;
388 *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
389 } else {
390 *z1Ptr = 0;
391 *z0Ptr = a1 << (count - 64);
392 }
158142c2
FB
393}
394
395/*----------------------------------------------------------------------------
396| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
397| by the number of bits given in `count'. Any bits shifted off are lost.
398| The value of `count' must be less than 64. The result is broken into three
399| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
400| `z1Ptr', and `z2Ptr'.
401*----------------------------------------------------------------------------*/
402
a49db98d 403static inline void
158142c2 404 shortShift192Left(
bb98fe42
AF
405 uint64_t a0,
406 uint64_t a1,
407 uint64_t a2,
07d792d2 408 int count,
bb98fe42
AF
409 uint64_t *z0Ptr,
410 uint64_t *z1Ptr,
411 uint64_t *z2Ptr
158142c2
FB
412 )
413{
bb98fe42 414 uint64_t z0, z1, z2;
8f506c70 415 int8_t negCount;
158142c2
FB
416
417 z2 = a2<<count;
418 z1 = a1<<count;
419 z0 = a0<<count;
420 if ( 0 < count ) {
421 negCount = ( ( - count ) & 63 );
422 z1 |= a2>>negCount;
423 z0 |= a1>>negCount;
424 }
425 *z2Ptr = z2;
426 *z1Ptr = z1;
427 *z0Ptr = z0;
428
429}
430
431/*----------------------------------------------------------------------------
432| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
433| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
434| any carry out is lost. The result is broken into two 64-bit pieces which
435| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
436*----------------------------------------------------------------------------*/
437
cb3ad036
RH
438static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
439 uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 440{
cb3ad036
RH
441 bool c = 0;
442 *z1Ptr = uadd64_carry(a1, b1, &c);
443 *z0Ptr = uadd64_carry(a0, b0, &c);
158142c2
FB
444}
445
446/*----------------------------------------------------------------------------
447| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
448| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
449| modulo 2^192, so any carry out is lost. The result is broken into three
450| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
451| `z1Ptr', and `z2Ptr'.
452*----------------------------------------------------------------------------*/
453
cb3ad036
RH
454static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
455 uint64_t b0, uint64_t b1, uint64_t b2,
456 uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
158142c2 457{
cb3ad036
RH
458 bool c = 0;
459 *z2Ptr = uadd64_carry(a2, b2, &c);
460 *z1Ptr = uadd64_carry(a1, b1, &c);
461 *z0Ptr = uadd64_carry(a0, b0, &c);
158142c2
FB
462}
463
464/*----------------------------------------------------------------------------
465| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
466| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
467| 2^128, so any borrow out (carry out) is lost. The result is broken into two
468| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
469| `z1Ptr'.
470*----------------------------------------------------------------------------*/
471
cb3ad036
RH
472static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
473 uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 474{
cb3ad036
RH
475 bool c = 0;
476 *z1Ptr = usub64_borrow(a1, b1, &c);
477 *z0Ptr = usub64_borrow(a0, b0, &c);
158142c2
FB
478}
479
480/*----------------------------------------------------------------------------
481| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
482| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
483| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
484| result is broken into three 64-bit pieces which are stored at the locations
485| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
486*----------------------------------------------------------------------------*/
487
cb3ad036
RH
488static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
489 uint64_t b0, uint64_t b1, uint64_t b2,
490 uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
158142c2 491{
cb3ad036
RH
492 bool c = 0;
493 *z2Ptr = usub64_borrow(a2, b2, &c);
494 *z1Ptr = usub64_borrow(a1, b1, &c);
495 *z0Ptr = usub64_borrow(a0, b0, &c);
158142c2
FB
496}
497
498/*----------------------------------------------------------------------------
499| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
500| into two 64-bit pieces which are stored at the locations pointed to by
501| `z0Ptr' and `z1Ptr'.
502*----------------------------------------------------------------------------*/
503
b4d09b17
RH
504static inline void
505mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
158142c2 506{
b4d09b17 507 mulu64(z1Ptr, z0Ptr, a, b);
158142c2
FB
508}
509
510/*----------------------------------------------------------------------------
511| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
512| `b' to obtain a 192-bit product. The product is broken into three 64-bit
513| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
514| `z2Ptr'.
515*----------------------------------------------------------------------------*/
516
a49db98d 517static inline void
5ffb6bd9
RH
518mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
519 uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
158142c2 520{
5ffb6bd9 521 uint64_t z0, z1, m1;
158142c2 522
5ffb6bd9
RH
523 mul64To128(a1, b, &m1, z2Ptr);
524 mul64To128(a0, b, &z0, &z1);
525 add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
158142c2
FB
526}
527
528/*----------------------------------------------------------------------------
529| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
530| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
531| product. The product is broken into four 64-bit pieces which are stored at
532| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
533*----------------------------------------------------------------------------*/
534
cd55a56e
RH
535static inline void mul128To256(uint64_t a0, uint64_t a1,
536 uint64_t b0, uint64_t b1,
537 uint64_t *z0Ptr, uint64_t *z1Ptr,
538 uint64_t *z2Ptr, uint64_t *z3Ptr)
158142c2 539{
cd55a56e
RH
540 uint64_t z0, z1, z2;
541 uint64_t m0, m1, m2, n1, n2;
158142c2 542
cd55a56e
RH
543 mul64To128(a1, b0, &m1, &m2);
544 mul64To128(a0, b1, &n1, &n2);
545 mul64To128(a1, b1, &z2, z3Ptr);
546 mul64To128(a0, b0, &z0, &z1);
158142c2 547
cd55a56e
RH
548 add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
549 add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
158142c2
FB
550}
551
552/*----------------------------------------------------------------------------
553| Returns an approximation to the 64-bit integer quotient obtained by dividing
554| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
555| divisor `b' must be at least 2^63. If q is the exact quotient truncated
556| toward zero, the approximation returned lies between q and q + 2 inclusive.
557| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
558| unsigned integer is returned.
559*----------------------------------------------------------------------------*/
560
88857aca 561static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
158142c2 562{
bb98fe42
AF
563 uint64_t b0, b1;
564 uint64_t rem0, rem1, term0, term1;
565 uint64_t z;
158142c2 566
e9321124 567 if ( b <= a0 ) return UINT64_C(0xFFFFFFFFFFFFFFFF);
158142c2 568 b0 = b>>32;
e9321124 569 z = ( b0<<32 <= a0 ) ? UINT64_C(0xFFFFFFFF00000000) : ( a0 / b0 )<<32;
158142c2
FB
570 mul64To128( b, z, &term0, &term1 );
571 sub128( a0, a1, term0, term1, &rem0, &rem1 );
bb98fe42 572 while ( ( (int64_t) rem0 ) < 0 ) {
e9321124 573 z -= UINT64_C(0x100000000);
158142c2
FB
574 b1 = b<<32;
575 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
576 }
577 rem0 = ( rem0<<32 ) | ( rem1>>32 );
578 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
579 return z;
580
581}
582
583/*----------------------------------------------------------------------------
584| Returns an approximation to the square root of the 32-bit significand given
585| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
586| `aExp' (the least significant bit) is 1, the integer returned approximates
587| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
588| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
589| case, the approximation returned lies strictly within +/-2 of the exact
590| value.
591*----------------------------------------------------------------------------*/
592
88857aca 593static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
158142c2 594{
bb98fe42 595 static const uint16_t sqrtOddAdjustments[] = {
158142c2
FB
596 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
597 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
598 };
bb98fe42 599 static const uint16_t sqrtEvenAdjustments[] = {
158142c2
FB
600 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
601 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
602 };
8f506c70 603 int8_t index;
bb98fe42 604 uint32_t z;
158142c2
FB
605
606 index = ( a>>27 ) & 15;
607 if ( aExp & 1 ) {
3f4cb3d3 608 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
158142c2
FB
609 z = ( ( a / z )<<14 ) + ( z<<15 );
610 a >>= 1;
611 }
612 else {
3f4cb3d3 613 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
158142c2
FB
614 z = a / z + z;
615 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
bb98fe42 616 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
158142c2 617 }
bb98fe42 618 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
158142c2
FB
619
620}
621
158142c2
FB
622/*----------------------------------------------------------------------------
623| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
624| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
625| Otherwise, returns 0.
626*----------------------------------------------------------------------------*/
627
c120391c 628static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2 629{
c120391c 630 return a0 == b0 && a1 == b1;
158142c2
FB
631}
632
633/*----------------------------------------------------------------------------
634| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
635| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
636| Otherwise, returns 0.
637*----------------------------------------------------------------------------*/
638
c120391c 639static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2 640{
c120391c 641 return a0 < b0 || (a0 == b0 && a1 <= b1);
158142c2
FB
642}
643
644/*----------------------------------------------------------------------------
645| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
646| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
647| returns 0.
648*----------------------------------------------------------------------------*/
649
c120391c 650static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2 651{
c120391c 652 return a0 < b0 || (a0 == b0 && a1 < b1);
158142c2
FB
653}
654
655/*----------------------------------------------------------------------------
656| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
657| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
658| Otherwise, returns 0.
659*----------------------------------------------------------------------------*/
660
c120391c 661static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2 662{
c120391c 663 return a0 != b0 || a1 != b1;
158142c2 664}
f91005e1 665
feaf2e9c
RH
666/*
667 * Similarly, comparisons of 192-bit values.
668 */
669
670static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2,
671 uint64_t b0, uint64_t b1, uint64_t b2)
672{
673 return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0;
674}
675
676static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2,
677 uint64_t b0, uint64_t b1, uint64_t b2)
678{
679 if (a0 != b0) {
680 return a0 < b0;
681 }
682 if (a1 != b1) {
683 return a1 < b1;
684 }
685 return a2 <= b2;
686}
687
688static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2,
689 uint64_t b0, uint64_t b1, uint64_t b2)
690{
691 if (a0 != b0) {
692 return a0 < b0;
693 }
694 if (a1 != b1) {
695 return a1 < b1;
696 }
697 return a2 < b2;
698}
699
f91005e1 700#endif
This page took 0.913252 seconds and 5 git commands to generate.