]>
Commit | Line | Data |
---|---|---|
8d725fac AF |
1 | /* |
2 | * QEMU float support macros | |
3 | * | |
16017c48 PM |
4 | * The code in this source file is derived from release 2a of the SoftFloat |
5 | * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and | |
6 | * some later contributions) are provided under that license, as detailed below. | |
7 | * It has subsequently been modified by contributors to the QEMU Project, | |
8 | * so some portions are provided under: | |
9 | * the SoftFloat-2a license | |
10 | * the BSD license | |
16017c48 PM |
11 | * |
12 | * Any future contributions to this file after December 1st 2014 will be | |
13 | * taken to be licensed under the Softfloat-2a license unless specifically | |
14 | * indicated otherwise. | |
8d725fac | 15 | */ |
158142c2 | 16 | |
a7d1ac78 PM |
17 | /* |
18 | =============================================================================== | |
158142c2 | 19 | This C source fragment is part of the SoftFloat IEC/IEEE Floating-point |
a7d1ac78 | 20 | Arithmetic Package, Release 2a. |
158142c2 FB |
21 | |
22 | Written by John R. Hauser. This work was made possible in part by the | |
23 | International Computer Science Institute, located at Suite 600, 1947 Center | |
24 | Street, Berkeley, California 94704. Funding was partially provided by the | |
25 | National Science Foundation under grant MIP-9311980. The original version | |
26 | of this code was written as part of a project to build a fixed-point vector | |
27 | processor in collaboration with the University of California at Berkeley, | |
28 | overseen by Profs. Nelson Morgan and John Wawrzynek. More information | |
a7d1ac78 | 29 | is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ |
158142c2 FB |
30 | arithmetic/SoftFloat.html'. |
31 | ||
a7d1ac78 PM |
32 | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort |
33 | has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | |
34 | TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | |
35 | PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | |
36 | AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | |
158142c2 FB |
37 | |
38 | Derivative works are acceptable, even for commercial purposes, so long as | |
a7d1ac78 PM |
39 | (1) they include prominent notice that the work is derivative, and (2) they |
40 | include prominent notice akin to these four paragraphs for those parts of | |
41 | this code that are retained. | |
158142c2 | 42 | |
a7d1ac78 PM |
43 | =============================================================================== |
44 | */ | |
158142c2 | 45 | |
16017c48 PM |
46 | /* BSD licensing: |
47 | * Copyright (c) 2006, Fabrice Bellard | |
48 | * All rights reserved. | |
49 | * | |
50 | * Redistribution and use in source and binary forms, with or without | |
51 | * modification, are permitted provided that the following conditions are met: | |
52 | * | |
53 | * 1. Redistributions of source code must retain the above copyright notice, | |
54 | * this list of conditions and the following disclaimer. | |
55 | * | |
56 | * 2. Redistributions in binary form must reproduce the above copyright notice, | |
57 | * this list of conditions and the following disclaimer in the documentation | |
58 | * and/or other materials provided with the distribution. | |
59 | * | |
60 | * 3. Neither the name of the copyright holder nor the names of its contributors | |
61 | * may be used to endorse or promote products derived from this software without | |
62 | * specific prior written permission. | |
63 | * | |
64 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
65 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
66 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
67 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | |
68 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
69 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
70 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
71 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
72 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
73 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |
74 | * THE POSSIBILITY OF SUCH DAMAGE. | |
75 | */ | |
76 | ||
f91005e1 MA |
77 | #ifndef FPU_SOFTFLOAT_MACROS_H |
78 | #define FPU_SOFTFLOAT_MACROS_H | |
79 | ||
5937fb63 | 80 | #include "fpu/softfloat-types.h" |
cb3ad036 | 81 | #include "qemu/host-utils.h" |
ec150c7e | 82 | |
463e45dc RH |
83 | /** |
84 | * shl_double: double-word merging left shift | |
85 | * @l: left or most-significant word | |
86 | * @r: right or least-significant word | |
87 | * @c: shift count | |
88 | * | |
89 | * Shift @l left by @c bits, shifting in bits from @r. | |
90 | */ | |
91 | static inline uint64_t shl_double(uint64_t l, uint64_t r, int c) | |
92 | { | |
93 | #if defined(__x86_64__) | |
94 | asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c)); | |
95 | return l; | |
96 | #else | |
97 | return c ? (l << c) | (r >> (64 - c)) : l; | |
98 | #endif | |
99 | } | |
100 | ||
101 | /** | |
102 | * shr_double: double-word merging right shift | |
103 | * @l: left or most-significant word | |
104 | * @r: right or least-significant word | |
105 | * @c: shift count | |
106 | * | |
107 | * Shift @r right by @c bits, shifting in bits from @l. | |
108 | */ | |
109 | static inline uint64_t shr_double(uint64_t l, uint64_t r, int c) | |
110 | { | |
111 | #if defined(__x86_64__) | |
112 | asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c)); | |
113 | return r; | |
114 | #else | |
115 | return c ? (r >> c) | (l << (64 - c)) : r; | |
116 | #endif | |
117 | } | |
118 | ||
158142c2 FB |
119 | /*---------------------------------------------------------------------------- |
120 | | Shifts `a' right by the number of bits given in `count'. If any nonzero | |
121 | | bits are shifted off, they are ``jammed'' into the least significant bit of | |
122 | | the result by setting the least significant bit to 1. The value of `count' | |
123 | | can be arbitrarily large; in particular, if `count' is greater than 32, the | |
124 | | result will be either 0 or 1, depending on whether `a' is zero or nonzero. | |
125 | | The result is stored in the location pointed to by `zPtr'. | |
126 | *----------------------------------------------------------------------------*/ | |
127 | ||
07d792d2 | 128 | static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr) |
158142c2 | 129 | { |
bb98fe42 | 130 | uint32_t z; |
158142c2 FB |
131 | |
132 | if ( count == 0 ) { | |
133 | z = a; | |
134 | } | |
135 | else if ( count < 32 ) { | |
136 | z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); | |
137 | } | |
138 | else { | |
139 | z = ( a != 0 ); | |
140 | } | |
141 | *zPtr = z; | |
142 | ||
143 | } | |
144 | ||
145 | /*---------------------------------------------------------------------------- | |
146 | | Shifts `a' right by the number of bits given in `count'. If any nonzero | |
147 | | bits are shifted off, they are ``jammed'' into the least significant bit of | |
148 | | the result by setting the least significant bit to 1. The value of `count' | |
149 | | can be arbitrarily large; in particular, if `count' is greater than 64, the | |
150 | | result will be either 0 or 1, depending on whether `a' is zero or nonzero. | |
151 | | The result is stored in the location pointed to by `zPtr'. | |
152 | *----------------------------------------------------------------------------*/ | |
153 | ||
07d792d2 | 154 | static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr) |
158142c2 | 155 | { |
bb98fe42 | 156 | uint64_t z; |
158142c2 FB |
157 | |
158 | if ( count == 0 ) { | |
159 | z = a; | |
160 | } | |
161 | else if ( count < 64 ) { | |
162 | z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); | |
163 | } | |
164 | else { | |
165 | z = ( a != 0 ); | |
166 | } | |
167 | *zPtr = z; | |
168 | ||
169 | } | |
170 | ||
171 | /*---------------------------------------------------------------------------- | |
172 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 | |
173 | | _plus_ the number of bits given in `count'. The shifted result is at most | |
174 | | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The | |
175 | | bits shifted off form a second 64-bit result as follows: The _last_ bit | |
176 | | shifted off is the most-significant bit of the extra result, and the other | |
177 | | 63 bits of the extra result are all zero if and only if _all_but_the_last_ | |
178 | | bits shifted off were all zero. This extra result is stored in the location | |
179 | | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. | |
a7d1ac78 PM |
180 | | (This routine makes more sense if `a0' and `a1' are considered to form a |
181 | | fixed-point value with binary point between `a0' and `a1'. This fixed-point | |
182 | | value is shifted right by the number of bits given in `count', and the | |
183 | | integer part of the result is returned at the location pointed to by | |
158142c2 FB |
184 | | `z0Ptr'. The fractional part of the result may be slightly corrupted as |
185 | | described above, and is returned at the location pointed to by `z1Ptr'.) | |
186 | *----------------------------------------------------------------------------*/ | |
187 | ||
a49db98d | 188 | static inline void |
158142c2 | 189 | shift64ExtraRightJamming( |
07d792d2 | 190 | uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) |
158142c2 | 191 | { |
bb98fe42 | 192 | uint64_t z0, z1; |
8f506c70 | 193 | int8_t negCount = ( - count ) & 63; |
158142c2 FB |
194 | |
195 | if ( count == 0 ) { | |
196 | z1 = a1; | |
197 | z0 = a0; | |
198 | } | |
199 | else if ( count < 64 ) { | |
200 | z1 = ( a0<<negCount ) | ( a1 != 0 ); | |
201 | z0 = a0>>count; | |
202 | } | |
203 | else { | |
204 | if ( count == 64 ) { | |
205 | z1 = a0 | ( a1 != 0 ); | |
206 | } | |
207 | else { | |
208 | z1 = ( ( a0 | a1 ) != 0 ); | |
209 | } | |
210 | z0 = 0; | |
211 | } | |
212 | *z1Ptr = z1; | |
213 | *z0Ptr = z0; | |
214 | ||
215 | } | |
216 | ||
217 | /*---------------------------------------------------------------------------- | |
218 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the | |
219 | | number of bits given in `count'. Any bits shifted off are lost. The value | |
220 | | of `count' can be arbitrarily large; in particular, if `count' is greater | |
221 | | than 128, the result will be 0. The result is broken into two 64-bit pieces | |
222 | | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
223 | *----------------------------------------------------------------------------*/ | |
224 | ||
a49db98d | 225 | static inline void |
158142c2 | 226 | shift128Right( |
07d792d2 | 227 | uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) |
158142c2 | 228 | { |
bb98fe42 | 229 | uint64_t z0, z1; |
8f506c70 | 230 | int8_t negCount = ( - count ) & 63; |
158142c2 FB |
231 | |
232 | if ( count == 0 ) { | |
233 | z1 = a1; | |
234 | z0 = a0; | |
235 | } | |
236 | else if ( count < 64 ) { | |
237 | z1 = ( a0<<negCount ) | ( a1>>count ); | |
238 | z0 = a0>>count; | |
239 | } | |
240 | else { | |
4039736e | 241 | z1 = (count < 128) ? (a0 >> (count & 63)) : 0; |
158142c2 FB |
242 | z0 = 0; |
243 | } | |
244 | *z1Ptr = z1; | |
245 | *z0Ptr = z0; | |
246 | ||
247 | } | |
248 | ||
249 | /*---------------------------------------------------------------------------- | |
250 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the | |
251 | | number of bits given in `count'. If any nonzero bits are shifted off, they | |
252 | | are ``jammed'' into the least significant bit of the result by setting the | |
253 | | least significant bit to 1. The value of `count' can be arbitrarily large; | |
254 | | in particular, if `count' is greater than 128, the result will be either | |
255 | | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or | |
256 | | nonzero. The result is broken into two 64-bit pieces which are stored at | |
257 | | the locations pointed to by `z0Ptr' and `z1Ptr'. | |
258 | *----------------------------------------------------------------------------*/ | |
259 | ||
a49db98d | 260 | static inline void |
158142c2 | 261 | shift128RightJamming( |
07d792d2 | 262 | uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) |
158142c2 | 263 | { |
bb98fe42 | 264 | uint64_t z0, z1; |
8f506c70 | 265 | int8_t negCount = ( - count ) & 63; |
158142c2 FB |
266 | |
267 | if ( count == 0 ) { | |
268 | z1 = a1; | |
269 | z0 = a0; | |
270 | } | |
271 | else if ( count < 64 ) { | |
272 | z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); | |
273 | z0 = a0>>count; | |
274 | } | |
275 | else { | |
276 | if ( count == 64 ) { | |
277 | z1 = a0 | ( a1 != 0 ); | |
278 | } | |
279 | else if ( count < 128 ) { | |
280 | z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); | |
281 | } | |
282 | else { | |
283 | z1 = ( ( a0 | a1 ) != 0 ); | |
284 | } | |
285 | z0 = 0; | |
286 | } | |
287 | *z1Ptr = z1; | |
288 | *z0Ptr = z0; | |
289 | ||
290 | } | |
291 | ||
292 | /*---------------------------------------------------------------------------- | |
293 | | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right | |
294 | | by 64 _plus_ the number of bits given in `count'. The shifted result is | |
295 | | at most 128 nonzero bits; these are broken into two 64-bit pieces which are | |
296 | | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted | |
297 | | off form a third 64-bit result as follows: The _last_ bit shifted off is | |
298 | | the most-significant bit of the extra result, and the other 63 bits of the | |
299 | | extra result are all zero if and only if _all_but_the_last_ bits shifted off | |
300 | | were all zero. This extra result is stored in the location pointed to by | |
301 | | `z2Ptr'. The value of `count' can be arbitrarily large. | |
302 | | (This routine makes more sense if `a0', `a1', and `a2' are considered | |
303 | | to form a fixed-point value with binary point between `a1' and `a2'. This | |
304 | | fixed-point value is shifted right by the number of bits given in `count', | |
305 | | and the integer part of the result is returned at the locations pointed to | |
306 | | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly | |
307 | | corrupted as described above, and is returned at the location pointed to by | |
308 | | `z2Ptr'.) | |
309 | *----------------------------------------------------------------------------*/ | |
310 | ||
a49db98d | 311 | static inline void |
158142c2 | 312 | shift128ExtraRightJamming( |
bb98fe42 AF |
313 | uint64_t a0, |
314 | uint64_t a1, | |
315 | uint64_t a2, | |
07d792d2 | 316 | int count, |
bb98fe42 AF |
317 | uint64_t *z0Ptr, |
318 | uint64_t *z1Ptr, | |
319 | uint64_t *z2Ptr | |
158142c2 FB |
320 | ) |
321 | { | |
bb98fe42 | 322 | uint64_t z0, z1, z2; |
8f506c70 | 323 | int8_t negCount = ( - count ) & 63; |
158142c2 FB |
324 | |
325 | if ( count == 0 ) { | |
326 | z2 = a2; | |
327 | z1 = a1; | |
328 | z0 = a0; | |
329 | } | |
330 | else { | |
331 | if ( count < 64 ) { | |
332 | z2 = a1<<negCount; | |
333 | z1 = ( a0<<negCount ) | ( a1>>count ); | |
334 | z0 = a0>>count; | |
335 | } | |
336 | else { | |
337 | if ( count == 64 ) { | |
338 | z2 = a1; | |
339 | z1 = a0; | |
340 | } | |
341 | else { | |
342 | a2 |= a1; | |
343 | if ( count < 128 ) { | |
344 | z2 = a0<<negCount; | |
345 | z1 = a0>>( count & 63 ); | |
346 | } | |
347 | else { | |
348 | z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); | |
349 | z1 = 0; | |
350 | } | |
351 | } | |
352 | z0 = 0; | |
353 | } | |
354 | z2 |= ( a2 != 0 ); | |
355 | } | |
356 | *z2Ptr = z2; | |
357 | *z1Ptr = z1; | |
358 | *z0Ptr = z0; | |
359 | ||
360 | } | |
361 | ||
362 | /*---------------------------------------------------------------------------- | |
363 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the | |
364 | | number of bits given in `count'. Any bits shifted off are lost. The value | |
365 | | of `count' must be less than 64. The result is broken into two 64-bit | |
366 | | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
367 | *----------------------------------------------------------------------------*/ | |
368 | ||
5dfbc9e4 RH |
369 | static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count, |
370 | uint64_t *z0Ptr, uint64_t *z1Ptr) | |
158142c2 | 371 | { |
5dfbc9e4 RH |
372 | *z1Ptr = a1 << count; |
373 | *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); | |
374 | } | |
158142c2 | 375 | |
5dfbc9e4 RH |
376 | /*---------------------------------------------------------------------------- |
377 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the | |
378 | | number of bits given in `count'. Any bits shifted off are lost. The value | |
379 | | of `count' may be greater than 64. The result is broken into two 64-bit | |
380 | | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
381 | *----------------------------------------------------------------------------*/ | |
158142c2 | 382 | |
5dfbc9e4 RH |
383 | static inline void shift128Left(uint64_t a0, uint64_t a1, int count, |
384 | uint64_t *z0Ptr, uint64_t *z1Ptr) | |
385 | { | |
386 | if (count < 64) { | |
387 | *z1Ptr = a1 << count; | |
388 | *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); | |
389 | } else { | |
390 | *z1Ptr = 0; | |
391 | *z0Ptr = a1 << (count - 64); | |
392 | } | |
158142c2 FB |
393 | } |
394 | ||
395 | /*---------------------------------------------------------------------------- | |
396 | | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left | |
397 | | by the number of bits given in `count'. Any bits shifted off are lost. | |
398 | | The value of `count' must be less than 64. The result is broken into three | |
399 | | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', | |
400 | | `z1Ptr', and `z2Ptr'. | |
401 | *----------------------------------------------------------------------------*/ | |
402 | ||
a49db98d | 403 | static inline void |
158142c2 | 404 | shortShift192Left( |
bb98fe42 AF |
405 | uint64_t a0, |
406 | uint64_t a1, | |
407 | uint64_t a2, | |
07d792d2 | 408 | int count, |
bb98fe42 AF |
409 | uint64_t *z0Ptr, |
410 | uint64_t *z1Ptr, | |
411 | uint64_t *z2Ptr | |
158142c2 FB |
412 | ) |
413 | { | |
bb98fe42 | 414 | uint64_t z0, z1, z2; |
8f506c70 | 415 | int8_t negCount; |
158142c2 FB |
416 | |
417 | z2 = a2<<count; | |
418 | z1 = a1<<count; | |
419 | z0 = a0<<count; | |
420 | if ( 0 < count ) { | |
421 | negCount = ( ( - count ) & 63 ); | |
422 | z1 |= a2>>negCount; | |
423 | z0 |= a1>>negCount; | |
424 | } | |
425 | *z2Ptr = z2; | |
426 | *z1Ptr = z1; | |
427 | *z0Ptr = z0; | |
428 | ||
429 | } | |
430 | ||
431 | /*---------------------------------------------------------------------------- | |
432 | | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit | |
433 | | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so | |
434 | | any carry out is lost. The result is broken into two 64-bit pieces which | |
435 | | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
436 | *----------------------------------------------------------------------------*/ | |
437 | ||
cb3ad036 RH |
438 | static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, |
439 | uint64_t *z0Ptr, uint64_t *z1Ptr) | |
158142c2 | 440 | { |
cb3ad036 RH |
441 | bool c = 0; |
442 | *z1Ptr = uadd64_carry(a1, b1, &c); | |
443 | *z0Ptr = uadd64_carry(a0, b0, &c); | |
158142c2 FB |
444 | } |
445 | ||
446 | /*---------------------------------------------------------------------------- | |
447 | | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the | |
448 | | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is | |
449 | | modulo 2^192, so any carry out is lost. The result is broken into three | |
450 | | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', | |
451 | | `z1Ptr', and `z2Ptr'. | |
452 | *----------------------------------------------------------------------------*/ | |
453 | ||
cb3ad036 RH |
454 | static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2, |
455 | uint64_t b0, uint64_t b1, uint64_t b2, | |
456 | uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) | |
158142c2 | 457 | { |
cb3ad036 RH |
458 | bool c = 0; |
459 | *z2Ptr = uadd64_carry(a2, b2, &c); | |
460 | *z1Ptr = uadd64_carry(a1, b1, &c); | |
461 | *z0Ptr = uadd64_carry(a0, b0, &c); | |
158142c2 FB |
462 | } |
463 | ||
464 | /*---------------------------------------------------------------------------- | |
465 | | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the | |
466 | | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo | |
467 | | 2^128, so any borrow out (carry out) is lost. The result is broken into two | |
468 | | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and | |
469 | | `z1Ptr'. | |
470 | *----------------------------------------------------------------------------*/ | |
471 | ||
cb3ad036 RH |
472 | static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, |
473 | uint64_t *z0Ptr, uint64_t *z1Ptr) | |
158142c2 | 474 | { |
cb3ad036 RH |
475 | bool c = 0; |
476 | *z1Ptr = usub64_borrow(a1, b1, &c); | |
477 | *z0Ptr = usub64_borrow(a0, b0, &c); | |
158142c2 FB |
478 | } |
479 | ||
480 | /*---------------------------------------------------------------------------- | |
481 | | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' | |
482 | | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. | |
483 | | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The | |
484 | | result is broken into three 64-bit pieces which are stored at the locations | |
485 | | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. | |
486 | *----------------------------------------------------------------------------*/ | |
487 | ||
cb3ad036 RH |
488 | static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2, |
489 | uint64_t b0, uint64_t b1, uint64_t b2, | |
490 | uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) | |
158142c2 | 491 | { |
cb3ad036 RH |
492 | bool c = 0; |
493 | *z2Ptr = usub64_borrow(a2, b2, &c); | |
494 | *z1Ptr = usub64_borrow(a1, b1, &c); | |
495 | *z0Ptr = usub64_borrow(a0, b0, &c); | |
158142c2 FB |
496 | } |
497 | ||
498 | /*---------------------------------------------------------------------------- | |
499 | | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken | |
500 | | into two 64-bit pieces which are stored at the locations pointed to by | |
501 | | `z0Ptr' and `z1Ptr'. | |
502 | *----------------------------------------------------------------------------*/ | |
503 | ||
b4d09b17 RH |
504 | static inline void |
505 | mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr) | |
158142c2 | 506 | { |
b4d09b17 | 507 | mulu64(z1Ptr, z0Ptr, a, b); |
158142c2 FB |
508 | } |
509 | ||
510 | /*---------------------------------------------------------------------------- | |
511 | | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by | |
512 | | `b' to obtain a 192-bit product. The product is broken into three 64-bit | |
513 | | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and | |
514 | | `z2Ptr'. | |
515 | *----------------------------------------------------------------------------*/ | |
516 | ||
a49db98d | 517 | static inline void |
5ffb6bd9 RH |
518 | mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b, |
519 | uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) | |
158142c2 | 520 | { |
5ffb6bd9 | 521 | uint64_t z0, z1, m1; |
158142c2 | 522 | |
5ffb6bd9 RH |
523 | mul64To128(a1, b, &m1, z2Ptr); |
524 | mul64To128(a0, b, &z0, &z1); | |
525 | add128(z0, z1, 0, m1, z0Ptr, z1Ptr); | |
158142c2 FB |
526 | } |
527 | ||
528 | /*---------------------------------------------------------------------------- | |
529 | | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the | |
530 | | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit | |
531 | | product. The product is broken into four 64-bit pieces which are stored at | |
532 | | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. | |
533 | *----------------------------------------------------------------------------*/ | |
534 | ||
cd55a56e RH |
535 | static inline void mul128To256(uint64_t a0, uint64_t a1, |
536 | uint64_t b0, uint64_t b1, | |
537 | uint64_t *z0Ptr, uint64_t *z1Ptr, | |
538 | uint64_t *z2Ptr, uint64_t *z3Ptr) | |
158142c2 | 539 | { |
cd55a56e RH |
540 | uint64_t z0, z1, z2; |
541 | uint64_t m0, m1, m2, n1, n2; | |
158142c2 | 542 | |
cd55a56e RH |
543 | mul64To128(a1, b0, &m1, &m2); |
544 | mul64To128(a0, b1, &n1, &n2); | |
545 | mul64To128(a1, b1, &z2, z3Ptr); | |
546 | mul64To128(a0, b0, &z0, &z1); | |
158142c2 | 547 | |
cd55a56e RH |
548 | add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2); |
549 | add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr); | |
158142c2 FB |
550 | } |
551 | ||
552 | /*---------------------------------------------------------------------------- | |
553 | | Returns an approximation to the 64-bit integer quotient obtained by dividing | |
554 | | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The | |
555 | | divisor `b' must be at least 2^63. If q is the exact quotient truncated | |
556 | | toward zero, the approximation returned lies between q and q + 2 inclusive. | |
557 | | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit | |
558 | | unsigned integer is returned. | |
559 | *----------------------------------------------------------------------------*/ | |
560 | ||
88857aca | 561 | static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) |
158142c2 | 562 | { |
bb98fe42 AF |
563 | uint64_t b0, b1; |
564 | uint64_t rem0, rem1, term0, term1; | |
565 | uint64_t z; | |
158142c2 | 566 | |
e9321124 | 567 | if ( b <= a0 ) return UINT64_C(0xFFFFFFFFFFFFFFFF); |
158142c2 | 568 | b0 = b>>32; |
e9321124 | 569 | z = ( b0<<32 <= a0 ) ? UINT64_C(0xFFFFFFFF00000000) : ( a0 / b0 )<<32; |
158142c2 FB |
570 | mul64To128( b, z, &term0, &term1 ); |
571 | sub128( a0, a1, term0, term1, &rem0, &rem1 ); | |
bb98fe42 | 572 | while ( ( (int64_t) rem0 ) < 0 ) { |
e9321124 | 573 | z -= UINT64_C(0x100000000); |
158142c2 FB |
574 | b1 = b<<32; |
575 | add128( rem0, rem1, b0, b1, &rem0, &rem1 ); | |
576 | } | |
577 | rem0 = ( rem0<<32 ) | ( rem1>>32 ); | |
578 | z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; | |
579 | return z; | |
580 | ||
581 | } | |
582 | ||
583 | /*---------------------------------------------------------------------------- | |
584 | | Returns an approximation to the square root of the 32-bit significand given | |
585 | | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of | |
586 | | `aExp' (the least significant bit) is 1, the integer returned approximates | |
587 | | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' | |
588 | | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either | |
589 | | case, the approximation returned lies strictly within +/-2 of the exact | |
590 | | value. | |
591 | *----------------------------------------------------------------------------*/ | |
592 | ||
88857aca | 593 | static inline uint32_t estimateSqrt32(int aExp, uint32_t a) |
158142c2 | 594 | { |
bb98fe42 | 595 | static const uint16_t sqrtOddAdjustments[] = { |
158142c2 FB |
596 | 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, |
597 | 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 | |
598 | }; | |
bb98fe42 | 599 | static const uint16_t sqrtEvenAdjustments[] = { |
158142c2 FB |
600 | 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, |
601 | 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 | |
602 | }; | |
8f506c70 | 603 | int8_t index; |
bb98fe42 | 604 | uint32_t z; |
158142c2 FB |
605 | |
606 | index = ( a>>27 ) & 15; | |
607 | if ( aExp & 1 ) { | |
3f4cb3d3 | 608 | z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; |
158142c2 FB |
609 | z = ( ( a / z )<<14 ) + ( z<<15 ); |
610 | a >>= 1; | |
611 | } | |
612 | else { | |
3f4cb3d3 | 613 | z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; |
158142c2 FB |
614 | z = a / z + z; |
615 | z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); | |
bb98fe42 | 616 | if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); |
158142c2 | 617 | } |
bb98fe42 | 618 | return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); |
158142c2 FB |
619 | |
620 | } | |
621 | ||
158142c2 FB |
622 | /*---------------------------------------------------------------------------- |
623 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' | |
624 | | is equal to the 128-bit value formed by concatenating `b0' and `b1'. | |
625 | | Otherwise, returns 0. | |
626 | *----------------------------------------------------------------------------*/ | |
627 | ||
c120391c | 628 | static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) |
158142c2 | 629 | { |
c120391c | 630 | return a0 == b0 && a1 == b1; |
158142c2 FB |
631 | } |
632 | ||
633 | /*---------------------------------------------------------------------------- | |
634 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less | |
635 | | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. | |
636 | | Otherwise, returns 0. | |
637 | *----------------------------------------------------------------------------*/ | |
638 | ||
c120391c | 639 | static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) |
158142c2 | 640 | { |
c120391c | 641 | return a0 < b0 || (a0 == b0 && a1 <= b1); |
158142c2 FB |
642 | } |
643 | ||
644 | /*---------------------------------------------------------------------------- | |
645 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less | |
646 | | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, | |
647 | | returns 0. | |
648 | *----------------------------------------------------------------------------*/ | |
649 | ||
c120391c | 650 | static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) |
158142c2 | 651 | { |
c120391c | 652 | return a0 < b0 || (a0 == b0 && a1 < b1); |
158142c2 FB |
653 | } |
654 | ||
655 | /*---------------------------------------------------------------------------- | |
656 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is | |
657 | | not equal to the 128-bit value formed by concatenating `b0' and `b1'. | |
658 | | Otherwise, returns 0. | |
659 | *----------------------------------------------------------------------------*/ | |
660 | ||
c120391c | 661 | static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) |
158142c2 | 662 | { |
c120391c | 663 | return a0 != b0 || a1 != b1; |
158142c2 | 664 | } |
f91005e1 | 665 | |
feaf2e9c RH |
666 | /* |
667 | * Similarly, comparisons of 192-bit values. | |
668 | */ | |
669 | ||
670 | static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2, | |
671 | uint64_t b0, uint64_t b1, uint64_t b2) | |
672 | { | |
673 | return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0; | |
674 | } | |
675 | ||
676 | static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2, | |
677 | uint64_t b0, uint64_t b1, uint64_t b2) | |
678 | { | |
679 | if (a0 != b0) { | |
680 | return a0 < b0; | |
681 | } | |
682 | if (a1 != b1) { | |
683 | return a1 < b1; | |
684 | } | |
685 | return a2 <= b2; | |
686 | } | |
687 | ||
688 | static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2, | |
689 | uint64_t b0, uint64_t b1, uint64_t b2) | |
690 | { | |
691 | if (a0 != b0) { | |
692 | return a0 < b0; | |
693 | } | |
694 | if (a1 != b1) { | |
695 | return a1 < b1; | |
696 | } | |
697 | return a2 < b2; | |
698 | } | |
699 | ||
f91005e1 | 700 | #endif |