Git Repo - qemu.git/blame - include/fpu/softfloat-macros.h

Commit	Line	Data
8d725fac AF	1	/*
	2	* QEMU float support macros
	3	*
16017c48 PM	4	* The code in this source file is derived from release 2a of the SoftFloat
	5	* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
	6	* some later contributions) are provided under that license, as detailed below.
	7	* It has subsequently been modified by contributors to the QEMU Project,
	8	* so some portions are provided under:
	9	* the SoftFloat-2a license
	10	* the BSD license
16017c48 PM	11	*
	12	* Any future contributions to this file after December 1st 2014 will be
	13	* taken to be licensed under the Softfloat-2a license unless specifically
	14	* indicated otherwise.
8d725fac	15	*/
158142c2	16
a7d1ac78 PM	17	/*
a7d1ac78 PM	18	===============================================================================
158142c2	19	This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
a7d1ac78	20	Arithmetic Package, Release 2a.
158142c2 FB	21
	22	Written by John R. Hauser. This work was made possible in part by the
	23	International Computer Science Institute, located at Suite 600, 1947 Center
	24	Street, Berkeley, California 94704. Funding was partially provided by the
	25	National Science Foundation under grant MIP-9311980. The original version
	26	of this code was written as part of a project to build a fixed-point vector
	27	processor in collaboration with the University of California at Berkeley,
	28	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78	29	is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2 FB	30	arithmetic/SoftFloat.html'.
158142c2 FB	31
a7d1ac78 PM	32	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
	33	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
	34	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
	35	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
	36	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2 FB	37
158142c2 FB	38	Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78 PM	39	(1) they include prominent notice that the work is derivative, and (2) they
	40	include prominent notice akin to these four paragraphs for those parts of
	41	this code that are retained.
158142c2	42
a7d1ac78 PM	43	===============================================================================
a7d1ac78 PM	44	*/
158142c2	45
16017c48 PM	46	/* BSD licensing:
	47	* Copyright (c) 2006, Fabrice Bellard
	48	* All rights reserved.
	49	*
	50	* Redistribution and use in source and binary forms, with or without
	51	* modification, are permitted provided that the following conditions are met:
	52	*
	53	* 1. Redistributions of source code must retain the above copyright notice,
	54	* this list of conditions and the following disclaimer.
	55	*
	56	* 2. Redistributions in binary form must reproduce the above copyright notice,
	57	* this list of conditions and the following disclaimer in the documentation
	58	* and/or other materials provided with the distribution.
	59	*
	60	* 3. Neither the name of the copyright holder nor the names of its contributors
	61	* may be used to endorse or promote products derived from this software without
	62	* specific prior written permission.
	63	*
	64	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	65	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	66	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	67	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	68	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	69	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	70	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	71	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	72	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	73	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
	74	* THE POSSIBILITY OF SUCH DAMAGE.
	75	*/
	76
f91005e1 MA	77	#ifndef FPU_SOFTFLOAT_MACROS_H
	78	#define FPU_SOFTFLOAT_MACROS_H
	79
5937fb63	80	#include "fpu/softfloat-types.h"
cb3ad036	81	#include "qemu/host-utils.h"
ec150c7e	82
/**
 * shl_double: double-word merging left shift
 * @l: left or most-significant word
 * @r: right or least-significant word
 * @c: shift count
 *
 * Shift @l left by @c bits, shifting in bits from @r.
 *
 * The portable fallback is only well defined for @c in the range 0..63;
 * a zero count is special-cased so that @r is never shifted by 64.
 */
static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
{
#if defined(__x86_64__)
    /* Spelled __asm__ so the header also compiles in strict ISO C modes. */
    __asm__("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
    return l;
#else
    return c ? (l << c) | (r >> (64 - c)) : l;
#endif
}
	100
	101	/**
	102	* shr_double: double-word merging right shift
	103	* @l: left or most-significant word
	104	* @r: right or least-significant word
	105	* @c: shift count
	106	*
	107	* Shift @r right by @c bits, shifting in bits from @l.
	108	*/
	109	static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
	110	{
	111	#if defined(__x86_64__)
	112	asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
	113	return r;
	114	#else
	115	return c ? (r >> c) \| (l << (64 - c)) : r;
	116	#endif
	117	}
	118
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'. If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1. The value of `count'
| can be arbitrarily large; in particular, if `count' is 32 or greater, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
{
    uint32_t z;

    if (count == 0) {
        z = a;
    } else if (count < 32) {
        /*
         * (-count & 31) equals 32 - count here, so the left shift keeps
         * exactly the bits that the right shift discards.
         */
        z = (a >> count) | ((a << (-count & 31)) != 0);
    } else {
        z = (a != 0);
    }
    *zPtr = z;
}
	144
	145	/*----------------------------------------------------------------------------
	146	\| Shifts `a' right by the number of bits given in `count'. If any nonzero
	147	\| bits are shifted off, they are ``jammed'' into the least significant bit of
	148	\| the result by setting the least significant bit to 1. The value of `count'
	149	\| can be arbitrarily large; in particular, if `count' is greater than 64, the
	150	\| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
	151	\| The result is stored in the location pointed to by `zPtr'.
	152	----------------------------------------------------------------------------/
	153
07d792d2	154	static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
158142c2	155	{
bb98fe42	156	uint64_t z;
158142c2 FB	157
	158	if ( count == 0 ) {
	159	z = a;
	160	}
	161	else if ( count < 64 ) {
	162	z = ( a>>count ) \| ( ( a<<( ( - count ) & 63 ) ) != 0 );
	163	}
	164	else {
	165	z = ( a != 0 );
	166	}
	167	*zPtr = z;
	168
	169	}
	170
	171	/*----------------------------------------------------------------------------
	172	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
	173	\| _plus_ the number of bits given in `count'. The shifted result is at most
	174	\| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
	175	\| bits shifted off form a second 64-bit result as follows: The _last_ bit
	176	\| shifted off is the most-significant bit of the extra result, and the other
	177	\| 63 bits of the extra result are all zero if and only if _all_but_the_last_
	178	\| bits shifted off were all zero. This extra result is stored in the location
	179	\| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
a7d1ac78 PM	180	\| (This routine makes more sense if `a0' and `a1' are considered to form a
	181	\| fixed-point value with binary point between `a0' and `a1'. This fixed-point
	182	\| value is shifted right by the number of bits given in `count', and the
	183	\| integer part of the result is returned at the location pointed to by
158142c2 FB	184	\| `z0Ptr'. The fractional part of the result may be slightly corrupted as
	185	\| described above, and is returned at the location pointed to by `z1Ptr'.)
	186	----------------------------------------------------------------------------/
	187
a49db98d	188	static inline void
158142c2	189	shift64ExtraRightJamming(
07d792d2	190	uint64_t a0, uint64_t a1, int count, uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	191	{
bb98fe42	192	uint64_t z0, z1;
8f506c70	193	int8_t negCount = ( - count ) & 63;
158142c2 FB	194
	195	if ( count == 0 ) {
	196	z1 = a1;
	197	z0 = a0;
	198	}
	199	else if ( count < 64 ) {
	200	z1 = ( a0<<negCount ) \| ( a1 != 0 );
	201	z0 = a0>>count;
	202	}
	203	else {
	204	if ( count == 64 ) {
	205	z1 = a0 \| ( a1 != 0 );
	206	}
	207	else {
	208	z1 = ( ( a0 \| a1 ) != 0 );
	209	}
	210	z0 = 0;
	211	}
	212	*z1Ptr = z1;
	213	*z0Ptr = z0;
	214
	215	}
	216
	217	/*----------------------------------------------------------------------------
	218	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
	219	\| number of bits given in `count'. Any bits shifted off are lost. The value
	220	\| of `count' can be arbitrarily large; in particular, if `count' is greater
	221	\| than 128, the result will be 0. The result is broken into two 64-bit pieces
	222	\| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
	223	----------------------------------------------------------------------------/
	224
a49db98d	225	static inline void
158142c2	226	shift128Right(
07d792d2	227	uint64_t a0, uint64_t a1, int count, uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	228	{
bb98fe42	229	uint64_t z0, z1;
8f506c70	230	int8_t negCount = ( - count ) & 63;
158142c2 FB	231
	232	if ( count == 0 ) {
	233	z1 = a1;
	234	z0 = a0;
	235	}
	236	else if ( count < 64 ) {
	237	z1 = ( a0<<negCount ) \| ( a1>>count );
	238	z0 = a0>>count;
	239	}
	240	else {
4039736e	241	z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
158142c2 FB	242	z0 = 0;
	243	}
	244	*z1Ptr = z1;
	245	*z0Ptr = z0;
	246
	247	}
	248
	249	/*----------------------------------------------------------------------------
	250	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
	251	\| number of bits given in `count'. If any nonzero bits are shifted off, they
	252	\| are ``jammed'' into the least significant bit of the result by setting the
	253	\| least significant bit to 1. The value of `count' can be arbitrarily large;
	254	\| in particular, if `count' is greater than 128, the result will be either
	255	\| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
	256	\| nonzero. The result is broken into two 64-bit pieces which are stored at
	257	\| the locations pointed to by `z0Ptr' and `z1Ptr'.
	258	----------------------------------------------------------------------------/
	259
a49db98d	260	static inline void
158142c2	261	shift128RightJamming(
07d792d2	262	uint64_t a0, uint64_t a1, int count, uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	263	{
bb98fe42	264	uint64_t z0, z1;
8f506c70	265	int8_t negCount = ( - count ) & 63;
158142c2 FB	266
	267	if ( count == 0 ) {
	268	z1 = a1;
	269	z0 = a0;
	270	}
	271	else if ( count < 64 ) {
	272	z1 = ( a0<<negCount ) \| ( a1>>count ) \| ( ( a1<<negCount ) != 0 );
	273	z0 = a0>>count;
	274	}
	275	else {
	276	if ( count == 64 ) {
	277	z1 = a0 \| ( a1 != 0 );
	278	}
	279	else if ( count < 128 ) {
	280	z1 = ( a0>>( count & 63 ) ) \| ( ( ( a0<<negCount ) \| a1 ) != 0 );
	281	}
	282	else {
	283	z1 = ( ( a0 \| a1 ) != 0 );
	284	}
	285	z0 = 0;
	286	}
	287	*z1Ptr = z1;
	288	*z0Ptr = z0;
	289
	290	}
	291
	292	/*----------------------------------------------------------------------------
	293	\| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
	294	\| by 64 _plus_ the number of bits given in `count'. The shifted result is
	295	\| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
	296	\| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
	297	\| off form a third 64-bit result as follows: The _last_ bit shifted off is
	298	\| the most-significant bit of the extra result, and the other 63 bits of the
	299	\| extra result are all zero if and only if _all_but_the_last_ bits shifted off
	300	\| were all zero. This extra result is stored in the location pointed to by
	301	\| `z2Ptr'. The value of `count' can be arbitrarily large.
	302	\| (This routine makes more sense if `a0', `a1', and `a2' are considered
	303	\| to form a fixed-point value with binary point between `a1' and `a2'. This
	304	\| fixed-point value is shifted right by the number of bits given in `count',
	305	\| and the integer part of the result is returned at the locations pointed to
	306	\| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
	307	\| corrupted as described above, and is returned at the location pointed to by
	308	\| `z2Ptr'.)
	309	----------------------------------------------------------------------------/
	310
a49db98d	311	static inline void
158142c2	312	shift128ExtraRightJamming(
bb98fe42 AF	313	uint64_t a0,
	314	uint64_t a1,
	315	uint64_t a2,
07d792d2	316	int count,
bb98fe42 AF	317	uint64_t *z0Ptr,
	318	uint64_t *z1Ptr,
	319	uint64_t *z2Ptr
158142c2 FB	320	)
158142c2 FB	321	{
bb98fe42	322	uint64_t z0, z1, z2;
8f506c70	323	int8_t negCount = ( - count ) & 63;
158142c2 FB	324
	325	if ( count == 0 ) {
	326	z2 = a2;
	327	z1 = a1;
	328	z0 = a0;
	329	}
	330	else {
	331	if ( count < 64 ) {
	332	z2 = a1<<negCount;
	333	z1 = ( a0<<negCount ) \| ( a1>>count );
	334	z0 = a0>>count;
	335	}
	336	else {
	337	if ( count == 64 ) {
	338	z2 = a1;
	339	z1 = a0;
	340	}
	341	else {
	342	a2 \|= a1;
	343	if ( count < 128 ) {
	344	z2 = a0<<negCount;
	345	z1 = a0>>( count & 63 );
	346	}
	347	else {
	348	z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
	349	z1 = 0;
	350	}
	351	}
	352	z0 = 0;
	353	}
	354	z2 \|= ( a2 != 0 );
	355	}
	356	*z2Ptr = z2;
	357	*z1Ptr = z1;
	358	*z0Ptr = z0;
	359
	360	}
	361
	362	/*----------------------------------------------------------------------------
	363	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
	364	\| number of bits given in `count'. Any bits shifted off are lost. The value
	365	\| of `count' must be less than 64. The result is broken into two 64-bit
	366	\| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
	367	----------------------------------------------------------------------------/
	368
5dfbc9e4 RH	369	static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
5dfbc9e4 RH	370	uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	371	{
5dfbc9e4 RH	372	*z1Ptr = a1 << count;
	373	*z0Ptr = count == 0 ? a0 : (a0 << count) \| (a1 >> (-count & 63));
	374	}
158142c2	375
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'. Any bits shifted off are lost. The value
| of `count' may be greater than 64; if it is 128 or greater, the result is 0.
| The result is broken into two 64-bit pieces which are stored at the
| locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
                                uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    if (count < 64) {
        *z1Ptr = a1 << count;
        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
    } else if (count < 128) {
        *z1Ptr = 0;
        *z0Ptr = a1 << (count - 64);
    } else {
        /* Shifting a 64-bit word by 64 or more is undefined in C. */
        *z1Ptr = 0;
        *z0Ptr = 0;
    }
}
	394
	395	/*----------------------------------------------------------------------------
	396	\| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
	397	\| by the number of bits given in `count'. Any bits shifted off are lost.
	398	\| The value of `count' must be less than 64. The result is broken into three
	399	\| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
	400	\| `z1Ptr', and `z2Ptr'.
	401	----------------------------------------------------------------------------/
	402
a49db98d	403	static inline void
158142c2	404	shortShift192Left(
bb98fe42 AF	405	uint64_t a0,
	406	uint64_t a1,
	407	uint64_t a2,
07d792d2	408	int count,
bb98fe42 AF	409	uint64_t *z0Ptr,
	410	uint64_t *z1Ptr,
	411	uint64_t *z2Ptr
158142c2 FB	412	)
158142c2 FB	413	{
bb98fe42	414	uint64_t z0, z1, z2;
8f506c70	415	int8_t negCount;
158142c2 FB	416
	417	z2 = a2<<count;
	418	z1 = a1<<count;
	419	z0 = a0<<count;
	420	if ( 0 < count ) {
	421	negCount = ( ( - count ) & 63 );
	422	z1 \|= a2>>negCount;
	423	z0 \|= a1>>negCount;
	424	}
	425	*z2Ptr = z2;
	426	*z1Ptr = z1;
	427	*z0Ptr = z0;
	428
	429	}
	430
	431	/*----------------------------------------------------------------------------
	432	\| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
	433	\| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
	434	\| any carry out is lost. The result is broken into two 64-bit pieces which
	435	\| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
	436	----------------------------------------------------------------------------/
	437
cb3ad036 RH	438	static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
cb3ad036 RH	439	uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	440	{
cb3ad036 RH	441	bool c = 0;
	442	*z1Ptr = uadd64_carry(a1, b1, &c);
	443	*z0Ptr = uadd64_carry(a0, b0, &c);
158142c2 FB	444	}
	445
	446	/*----------------------------------------------------------------------------
	447	\| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
	448	\| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
	449	\| modulo 2^192, so any carry out is lost. The result is broken into three
	450	\| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
	451	\| `z1Ptr', and `z2Ptr'.
	452	----------------------------------------------------------------------------/
	453
cb3ad036 RH	454	static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
	455	uint64_t b0, uint64_t b1, uint64_t b2,
	456	uint64_t z0Ptr, uint64_t z1Ptr, uint64_t *z2Ptr)
158142c2	457	{
cb3ad036 RH	458	bool c = 0;
	459	*z2Ptr = uadd64_carry(a2, b2, &c);
	460	*z1Ptr = uadd64_carry(a1, b1, &c);
	461	*z0Ptr = uadd64_carry(a0, b0, &c);
158142c2 FB	462	}
	463
	464	/*----------------------------------------------------------------------------
	465	\| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
	466	\| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
	467	\| 2^128, so any borrow out (carry out) is lost. The result is broken into two
	468	\| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
	469	\| `z1Ptr'.
	470	----------------------------------------------------------------------------/
	471
cb3ad036 RH	472	static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
cb3ad036 RH	473	uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	474	{
cb3ad036 RH	475	bool c = 0;
	476	*z1Ptr = usub64_borrow(a1, b1, &c);
	477	*z0Ptr = usub64_borrow(a0, b0, &c);
158142c2 FB	478	}
	479
	480	/*----------------------------------------------------------------------------
	481	\| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
	482	\| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
	483	\| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
	484	\| result is broken into three 64-bit pieces which are stored at the locations
	485	\| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
	486	----------------------------------------------------------------------------/
	487
cb3ad036 RH	488	static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
	489	uint64_t b0, uint64_t b1, uint64_t b2,
	490	uint64_t z0Ptr, uint64_t z1Ptr, uint64_t *z2Ptr)
158142c2	491	{
cb3ad036 RH	492	bool c = 0;
	493	*z2Ptr = usub64_borrow(a2, b2, &c);
	494	*z1Ptr = usub64_borrow(a1, b1, &c);
	495	*z0Ptr = usub64_borrow(a0, b0, &c);
158142c2 FB	496	}
	497
	498	/*----------------------------------------------------------------------------
	499	\| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
	500	\| into two 64-bit pieces which are stored at the locations pointed to by
	501	\| `z0Ptr' and `z1Ptr'.
	502	----------------------------------------------------------------------------/
	503
b4d09b17 RH	504	static inline void
b4d09b17 RH	505	mul64To128(uint64_t a, uint64_t b, uint64_t z0Ptr, uint64_t z1Ptr)
158142c2	506	{
b4d09b17	507	mulu64(z1Ptr, z0Ptr, a, b);
158142c2 FB	508	}
	509
	510	/*----------------------------------------------------------------------------
	511	\| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
	512	\| `b' to obtain a 192-bit product. The product is broken into three 64-bit
	513	\| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
	514	\| `z2Ptr'.
	515	----------------------------------------------------------------------------/
	516
a49db98d	517	static inline void
5ffb6bd9 RH	518	mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
5ffb6bd9 RH	519	uint64_t z0Ptr, uint64_t z1Ptr, uint64_t *z2Ptr)
158142c2	520	{
5ffb6bd9	521	uint64_t z0, z1, m1;
158142c2	522
5ffb6bd9 RH	523	mul64To128(a1, b, &m1, z2Ptr);
	524	mul64To128(a0, b, &z0, &z1);
	525	add128(z0, z1, 0, m1, z0Ptr, z1Ptr);
158142c2 FB	526	}
	527
	528	/*----------------------------------------------------------------------------
	529	\| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
	530	\| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
	531	\| product. The product is broken into four 64-bit pieces which are stored at
	532	\| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
	533	----------------------------------------------------------------------------/
	534
cd55a56e RH	535	static inline void mul128To256(uint64_t a0, uint64_t a1,
	536	uint64_t b0, uint64_t b1,
	537	uint64_t z0Ptr, uint64_t z1Ptr,
	538	uint64_t z2Ptr, uint64_t z3Ptr)
158142c2	539	{
cd55a56e RH	540	uint64_t z0, z1, z2;
cd55a56e RH	541	uint64_t m0, m1, m2, n1, n2;
158142c2	542
cd55a56e RH	543	mul64To128(a1, b0, &m1, &m2);
	544	mul64To128(a0, b1, &n1, &n2);
	545	mul64To128(a1, b1, &z2, z3Ptr);
	546	mul64To128(a0, b0, &z0, &z1);
158142c2	547
cd55a56e RH	548	add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2);
cd55a56e RH	549	add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
158142c2 FB	550	}
	551
	552	/*----------------------------------------------------------------------------
	553	\| Returns an approximation to the 64-bit integer quotient obtained by dividing
	554	\| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
	555	\| divisor `b' must be at least 2^63. If q is the exact quotient truncated
	556	\| toward zero, the approximation returned lies between q and q + 2 inclusive.
	557	\| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
	558	\| unsigned integer is returned.
	559	----------------------------------------------------------------------------/
	560
88857aca	561	static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
158142c2	562	{
bb98fe42 AF	563	uint64_t b0, b1;
	564	uint64_t rem0, rem1, term0, term1;
	565	uint64_t z;
158142c2	566
e9321124	567	if ( b <= a0 ) return UINT64_C(0xFFFFFFFFFFFFFFFF);
158142c2	568	b0 = b>>32;
e9321124	569	z = ( b0<<32 <= a0 ) ? UINT64_C(0xFFFFFFFF00000000) : ( a0 / b0 )<<32;
158142c2 FB	570	mul64To128( b, z, &term0, &term1 );
158142c2 FB	571	sub128( a0, a1, term0, term1, &rem0, &rem1 );
bb98fe42	572	while ( ( (int64_t) rem0 ) < 0 ) {
e9321124	573	z -= UINT64_C(0x100000000);
158142c2 FB	574	b1 = b<<32;
	575	add128( rem0, rem1, b0, b1, &rem0, &rem1 );
	576	}
	577	rem0 = ( rem0<<32 ) \| ( rem1>>32 );
	578	z \|= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
	579	return z;
	580
	581	}
	582
	583	/*----------------------------------------------------------------------------
	584	\| Returns an approximation to the square root of the 32-bit significand given
	585	\| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
	586	\| `aExp' (the least significant bit) is 1, the integer returned approximates
	587	\| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
	588	\| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
	589	\| case, the approximation returned lies strictly within +/-2 of the exact
	590	\| value.
	591	----------------------------------------------------------------------------/
	592
88857aca	593	static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
158142c2	594	{
bb98fe42	595	static const uint16_t sqrtOddAdjustments[] = {
158142c2 FB	596	0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
	597	0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
	598	};
bb98fe42	599	static const uint16_t sqrtEvenAdjustments[] = {
158142c2 FB	600	0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
	601	0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
	602	};
8f506c70	603	int8_t index;
bb98fe42	604	uint32_t z;
158142c2 FB	605
	606	index = ( a>>27 ) & 15;
	607	if ( aExp & 1 ) {
3f4cb3d3	608	z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
158142c2 FB	609	z = ( ( a / z )<<14 ) + ( z<<15 );
	610	a >>= 1;
	611	}
	612	else {
3f4cb3d3	613	z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
158142c2 FB	614	z = a / z + z;
158142c2 FB	615	z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
bb98fe42	616	if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
158142c2	617	}
bb98fe42	618	return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
158142c2 FB	619
	620	}
	621
/*----------------------------------------------------------------------------
| Returns true if the 128-bit value formed by concatenating `a0' and `a1'
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns false.
*----------------------------------------------------------------------------*/

static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 == b0 && a1 == b1;
}
	632
	633	/*----------------------------------------------------------------------------
	634	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
	635	\| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
	636	\| Otherwise, returns 0.
	637	----------------------------------------------------------------------------/
	638
c120391c	639	static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2	640	{
c120391c	641	return a0 < b0 \|\| (a0 == b0 && a1 <= b1);
158142c2 FB	642	}
	643
	644	/*----------------------------------------------------------------------------
	645	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
	646	\| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
	647	\| returns 0.
	648	----------------------------------------------------------------------------/
	649
c120391c	650	static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2	651	{
c120391c	652	return a0 < b0 \|\| (a0 == b0 && a1 < b1);
158142c2 FB	653	}
	654
	655	/*----------------------------------------------------------------------------
	656	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
	657	\| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
	658	\| Otherwise, returns 0.
	659	----------------------------------------------------------------------------/
	660
c120391c	661	static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
158142c2	662	{
c120391c	663	return a0 != b0 \|\| a1 != b1;
158142c2	664	}
f91005e1	665
/*
 * Similarly, comparisons of 192-bit values.
 */

/*
 * Returns true if the 192-bit value a0:a1:a2 equals b0:b1:b2.
 * The three word-wise differences are OR-ed together so that a single
 * comparison against zero decides the result.
 */
static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2,
                         uint64_t b0, uint64_t b1, uint64_t b2)
{
    return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0;
}
	675
	676	static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2,
	677	uint64_t b0, uint64_t b1, uint64_t b2)
	678	{
	679	if (a0 != b0) {
	680	return a0 < b0;
	681	}
	682	if (a1 != b1) {
	683	return a1 < b1;
	684	}
	685	return a2 <= b2;
	686	}
	687
	688	static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2,
	689	uint64_t b0, uint64_t b1, uint64_t b2)
	690	{
	691	if (a0 != b0) {
	692	return a0 < b0;
	693	}
	694	if (a1 != b1) {
	695	return a1 < b1;
	696	}
	697	return a2 < b2;
	698	}
	699
f91005e1	700	#endif