Git Repo - qemu.git/blame - target/arm/vfp

Commit	Line	Data
37356079 RH	1	/*
	2	* ARM VFP floating-point operations
	3	*
	4	* Copyright (c) 2003 Fabrice Bellard
	5	*
	6	* This library is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Lesser General Public
	8	* License as published by the Free Software Foundation; either
	9	* version 2.1 of the License, or (at your option) any later version.
	10	*
	11	* This library is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	* Lesser General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Lesser General Public
	17	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
	18	*/
	19
	20	#include "qemu/osdep.h"
37356079 RH	21	#include "cpu.h"
37356079 RH	22	#include "exec/helper-proto.h"
37356079	23	#include "internals.h"
4a15527c PMD	24	#ifdef CONFIG_TCG
	25	#include "qemu/log.h"
	26	#include "fpu/softfloat.h"
	27	#endif
37356079 RH	28
	29	/* VFP support. We follow the convention used for VFP instructions:
	30	Single precision routines have a "s" suffix, double precision a
	31	"d" suffix. */
	32
4a15527c PMD	33	#ifdef CONFIG_TCG
4a15527c PMD	34
37356079 RH	35	/* Convert host exception flags to vfp form. */
	36	static inline int vfp_exceptbits_from_host(int host_bits)
	37	{
	38	int target_bits = 0;
	39
9798ac71	40	if (host_bits & float_flag_invalid) {
37356079	41	target_bits \|= 1;
9798ac71 PMD	42	}
9798ac71 PMD	43	if (host_bits & float_flag_divbyzero) {
37356079	44	target_bits \|= 2;
9798ac71 PMD	45	}
9798ac71 PMD	46	if (host_bits & float_flag_overflow) {
37356079	47	target_bits \|= 4;
9798ac71 PMD	48	}
9798ac71 PMD	49	if (host_bits & (float_flag_underflow \| float_flag_output_denormal)) {
37356079	50	target_bits \|= 8;
9798ac71 PMD	51	}
9798ac71 PMD	52	if (host_bits & float_flag_inexact) {
37356079	53	target_bits \|= 0x10;
9798ac71 PMD	54	}
9798ac71 PMD	55	if (host_bits & float_flag_input_denormal) {
37356079	56	target_bits \|= 0x80;
9798ac71	57	}
37356079 RH	58	return target_bits;
	59	}
	60
37356079 RH	61	/* Convert vfp exception flags to target form. */
	62	static inline int vfp_exceptbits_to_host(int target_bits)
	63	{
	64	int host_bits = 0;
	65
9798ac71	66	if (target_bits & 1) {
37356079	67	host_bits \|= float_flag_invalid;
9798ac71 PMD	68	}
9798ac71 PMD	69	if (target_bits & 2) {
37356079	70	host_bits \|= float_flag_divbyzero;
9798ac71 PMD	71	}
9798ac71 PMD	72	if (target_bits & 4) {
37356079	73	host_bits \|= float_flag_overflow;
9798ac71 PMD	74	}
9798ac71 PMD	75	if (target_bits & 8) {
37356079	76	host_bits \|= float_flag_underflow;
9798ac71 PMD	77	}
9798ac71 PMD	78	if (target_bits & 0x10) {
37356079	79	host_bits \|= float_flag_inexact;
9798ac71 PMD	80	}
9798ac71 PMD	81	if (target_bits & 0x80) {
37356079	82	host_bits \|= float_flag_input_denormal;
9798ac71	83	}
37356079 RH	84	return host_bits;
	85	}
	86
0c6ad948 PMD	87	static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
	88	{
	89	uint32_t i;
	90
	91	i = get_float_exception_flags(&env->vfp.fp_status);
	92	i \|= get_float_exception_flags(&env->vfp.standard_fp_status);
	93	/* FZ16 does not generate an input denormal exception. */
	94	i \|= (get_float_exception_flags(&env->vfp.fp_status_f16)
	95	& ~float_flag_input_denormal);
aaae563b PM	96	i \|= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
aaae563b PM	97	& ~float_flag_input_denormal);
0c6ad948 PMD	98	return vfp_exceptbits_from_host(i);
	99	}
	100
e9d65282 PMD	101	static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
	102	{
	103	int i;
	104	uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
	105
	106	changed ^= val;
	107	if (changed & (3 << 22)) {
	108	i = (val >> 22) & 3;
	109	switch (i) {
	110	case FPROUNDING_TIEEVEN:
	111	i = float_round_nearest_even;
	112	break;
	113	case FPROUNDING_POSINF:
	114	i = float_round_up;
	115	break;
	116	case FPROUNDING_NEGINF:
	117	i = float_round_down;
	118	break;
	119	case FPROUNDING_ZERO:
	120	i = float_round_to_zero;
	121	break;
	122	}
	123	set_float_rounding_mode(i, &env->vfp.fp_status);
	124	set_float_rounding_mode(i, &env->vfp.fp_status_f16);
	125	}
	126	if (changed & FPCR_FZ16) {
	127	bool ftz_enabled = val & FPCR_FZ16;
	128	set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
aaae563b	129	set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
e9d65282	130	set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
aaae563b	131	set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
e9d65282 PMD	132	}
	133	if (changed & FPCR_FZ) {
	134	bool ftz_enabled = val & FPCR_FZ;
	135	set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
	136	set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
	137	}
	138	if (changed & FPCR_DN) {
	139	bool dnan_enabled = val & FPCR_DN;
	140	set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
	141	set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
	142	}
	143
	144	/*
	145	* The exception flags are ORed together when we read fpscr so we
	146	* only need to preserve the current state in one of our
	147	* float_status values.
	148	*/
	149	i = vfp_exceptbits_to_host(val);
	150	set_float_exception_flags(i, &env->vfp.fp_status);
	151	set_float_exception_flags(0, &env->vfp.fp_status_f16);
	152	set_float_exception_flags(0, &env->vfp.standard_fp_status);
aaae563b	153	set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
e9d65282 PMD	154	}
e9d65282 PMD	155
4a15527c PMD	156	#else
	157
	158	static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
	159	{
	160	return 0;
	161	}
	162
	163	static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
	164	{
	165	}
	166
	167	#endif
	168
20e62dd8 PMD	169	uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
	170	{
	171	uint32_t i, fpscr;
	172
	173	fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
	174	\| (env->vfp.vec_len << 16)
	175	\| (env->vfp.vec_stride << 20);
	176
8128c8e8 PM	177	/*
	178	* M-profile LTPSIZE overlaps A-profile Stride; whichever of the
	179	* two is not applicable to this CPU will always be zero.
	180	*/
	181	fpscr \|= env->v7m.ltpsize << 16;
	182
0c6ad948	183	fpscr \|= vfp_get_fpscr_from_host(env);
20e62dd8 PMD	184
	185	i = env->vfp.qc[0] \| env->vfp.qc[1] \| env->vfp.qc[2] \| env->vfp.qc[3];
	186	fpscr \|= i ? FPCR_QC : 0;
	187
	188	return fpscr;
	189	}
	190
	191	uint32_t vfp_get_fpscr(CPUARMState *env)
	192	{
	193	return HELPER(vfp_get_fpscr)(env);
	194	}
	195
37356079 RH	196	void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
37356079 RH	197	{
b26b5629 PM	198	ARMCPU *cpu = env_archcpu(env);
b26b5629 PM	199
37356079	200	/* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
b26b5629	201	if (!cpu_isar_feature(any_fp16, cpu)) {
37356079 RH	202	val &= ~FPCR_FZ16;
	203	}
	204
d31e2ce6 PM	205	vfp_set_fpscr_to_host(env, val);
	206
	207	if (!arm_feature(env, ARM_FEATURE_M)) {
5bcf8ed9	208	/*
d31e2ce6 PM	209	* Short-vector length and stride; on M-profile these bits
	210	* are used for different purposes.
	211	* We can't make this conditional be "if MVFR0.FPShVec != 0",
	212	* because in v7A no-short-vector-support cores still had to
	213	* allow Stride/Len to be written with the only effect that
	214	* some insns are required to UNDEF if the guest sets them.
5bcf8ed9	215	*/
d31e2ce6 PM	216	env->vfp.vec_len = extract32(val, 16, 3);
d31e2ce6 PM	217	env->vfp.vec_stride = extract32(val, 20, 2);
b26b5629 PM	218	} else if (cpu_isar_feature(aa32_mve, cpu)) {
	219	env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
	220	FPCR_LTPSIZE_LENGTH);
5bcf8ed9 PM	221	}
5bcf8ed9 PM	222
c485ce2c PM	223	if (arm_feature(env, ARM_FEATURE_NEON) \|\|
c485ce2c PM	224	cpu_isar_feature(aa32_mve, cpu)) {
d31e2ce6 PM	225	/*
	226	* The bit we set within fpscr_q is arbitrary; the register as a
	227	* whole being zero/non-zero is what counts.
	228	* TODO: M-profile MVE also has a QC bit.
	229	*/
	230	env->vfp.qc[0] = val & FPCR_QC;
	231	env->vfp.qc[1] = 0;
	232	env->vfp.qc[2] = 0;
	233	env->vfp.qc[3] = 0;
	234	}
85795187	235
37356079 RH	236	/*
	237	* We don't implement trapped exception handling, so the
	238	* trap enable bits, IDE\|IXE\|UFE\|OFE\|DZE\|IOE are all RAZ/WI (not RES0!)
	239	*
d31e2ce6 PM	240	* The exception flags IOC\|DZC\|OFC\|UFC\|IXC\|IDC are stored in
	241	* fp_status; QC, Len and Stride are stored separately earlier.
	242	* Clear out all of those and the RES0 bits: only NZCV, AHP, DN,
	243	* FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR].
37356079 RH	244	*/
37356079 RH	245	env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
37356079 RH	246	}
	247
	248	void vfp_set_fpscr(CPUARMState *env, uint32_t val)
	249	{
	250	HELPER(vfp_set_fpscr)(env, val);
	251	}
	252
4a15527c PMD	253	#ifdef CONFIG_TCG
4a15527c PMD	254
37356079 RH	255	#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
	256
	257	#define VFP_BINOP(name) \
120a0eb3 PM	258	dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
	259	{ \
	260	float_status *fpst = fpstp; \
	261	return float16_ ## name(a, b, fpst); \
	262	} \
37356079 RH	263	float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
	264	{ \
	265	float_status *fpst = fpstp; \
	266	return float32_ ## name(a, b, fpst); \
	267	} \
	268	float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
	269	{ \
	270	float_status *fpst = fpstp; \
	271	return float64_ ## name(a, b, fpst); \
	272	}
	273	VFP_BINOP(add)
	274	VFP_BINOP(sub)
	275	VFP_BINOP(mul)
	276	VFP_BINOP(div)
	277	VFP_BINOP(min)
	278	VFP_BINOP(max)
	279	VFP_BINOP(minnum)
	280	VFP_BINOP(maxnum)
	281	#undef VFP_BINOP
	282
e7cb0ded PM	283	dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
	284	{
	285	return float16_chs(a);
	286	}
	287
37356079 RH	288	float32 VFP_HELPER(neg, s)(float32 a)
	289	{
	290	return float32_chs(a);
	291	}
	292
	293	float64 VFP_HELPER(neg, d)(float64 a)
	294	{
	295	return float64_chs(a);
	296	}
	297
ce2d65a5 PM	298	dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
	299	{
	300	return float16_abs(a);
	301	}
	302
37356079 RH	303	float32 VFP_HELPER(abs, s)(float32 a)
	304	{
	305	return float32_abs(a);
	306	}
	307
	308	float64 VFP_HELPER(abs, d)(float64 a)
	309	{
	310	return float64_abs(a);
	311	}
	312
ce2d65a5 PM	313	dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
	314	{
	315	return float16_sqrt(a, &env->vfp.fp_status_f16);
	316	}
	317
37356079 RH	318	float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
	319	{
	320	return float32_sqrt(a, &env->vfp.fp_status);
	321	}
	322
	323	float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
	324	{
	325	return float64_sqrt(a, &env->vfp.fp_status);
	326	}
	327
71bfd65c	328	static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
37356079 RH	329	{
	330	uint32_t flags;
	331	switch (cmp) {
	332	case float_relation_equal:
	333	flags = 0x6;
	334	break;
	335	case float_relation_less:
	336	flags = 0x8;
	337	break;
	338	case float_relation_greater:
	339	flags = 0x2;
	340	break;
	341	case float_relation_unordered:
	342	flags = 0x3;
	343	break;
	344	default:
	345	g_assert_not_reached();
	346	}
	347	env->vfp.xregs[ARM_VFP_FPSCR] =
	348	deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
	349	}
	350
	351	/* XXX: check quiet/signaling case */
1b88b054 PM	352	#define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
1b88b054 PM	353	void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
37356079 RH	354	{ \
37356079 RH	355	softfloat_to_vfp_compare(env, \
1b88b054	356	FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
37356079	357	} \
1b88b054	358	void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
37356079 RH	359	{ \
37356079 RH	360	softfloat_to_vfp_compare(env, \
1b88b054	361	FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
37356079	362	}
1b88b054 PM	363	DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
	364	DO_VFP_cmp(s, float32, float32, fp_status)
	365	DO_VFP_cmp(d, float64, float64, fp_status)
37356079 RH	366	#undef DO_VFP_cmp
	367
	368	/* Integer to float and float to integer conversions */
	369
	370	#define CONV_ITOF(name, ftype, fsz, sign) \
	371	ftype HELPER(name)(uint32_t x, void *fpstp) \
	372	{ \
	373	float_status *fpst = fpstp; \
	374	return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
	375	}
	376
	377	#define CONV_FTOI(name, ftype, fsz, sign, round) \
	378	sign##int32_t HELPER(name)(ftype x, void *fpstp) \
	379	{ \
	380	float_status *fpst = fpstp; \
	381	if (float##fsz##_is_any_nan(x)) { \
	382	float_raise(float_flag_invalid, fpst); \
	383	return 0; \
	384	} \
	385	return float##fsz##_to_##sign##int32##round(x, fpst); \
	386	}
	387
	388	#define FLOAT_CONVS(name, p, ftype, fsz, sign) \
	389	CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
	390	CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
	391	CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
	392
	393	FLOAT_CONVS(si, h, uint32_t, 16, )
	394	FLOAT_CONVS(si, s, float32, 32, )
	395	FLOAT_CONVS(si, d, float64, 64, )
	396	FLOAT_CONVS(ui, h, uint32_t, 16, u)
	397	FLOAT_CONVS(ui, s, float32, 32, u)
	398	FLOAT_CONVS(ui, d, float64, 64, u)
	399
	400	#undef CONV_ITOF
	401	#undef CONV_FTOI
	402	#undef FLOAT_CONVS
	403
	404	/* floating point conversion */
	405	float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
	406	{
	407	return float32_to_float64(x, &env->vfp.fp_status);
	408	}
	409
	410	float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
	411	{
	412	return float64_to_float32(x, &env->vfp.fp_status);
	413	}
	414
3a98ac40 RH	415	uint32_t HELPER(bfcvt)(float32 x, void *status)
	416	{
	417	return float32_to_bfloat16(x, status);
	418	}
	419
d29b17ca RH	420	uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
	421	{
	422	bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
	423	bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
	424	return deposit32(lo, 16, 16, hi);
	425	}
	426
61db12d9 PM	427	/*
	428	* VFP3 fixed point conversion. The AArch32 versions of fix-to-float
	429	* must always round-to-nearest; the AArch64 ones honour the FPSCR
	430	* rounding mode. (For AArch32 Neon the standard-FPSCR is set to
	431	* round-to-nearest so either helper will work.) AArch32 float-to-fix
	432	* must round-to-zero.
	433	*/
5366f6ad PM	434	#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
5366f6ad PM	435	ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
37356079 RH	436	void *fpstp) \
	437	{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
	438
61db12d9 PM	439	#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
	440	ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \
	441	uint32_t shift, \
	442	void *fpstp) \
	443	{ \
	444	ftype ret; \
	445	float_status *fpst = fpstp; \
	446	FloatRoundMode oldmode = fpst->float_rounding_mode; \
	447	fpst->float_rounding_mode = float_round_nearest_even; \
	448	ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \
	449	fpst->float_rounding_mode = oldmode; \
	450	return ret; \
	451	}
	452
5366f6ad PM	453	#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
5366f6ad PM	454	uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \
37356079 RH	455	void *fpst) \
	456	{ \
	457	if (unlikely(float##fsz##_is_any_nan(x))) { \
	458	float_raise(float_flag_invalid, fpst); \
	459	return 0; \
	460	} \
	461	return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
	462	}
	463
5366f6ad PM	464	#define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \
5366f6ad PM	465	VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
61db12d9	466	VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
5366f6ad	467	VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
37356079	468	float_round_to_zero, _round_to_zero) \
5366f6ad	469	VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
37356079 RH	470	get_float_rounding_mode(fpst), )
37356079 RH	471
5366f6ad PM	472	#define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype) \
	473	VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
	474	VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
37356079 RH	475	get_float_rounding_mode(fpst), )
37356079 RH	476
5366f6ad PM	477	VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
	478	VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
	479	VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
	480	VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
	481	VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
	482	VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
	483	VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
	484	VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
	485	VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
	486	VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
	487	VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
	488	VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
414ba270 PM	489	VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
	490	VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
	491	VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
	492	VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
	493	VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
	494	VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
37356079 RH	495
	496	#undef VFP_CONV_FIX
	497	#undef VFP_CONV_FIX_FLOAT
	498	#undef VFP_CONV_FLOAT_FIX_ROUND
	499	#undef VFP_CONV_FIX_A64
	500
37356079 RH	501	/* Set the current fp rounding mode and return the old one.
	502	* The argument is a softfloat float_round_ value.
	503	*/
	504	uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
	505	{
	506	float_status *fp_status = fpstp;
	507
	508	uint32_t prev_rmode = get_float_rounding_mode(fp_status);
	509	set_float_rounding_mode(rmode, fp_status);
	510
	511	return prev_rmode;
	512	}
	513
37356079 RH	514	/* Half precision conversions. */
	515	float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
	516	{
	517	/* Squash FZ16 to 0 for the duration of conversion. In this case,
	518	* it would affect flushing input denormals.
	519	*/
	520	float_status *fpst = fpstp;
c120391c	521	bool save = get_flush_inputs_to_zero(fpst);
37356079 RH	522	set_flush_inputs_to_zero(false, fpst);
	523	float32 r = float16_to_float32(a, !ahp_mode, fpst);
	524	set_flush_inputs_to_zero(save, fpst);
	525	return r;
	526	}
	527
	528	uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
	529	{
	530	/* Squash FZ16 to 0 for the duration of conversion. In this case,
	531	* it would affect flushing output denormals.
	532	*/
	533	float_status *fpst = fpstp;
c120391c	534	bool save = get_flush_to_zero(fpst);
37356079 RH	535	set_flush_to_zero(false, fpst);
	536	float16 r = float32_to_float16(a, !ahp_mode, fpst);
	537	set_flush_to_zero(save, fpst);
	538	return r;
	539	}
	540
	541	float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
	542	{
	543	/* Squash FZ16 to 0 for the duration of conversion. In this case,
	544	* it would affect flushing input denormals.
	545	*/
	546	float_status *fpst = fpstp;
c120391c	547	bool save = get_flush_inputs_to_zero(fpst);
37356079 RH	548	set_flush_inputs_to_zero(false, fpst);
	549	float64 r = float16_to_float64(a, !ahp_mode, fpst);
	550	set_flush_inputs_to_zero(save, fpst);
	551	return r;
	552	}
	553
	554	uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
	555	{
	556	/* Squash FZ16 to 0 for the duration of conversion. In this case,
	557	* it would affect flushing output denormals.
	558	*/
	559	float_status *fpst = fpstp;
c120391c	560	bool save = get_flush_to_zero(fpst);
37356079 RH	561	set_flush_to_zero(false, fpst);
	562	float16 r = float64_to_float16(a, !ahp_mode, fpst);
	563	set_flush_to_zero(save, fpst);
	564	return r;
	565	}
	566
37356079 RH	567	/* NEON helpers. */
	568
	569	/* Constants 256 and 512 are used in some helpers; we avoid relying on
	570	* int->float conversions at run-time. */
	571	#define float64_256 make_float64(0x4070000000000000LL)
	572	#define float64_512 make_float64(0x4080000000000000LL)
	573	#define float16_maxnorm make_float16(0x7bff)
	574	#define float32_maxnorm make_float32(0x7f7fffff)
	575	#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
	576
	577	/* Reciprocal functions
	578	*
	579	* The algorithm that must be used to calculate the estimate
	580	* is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
	581	*/
	582
	583	/* See RecipEstimate()
	584	*
	585	* input is a 9 bit fixed point number
	586	* input range 256 .. 511 for a number from 0.5 <= x < 1.0.
	587	* result range 256 .. 511 for a number from 1.0 to 511/256.
	588	*/
	589
	590	static int recip_estimate(int input)
	591	{
	592	int a, b, r;
	593	assert(256 <= input && input < 512);
	594	a = (input * 2) + 1;
	595	b = (1 << 19) / a;
	596	r = (b + 1) >> 1;
	597	assert(256 <= r && r < 512);
	598	return r;
	599	}
	600
	601	/*
	602	* Common wrapper to call recip_estimate
	603	*
	604	* The parameters are exponent and 64 bit fraction (without implicit
	605	* bit) where the binary point is nominally at bit 52. Returns a
	606	* float64 which can then be rounded to the appropriate size by the
	607	* callee.
	608	*/
	609
	610	static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
	611	{
	612	uint32_t scaled, estimate;
	613	uint64_t result_frac;
	614	int result_exp;
	615
	616	/* Handle sub-normals */
	617	if (*exp == 0) {
	618	if (extract64(frac, 51, 1) == 0) {
	619	*exp = -1;
	620	frac <<= 2;
	621	} else {
	622	frac <<= 1;
	623	}
	624	}
	625
	626	/* scaled = UInt('1':fraction<51:44>) */
	627	scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
	628	estimate = recip_estimate(scaled);
	629
	630	result_exp = exp_off - *exp;
631	result_frac = deposit64(0, 44, 8, estimate);
632	if (result_exp == 0) {
633	result_frac = deposit64(result_frac >> 1, 51, 1, 1);
634	} else if (result_exp == -1) {
635	result_frac = deposit64(result_frac >> 2, 50, 2, 1);
636	result_exp = 0;
637	}
638
639	*exp = result_exp;
640
641	return result_frac;
642	}
643
644	static bool round_to_inf(float_status *fpst, bool sign_bit)
645	{
646	switch (fpst->float_rounding_mode) {
647	case float_round_nearest_even: /* Round to Nearest */
648	return true;
649	case float_round_up: /* Round to +Inf */
650	return !sign_bit;
651	case float_round_down: /* Round to -Inf */
652	return sign_bit;
653	case float_round_to_zero: /* Round to Zero */
654	return false;
3dede407 RH	655	default:
3dede407 RH	656	g_assert_not_reached();
37356079	657	}
37356079 RH	658	}
	659
	660	uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
	661	{
	662	float_status *fpst = fpstp;
	663	float16 f16 = float16_squash_input_denormal(input, fpst);
	664	uint32_t f16_val = float16_val(f16);
	665	uint32_t f16_sign = float16_is_neg(f16);
	666	int f16_exp = extract32(f16_val, 10, 5);
	667	uint32_t f16_frac = extract32(f16_val, 0, 10);
	668	uint64_t f64_frac;
	669
	670	if (float16_is_any_nan(f16)) {
	671	float16 nan = f16;
	672	if (float16_is_signaling_nan(f16, fpst)) {
	673	float_raise(float_flag_invalid, fpst);
103e7579 JK	674	if (!fpst->default_nan_mode) {
	675	nan = float16_silence_nan(f16, fpst);
	676	}
37356079 RH	677	}
	678	if (fpst->default_nan_mode) {
	679	nan = float16_default_nan(fpst);
	680	}
	681	return nan;
	682	} else if (float16_is_infinity(f16)) {
	683	return float16_set_sign(float16_zero, float16_is_neg(f16));
	684	} else if (float16_is_zero(f16)) {
	685	float_raise(float_flag_divbyzero, fpst);
	686	return float16_set_sign(float16_infinity, float16_is_neg(f16));
	687	} else if (float16_abs(f16) < (1 << 8)) {
	688	/* Abs(value) < 2.0^-16 */
	689	float_raise(float_flag_overflow \| float_flag_inexact, fpst);
	690	if (round_to_inf(fpst, f16_sign)) {
	691	return float16_set_sign(float16_infinity, f16_sign);
	692	} else {
	693	return float16_set_sign(float16_maxnorm, f16_sign);
	694	}
	695	} else if (f16_exp >= 29 && fpst->flush_to_zero) {
	696	float_raise(float_flag_underflow, fpst);
	697	return float16_set_sign(float16_zero, float16_is_neg(f16));
	698	}
	699
	700	f64_frac = call_recip_estimate(&f16_exp, 29,
	701	((uint64_t) f16_frac) << (52 - 10));
	702
	703	/* result = sign : result_exp<4:0> : fraction<51:42> */
	704	f16_val = deposit32(0, 15, 1, f16_sign);
	705	f16_val = deposit32(f16_val, 10, 5, f16_exp);
	706	f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
	707	return make_float16(f16_val);
	708	}
	709
	710	float32 HELPER(recpe_f32)(float32 input, void *fpstp)
	711	{
	712	float_status *fpst = fpstp;
	713	float32 f32 = float32_squash_input_denormal(input, fpst);
	714	uint32_t f32_val = float32_val(f32);
	715	bool f32_sign = float32_is_neg(f32);
	716	int f32_exp = extract32(f32_val, 23, 8);
	717	uint32_t f32_frac = extract32(f32_val, 0, 23);
	718	uint64_t f64_frac;
	719
	720	if (float32_is_any_nan(f32)) {
	721	float32 nan = f32;
	722	if (float32_is_signaling_nan(f32, fpst)) {
	723	float_raise(float_flag_invalid, fpst);
103e7579 JK	724	if (!fpst->default_nan_mode) {
	725	nan = float32_silence_nan(f32, fpst);
	726	}
37356079 RH	727	}
	728	if (fpst->default_nan_mode) {
	729	nan = float32_default_nan(fpst);
	730	}
	731	return nan;
	732	} else if (float32_is_infinity(f32)) {
	733	return float32_set_sign(float32_zero, float32_is_neg(f32));
	734	} else if (float32_is_zero(f32)) {
	735	float_raise(float_flag_divbyzero, fpst);
	736	return float32_set_sign(float32_infinity, float32_is_neg(f32));
	737	} else if (float32_abs(f32) < (1ULL << 21)) {
	738	/* Abs(value) < 2.0^-128 */
	739	float_raise(float_flag_overflow \| float_flag_inexact, fpst);
	740	if (round_to_inf(fpst, f32_sign)) {
	741	return float32_set_sign(float32_infinity, f32_sign);
	742	} else {
	743	return float32_set_sign(float32_maxnorm, f32_sign);
	744	}
	745	} else if (f32_exp >= 253 && fpst->flush_to_zero) {
	746	float_raise(float_flag_underflow, fpst);
	747	return float32_set_sign(float32_zero, float32_is_neg(f32));
	748	}
	749
	750	f64_frac = call_recip_estimate(&f32_exp, 253,
	751	((uint64_t) f32_frac) << (52 - 23));
	752
	753	/* result = sign : result_exp<7:0> : fraction<51:29> */
	754	f32_val = deposit32(0, 31, 1, f32_sign);
	755	f32_val = deposit32(f32_val, 23, 8, f32_exp);
	756	f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
	757	return make_float32(f32_val);
	758	}
	759
	760	float64 HELPER(recpe_f64)(float64 input, void *fpstp)
	761	{
	762	float_status *fpst = fpstp;
	763	float64 f64 = float64_squash_input_denormal(input, fpst);
	764	uint64_t f64_val = float64_val(f64);
	765	bool f64_sign = float64_is_neg(f64);
	766	int f64_exp = extract64(f64_val, 52, 11);
	767	uint64_t f64_frac = extract64(f64_val, 0, 52);
	768
	769	/* Deal with any special cases */
	770	if (float64_is_any_nan(f64)) {
	771	float64 nan = f64;
	772	if (float64_is_signaling_nan(f64, fpst)) {
	773	float_raise(float_flag_invalid, fpst);
103e7579 JK	774	if (!fpst->default_nan_mode) {
	775	nan = float64_silence_nan(f64, fpst);
	776	}
37356079 RH	777	}
	778	if (fpst->default_nan_mode) {
	779	nan = float64_default_nan(fpst);
	780	}
	781	return nan;
	782	} else if (float64_is_infinity(f64)) {
	783	return float64_set_sign(float64_zero, float64_is_neg(f64));
	784	} else if (float64_is_zero(f64)) {
	785	float_raise(float_flag_divbyzero, fpst);
	786	return float64_set_sign(float64_infinity, float64_is_neg(f64));
	787	} else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
	788	/* Abs(value) < 2.0^-1024 */
	789	float_raise(float_flag_overflow \| float_flag_inexact, fpst);
	790	if (round_to_inf(fpst, f64_sign)) {
	791	return float64_set_sign(float64_infinity, f64_sign);
	792	} else {
	793	return float64_set_sign(float64_maxnorm, f64_sign);
	794	}
	795	} else if (f64_exp >= 2045 && fpst->flush_to_zero) {
	796	float_raise(float_flag_underflow, fpst);
	797	return float64_set_sign(float64_zero, float64_is_neg(f64));
	798	}
	799
	800	f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
	801
	802	/* result = sign : result_exp<10:0> : fraction<51:0>; */
	803	f64_val = deposit64(0, 63, 1, f64_sign);
	804	f64_val = deposit64(f64_val, 52, 11, f64_exp);
	805	f64_val = deposit64(f64_val, 0, 52, f64_frac);
	806	return make_float64(f64_val);
	807	}
	808
	809	/* The algorithm that must be used to calculate the estimate
	810	* is specified by the ARM ARM.
	811	*/
	812
	813	static int do_recip_sqrt_estimate(int a)
	814	{
	815	int b, estimate;
	816
	817	assert(128 <= a && a < 512);
	818	if (a < 256) {
	819	a = a * 2 + 1;
	820	} else {
	821	a = (a >> 1) << 1;
	822	a = (a + 1) * 2;
	823	}
	824	b = 512;
	825	while (a * (b + 1) * (b + 1) < (1 << 28)) {
	826	b += 1;
	827	}
	828	estimate = (b + 1) / 2;
	829	assert(256 <= estimate && estimate < 512);
	830
	831	return estimate;
	832	}
	833
	834
	835	static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
	836	{
	837	int estimate;
	838	uint32_t scaled;
	839
	840	if (*exp == 0) {
841	while (extract64(frac, 51, 1) == 0) {
842	frac = frac << 1;
843	*exp -= 1;
844	}
845	frac = extract64(frac, 0, 51) << 1;
846	}
847
848	if (*exp & 1) {
849	/* scaled = UInt('01':fraction<51:45>) */
850	scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
851	} else {
852	/* scaled = UInt('1':fraction<51:44>) */
853	scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
854	}
855	estimate = do_recip_sqrt_estimate(scaled);
856
857	exp = (exp_off - exp) / 2;
858	return extract64(estimate, 0, 8) << 44;
859	}
860
861	uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
862	{
863	float_status *s = fpstp;
864	float16 f16 = float16_squash_input_denormal(input, s);
865	uint16_t val = float16_val(f16);
866	bool f16_sign = float16_is_neg(f16);
867	int f16_exp = extract32(val, 10, 5);
868	uint16_t f16_frac = extract32(val, 0, 10);
869	uint64_t f64_frac;
870
871	if (float16_is_any_nan(f16)) {
872	float16 nan = f16;
873	if (float16_is_signaling_nan(f16, s)) {
874	float_raise(float_flag_invalid, s);
103e7579 JK	875	if (!s->default_nan_mode) {
	876	nan = float16_silence_nan(f16, fpstp);
	877	}
37356079 RH	878	}
	879	if (s->default_nan_mode) {
	880	nan = float16_default_nan(s);
	881	}
	882	return nan;
	883	} else if (float16_is_zero(f16)) {
	884	float_raise(float_flag_divbyzero, s);
	885	return float16_set_sign(float16_infinity, f16_sign);
	886	} else if (f16_sign) {
	887	float_raise(float_flag_invalid, s);
	888	return float16_default_nan(s);
	889	} else if (float16_is_infinity(f16)) {
	890	return float16_zero;
	891	}
	892
	893	/* Scale and normalize to a double-precision value between 0.25 and 1.0,
	894	* preserving the parity of the exponent. */
	895
	896	f64_frac = ((uint64_t) f16_frac) << (52 - 10);
	897
	898	f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
	899
	900	/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
	901	val = deposit32(0, 15, 1, f16_sign);
	902	val = deposit32(val, 10, 5, f16_exp);
	903	val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
	904	return make_float16(val);
	905	}
	906
	907	float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
	908	{
	909	float_status *s = fpstp;
	910	float32 f32 = float32_squash_input_denormal(input, s);
	911	uint32_t val = float32_val(f32);
	912	uint32_t f32_sign = float32_is_neg(f32);
	913	int f32_exp = extract32(val, 23, 8);
	914	uint32_t f32_frac = extract32(val, 0, 23);
	915	uint64_t f64_frac;
	916
	917	if (float32_is_any_nan(f32)) {
	918	float32 nan = f32;
	919	if (float32_is_signaling_nan(f32, s)) {
	920	float_raise(float_flag_invalid, s);
103e7579 JK	921	if (!s->default_nan_mode) {
	922	nan = float32_silence_nan(f32, fpstp);
	923	}
37356079 RH	924	}
	925	if (s->default_nan_mode) {
	926	nan = float32_default_nan(s);
	927	}
	928	return nan;
	929	} else if (float32_is_zero(f32)) {
	930	float_raise(float_flag_divbyzero, s);
	931	return float32_set_sign(float32_infinity, float32_is_neg(f32));
	932	} else if (float32_is_neg(f32)) {
	933	float_raise(float_flag_invalid, s);
	934	return float32_default_nan(s);
	935	} else if (float32_is_infinity(f32)) {
	936	return float32_zero;
	937	}
	938
	939	/* Scale and normalize to a double-precision value between 0.25 and 1.0,
	940	* preserving the parity of the exponent. */
	941
	942	f64_frac = ((uint64_t) f32_frac) << 29;
	943
	944	f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
	945
	946	/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
	947	val = deposit32(0, 31, 1, f32_sign);
	948	val = deposit32(val, 23, 8, f32_exp);
	949	val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
	950	return make_float32(val);
	951	}
	952
	953	float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
	954	{
	955	float_status *s = fpstp;
	956	float64 f64 = float64_squash_input_denormal(input, s);
	957	uint64_t val = float64_val(f64);
	958	bool f64_sign = float64_is_neg(f64);
	959	int f64_exp = extract64(val, 52, 11);
	960	uint64_t f64_frac = extract64(val, 0, 52);
	961
	962	if (float64_is_any_nan(f64)) {
	963	float64 nan = f64;
	964	if (float64_is_signaling_nan(f64, s)) {
	965	float_raise(float_flag_invalid, s);
103e7579 JK	966	if (!s->default_nan_mode) {
	967	nan = float64_silence_nan(f64, fpstp);
	968	}
37356079 RH	969	}
	970	if (s->default_nan_mode) {
	971	nan = float64_default_nan(s);
	972	}
	973	return nan;
	974	} else if (float64_is_zero(f64)) {
	975	float_raise(float_flag_divbyzero, s);
	976	return float64_set_sign(float64_infinity, float64_is_neg(f64));
	977	} else if (float64_is_neg(f64)) {
	978	float_raise(float_flag_invalid, s);
	979	return float64_default_nan(s);
	980	} else if (float64_is_infinity(f64)) {
	981	return float64_zero;
	982	}
	983
	984	f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
	985
	986	/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
	987	val = deposit64(0, 61, 1, f64_sign);
	988	val = deposit64(val, 52, 11, f64_exp);
	989	val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
	990	return make_float64(val);
	991	}
	992
fe6fb4be	993	uint32_t HELPER(recpe_u32)(uint32_t a)
37356079	994	{
37356079 RH	995	int input, estimate;
	996
	997	if ((a & 0x80000000) == 0) {
	998	return 0xffffffff;
	999	}
	1000
	1001	input = extract32(a, 23, 9);
	1002	estimate = recip_estimate(input);
	1003
	1004	return deposit32(0, (32 - 9), 9, estimate);
	1005	}
	1006
fe6fb4be	1007	uint32_t HELPER(rsqrte_u32)(uint32_t a)
37356079 RH	1008	{
	1009	int estimate;
	1010
	1011	if ((a & 0xc0000000) == 0) {
	1012	return 0xffffffff;
	1013	}
	1014
	1015	estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
	1016
	1017	return deposit32(0, 23, 9, estimate);
	1018	}
	1019
	1020	/* VFPv4 fused multiply-accumulate */
9886fe28 PM	1021	dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
	1022	dh_ctype_f16 c, void *fpstp)
	1023	{
	1024	float_status *fpst = fpstp;
	1025	return float16_muladd(a, b, c, 0, fpst);
	1026	}
	1027
37356079 RH	1028	float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
	1029	{
	1030	float_status *fpst = fpstp;
	1031	return float32_muladd(a, b, c, 0, fpst);
	1032	}
	1033
	1034	float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
	1035	{
	1036	float_status *fpst = fpstp;
	1037	return float64_muladd(a, b, c, 0, fpst);
	1038	}
	1039
	1040	/* ARMv8 round to integral */
0a6f4b4c PM	1041	dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
	1042	{
	1043	return float16_round_to_int(x, fp_status);
	1044	}
	1045
37356079 RH	1046	float32 HELPER(rints_exact)(float32 x, void *fp_status)
	1047	{
	1048	return float32_round_to_int(x, fp_status);
	1049	}
	1050
	1051	float64 HELPER(rintd_exact)(float64 x, void *fp_status)
	1052	{
	1053	return float64_round_to_int(x, fp_status);
	1054	}
	1055
0a6f4b4c PM	1056	dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
	1057	{
	1058	int old_flags = get_float_exception_flags(fp_status), new_flags;
	1059	float16 ret;
	1060
	1061	ret = float16_round_to_int(x, fp_status);
	1062
	1063	/* Suppress any inexact exceptions the conversion produced */
	1064	if (!(old_flags & float_flag_inexact)) {
	1065	new_flags = get_float_exception_flags(fp_status);
	1066	set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
	1067	}
	1068
	1069	return ret;
	1070	}
	1071
37356079 RH	1072	float32 HELPER(rints)(float32 x, void *fp_status)
	1073	{
	1074	int old_flags = get_float_exception_flags(fp_status), new_flags;
	1075	float32 ret;
	1076
	1077	ret = float32_round_to_int(x, fp_status);
	1078
	1079	/* Suppress any inexact exceptions the conversion produced */
	1080	if (!(old_flags & float_flag_inexact)) {
	1081	new_flags = get_float_exception_flags(fp_status);
	1082	set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
	1083	}
	1084
	1085	return ret;
	1086	}
	1087
	1088	float64 HELPER(rintd)(float64 x, void *fp_status)
	1089	{
	1090	int old_flags = get_float_exception_flags(fp_status), new_flags;
	1091	float64 ret;
	1092
	1093	ret = float64_round_to_int(x, fp_status);
	1094
	1095	new_flags = get_float_exception_flags(fp_status);
	1096
	1097	/* Suppress any inexact exceptions the conversion produced */
	1098	if (!(old_flags & float_flag_inexact)) {
	1099	new_flags = get_float_exception_flags(fp_status);
	1100	set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
	1101	}
	1102
	1103	return ret;
	1104	}
	1105
	1106	/* Convert ARM rounding mode to softfloat */
	1107	int arm_rmode_to_sf(int rmode)
	1108	{
	1109	switch (rmode) {
	1110	case FPROUNDING_TIEAWAY:
	1111	rmode = float_round_ties_away;
	1112	break;
	1113	case FPROUNDING_ODD:
	1114	/* FIXME: add support for TIEAWAY and ODD */
	1115	qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
	1116	rmode);
	1117	/* fall through for now */
	1118	case FPROUNDING_TIEEVEN:
	1119	default:
	1120	rmode = float_round_nearest_even;
	1121	break;
	1122	case FPROUNDING_POSINF:
	1123	rmode = float_round_up;
	1124	break;
	1125	case FPROUNDING_NEGINF:
	1126	rmode = float_round_down;
	1127	break;
	1128	case FPROUNDING_ZERO:
	1129	rmode = float_round_to_zero;
	1130	break;
	1131	}
	1132	return rmode;
	1133	}
6c1f6f27 RH	1134
	1135	/*
	1136	* Implement float64 to int32_t conversion without saturation;
	1137	* the result is supplied modulo 2^32.
	1138	*/
	1139	uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
	1140	{
	1141	float_status *status = vstatus;
	1142	uint32_t exp, sign;
	1143	uint64_t frac;
	1144	uint32_t inexact = 1; /* !Z */
	1145
	1146	sign = extract64(value, 63, 1);
	1147	exp = extract64(value, 52, 11);
	1148	frac = extract64(value, 0, 52);
	1149
	1150	if (exp == 0) {
	1151	/* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
	1152	inexact = sign;
	1153	if (frac != 0) {
	1154	if (status->flush_inputs_to_zero) {
	1155	float_raise(float_flag_input_denormal, status);
	1156	} else {
	1157	float_raise(float_flag_inexact, status);
	1158	inexact = 1;
	1159	}
	1160	}
	1161	frac = 0;
	1162	} else if (exp == 0x7ff) {
	1163	/* This operation raises Invalid for both NaN and overflow (Inf). */
	1164	float_raise(float_flag_invalid, status);
	1165	frac = 0;
	1166	} else {
	1167	int true_exp = exp - 1023;
	1168	int shift = true_exp - 52;
	1169
	1170	/* Restore implicit bit. */
	1171	frac \|= 1ull << 52;
	1172
	1173	/* Shift the fraction into place. */
	1174	if (shift >= 0) {
	1175	/* The number is so large we must shift the fraction left. */
	1176	if (shift >= 64) {
	1177	/* The fraction is shifted out entirely. */
	1178	frac = 0;
	1179	} else {
	1180	frac <<= shift;
	1181	}
	1182	} else if (shift > -64) {
	1183	/* Normal case -- shift right and notice if bits shift out. */
	1184	inexact = (frac << (64 + shift)) != 0;
	1185	frac >>= -shift;
	1186	} else {
	1187	/* The fraction is shifted out entirely. */
	1188	frac = 0;
	1189	}
	1190
	1191	/* Notice overflow or inexact exceptions. */
	1192	if (true_exp > 31 \|\| frac > (sign ? 0x80000000ull : 0x7fffffff)) {
	1193	/* Overflow, for which this operation raises invalid. */
	1194	float_raise(float_flag_invalid, status);
	1195	inexact = 1;
	1196	} else if (inexact) {
	1197	float_raise(float_flag_inexact, status);
1198	}
1199
1200	/* Honor the sign. */
1201	if (sign) {
1202	frac = -frac;
1203	}
1204	}
1205
1206	/* Pack the result and the env->ZF representation of Z together. */
1207	return deposit64(frac, 32, 32, inexact);
1208	}
1209
1210	uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
1211	{
1212	uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
1213	uint32_t result = pair;
1214	uint32_t z = (pair >> 32) == 0;
1215
1216	/* Store Z, clear NCV, in FPSCR.NZCV. */
1217	env->vfp.xregs[ARM_VFP_FPSCR]
1218	= (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) \| (z * CPSR_Z);
1219
1220	return result;
1221	}
6bea2563 RH	1222
	1223	/* Round a float32 to an integer that fits in int32_t or int64_t. */
	1224	static float32 frint_s(float32 f, float_status *fpst, int intsize)
	1225	{
	1226	int old_flags = get_float_exception_flags(fpst);
	1227	uint32_t exp = extract32(f, 23, 8);
	1228
	1229	if (unlikely(exp == 0xff)) {
	1230	/* NaN or Inf. */
	1231	goto overflow;
	1232	}
	1233
	1234	/* Round and re-extract the exponent. */
	1235	f = float32_round_to_int(f, fpst);
	1236	exp = extract32(f, 23, 8);
	1237
	1238	/* Validate the range of the result. */
	1239	if (exp < 126 + intsize) {
	1240	/* abs(F) <= INT{N}_MAX */
	1241	return f;
	1242	}
	1243	if (exp == 126 + intsize) {
	1244	uint32_t sign = extract32(f, 31, 1);
	1245	uint32_t frac = extract32(f, 0, 23);
	1246	if (sign && frac == 0) {
	1247	/* F == INT{N}_MIN */
	1248	return f;
	1249	}
	1250	}
	1251
	1252	overflow:
	1253	/*
	1254	* Raise Invalid and return INT{N}_MIN as a float. Revert any
	1255	* inexact exception float32_round_to_int may have raised.
	1256	*/
	1257	set_float_exception_flags(old_flags \| float_flag_invalid, fpst);
	1258	return (0x100u + 126u + intsize) << 23;
	1259	}
	1260
	1261	float32 HELPER(frint32_s)(float32 f, void *fpst)
	1262	{
	1263	return frint_s(f, fpst, 32);
	1264	}
	1265
	1266	float32 HELPER(frint64_s)(float32 f, void *fpst)
	1267	{
	1268	return frint_s(f, fpst, 64);
	1269	}
	1270
	1271	/* Round a float64 to an integer that fits in int32_t or int64_t. */
	1272	static float64 frint_d(float64 f, float_status *fpst, int intsize)
	1273	{
	1274	int old_flags = get_float_exception_flags(fpst);
	1275	uint32_t exp = extract64(f, 52, 11);
	1276
	1277	if (unlikely(exp == 0x7ff)) {
	1278	/* NaN or Inf. */
	1279	goto overflow;
	1280	}
	1281
	1282	/* Round and re-extract the exponent. */
	1283	f = float64_round_to_int(f, fpst);
	1284	exp = extract64(f, 52, 11);
	1285
1286	/* Validate the range of the result. */
1287	if (exp < 1022 + intsize) {
1288	/* abs(F) <= INT{N}_MAX */
1289	return f;
1290	}
1291	if (exp == 1022 + intsize) {
1292	uint64_t sign = extract64(f, 63, 1);
1293	uint64_t frac = extract64(f, 0, 52);
1294	if (sign && frac == 0) {
1295	/* F == INT{N}_MIN */
1296	return f;
1297	}
1298	}
1299
1300	overflow:
1301	/*
1302	* Raise Invalid and return INT{N}_MIN as a float. Revert any
1303	* inexact exception float64_round_to_int may have raised.
1304	*/
1305	set_float_exception_flags(old_flags \| float_flag_invalid, fpst);
1306	return (uint64_t)(0x800 + 1022 + intsize) << 52;
1307	}
1308
1309	float64 HELPER(frint32_d)(float64 f, void *fpst)
1310	{
1311	return frint_d(f, fpst, 32);
1312	}
1313
1314	float64 HELPER(frint64_d)(float64 f, void *fpst)
1315	{
1316	return frint_d(f, fpst, 64);
1317	}
4a15527c	1318
9ca1d776 MZ	1319	void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
	1320	{
	1321	uint32_t syndrome;
	1322
	1323	switch (reg) {
	1324	case ARM_VFP_MVFR0:
	1325	case ARM_VFP_MVFR1:
	1326	case ARM_VFP_MVFR2:
	1327	if (!(arm_hcr_el2_eff(env) & HCR_TID3)) {
	1328	return;
	1329	}
	1330	break;
	1331	case ARM_VFP_FPSID:
	1332	if (!(arm_hcr_el2_eff(env) & HCR_TID0)) {
	1333	return;
	1334	}
	1335	break;
	1336	default:
	1337	g_assert_not_reached();
	1338	}
	1339
	1340	syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT)
	1341	\| ARM_EL_IL
	1342	\| (1 << 24) \| (0xe << 20) \| (7 << 14)
	1343	\| (reg << 10) \| (rt << 5) \| 1);
	1344
	1345	raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
	1346	}
	1347
4a15527c	1348	#endif