]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/vfp/vfp.h | |
3 | * | |
4 | * Copyright (C) 2004 ARM Limited. | |
5 | * Written by Deep Blue Solutions Limited. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | */ | |
11 | ||
12 | static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) | |
13 | { | |
14 | if (shift) { | |
15 | if (shift < 32) | |
16 | val = val >> shift | ((val << (32 - shift)) != 0); | |
17 | else | |
18 | val = val != 0; | |
19 | } | |
20 | return val; | |
21 | } | |
22 | ||
23 | static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) | |
24 | { | |
25 | if (shift) { | |
26 | if (shift < 64) | |
27 | val = val >> shift | ((val << (64 - shift)) != 0); | |
28 | else | |
29 | val = val != 0; | |
30 | } | |
31 | return val; | |
32 | } | |
33 | ||
34 | static inline u32 vfp_hi64to32jamming(u64 val) | |
35 | { | |
36 | u32 v; | |
37 | ||
38 | asm( | |
39 | "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t" | |
40 | "movcc %0, %R1\n\t" | |
41 | "orrcs %0, %R1, #1" | |
42 | : "=r" (v) : "r" (val) : "cc"); | |
43 | ||
44 | return v; | |
45 | } | |
46 | ||
47 | static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | |
48 | { | |
49 | asm( "adds %Q0, %Q2, %Q4\n\t" | |
50 | "adcs %R0, %R2, %R4\n\t" | |
51 | "adcs %Q1, %Q3, %Q5\n\t" | |
52 | "adc %R1, %R3, %R5" | |
53 | : "=r" (nl), "=r" (nh) | |
54 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) | |
55 | : "cc"); | |
56 | *resh = nh; | |
57 | *resl = nl; | |
58 | } | |
59 | ||
60 | static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) | |
61 | { | |
62 | asm( "subs %Q0, %Q2, %Q4\n\t" | |
63 | "sbcs %R0, %R2, %R4\n\t" | |
64 | "sbcs %Q1, %Q3, %Q5\n\t" | |
65 | "sbc %R1, %R3, %R5\n\t" | |
66 | : "=r" (nl), "=r" (nh) | |
67 | : "0" (nl), "1" (nh), "r" (ml), "r" (mh) | |
68 | : "cc"); | |
69 | *resh = nh; | |
70 | *resl = nl; | |
71 | } | |
72 | ||
73 | static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) | |
74 | { | |
75 | u32 nh, nl, mh, ml; | |
76 | u64 rh, rma, rmb, rl; | |
77 | ||
78 | nl = n; | |
79 | ml = m; | |
80 | rl = (u64)nl * ml; | |
81 | ||
82 | nh = n >> 32; | |
83 | rma = (u64)nh * ml; | |
84 | ||
85 | mh = m >> 32; | |
86 | rmb = (u64)nl * mh; | |
87 | rma += rmb; | |
88 | ||
89 | rh = (u64)nh * mh; | |
90 | rh += ((u64)(rma < rmb) << 32) + (rma >> 32); | |
91 | ||
92 | rma <<= 32; | |
93 | rl += rma; | |
94 | rh += (rl < rma); | |
95 | ||
96 | *resl = rl; | |
97 | *resh = rh; | |
98 | } | |
99 | ||
100 | static inline void shift64left(u64 *resh, u64 *resl, u64 n) | |
101 | { | |
102 | *resh = n >> 63; | |
103 | *resl = n << 1; | |
104 | } | |
105 | ||
106 | static inline u64 vfp_hi64multiply64(u64 n, u64 m) | |
107 | { | |
108 | u64 rh, rl; | |
109 | mul64to128(&rh, &rl, n, m); | |
110 | return rh | (rl != 0); | |
111 | } | |
112 | ||
113 | static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) | |
114 | { | |
115 | u64 mh, ml, remh, reml, termh, terml, z; | |
116 | ||
117 | if (nh >= m) | |
118 | return ~0ULL; | |
119 | mh = m >> 32; | |
438a7616 RK |
120 | if (mh << 32 <= nh) { |
121 | z = 0xffffffff00000000ULL; | |
122 | } else { | |
123 | z = nh; | |
124 | do_div(z, mh); | |
125 | z <<= 32; | |
126 | } | |
1da177e4 LT |
127 | mul64to128(&termh, &terml, m, z); |
128 | sub128(&remh, &reml, nh, nl, termh, terml); | |
129 | ml = m << 32; | |
130 | while ((s64)remh < 0) { | |
131 | z -= 0x100000000ULL; | |
132 | add128(&remh, &reml, remh, reml, mh, ml); | |
133 | } | |
134 | remh = (remh << 32) | (reml >> 32); | |
438a7616 RK |
135 | if (mh << 32 <= remh) { |
136 | z |= 0xffffffff; | |
137 | } else { | |
138 | do_div(remh, mh); | |
139 | z |= remh; | |
140 | } | |
1da177e4 LT |
141 | return z; |
142 | } | |
143 | ||
/*
 * Operations on unpacked elements
 *
 * vfp_sign_negate() flips the sign of an unpacked value, whose sign is
 * kept as bit 15 (0x8000) of a 16-bit field.  The argument is
 * parenthesised so that an expression such as (a | b) is negated as a
 * whole rather than being re-associated by operator precedence.
 */
#define vfp_sign_negate(sign)	((sign) ^ 0x8000)
148 | ||
149 | /* | |
150 | * Single-precision | |
151 | */ | |
152 | struct vfp_single { | |
153 | s16 exponent; | |
154 | u16 sign; | |
155 | u32 significand; | |
156 | }; | |
157 | ||
158 | extern s32 vfp_get_float(unsigned int reg); | |
0355b3e0 | 159 | extern void vfp_put_float(s32 val, unsigned int reg); |
1da177e4 LT |
160 | |
/*
 * Layout of single-precision values:
 *
 * VFP_SINGLE_MANTISSA_BITS - width of the mantissa field in the packed
 *			      float
 * VFP_SINGLE_EXPONENT_BITS - width of the exponent field in the packed
 *			      float
 * VFP_SINGLE_LOW_BITS      - count of low-order bits of the unpacked
 *			      significand that are dropped when the
 *			      value is packed again
 * VFP_SINGLE_LOW_BITS_MASK - mask covering exactly those low bits
 */
#define VFP_SINGLE_MANTISSA_BITS	(23)
#define VFP_SINGLE_EXPONENT_BITS	(8)
#define VFP_SINGLE_LOW_BITS		(32 - VFP_SINGLE_MANTISSA_BITS - 2)
#define VFP_SINGLE_LOW_BITS_MASK	((1 << VFP_SINGLE_LOW_BITS) - 1)

/*
 * Bit of the unpacked significand that marks a NaN as quiet: the top
 * mantissa bit, moved up by the number of low bits.
 */
#define VFP_SINGLE_SIGNIFICAND_QNAN	(1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS))
176 | ||
/*
 * Field accessors for packed single-precision numbers:
 *
 *  _sign     - isolate the sign bit (bit 31), left in place
 *  _negate   - toggle the sign bit
 *  _abs      - clear the sign bit
 *  _exponent - exponent field, shifted down to bit 0
 *  _mantissa - mantissa field
 */
#define vfp_single_packed_sign(v)	((v) & 0x80000000)
#define vfp_single_packed_negate(v)	((v) ^ 0x80000000)
#define vfp_single_packed_abs(v)	((v) & ~0x80000000)
#define vfp_single_packed_exponent(v)	(((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
#define vfp_single_packed_mantissa(v)	((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
185 | ||
186 | /* | |
187 | * Unpack a single-precision float. Note that this returns the magnitude | |
188 | * of the single-precision float mantissa with the 1. if necessary, | |
189 | * aligned to bit 30. | |
190 | */ | |
191 | static inline void vfp_single_unpack(struct vfp_single *s, s32 val) | |
192 | { | |
193 | u32 significand; | |
194 | ||
195 | s->sign = vfp_single_packed_sign(val) >> 16, | |
196 | s->exponent = vfp_single_packed_exponent(val); | |
197 | ||
198 | significand = (u32) val; | |
199 | significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; | |
200 | if (s->exponent && s->exponent != 255) | |
201 | significand |= 0x40000000; | |
202 | s->significand = significand; | |
203 | } | |
204 | ||
205 | /* | |
206 | * Re-pack a single-precision float. This assumes that the float is | |
207 | * already normalised such that the MSB is bit 30, _not_ bit 31. | |
208 | */ | |
209 | static inline s32 vfp_single_pack(struct vfp_single *s) | |
210 | { | |
211 | u32 val; | |
212 | val = (s->sign << 16) + | |
213 | (s->exponent << VFP_SINGLE_MANTISSA_BITS) + | |
214 | (s->significand >> VFP_SINGLE_LOW_BITS); | |
215 | return (s32)val; | |
216 | } | |
217 | ||
/*
 * Classification bits returned by vfp_single_type() and
 * vfp_double_type().
 */
#define VFP_NUMBER		(1<<0)
#define VFP_ZERO		(1<<1)
#define VFP_DENORMAL		(1<<2)
#define VFP_INFINITY		(1<<3)
#define VFP_NAN			(1<<4)
#define VFP_NAN_SIGNAL		(1<<5)

/* A quiet NaN is a plain NaN; a signalling NaN also has VFP_NAN_SIGNAL. */
#define VFP_QNAN		(VFP_NAN)
#define VFP_SNAN		(VFP_NAN|VFP_NAN_SIGNAL)
227 | ||
228 | static inline int vfp_single_type(struct vfp_single *s) | |
229 | { | |
230 | int type = VFP_NUMBER; | |
231 | if (s->exponent == 255) { | |
232 | if (s->significand == 0) | |
233 | type = VFP_INFINITY; | |
234 | else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) | |
235 | type = VFP_QNAN; | |
236 | else | |
237 | type = VFP_SNAN; | |
238 | } else if (s->exponent == 0) { | |
239 | if (s->significand == 0) | |
240 | type |= VFP_ZERO; | |
241 | else | |
242 | type |= VFP_DENORMAL; | |
243 | } | |
244 | return type; | |
245 | } | |
246 | ||
247 | #ifndef DEBUG | |
248 | #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except) | |
249 | u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions); | |
250 | #else | |
251 | u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); | |
252 | #endif | |
253 | ||
254 | /* | |
255 | * Double-precision | |
256 | */ | |
257 | struct vfp_double { | |
258 | s16 exponent; | |
259 | u16 sign; | |
260 | u64 significand; | |
261 | }; | |
262 | ||
263 | /* | |
264 | * VFP_REG_ZERO is a special register number for vfp_get_double | |
265 | * which returns (double)0.0. This is useful for the compare with | |
266 | * zero instructions. | |
267 | */ | |
268 | #define VFP_REG_ZERO 16 | |
269 | extern u64 vfp_get_double(unsigned int reg); | |
0355b3e0 | 270 | extern void vfp_put_double(u64 val, unsigned int reg); |
1da177e4 LT |
271 | |
/*
 * Layout of double-precision values:
 *
 * VFP_DOUBLE_MANTISSA_BITS - width of the mantissa field in the packed
 *			      double
 * VFP_DOUBLE_EXPONENT_BITS - width of the exponent field in the packed
 *			      double
 * VFP_DOUBLE_LOW_BITS      - count of low-order bits of the unpacked
 *			      significand that are dropped when the
 *			      value is packed again
 * VFP_DOUBLE_LOW_BITS_MASK - mask covering exactly those low bits
 */
#define VFP_DOUBLE_MANTISSA_BITS	(52)
#define VFP_DOUBLE_EXPONENT_BITS	(11)
#define VFP_DOUBLE_LOW_BITS		(64 - VFP_DOUBLE_MANTISSA_BITS - 2)
#define VFP_DOUBLE_LOW_BITS_MASK	((1 << VFP_DOUBLE_LOW_BITS) - 1)

/*
 * Bit of the unpacked significand that marks a NaN as quiet: the top
 * mantissa bit, moved up by the number of low bits.
 */
#define VFP_DOUBLE_SIGNIFICAND_QNAN	(1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS))
281 | ||
/*
 * Field accessors for packed double-precision numbers:
 *
 *  _sign     - isolate the sign bit (bit 63), left in place
 *  _negate   - toggle the sign bit
 *  _abs      - clear the sign bit
 *  _exponent - exponent field, shifted down to bit 0
 *  _mantissa - mantissa field
 */
#define vfp_double_packed_sign(v)	((v) & (1ULL << 63))
#define vfp_double_packed_negate(v)	((v) ^ (1ULL << 63))
#define vfp_double_packed_abs(v)	((v) & ~(1ULL << 63))
#define vfp_double_packed_exponent(v)	(((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
#define vfp_double_packed_mantissa(v)	((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
290 | ||
291 | /* | |
292 | * Unpack a double-precision float. Note that this returns the magnitude | |
293 | * of the double-precision float mantissa with the 1. if necessary, | |
294 | * aligned to bit 62. | |
295 | */ | |
296 | static inline void vfp_double_unpack(struct vfp_double *s, s64 val) | |
297 | { | |
298 | u64 significand; | |
299 | ||
300 | s->sign = vfp_double_packed_sign(val) >> 48; | |
301 | s->exponent = vfp_double_packed_exponent(val); | |
302 | ||
303 | significand = (u64) val; | |
304 | significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; | |
305 | if (s->exponent && s->exponent != 2047) | |
306 | significand |= (1ULL << 62); | |
307 | s->significand = significand; | |
308 | } | |
309 | ||
310 | /* | |
311 | * Re-pack a double-precision float. This assumes that the float is | |
312 | * already normalised such that the MSB is bit 30, _not_ bit 31. | |
313 | */ | |
314 | static inline s64 vfp_double_pack(struct vfp_double *s) | |
315 | { | |
316 | u64 val; | |
317 | val = ((u64)s->sign << 48) + | |
318 | ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + | |
319 | (s->significand >> VFP_DOUBLE_LOW_BITS); | |
320 | return (s64)val; | |
321 | } | |
322 | ||
323 | static inline int vfp_double_type(struct vfp_double *s) | |
324 | { | |
325 | int type = VFP_NUMBER; | |
326 | if (s->exponent == 2047) { | |
327 | if (s->significand == 0) | |
328 | type = VFP_INFINITY; | |
329 | else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) | |
330 | type = VFP_QNAN; | |
331 | else | |
332 | type = VFP_SNAN; | |
333 | } else if (s->exponent == 0) { | |
334 | if (s->significand == 0) | |
335 | type |= VFP_ZERO; | |
336 | else | |
337 | type |= VFP_DENORMAL; | |
338 | } | |
339 | return type; | |
340 | } | |
341 | ||
342 | u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); | |
343 | ||
1da177e4 LT |
344 | u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); |
345 | ||
/*
 * Special flag telling the normalisation code to leave the value
 * un-normalised.
 */
#define VFP_NAN_FLAG	0x100
7c6f2514 DJ |
350 | |
/*
 * Bit pattern used as the initial (unset) value of the exception mask,
 * in case nothing handles an instruction.  It is all ones except for
 * the NaN flag, which gets masked out before we check for an error.
 */
#define VFP_EXCEPTION_ERROR	((u32)-1 & ~VFP_NAN_FLAG)
4cc9bd2e GF |
358 | |
/*
 * Flags describing the type of a VFP instruction (struct op 'flags'):
 *
 *  OP_SCALAR - the operation always operates in scalar mode
 *  OP_SD     - the instruction exceptionally writes to a single
 *	        precision result
 *  OP_DD     - the instruction exceptionally writes to a double
 *	        precision result
 *
 * Note that OP_SD and OP_DD are defined to the same bit value.
 */
#define OP_SCALAR	(1 << 0)
#define OP_SD		(1 << 1)
#define OP_DD		(1 << 1)
4cc9bd2e GF |
368 | |
369 | struct op { | |
370 | u32 (* const fn)(int dd, int dn, int dm, u32 fpscr); | |
371 | u32 flags; | |
372 | }; | |
c6428464 CM |
373 | |
/* Only declared for SMP builds; implemented outside this header. */
#ifdef CONFIG_SMP
extern void vfp_save_state(void *location, u32 fpexc);
#endif