]>
Commit | Line | Data |
---|---|---|
f299f437 BS |
1 | /* |
2 | * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers | |
3 | * | |
4 | * Copyright (c) 2003 Fabrice Bellard | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
b6a0aa05 | 20 | #include "qemu/osdep.h" |
f299f437 BS |
21 | #include <math.h> |
22 | #include "cpu.h" | |
2ef6175a | 23 | #include "exec/helper-proto.h" |
c334a388 | 24 | #include "qemu/host-utils.h" |
f08b6170 | 25 | #include "exec/cpu_ldst.h" |
92fc4b58 | 26 | |
f299f437 BS |
27 | #define FPU_RC_MASK 0xc00 |
28 | #define FPU_RC_NEAR 0x000 | |
29 | #define FPU_RC_DOWN 0x400 | |
30 | #define FPU_RC_UP 0x800 | |
31 | #define FPU_RC_CHOP 0xc00 | |
32 | ||
33 | #define MAXTAN 9223372036854775808.0 | |
34 | ||
/*
 * Accessors for x86 80-bit extended-precision values viewed through a
 * union exposing l.upper (sign bit 0x8000 plus 15-bit exponent) and
 * l.lower (64-bit significand).
 *
 * Every macro argument is parenthesized so that expressions such as
 * EXPD(*p) expand correctly, and BIASEXPONENT is a proper statement macro.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with the bias (i.e. 2^0), keeping the sign. */
#define BIASEXPONENT(fp)                                        \
    do {                                                        \
        (fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS;    \
    } while (0)
42 | ||
43 | #define FPUS_IE (1 << 0) | |
44 | #define FPUS_DE (1 << 1) | |
45 | #define FPUS_ZE (1 << 2) | |
46 | #define FPUS_OE (1 << 3) | |
47 | #define FPUS_UE (1 << 4) | |
48 | #define FPUS_PE (1 << 5) | |
49 | #define FPUS_SF (1 << 6) | |
50 | #define FPUS_SE (1 << 7) | |
51 | #define FPUS_B (1 << 15) | |
52 | ||
53 | #define FPUC_EM 0x3f | |
54 | ||
55 | #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) | |
56 | #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) | |
57 | #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) | |
58 | ||
d3eb5eae | 59 | static inline void fpush(CPUX86State *env) |
f299f437 BS |
60 | { |
61 | env->fpstt = (env->fpstt - 1) & 7; | |
62 | env->fptags[env->fpstt] = 0; /* validate stack entry */ | |
63 | } | |
64 | ||
d3eb5eae | 65 | static inline void fpop(CPUX86State *env) |
f299f437 BS |
66 | { |
67 | env->fptags[env->fpstt] = 1; /* invalidate stack entry */ | |
68 | env->fpstt = (env->fpstt + 1) & 7; | |
69 | } | |
70 | ||
6cad09d2 PD |
71 | static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr, |
72 | uintptr_t retaddr) | |
f299f437 BS |
73 | { |
74 | CPU_LDoubleU temp; | |
75 | ||
6cad09d2 PD |
76 | temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); |
77 | temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); | |
f299f437 BS |
78 | return temp.d; |
79 | } | |
80 | ||
6cad09d2 PD |
81 | static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, |
82 | uintptr_t retaddr) | |
f299f437 BS |
83 | { |
84 | CPU_LDoubleU temp; | |
85 | ||
86 | temp.d = f; | |
6cad09d2 PD |
87 | cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); |
88 | cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); | |
f299f437 BS |
89 | } |
90 | ||
91 | /* x87 FPU helpers */ | |
92 | ||
d3eb5eae | 93 | static inline double floatx80_to_double(CPUX86State *env, floatx80 a) |
f299f437 BS |
94 | { |
95 | union { | |
96 | float64 f64; | |
97 | double d; | |
98 | } u; | |
99 | ||
100 | u.f64 = floatx80_to_float64(a, &env->fp_status); | |
101 | return u.d; | |
102 | } | |
103 | ||
d3eb5eae | 104 | static inline floatx80 double_to_floatx80(CPUX86State *env, double a) |
f299f437 BS |
105 | { |
106 | union { | |
107 | float64 f64; | |
108 | double d; | |
109 | } u; | |
110 | ||
111 | u.d = a; | |
112 | return float64_to_floatx80(u.f64, &env->fp_status); | |
113 | } | |
114 | ||
d3eb5eae | 115 | static void fpu_set_exception(CPUX86State *env, int mask) |
f299f437 BS |
116 | { |
117 | env->fpus |= mask; | |
118 | if (env->fpus & (~env->fpuc & FPUC_EM)) { | |
119 | env->fpus |= FPUS_SE | FPUS_B; | |
120 | } | |
121 | } | |
122 | ||
d3eb5eae | 123 | static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) |
f299f437 BS |
124 | { |
125 | if (floatx80_is_zero(b)) { | |
d3eb5eae | 126 | fpu_set_exception(env, FPUS_ZE); |
f299f437 BS |
127 | } |
128 | return floatx80_div(a, b, &env->fp_status); | |
129 | } | |
130 | ||
6cad09d2 | 131 | static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) |
f299f437 BS |
132 | { |
133 | if (env->cr[0] & CR0_NE_MASK) { | |
6cad09d2 | 134 | raise_exception_ra(env, EXCP10_COPR, retaddr); |
f299f437 BS |
135 | } |
136 | #if !defined(CONFIG_USER_ONLY) | |
137 | else { | |
138 | cpu_set_ferr(env); | |
139 | } | |
140 | #endif | |
141 | } | |
142 | ||
d3eb5eae | 143 | void helper_flds_FT0(CPUX86State *env, uint32_t val) |
f299f437 BS |
144 | { |
145 | union { | |
146 | float32 f; | |
147 | uint32_t i; | |
148 | } u; | |
149 | ||
150 | u.i = val; | |
151 | FT0 = float32_to_floatx80(u.f, &env->fp_status); | |
152 | } | |
153 | ||
d3eb5eae | 154 | void helper_fldl_FT0(CPUX86State *env, uint64_t val) |
f299f437 BS |
155 | { |
156 | union { | |
157 | float64 f; | |
158 | uint64_t i; | |
159 | } u; | |
160 | ||
161 | u.i = val; | |
162 | FT0 = float64_to_floatx80(u.f, &env->fp_status); | |
163 | } | |
164 | ||
d3eb5eae | 165 | void helper_fildl_FT0(CPUX86State *env, int32_t val) |
f299f437 BS |
166 | { |
167 | FT0 = int32_to_floatx80(val, &env->fp_status); | |
168 | } | |
169 | ||
d3eb5eae | 170 | void helper_flds_ST0(CPUX86State *env, uint32_t val) |
f299f437 BS |
171 | { |
172 | int new_fpstt; | |
173 | union { | |
174 | float32 f; | |
175 | uint32_t i; | |
176 | } u; | |
177 | ||
178 | new_fpstt = (env->fpstt - 1) & 7; | |
179 | u.i = val; | |
180 | env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); | |
181 | env->fpstt = new_fpstt; | |
182 | env->fptags[new_fpstt] = 0; /* validate stack entry */ | |
183 | } | |
184 | ||
d3eb5eae | 185 | void helper_fldl_ST0(CPUX86State *env, uint64_t val) |
f299f437 BS |
186 | { |
187 | int new_fpstt; | |
188 | union { | |
189 | float64 f; | |
190 | uint64_t i; | |
191 | } u; | |
192 | ||
193 | new_fpstt = (env->fpstt - 1) & 7; | |
194 | u.i = val; | |
195 | env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); | |
196 | env->fpstt = new_fpstt; | |
197 | env->fptags[new_fpstt] = 0; /* validate stack entry */ | |
198 | } | |
199 | ||
d3eb5eae | 200 | void helper_fildl_ST0(CPUX86State *env, int32_t val) |
f299f437 BS |
201 | { |
202 | int new_fpstt; | |
203 | ||
204 | new_fpstt = (env->fpstt - 1) & 7; | |
205 | env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); | |
206 | env->fpstt = new_fpstt; | |
207 | env->fptags[new_fpstt] = 0; /* validate stack entry */ | |
208 | } | |
209 | ||
d3eb5eae | 210 | void helper_fildll_ST0(CPUX86State *env, int64_t val) |
f299f437 BS |
211 | { |
212 | int new_fpstt; | |
213 | ||
214 | new_fpstt = (env->fpstt - 1) & 7; | |
215 | env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); | |
216 | env->fpstt = new_fpstt; | |
217 | env->fptags[new_fpstt] = 0; /* validate stack entry */ | |
218 | } | |
219 | ||
d3eb5eae | 220 | uint32_t helper_fsts_ST0(CPUX86State *env) |
f299f437 BS |
221 | { |
222 | union { | |
223 | float32 f; | |
224 | uint32_t i; | |
225 | } u; | |
226 | ||
227 | u.f = floatx80_to_float32(ST0, &env->fp_status); | |
228 | return u.i; | |
229 | } | |
230 | ||
d3eb5eae | 231 | uint64_t helper_fstl_ST0(CPUX86State *env) |
f299f437 BS |
232 | { |
233 | union { | |
234 | float64 f; | |
235 | uint64_t i; | |
236 | } u; | |
237 | ||
238 | u.f = floatx80_to_float64(ST0, &env->fp_status); | |
239 | return u.i; | |
240 | } | |
241 | ||
d3eb5eae | 242 | int32_t helper_fist_ST0(CPUX86State *env) |
f299f437 BS |
243 | { |
244 | int32_t val; | |
245 | ||
246 | val = floatx80_to_int32(ST0, &env->fp_status); | |
247 | if (val != (int16_t)val) { | |
248 | val = -32768; | |
249 | } | |
250 | return val; | |
251 | } | |
252 | ||
d3eb5eae | 253 | int32_t helper_fistl_ST0(CPUX86State *env) |
f299f437 BS |
254 | { |
255 | int32_t val; | |
ea32aaf1 DP |
256 | signed char old_exp_flags; |
257 | ||
258 | old_exp_flags = get_float_exception_flags(&env->fp_status); | |
259 | set_float_exception_flags(0, &env->fp_status); | |
f299f437 BS |
260 | |
261 | val = floatx80_to_int32(ST0, &env->fp_status); | |
ea32aaf1 DP |
262 | if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { |
263 | val = 0x80000000; | |
264 | } | |
265 | set_float_exception_flags(get_float_exception_flags(&env->fp_status) | |
266 | | old_exp_flags, &env->fp_status); | |
f299f437 BS |
267 | return val; |
268 | } | |
269 | ||
d3eb5eae | 270 | int64_t helper_fistll_ST0(CPUX86State *env) |
f299f437 BS |
271 | { |
272 | int64_t val; | |
ea32aaf1 DP |
273 | signed char old_exp_flags; |
274 | ||
275 | old_exp_flags = get_float_exception_flags(&env->fp_status); | |
276 | set_float_exception_flags(0, &env->fp_status); | |
f299f437 | 277 | |
178846bd | 278 | val = floatx80_to_int64(ST0, &env->fp_status); |
ea32aaf1 DP |
279 | if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { |
280 | val = 0x8000000000000000ULL; | |
281 | } | |
282 | set_float_exception_flags(get_float_exception_flags(&env->fp_status) | |
283 | | old_exp_flags, &env->fp_status); | |
f299f437 BS |
284 | return val; |
285 | } | |
286 | ||
d3eb5eae | 287 | int32_t helper_fistt_ST0(CPUX86State *env) |
f299f437 BS |
288 | { |
289 | int32_t val; | |
290 | ||
291 | val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); | |
292 | if (val != (int16_t)val) { | |
293 | val = -32768; | |
294 | } | |
295 | return val; | |
296 | } | |
297 | ||
d3eb5eae | 298 | int32_t helper_fisttl_ST0(CPUX86State *env) |
f299f437 BS |
299 | { |
300 | int32_t val; | |
301 | ||
302 | val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); | |
303 | return val; | |
304 | } | |
305 | ||
d3eb5eae | 306 | int64_t helper_fisttll_ST0(CPUX86State *env) |
f299f437 BS |
307 | { |
308 | int64_t val; | |
309 | ||
310 | val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); | |
311 | return val; | |
312 | } | |
313 | ||
d3eb5eae | 314 | void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) |
f299f437 BS |
315 | { |
316 | int new_fpstt; | |
317 | ||
318 | new_fpstt = (env->fpstt - 1) & 7; | |
6cad09d2 | 319 | env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC()); |
f299f437 BS |
320 | env->fpstt = new_fpstt; |
321 | env->fptags[new_fpstt] = 0; /* validate stack entry */ | |
322 | } | |
323 | ||
d3eb5eae | 324 | void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) |
f299f437 | 325 | { |
6cad09d2 | 326 | helper_fstt(env, ST0, ptr, GETPC()); |
f299f437 BS |
327 | } |
328 | ||
d3eb5eae | 329 | void helper_fpush(CPUX86State *env) |
f299f437 | 330 | { |
d3eb5eae | 331 | fpush(env); |
f299f437 BS |
332 | } |
333 | ||
d3eb5eae | 334 | void helper_fpop(CPUX86State *env) |
f299f437 | 335 | { |
d3eb5eae | 336 | fpop(env); |
f299f437 BS |
337 | } |
338 | ||
d3eb5eae | 339 | void helper_fdecstp(CPUX86State *env) |
f299f437 BS |
340 | { |
341 | env->fpstt = (env->fpstt - 1) & 7; | |
342 | env->fpus &= ~0x4700; | |
343 | } | |
344 | ||
d3eb5eae | 345 | void helper_fincstp(CPUX86State *env) |
f299f437 BS |
346 | { |
347 | env->fpstt = (env->fpstt + 1) & 7; | |
348 | env->fpus &= ~0x4700; | |
349 | } | |
350 | ||
351 | /* FPU move */ | |
352 | ||
d3eb5eae | 353 | void helper_ffree_STN(CPUX86State *env, int st_index) |
f299f437 BS |
354 | { |
355 | env->fptags[(env->fpstt + st_index) & 7] = 1; | |
356 | } | |
357 | ||
d3eb5eae | 358 | void helper_fmov_ST0_FT0(CPUX86State *env) |
f299f437 BS |
359 | { |
360 | ST0 = FT0; | |
361 | } | |
362 | ||
d3eb5eae | 363 | void helper_fmov_FT0_STN(CPUX86State *env, int st_index) |
f299f437 BS |
364 | { |
365 | FT0 = ST(st_index); | |
366 | } | |
367 | ||
d3eb5eae | 368 | void helper_fmov_ST0_STN(CPUX86State *env, int st_index) |
f299f437 BS |
369 | { |
370 | ST0 = ST(st_index); | |
371 | } | |
372 | ||
d3eb5eae | 373 | void helper_fmov_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
374 | { |
375 | ST(st_index) = ST0; | |
376 | } | |
377 | ||
d3eb5eae | 378 | void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) |
f299f437 BS |
379 | { |
380 | floatx80 tmp; | |
381 | ||
382 | tmp = ST(st_index); | |
383 | ST(st_index) = ST0; | |
384 | ST0 = tmp; | |
385 | } | |
386 | ||
387 | /* FPU operations */ | |
388 | ||
/*
 * Status-word condition bits for the FCOM family, indexed by
 * (compare result + 1).  NOTE(review): the index order presumably follows
 * softfloat's less / equal / greater / unordered relation values; confirm.
 */
static const int fcom_ccval[4] = {
    0x0100, /* C0 */
    0x4000, /* C3 */
    0x0000, /* none */
    0x4500, /* C3 | C2 | C0 */
};
390 | ||
d3eb5eae | 391 | void helper_fcom_ST0_FT0(CPUX86State *env) |
f299f437 BS |
392 | { |
393 | int ret; | |
394 | ||
395 | ret = floatx80_compare(ST0, FT0, &env->fp_status); | |
396 | env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; | |
397 | } | |
398 | ||
d3eb5eae | 399 | void helper_fucom_ST0_FT0(CPUX86State *env) |
f299f437 BS |
400 | { |
401 | int ret; | |
402 | ||
403 | ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); | |
404 | env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; | |
405 | } | |
406 | ||
407 | static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; | |
408 | ||
d3eb5eae | 409 | void helper_fcomi_ST0_FT0(CPUX86State *env) |
f299f437 BS |
410 | { |
411 | int eflags; | |
412 | int ret; | |
413 | ||
414 | ret = floatx80_compare(ST0, FT0, &env->fp_status); | |
d3eb5eae | 415 | eflags = cpu_cc_compute_all(env, CC_OP); |
f299f437 BS |
416 | eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; |
417 | CC_SRC = eflags; | |
418 | } | |
419 | ||
d3eb5eae | 420 | void helper_fucomi_ST0_FT0(CPUX86State *env) |
f299f437 BS |
421 | { |
422 | int eflags; | |
423 | int ret; | |
424 | ||
425 | ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); | |
d3eb5eae | 426 | eflags = cpu_cc_compute_all(env, CC_OP); |
f299f437 BS |
427 | eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; |
428 | CC_SRC = eflags; | |
429 | } | |
430 | ||
d3eb5eae | 431 | void helper_fadd_ST0_FT0(CPUX86State *env) |
f299f437 BS |
432 | { |
433 | ST0 = floatx80_add(ST0, FT0, &env->fp_status); | |
434 | } | |
435 | ||
d3eb5eae | 436 | void helper_fmul_ST0_FT0(CPUX86State *env) |
f299f437 BS |
437 | { |
438 | ST0 = floatx80_mul(ST0, FT0, &env->fp_status); | |
439 | } | |
440 | ||
d3eb5eae | 441 | void helper_fsub_ST0_FT0(CPUX86State *env) |
f299f437 BS |
442 | { |
443 | ST0 = floatx80_sub(ST0, FT0, &env->fp_status); | |
444 | } | |
445 | ||
d3eb5eae | 446 | void helper_fsubr_ST0_FT0(CPUX86State *env) |
f299f437 BS |
447 | { |
448 | ST0 = floatx80_sub(FT0, ST0, &env->fp_status); | |
449 | } | |
450 | ||
d3eb5eae | 451 | void helper_fdiv_ST0_FT0(CPUX86State *env) |
f299f437 | 452 | { |
d3eb5eae | 453 | ST0 = helper_fdiv(env, ST0, FT0); |
f299f437 BS |
454 | } |
455 | ||
d3eb5eae | 456 | void helper_fdivr_ST0_FT0(CPUX86State *env) |
f299f437 | 457 | { |
d3eb5eae | 458 | ST0 = helper_fdiv(env, FT0, ST0); |
f299f437 BS |
459 | } |
460 | ||
461 | /* fp operations between STN and ST0 */ | |
462 | ||
d3eb5eae | 463 | void helper_fadd_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
464 | { |
465 | ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); | |
466 | } | |
467 | ||
d3eb5eae | 468 | void helper_fmul_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
469 | { |
470 | ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); | |
471 | } | |
472 | ||
d3eb5eae | 473 | void helper_fsub_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
474 | { |
475 | ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); | |
476 | } | |
477 | ||
d3eb5eae | 478 | void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
479 | { |
480 | ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); | |
481 | } | |
482 | ||
d3eb5eae | 483 | void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
484 | { |
485 | floatx80 *p; | |
486 | ||
487 | p = &ST(st_index); | |
d3eb5eae | 488 | *p = helper_fdiv(env, *p, ST0); |
f299f437 BS |
489 | } |
490 | ||
d3eb5eae | 491 | void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) |
f299f437 BS |
492 | { |
493 | floatx80 *p; | |
494 | ||
495 | p = &ST(st_index); | |
d3eb5eae | 496 | *p = helper_fdiv(env, ST0, *p); |
f299f437 BS |
497 | } |
498 | ||
499 | /* misc FPU operations */ | |
d3eb5eae | 500 | void helper_fchs_ST0(CPUX86State *env) |
f299f437 BS |
501 | { |
502 | ST0 = floatx80_chs(ST0); | |
503 | } | |
504 | ||
d3eb5eae | 505 | void helper_fabs_ST0(CPUX86State *env) |
f299f437 BS |
506 | { |
507 | ST0 = floatx80_abs(ST0); | |
508 | } | |
509 | ||
d3eb5eae | 510 | void helper_fld1_ST0(CPUX86State *env) |
f299f437 BS |
511 | { |
512 | ST0 = floatx80_one; | |
513 | } | |
514 | ||
d3eb5eae | 515 | void helper_fldl2t_ST0(CPUX86State *env) |
f299f437 BS |
516 | { |
517 | ST0 = floatx80_l2t; | |
518 | } | |
519 | ||
d3eb5eae | 520 | void helper_fldl2e_ST0(CPUX86State *env) |
f299f437 BS |
521 | { |
522 | ST0 = floatx80_l2e; | |
523 | } | |
524 | ||
d3eb5eae | 525 | void helper_fldpi_ST0(CPUX86State *env) |
f299f437 BS |
526 | { |
527 | ST0 = floatx80_pi; | |
528 | } | |
529 | ||
d3eb5eae | 530 | void helper_fldlg2_ST0(CPUX86State *env) |
f299f437 BS |
531 | { |
532 | ST0 = floatx80_lg2; | |
533 | } | |
534 | ||
d3eb5eae | 535 | void helper_fldln2_ST0(CPUX86State *env) |
f299f437 BS |
536 | { |
537 | ST0 = floatx80_ln2; | |
538 | } | |
539 | ||
d3eb5eae | 540 | void helper_fldz_ST0(CPUX86State *env) |
f299f437 BS |
541 | { |
542 | ST0 = floatx80_zero; | |
543 | } | |
544 | ||
d3eb5eae | 545 | void helper_fldz_FT0(CPUX86State *env) |
f299f437 BS |
546 | { |
547 | FT0 = floatx80_zero; | |
548 | } | |
549 | ||
d3eb5eae | 550 | uint32_t helper_fnstsw(CPUX86State *env) |
f299f437 BS |
551 | { |
552 | return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; | |
553 | } | |
554 | ||
d3eb5eae | 555 | uint32_t helper_fnstcw(CPUX86State *env) |
f299f437 BS |
556 | { |
557 | return env->fpuc; | |
558 | } | |
559 | ||
5bde1407 | 560 | void update_fp_status(CPUX86State *env) |
f299f437 BS |
561 | { |
562 | int rnd_type; | |
563 | ||
564 | /* set rounding mode */ | |
565 | switch (env->fpuc & FPU_RC_MASK) { | |
566 | default: | |
567 | case FPU_RC_NEAR: | |
568 | rnd_type = float_round_nearest_even; | |
569 | break; | |
570 | case FPU_RC_DOWN: | |
571 | rnd_type = float_round_down; | |
572 | break; | |
573 | case FPU_RC_UP: | |
574 | rnd_type = float_round_up; | |
575 | break; | |
576 | case FPU_RC_CHOP: | |
577 | rnd_type = float_round_to_zero; | |
578 | break; | |
579 | } | |
580 | set_float_rounding_mode(rnd_type, &env->fp_status); | |
581 | switch ((env->fpuc >> 8) & 3) { | |
582 | case 0: | |
583 | rnd_type = 32; | |
584 | break; | |
585 | case 2: | |
586 | rnd_type = 64; | |
587 | break; | |
588 | case 3: | |
589 | default: | |
590 | rnd_type = 80; | |
591 | break; | |
592 | } | |
593 | set_floatx80_rounding_precision(rnd_type, &env->fp_status); | |
594 | } | |
595 | ||
d3eb5eae | 596 | void helper_fldcw(CPUX86State *env, uint32_t val) |
f299f437 | 597 | { |
5bde1407 | 598 | cpu_set_fpuc(env, val); |
f299f437 BS |
599 | } |
600 | ||
d3eb5eae | 601 | void helper_fclex(CPUX86State *env) |
f299f437 BS |
602 | { |
603 | env->fpus &= 0x7f00; | |
604 | } | |
605 | ||
d3eb5eae | 606 | void helper_fwait(CPUX86State *env) |
f299f437 BS |
607 | { |
608 | if (env->fpus & FPUS_SE) { | |
6cad09d2 | 609 | fpu_raise_exception(env, GETPC()); |
f299f437 BS |
610 | } |
611 | } | |
612 | ||
d3eb5eae | 613 | void helper_fninit(CPUX86State *env) |
f299f437 BS |
614 | { |
615 | env->fpus = 0; | |
616 | env->fpstt = 0; | |
5bde1407 | 617 | cpu_set_fpuc(env, 0x37f); |
f299f437 BS |
618 | env->fptags[0] = 1; |
619 | env->fptags[1] = 1; | |
620 | env->fptags[2] = 1; | |
621 | env->fptags[3] = 1; | |
622 | env->fptags[4] = 1; | |
623 | env->fptags[5] = 1; | |
624 | env->fptags[6] = 1; | |
625 | env->fptags[7] = 1; | |
626 | } | |
627 | ||
628 | /* BCD ops */ | |
629 | ||
d3eb5eae | 630 | void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) |
f299f437 BS |
631 | { |
632 | floatx80 tmp; | |
633 | uint64_t val; | |
634 | unsigned int v; | |
635 | int i; | |
636 | ||
637 | val = 0; | |
638 | for (i = 8; i >= 0; i--) { | |
6cad09d2 | 639 | v = cpu_ldub_data_ra(env, ptr + i, GETPC()); |
f299f437 BS |
640 | val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); |
641 | } | |
642 | tmp = int64_to_floatx80(val, &env->fp_status); | |
6cad09d2 | 643 | if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { |
18b41f95 | 644 | tmp = floatx80_chs(tmp); |
f299f437 | 645 | } |
d3eb5eae | 646 | fpush(env); |
f299f437 BS |
647 | ST0 = tmp; |
648 | } | |
649 | ||
d3eb5eae | 650 | void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) |
f299f437 BS |
651 | { |
652 | int v; | |
653 | target_ulong mem_ref, mem_end; | |
654 | int64_t val; | |
655 | ||
656 | val = floatx80_to_int64(ST0, &env->fp_status); | |
657 | mem_ref = ptr; | |
658 | mem_end = mem_ref + 9; | |
659 | if (val < 0) { | |
6cad09d2 | 660 | cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); |
f299f437 BS |
661 | val = -val; |
662 | } else { | |
6cad09d2 | 663 | cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); |
f299f437 BS |
664 | } |
665 | while (mem_ref < mem_end) { | |
666 | if (val == 0) { | |
667 | break; | |
668 | } | |
669 | v = val % 100; | |
670 | val = val / 100; | |
671 | v = ((v / 10) << 4) | (v % 10); | |
6cad09d2 | 672 | cpu_stb_data_ra(env, mem_ref++, v, GETPC()); |
f299f437 BS |
673 | } |
674 | while (mem_ref < mem_end) { | |
6cad09d2 | 675 | cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); |
f299f437 BS |
676 | } |
677 | } | |
678 | ||
d3eb5eae | 679 | void helper_f2xm1(CPUX86State *env) |
f299f437 | 680 | { |
d3eb5eae | 681 | double val = floatx80_to_double(env, ST0); |
f299f437 BS |
682 | |
683 | val = pow(2.0, val) - 1.0; | |
d3eb5eae | 684 | ST0 = double_to_floatx80(env, val); |
f299f437 BS |
685 | } |
686 | ||
d3eb5eae | 687 | void helper_fyl2x(CPUX86State *env) |
f299f437 | 688 | { |
d3eb5eae | 689 | double fptemp = floatx80_to_double(env, ST0); |
f299f437 BS |
690 | |
691 | if (fptemp > 0.0) { | |
692 | fptemp = log(fptemp) / log(2.0); /* log2(ST) */ | |
d3eb5eae BS |
693 | fptemp *= floatx80_to_double(env, ST1); |
694 | ST1 = double_to_floatx80(env, fptemp); | |
695 | fpop(env); | |
f299f437 BS |
696 | } else { |
697 | env->fpus &= ~0x4700; | |
698 | env->fpus |= 0x400; | |
699 | } | |
700 | } | |
701 | ||
d3eb5eae | 702 | void helper_fptan(CPUX86State *env) |
f299f437 | 703 | { |
d3eb5eae | 704 | double fptemp = floatx80_to_double(env, ST0); |
f299f437 BS |
705 | |
706 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { | |
707 | env->fpus |= 0x400; | |
708 | } else { | |
709 | fptemp = tan(fptemp); | |
d3eb5eae BS |
710 | ST0 = double_to_floatx80(env, fptemp); |
711 | fpush(env); | |
f299f437 BS |
712 | ST0 = floatx80_one; |
713 | env->fpus &= ~0x400; /* C2 <-- 0 */ | |
714 | /* the above code is for |arg| < 2**52 only */ | |
715 | } | |
716 | } | |
717 | ||
d3eb5eae | 718 | void helper_fpatan(CPUX86State *env) |
f299f437 BS |
719 | { |
720 | double fptemp, fpsrcop; | |
721 | ||
d3eb5eae BS |
722 | fpsrcop = floatx80_to_double(env, ST1); |
723 | fptemp = floatx80_to_double(env, ST0); | |
724 | ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp)); | |
725 | fpop(env); | |
f299f437 BS |
726 | } |
727 | ||
d3eb5eae | 728 | void helper_fxtract(CPUX86State *env) |
f299f437 BS |
729 | { |
730 | CPU_LDoubleU temp; | |
731 | ||
732 | temp.d = ST0; | |
733 | ||
734 | if (floatx80_is_zero(ST0)) { | |
735 | /* Easy way to generate -inf and raising division by 0 exception */ | |
736 | ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, | |
737 | &env->fp_status); | |
d3eb5eae | 738 | fpush(env); |
f299f437 BS |
739 | ST0 = temp.d; |
740 | } else { | |
741 | int expdif; | |
742 | ||
743 | expdif = EXPD(temp) - EXPBIAS; | |
744 | /* DP exponent bias */ | |
745 | ST0 = int32_to_floatx80(expdif, &env->fp_status); | |
d3eb5eae | 746 | fpush(env); |
f299f437 BS |
747 | BIASEXPONENT(temp); |
748 | ST0 = temp.d; | |
749 | } | |
750 | } | |
751 | ||
d3eb5eae | 752 | void helper_fprem1(CPUX86State *env) |
f299f437 BS |
753 | { |
754 | double st0, st1, dblq, fpsrcop, fptemp; | |
755 | CPU_LDoubleU fpsrcop1, fptemp1; | |
756 | int expdif; | |
757 | signed long long int q; | |
758 | ||
d3eb5eae BS |
759 | st0 = floatx80_to_double(env, ST0); |
760 | st1 = floatx80_to_double(env, ST1); | |
f299f437 BS |
761 | |
762 | if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { | |
d3eb5eae | 763 | ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */ |
f299f437 BS |
764 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
765 | return; | |
766 | } | |
767 | ||
768 | fpsrcop = st0; | |
769 | fptemp = st1; | |
770 | fpsrcop1.d = ST0; | |
771 | fptemp1.d = ST1; | |
772 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); | |
773 | ||
774 | if (expdif < 0) { | |
775 | /* optimisation? taken from the AMD docs */ | |
776 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ | |
777 | /* ST0 is unchanged */ | |
778 | return; | |
779 | } | |
780 | ||
781 | if (expdif < 53) { | |
782 | dblq = fpsrcop / fptemp; | |
783 | /* round dblq towards nearest integer */ | |
784 | dblq = rint(dblq); | |
785 | st0 = fpsrcop - fptemp * dblq; | |
786 | ||
787 | /* convert dblq to q by truncating towards zero */ | |
788 | if (dblq < 0.0) { | |
789 | q = (signed long long int)(-dblq); | |
790 | } else { | |
791 | q = (signed long long int)dblq; | |
792 | } | |
793 | ||
794 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ | |
795 | /* (C0,C3,C1) <-- (q2,q1,q0) */ | |
796 | env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ | |
797 | env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ | |
798 | env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ | |
799 | } else { | |
800 | env->fpus |= 0x400; /* C2 <-- 1 */ | |
801 | fptemp = pow(2.0, expdif - 50); | |
802 | fpsrcop = (st0 / st1) / fptemp; | |
803 | /* fpsrcop = integer obtained by chopping */ | |
804 | fpsrcop = (fpsrcop < 0.0) ? | |
805 | -(floor(fabs(fpsrcop))) : floor(fpsrcop); | |
806 | st0 -= (st1 * fpsrcop * fptemp); | |
807 | } | |
d3eb5eae | 808 | ST0 = double_to_floatx80(env, st0); |
f299f437 BS |
809 | } |
810 | ||
d3eb5eae | 811 | void helper_fprem(CPUX86State *env) |
f299f437 BS |
812 | { |
813 | double st0, st1, dblq, fpsrcop, fptemp; | |
814 | CPU_LDoubleU fpsrcop1, fptemp1; | |
815 | int expdif; | |
816 | signed long long int q; | |
817 | ||
d3eb5eae BS |
818 | st0 = floatx80_to_double(env, ST0); |
819 | st1 = floatx80_to_double(env, ST1); | |
f299f437 BS |
820 | |
821 | if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { | |
d3eb5eae | 822 | ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */ |
f299f437 BS |
823 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
824 | return; | |
825 | } | |
826 | ||
827 | fpsrcop = st0; | |
828 | fptemp = st1; | |
829 | fpsrcop1.d = ST0; | |
830 | fptemp1.d = ST1; | |
831 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); | |
832 | ||
833 | if (expdif < 0) { | |
834 | /* optimisation? taken from the AMD docs */ | |
835 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ | |
836 | /* ST0 is unchanged */ | |
837 | return; | |
838 | } | |
839 | ||
840 | if (expdif < 53) { | |
841 | dblq = fpsrcop / fptemp; /* ST0 / ST1 */ | |
842 | /* round dblq towards zero */ | |
843 | dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); | |
844 | st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ | |
845 | ||
846 | /* convert dblq to q by truncating towards zero */ | |
847 | if (dblq < 0.0) { | |
848 | q = (signed long long int)(-dblq); | |
849 | } else { | |
850 | q = (signed long long int)dblq; | |
851 | } | |
852 | ||
853 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ | |
854 | /* (C0,C3,C1) <-- (q2,q1,q0) */ | |
855 | env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ | |
856 | env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ | |
857 | env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ | |
858 | } else { | |
859 | int N = 32 + (expdif % 32); /* as per AMD docs */ | |
860 | ||
861 | env->fpus |= 0x400; /* C2 <-- 1 */ | |
862 | fptemp = pow(2.0, (double)(expdif - N)); | |
863 | fpsrcop = (st0 / st1) / fptemp; | |
864 | /* fpsrcop = integer obtained by chopping */ | |
865 | fpsrcop = (fpsrcop < 0.0) ? | |
866 | -(floor(fabs(fpsrcop))) : floor(fpsrcop); | |
867 | st0 -= (st1 * fpsrcop * fptemp); | |
868 | } | |
d3eb5eae | 869 | ST0 = double_to_floatx80(env, st0); |
f299f437 BS |
870 | } |
871 | ||
d3eb5eae | 872 | void helper_fyl2xp1(CPUX86State *env) |
f299f437 | 873 | { |
d3eb5eae | 874 | double fptemp = floatx80_to_double(env, ST0); |
f299f437 BS |
875 | |
876 | if ((fptemp + 1.0) > 0.0) { | |
877 | fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ | |
d3eb5eae BS |
878 | fptemp *= floatx80_to_double(env, ST1); |
879 | ST1 = double_to_floatx80(env, fptemp); | |
880 | fpop(env); | |
f299f437 BS |
881 | } else { |
882 | env->fpus &= ~0x4700; | |
883 | env->fpus |= 0x400; | |
884 | } | |
885 | } | |
886 | ||
d3eb5eae | 887 | void helper_fsqrt(CPUX86State *env) |
f299f437 BS |
888 | { |
889 | if (floatx80_is_neg(ST0)) { | |
890 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ | |
891 | env->fpus |= 0x400; | |
892 | } | |
893 | ST0 = floatx80_sqrt(ST0, &env->fp_status); | |
894 | } | |
895 | ||
d3eb5eae | 896 | void helper_fsincos(CPUX86State *env) |
f299f437 | 897 | { |
d3eb5eae | 898 | double fptemp = floatx80_to_double(env, ST0); |
f299f437 BS |
899 | |
900 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { | |
901 | env->fpus |= 0x400; | |
902 | } else { | |
d3eb5eae BS |
903 | ST0 = double_to_floatx80(env, sin(fptemp)); |
904 | fpush(env); | |
905 | ST0 = double_to_floatx80(env, cos(fptemp)); | |
f299f437 BS |
906 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
907 | /* the above code is for |arg| < 2**63 only */ | |
908 | } | |
909 | } | |
910 | ||
d3eb5eae | 911 | void helper_frndint(CPUX86State *env) |
f299f437 BS |
912 | { |
913 | ST0 = floatx80_round_to_int(ST0, &env->fp_status); | |
914 | } | |
915 | ||
d3eb5eae | 916 | void helper_fscale(CPUX86State *env) |
f299f437 BS |
917 | { |
918 | if (floatx80_is_any_nan(ST1)) { | |
919 | ST0 = ST1; | |
920 | } else { | |
921 | int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); | |
922 | ST0 = floatx80_scalbn(ST0, n, &env->fp_status); | |
923 | } | |
924 | } | |
925 | ||
d3eb5eae | 926 | void helper_fsin(CPUX86State *env) |
f299f437 | 927 | { |
d3eb5eae | 928 | double fptemp = floatx80_to_double(env, ST0); |
f299f437 BS |
929 | |
930 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { | |
931 | env->fpus |= 0x400; | |
932 | } else { | |
d3eb5eae | 933 | ST0 = double_to_floatx80(env, sin(fptemp)); |
f299f437 BS |
934 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
935 | /* the above code is for |arg| < 2**53 only */ | |
936 | } | |
937 | } | |
938 | ||
d3eb5eae | 939 | void helper_fcos(CPUX86State *env) |
f299f437 | 940 | { |
d3eb5eae | 941 | double fptemp = floatx80_to_double(env, ST0); |
f299f437 BS |
942 | |
943 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { | |
944 | env->fpus |= 0x400; | |
945 | } else { | |
d3eb5eae | 946 | ST0 = double_to_floatx80(env, cos(fptemp)); |
f299f437 BS |
947 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
948 | /* the above code is for |arg| < 2**63 only */ | |
949 | } | |
950 | } | |
951 | ||
d3eb5eae | 952 | void helper_fxam_ST0(CPUX86State *env) |
f299f437 BS |
953 | { |
954 | CPU_LDoubleU temp; | |
955 | int expdif; | |
956 | ||
957 | temp.d = ST0; | |
958 | ||
959 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ | |
960 | if (SIGND(temp)) { | |
961 | env->fpus |= 0x200; /* C1 <-- 1 */ | |
962 | } | |
963 | ||
964 | /* XXX: test fptags too */ | |
965 | expdif = EXPD(temp); | |
966 | if (expdif == MAXEXPD) { | |
967 | if (MANTD(temp) == 0x8000000000000000ULL) { | |
968 | env->fpus |= 0x500; /* Infinity */ | |
969 | } else { | |
970 | env->fpus |= 0x100; /* NaN */ | |
971 | } | |
972 | } else if (expdif == 0) { | |
973 | if (MANTD(temp) == 0) { | |
974 | env->fpus |= 0x4000; /* Zero */ | |
975 | } else { | |
976 | env->fpus |= 0x4400; /* Denormal */ | |
977 | } | |
978 | } else { | |
979 | env->fpus |= 0x400; | |
980 | } | |
981 | } | |
982 | ||
6cad09d2 PD |
983 | static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, |
984 | uintptr_t retaddr) | |
f299f437 BS |
985 | { |
986 | int fpus, fptag, exp, i; | |
987 | uint64_t mant; | |
988 | CPU_LDoubleU tmp; | |
989 | ||
990 | fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; | |
991 | fptag = 0; | |
992 | for (i = 7; i >= 0; i--) { | |
993 | fptag <<= 2; | |
994 | if (env->fptags[i]) { | |
995 | fptag |= 3; | |
996 | } else { | |
997 | tmp.d = env->fpregs[i].d; | |
998 | exp = EXPD(tmp); | |
999 | mant = MANTD(tmp); | |
1000 | if (exp == 0 && mant == 0) { | |
1001 | /* zero */ | |
1002 | fptag |= 1; | |
1003 | } else if (exp == 0 || exp == MAXEXPD | |
1004 | || (mant & (1LL << 63)) == 0) { | |
1005 | /* NaNs, infinity, denormal */ | |
1006 | fptag |= 2; | |
1007 | } | |
1008 | } | |
1009 | } | |
1010 | if (data32) { | |
1011 | /* 32 bit */ | |
6cad09d2 PD |
1012 | cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); |
1013 | cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); | |
1014 | cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); | |
1015 | cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */ | |
1016 | cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */ | |
1017 | cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */ | |
1018 | cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */ | |
f299f437 BS |
1019 | } else { |
1020 | /* 16 bit */ | |
6cad09d2 PD |
1021 | cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); |
1022 | cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); | |
1023 | cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); | |
1024 | cpu_stw_data_ra(env, ptr + 6, 0, retaddr); | |
1025 | cpu_stw_data_ra(env, ptr + 8, 0, retaddr); | |
1026 | cpu_stw_data_ra(env, ptr + 10, 0, retaddr); | |
1027 | cpu_stw_data_ra(env, ptr + 12, 0, retaddr); | |
f299f437 BS |
1028 | } |
1029 | } | |
1030 | ||
6cad09d2 PD |
1031 | void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) |
1032 | { | |
1033 | do_fstenv(env, ptr, data32, GETPC()); | |
1034 | } | |
1035 | ||
1036 | static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, | |
1037 | uintptr_t retaddr) | |
f299f437 BS |
1038 | { |
1039 | int i, fpus, fptag; | |
1040 | ||
1041 | if (data32) { | |
6cad09d2 PD |
1042 | cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); |
1043 | fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); | |
1044 | fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); | |
f299f437 | 1045 | } else { |
6cad09d2 PD |
1046 | cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); |
1047 | fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); | |
1048 | fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); | |
f299f437 BS |
1049 | } |
1050 | env->fpstt = (fpus >> 11) & 7; | |
1051 | env->fpus = fpus & ~0x3800; | |
1052 | for (i = 0; i < 8; i++) { | |
1053 | env->fptags[i] = ((fptag & 3) == 3); | |
1054 | fptag >>= 2; | |
1055 | } | |
1056 | } | |
1057 | ||
6cad09d2 PD |
1058 | void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) |
1059 | { | |
1060 | do_fldenv(env, ptr, data32, GETPC()); | |
1061 | } | |
1062 | ||
d3eb5eae | 1063 | void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) |
f299f437 BS |
1064 | { |
1065 | floatx80 tmp; | |
1066 | int i; | |
1067 | ||
6cad09d2 | 1068 | do_fstenv(env, ptr, data32, GETPC()); |
f299f437 BS |
1069 | |
1070 | ptr += (14 << data32); | |
1071 | for (i = 0; i < 8; i++) { | |
1072 | tmp = ST(i); | |
6cad09d2 | 1073 | helper_fstt(env, tmp, ptr, GETPC()); |
f299f437 BS |
1074 | ptr += 10; |
1075 | } | |
1076 | ||
1077 | /* fninit */ | |
1078 | env->fpus = 0; | |
1079 | env->fpstt = 0; | |
5bde1407 | 1080 | cpu_set_fpuc(env, 0x37f); |
f299f437 BS |
1081 | env->fptags[0] = 1; |
1082 | env->fptags[1] = 1; | |
1083 | env->fptags[2] = 1; | |
1084 | env->fptags[3] = 1; | |
1085 | env->fptags[4] = 1; | |
1086 | env->fptags[5] = 1; | |
1087 | env->fptags[6] = 1; | |
1088 | env->fptags[7] = 1; | |
1089 | } | |
1090 | ||
d3eb5eae | 1091 | void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) |
f299f437 BS |
1092 | { |
1093 | floatx80 tmp; | |
1094 | int i; | |
1095 | ||
6cad09d2 | 1096 | do_fldenv(env, ptr, data32, GETPC()); |
f299f437 BS |
1097 | ptr += (14 << data32); |
1098 | ||
1099 | for (i = 0; i < 8; i++) { | |
6cad09d2 | 1100 | tmp = helper_fldt(env, ptr, GETPC()); |
f299f437 BS |
1101 | ST(i) = tmp; |
1102 | ptr += 10; | |
1103 | } | |
1104 | } | |
1105 | ||
#ifdef CONFIG_USER_ONLY
/*
 * Non-helper entry points, built only for user-mode emulation, that
 * forward to the FSAVE/FRSTOR helpers.
 *
 * NOTE(review): the helpers derive their unwind address with GETPC(),
 * which here resolves to this wrapper rather than translated code --
 * confirm that is acceptable for the callers.
 */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1117 | ||
64dbaff0 | 1118 | static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
f299f437 | 1119 | { |
64dbaff0 | 1120 | int fpus, fptag, i; |
f299f437 BS |
1121 | target_ulong addr; |
1122 | ||
f299f437 BS |
1123 | fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; |
1124 | fptag = 0; | |
1125 | for (i = 0; i < 8; i++) { | |
1126 | fptag |= (env->fptags[i] << i); | |
1127 | } | |
64dbaff0 RH |
1128 | cpu_stw_data_ra(env, ptr, env->fpuc, ra); |
1129 | cpu_stw_data_ra(env, ptr + 2, fpus, ra); | |
1130 | cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra); | |
1131 | ||
1132 | /* In 32-bit mode this is eip, sel, dp, sel. | |
1133 | In 64-bit mode this is rip, rdp. | |
1134 | But in either case we don't write actual data, just zeros. */ | |
1135 | cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */ | |
1136 | cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */ | |
f299f437 BS |
1137 | |
1138 | addr = ptr + 0x20; | |
1139 | for (i = 0; i < 8; i++) { | |
64dbaff0 RH |
1140 | floatx80 tmp = ST(i); |
1141 | helper_fstt(env, tmp, addr, ra); | |
f299f437 BS |
1142 | addr += 16; |
1143 | } | |
64dbaff0 RH |
1144 | } |
1145 | ||
1146 | static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) | |
1147 | { | |
1148 | cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */ | |
1149 | cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */ | |
1150 | } | |
1151 | ||
1152 | static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) | |
1153 | { | |
1154 | int i, nb_xmm_regs; | |
1155 | target_ulong addr; | |
1156 | ||
1157 | if (env->hflags & HF_CS64_MASK) { | |
1158 | nb_xmm_regs = 16; | |
1159 | } else { | |
1160 | nb_xmm_regs = 8; | |
1161 | } | |
1162 | ||
1163 | addr = ptr + 0xa0; | |
1164 | for (i = 0; i < nb_xmm_regs; i++) { | |
1165 | cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); | |
1166 | cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); | |
1167 | addr += 16; | |
1168 | } | |
1169 | } | |
1170 | ||
1171 | void helper_fxsave(CPUX86State *env, target_ulong ptr) | |
1172 | { | |
1173 | uintptr_t ra = GETPC(); | |
1174 | ||
1175 | /* The operand must be 16 byte aligned */ | |
1176 | if (ptr & 0xf) { | |
1177 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1178 | } | |
1179 | ||
1180 | do_xsave_fpu(env, ptr, ra); | |
f299f437 BS |
1181 | |
1182 | if (env->cr[4] & CR4_OSFXSR_MASK) { | |
64dbaff0 | 1183 | do_xsave_mxcsr(env, ptr, ra); |
f299f437 BS |
1184 | /* Fast FXSAVE leaves out the XMM registers */ |
1185 | if (!(env->efer & MSR_EFER_FFXSR) | |
1186 | || (env->hflags & HF_CPL_MASK) | |
1187 | || !(env->hflags & HF_LMA_MASK)) { | |
64dbaff0 | 1188 | do_xsave_sse(env, ptr, ra); |
f299f437 BS |
1189 | } |
1190 | } | |
1191 | } | |
1192 | ||
19dc85db RH |
1193 | static uint64_t get_xinuse(CPUX86State *env) |
1194 | { | |
1195 | /* We don't track XINUSE. We could calculate it here, but it's | |
1196 | probably less work to simply indicate all components in use. */ | |
1197 | return -1; | |
1198 | } | |
1199 | ||
1200 | void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) | |
1201 | { | |
1202 | uintptr_t ra = GETPC(); | |
1203 | uint64_t old_bv, new_bv; | |
1204 | ||
1205 | /* The OS must have enabled XSAVE. */ | |
1206 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { | |
1207 | raise_exception_ra(env, EXCP06_ILLOP, ra); | |
1208 | } | |
1209 | ||
1210 | /* The operand must be 64 byte aligned. */ | |
1211 | if (ptr & 63) { | |
1212 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1213 | } | |
1214 | ||
1215 | /* Never save anything not enabled by XCR0. */ | |
1216 | rfbm &= env->xcr0; | |
1217 | ||
1218 | if (rfbm & XSTATE_FP) { | |
1219 | do_xsave_fpu(env, ptr, ra); | |
1220 | } | |
1221 | if (rfbm & XSTATE_SSE) { | |
1222 | do_xsave_mxcsr(env, ptr, ra); | |
1223 | do_xsave_sse(env, ptr, ra); | |
1224 | } | |
1225 | ||
1226 | /* Update the XSTATE_BV field. */ | |
1227 | old_bv = cpu_ldq_data_ra(env, ptr + 512, ra); | |
1228 | new_bv = (old_bv & ~rfbm) | (get_xinuse(env) & rfbm); | |
1229 | cpu_stq_data_ra(env, ptr + 512, new_bv, ra); | |
1230 | } | |
1231 | ||
64dbaff0 | 1232 | static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
f299f437 | 1233 | { |
64dbaff0 | 1234 | int i, fpus, fptag; |
f299f437 BS |
1235 | target_ulong addr; |
1236 | ||
64dbaff0 RH |
1237 | cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra)); |
1238 | fpus = cpu_lduw_data_ra(env, ptr + 2, ra); | |
1239 | fptag = cpu_lduw_data_ra(env, ptr + 4, ra); | |
f299f437 BS |
1240 | env->fpstt = (fpus >> 11) & 7; |
1241 | env->fpus = fpus & ~0x3800; | |
1242 | fptag ^= 0xff; | |
1243 | for (i = 0; i < 8; i++) { | |
1244 | env->fptags[i] = ((fptag >> i) & 1); | |
1245 | } | |
1246 | ||
1247 | addr = ptr + 0x20; | |
1248 | for (i = 0; i < 8; i++) { | |
64dbaff0 | 1249 | floatx80 tmp = helper_fldt(env, addr, ra); |
f299f437 BS |
1250 | ST(i) = tmp; |
1251 | addr += 16; | |
1252 | } | |
64dbaff0 RH |
1253 | } |
1254 | ||
1255 | static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) | |
1256 | { | |
1257 | cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra)); | |
1258 | } | |
1259 | ||
1260 | static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) | |
1261 | { | |
1262 | int i, nb_xmm_regs; | |
1263 | target_ulong addr; | |
1264 | ||
1265 | if (env->hflags & HF_CS64_MASK) { | |
1266 | nb_xmm_regs = 16; | |
1267 | } else { | |
1268 | nb_xmm_regs = 8; | |
1269 | } | |
1270 | ||
1271 | addr = ptr + 0xa0; | |
1272 | for (i = 0; i < nb_xmm_regs; i++) { | |
1273 | env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); | |
1274 | env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); | |
1275 | addr += 16; | |
1276 | } | |
1277 | } | |
1278 | ||
1279 | void helper_fxrstor(CPUX86State *env, target_ulong ptr) | |
1280 | { | |
1281 | uintptr_t ra = GETPC(); | |
1282 | ||
1283 | /* The operand must be 16 byte aligned */ | |
1284 | if (ptr & 0xf) { | |
1285 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1286 | } | |
1287 | ||
1288 | do_xrstor_fpu(env, ptr, ra); | |
f299f437 BS |
1289 | |
1290 | if (env->cr[4] & CR4_OSFXSR_MASK) { | |
64dbaff0 RH |
1291 | do_xrstor_mxcsr(env, ptr, ra); |
1292 | /* Fast FXRSTOR leaves out the XMM registers */ | |
f299f437 BS |
1293 | if (!(env->efer & MSR_EFER_FFXSR) |
1294 | || (env->hflags & HF_CPL_MASK) | |
1295 | || !(env->hflags & HF_LMA_MASK)) { | |
64dbaff0 | 1296 | do_xrstor_sse(env, ptr, ra); |
f299f437 BS |
1297 | } |
1298 | } | |
1299 | } | |
1300 | ||
19dc85db RH |
1301 | void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) |
1302 | { | |
1303 | uintptr_t ra = GETPC(); | |
1304 | uint64_t xstate_bv, xcomp_bv0, xcomp_bv1; | |
1305 | ||
1306 | rfbm &= env->xcr0; | |
1307 | ||
1308 | /* The OS must have enabled XSAVE. */ | |
1309 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { | |
1310 | raise_exception_ra(env, EXCP06_ILLOP, ra); | |
1311 | } | |
1312 | ||
1313 | /* The operand must be 64 byte aligned. */ | |
1314 | if (ptr & 63) { | |
1315 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1316 | } | |
1317 | ||
1318 | xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra); | |
1319 | ||
1320 | if ((int64_t)xstate_bv < 0) { | |
1321 | /* FIXME: Compact form. */ | |
1322 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1323 | } | |
1324 | ||
1325 | /* Standard form. */ | |
1326 | ||
1327 | /* The XSTATE field must not set bits not present in XCR0. */ | |
1328 | if (xstate_bv & ~env->xcr0) { | |
1329 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1330 | } | |
1331 | ||
1332 | /* The XCOMP field must be zero. */ | |
1333 | xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra); | |
1334 | xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra); | |
1335 | if (xcomp_bv0 || xcomp_bv1) { | |
1336 | raise_exception_ra(env, EXCP0D_GPF, ra); | |
1337 | } | |
1338 | ||
1339 | if (rfbm & XSTATE_FP) { | |
1340 | if (xstate_bv & XSTATE_FP) { | |
1341 | do_xrstor_fpu(env, ptr, ra); | |
1342 | } else { | |
1343 | helper_fninit(env); | |
1344 | memset(env->fpregs, 0, sizeof(env->fpregs)); | |
1345 | } | |
1346 | } | |
1347 | if (rfbm & XSTATE_SSE) { | |
1348 | /* Note that the standard form of XRSTOR loads MXCSR from memory | |
1349 | whether or not the XSTATE_BV bit is set. */ | |
1350 | do_xrstor_mxcsr(env, ptr, ra); | |
1351 | if (xstate_bv & XSTATE_SSE) { | |
1352 | do_xrstor_sse(env, ptr, ra); | |
1353 | } else { | |
1354 | /* ??? When AVX is implemented, we may have to be more | |
1355 | selective in the clearing. */ | |
1356 | memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); | |
1357 | } | |
1358 | } | |
1359 | } | |
1360 | ||
1361 | uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) | |
1362 | { | |
1363 | /* The OS must have enabled XSAVE. */ | |
1364 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { | |
1365 | raise_exception_ra(env, EXCP06_ILLOP, GETPC()); | |
1366 | } | |
1367 | ||
1368 | switch (ecx) { | |
1369 | case 0: | |
1370 | return env->xcr0; | |
1371 | case 1: | |
1372 | /* FIXME: #GP if !CPUID.(EAX=0DH,ECX=1):EAX.XG1[bit 2]. */ | |
1373 | return env->xcr0 & get_xinuse(env); | |
1374 | } | |
1375 | raise_exception_ra(env, EXCP0D_GPF, GETPC()); | |
1376 | } | |
1377 | ||
1378 | void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) | |
1379 | { | |
1380 | uint32_t dummy, ena_lo, ena_hi; | |
1381 | uint64_t ena; | |
1382 | ||
1383 | /* The OS must have enabled XSAVE. */ | |
1384 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { | |
1385 | raise_exception_ra(env, EXCP06_ILLOP, GETPC()); | |
1386 | } | |
1387 | ||
1388 | /* Only XCR0 is defined at present; the FPU may not be disabled. */ | |
1389 | if (ecx != 0 || (mask & XSTATE_FP) == 0) { | |
1390 | goto do_gpf; | |
1391 | } | |
1392 | ||
1393 | /* Disallow enabling unimplemented features. */ | |
1394 | cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); | |
1395 | ena = ((uint64_t)ena_hi << 32) | ena_lo; | |
1396 | if (mask & ~ena) { | |
1397 | goto do_gpf; | |
1398 | } | |
1399 | ||
1400 | env->xcr0 = mask; | |
1401 | return; | |
1402 | ||
1403 | do_gpf: | |
1404 | raise_exception_ra(env, EXCP0D_GPF, GETPC()); | |
1405 | } | |
1406 | ||
f299f437 BS |
1407 | void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f) |
1408 | { | |
1409 | CPU_LDoubleU temp; | |
1410 | ||
1411 | temp.d = f; | |
1412 | *pmant = temp.l.lower; | |
1413 | *pexp = temp.l.upper; | |
1414 | } | |
1415 | ||
1416 | floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper) | |
1417 | { | |
1418 | CPU_LDoubleU temp; | |
1419 | ||
1420 | temp.l.upper = upper; | |
1421 | temp.l.lower = mant; | |
1422 | return temp.d; | |
1423 | } | |
1424 | ||
1425 | /* MMX/SSE */ | |
1426 | /* XXX: optimize by storing fpstt and fptags in the static cpu state */ | |
1427 | ||
/* MXCSR control bits consumed when the SSE float status is programmed. */
#define SSE_DAZ     (1 << 6)    /* 0x0040: denormals are zero */
#define SSE_RC_MASK (3 << 13)   /* 0x6000: rounding-control field */
#define SSE_RC_NEAR (0 << 13)   /* 0x0000: round to nearest even */
#define SSE_RC_DOWN (1 << 13)   /* 0x2000: round down */
#define SSE_RC_UP   (2 << 13)   /* 0x4000: round up */
#define SSE_RC_CHOP (3 << 13)   /* 0x6000: round toward zero */
#define SSE_FZ      (1 << 15)   /* 0x8000: flush to zero */
1435 | ||
4e47e39a | 1436 | void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr) |
f299f437 BS |
1437 | { |
1438 | int rnd_type; | |
1439 | ||
4e47e39a RH |
1440 | env->mxcsr = mxcsr; |
1441 | ||
f299f437 | 1442 | /* set rounding mode */ |
4e47e39a | 1443 | switch (mxcsr & SSE_RC_MASK) { |
f299f437 BS |
1444 | default: |
1445 | case SSE_RC_NEAR: | |
1446 | rnd_type = float_round_nearest_even; | |
1447 | break; | |
1448 | case SSE_RC_DOWN: | |
1449 | rnd_type = float_round_down; | |
1450 | break; | |
1451 | case SSE_RC_UP: | |
1452 | rnd_type = float_round_up; | |
1453 | break; | |
1454 | case SSE_RC_CHOP: | |
1455 | rnd_type = float_round_to_zero; | |
1456 | break; | |
1457 | } | |
1458 | set_float_rounding_mode(rnd_type, &env->sse_status); | |
1459 | ||
1460 | /* set denormals are zero */ | |
4e47e39a | 1461 | set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); |
f299f437 BS |
1462 | |
1463 | /* set flush to zero */ | |
4e47e39a | 1464 | set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); |
f299f437 BS |
1465 | } |
1466 | ||
5bde1407 PD |
1467 | void cpu_set_fpuc(CPUX86State *env, uint16_t val) |
1468 | { | |
1469 | env->fpuc = val; | |
1470 | update_fp_status(env); | |
1471 | } | |
1472 | ||
d3eb5eae | 1473 | void helper_ldmxcsr(CPUX86State *env, uint32_t val) |
f299f437 | 1474 | { |
4e47e39a | 1475 | cpu_set_mxcsr(env, val); |
f299f437 BS |
1476 | } |
1477 | ||
d3eb5eae | 1478 | void helper_enter_mmx(CPUX86State *env) |
f299f437 BS |
1479 | { |
1480 | env->fpstt = 0; | |
1481 | *(uint32_t *)(env->fptags) = 0; | |
1482 | *(uint32_t *)(env->fptags + 4) = 0; | |
1483 | } | |
1484 | ||
d3eb5eae | 1485 | void helper_emms(CPUX86State *env) |
f299f437 BS |
1486 | { |
1487 | /* set to empty state */ | |
1488 | *(uint32_t *)(env->fptags) = 0x01010101; | |
1489 | *(uint32_t *)(env->fptags + 4) = 0x01010101; | |
1490 | } | |
1491 | ||
1492 | /* XXX: suppress */ | |
d3eb5eae | 1493 | void helper_movq(CPUX86State *env, void *d, void *s) |
f299f437 BS |
1494 | { |
1495 | *(uint64_t *)d = *(uint64_t *)s; | |
1496 | } | |
1497 | ||
1498 | #define SHIFT 0 | |
1499 | #include "ops_sse.h" | |
1500 | ||
1501 | #define SHIFT 1 | |
1502 | #include "ops_sse.h" |