target-i386/fpu_helper.c

   1 /*
   2  *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include <math.h>
  22 #include "cpu.h"
  23 #include "exec/helper-proto.h"
  24 #include "qemu/host-utils.h"
  25 #include "exec/cpu_ldst.h"
  26
  27 #define FPU_RC_MASK         0xc00
  28 #define FPU_RC_NEAR         0x000
  29 #define FPU_RC_DOWN         0x400
  30 #define FPU_RC_UP           0x800
  31 #define FPU_RC_CHOP         0xc00
  32
  33 #define MAXTAN 9223372036854775808.0
  34
  35 /* the following deal with x86 long double-precision numbers */
  36 #define MAXEXPD 0x7fff
  37 #define EXPBIAS 16383
  38 #define EXPD(fp)        (fp.l.upper & 0x7fff)
  39 #define SIGND(fp)       ((fp.l.upper) & 0x8000)
  40 #define MANTD(fp)       (fp.l.lower)
  41 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  42
  43 #define FPUS_IE (1 << 0)
  44 #define FPUS_DE (1 << 1)
  45 #define FPUS_ZE (1 << 2)
  46 #define FPUS_OE (1 << 3)
  47 #define FPUS_UE (1 << 4)
  48 #define FPUS_PE (1 << 5)
  49 #define FPUS_SF (1 << 6)
  50 #define FPUS_SE (1 << 7)
  51 #define FPUS_B  (1 << 15)
  52
  53 #define FPUC_EM 0x3f
  54
  55 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  56 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  57 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  58
  59 static inline void fpush(CPUX86State *env)
  60 {
  61     env->fpstt = (env->fpstt - 1) & 7;
  62     env->fptags[env->fpstt] = 0; /* validate stack entry */
  63 }
  64
  65 static inline void fpop(CPUX86State *env)
  66 {
  67     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  68     env->fpstt = (env->fpstt + 1) & 7;
  69 }
  70
  71 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  72                                    uintptr_t retaddr)
  73 {
  74     CPU_LDoubleU temp;
  75
  76     temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  77     temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  78     return temp.d;
  79 }
  80
  81 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  82                                uintptr_t retaddr)
  83 {
  84     CPU_LDoubleU temp;
  85
  86     temp.d = f;
  87     cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  88     cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  89 }
  90
  91 /* x87 FPU helpers */
  92
  93 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  94 {
  95     union {
  96         float64 f64;
  97         double d;
  98     } u;
  99
 100     u.f64 = floatx80_to_float64(a, &env->fp_status);
 101     return u.d;
 102 }
 103
 104 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 105 {
 106     union {
 107         float64 f64;
 108         double d;
 109     } u;
 110
 111     u.d = a;
 112     return float64_to_floatx80(u.f64, &env->fp_status);
 113 }
 114
 115 static void fpu_set_exception(CPUX86State *env, int mask)
 116 {
 117     env->fpus |= mask;
 118     if (env->fpus & (~env->fpuc & FPUC_EM)) {
 119         env->fpus |= FPUS_SE | FPUS_B;
 120     }
 121 }
 122
 123 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 124 {
 125     if (floatx80_is_zero(b)) {
 126         fpu_set_exception(env, FPUS_ZE);
 127     }
 128     return floatx80_div(a, b, &env->fp_status);
 129 }
 130
 131 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 132 {
 133     if (env->cr[0] & CR0_NE_MASK) {
 134         raise_exception_ra(env, EXCP10_COPR, retaddr);
 135     }
 136 #if !defined(CONFIG_USER_ONLY)
 137     else {
 138         cpu_set_ferr(env);
 139     }
 140 #endif
 141 }
 142
 143 void helper_flds_FT0(CPUX86State *env, uint32_t val)
 144 {
 145     union {
 146         float32 f;
 147         uint32_t i;
 148     } u;
 149
 150     u.i = val;
 151     FT0 = float32_to_floatx80(u.f, &env->fp_status);
 152 }
 153
 154 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 155 {
 156     union {
 157         float64 f;
 158         uint64_t i;
 159     } u;
 160
 161     u.i = val;
 162     FT0 = float64_to_floatx80(u.f, &env->fp_status);
 163 }
 164
 165 void helper_fildl_FT0(CPUX86State *env, int32_t val)
 166 {
 167     FT0 = int32_to_floatx80(val, &env->fp_status);
 168 }
 169
 170 void helper_flds_ST0(CPUX86State *env, uint32_t val)
 171 {
 172     int new_fpstt;
 173     union {
 174         float32 f;
 175         uint32_t i;
 176     } u;
 177
 178     new_fpstt = (env->fpstt - 1) & 7;
 179     u.i = val;
 180     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 181     env->fpstt = new_fpstt;
 182     env->fptags[new_fpstt] = 0; /* validate stack entry */
 183 }
 184
 185 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 186 {
 187     int new_fpstt;
 188     union {
 189         float64 f;
 190         uint64_t i;
 191     } u;
 192
 193     new_fpstt = (env->fpstt - 1) & 7;
 194     u.i = val;
 195     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 196     env->fpstt = new_fpstt;
 197     env->fptags[new_fpstt] = 0; /* validate stack entry */
 198 }
 199
 200 void helper_fildl_ST0(CPUX86State *env, int32_t val)
 201 {
 202     int new_fpstt;
 203
 204     new_fpstt = (env->fpstt - 1) & 7;
 205     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 206     env->fpstt = new_fpstt;
 207     env->fptags[new_fpstt] = 0; /* validate stack entry */
 208 }
 209
 210 void helper_fildll_ST0(CPUX86State *env, int64_t val)
 211 {
 212     int new_fpstt;
 213
 214     new_fpstt = (env->fpstt - 1) & 7;
 215     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 216     env->fpstt = new_fpstt;
 217     env->fptags[new_fpstt] = 0; /* validate stack entry */
 218 }
 219
 220 uint32_t helper_fsts_ST0(CPUX86State *env)
 221 {
 222     union {
 223         float32 f;
 224         uint32_t i;
 225     } u;
 226
 227     u.f = floatx80_to_float32(ST0, &env->fp_status);
 228     return u.i;
 229 }
 230
 231 uint64_t helper_fstl_ST0(CPUX86State *env)
 232 {
 233     union {
 234         float64 f;
 235         uint64_t i;
 236     } u;
 237
 238     u.f = floatx80_to_float64(ST0, &env->fp_status);
 239     return u.i;
 240 }
 241
 242 int32_t helper_fist_ST0(CPUX86State *env)
 243 {
 244     int32_t val;
 245
 246     val = floatx80_to_int32(ST0, &env->fp_status);
 247     if (val != (int16_t)val) {
 248         val = -32768;
 249     }
 250     return val;
 251 }
 252
 253 int32_t helper_fistl_ST0(CPUX86State *env)
 254 {
 255     int32_t val;
 256     signed char old_exp_flags;
 257
 258     old_exp_flags = get_float_exception_flags(&env->fp_status);
 259     set_float_exception_flags(0, &env->fp_status);
 260
 261     val = floatx80_to_int32(ST0, &env->fp_status);
 262     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 263         val = 0x80000000;
 264     }
 265     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 266                                 | old_exp_flags, &env->fp_status);
 267     return val;
 268 }
 269
 270 int64_t helper_fistll_ST0(CPUX86State *env)
 271 {
 272     int64_t val;
 273     signed char old_exp_flags;
 274
 275     old_exp_flags = get_float_exception_flags(&env->fp_status);
 276     set_float_exception_flags(0, &env->fp_status);
 277
 278     val = floatx80_to_int64(ST0, &env->fp_status);
 279     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 280         val = 0x8000000000000000ULL;
 281     }
 282     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 283                                 | old_exp_flags, &env->fp_status);
 284     return val;
 285 }
 286
 287 int32_t helper_fistt_ST0(CPUX86State *env)
 288 {
 289     int32_t val;
 290
 291     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 292     if (val != (int16_t)val) {
 293         val = -32768;
 294     }
 295     return val;
 296 }
 297
 298 int32_t helper_fisttl_ST0(CPUX86State *env)
 299 {
 300     int32_t val;
 301
 302     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 303     return val;
 304 }
 305
 306 int64_t helper_fisttll_ST0(CPUX86State *env)
 307 {
 308     int64_t val;
 309
 310     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 311     return val;
 312 }
 313
 314 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 315 {
 316     int new_fpstt;
 317
 318     new_fpstt = (env->fpstt - 1) & 7;
 319     env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 320     env->fpstt = new_fpstt;
 321     env->fptags[new_fpstt] = 0; /* validate stack entry */
 322 }
 323
 324 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 325 {
 326     helper_fstt(env, ST0, ptr, GETPC());
 327 }
 328
 329 void helper_fpush(CPUX86State *env)
 330 {
 331     fpush(env);
 332 }
 333
 334 void helper_fpop(CPUX86State *env)
 335 {
 336     fpop(env);
 337 }
 338
 339 void helper_fdecstp(CPUX86State *env)
 340 {
 341     env->fpstt = (env->fpstt - 1) & 7;
 342     env->fpus &= ~0x4700;
 343 }
 344
 345 void helper_fincstp(CPUX86State *env)
 346 {
 347     env->fpstt = (env->fpstt + 1) & 7;
 348     env->fpus &= ~0x4700;
 349 }
 350
 351 /* FPU move */
 352
 353 void helper_ffree_STN(CPUX86State *env, int st_index)
 354 {
 355     env->fptags[(env->fpstt + st_index) & 7] = 1;
 356 }
 357
 358 void helper_fmov_ST0_FT0(CPUX86State *env)
 359 {
 360     ST0 = FT0;
 361 }
 362
 363 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 364 {
 365     FT0 = ST(st_index);
 366 }
 367
 368 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 369 {
 370     ST0 = ST(st_index);
 371 }
 372
 373 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 374 {
 375     ST(st_index) = ST0;
 376 }
 377
 378 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 379 {
 380     floatx80 tmp;
 381
 382     tmp = ST(st_index);
 383     ST(st_index) = ST0;
 384     ST0 = tmp;
 385 }
 386
 387 /* FPU operations */
 388
 389 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 390
 391 void helper_fcom_ST0_FT0(CPUX86State *env)
 392 {
 393     int ret;
 394
 395     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 396     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 397 }
 398
 399 void helper_fucom_ST0_FT0(CPUX86State *env)
 400 {
 401     int ret;
 402
 403     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 404     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 405 }
 406
 407 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 408
 409 void helper_fcomi_ST0_FT0(CPUX86State *env)
 410 {
 411     int eflags;
 412     int ret;
 413
 414     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 415     eflags = cpu_cc_compute_all(env, CC_OP);
 416     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 417     CC_SRC = eflags;
 418 }
 419
 420 void helper_fucomi_ST0_FT0(CPUX86State *env)
 421 {
 422     int eflags;
 423     int ret;
 424
 425     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 426     eflags = cpu_cc_compute_all(env, CC_OP);
 427     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 428     CC_SRC = eflags;
 429 }
 430
 431 void helper_fadd_ST0_FT0(CPUX86State *env)
 432 {
 433     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 434 }
 435
 436 void helper_fmul_ST0_FT0(CPUX86State *env)
 437 {
 438     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 439 }
 440
 441 void helper_fsub_ST0_FT0(CPUX86State *env)
 442 {
 443     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 444 }
 445
 446 void helper_fsubr_ST0_FT0(CPUX86State *env)
 447 {
 448     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 449 }
 450
 451 void helper_fdiv_ST0_FT0(CPUX86State *env)
 452 {
 453     ST0 = helper_fdiv(env, ST0, FT0);
 454 }
 455
 456 void helper_fdivr_ST0_FT0(CPUX86State *env)
 457 {
 458     ST0 = helper_fdiv(env, FT0, ST0);
 459 }
 460
 461 /* fp operations between STN and ST0 */
 462
 463 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 464 {
 465     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 466 }
 467
 468 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 469 {
 470     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 471 }
 472
 473 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 474 {
 475     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 476 }
 477
 478 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 479 {
 480     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 481 }
 482
 483 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 484 {
 485     floatx80 *p;
 486
 487     p = &ST(st_index);
 488     *p = helper_fdiv(env, *p, ST0);
 489 }
 490
 491 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 492 {
 493     floatx80 *p;
 494
 495     p = &ST(st_index);
 496     *p = helper_fdiv(env, ST0, *p);
 497 }
 498
 499 /* misc FPU operations */
 500 void helper_fchs_ST0(CPUX86State *env)
 501 {
 502     ST0 = floatx80_chs(ST0);
 503 }
 504
 505 void helper_fabs_ST0(CPUX86State *env)
 506 {
 507     ST0 = floatx80_abs(ST0);
 508 }
 509
 510 void helper_fld1_ST0(CPUX86State *env)
 511 {
 512     ST0 = floatx80_one;
 513 }
 514
 515 void helper_fldl2t_ST0(CPUX86State *env)
 516 {
 517     ST0 = floatx80_l2t;
 518 }
 519
 520 void helper_fldl2e_ST0(CPUX86State *env)
 521 {
 522     ST0 = floatx80_l2e;
 523 }
 524
 525 void helper_fldpi_ST0(CPUX86State *env)
 526 {
 527     ST0 = floatx80_pi;
 528 }
 529
 530 void helper_fldlg2_ST0(CPUX86State *env)
 531 {
 532     ST0 = floatx80_lg2;
 533 }
 534
 535 void helper_fldln2_ST0(CPUX86State *env)
 536 {
 537     ST0 = floatx80_ln2;
 538 }
 539
 540 void helper_fldz_ST0(CPUX86State *env)
 541 {
 542     ST0 = floatx80_zero;
 543 }
 544
 545 void helper_fldz_FT0(CPUX86State *env)
 546 {
 547     FT0 = floatx80_zero;
 548 }
 549
 550 uint32_t helper_fnstsw(CPUX86State *env)
 551 {
 552     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 553 }
 554
 555 uint32_t helper_fnstcw(CPUX86State *env)
 556 {
 557     return env->fpuc;
 558 }
 559
 560 void update_fp_status(CPUX86State *env)
 561 {
 562     int rnd_type;
 563
 564     /* set rounding mode */
 565     switch (env->fpuc & FPU_RC_MASK) {
 566     default:
 567     case FPU_RC_NEAR:
 568         rnd_type = float_round_nearest_even;
 569         break;
 570     case FPU_RC_DOWN:
 571         rnd_type = float_round_down;
 572         break;
 573     case FPU_RC_UP:
 574         rnd_type = float_round_up;
 575         break;
 576     case FPU_RC_CHOP:
 577         rnd_type = float_round_to_zero;
 578         break;
 579     }
 580     set_float_rounding_mode(rnd_type, &env->fp_status);
 581     switch ((env->fpuc >> 8) & 3) {
 582     case 0:
 583         rnd_type = 32;
 584         break;
 585     case 2:
 586         rnd_type = 64;
 587         break;
 588     case 3:
 589     default:
 590         rnd_type = 80;
 591         break;
 592     }
 593     set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 594 }
 595
 596 void helper_fldcw(CPUX86State *env, uint32_t val)
 597 {
 598     cpu_set_fpuc(env, val);
 599 }
 600
 601 void helper_fclex(CPUX86State *env)
 602 {
 603     env->fpus &= 0x7f00;
 604 }
 605
 606 void helper_fwait(CPUX86State *env)
 607 {
 608     if (env->fpus & FPUS_SE) {
 609         fpu_raise_exception(env, GETPC());
 610     }
 611 }
 612
 613 void helper_fninit(CPUX86State *env)
 614 {
 615     env->fpus = 0;
 616     env->fpstt = 0;
 617     cpu_set_fpuc(env, 0x37f);
 618     env->fptags[0] = 1;
 619     env->fptags[1] = 1;
 620     env->fptags[2] = 1;
 621     env->fptags[3] = 1;
 622     env->fptags[4] = 1;
 623     env->fptags[5] = 1;
 624     env->fptags[6] = 1;
 625     env->fptags[7] = 1;
 626 }
 627
 628 /* BCD ops */
 629
 630 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 631 {
 632     floatx80 tmp;
 633     uint64_t val;
 634     unsigned int v;
 635     int i;
 636
 637     val = 0;
 638     for (i = 8; i >= 0; i--) {
 639         v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 640         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 641     }
 642     tmp = int64_to_floatx80(val, &env->fp_status);
 643     if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 644         tmp = floatx80_chs(tmp);
 645     }
 646     fpush(env);
 647     ST0 = tmp;
 648 }
 649
 650 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 651 {
 652     int v;
 653     target_ulong mem_ref, mem_end;
 654     int64_t val;
 655
 656     val = floatx80_to_int64(ST0, &env->fp_status);
 657     mem_ref = ptr;
 658     mem_end = mem_ref + 9;
 659     if (val < 0) {
 660         cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 661         val = -val;
 662     } else {
 663         cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 664     }
 665     while (mem_ref < mem_end) {
 666         if (val == 0) {
 667             break;
 668         }
 669         v = val % 100;
 670         val = val / 100;
 671         v = ((v / 10) << 4) | (v % 10);
 672         cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 673     }
 674     while (mem_ref < mem_end) {
 675         cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 676     }
 677 }
 678
 679 void helper_f2xm1(CPUX86State *env)
 680 {
 681     double val = floatx80_to_double(env, ST0);
 682
 683     val = pow(2.0, val) - 1.0;
 684     ST0 = double_to_floatx80(env, val);
 685 }
 686
 687 void helper_fyl2x(CPUX86State *env)
 688 {
 689     double fptemp = floatx80_to_double(env, ST0);
 690
 691     if (fptemp > 0.0) {
 692         fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 693         fptemp *= floatx80_to_double(env, ST1);
 694         ST1 = double_to_floatx80(env, fptemp);
 695         fpop(env);
 696     } else {
 697         env->fpus &= ~0x4700;
 698         env->fpus |= 0x400;
 699     }
 700 }
 701
 702 void helper_fptan(CPUX86State *env)
 703 {
 704     double fptemp = floatx80_to_double(env, ST0);
 705
 706     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 707         env->fpus |= 0x400;
 708     } else {
 709         fptemp = tan(fptemp);
 710         ST0 = double_to_floatx80(env, fptemp);
 711         fpush(env);
 712         ST0 = floatx80_one;
 713         env->fpus &= ~0x400; /* C2 <-- 0 */
 714         /* the above code is for |arg| < 2**52 only */
 715     }
 716 }
 717
 718 void helper_fpatan(CPUX86State *env)
 719 {
 720     double fptemp, fpsrcop;
 721
 722     fpsrcop = floatx80_to_double(env, ST1);
 723     fptemp = floatx80_to_double(env, ST0);
 724     ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 725     fpop(env);
 726 }
 727
 728 void helper_fxtract(CPUX86State *env)
 729 {
 730     CPU_LDoubleU temp;
 731
 732     temp.d = ST0;
 733
 734     if (floatx80_is_zero(ST0)) {
 735         /* Easy way to generate -inf and raising division by 0 exception */
 736         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 737                            &env->fp_status);
 738         fpush(env);
 739         ST0 = temp.d;
 740     } else {
 741         int expdif;
 742
 743         expdif = EXPD(temp) - EXPBIAS;
 744         /* DP exponent bias */
 745         ST0 = int32_to_floatx80(expdif, &env->fp_status);
 746         fpush(env);
 747         BIASEXPONENT(temp);
 748         ST0 = temp.d;
 749     }
 750 }
 751
 752 void helper_fprem1(CPUX86State *env)
 753 {
 754     double st0, st1, dblq, fpsrcop, fptemp;
 755     CPU_LDoubleU fpsrcop1, fptemp1;
 756     int expdif;
 757     signed long long int q;
 758
 759     st0 = floatx80_to_double(env, ST0);
 760     st1 = floatx80_to_double(env, ST1);
 761
 762     if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 763         ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 764         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 765         return;
 766     }
 767
 768     fpsrcop = st0;
 769     fptemp = st1;
 770     fpsrcop1.d = ST0;
 771     fptemp1.d = ST1;
 772     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 773
 774     if (expdif < 0) {
 775         /* optimisation? taken from the AMD docs */
 776         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 777         /* ST0 is unchanged */
 778         return;
 779     }
 780
 781     if (expdif < 53) {
 782         dblq = fpsrcop / fptemp;
 783         /* round dblq towards nearest integer */
 784         dblq = rint(dblq);
 785         st0 = fpsrcop - fptemp * dblq;
 786
 787         /* convert dblq to q by truncating towards zero */
 788         if (dblq < 0.0) {
 789             q = (signed long long int)(-dblq);
 790         } else {
 791             q = (signed long long int)dblq;
 792         }
 793
 794         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 795         /* (C0,C3,C1) <-- (q2,q1,q0) */
 796         env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 797         env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 798         env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 799     } else {
 800         env->fpus |= 0x400;  /* C2 <-- 1 */
 801         fptemp = pow(2.0, expdif - 50);
 802         fpsrcop = (st0 / st1) / fptemp;
 803         /* fpsrcop = integer obtained by chopping */
 804         fpsrcop = (fpsrcop < 0.0) ?
 805                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 806         st0 -= (st1 * fpsrcop * fptemp);
 807     }
 808     ST0 = double_to_floatx80(env, st0);
 809 }
 810
 811 void helper_fprem(CPUX86State *env)
 812 {
 813     double st0, st1, dblq, fpsrcop, fptemp;
 814     CPU_LDoubleU fpsrcop1, fptemp1;
 815     int expdif;
 816     signed long long int q;
 817
 818     st0 = floatx80_to_double(env, ST0);
 819     st1 = floatx80_to_double(env, ST1);
 820
 821     if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 822         ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 823         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 824         return;
 825     }
 826
 827     fpsrcop = st0;
 828     fptemp = st1;
 829     fpsrcop1.d = ST0;
 830     fptemp1.d = ST1;
 831     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 832
 833     if (expdif < 0) {
 834         /* optimisation? taken from the AMD docs */
 835         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 836         /* ST0 is unchanged */
 837         return;
 838     }
 839
 840     if (expdif < 53) {
 841         dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 842         /* round dblq towards zero */
 843         dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 844         st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 845
 846         /* convert dblq to q by truncating towards zero */
 847         if (dblq < 0.0) {
 848             q = (signed long long int)(-dblq);
 849         } else {
 850             q = (signed long long int)dblq;
 851         }
 852
 853         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 854         /* (C0,C3,C1) <-- (q2,q1,q0) */
 855         env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 856         env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 857         env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 858     } else {
 859         int N = 32 + (expdif % 32); /* as per AMD docs */
 860
 861         env->fpus |= 0x400;  /* C2 <-- 1 */
 862         fptemp = pow(2.0, (double)(expdif - N));
 863         fpsrcop = (st0 / st1) / fptemp;
 864         /* fpsrcop = integer obtained by chopping */
 865         fpsrcop = (fpsrcop < 0.0) ?
 866                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 867         st0 -= (st1 * fpsrcop * fptemp);
 868     }
 869     ST0 = double_to_floatx80(env, st0);
 870 }
 871
 872 void helper_fyl2xp1(CPUX86State *env)
 873 {
 874     double fptemp = floatx80_to_double(env, ST0);
 875
 876     if ((fptemp + 1.0) > 0.0) {
 877         fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 878         fptemp *= floatx80_to_double(env, ST1);
 879         ST1 = double_to_floatx80(env, fptemp);
 880         fpop(env);
 881     } else {
 882         env->fpus &= ~0x4700;
 883         env->fpus |= 0x400;
 884     }
 885 }
 886
 887 void helper_fsqrt(CPUX86State *env)
 888 {
 889     if (floatx80_is_neg(ST0)) {
 890         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 891         env->fpus |= 0x400;
 892     }
 893     ST0 = floatx80_sqrt(ST0, &env->fp_status);
 894 }
 895
 896 void helper_fsincos(CPUX86State *env)
 897 {
 898     double fptemp = floatx80_to_double(env, ST0);
 899
 900     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 901         env->fpus |= 0x400;
 902     } else {
 903         ST0 = double_to_floatx80(env, sin(fptemp));
 904         fpush(env);
 905         ST0 = double_to_floatx80(env, cos(fptemp));
 906         env->fpus &= ~0x400;  /* C2 <-- 0 */
 907         /* the above code is for |arg| < 2**63 only */
 908     }
 909 }
 910
 911 void helper_frndint(CPUX86State *env)
 912 {
 913     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 914 }
 915
 916 void helper_fscale(CPUX86State *env)
 917 {
 918     if (floatx80_is_any_nan(ST1)) {
 919         ST0 = ST1;
 920     } else {
 921         int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 922         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 923     }
 924 }
 925
 926 void helper_fsin(CPUX86State *env)
 927 {
 928     double fptemp = floatx80_to_double(env, ST0);
 929
 930     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 931         env->fpus |= 0x400;
 932     } else {
 933         ST0 = double_to_floatx80(env, sin(fptemp));
 934         env->fpus &= ~0x400;  /* C2 <-- 0 */
 935         /* the above code is for |arg| < 2**53 only */
 936     }
 937 }
 938
 939 void helper_fcos(CPUX86State *env)
 940 {
 941     double fptemp = floatx80_to_double(env, ST0);
 942
 943     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 944         env->fpus |= 0x400;
 945     } else {
 946         ST0 = double_to_floatx80(env, cos(fptemp));
 947         env->fpus &= ~0x400;  /* C2 <-- 0 */
 948         /* the above code is for |arg| < 2**63 only */
 949     }
 950 }
 951
 952 void helper_fxam_ST0(CPUX86State *env)
 953 {
 954     CPU_LDoubleU temp;
 955     int expdif;
 956
 957     temp.d = ST0;
 958
 959     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 960     if (SIGND(temp)) {
 961         env->fpus |= 0x200; /* C1 <-- 1 */
 962     }
 963
 964     /* XXX: test fptags too */
 965     expdif = EXPD(temp);
 966     if (expdif == MAXEXPD) {
 967         if (MANTD(temp) == 0x8000000000000000ULL) {
 968             env->fpus |= 0x500; /* Infinity */
 969         } else {
 970             env->fpus |= 0x100; /* NaN */
 971         }
 972     } else if (expdif == 0) {
 973         if (MANTD(temp) == 0) {
 974             env->fpus |=  0x4000; /* Zero */
 975         } else {
 976             env->fpus |= 0x4400; /* Denormal */
 977         }
 978     } else {
 979         env->fpus |= 0x400;
 980     }
 981 }
 982
 983 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 984                       uintptr_t retaddr)
 985 {
 986     int fpus, fptag, exp, i;
 987     uint64_t mant;
 988     CPU_LDoubleU tmp;
 989
 990     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 991     fptag = 0;
 992     for (i = 7; i >= 0; i--) {
 993         fptag <<= 2;
 994         if (env->fptags[i]) {
 995             fptag |= 3;
 996         } else {
 997             tmp.d = env->fpregs[i].d;
 998             exp = EXPD(tmp);
 999             mant = MANTD(tmp);
1000             if (exp == 0 && mant == 0) {
1001                 /* zero */
1002                 fptag |= 1;
1003             } else if (exp == 0 || exp == MAXEXPD
1004                        || (mant & (1LL << 63)) == 0) {
1005                 /* NaNs, infinity, denormal */
1006                 fptag |= 2;
1007             }
1008         }
1009     }
1010     if (data32) {
1011         /* 32 bit */
1012         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1013         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1014         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1015         cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1016         cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1017         cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1018         cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1019     } else {
1020         /* 16 bit */
1021         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1022         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1023         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1024         cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1025         cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1026         cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1027         cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1028     }
1029 }
1030
1031 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1032 {
1033     do_fstenv(env, ptr, data32, GETPC());
1034 }
1035
1036 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1037                       uintptr_t retaddr)
1038 {
1039     int i, fpus, fptag;
1040
1041     if (data32) {
1042         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043         fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044         fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1045     } else {
1046         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1047         fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1048         fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1049     }
1050     env->fpstt = (fpus >> 11) & 7;
1051     env->fpus = fpus & ~0x3800;
1052     for (i = 0; i < 8; i++) {
1053         env->fptags[i] = ((fptag & 3) == 3);
1054         fptag >>= 2;
1055     }
1056 }
1057
1058 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1059 {
1060     do_fldenv(env, ptr, data32, GETPC());
1061 }
1062
1063 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1064 {
1065     floatx80 tmp;
1066     int i;
1067
1068     do_fstenv(env, ptr, data32, GETPC());
1069
1070     ptr += (14 << data32);
1071     for (i = 0; i < 8; i++) {
1072         tmp = ST(i);
1073         helper_fstt(env, tmp, ptr, GETPC());
1074         ptr += 10;
1075     }
1076
1077     /* fninit */
1078     env->fpus = 0;
1079     env->fpstt = 0;
1080     cpu_set_fpuc(env, 0x37f);
1081     env->fptags[0] = 1;
1082     env->fptags[1] = 1;
1083     env->fptags[2] = 1;
1084     env->fptags[3] = 1;
1085     env->fptags[4] = 1;
1086     env->fptags[5] = 1;
1087     env->fptags[6] = 1;
1088     env->fptags[7] = 1;
1089 }
1090
1091 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1092 {
1093     floatx80 tmp;
1094     int i;
1095
1096     do_fldenv(env, ptr, data32, GETPC());
1097     ptr += (14 << data32);
1098
1099     for (i = 0; i < 8; i++) {
1100         tmp = helper_fldt(env, ptr, GETPC());
1101         ST(i) = tmp;
1102         ptr += 10;
1103     }
1104 }
1105
1106 #if defined(CONFIG_USER_ONLY)
/*
 * Entry point built only for CONFIG_USER_ONLY: forwards straight to
 * helper_fsave() with the same arguments.
 * NOTE(review): helper_fsave() derives its return address with GETPC();
 * confirm that value is meaningful when the call originates here.
 */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}
1111
/*
 * Entry point built only for CONFIG_USER_ONLY: forwards straight to
 * helper_frstor() with the same arguments.
 * NOTE(review): helper_frstor() derives its return address with GETPC();
 * confirm that value is meaningful when the call originates here.
 */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
1116 #endif
1117
1118 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1119 {
1120     int fpus, fptag, i;
1121     target_ulong addr;
1122
1123     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1124     fptag = 0;
1125     for (i = 0; i < 8; i++) {
1126         fptag |= (env->fptags[i] << i);
1127     }
1128     cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1129     cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1130     cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1131
1132     /* In 32-bit mode this is eip, sel, dp, sel.
1133        In 64-bit mode this is rip, rdp.
1134        But in either case we don't write actual data, just zeros.  */
1135     cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1136     cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
1137
1138     addr = ptr + 0x20;
1139     for (i = 0; i < 8; i++) {
1140         floatx80 tmp = ST(i);
1141         helper_fstt(env, tmp, addr, ra);
1142         addr += 16;
1143     }
1144 }
1145
1146 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1147 {
1148     cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1149     cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1150 }
1151
1152 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1153 {
1154     int i, nb_xmm_regs;
1155     target_ulong addr;
1156
1157     if (env->hflags & HF_CS64_MASK) {
1158         nb_xmm_regs = 16;
1159     } else {
1160         nb_xmm_regs = 8;
1161     }
1162
1163     addr = ptr + 0xa0;
1164     for (i = 0; i < nb_xmm_regs; i++) {
1165         cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1166         cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1167         addr += 16;
1168     }
1169 }
1170
1171 static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1172 {
1173     int i;
1174
1175     for (i = 0; i < 4; i++, addr += 16) {
1176         cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177         cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178     }
1179 }
1180
1181 static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1182 {
1183     cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1184     cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1185 }
1186
1187 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1188 {
1189     uintptr_t ra = GETPC();
1190
1191     /* The operand must be 16 byte aligned */
1192     if (ptr & 0xf) {
1193         raise_exception_ra(env, EXCP0D_GPF, ra);
1194     }
1195
1196     do_xsave_fpu(env, ptr, ra);
1197
1198     if (env->cr[4] & CR4_OSFXSR_MASK) {
1199         do_xsave_mxcsr(env, ptr, ra);
1200         /* Fast FXSAVE leaves out the XMM registers */
1201         if (!(env->efer & MSR_EFER_FFXSR)
1202             || (env->hflags & HF_CPL_MASK)
1203             || !(env->hflags & HF_LMA_MASK)) {
1204             do_xsave_sse(env, ptr, ra);
1205         }
1206     }
1207 }
1208
1209 static uint64_t get_xinuse(CPUX86State *env)
1210 {
1211     uint64_t inuse = -1;
1212
1213     /* For the most part, we don't track XINUSE.  We could calculate it
1214        here for all components, but it's probably less work to simply
1215        indicate in use.  That said, the state of BNDREGS is important
1216        enough to track in HFLAGS, so we might as well use that here.  */
1217     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1218        inuse &= ~XSTATE_BNDREGS;
1219     }
1220     return inuse;
1221 }
1222
1223 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1224                      uint64_t inuse, uint64_t opt, uintptr_t ra)
1225 {
1226     uint64_t old_bv, new_bv;
1227
1228     /* The OS must have enabled XSAVE.  */
1229     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1230         raise_exception_ra(env, EXCP06_ILLOP, ra);
1231     }
1232
1233     /* The operand must be 64 byte aligned.  */
1234     if (ptr & 63) {
1235         raise_exception_ra(env, EXCP0D_GPF, ra);
1236     }
1237
1238     /* Never save anything not enabled by XCR0.  */
1239     rfbm &= env->xcr0;
1240     opt &= rfbm;
1241
1242     if (opt & XSTATE_FP) {
1243         do_xsave_fpu(env, ptr, ra);
1244     }
1245     if (rfbm & XSTATE_SSE) {
1246         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1247         do_xsave_mxcsr(env, ptr, ra);
1248     }
1249     if (opt & XSTATE_SSE) {
1250         do_xsave_sse(env, ptr, ra);
1251     }
1252     if (opt & XSTATE_BNDREGS) {
1253         target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS].offset;
1254         do_xsave_bndregs(env, ptr + off, ra);
1255     }
1256     if (opt & XSTATE_BNDCSR) {
1257         target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR].offset;
1258         do_xsave_bndcsr(env, ptr + off, ra);
1259     }
1260
1261     /* Update the XSTATE_BV field.  */
1262     old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1263     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1264     cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1265 }
1266
1267 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1268 {
1269     do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1270 }
1271
1272 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1273 {
1274     uint64_t inuse = get_xinuse(env);
1275     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1276 }
1277
1278 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1279 {
1280     int i, fpus, fptag;
1281     target_ulong addr;
1282
1283     cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1284     fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1285     fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
1286     env->fpstt = (fpus >> 11) & 7;
1287     env->fpus = fpus & ~0x3800;
1288     fptag ^= 0xff;
1289     for (i = 0; i < 8; i++) {
1290         env->fptags[i] = ((fptag >> i) & 1);
1291     }
1292
1293     addr = ptr + 0x20;
1294     for (i = 0; i < 8; i++) {
1295         floatx80 tmp = helper_fldt(env, addr, ra);
1296         ST(i) = tmp;
1297         addr += 16;
1298     }
1299 }
1300
1301 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1302 {
1303     cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1304 }
1305
1306 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1307 {
1308     int i, nb_xmm_regs;
1309     target_ulong addr;
1310
1311     if (env->hflags & HF_CS64_MASK) {
1312         nb_xmm_regs = 16;
1313     } else {
1314         nb_xmm_regs = 8;
1315     }
1316
1317     addr = ptr + 0xa0;
1318     for (i = 0; i < nb_xmm_regs; i++) {
1319         env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1320         env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1321         addr += 16;
1322     }
1323 }
1324
1325 static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1326 {
1327     int i;
1328
1329     for (i = 0; i < 4; i++, addr += 16) {
1330         env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1331         env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1332     }
1333 }
1334
1335 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1336 {
1337     /* FIXME: Extend highest implemented bit of linear address.  */
1338     env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1339     env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1340 }
1341
1342 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1343 {
1344     uintptr_t ra = GETPC();
1345
1346     /* The operand must be 16 byte aligned */
1347     if (ptr & 0xf) {
1348         raise_exception_ra(env, EXCP0D_GPF, ra);
1349     }
1350
1351     do_xrstor_fpu(env, ptr, ra);
1352
1353     if (env->cr[4] & CR4_OSFXSR_MASK) {
1354         do_xrstor_mxcsr(env, ptr, ra);
1355         /* Fast FXRSTOR leaves out the XMM registers */
1356         if (!(env->efer & MSR_EFER_FFXSR)
1357             || (env->hflags & HF_CPL_MASK)
1358             || !(env->hflags & HF_LMA_MASK)) {
1359             do_xrstor_sse(env, ptr, ra);
1360         }
1361     }
1362 }
1363
1364 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1365 {
1366     uintptr_t ra = GETPC();
1367     uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1368
1369     rfbm &= env->xcr0;
1370
1371     /* The OS must have enabled XSAVE.  */
1372     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1373         raise_exception_ra(env, EXCP06_ILLOP, ra);
1374     }
1375
1376     /* The operand must be 64 byte aligned.  */
1377     if (ptr & 63) {
1378         raise_exception_ra(env, EXCP0D_GPF, ra);
1379     }
1380
1381     xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1382
1383     if ((int64_t)xstate_bv < 0) {
1384         /* FIXME: Compact form.  */
1385         raise_exception_ra(env, EXCP0D_GPF, ra);
1386     }
1387
1388     /* Standard form.  */
1389
1390     /* The XSTATE field must not set bits not present in XCR0.  */
1391     if (xstate_bv & ~env->xcr0) {
1392         raise_exception_ra(env, EXCP0D_GPF, ra);
1393     }
1394
1395     /* The XCOMP field must be zero.  */
1396     xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1397     xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1398     if (xcomp_bv0 || xcomp_bv1) {
1399         raise_exception_ra(env, EXCP0D_GPF, ra);
1400     }
1401
1402     if (rfbm & XSTATE_FP) {
1403         if (xstate_bv & XSTATE_FP) {
1404             do_xrstor_fpu(env, ptr, ra);
1405         } else {
1406             helper_fninit(env);
1407             memset(env->fpregs, 0, sizeof(env->fpregs));
1408         }
1409     }
1410     if (rfbm & XSTATE_SSE) {
1411         /* Note that the standard form of XRSTOR loads MXCSR from memory
1412            whether or not the XSTATE_BV bit is set.  */
1413         do_xrstor_mxcsr(env, ptr, ra);
1414         if (xstate_bv & XSTATE_SSE) {
1415             do_xrstor_sse(env, ptr, ra);
1416         } else {
1417             /* ??? When AVX is implemented, we may have to be more
1418                selective in the clearing.  */
1419             memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1420         }
1421     }
1422     if (rfbm & XSTATE_BNDREGS) {
1423         if (xstate_bv & XSTATE_BNDREGS) {
1424             target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS].offset;
1425             do_xrstor_bndregs(env, ptr + off, ra);
1426             env->hflags |= HF_MPX_IU_MASK;
1427         } else {
1428             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1429             env->hflags &= ~HF_MPX_IU_MASK;
1430         }
1431     }
1432     if (rfbm & XSTATE_BNDCSR) {
1433         if (xstate_bv & XSTATE_BNDCSR) {
1434             target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR].offset;
1435             do_xrstor_bndcsr(env, ptr + off, ra);
1436         } else {
1437             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1438         }
1439         cpu_sync_bndcs_hflags(env);
1440     }
1441 }
1442
1443 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1444 {
1445     /* The OS must have enabled XSAVE.  */
1446     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1447         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1448     }
1449
1450     switch (ecx) {
1451     case 0:
1452         return env->xcr0;
1453     case 1:
1454         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1455             return env->xcr0 & get_xinuse(env);
1456         }
1457         break;
1458     }
1459     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1460 }
1461
1462 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1463 {
1464     uint32_t dummy, ena_lo, ena_hi;
1465     uint64_t ena;
1466
1467     /* The OS must have enabled XSAVE.  */
1468     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1469         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1470     }
1471
1472     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1473     if (ecx != 0 || (mask & XSTATE_FP) == 0) {
1474         goto do_gpf;
1475     }
1476
1477     /* Disallow enabling unimplemented features.  */
1478     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1479     ena = ((uint64_t)ena_hi << 32) | ena_lo;
1480     if (mask & ~ena) {
1481         goto do_gpf;
1482     }
1483
1484     /* Disallow enabling only half of MPX.  */
1485     if ((mask ^ (mask * (XSTATE_BNDCSR / XSTATE_BNDREGS))) & XSTATE_BNDCSR) {
1486         goto do_gpf;
1487     }
1488
1489     env->xcr0 = mask;
1490     cpu_sync_bndcs_hflags(env);
1491     return;
1492
1493  do_gpf:
1494     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1495 }
1496
1497 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1498 {
1499     CPU_LDoubleU temp;
1500
1501     temp.d = f;
1502     *pmant = temp.l.lower;
1503     *pexp = temp.l.upper;
1504 }
1505
1506 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1507 {
1508     CPU_LDoubleU temp;
1509
1510     temp.l.upper = upper;
1511     temp.l.lower = mant;
1512     return temp.d;
1513 }
1514
1515 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1517
/* MXCSR bit fields interpreted by cpu_set_mxcsr(). */
#define SSE_DAZ             0x0040  /* denormals are zero (flush inputs) */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round toward zero */
#define SSE_FZ              0x8000  /* flush results to zero */
1525
1526 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1527 {
1528     int rnd_type;
1529
1530     env->mxcsr = mxcsr;
1531
1532     /* set rounding mode */
1533     switch (mxcsr & SSE_RC_MASK) {
1534     default:
1535     case SSE_RC_NEAR:
1536         rnd_type = float_round_nearest_even;
1537         break;
1538     case SSE_RC_DOWN:
1539         rnd_type = float_round_down;
1540         break;
1541     case SSE_RC_UP:
1542         rnd_type = float_round_up;
1543         break;
1544     case SSE_RC_CHOP:
1545         rnd_type = float_round_to_zero;
1546         break;
1547     }
1548     set_float_rounding_mode(rnd_type, &env->sse_status);
1549
1550     /* set denormals are zero */
1551     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1552
1553     /* set flush to zero */
1554     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1555 }
1556
/*
 * Store @val as the x87 control word (env->fpuc), then call
 * update_fp_status().
 * NOTE(review): update_fp_status() presumably re-derives the softfloat
 * settings from the new control word -- confirm against its definition.
 */
void cpu_set_fpuc(CPUX86State *env, uint16_t val)
{
    env->fpuc = val;
    update_fp_status(env);
}
1562
/* Helper entry point: install @val as the new MXCSR via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1567
1568 void helper_enter_mmx(CPUX86State *env)
1569 {
1570     env->fpstt = 0;
1571     *(uint32_t *)(env->fptags) = 0;
1572     *(uint32_t *)(env->fptags + 4) = 0;
1573 }
1574
1575 void helper_emms(CPUX86State *env)
1576 {
1577     /* set to empty state */
1578     *(uint32_t *)(env->fptags) = 0x01010101;
1579     *(uint32_t *)(env->fptags + 4) = 0x01010101;
1580 }
1581
1582 /* XXX: suppress */
1583 void helper_movq(CPUX86State *env, void *d, void *s)
1584 {
1585     *(uint64_t *)d = *(uint64_t *)s;
1586 }
1587
/*
 * ops_sse.h is included twice, once per SHIFT value.
 * NOTE(review): presumably SHIFT selects the register width the template
 * is instantiated for, and the header #undefs SHIFT at its end so the
 * second #define is legal -- confirm against ops_sse.h.
 */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"