2 * i386 micro operations
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 /* n must be a constant to be efficient */
25 static inline target_long lshift(target_long x, int n)
33 /* we define the various pieces of code used by the JIT */
37 #include "opreg_template.h"
43 #include "opreg_template.h"
49 #include "opreg_template.h"
55 #include "opreg_template.h"
61 #include "opreg_template.h"
67 #include "opreg_template.h"
73 #include "opreg_template.h"
79 #include "opreg_template.h"
85 #define REG (env->regs[8])
87 #include "opreg_template.h"
91 #define REG (env->regs[9])
93 #include "opreg_template.h"
97 #define REG (env->regs[10])
99 #include "opreg_template.h"
103 #define REG (env->regs[11])
105 #include "opreg_template.h"
109 #define REG (env->regs[12])
111 #include "opreg_template.h"
115 #define REG (env->regs[13])
117 #include "opreg_template.h"
121 #define REG (env->regs[14])
123 #include "opreg_template.h"
127 #define REG (env->regs[15])
129 #include "opreg_template.h"
135 /* operations with flags */
137 /* update flags with T0 and T1 (add/sub case) */
138 void OPPROTO op_update2_cc(void)
144 /* update flags with T0 (logic operation case) */
145 void OPPROTO op_update1_cc(void)
150 void OPPROTO op_update_neg_cc(void)
156 void OPPROTO op_cmpl_T0_T1_cc(void)
162 void OPPROTO op_update_inc_cc(void)
164 CC_SRC = cc_table[CC_OP].compute_c();
168 void OPPROTO op_testl_T0_T1_cc(void)
173 /* operations without flags */
175 void OPPROTO op_addl_T0_T1(void)
180 void OPPROTO op_orl_T0_T1(void)
185 void OPPROTO op_andl_T0_T1(void)
190 void OPPROTO op_subl_T0_T1(void)
195 void OPPROTO op_xorl_T0_T1(void)
200 void OPPROTO op_negl_T0(void)
205 void OPPROTO op_incl_T0(void)
210 void OPPROTO op_decl_T0(void)
215 void OPPROTO op_notl_T0(void)
220 void OPPROTO op_bswapl_T0(void)
226 void OPPROTO op_bswapq_T0(void)
232 /* multiply/divide */
234 /* XXX: add eflags optimizations */
235 /* XXX: add non P4 style flags */
237 void OPPROTO op_mulb_AL_T0(void)
240 res = (uint8_t)EAX * (uint8_t)T0;
241 EAX = (EAX & ~0xffff) | res;
243 CC_SRC = (res & 0xff00);
246 void OPPROTO op_imulb_AL_T0(void)
249 res = (int8_t)EAX * (int8_t)T0;
250 EAX = (EAX & ~0xffff) | (res & 0xffff);
252 CC_SRC = (res != (int8_t)res);
255 void OPPROTO op_mulw_AX_T0(void)
258 res = (uint16_t)EAX * (uint16_t)T0;
259 EAX = (EAX & ~0xffff) | (res & 0xffff);
260 EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
265 void OPPROTO op_imulw_AX_T0(void)
268 res = (int16_t)EAX * (int16_t)T0;
269 EAX = (EAX & ~0xffff) | (res & 0xffff);
270 EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
272 CC_SRC = (res != (int16_t)res);
275 void OPPROTO op_mull_EAX_T0(void)
278 res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
280 EDX = (uint32_t)(res >> 32);
281 CC_DST = (uint32_t)res;
282 CC_SRC = (uint32_t)(res >> 32);
285 void OPPROTO op_imull_EAX_T0(void)
288 res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
289 EAX = (uint32_t)(res);
290 EDX = (uint32_t)(res >> 32);
292 CC_SRC = (res != (int32_t)res);
295 void OPPROTO op_imulw_T0_T1(void)
298 res = (int16_t)T0 * (int16_t)T1;
301 CC_SRC = (res != (int16_t)res);
304 void OPPROTO op_imull_T0_T1(void)
307 res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
310 CC_SRC = (res != (int32_t)res);
314 void OPPROTO op_mulq_EAX_T0(void)
316 helper_mulq_EAX_T0();
319 void OPPROTO op_imulq_EAX_T0(void)
321 helper_imulq_EAX_T0();
324 void OPPROTO op_imulq_T0_T1(void)
326 helper_imulq_T0_T1();
330 /* division, flags are undefined */
332 void OPPROTO op_divb_AL_T0(void)
334 unsigned int num, den, q, r;
336 num = (EAX & 0xffff);
339 raise_exception(EXCP00_DIVZ);
343 raise_exception(EXCP00_DIVZ);
345 r = (num % den) & 0xff;
346 EAX = (EAX & ~0xffff) | (r << 8) | q;
349 void OPPROTO op_idivb_AL_T0(void)
356 raise_exception(EXCP00_DIVZ);
360 raise_exception(EXCP00_DIVZ);
362 r = (num % den) & 0xff;
363 EAX = (EAX & ~0xffff) | (r << 8) | q;
366 void OPPROTO op_divw_AX_T0(void)
368 unsigned int num, den, q, r;
370 num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
373 raise_exception(EXCP00_DIVZ);
377 raise_exception(EXCP00_DIVZ);
379 r = (num % den) & 0xffff;
380 EAX = (EAX & ~0xffff) | q;
381 EDX = (EDX & ~0xffff) | r;
384 void OPPROTO op_idivw_AX_T0(void)
388 num = (EAX & 0xffff) | ((EDX & 0xffff) << 16);
391 raise_exception(EXCP00_DIVZ);
395 raise_exception(EXCP00_DIVZ);
397 r = (num % den) & 0xffff;
398 EAX = (EAX & ~0xffff) | q;
399 EDX = (EDX & ~0xffff) | r;
402 void OPPROTO op_divl_EAX_T0(void)
404 helper_divl_EAX_T0();
407 void OPPROTO op_idivl_EAX_T0(void)
409 helper_idivl_EAX_T0();
413 void OPPROTO op_divq_EAX_T0(void)
415 helper_divq_EAX_T0();
418 void OPPROTO op_idivq_EAX_T0(void)
420 helper_idivq_EAX_T0();
424 /* constant load & misc op */
426 /* XXX: consistent names */
427 void OPPROTO op_movl_T0_imu(void)
429 T0 = (uint32_t)PARAM1;
432 void OPPROTO op_movl_T0_im(void)
434 T0 = (int32_t)PARAM1;
437 void OPPROTO op_addl_T0_im(void)
442 void OPPROTO op_andl_T0_ffff(void)
447 void OPPROTO op_andl_T0_im(void)
452 void OPPROTO op_movl_T0_T1(void)
457 void OPPROTO op_movl_T1_imu(void)
459 T1 = (uint32_t)PARAM1;
462 void OPPROTO op_movl_T1_im(void)
464 T1 = (int32_t)PARAM1;
467 void OPPROTO op_addl_T1_im(void)
472 void OPPROTO op_movl_T1_A0(void)
477 void OPPROTO op_movl_A0_im(void)
479 A0 = (uint32_t)PARAM1;
482 void OPPROTO op_addl_A0_im(void)
484 A0 = (uint32_t)(A0 + PARAM1);
487 void OPPROTO op_movl_A0_seg(void)
489 A0 = (uint32_t)*(target_ulong *)((char *)env + PARAM1);
492 void OPPROTO op_addl_A0_seg(void)
494 A0 = (uint32_t)(A0 + *(target_ulong *)((char *)env + PARAM1));
497 void OPPROTO op_addl_A0_AL(void)
499 A0 = (uint32_t)(A0 + (EAX & 0xff));
502 #ifdef WORDS_BIGENDIAN
503 typedef union UREG64 {
504 struct { uint16_t v3, v2, v1, v0; } w;
505 struct { uint32_t v1, v0; } l;
509 typedef union UREG64 {
510 struct { uint16_t v0, v1, v2, v3; } w;
511 struct { uint32_t v0, v1; } l;
526 void OPPROTO op_movq_T0_im64(void)
531 void OPPROTO op_movq_T1_im64(void)
536 void OPPROTO op_movq_A0_im(void)
538 A0 = (int32_t)PARAM1;
541 void OPPROTO op_movq_A0_im64(void)
546 void OPPROTO op_addq_A0_im(void)
548 A0 = (A0 + (int32_t)PARAM1);
551 void OPPROTO op_addq_A0_im64(void)
556 void OPPROTO op_movq_A0_seg(void)
558 A0 = *(target_ulong *)((char *)env + PARAM1);
561 void OPPROTO op_addq_A0_seg(void)
563 A0 += *(target_ulong *)((char *)env + PARAM1);
566 void OPPROTO op_addq_A0_AL(void)
568 A0 = (A0 + (EAX & 0xff));
573 void OPPROTO op_andl_A0_ffff(void)
580 #define MEMSUFFIX _raw
583 #if !defined(CONFIG_USER_ONLY)
584 #define MEMSUFFIX _kernel
587 #define MEMSUFFIX _user
593 void OPPROTO op_jmp_T0(void)
598 void OPPROTO op_movl_eip_im(void)
600 EIP = (uint32_t)PARAM1;
604 void OPPROTO op_movq_eip_im(void)
606 EIP = (int32_t)PARAM1;
609 void OPPROTO op_movq_eip_im64(void)
615 void OPPROTO op_hlt(void)
617 env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
618 env->hflags |= HF_HALTED_MASK;
619 env->exception_index = EXCP_HLT;
623 void OPPROTO op_debug(void)
625 env->exception_index = EXCP_DEBUG;
629 void OPPROTO op_raise_interrupt(void)
631 int intno, next_eip_addend;
633 next_eip_addend = PARAM2;
634 raise_interrupt(intno, 1, 0, next_eip_addend);
637 void OPPROTO op_raise_exception(void)
640 exception_index = PARAM1;
641 raise_exception(exception_index);
644 void OPPROTO op_into(void)
647 eflags = cc_table[CC_OP].compute_all();
649 raise_interrupt(EXCP04_INTO, 1, 0, PARAM1);
654 void OPPROTO op_cli(void)
656 env->eflags &= ~IF_MASK;
659 void OPPROTO op_sti(void)
661 env->eflags |= IF_MASK;
664 void OPPROTO op_set_inhibit_irq(void)
666 env->hflags |= HF_INHIBIT_IRQ_MASK;
669 void OPPROTO op_reset_inhibit_irq(void)
671 env->hflags &= ~HF_INHIBIT_IRQ_MASK;
675 /* vm86plus instructions */
676 void OPPROTO op_cli_vm(void)
678 env->eflags &= ~VIF_MASK;
681 void OPPROTO op_sti_vm(void)
683 env->eflags |= VIF_MASK;
684 if (env->eflags & VIP_MASK) {
686 raise_exception(EXCP0D_GPF);
692 void OPPROTO op_boundw(void)
698 if (v < low || v > high) {
699 raise_exception(EXCP05_BOUND);
704 void OPPROTO op_boundl(void)
710 if (v < low || v > high) {
711 raise_exception(EXCP05_BOUND);
716 void OPPROTO op_cmpxchg8b(void)
721 void OPPROTO op_movl_T0_0(void)
726 void OPPROTO op_exit_tb(void)
731 /* multiple size ops */
736 #include "ops_template.h"
740 #include "ops_template.h"
744 #include "ops_template.h"
750 #include "ops_template.h"
757 void OPPROTO op_movsbl_T0_T0(void)
762 void OPPROTO op_movzbl_T0_T0(void)
767 void OPPROTO op_movswl_T0_T0(void)
772 void OPPROTO op_movzwl_T0_T0(void)
777 void OPPROTO op_movswl_EAX_AX(void)
783 void OPPROTO op_movslq_T0_T0(void)
788 void OPPROTO op_movslq_RAX_EAX(void)
794 void OPPROTO op_movsbw_AX_AL(void)
796 EAX = (EAX & ~0xffff) | ((int8_t)EAX & 0xffff);
799 void OPPROTO op_movslq_EDX_EAX(void)
801 EDX = (int32_t)EAX >> 31;
804 void OPPROTO op_movswl_DX_AX(void)
806 EDX = (EDX & ~0xffff) | (((int16_t)EAX >> 15) & 0xffff);
810 void OPPROTO op_movsqo_RDX_RAX(void)
812 EDX = (int64_t)EAX >> 63;
816 /* string ops helpers */
818 void OPPROTO op_addl_ESI_T0(void)
820 ESI = (uint32_t)(ESI + T0);
823 void OPPROTO op_addw_ESI_T0(void)
825 ESI = (ESI & ~0xffff) | ((ESI + T0) & 0xffff);
828 void OPPROTO op_addl_EDI_T0(void)
830 EDI = (uint32_t)(EDI + T0);
833 void OPPROTO op_addw_EDI_T0(void)
835 EDI = (EDI & ~0xffff) | ((EDI + T0) & 0xffff);
838 void OPPROTO op_decl_ECX(void)
840 ECX = (uint32_t)(ECX - 1);
843 void OPPROTO op_decw_ECX(void)
845 ECX = (ECX & ~0xffff) | ((ECX - 1) & 0xffff);
849 void OPPROTO op_addq_ESI_T0(void)
854 void OPPROTO op_addq_EDI_T0(void)
859 void OPPROTO op_decq_ECX(void)
867 void op_addl_A0_SS(void)
869 A0 = (uint32_t)(A0 + env->segs[R_SS].base);
872 void op_subl_A0_2(void)
874 A0 = (uint32_t)(A0 - 2);
877 void op_subl_A0_4(void)
879 A0 = (uint32_t)(A0 - 4);
882 void op_addl_ESP_4(void)
884 ESP = (uint32_t)(ESP + 4);
887 void op_addl_ESP_2(void)
889 ESP = (uint32_t)(ESP + 2);
892 void op_addw_ESP_4(void)
894 ESP = (ESP & ~0xffff) | ((ESP + 4) & 0xffff);
897 void op_addw_ESP_2(void)
899 ESP = (ESP & ~0xffff) | ((ESP + 2) & 0xffff);
902 void op_addl_ESP_im(void)
904 ESP = (uint32_t)(ESP + PARAM1);
907 void op_addw_ESP_im(void)
909 ESP = (ESP & ~0xffff) | ((ESP + PARAM1) & 0xffff);
913 void op_subq_A0_2(void)
918 void op_subq_A0_8(void)
923 void op_addq_ESP_8(void)
928 void op_addq_ESP_im(void)
934 void OPPROTO op_rdtsc(void)
939 void OPPROTO op_cpuid(void)
944 void OPPROTO op_enter_level(void)
946 helper_enter_level(PARAM1, PARAM2);
950 void OPPROTO op_enter64_level(void)
952 helper_enter64_level(PARAM1, PARAM2);
956 void OPPROTO op_sysenter(void)
961 void OPPROTO op_sysexit(void)
967 void OPPROTO op_syscall(void)
969 helper_syscall(PARAM1);
972 void OPPROTO op_sysret(void)
974 helper_sysret(PARAM1);
978 void OPPROTO op_rdmsr(void)
983 void OPPROTO op_wrmsr(void)
991 void OPPROTO op_aam(void)
998 EAX = (EAX & ~0xffff) | al | (ah << 8);
1002 void OPPROTO op_aad(void)
1007 ah = (EAX >> 8) & 0xff;
1008 al = ((ah * base) + al) & 0xff;
1009 EAX = (EAX & ~0xffff) | al;
1013 void OPPROTO op_aaa(void)
1019 eflags = cc_table[CC_OP].compute_all();
1022 ah = (EAX >> 8) & 0xff;
1024 icarry = (al > 0xf9);
1025 if (((al & 0x0f) > 9 ) || af) {
1026 al = (al + 6) & 0x0f;
1027 ah = (ah + 1 + icarry) & 0xff;
1028 eflags |= CC_C | CC_A;
1030 eflags &= ~(CC_C | CC_A);
1033 EAX = (EAX & ~0xffff) | al | (ah << 8);
1038 void OPPROTO op_aas(void)
1044 eflags = cc_table[CC_OP].compute_all();
1047 ah = (EAX >> 8) & 0xff;
1050 if (((al & 0x0f) > 9 ) || af) {
1051 al = (al - 6) & 0x0f;
1052 ah = (ah - 1 - icarry) & 0xff;
1053 eflags |= CC_C | CC_A;
1055 eflags &= ~(CC_C | CC_A);
1058 EAX = (EAX & ~0xffff) | al | (ah << 8);
1063 void OPPROTO op_daa(void)
1068 eflags = cc_table[CC_OP].compute_all();
1074 if (((al & 0x0f) > 9 ) || af) {
1075 al = (al + 6) & 0xff;
1078 if ((al > 0x9f) || cf) {
1079 al = (al + 0x60) & 0xff;
1082 EAX = (EAX & ~0xff) | al;
1083 /* well, speed is not an issue here, so we compute the flags by hand */
1084 eflags |= (al == 0) << 6; /* zf */
1085 eflags |= parity_table[al]; /* pf */
1086 eflags |= (al & 0x80); /* sf */
1091 void OPPROTO op_das(void)
1093 int al, al1, af, cf;
1096 eflags = cc_table[CC_OP].compute_all();
1103 if (((al & 0x0f) > 9 ) || af) {
1107 al = (al - 6) & 0xff;
1109 if ((al1 > 0x99) || cf) {
1110 al = (al - 0x60) & 0xff;
1113 EAX = (EAX & ~0xff) | al;
1114 /* well, speed is not an issue here, so we compute the flags by hand */
1115 eflags |= (al == 0) << 6; /* zf */
1116 eflags |= parity_table[al]; /* pf */
1117 eflags |= (al & 0x80); /* sf */
1122 /* segment handling */
1124 /* never use it with R_CS */
1125 void OPPROTO op_movl_seg_T0(void)
1127 load_seg(PARAM1, T0);
1130 /* faster VM86 version */
1131 void OPPROTO op_movl_seg_T0_vm(void)
1136 selector = T0 & 0xffff;
1137 /* env->segs[] access */
1138 sc = (SegmentCache *)((char *)env + PARAM1);
1139 sc->selector = selector;
1140 sc->base = (selector << 4);
1143 void OPPROTO op_movl_T0_seg(void)
1145 T0 = env->segs[PARAM1].selector;
1148 void OPPROTO op_lsl(void)
1153 void OPPROTO op_lar(void)
1158 void OPPROTO op_verr(void)
1163 void OPPROTO op_verw(void)
1168 void OPPROTO op_arpl(void)
1170 if ((T0 & 3) < (T1 & 3)) {
1171 /* XXX: emulate bug or 0xff3f0000 oring as in bochs ? */
1172 T0 = (T0 & ~3) | (T1 & 3);
1180 void OPPROTO op_arpl_update(void)
1183 eflags = cc_table[CC_OP].compute_all();
1184 CC_SRC = (eflags & ~CC_Z) | T1;
1187 /* T0: segment, T1:eip */
1188 void OPPROTO op_ljmp_protected_T0_T1(void)
1190 helper_ljmp_protected_T0_T1(PARAM1);
1193 void OPPROTO op_lcall_real_T0_T1(void)
1195 helper_lcall_real_T0_T1(PARAM1, PARAM2);
1198 void OPPROTO op_lcall_protected_T0_T1(void)
1200 helper_lcall_protected_T0_T1(PARAM1, PARAM2);
1203 void OPPROTO op_iret_real(void)
1205 helper_iret_real(PARAM1);
1208 void OPPROTO op_iret_protected(void)
1210 helper_iret_protected(PARAM1, PARAM2);
1213 void OPPROTO op_lret_protected(void)
1215 helper_lret_protected(PARAM1, PARAM2);
1218 void OPPROTO op_lldt_T0(void)
1223 void OPPROTO op_ltr_T0(void)
1228 /* CR registers access */
1229 void OPPROTO op_movl_crN_T0(void)
1231 helper_movl_crN_T0(PARAM1);
1234 #if !defined(CONFIG_USER_ONLY)
1235 void OPPROTO op_movtl_T0_cr8(void)
1237 T0 = cpu_get_apic_tpr(env);
1241 /* DR registers access */
1242 void OPPROTO op_movl_drN_T0(void)
1244 helper_movl_drN_T0(PARAM1);
1247 void OPPROTO op_lmsw_T0(void)
1249 /* only 4 lower bits of CR0 are modified. PE cannot be set to zero
1250 if already set to one. */
1251 T0 = (env->cr[0] & ~0xe) | (T0 & 0xf);
1252 helper_movl_crN_T0(0);
1255 void OPPROTO op_invlpg_A0(void)
1260 void OPPROTO op_movl_T0_env(void)
1262 T0 = *(uint32_t *)((char *)env + PARAM1);
1265 void OPPROTO op_movl_env_T0(void)
1267 *(uint32_t *)((char *)env + PARAM1) = T0;
1270 void OPPROTO op_movl_env_T1(void)
1272 *(uint32_t *)((char *)env + PARAM1) = T1;
1275 void OPPROTO op_movtl_T0_env(void)
1277 T0 = *(target_ulong *)((char *)env + PARAM1);
1280 void OPPROTO op_movtl_env_T0(void)
1282 *(target_ulong *)((char *)env + PARAM1) = T0;
1285 void OPPROTO op_movtl_T1_env(void)
1287 T1 = *(target_ulong *)((char *)env + PARAM1);
1290 void OPPROTO op_movtl_env_T1(void)
1292 *(target_ulong *)((char *)env + PARAM1) = T1;
1295 void OPPROTO op_clts(void)
1297 env->cr[0] &= ~CR0_TS_MASK;
1298 env->hflags &= ~HF_TS_MASK;
1301 /* flags handling */
1303 void OPPROTO op_goto_tb0(void)
1305 GOTO_TB(op_goto_tb0, PARAM1, 0);
1308 void OPPROTO op_goto_tb1(void)
1310 GOTO_TB(op_goto_tb1, PARAM1, 1);
1313 void OPPROTO op_jmp_label(void)
1315 GOTO_LABEL_PARAM(1);
1318 void OPPROTO op_jnz_T0_label(void)
1321 GOTO_LABEL_PARAM(1);
1325 void OPPROTO op_jz_T0_label(void)
1328 GOTO_LABEL_PARAM(1);
1332 /* slow set cases (compute x86 flags) */
1333 void OPPROTO op_seto_T0_cc(void)
1336 eflags = cc_table[CC_OP].compute_all();
1337 T0 = (eflags >> 11) & 1;
1340 void OPPROTO op_setb_T0_cc(void)
1342 T0 = cc_table[CC_OP].compute_c();
1345 void OPPROTO op_setz_T0_cc(void)
1348 eflags = cc_table[CC_OP].compute_all();
1349 T0 = (eflags >> 6) & 1;
1352 void OPPROTO op_setbe_T0_cc(void)
1355 eflags = cc_table[CC_OP].compute_all();
1356 T0 = (eflags & (CC_Z | CC_C)) != 0;
1359 void OPPROTO op_sets_T0_cc(void)
1362 eflags = cc_table[CC_OP].compute_all();
1363 T0 = (eflags >> 7) & 1;
1366 void OPPROTO op_setp_T0_cc(void)
1369 eflags = cc_table[CC_OP].compute_all();
1370 T0 = (eflags >> 2) & 1;
1373 void OPPROTO op_setl_T0_cc(void)
1376 eflags = cc_table[CC_OP].compute_all();
1377 T0 = ((eflags ^ (eflags >> 4)) >> 7) & 1;
1380 void OPPROTO op_setle_T0_cc(void)
1383 eflags = cc_table[CC_OP].compute_all();
1384 T0 = (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) != 0;
1387 void OPPROTO op_xor_T0_1(void)
1392 void OPPROTO op_set_cc_op(void)
1397 void OPPROTO op_mov_T0_cc(void)
1399 T0 = cc_table[CC_OP].compute_all();
1402 /* XXX: clear VIF/VIP in all ops ? */
1404 void OPPROTO op_movl_eflags_T0(void)
1406 load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK));
1409 void OPPROTO op_movw_eflags_T0(void)
1411 load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff);
1414 void OPPROTO op_movl_eflags_T0_io(void)
1416 load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK));
1419 void OPPROTO op_movw_eflags_T0_io(void)
1421 load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff);
1424 void OPPROTO op_movl_eflags_T0_cpl0(void)
1426 load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK));
1429 void OPPROTO op_movw_eflags_T0_cpl0(void)
1431 load_eflags(T0, (TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff);
1435 /* vm86plus version */
1436 void OPPROTO op_movw_eflags_T0_vm(void)
1440 CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
1441 DF = 1 - (2 * ((eflags >> 10) & 1));
1442 /* we also update some system flags as in user mode */
1443 env->eflags = (env->eflags & ~(FL_UPDATE_MASK16 | VIF_MASK)) |
1444 (eflags & FL_UPDATE_MASK16);
1445 if (eflags & IF_MASK) {
1446 env->eflags |= VIF_MASK;
1447 if (env->eflags & VIP_MASK) {
1449 raise_exception(EXCP0D_GPF);
1455 void OPPROTO op_movl_eflags_T0_vm(void)
1459 CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
1460 DF = 1 - (2 * ((eflags >> 10) & 1));
1461 /* we also update some system flags as in user mode */
1462 env->eflags = (env->eflags & ~(FL_UPDATE_MASK32 | VIF_MASK)) |
1463 (eflags & FL_UPDATE_MASK32);
1464 if (eflags & IF_MASK) {
1465 env->eflags |= VIF_MASK;
1466 if (env->eflags & VIP_MASK) {
1468 raise_exception(EXCP0D_GPF);
1475 /* XXX: compute only O flag */
1476 void OPPROTO op_movb_eflags_T0(void)
1479 of = cc_table[CC_OP].compute_all() & CC_O;
1480 CC_SRC = (T0 & (CC_S | CC_Z | CC_A | CC_P | CC_C)) | of;
1483 void OPPROTO op_movl_T0_eflags(void)
1486 eflags = cc_table[CC_OP].compute_all();
1487 eflags |= (DF & DF_MASK);
1488 eflags |= env->eflags & ~(VM_MASK | RF_MASK);
1492 /* vm86plus version */
1494 void OPPROTO op_movl_T0_eflags_vm(void)
1497 eflags = cc_table[CC_OP].compute_all();
1498 eflags |= (DF & DF_MASK);
1499 eflags |= env->eflags & ~(VM_MASK | RF_MASK | IF_MASK);
1500 if (env->eflags & VIF_MASK)
1506 void OPPROTO op_cld(void)
1511 void OPPROTO op_std(void)
1516 void OPPROTO op_clc(void)
1519 eflags = cc_table[CC_OP].compute_all();
1524 void OPPROTO op_stc(void)
1527 eflags = cc_table[CC_OP].compute_all();
1532 void OPPROTO op_cmc(void)
1535 eflags = cc_table[CC_OP].compute_all();
1540 void OPPROTO op_salc(void)
1543 cf = cc_table[CC_OP].compute_c();
1544 EAX = (EAX & ~0xff) | ((-cf) & 0xff);
1547 static int compute_all_eflags(void)
1552 static int compute_c_eflags(void)
1554 return CC_SRC & CC_C;
1557 CCTable cc_table[CC_OP_NB] = {
1558 [CC_OP_DYNAMIC] = { /* should never happen */ },
1560 [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags },
1562 [CC_OP_MULB] = { compute_all_mulb, compute_c_mull },
1563 [CC_OP_MULW] = { compute_all_mulw, compute_c_mull },
1564 [CC_OP_MULL] = { compute_all_mull, compute_c_mull },
1566 [CC_OP_ADDB] = { compute_all_addb, compute_c_addb },
1567 [CC_OP_ADDW] = { compute_all_addw, compute_c_addw },
1568 [CC_OP_ADDL] = { compute_all_addl, compute_c_addl },
1570 [CC_OP_ADCB] = { compute_all_adcb, compute_c_adcb },
1571 [CC_OP_ADCW] = { compute_all_adcw, compute_c_adcw },
1572 [CC_OP_ADCL] = { compute_all_adcl, compute_c_adcl },
1574 [CC_OP_SUBB] = { compute_all_subb, compute_c_subb },
1575 [CC_OP_SUBW] = { compute_all_subw, compute_c_subw },
1576 [CC_OP_SUBL] = { compute_all_subl, compute_c_subl },
1578 [CC_OP_SBBB] = { compute_all_sbbb, compute_c_sbbb },
1579 [CC_OP_SBBW] = { compute_all_sbbw, compute_c_sbbw },
1580 [CC_OP_SBBL] = { compute_all_sbbl, compute_c_sbbl },
1582 [CC_OP_LOGICB] = { compute_all_logicb, compute_c_logicb },
1583 [CC_OP_LOGICW] = { compute_all_logicw, compute_c_logicw },
1584 [CC_OP_LOGICL] = { compute_all_logicl, compute_c_logicl },
1586 [CC_OP_INCB] = { compute_all_incb, compute_c_incl },
1587 [CC_OP_INCW] = { compute_all_incw, compute_c_incl },
1588 [CC_OP_INCL] = { compute_all_incl, compute_c_incl },
1590 [CC_OP_DECB] = { compute_all_decb, compute_c_incl },
1591 [CC_OP_DECW] = { compute_all_decw, compute_c_incl },
1592 [CC_OP_DECL] = { compute_all_decl, compute_c_incl },
1594 [CC_OP_SHLB] = { compute_all_shlb, compute_c_shlb },
1595 [CC_OP_SHLW] = { compute_all_shlw, compute_c_shlw },
1596 [CC_OP_SHLL] = { compute_all_shll, compute_c_shll },
1598 [CC_OP_SARB] = { compute_all_sarb, compute_c_sarl },
1599 [CC_OP_SARW] = { compute_all_sarw, compute_c_sarl },
1600 [CC_OP_SARL] = { compute_all_sarl, compute_c_sarl },
1602 #ifdef TARGET_X86_64
1603 [CC_OP_MULQ] = { compute_all_mulq, compute_c_mull },
1605 [CC_OP_ADDQ] = { compute_all_addq, compute_c_addq },
1607 [CC_OP_ADCQ] = { compute_all_adcq, compute_c_adcq },
1609 [CC_OP_SUBQ] = { compute_all_subq, compute_c_subq },
1611 [CC_OP_SBBQ] = { compute_all_sbbq, compute_c_sbbq },
1613 [CC_OP_LOGICQ] = { compute_all_logicq, compute_c_logicq },
1615 [CC_OP_INCQ] = { compute_all_incq, compute_c_incl },
1617 [CC_OP_DECQ] = { compute_all_decq, compute_c_incl },
1619 [CC_OP_SHLQ] = { compute_all_shlq, compute_c_shlq },
1621 [CC_OP_SARQ] = { compute_all_sarq, compute_c_sarl },
1625 /* floating point support. Some of the code for complicated x87
1626 functions comes from the LGPL'ed x86 emulator found in the Willows
1627 TWIN windows emulator. */
1631 void OPPROTO op_flds_FT0_A0(void)
1633 #ifdef USE_FP_CONVERT
1634 FP_CONVERT.i32 = ldl(A0);
1641 void OPPROTO op_fldl_FT0_A0(void)
1643 #ifdef USE_FP_CONVERT
1644 FP_CONVERT.i64 = ldq(A0);
1651 /* helpers are needed to avoid static constant reference. XXX: find a better way */
1652 #ifdef USE_INT_TO_FLOAT_HELPERS
1654 void helper_fild_FT0_A0(void)
1656 FT0 = (CPU86_LDouble)ldsw(A0);
1659 void helper_fildl_FT0_A0(void)
1661 FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
1664 void helper_fildll_FT0_A0(void)
1666 FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
1669 void OPPROTO op_fild_FT0_A0(void)
1671 helper_fild_FT0_A0();
1674 void OPPROTO op_fildl_FT0_A0(void)
1676 helper_fildl_FT0_A0();
1679 void OPPROTO op_fildll_FT0_A0(void)
1681 helper_fildll_FT0_A0();
1686 void OPPROTO op_fild_FT0_A0(void)
1688 #ifdef USE_FP_CONVERT
1689 FP_CONVERT.i32 = ldsw(A0);
1690 FT0 = (CPU86_LDouble)FP_CONVERT.i32;
1692 FT0 = (CPU86_LDouble)ldsw(A0);
1696 void OPPROTO op_fildl_FT0_A0(void)
1698 #ifdef USE_FP_CONVERT
1699 FP_CONVERT.i32 = (int32_t) ldl(A0);
1700 FT0 = (CPU86_LDouble)FP_CONVERT.i32;
1702 FT0 = (CPU86_LDouble)((int32_t)ldl(A0));
1706 void OPPROTO op_fildll_FT0_A0(void)
1708 #ifdef USE_FP_CONVERT
1709 FP_CONVERT.i64 = (int64_t) ldq(A0);
1710 FT0 = (CPU86_LDouble)FP_CONVERT.i64;
1712 FT0 = (CPU86_LDouble)((int64_t)ldq(A0));
1719 void OPPROTO op_flds_ST0_A0(void)
1722 new_fpstt = (env->fpstt - 1) & 7;
1723 #ifdef USE_FP_CONVERT
1724 FP_CONVERT.i32 = ldl(A0);
1725 env->fpregs[new_fpstt].d = FP_CONVERT.f;
1727 env->fpregs[new_fpstt].d = ldfl(A0);
1729 env->fpstt = new_fpstt;
1730 env->fptags[new_fpstt] = 0; /* validate stack entry */
1733 void OPPROTO op_fldl_ST0_A0(void)
1736 new_fpstt = (env->fpstt - 1) & 7;
1737 #ifdef USE_FP_CONVERT
1738 FP_CONVERT.i64 = ldq(A0);
1739 env->fpregs[new_fpstt].d = FP_CONVERT.d;
1741 env->fpregs[new_fpstt].d = ldfq(A0);
1743 env->fpstt = new_fpstt;
1744 env->fptags[new_fpstt] = 0; /* validate stack entry */
1747 void OPPROTO op_fldt_ST0_A0(void)
1749 helper_fldt_ST0_A0();
1752 /* helpers are needed to avoid static constant reference. XXX: find a better way */
1753 #ifdef USE_INT_TO_FLOAT_HELPERS
1755 void helper_fild_ST0_A0(void)
1758 new_fpstt = (env->fpstt - 1) & 7;
1759 env->fpregs[new_fpstt].d = (CPU86_LDouble)ldsw(A0);
1760 env->fpstt = new_fpstt;
1761 env->fptags[new_fpstt] = 0; /* validate stack entry */
1764 void helper_fildl_ST0_A0(void)
1767 new_fpstt = (env->fpstt - 1) & 7;
1768 env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
1769 env->fpstt = new_fpstt;
1770 env->fptags[new_fpstt] = 0; /* validate stack entry */
1773 void helper_fildll_ST0_A0(void)
1776 new_fpstt = (env->fpstt - 1) & 7;
1777 env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
1778 env->fpstt = new_fpstt;
1779 env->fptags[new_fpstt] = 0; /* validate stack entry */
1782 void OPPROTO op_fild_ST0_A0(void)
1784 helper_fild_ST0_A0();
1787 void OPPROTO op_fildl_ST0_A0(void)
1789 helper_fildl_ST0_A0();
1792 void OPPROTO op_fildll_ST0_A0(void)
1794 helper_fildll_ST0_A0();
1799 void OPPROTO op_fild_ST0_A0(void)
1802 new_fpstt = (env->fpstt - 1) & 7;
1803 #ifdef USE_FP_CONVERT
1804 FP_CONVERT.i32 = ldsw(A0);
1805 env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
1807 env->fpregs[new_fpstt].d = (CPU86_LDouble)ldsw(A0);
1809 env->fpstt = new_fpstt;
1810 env->fptags[new_fpstt] = 0; /* validate stack entry */
1813 void OPPROTO op_fildl_ST0_A0(void)
1816 new_fpstt = (env->fpstt - 1) & 7;
1817 #ifdef USE_FP_CONVERT
1818 FP_CONVERT.i32 = (int32_t) ldl(A0);
1819 env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i32;
1821 env->fpregs[new_fpstt].d = (CPU86_LDouble)((int32_t)ldl(A0));
1823 env->fpstt = new_fpstt;
1824 env->fptags[new_fpstt] = 0; /* validate stack entry */
1827 void OPPROTO op_fildll_ST0_A0(void)
1830 new_fpstt = (env->fpstt - 1) & 7;
1831 #ifdef USE_FP_CONVERT
1832 FP_CONVERT.i64 = (int64_t) ldq(A0);
1833 env->fpregs[new_fpstt].d = (CPU86_LDouble)FP_CONVERT.i64;
1835 env->fpregs[new_fpstt].d = (CPU86_LDouble)((int64_t)ldq(A0));
1837 env->fpstt = new_fpstt;
1838 env->fptags[new_fpstt] = 0; /* validate stack entry */
1845 void OPPROTO op_fsts_ST0_A0(void)
1847 #ifdef USE_FP_CONVERT
1848 FP_CONVERT.f = (float)ST0;
1849 stfl(A0, FP_CONVERT.f);
1851 stfl(A0, (float)ST0);
1856 void OPPROTO op_fstl_ST0_A0(void)
1858 stfq(A0, (double)ST0);
1862 void OPPROTO op_fstt_ST0_A0(void)
1864 helper_fstt_ST0_A0();
1867 void OPPROTO op_fist_ST0_A0(void)
1869 #if defined(__sparc__) && !defined(__sparc_v9__)
1870 register CPU86_LDouble d asm("o0");
1877 val = floatx_to_int32(d, &env->fp_status);
1878 if (val != (int16_t)val)
1884 void OPPROTO op_fistl_ST0_A0(void)
1886 #if defined(__sparc__) && !defined(__sparc_v9__)
1887 register CPU86_LDouble d asm("o0");
1894 val = floatx_to_int32(d, &env->fp_status);
1899 void OPPROTO op_fistll_ST0_A0(void)
1901 #if defined(__sparc__) && !defined(__sparc_v9__)
1902 register CPU86_LDouble d asm("o0");
1909 val = floatx_to_int64(d, &env->fp_status);
1914 void OPPROTO op_fistt_ST0_A0(void)
1916 #if defined(__sparc__) && !defined(__sparc_v9__)
1917 register CPU86_LDouble d asm("o0");
1924 val = floatx_to_int32_round_to_zero(d, &env->fp_status);
1925 if (val != (int16_t)val)
1931 void OPPROTO op_fisttl_ST0_A0(void)
1933 #if defined(__sparc__) && !defined(__sparc_v9__)
1934 register CPU86_LDouble d asm("o0");
1941 val = floatx_to_int32_round_to_zero(d, &env->fp_status);
1946 void OPPROTO op_fisttll_ST0_A0(void)
1948 #if defined(__sparc__) && !defined(__sparc_v9__)
1949 register CPU86_LDouble d asm("o0");
1956 val = floatx_to_int64_round_to_zero(d, &env->fp_status);
1961 void OPPROTO op_fbld_ST0_A0(void)
1963 helper_fbld_ST0_A0();
1966 void OPPROTO op_fbst_ST0_A0(void)
1968 helper_fbst_ST0_A0();
1973 void OPPROTO op_fpush(void)
1978 void OPPROTO op_fpop(void)
1983 void OPPROTO op_fdecstp(void)
1985 env->fpstt = (env->fpstt - 1) & 7;
1986 env->fpus &= (~0x4700);
1989 void OPPROTO op_fincstp(void)
1991 env->fpstt = (env->fpstt + 1) & 7;
1992 env->fpus &= (~0x4700);
1995 void OPPROTO op_ffree_STN(void)
1997 env->fptags[(env->fpstt + PARAM1) & 7] = 1;
2000 void OPPROTO op_fmov_ST0_FT0(void)
2005 void OPPROTO op_fmov_FT0_STN(void)
2010 void OPPROTO op_fmov_ST0_STN(void)
2015 void OPPROTO op_fmov_STN_ST0(void)
2020 void OPPROTO op_fxchg_ST0_STN(void)
2028 /* FPU operations */
2030 const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
2032 void OPPROTO op_fcom_ST0_FT0(void)
2036 ret = floatx_compare(ST0, FT0, &env->fp_status);
2037 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
2041 void OPPROTO op_fucom_ST0_FT0(void)
2045 ret = floatx_compare_quiet(ST0, FT0, &env->fp_status);
2046 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret+ 1];
2050 const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
2052 void OPPROTO op_fcomi_ST0_FT0(void)
2057 ret = floatx_compare(ST0, FT0, &env->fp_status);
2058 eflags = cc_table[CC_OP].compute_all();
2059 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
2064 void OPPROTO op_fucomi_ST0_FT0(void)
2069 ret = floatx_compare_quiet(ST0, FT0, &env->fp_status);
2070 eflags = cc_table[CC_OP].compute_all();
2071 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
2076 void OPPROTO op_fcmov_ST0_STN_T0(void)
2084 void OPPROTO op_fadd_ST0_FT0(void)
2089 void OPPROTO op_fmul_ST0_FT0(void)
2094 void OPPROTO op_fsub_ST0_FT0(void)
2099 void OPPROTO op_fsubr_ST0_FT0(void)
2104 void OPPROTO op_fdiv_ST0_FT0(void)
2106 ST0 = helper_fdiv(ST0, FT0);
2109 void OPPROTO op_fdivr_ST0_FT0(void)
2111 ST0 = helper_fdiv(FT0, ST0);
2114 /* fp operations between STN and ST0 */
2116 void OPPROTO op_fadd_STN_ST0(void)
2121 void OPPROTO op_fmul_STN_ST0(void)
2126 void OPPROTO op_fsub_STN_ST0(void)
2131 void OPPROTO op_fsubr_STN_ST0(void)
2138 void OPPROTO op_fdiv_STN_ST0(void)
2142 *p = helper_fdiv(*p, ST0);
2145 void OPPROTO op_fdivr_STN_ST0(void)
2149 *p = helper_fdiv(ST0, *p);
2152 /* misc FPU operations */
2153 void OPPROTO op_fchs_ST0(void)
2155 ST0 = floatx_chs(ST0);
2158 void OPPROTO op_fabs_ST0(void)
2160 ST0 = floatx_abs(ST0);
2163 void OPPROTO op_fxam_ST0(void)
2168 void OPPROTO op_fld1_ST0(void)
2173 void OPPROTO op_fldl2t_ST0(void)
2178 void OPPROTO op_fldl2e_ST0(void)
2183 void OPPROTO op_fldpi_ST0(void)
2188 void OPPROTO op_fldlg2_ST0(void)
2193 void OPPROTO op_fldln2_ST0(void)
2198 void OPPROTO op_fldz_ST0(void)
2203 void OPPROTO op_fldz_FT0(void)
2208 /* associated helpers to reduce generated code length and to simplify
2209 relocation (FP constants are usually stored in .rodata section) */
2211 void OPPROTO op_f2xm1(void)
2216 void OPPROTO op_fyl2x(void)
2221 void OPPROTO op_fptan(void)
2226 void OPPROTO op_fpatan(void)
2231 void OPPROTO op_fxtract(void)
2236 void OPPROTO op_fprem1(void)
2242 void OPPROTO op_fprem(void)
2247 void OPPROTO op_fyl2xp1(void)
2252 void OPPROTO op_fsqrt(void)
2257 void OPPROTO op_fsincos(void)
2262 void OPPROTO op_frndint(void)
2267 void OPPROTO op_fscale(void)
2272 void OPPROTO op_fsin(void)
2277 void OPPROTO op_fcos(void)
2282 void OPPROTO op_fnstsw_A0(void)
2285 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2290 void OPPROTO op_fnstsw_EAX(void)
2293 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2294 EAX = (EAX & ~0xffff) | fpus;
2297 void OPPROTO op_fnstcw_A0(void)
2303 void OPPROTO op_fldcw_A0(void)
2305 env->fpuc = lduw(A0);
2309 void OPPROTO op_fclex(void)
2311 env->fpus &= 0x7f00;
2314 void OPPROTO op_fwait(void)
2316 if (env->fpus & FPUS_SE)
2317 fpu_raise_exception();
2321 void OPPROTO op_fninit(void)
2336 void OPPROTO op_fnstenv_A0(void)
2338 helper_fstenv(A0, PARAM1);
2341 void OPPROTO op_fldenv_A0(void)
2343 helper_fldenv(A0, PARAM1);
2346 void OPPROTO op_fnsave_A0(void)
2348 helper_fsave(A0, PARAM1);
2351 void OPPROTO op_frstor_A0(void)
2353 helper_frstor(A0, PARAM1);
2356 /* threading support */
2357 void OPPROTO op_lock(void)
2362 void OPPROTO op_unlock(void)
2368 static inline void memcpy16(void *d, void *s)
2370 ((uint32_t *)d)[0] = ((uint32_t *)s)[0];
2371 ((uint32_t *)d)[1] = ((uint32_t *)s)[1];
2372 ((uint32_t *)d)[2] = ((uint32_t *)s)[2];
2373 ((uint32_t *)d)[3] = ((uint32_t *)s)[3];
2376 void OPPROTO op_movo(void)
2378 /* XXX: badly generated code */
2380 d = (XMMReg *)((char *)env + PARAM1);
2381 s = (XMMReg *)((char *)env + PARAM2);
2385 void OPPROTO op_movq(void)
2388 d = (uint64_t *)((char *)env + PARAM1);
2389 s = (uint64_t *)((char *)env + PARAM2);
2393 void OPPROTO op_movl(void)
2396 d = (uint32_t *)((char *)env + PARAM1);
2397 s = (uint32_t *)((char *)env + PARAM2);
2401 void OPPROTO op_movq_env_0(void)
2404 d = (uint64_t *)((char *)env + PARAM1);
2408 void OPPROTO op_fxsave_A0(void)
2410 helper_fxsave(A0, PARAM1);
2413 void OPPROTO op_fxrstor_A0(void)
2415 helper_fxrstor(A0, PARAM1);
2418 /* XXX: optimize by storing fpstt and fptags in the static cpu state */
2419 void OPPROTO op_enter_mmx(void)
2422 *(uint32_t *)(env->fptags) = 0;
2423 *(uint32_t *)(env->fptags + 4) = 0;
2426 void OPPROTO op_emms(void)
2428 /* set to empty state */
2429 *(uint32_t *)(env->fptags) = 0x01010101;
2430 *(uint32_t *)(env->fptags + 4) = 0x01010101;
2434 #include "ops_sse.h"
2437 #include "ops_sse.h"