2 * i386 micro operations (included several times to generate
3 * different operand sizes)
5 * Copyright (c) 2003 Fabrice Bellard
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Operand-size parameters derived from SHIFT: DATA_BITS is 8 << SHIFT,
   SHIFT_MASK is DATA_BITS - 1 (used to mask shift/bit counts), and
   SIGN_MASK selects the top bit of an operand. */
22 #define DATA_BITS (1 << (3 + SHIFT))
23 #define SHIFT_MASK (DATA_BITS - 1)
/* NOTE(review): when DATA_BITS is 32 this is 1 << 31 on a plain int;
   confirm every user of SIGN_MASK expects that signed value. */
24 #define SIGN_MASK (1 << (DATA_BITS - 1))
/* 8-bit operand: unsigned type, signed type and value mask */
28 #define DATA_TYPE uint8_t
29 #define DATA_STYPE int8_t
30 #define DATA_MASK 0xff
/* 16-bit operand: unsigned type, signed type and value mask */
33 #define DATA_TYPE uint16_t
34 #define DATA_STYPE int16_t
35 #define DATA_MASK 0xffff
/* 32-bit operand: unsigned type, signed type and value mask */
38 #define DATA_TYPE uint32_t
39 #define DATA_STYPE int32_t
40 #define DATA_MASK 0xffffffff
/* any other operand size is rejected at compile time */
42 #error unhandled operand size
45 /* dynamic flags computation */
/* Evaluate all six arithmetic flags for an addition and return them OR-ed
   into one int. CC_DST is used as the result of the addition.
   src1 presumably holds CC_SRC (first operand) -- TODO confirm. */
47 static int glue(compute_all_add, SUFFIX)(void)
49 int cf, pf, af, zf, sf, of;
/* second operand recovered as result minus CC_SRC */
52 src2 = CC_DST - CC_SRC;
/* carry: the truncated result is below src1, i.e. the sum wrapped */
53 cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
/* parity is looked up from the low byte of the result */
54 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry: carry into bit 4, kept at bit 4 (0x10) */
55 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag placed at bit 6 */
56 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80); lshift presumably shifts right when the
   count is negative -- TODO confirm */
57 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 agree in sign while the result's sign differs
   from src1 */
58 of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
59 return cf | pf | af | zf | sf | of;
/* Carry-only evaluation for an addition.
   src1 presumably holds CC_SRC -- TODO confirm. */
62 static int glue(compute_c_add, SUFFIX)(void)
/* carry when the truncated result in CC_DST is below src1 */
66 cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
/* Evaluate all six arithmetic flags for an add-with-carry and return them
   OR-ed into one int. CC_DST is used as the result.
   src1 presumably holds CC_SRC (first operand) -- TODO confirm. */
70 static int glue(compute_all_adc, SUFFIX)(void)
72 int cf, pf, af, zf, sf, of;
/* second operand recovered as result minus CC_SRC minus one */
75 src2 = CC_DST - CC_SRC - 1;
/* carry: compared with <= rather than <, so a result equal to src1 also
   counts as a wrap */
76 cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
/* parity is looked up from the low byte of the result */
77 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry kept at bit 4 (0x10) */
78 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag placed at bit 6 */
79 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
80 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 agree in sign while the result's sign differs
   from src1 */
81 of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
82 return cf | pf | af | zf | sf | of;
/* Carry-only evaluation for an add-with-carry.
   src1 presumably holds CC_SRC -- TODO confirm. */
85 static int glue(compute_c_adc, SUFFIX)(void)
/* carry when the truncated result in CC_DST is below or equal to src1 */
89 cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
/* Evaluate all six arithmetic flags for a subtraction and return them
   OR-ed into one int. CC_DST is used as the result.
   src1 presumably holds CC_SRC (the minuend) -- TODO confirm. */
93 static int glue(compute_all_sub, SUFFIX)(void)
95 int cf, pf, af, zf, sf, of;
/* subtrahend recovered as CC_SRC minus the result */
98 src2 = CC_SRC - CC_DST;
/* borrow: unsigned src1 is below src2 */
99 cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
/* parity is looked up from the low byte of the result */
100 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry kept at bit 4 (0x10) */
101 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag placed at bit 6 */
102 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
103 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 differ in sign and the result's sign differs
   from src1 */
104 of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
105 return cf | pf | af | zf | sf | of;
/* Carry (borrow) only evaluation for a subtraction.
   src1 presumably holds CC_SRC -- TODO confirm. */
108 static int glue(compute_c_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
112 src2 = CC_SRC - CC_DST;
/* borrow when unsigned src1 is below src2 */
113 cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
/* Evaluate all six arithmetic flags for a subtract-with-borrow and return
   them OR-ed into one int. CC_DST is used as the result.
   src1 presumably holds CC_SRC (the minuend) -- TODO confirm. */
117 static int glue(compute_all_sbb, SUFFIX)(void)
119 int cf, pf, af, zf, sf, of;
/* subtrahend recovered as CC_SRC minus the result minus one */
122 src2 = CC_SRC - CC_DST - 1;
/* borrow: compared with <= rather than <, so equal operands also borrow */
123 cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
/* parity is looked up from the low byte of the result */
124 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry kept at bit 4 (0x10) */
125 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag placed at bit 6 */
126 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
127 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 differ in sign and the result's sign differs
   from src1 */
128 of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
129 return cf | pf | af | zf | sf | of;
/* Carry (borrow) only evaluation for a subtract-with-borrow.
   src1 presumably holds CC_SRC -- TODO confirm. */
132 static int glue(compute_c_sbb, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result minus one */
136 src2 = CC_SRC - CC_DST - 1;
/* borrow when unsigned src1 is below or equal to src2 */
137 cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
/* Evaluate the flags after a logical operation from CC_DST and return them
   OR-ed into one int. Only pf, zf and sf are derived from CC_DST here;
   cf, af and of presumably get constant values -- TODO confirm. */
141 static int glue(compute_all_logic, SUFFIX)(void)
143 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of the result */
145 pf = parity_table[(uint8_t)CC_DST];
/* zero flag placed at bit 6 */
147 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
148 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
150 return cf | pf | af | zf | sf | of;
153 static int glue(compute_c_logic, SUFFIX)(void)
/* Evaluate the flags after an increment and return them OR-ed into one
   int. CC_DST is used as the result.
   NOTE(review): how cf, src1 and src2 are obtained is not established by
   the statements below -- confirm before relying on them. */
158 static int glue(compute_all_inc, SUFFIX)(void)
160 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of the result */
165 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry kept at bit 4 (0x10) */
166 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag placed at bit 6 */
167 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
168 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow (bit 11) only when the result is exactly the sign-bit value,
   i.e. the increment stepped from the largest positive value */
169 of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
170 return cf | pf | af | zf | sf | of;
174 static int glue(compute_c_inc, SUFFIX)(void)
/* Evaluate the flags after a decrement and return them OR-ed into one
   int. CC_DST is used as the result.
   NOTE(review): how cf, src1 and src2 are obtained is not established by
   the statements below -- confirm before relying on them. */
180 static int glue(compute_all_dec, SUFFIX)(void)
182 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of the result */
187 pf = parity_table[(uint8_t)CC_DST];
/* auxiliary carry kept at bit 4 (0x10) */
188 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag placed at bit 6 */
189 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
190 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow (bit 11) only when the result is SIGN_MASK - 1, i.e. the
   decrement stepped down from the sign-bit value; the cast makes the
   subtraction unsigned */
191 of = ((CC_DST & DATA_MASK) == ((uint32_t)SIGN_MASK - 1)) << 11;
192 return cf | pf | af | zf | sf | of;
/* Evaluate the flags after a left shift and return them OR-ed into one
   int. CC_DST is used as the result and CC_SRC as the value whose top
   operand bit becomes the carry. */
195 static int glue(compute_all_shl, SUFFIX)(void)
197 int cf, pf, af, zf, sf, of;
/* carry is bit DATA_BITS - 1 of CC_SRC */
198 cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
/* parity is looked up from the low byte of the result */
199 pf = parity_table[(uint8_t)CC_DST];
200 af = 0; /* undefined */
/* zero flag placed at bit 6 */
201 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
202 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
203 /* of is defined if shift count == 1 */
/* overflow: top operand bit of CC_SRC differs from that of the result */
204 of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
205 return cf | pf | af | zf | sf | of;
209 static int glue(compute_c_shl, SUFFIX)(void)
/* Evaluate the flags after a right shift and return them OR-ed into one
   int. CC_DST is used as the result.
   NOTE(review): the source of cf is not established by the statements
   below -- confirm. */
215 static int glue(compute_all_sar, SUFFIX)(void)
217 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of the result */
219 pf = parity_table[(uint8_t)CC_DST];
220 af = 0; /* undefined */
/* zero flag placed at bit 6 */
221 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag kept at bit 7 (0x80) */
222 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
223 /* of is defined if shift count == 1 */
/* overflow: top operand bit of CC_SRC differs from that of the result */
224 of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
225 return cf | pf | af | zf | sf | of;
228 /* various optimized jumps cases */
/* Conditional jump on "below" after a subtraction: the test is an
   unsigned src1 < src2 at the current operand size.
   src1 presumably holds CC_SRC -- TODO confirm. */
230 void OPPROTO glue(op_jb_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
234 src2 = CC_SRC - CC_DST;
236 if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
/* Conditional jump on "zero" after a subtraction: the test is whether the
   result in CC_DST, truncated to the operand size, is zero. */
243 void OPPROTO glue(op_jz_sub, SUFFIX)(void)
245 if ((DATA_TYPE)CC_DST == 0)
/* Conditional jump on "below or equal" after a subtraction: the test is an
   unsigned src1 <= src2 at the current operand size.
   src1 presumably holds CC_SRC -- TODO confirm. */
252 void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
256 src2 = CC_SRC - CC_DST;
258 if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
/* Conditional jump on "sign" after a subtraction: the test is whether the
   SIGN_MASK bit of the result in CC_DST is set. */
265 void OPPROTO glue(op_js_sub, SUFFIX)(void)
267 if (CC_DST & SIGN_MASK)
/* Conditional jump on "less" after a subtraction: the test is a signed
   src1 < src2 at the current operand size.
   src1 presumably holds CC_SRC -- TODO confirm. */
274 void OPPROTO glue(op_jl_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
278 src2 = CC_SRC - CC_DST;
280 if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
/* Conditional jump on "less or equal" after a subtraction: the test is a
   signed src1 <= src2 at the current operand size.
   src1 presumably holds CC_SRC -- TODO confirm. */
287 void OPPROTO glue(op_jle_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
291 src2 = CC_SRC - CC_DST;
293 if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
/* LOOPNZ-style step: decrement the counter held in the low DATA_BITS of
   ECX and test "counter non-zero and ZF clear". */
304 void OPPROTO glue(op_loopnz, SUFFIX)(void)
/* flags are evaluated through the handler selected by the current CC_OP */
308 eflags = cc_table[CC_OP].compute_all();
/* decrement only the operand-sized part of ECX ... */
309 tmp = (ECX - 1) & DATA_MASK;
/* ... and write it back while keeping the bits above DATA_MASK */
310 ECX = (ECX & ~DATA_MASK) | tmp;
311 if (tmp != 0 && !(eflags & CC_Z))
/* LOOPZ-style step: decrement the counter held in the low DATA_BITS of
   ECX and test "counter non-zero and ZF set". */
318 void OPPROTO glue(op_loopz, SUFFIX)(void)
/* flags are evaluated through the handler selected by the current CC_OP */
322 eflags = cc_table[CC_OP].compute_all();
/* decrement only the operand-sized part of ECX ... */
323 tmp = (ECX - 1) & DATA_MASK;
/* ... and write it back while keeping the bits above DATA_MASK */
324 ECX = (ECX & ~DATA_MASK) | tmp;
325 if (tmp != 0 && (eflags & CC_Z))
/* LOOP-style step: decrement the counter held in the low DATA_BITS of ECX,
   leaving the bits above DATA_MASK unchanged. */
332 void OPPROTO glue(op_loop, SUFFIX)(void)
335 tmp = (ECX - 1) & DATA_MASK;
336 ECX = (ECX & ~DATA_MASK) | tmp;
/* Tests whether ECX, truncated to the current operand size, is zero. */
344 void OPPROTO glue(op_jecxz, SUFFIX)(void)
346 if ((DATA_TYPE)ECX == 0)
355 /* various optimized set cases */
/* Set T0 to 1 when src1 is below src2 (unsigned, operand-sized) after a
   subtraction, otherwise 0.
   src1 presumably holds CC_SRC -- TODO confirm. */
357 void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
361 src2 = CC_SRC - CC_DST;
363 T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2);
/* Set T0 to 1 when the operand-sized result in CC_DST is zero,
   otherwise 0. */
366 void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void)
368 T0 = ((DATA_TYPE)CC_DST == 0);
/* Set T0 to 1 when src1 is below or equal to src2 (unsigned,
   operand-sized) after a subtraction, otherwise 0.
   src1 presumably holds CC_SRC -- TODO confirm. */
371 void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
375 src2 = CC_SRC - CC_DST;
377 T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2);
/* Set T0 from the sign of the result in CC_DST. */
380 void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void)
/* lshift with a negative count presumably shifts right, bringing bit
   DATA_BITS - 1 down to bit 0 -- TODO confirm */
382 T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1;
/* Set T0 to 1 when src1 is less than src2 (signed, operand-sized) after a
   subtraction, otherwise 0.
   src1 presumably holds CC_SRC -- TODO confirm. */
385 void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
389 src2 = CC_SRC - CC_DST;
391 T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2);
/* Set T0 to 1 when src1 is less than or equal to src2 (signed,
   operand-sized) after a subtraction, otherwise 0.
   src1 presumably holds CC_SRC -- TODO confirm. */
394 void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void)
/* subtrahend recovered as CC_SRC minus the result */
398 src2 = CC_SRC - CC_DST;
400 T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2);
/* Rotate T0 left by T1 and update the flags: the current flags are
   materialised into CC_SRC with OF and CF cleared, then new OF/CF bits
   are OR-ed in and CC_OP is switched to CC_OP_EFLAGS. */
405 void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1_cc)(void)
/* rotation count reduced to 0 .. DATA_BITS - 1 */
408 count = T1 & SHIFT_MASK;
410 CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
/* NOTE(review): assumes T0 holds no bits above DATA_BITS and count != 0
   at this point -- confirm against the surrounding guard */
413 T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
/* OF from the top operand bit of (src ^ T0); src presumably holds the
   pre-rotation T0 -- TODO confirm */
414 CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
416 CC_OP = CC_OP_EFLAGS;
/* Rotate T0 left by T1 without touching the flags. */
421 void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1)(void)
/* rotation count reduced to 0 .. DATA_BITS - 1 */
424 count = T1 & SHIFT_MASK;
427 T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
/* Rotate T0 right by T1 and update the flags: the current flags are
   materialised into CC_SRC with OF and CF cleared, then new OF/CF bits
   are OR-ed in and CC_OP is switched to CC_OP_EFLAGS. */
432 void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void)
/* rotation count reduced to 0 .. DATA_BITS - 1 */
435 count = T1 & SHIFT_MASK;
437 CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
440 T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
/* OF from the top operand bit of (src ^ T0); src presumably holds the
   pre-rotation T0 -- TODO confirm. CF is the top operand bit of the
   rotated value. */
441 CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
442 ((T0 >> (DATA_BITS - 1)) & CC_C);
443 CC_OP = CC_OP_EFLAGS;
/* Rotate T0 right by T1 without touching the flags. */
448 void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1)(void)
/* rotation count reduced to 0 .. DATA_BITS - 1 */
451 count = T1 & SHIFT_MASK;
454 T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
/* Rotate T0 left through the carry flag and store the updated flags in
   CC_SRC with CC_OP set to CC_OP_EFLAGS. */
459 void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void)
461 int count, res, eflags;
/* the count is remapped through a lookup table; which table applies
   presumably depends on the operand size -- TODO confirm */
466 count = rclw_table[count];
468 count = rclb_table[count];
/* current flags are needed for the incoming carry */
471 eflags = cc_table[CC_OP].compute_all();
/* shifted value with the old carry inserted at bit count - 1 */
474 res = (T0 << count) | ((eflags & CC_C) << (count - 1));
/* bits that wrapped around past the carry position */
476 res |= T0 >> (DATA_BITS + 1 - count);
/* keep the other flags; OF from the top operand bit of (src ^ T0), CF from
   bit DATA_BITS - count of src. src presumably holds the original
   operand -- TODO confirm. */
478 CC_SRC = (eflags & ~(CC_C | CC_O)) |
479 (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
480 ((src >> (DATA_BITS - count)) & CC_C);
481 CC_OP = CC_OP_EFLAGS;
/* Rotate T0 right through the carry flag and store the updated flags in
   CC_SRC with CC_OP set to CC_OP_EFLAGS. */
486 void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void)
488 int count, res, eflags;
/* the count is remapped through a lookup table; which table applies
   presumably depends on the operand size -- TODO confirm */
493 count = rclw_table[count];
495 count = rclb_table[count];
/* current flags are needed for the incoming carry */
498 eflags = cc_table[CC_OP].compute_all();
/* shifted value with the old carry inserted at bit DATA_BITS - count */
501 res = (T0 >> count) | ((eflags & CC_C) << (DATA_BITS - count));
/* bits that wrapped around past the carry position */
503 res |= T0 << (DATA_BITS + 1 - count);
/* keep the other flags; OF from the top operand bit of (src ^ T0), CF from
   bit count - 1 of src. src presumably holds the original operand --
   TODO confirm. */
505 CC_SRC = (eflags & ~(CC_C | CC_O)) |
506 (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
507 ((src >> (count - 1)) & CC_C);
508 CC_OP = CC_OP_EFLAGS;
/* Left shift of T0 with flag tracking. */
513 void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void)
/* CC_SRC keeps the operand shifted by one position less than count, so the
   last bit to leave the operand sits in its top operand bit */
518 CC_SRC = (DATA_TYPE)T0 << (count - 1);
/* select the SHL flag evaluation for this operand size */
521 CC_OP = CC_OP_SHLB + SHIFT;
526 void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1)(void)
/* Right shift of T0 with flag tracking. */
534 void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void)
/* CC_SRC keeps the operand shifted by one position less than count, so the
   last bit to leave the operand sits in bit 0 */
540 CC_SRC = T0 >> (count - 1);
/* flags are evaluated with the SAR handler for this operand size */
543 CC_OP = CC_OP_SARB + SHIFT;
548 void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1)(void)
/* Arithmetic right shift of T0 with flag tracking. */
557 void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void)
/* reinterpret the operand as signed at the current operand size */
562 src = (DATA_STYPE)T0;
/* CC_SRC keeps the operand shifted by one position less than count, so the
   last bit to leave the operand sits in bit 0 */
563 CC_SRC = src >> (count - 1);
/* select the SAR flag evaluation for this operand size */
566 CC_OP = CC_OP_SARB + SHIFT;
/* Arithmetic right shift of T0 without flag tracking. */
571 void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1)(void)
/* reinterpret the operand as signed at the current operand size */
575 src = (DATA_STYPE)T0;
581 /* XXX: overflow flag might be incorrect in some cases in shldw */
/* Double-precision left shift with hard-coded 16/32 widths (presumably the
   16-bit operand variant -- TODO confirm), count presumably an immediate. */
582 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_im_cc)(void)
/* 32-bit working value: T0 in the upper half, T1 in the lower half */
588 res = T1 | (T0 << 16);
/* CC_SRC keeps the bits shifted out of the top, last one in bit 0 */
589 CC_SRC = res >> (32 - count);
/* T1 is fed in a second time, shifted by count - 16; presumably only for
   counts above 16 -- TODO confirm */
592 res |= T1 << (count - 16);
/* Double-precision left shift with hard-coded 16/32 widths (presumably the
   16-bit operand variant -- TODO confirm), count presumably taken from
   ECX as the name suggests. */
597 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_ECX_cc)(void)
/* 32-bit working value: T0 in the upper half, T1 in the lower half */
604 res = T1 | (T0 << 16);
/* CC_SRC keeps the bits shifted out of the top, last one in bit 0 */
605 CC_SRC = res >> (32 - count);
/* T1 is fed in a second time, shifted by count - 16; presumably only for
   counts above 16 -- TODO confirm */
608 res |= T1 << (count - 16);
/* the SAR handler is selected even though this is a left shift; CC_SRC
   above carries the outgoing bit in bit 0 */
611 CC_OP = CC_OP_SARB + SHIFT;
/* Double-precision right shift with hard-coded 16/32 widths (presumably
   the 16-bit operand variant -- TODO confirm), count presumably an
   immediate. */
615 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_im_cc)(void)
/* 32-bit working value: low 16 bits of T0 below, T1 in the upper half */
621 res = (T0 & 0xffff) | (T1 << 16);
/* CC_SRC keeps the value shifted by count - 1, last outgoing bit in bit 0 */
622 CC_SRC = res >> (count - 1);
/* T1 is fed in a second time, shifted by 32 - count; presumably only for
   counts above 16 -- TODO confirm */
625 res |= T1 << (32 - count);
/* Double-precision right shift with hard-coded 16/32 widths (presumably
   the 16-bit operand variant -- TODO confirm), count presumably taken
   from ECX as the name suggests. */
631 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_ECX_cc)(void)
/* 32-bit working value: low 16 bits of T0 below, T1 in the upper half */
638 res = (T0 & 0xffff) | (T1 << 16);
/* CC_SRC keeps the value shifted by count - 1, last outgoing bit in bit 0 */
639 CC_SRC = res >> (count - 1);
/* T1 is fed in a second time, shifted by 32 - count; presumably only for
   counts above 16 -- TODO confirm */
642 res |= T1 << (32 - count);
/* select the SAR flag evaluation for this operand size */
645 CC_OP = CC_OP_SARB + SHIFT;
/* Double-precision left shift written in terms of DATA_BITS: T0 is shifted
   left and the vacated low bits are filled from the top of T1. */
651 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_im_cc)(void)
/* CC_SRC keeps T0 shifted by one position less than count */
657 CC_SRC = T0 << (count - 1);
658 T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
/* Double-precision left shift written in terms of DATA_BITS: T0 is shifted
   left and the vacated low bits are filled from the top of T1. */
662 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_ECX_cc)(void)
/* CC_SRC keeps T0 shifted by one position less than count */
669 CC_SRC = T0 << (count - 1);
670 T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
/* select the SHL flag evaluation for this operand size */
672 CC_OP = CC_OP_SHLB + SHIFT;
/* Double-precision right shift written in terms of DATA_BITS: T0 is
   shifted right and the vacated high bits are filled from the bottom of
   T1. */
676 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_im_cc)(void)
/* CC_SRC keeps T0 shifted by one position less than count */
682 CC_SRC = T0 >> (count - 1);
683 T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
/* Double-precision right shift written in terms of DATA_BITS: T0 is
   shifted right and the vacated high bits are filled from the bottom of
   T1. */
688 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_ECX_cc)(void)
/* CC_SRC keeps T0 shifted by one position less than count */
695 CC_SRC = T0 >> (count - 1);
696 T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
/* select the SAR flag evaluation for this operand size */
698 CC_OP = CC_OP_SARB + SHIFT;
703 /* carry add/sub (we only need to set CC_OP differently) */
/* Add with carry: the incoming carry is obtained from the current lazy
   flag state and also decides which CC_OP is recorded afterwards. */
705 void OPPROTO glue(glue(op_adc, SUFFIX), _T0_T1_cc)(void)
708 cf = cc_table[CC_OP].compute_c();
/* with cf set the CC_OP index moves three entries past the ADD one;
   presumably that lands on the matching ADC entry -- confirm against
   the CC_OP enumeration */
712 CC_OP = CC_OP_ADDB + SHIFT + cf * 3;
/* Subtract with borrow: the incoming carry is obtained from the current
   lazy flag state and also decides which CC_OP is recorded afterwards. */
715 void OPPROTO glue(glue(op_sbb, SUFFIX), _T0_T1_cc)(void)
718 cf = cc_table[CC_OP].compute_c();
/* with cf set the CC_OP index moves three entries past the SUB one;
   presumably that lands on the matching SBB entry -- confirm against
   the CC_OP enumeration */
722 CC_OP = CC_OP_SUBB + SHIFT + cf * 3;
/* Compare-and-exchange step involving T0, T1 and EAX. */
725 void OPPROTO glue(glue(op_cmpxchg, SUFFIX), _T0_T1_EAX_cc)(void)
/* branch on whether the operand-sized comparison result in CC_DST is zero */
729 if ((DATA_TYPE)CC_DST == 0) {
/* replace only the low DATA_BITS of EAX with T0, keeping the upper bits */
732 EAX = (EAX & ~DATA_MASK) | (T0 & DATA_MASK);
/* Bit test: bring bit (T1 mod DATA_BITS) of T0 down to bit 0 of CC_SRC. */
740 void OPPROTO glue(glue(op_bt, SUFFIX), _T0_T1_cc)(void)
/* bit index reduced to 0 .. DATA_BITS - 1 */
743 count = T1 & SHIFT_MASK;
744 CC_SRC = T0 >> count;
/* Bit test (BTS variant): bring bit (T1 mod DATA_BITS) of T0 down to bit 0
   of CC_SRC. */
747 void OPPROTO glue(glue(op_bts, SUFFIX), _T0_T1_cc)(void)
/* bit index reduced to 0 .. DATA_BITS - 1 */
750 count = T1 & SHIFT_MASK;
751 CC_SRC = T0 >> count;
/* Bit test (BTR variant): bring bit (T1 mod DATA_BITS) of T0 down to bit 0
   of CC_SRC. */
755 void OPPROTO glue(glue(op_btr, SUFFIX), _T0_T1_cc)(void)
/* bit index reduced to 0 .. DATA_BITS - 1 */
758 count = T1 & SHIFT_MASK;
759 CC_SRC = T0 >> count;
/* Bit test (BTC variant): bring bit (T1 mod DATA_BITS) of T0 down to bit 0
   of CC_SRC. */
763 void OPPROTO glue(glue(op_btc, SUFFIX), _T0_T1_cc)(void)
/* bit index reduced to 0 .. DATA_BITS - 1 */
766 count = T1 & SHIFT_MASK;
767 CC_SRC = T0 >> count;
/* Bit scan forward on the operand-sized part of T0. The outcome is
   reported through CC_DST: the flag evaluators derive ZF from
   CC_DST == 0. */
771 void OPPROTO glue(glue(op_bsf, SUFFIX), _T0_cc)(void)
/* only the low DATA_BITS of T0 are scanned */
774 res = T0 & DATA_MASK;
/* loop while the lowest bit is clear */
777 while ((res & 1) == 0) {
782 CC_DST = 1; /* ZF = 0 */
784 CC_DST = 0; /* ZF = 1 */
/* Bit scan reverse on the operand-sized part of T0. The outcome is
   reported through CC_DST: the flag evaluators derive ZF from
   CC_DST == 0. */
789 void OPPROTO glue(glue(op_bsr, SUFFIX), _T0_cc)(void)
/* only the low DATA_BITS of T0 are scanned */
792 res = T0 & DATA_MASK;
/* start from the highest bit index of the operand */
794 count = DATA_BITS - 1;
/* loop while the top operand bit is clear */
795 while ((res & SIGN_MASK) == 0) {
800 CC_DST = 1; /* ZF = 0 */
802 CC_DST = 0; /* ZF = 1 */
809 /* string operations */
810 /* XXX: maybe use lower level instructions to ease exception handling */
/* Copy one element from the address in ESI to the address in EDI, then
   step both registers by DF << SHIFT.
   DF presumably encodes the direction as +1 or -1 -- TODO confirm. */
812 void OPPROTO glue(op_movs, SUFFIX)(void)
815 v = glue(ldu, SUFFIX)((void *)ESI);
816 glue(st, SUFFIX)((void *)EDI, v);
817 ESI += (DF << SHIFT);
818 EDI += (DF << SHIFT);
/* Repeated form of the string move: each step loads one element from the
   address in ESI and stores it at the address in EDI. */
821 void OPPROTO glue(op_rep_movs, SUFFIX)(void)
826 v = glue(ldu, SUFFIX)((void *)ESI);
827 glue(st, SUFFIX)((void *)EDI, v);
/* Store EAX (through the operand-sized store helper) at the address in
   EDI, then step EDI by DF << SHIFT. */
835 void OPPROTO glue(op_stos, SUFFIX)(void)
837 glue(st, SUFFIX)((void *)EDI, EAX);
838 EDI += (DF << SHIFT);
/* Repeated form of the string store: each step stores EAX at the address
   in EDI. */
841 void OPPROTO glue(op_rep_stos, SUFFIX)(void)
846 glue(st, SUFFIX)((void *)EDI, EAX);
/* Load one element from the address in ESI into the low part of EAX, then
   step ESI by DF << SHIFT. */
853 void OPPROTO glue(op_lods, SUFFIX)(void)
856 v = glue(ldu, SUFFIX)((void *)ESI);
/* byte and word forms replace only the low 8 or 16 bits of EAX; which
   line applies presumably depends on the operand size -- TODO confirm */
858 EAX = (EAX & ~0xff) | v;
860 EAX = (EAX & ~0xffff) | v;
864 ESI += (DF << SHIFT);
867 /* don't know if it is used */
/* Repeated form of the string load: each step loads one element from the
   address in ESI into the low part of EAX. */
868 void OPPROTO glue(op_rep_lods, SUFFIX)(void)
873 v = glue(ldu, SUFFIX)((void *)ESI);
/* byte and word forms replace only the low 8 or 16 bits of EAX; which
   line applies presumably depends on the operand size -- TODO confirm */
875 EAX = (EAX & ~0xff) | v;
877 EAX = (EAX & ~0xffff) | v;
/* String scan step: load one element from the address in EDI, then step
   EDI by DF << SHIFT. */
887 void OPPROTO glue(op_scas, SUFFIX)(void)
891 v = glue(ldu, SUFFIX)((void *)EDI);
892 EDI += (DF << SHIFT);
/* Repeated string scan (REPZ form): elements loaded from the address in
   EDI are compared against the operand-sized part of EAX. */
897 void OPPROTO glue(op_repz_scas, SUFFIX)(void)
902 /* NOTE: the flags are not modified if ECX == 0 */
/* reference value: EAX truncated to the operand size */
903 v1 = EAX & DATA_MASK;
906 v2 = glue(ldu, SUFFIX)((void *)EDI);
/* flags are to be evaluated as a subtraction of this operand size */
914 CC_OP = CC_OP_SUBB + SHIFT;
/* Repeated string scan (REPNZ form): elements loaded from the address in
   EDI are compared against the operand-sized part of EAX. */
919 void OPPROTO glue(op_repnz_scas, SUFFIX)(void)
924 /* NOTE: the flags are not modified if ECX == 0 */
/* reference value: EAX truncated to the operand size */
925 v1 = EAX & DATA_MASK;
928 v2 = glue(ldu, SUFFIX)((void *)EDI);
/* flags are to be evaluated as a subtraction of this operand size */
936 CC_OP = CC_OP_SUBB + SHIFT;
/* String compare step: load one element from the address in ESI and one
   from the address in EDI, then step both registers by DF << SHIFT. */
941 void OPPROTO glue(op_cmps, SUFFIX)(void)
944 v1 = glue(ldu, SUFFIX)((void *)ESI);
945 v2 = glue(ldu, SUFFIX)((void *)EDI);
946 ESI += (DF << SHIFT);
947 EDI += (DF << SHIFT);
/* Repeated string compare (REPZ form): each step loads one element from
   the address in ESI and one from the address in EDI. */
952 void OPPROTO glue(op_repz_cmps, SUFFIX)(void)
958 v1 = glue(ldu, SUFFIX)((void *)ESI);
959 v2 = glue(ldu, SUFFIX)((void *)EDI);
/* flags are to be evaluated as a subtraction of this operand size */
968 CC_OP = CC_OP_SUBB + SHIFT;
/* Repeated string compare (REPNZ form): each step loads one element from
   the address in ESI and one from the address in EDI. */
973 void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
979 v1 = glue(ldu, SUFFIX)((void *)ESI);
980 v2 = glue(ldu, SUFFIX)((void *)EDI);
/* flags are to be evaluated as a subtraction of this operand size */
989 CC_OP = CC_OP_SUBB + SHIFT;
/* String output step: load one element from the address in ESI, pass it
   to the port-output helper for port dx, then step ESI by DF << SHIFT.
   dx presumably holds the port number taken from EDX -- TODO confirm. */
996 void OPPROTO glue(op_outs, SUFFIX)(void)
1000 v = glue(ldu, SUFFIX)((void *)ESI);
1001 glue(cpu_x86_out, SUFFIX)(dx, v);
1002 ESI += (DF << SHIFT);
/* Repeated string output: each step loads one element from the address in
   ESI and passes it to the port-output helper for port dx. */
1005 void OPPROTO glue(op_rep_outs, SUFFIX)(void)
/* per-element address step, computed once */
1008 inc = (DF << SHIFT);
1011 v = glue(ldu, SUFFIX)((void *)ESI);
1012 glue(cpu_x86_out, SUFFIX)(dx, v);
/* String input step: read one element from port dx through the port-input
   helper, store it at the address in EDI, then step EDI by DF << SHIFT.
   dx presumably holds the port number taken from EDX -- TODO confirm. */
1019 void OPPROTO glue(op_ins, SUFFIX)(void)
1023 v = glue(cpu_x86_in, SUFFIX)(dx);
1024 glue(st, SUFFIX)((void *)EDI, v);
1025 EDI += (DF << SHIFT);
/* Repeated string input: each step reads one element from port dx, stores
   it at the address in EDI and steps EDI by DF << SHIFT. */
1028 void OPPROTO glue(op_rep_ins, SUFFIX)(void)
/* NOTE(review): inc holds the same step that the EDI update below
   recomputes as DF << SHIFT -- confirm whether inc is used elsewhere */
1031 inc = (DF << SHIFT);
1034 v = glue(cpu_x86_in, SUFFIX)(dx);
1035 glue(st, SUFFIX)((void *)EDI, v);
1036 EDI += (DF << SHIFT);
/* Port output: the port number is the low 16 bits of T0 and the value is
   T1 truncated to the operand size. */
1042 void OPPROTO glue(glue(op_out, SUFFIX), _T0_T1)(void)
1044 glue(cpu_x86_out, SUFFIX)(T0 & 0xffff, T1 & DATA_MASK);
/* Port input: read from the port given by the low 16 bits of T0 and place
   the value in T1. */
1047 void OPPROTO glue(glue(op_in, SUFFIX), _T0_T1)(void)
1049 T1 = glue(cpu_x86_in, SUFFIX)(T0 & 0xffff);