4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
28 #include "exec/helper-proto.h"
29 #include "exec/helper-gen.h"
31 #include "trace-tcg.h"
35 #define PREFIX_REPZ 0x01
36 #define PREFIX_REPNZ 0x02
37 #define PREFIX_LOCK 0x04
38 #define PREFIX_DATA 0x08
39 #define PREFIX_ADR 0x10
40 #define PREFIX_VEX 0x20
43 #define CODE64(s) ((s)->code64)
44 #define REX_X(s) ((s)->rex_x)
45 #define REX_B(s) ((s)->rex_b)
60 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
61 #define CASE_MODRM_MEM_OP(OP) \
62 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
63 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
64 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
66 #define CASE_MODRM_OP(OP) \
67 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
68 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
69 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
70 case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
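/* For example, CASE_MODRM_MEM_OP(7) expands to the three case ranges
   0x38...0x3f, 0x78...0x7f and 0xb8...0xbf (mod = 0, 1 or 2 with op = 7
   and any rm), while CASE_MODRM_OP(7) additionally covers 0xf8...0xff,
   the mod = 3 register-operand forms. */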
72 //#define MACRO_TEST 1
74 /* global register indexes */
75 static TCGv_env cpu_env;
77 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
78 static TCGv_i32 cpu_cc_op;
79 static TCGv cpu_regs[CPU_NB_REGS];
80 static TCGv cpu_seg_base[6];
81 static TCGv_i64 cpu_bndl[4];
82 static TCGv_i64 cpu_bndu[4];
84 static TCGv cpu_T0, cpu_T1;
85 /* local register indexes (only used inside old micro ops) */
86 static TCGv cpu_tmp0, cpu_tmp4;
87 static TCGv_ptr cpu_ptr0, cpu_ptr1;
88 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
89 static TCGv_i64 cpu_tmp1_i64;
91 #include "exec/gen-icount.h"
94 static int x86_64_hregs;
97 typedef struct DisasContext {
98 /* current insn context */
99 int override; /* -1 if no override */
103 target_ulong pc_start;
104 target_ulong pc; /* pc = eip + cs_base */
105 int is_jmp; /* 1 means jump (stop translation), 2 means CPU
106 static state change (stop translation) */
107 /* current block context */
108 target_ulong cs_base; /* base of CS segment */
109 int pe; /* protected mode */
110 int code32; /* 32 bit code segment */
112 int lma; /* long mode active */
113 int code64; /* 64 bit code segment */
116 int vex_l; /* vex vector length */
117 int vex_v; /* vex vvvv register, without 1's complement. */
118 int ss32; /* 32 bit stack segment */
119 CCOp cc_op; /* current CC operation */
121 int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
122 int f_st; /* currently unused */
123 int vm86; /* vm86 mode */
126 int tf; /* TF cpu flag */
127 int singlestep_enabled; /* "hardware" single step enabled */
128 int jmp_opt; /* use direct block chaining for direct jumps */
129 int repz_opt; /* optimize jumps within repz instructions */
130 int mem_index; /* select memory access functions */
131 uint64_t flags; /* all execution flags */
132 struct TranslationBlock *tb;
133 int popl_esp_hack; /* for correct popl with esp base handling */
134 int rip_offset; /* only used in x86_64, but left for simplicity */
136 int cpuid_ext_features;
137 int cpuid_ext2_features;
138 int cpuid_ext3_features;
139 int cpuid_7_0_ebx_features;
140 int cpuid_xsave_features;
143 static void gen_eob(DisasContext *s);
144 static void gen_jmp(DisasContext *s, target_ulong eip);
145 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
146 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
148 /* i386 arith/logic operations */
168 OP_SHL1, /* undocumented */
184 /* I386 int registers */
185 OR_EAX, /* MUST be even numbered */
194 OR_TMP0 = 16, /* temporary operand register */
196 OR_A0, /* temporary register used when doing address evaluation */
206 /* Bit set if the global variable is live after setting CC_OP to X. */
207 static const uint8_t cc_op_live[CC_OP_NB] = {
208 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
209 [CC_OP_EFLAGS] = USES_CC_SRC,
210 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
211 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
212 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
213 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
214 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
215 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
216 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
217 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
218 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
219 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
220 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
221 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
222 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
223 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
227 static void set_cc_op(DisasContext *s, CCOp op)
231 if (s->cc_op == op) {
235 /* Discard CC computation that will no longer be used. */
236 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
237 if (dead & USES_CC_DST) {
238 tcg_gen_discard_tl(cpu_cc_dst);
240 if (dead & USES_CC_SRC) {
241 tcg_gen_discard_tl(cpu_cc_src);
243 if (dead & USES_CC_SRC2) {
244 tcg_gen_discard_tl(cpu_cc_src2);
246 if (dead & USES_CC_SRCT) {
247 tcg_gen_discard_tl(cpu_cc_srcT);
250 if (op == CC_OP_DYNAMIC) {
251 /* The DYNAMIC setting is translator only, and should never be
252 stored. Thus we always consider it clean. */
253 s->cc_op_dirty = false;
255 /* Discard any computed CC_OP value (see shifts). */
256 if (s->cc_op == CC_OP_DYNAMIC) {
257 tcg_gen_discard_i32(cpu_cc_op);
259 s->cc_op_dirty = true;
264 static void gen_update_cc_op(DisasContext *s)
266 if (s->cc_op_dirty) {
267 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
268 s->cc_op_dirty = false;
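/* Together, set_cc_op() and gen_update_cc_op() implement lazy flag
   evaluation: an arithmetic op only records its operands/result in the
   cc_* globals and calls e.g. set_cc_op(s, CC_OP_ADDB + ot); the pending
   CC_OP value is spilled to cpu_cc_op only when a later consumer (a jcc,
   a helper call, or the end of the block) actually needs it. */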
274 #define NB_OP_SIZES 4
276 #else /* !TARGET_X86_64 */
278 #define NB_OP_SIZES 3
280 #endif /* !TARGET_X86_64 */
282 #if defined(HOST_WORDS_BIGENDIAN)
283 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
284 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
285 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
286 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
287 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
289 #define REG_B_OFFSET 0
290 #define REG_H_OFFSET 1
291 #define REG_W_OFFSET 0
292 #define REG_L_OFFSET 0
293 #define REG_LH_OFFSET 4
296 /* In instruction encodings for byte register accesses the
297 * register number usually indicates "low 8 bits of register N";
298 * however there are some special cases where N 4..7 indicates
299 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
300 * true for this special case, false otherwise.
302 static inline bool byte_reg_is_xH(int reg)
308 if (reg >= 8 || x86_64_hregs) {
315 /* Select the size of a push/pop operation. */
316 static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
319 return ot == MO_16 ? MO_16 : MO_64;
325 /* Select the size of the stack pointer. */
326 static inline TCGMemOp mo_stacksize(DisasContext *s)
328 return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
331 /* Select only size 64 else 32. Used for SSE operand sizes. */
332 static inline TCGMemOp mo_64_32(TCGMemOp ot)
335 return ot == MO_64 ? MO_64 : MO_32;
341 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
342 byte vs word opcodes. */
343 static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
345 return b & 1 ? ot : MO_8;
348 /* Select size 8 if lsb of B is clear, else OT capped at 32.
349 Used for decoding operand size of port opcodes. */
350 static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
352 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
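/* For example, opcode 0x88 (MOV Eb,Gb) has its lsb clear, so
   mo_b_d(0x88, ot) is MO_8 regardless of the operand-size prefix, while
   0x89 (MOV Ev,Gv) keeps ot; mo_b_d32 additionally caps a 64-bit ot to
   MO_32, since port I/O is at most 32 bits wide. */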
355 static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
359 if (!byte_reg_is_xH(reg)) {
360 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
362 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
366 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
369 /* For x86_64, this sets the higher half of the register to zero.
370 For i386, this is equivalent to a mov. */
371 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
375 tcg_gen_mov_tl(cpu_regs[reg], t0);
383 static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
385 if (ot == MO_8 && byte_reg_is_xH(reg)) {
386 tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
388 tcg_gen_mov_tl(t0, cpu_regs[reg]);
392 static void gen_add_A0_im(DisasContext *s, int val)
394 tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
396 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
400 static inline void gen_op_jmp_v(TCGv dest)
402 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
405 static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
407 tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
408 gen_op_mov_reg_v(size, reg, cpu_tmp0);
411 static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
413 tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
414 gen_op_mov_reg_v(size, reg, cpu_tmp0);
417 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
419 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
422 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
424 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
427 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
430 gen_op_st_v(s, idx, cpu_T0, cpu_A0);
432 gen_op_mov_reg_v(idx, d, cpu_T0);
436 static inline void gen_jmp_im(target_ulong pc)
438 tcg_gen_movi_tl(cpu_tmp0, pc);
439 gen_op_jmp_v(cpu_tmp0);
442 /* Compute SEG:REG into A0. SEG is selected from the override segment
443 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
444 indicate no override. */
445 static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
446 int def_seg, int ovr_seg)
452 tcg_gen_mov_tl(cpu_A0, a0);
459 if (ovr_seg < 0 && s->addseg) {
463 tcg_gen_ext32u_tl(cpu_A0, a0);
469 tcg_gen_ext16u_tl(cpu_A0, a0);
484 TCGv seg = cpu_seg_base[ovr_seg];
486 if (aflag == MO_64) {
487 tcg_gen_add_tl(cpu_A0, a0, seg);
488 } else if (CODE64(s)) {
489 tcg_gen_ext32u_tl(cpu_A0, a0);
490 tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
492 tcg_gen_add_tl(cpu_A0, a0, seg);
493 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
498 static inline void gen_string_movl_A0_ESI(DisasContext *s)
500 gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
503 static inline void gen_string_movl_A0_EDI(DisasContext *s)
505 gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
508 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
510 tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
511 tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
514 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
519 tcg_gen_ext8s_tl(dst, src);
521 tcg_gen_ext8u_tl(dst, src);
526 tcg_gen_ext16s_tl(dst, src);
528 tcg_gen_ext16u_tl(dst, src);
534 tcg_gen_ext32s_tl(dst, src);
536 tcg_gen_ext32u_tl(dst, src);
545 static void gen_extu(TCGMemOp ot, TCGv reg)
547 gen_ext_tl(reg, reg, ot, false);
550 static void gen_exts(TCGMemOp ot, TCGv reg)
552 gen_ext_tl(reg, reg, ot, true);
555 static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
557 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
558 gen_extu(size, cpu_tmp0);
559 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
562 static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
564 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
565 gen_extu(size, cpu_tmp0);
566 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
569 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
573 gen_helper_inb(v, cpu_env, n);
576 gen_helper_inw(v, cpu_env, n);
579 gen_helper_inl(v, cpu_env, n);
586 static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
590 gen_helper_outb(cpu_env, v, n);
593 gen_helper_outw(cpu_env, v, n);
596 gen_helper_outl(cpu_env, v, n);
603 static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
606 target_ulong next_eip;
608 if (s->pe && (s->cpl > s->iopl || s->vm86)) {
609 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
612 gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
615 gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
618 gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
624 if (s->flags & HF_SVMI_MASK) {
627 svm_flags |= (1 << (4 + ot));
628 next_eip = s->pc - s->cs_base;
629 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
630 gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
631 tcg_const_i32(svm_flags),
632 tcg_const_i32(next_eip - cur_eip));
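/* Example: "inb %dx,%al" executed at CPL 3 with IOPL 0 satisfies the
   pe && cpl > iopl test above, so gen_helper_check_iob is emitted; at run
   time it consults the TSS I/O permission bitmap and raises #GP if access
   to the port number held in T0 is not permitted. */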
636 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
638 gen_string_movl_A0_ESI(s);
639 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
640 gen_string_movl_A0_EDI(s);
641 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
642 gen_op_movl_T0_Dshift(ot);
643 gen_op_add_reg_T0(s->aflag, R_ESI);
644 gen_op_add_reg_T0(s->aflag, R_EDI);
647 static void gen_op_update1_cc(void)
649 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
652 static void gen_op_update2_cc(void)
654 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
655 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
658 static void gen_op_update3_cc(TCGv reg)
660 tcg_gen_mov_tl(cpu_cc_src2, reg);
661 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
662 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
665 static inline void gen_op_testl_T0_T1_cc(void)
667 tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
670 static void gen_op_update_neg_cc(void)
672 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
673 tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
674 tcg_gen_movi_tl(cpu_cc_srcT, 0);
677 /* compute all eflags to cc_src */
678 static void gen_compute_eflags(DisasContext *s)
680 TCGv zero, dst, src1, src2;
683 if (s->cc_op == CC_OP_EFLAGS) {
686 if (s->cc_op == CC_OP_CLR) {
687 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
688 set_cc_op(s, CC_OP_EFLAGS);
697 /* Take care to not read values that are not live. */
698 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
699 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
701 zero = tcg_const_tl(0);
702 if (dead & USES_CC_DST) {
705 if (dead & USES_CC_SRC) {
708 if (dead & USES_CC_SRC2) {
714 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
715 set_cc_op(s, CC_OP_EFLAGS);
722 typedef struct CCPrepare {
732 /* compute eflags.C to reg */
733 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
739 case CC_OP_SUBB ... CC_OP_SUBQ:
740 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
741 size = s->cc_op - CC_OP_SUBB;
742 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
743 /* If no temporary was used, be careful not to alias t1 and t0. */
744 t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
745 tcg_gen_mov_tl(t0, cpu_cc_srcT);
749 case CC_OP_ADDB ... CC_OP_ADDQ:
750 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
751 size = s->cc_op - CC_OP_ADDB;
752 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
753 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
755 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
756 .reg2 = t1, .mask = -1, .use_reg2 = true };
758 case CC_OP_LOGICB ... CC_OP_LOGICQ:
760 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
762 case CC_OP_INCB ... CC_OP_INCQ:
763 case CC_OP_DECB ... CC_OP_DECQ:
764 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
765 .mask = -1, .no_setcond = true };
767 case CC_OP_SHLB ... CC_OP_SHLQ:
768 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
769 size = s->cc_op - CC_OP_SHLB;
770 shift = (8 << size) - 1;
771 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
772 .mask = (target_ulong)1 << shift };
774 case CC_OP_MULB ... CC_OP_MULQ:
775 return (CCPrepare) { .cond = TCG_COND_NE,
776 .reg = cpu_cc_src, .mask = -1 };
778 case CC_OP_BMILGB ... CC_OP_BMILGQ:
779 size = s->cc_op - CC_OP_BMILGB;
780 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
781 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
785 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
786 .mask = -1, .no_setcond = true };
789 case CC_OP_SARB ... CC_OP_SARQ:
791 return (CCPrepare) { .cond = TCG_COND_NE,
792 .reg = cpu_cc_src, .mask = CC_C };
795 /* The need to compute only C from CC_OP_DYNAMIC is important
796 in efficiently implementing e.g. INC at the start of a TB. */
798 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
799 cpu_cc_src2, cpu_cc_op);
800 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
801 .mask = -1, .no_setcond = true };
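/* As an illustration: after "sub %ebx,%eax" the translator is in
   CC_OP_SUBL and the SUB case above yields roughly
   { .cond = TCG_COND_LTU, .reg = <old EAX from cc_srcT>,
     .reg2 = <EBX from cc_src>, .use_reg2 = true },
   i.e. CF is set iff the minuend was below the subtrahend unsigned. */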
805 /* compute eflags.P to reg */
806 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
808 gen_compute_eflags(s);
809 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
813 /* compute eflags.S to reg */
814 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
818 gen_compute_eflags(s);
824 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
827 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
830 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
831 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
832 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
837 /* compute eflags.O to reg */
838 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
843 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
844 .mask = -1, .no_setcond = true };
846 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
848 gen_compute_eflags(s);
849 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
854 /* compute eflags.Z to reg */
855 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
859 gen_compute_eflags(s);
865 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
868 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
871 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
872 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
873 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
878 /* perform a conditional store into register 'reg' according to jump opcode
879 value 'b'. In the fast case, T0 is guaranteed not to be used. */
880 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
882 int inv, jcc_op, cond;
888 jcc_op = (b >> 1) & 7;
891 case CC_OP_SUBB ... CC_OP_SUBQ:
892 /* We optimize relational operators for the cmp/jcc case. */
893 size = s->cc_op - CC_OP_SUBB;
896 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
897 gen_extu(size, cpu_tmp4);
898 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
899 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
900 .reg2 = t0, .mask = -1, .use_reg2 = true };
909 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
910 gen_exts(size, cpu_tmp4);
911 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
912 cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
913 .reg2 = t0, .mask = -1, .use_reg2 = true };
923 /* This actually generates good code for JC, JZ and JS. */
926 cc = gen_prepare_eflags_o(s, reg);
929 cc = gen_prepare_eflags_c(s, reg);
932 cc = gen_prepare_eflags_z(s, reg);
935 gen_compute_eflags(s);
936 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
937 .mask = CC_Z | CC_C };
940 cc = gen_prepare_eflags_s(s, reg);
943 cc = gen_prepare_eflags_p(s, reg);
946 gen_compute_eflags(s);
947 if (TCGV_EQUAL(reg, cpu_cc_src)) {
950 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
951 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
952 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
957 gen_compute_eflags(s);
958 if (TCGV_EQUAL(reg, cpu_cc_src)) {
961 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
962 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
963 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
964 .mask = CC_S | CC_Z };
971 cc.cond = tcg_invert_cond(cc.cond);
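/* Decoding example: for JNE the condition field is 5, so
   jcc_op = (5 >> 1) & 7 = JCC_Z and inv = 1; the Z-flag CCPrepare is
   built by the default path and its TCG condition is then inverted
   just above. */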
976 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
978 CCPrepare cc = gen_prepare_cc(s, b, reg);
981 if (cc.cond == TCG_COND_EQ) {
982 tcg_gen_xori_tl(reg, cc.reg, 1);
984 tcg_gen_mov_tl(reg, cc.reg);
989 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
990 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
991 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
992 tcg_gen_andi_tl(reg, reg, 1);
996 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1000 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1002 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1006 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1008 gen_setcc1(s, JCC_B << 1, reg);
1011 /* generate a conditional jump to label 'l1' according to jump opcode
1012 value 'b'. In the fast case, T0 is guaranteed not to be used. */
1013 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1015 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1017 if (cc.mask != -1) {
1018 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1022 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1024 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1028 /* Generate a conditional jump to label 'l1' according to jump opcode
1029 value 'b'. In the fast case, T0 is guaranteed not to be used.
1030 A translation block must end soon. */
1031 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1033 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1035 gen_update_cc_op(s);
1036 if (cc.mask != -1) {
1037 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1040 set_cc_op(s, CC_OP_DYNAMIC);
1042 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1044 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1048 /* XXX: does not work with gdbstub "ice" single step - not a
1050 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1052 TCGLabel *l1 = gen_new_label();
1053 TCGLabel *l2 = gen_new_label();
1054 gen_op_jnz_ecx(s->aflag, l1);
1056 gen_jmp_tb(s, next_eip, 1);
1061 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
1063 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
1064 gen_string_movl_A0_EDI(s);
1065 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1066 gen_op_movl_T0_Dshift(ot);
1067 gen_op_add_reg_T0(s->aflag, R_EDI);
1070 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
1072 gen_string_movl_A0_ESI(s);
1073 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1074 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
1075 gen_op_movl_T0_Dshift(ot);
1076 gen_op_add_reg_T0(s->aflag, R_ESI);
1079 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
1081 gen_string_movl_A0_EDI(s);
1082 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1083 gen_op(s, OP_CMPL, ot, R_EAX);
1084 gen_op_movl_T0_Dshift(ot);
1085 gen_op_add_reg_T0(s->aflag, R_EDI);
1088 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
1090 gen_string_movl_A0_EDI(s);
1091 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1092 gen_string_movl_A0_ESI(s);
1093 gen_op(s, OP_CMPL, ot, OR_TMP0);
1094 gen_op_movl_T0_Dshift(ot);
1095 gen_op_add_reg_T0(s->aflag, R_ESI);
1096 gen_op_add_reg_T0(s->aflag, R_EDI);
1099 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1101 if (s->flags & HF_IOBPT_MASK) {
1102 TCGv_i32 t_size = tcg_const_i32(1 << ot);
1103 TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1105 gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1106 tcg_temp_free_i32(t_size);
1107 tcg_temp_free(t_next);
1112 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
1114 if (s->tb->cflags & CF_USE_ICOUNT) {
1117 gen_string_movl_A0_EDI(s);
1118 /* Note: we must do this dummy write first to be restartable in
1119 case of page fault. */
1120 tcg_gen_movi_tl(cpu_T0, 0);
1121 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1122 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1123 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1124 gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
1125 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1126 gen_op_movl_T0_Dshift(ot);
1127 gen_op_add_reg_T0(s->aflag, R_EDI);
1128 gen_bpt_io(s, cpu_tmp2_i32, ot);
1129 if (s->tb->cflags & CF_USE_ICOUNT) {
1134 static inline void gen_outs(DisasContext *s, TCGMemOp ot)
1136 if (s->tb->cflags & CF_USE_ICOUNT) {
1139 gen_string_movl_A0_ESI(s);
1140 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1142 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1143 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1144 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
1145 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
1146 gen_op_movl_T0_Dshift(ot);
1147 gen_op_add_reg_T0(s->aflag, R_ESI);
1148 gen_bpt_io(s, cpu_tmp2_i32, ot);
1149 if (s->tb->cflags & CF_USE_ICOUNT) {
1154 /* same method as Valgrind: we generate jumps to the current or next
1156 #define GEN_REPZ(op) \
1157 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1158 target_ulong cur_eip, target_ulong next_eip) \
1161 gen_update_cc_op(s); \
1162 l2 = gen_jz_ecx_string(s, next_eip); \
1163 gen_ ## op(s, ot); \
1164 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1165 /* a loop would cause two single step exceptions if ECX = 1 \
1166 before rep string_insn */ \
1168 gen_op_jz_ecx(s->aflag, l2); \
1169 gen_jmp(s, cur_eip); \
1172 #define GEN_REPZ2(op) \
1173 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1174 target_ulong cur_eip, \
1175 target_ulong next_eip, \
1179 gen_update_cc_op(s); \
1180 l2 = gen_jz_ecx_string(s, next_eip); \
1181 gen_ ## op(s, ot); \
1182 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1183 gen_update_cc_op(s); \
1184 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1186 gen_op_jz_ecx(s->aflag, l2); \
1187 gen_jmp(s, cur_eip); \
1198 static void gen_helper_fp_arith_ST0_FT0(int op)
1202 gen_helper_fadd_ST0_FT0(cpu_env);
1205 gen_helper_fmul_ST0_FT0(cpu_env);
1208 gen_helper_fcom_ST0_FT0(cpu_env);
1211 gen_helper_fcom_ST0_FT0(cpu_env);
1214 gen_helper_fsub_ST0_FT0(cpu_env);
1217 gen_helper_fsubr_ST0_FT0(cpu_env);
1220 gen_helper_fdiv_ST0_FT0(cpu_env);
1223 gen_helper_fdivr_ST0_FT0(cpu_env);
1228 /* NOTE the exception in "r" op ordering */
1229 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1231 TCGv_i32 tmp = tcg_const_i32(opreg);
1234 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1237 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1240 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1243 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1246 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1249 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1254 /* if d == OR_TMP0, it means memory operand (address in A0) */
1255 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
1258 gen_op_mov_v_reg(ot, cpu_T0, d);
1259 } else if (!(s1->prefix & PREFIX_LOCK)) {
1260 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1264 gen_compute_eflags_c(s1, cpu_tmp4);
1265 if (s1->prefix & PREFIX_LOCK) {
1266 tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1);
1267 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1268 s1->mem_index, ot | MO_LE);
1270 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1271 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
1272 gen_op_st_rm_T0_A0(s1, ot, d);
1274 gen_op_update3_cc(cpu_tmp4);
1275 set_cc_op(s1, CC_OP_ADCB + ot);
1278 gen_compute_eflags_c(s1, cpu_tmp4);
1279 if (s1->prefix & PREFIX_LOCK) {
1280 tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4);
1281 tcg_gen_neg_tl(cpu_T0, cpu_T0);
1282 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1283 s1->mem_index, ot | MO_LE);
1285 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1286 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
1287 gen_op_st_rm_T0_A0(s1, ot, d);
1289 gen_op_update3_cc(cpu_tmp4);
1290 set_cc_op(s1, CC_OP_SBBB + ot);
1293 if (s1->prefix & PREFIX_LOCK) {
1294 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1295 s1->mem_index, ot | MO_LE);
1297 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1298 gen_op_st_rm_T0_A0(s1, ot, d);
1300 gen_op_update2_cc();
1301 set_cc_op(s1, CC_OP_ADDB + ot);
1304 if (s1->prefix & PREFIX_LOCK) {
1305 tcg_gen_neg_tl(cpu_T0, cpu_T1);
1306 tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0,
1307 s1->mem_index, ot | MO_LE);
1308 tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1);
1310 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1311 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1312 gen_op_st_rm_T0_A0(s1, ot, d);
1314 gen_op_update2_cc();
1315 set_cc_op(s1, CC_OP_SUBB + ot);
1319 if (s1->prefix & PREFIX_LOCK) {
1320 tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1321 s1->mem_index, ot | MO_LE);
1323 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
1324 gen_op_st_rm_T0_A0(s1, ot, d);
1326 gen_op_update1_cc();
1327 set_cc_op(s1, CC_OP_LOGICB + ot);
1330 if (s1->prefix & PREFIX_LOCK) {
1331 tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1332 s1->mem_index, ot | MO_LE);
1334 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1335 gen_op_st_rm_T0_A0(s1, ot, d);
1337 gen_op_update1_cc();
1338 set_cc_op(s1, CC_OP_LOGICB + ot);
1341 if (s1->prefix & PREFIX_LOCK) {
1342 tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1,
1343 s1->mem_index, ot | MO_LE);
1345 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
1346 gen_op_st_rm_T0_A0(s1, ot, d);
1348 gen_op_update1_cc();
1349 set_cc_op(s1, CC_OP_LOGICB + ot);
1352 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
1353 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1354 tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
1355 set_cc_op(s1, CC_OP_SUBB + ot);
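/* Illustration of the two paths above: for "lock addl %ecx,(%ebx)" the
   memory operand is never loaded into T0 up front; instead
   tcg_gen_atomic_add_fetch_tl performs the read-modify-write as one
   atomic step and returns the new value for the flag update, whereas a
   plain "addl %ecx,(%ebx)" loads, adds and stores via
   gen_op_st_rm_T0_A0. */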
1360 /* if d == OR_TMP0, it means memory operand (address in A0) */
1361 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
1363 if (s1->prefix & PREFIX_LOCK) {
1364 tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1);
1365 tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
1366 s1->mem_index, ot | MO_LE);
1369 gen_op_mov_v_reg(ot, cpu_T0, d);
1371 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1373 tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1));
1374 gen_op_st_rm_T0_A0(s1, ot, d);
1377 gen_compute_eflags_c(s1, cpu_cc_src);
1378 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1379 set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1382 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
1383 TCGv shm1, TCGv count, bool is_right)
1385 TCGv_i32 z32, s32, oldop;
1388 /* Store the results into the CC variables. If we know that the
1389 variable must be dead, store unconditionally. Otherwise we'll
1390 need to not disrupt the current contents. */
1391 z_tl = tcg_const_tl(0);
1392 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1393 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1394 result, cpu_cc_dst);
1396 tcg_gen_mov_tl(cpu_cc_dst, result);
1398 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1399 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1402 tcg_gen_mov_tl(cpu_cc_src, shm1);
1404 tcg_temp_free(z_tl);
1406 /* Get the two potential CC_OP values into temporaries. */
1407 tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1408 if (s->cc_op == CC_OP_DYNAMIC) {
1411 tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
1412 oldop = cpu_tmp3_i32;
1415 /* Conditionally store the CC_OP value. */
1416 z32 = tcg_const_i32(0);
1417 s32 = tcg_temp_new_i32();
1418 tcg_gen_trunc_tl_i32(s32, count);
1419 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
1420 tcg_temp_free_i32(z32);
1421 tcg_temp_free_i32(s32);
1423 /* The CC_OP value is no longer predictable. */
1424 set_cc_op(s, CC_OP_DYNAMIC);
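/* Conceptually the movcond chain above computes
   cc_dst = count ? result : cc_dst and cc_op = count ? new_op : old_op,
   because a shift with a zero CL count must leave EFLAGS (and thus the
   lazy CC state) completely untouched. */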
1427 static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1428 int is_right, int is_arith)
1430 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1433 if (op1 == OR_TMP0) {
1434 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1436 gen_op_mov_v_reg(ot, cpu_T0, op1);
1439 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1440 tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
1444 gen_exts(ot, cpu_T0);
1445 tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1446 tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
1448 gen_extu(ot, cpu_T0);
1449 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1450 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
1453 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1454 tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
1458 gen_op_st_rm_T0_A0(s, ot, op1);
1460 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
1463 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1464 int is_right, int is_arith)
1466 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1470 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1472 gen_op_mov_v_reg(ot, cpu_T0, op1);
1478 gen_exts(ot, cpu_T0);
1479 tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
1480 tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
1482 gen_extu(ot, cpu_T0);
1483 tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
1484 tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
1487 tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
1488 tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
1493 gen_op_st_rm_T0_A0(s, ot, op1);
1495 /* update eflags if the shift count is non-zero */
1497 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
1498 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1499 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1503 static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1505 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1509 if (op1 == OR_TMP0) {
1510 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1512 gen_op_mov_v_reg(ot, cpu_T0, op1);
1515 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1519 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1520 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
1521 tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
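/* E.g. T0 = 0xab becomes 0xabababab, so a 32-bit rotate of the
   replicated value leaves the correct 8-bit result in the low byte:
   rotating left by 3 gives 0x5d5d5d5d, and 0x5d == rol8(0xab, 3). */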
1524 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1525 tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
1528 #ifdef TARGET_X86_64
1530 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1531 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
1533 tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1535 tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1537 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1542 tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
1544 tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
1550 gen_op_st_rm_T0_A0(s, ot, op1);
1552 /* We'll need the flags computed into CC_SRC. */
1553 gen_compute_eflags(s);
1555 /* The value that was "rotated out" is now present at the other end
1556 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1557 since we've computed the flags into CC_SRC, these variables are
1560 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1561 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1562 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1564 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1565 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1567 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1568 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1570 /* Now conditionally store the new CC_OP value. If the shift count
1571 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1572 Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
1573 exactly as we computed above. */
1574 t0 = tcg_const_i32(0);
1575 t1 = tcg_temp_new_i32();
1576 tcg_gen_trunc_tl_i32(t1, cpu_T1);
1577 tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
1578 tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1579 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1580 cpu_tmp2_i32, cpu_tmp3_i32);
1581 tcg_temp_free_i32(t0);
1582 tcg_temp_free_i32(t1);
1584 /* The CC_OP value is no longer predictable. */
1585 set_cc_op(s, CC_OP_DYNAMIC);
1588 static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1591 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1595 if (op1 == OR_TMP0) {
1596 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1598 gen_op_mov_v_reg(ot, cpu_T0, op1);
1604 #ifdef TARGET_X86_64
1606 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1608 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1610 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1612 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1617 tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1619 tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1630 shift = mask + 1 - shift;
1632 gen_extu(ot, cpu_T0);
1633 tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1634 tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1635 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1641 gen_op_st_rm_T0_A0(s, ot, op1);
1644 /* Compute the flags into CC_SRC. */
1645 gen_compute_eflags(s);
1647 /* The value that was "rotated out" is now present at the other end
1648 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1649 since we've computed the flags into CC_SRC, these variables are
1652 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1653 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1654 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1656 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1657 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1659 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1660 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1661 set_cc_op(s, CC_OP_ADCOX);
1665 /* XXX: add faster immediate = 1 case */
1666 static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1669 gen_compute_eflags(s);
1670 assert(s->cc_op == CC_OP_EFLAGS);
1674 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1676 gen_op_mov_v_reg(ot, cpu_T0, op1);
1681 gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1684 gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1687 gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1689 #ifdef TARGET_X86_64
1691 gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1700 gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1703 gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1706 gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1708 #ifdef TARGET_X86_64
1710 gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1718 gen_op_st_rm_T0_A0(s, ot, op1);
1721 /* XXX: add faster immediate case */
1722 static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1723 bool is_right, TCGv count_in)
1725 target_ulong mask = (ot == MO_64 ? 63 : 31);
1729 if (op1 == OR_TMP0) {
1730 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1732 gen_op_mov_v_reg(ot, cpu_T0, op1);
1735 count = tcg_temp_new();
1736 tcg_gen_andi_tl(count, count_in, mask);
1740 /* Note: we implement the Intel behaviour for shift count > 16.
1741 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1742 portion by constructing it as a 32-bit value. */
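/* Concretely, for a count c <= 16, "shrdw %cl,%bx,%ax" behaves as if
   the 32-bit value BX:AX were shifted right by c and the low 16 bits
   written back to AX; building that (and, per the note above, A:B:A
   for larger counts) as a 32-bit quantity lets the generic word-sized
   shift code below produce the right result. */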
1744 tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1745 tcg_gen_mov_tl(cpu_T1, cpu_T0);
1746 tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1748 tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1751 #ifdef TARGET_X86_64
1753 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1754 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1756 tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1757 tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1758 tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1760 tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1761 tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1762 tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1763 tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1764 tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1769 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1771 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1773 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1774 tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1775 tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1777 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1779 /* Only needed if count > 16, for Intel behaviour. */
1780 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1781 tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1782 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1785 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1786 tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1787 tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1789 tcg_gen_movi_tl(cpu_tmp4, 0);
1790 tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1792 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1797 gen_op_st_rm_T0_A0(s, ot, op1);
1799 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1800 tcg_temp_free(count);
1803 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1806 gen_op_mov_v_reg(ot, cpu_T1, s);
1809 gen_rot_rm_T1(s1, ot, d, 0);
1812 gen_rot_rm_T1(s1, ot, d, 1);
1816 gen_shift_rm_T1(s1, ot, d, 0, 0);
1819 gen_shift_rm_T1(s1, ot, d, 1, 0);
1822 gen_shift_rm_T1(s1, ot, d, 1, 1);
1825 gen_rotc_rm_T1(s1, ot, d, 0);
1828 gen_rotc_rm_T1(s1, ot, d, 1);
1833 static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1837 gen_rot_rm_im(s1, ot, d, c, 0);
1840 gen_rot_rm_im(s1, ot, d, c, 1);
1844 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1847 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1850 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1853 /* currently not optimized */
1854 tcg_gen_movi_tl(cpu_T1, c);
1855 gen_shift(s1, op, ot, d, OR_TMP1);
1860 /* Decompose an address. */
1862 typedef struct AddressParts {
1870 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1873 int def_seg, base, index, scale, mod, rm;
1882 mod = (modrm >> 6) & 3;
1884 base = rm | REX_B(s);
1887 /* Normally filtered out earlier, but including this path
1888 simplifies multi-byte nop, as well as bndcl, bndcu, bndcn. */
1897 int code = cpu_ldub_code(env, s->pc++);
1898 scale = (code >> 6) & 3;
1899 index = ((code >> 3) & 7) | REX_X(s);
1901 index = -1; /* no index */
1903 base = (code & 7) | REX_B(s);
1909 if ((base & 7) == 5) {
1911 disp = (int32_t)cpu_ldl_code(env, s->pc);
1913 if (CODE64(s) && !havesib) {
1915 disp += s->pc + s->rip_offset;
1920 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1924 disp = (int32_t)cpu_ldl_code(env, s->pc);
1929 /* For correct popl handling with esp. */
1930 if (base == R_ESP && s->popl_esp_hack) {
1931 disp += s->popl_esp_hack;
1933 if (base == R_EBP || base == R_ESP) {
1942 disp = cpu_lduw_code(env, s->pc);
1946 } else if (mod == 1) {
1947 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1949 disp = (int16_t)cpu_lduw_code(env, s->pc);
1994 return (AddressParts){ def_seg, base, index, scale, disp };
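/* Worked example (32-bit addressing): modrm 0x44 has mod=1, rm=4, so a
   SIB byte and a disp8 follow; with sib 0x98 (scale=2, index=3, base=0)
   and disp8 0x10, this decomposes to { R_DS, R_EAX, R_EBX, 2, 16 },
   i.e. the effective address EAX + EBX*4 + 16 in the DS segment. */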
1997 /* Compute the address, with a minimum number of TCG ops. */
1998 static TCGv gen_lea_modrm_1(AddressParts a)
2005 ea = cpu_regs[a.index];
2007 tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
2011 tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
2014 } else if (a.base >= 0) {
2015 ea = cpu_regs[a.base];
2017 if (TCGV_IS_UNUSED(ea)) {
2018 tcg_gen_movi_tl(cpu_A0, a.disp);
2020 } else if (a.disp != 0) {
2021 tcg_gen_addi_tl(cpu_A0, ea, a.disp);
2028 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2030 AddressParts a = gen_lea_modrm_0(env, s, modrm);
2031 TCGv ea = gen_lea_modrm_1(a);
2032 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2035 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2037 (void)gen_lea_modrm_0(env, s, modrm);
2040 /* Used for BNDCL, BNDCU, BNDCN. */
2041 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2042 TCGCond cond, TCGv_i64 bndv)
2044 TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2046 tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2048 tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2050 tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2051 tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2052 gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2055 /* used for LEA and MOV AX, mem */
2056 static void gen_add_A0_ds_seg(DisasContext *s)
2058 gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2061 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2063 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2064 TCGMemOp ot, int reg, int is_store)
2068 mod = (modrm >> 6) & 3;
2069 rm = (modrm & 7) | REX_B(s);
2073 gen_op_mov_v_reg(ot, cpu_T0, reg);
2074 gen_op_mov_reg_v(ot, rm, cpu_T0);
2076 gen_op_mov_v_reg(ot, cpu_T0, rm);
2078 gen_op_mov_reg_v(ot, reg, cpu_T0);
2081 gen_lea_modrm(env, s, modrm);
2084 gen_op_mov_v_reg(ot, cpu_T0, reg);
2085 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2087 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2089 gen_op_mov_reg_v(ot, reg, cpu_T0);
2094 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2100 ret = cpu_ldub_code(env, s->pc);
2104 ret = cpu_lduw_code(env, s->pc);
2108 #ifdef TARGET_X86_64
2111 ret = cpu_ldl_code(env, s->pc);
2120 static inline int insn_const_size(TCGMemOp ot)
2129 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2131 #ifndef CONFIG_USER_ONLY
2132 return (pc & TARGET_PAGE_MASK) == (s->tb->pc & TARGET_PAGE_MASK) ||
2133 (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2139 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2141 target_ulong pc = s->cs_base + eip;
2143 if (use_goto_tb(s, pc)) {
2144 /* jump to same page: we can use a direct jump */
2145 tcg_gen_goto_tb(tb_num);
2147 tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
2149 /* jump to another page: currently not optimized */
2155 static inline void gen_jcc(DisasContext *s, int b,
2156 target_ulong val, target_ulong next_eip)
2161 l1 = gen_new_label();
2164 gen_goto_tb(s, 0, next_eip);
2167 gen_goto_tb(s, 1, val);
2168 s->is_jmp = DISAS_TB_JUMP;
2170 l1 = gen_new_label();
2171 l2 = gen_new_label();
2174 gen_jmp_im(next_eip);
2184 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2189 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2191 cc = gen_prepare_cc(s, b, cpu_T1);
2192 if (cc.mask != -1) {
2193 TCGv t0 = tcg_temp_new();
2194 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2198 cc.reg2 = tcg_const_tl(cc.imm);
2201 tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2202 cpu_T0, cpu_regs[reg]);
2203 gen_op_mov_reg_v(ot, reg, cpu_T0);
2205 if (cc.mask != -1) {
2206 tcg_temp_free(cc.reg);
2209 tcg_temp_free(cc.reg2);
2213 static inline void gen_op_movl_T0_seg(int seg_reg)
2215 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2216 offsetof(CPUX86State,segs[seg_reg].selector));
2219 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2221 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2222 tcg_gen_st32_tl(cpu_T0, cpu_env,
2223 offsetof(CPUX86State,segs[seg_reg].selector));
2224 tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2227 /* move T0 to seg_reg and compute if the CPU state may change. Never
2228 call this function with seg_reg == R_CS */
2229 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2231 if (s->pe && !s->vm86) {
2232 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2233 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2234 /* abort translation because the addseg value may change or
2235 because ss32 may change. For R_SS, translation must always
2236 stop, because special handling is needed to disable hardware
2237 interrupts for the next instruction */
2238 if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2239 s->is_jmp = DISAS_TB_JUMP;
2241 gen_op_movl_seg_T0_vm(seg_reg);
2242 if (seg_reg == R_SS)
2243 s->is_jmp = DISAS_TB_JUMP;
2247 static inline int svm_is_rep(int prefixes)
2249 return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2253 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2254 uint32_t type, uint64_t param)
2256 /* no SVM activated; fast case */
2257 if (likely(!(s->flags & HF_SVMI_MASK)))
2259 gen_update_cc_op(s);
2260 gen_jmp_im(pc_start - s->cs_base);
2261 gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2262 tcg_const_i64(param));
2266 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2268 gen_svm_check_intercept_param(s, pc_start, type, 0);
2271 static inline void gen_stack_update(DisasContext *s, int addend)
2273 gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2276 /* Generate a push. It depends on ss32, addseg and dflag. */
2277 static void gen_push_v(DisasContext *s, TCGv val)
2279 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2280 TCGMemOp a_ot = mo_stacksize(s);
2281 int size = 1 << d_ot;
2282 TCGv new_esp = cpu_A0;
2284 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2289 tcg_gen_mov_tl(new_esp, cpu_A0);
2291 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2294 gen_op_st_v(s, d_ot, val, cpu_A0);
2295 gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
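/* E.g. a 32-bit "push %eax" computes new_esp = ESP - 4 into A0, applies
   the SS base via gen_lea_v_seg, stores EAX there, and only then writes
   new_esp back to ESP, so a faulting store leaves ESP unmodified and the
   push can simply be restarted. */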
2298 /* a two-step pop is necessary for precise exceptions */
2299 static TCGMemOp gen_pop_T0(DisasContext *s)
2301 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2303 gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2304 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2309 static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2311 gen_stack_update(s, 1 << ot);
2314 static inline void gen_stack_A0(DisasContext *s)
2316 gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2319 static void gen_pusha(DisasContext *s)
2321 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2322 TCGMemOp d_ot = s->dflag;
2323 int size = 1 << d_ot;
2326 for (i = 0; i < 8; i++) {
2327 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2328 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2329 gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2332 gen_stack_update(s, -8 * size);
2335 static void gen_popa(DisasContext *s)
2337 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2338 TCGMemOp d_ot = s->dflag;
2339 int size = 1 << d_ot;
2342 for (i = 0; i < 8; i++) {
2343 /* ESP is not reloaded */
2344 if (7 - i == R_ESP) {
2347 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2348 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2349 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2350 gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2353 gen_stack_update(s, 8 * size);
2356 static void gen_enter(DisasContext *s, int esp_addend, int level)
2358 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2359 TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2360 int size = 1 << d_ot;
2362 /* Push BP; compute FrameTemp into T1. */
2363 tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2364 gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2365 gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2371 /* Copy level-1 pointers from the previous frame. */
2372 for (i = 1; i < level; ++i) {
2373 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2374 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2375 gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2377 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2378 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2379 gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2382 /* Push the current FrameTemp as the last level. */
2383 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2384 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2385 gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2388 /* Copy the FrameTemp value to EBP. */
2389 gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2391 /* Compute the final value of ESP. */
2392 tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2393 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
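/* Example: "enter $16,$0" pushes the old EBP, sets EBP to the FrameTemp
   value (the ESP value just after that push), copies no outer frame
   pointers since level is 0, and finally sets ESP = FrameTemp - 16,
   matching the architectural ENTER sequence. */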
2396 static void gen_leave(DisasContext *s)
2398 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2399 TCGMemOp a_ot = mo_stacksize(s);
2401 gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2402 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2404 tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2406 gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2407 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2410 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2412 gen_update_cc_op(s);
2413 gen_jmp_im(cur_eip);
2414 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2415 s->is_jmp = DISAS_TB_JUMP;
2418 /* Generate #UD for the current instruction. The assumption here is that
2419 the instruction is known, but it isn't allowed in the current cpu mode. */
2420 static void gen_illegal_opcode(DisasContext *s)
2422 gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2425 /* Similarly, except that the assumption here is that we don't decode
2426 the instruction at all -- either a missing opcode, an unimplemented
2427 feature, or just a bogus instruction stream. */
2428 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2430 gen_illegal_opcode(s);
2432 if (qemu_loglevel_mask(LOG_UNIMP)) {
2433 target_ulong pc = s->pc_start, end = s->pc;
2435 qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2436 for (; pc < end; ++pc) {
2437 qemu_log(" %02x", cpu_ldub_code(env, pc));
2444 /* an interrupt is different from an exception because of the
2446 static void gen_interrupt(DisasContext *s, int intno,
2447 target_ulong cur_eip, target_ulong next_eip)
2449 gen_update_cc_op(s);
2450 gen_jmp_im(cur_eip);
2451 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2452 tcg_const_i32(next_eip - cur_eip));
2453 s->is_jmp = DISAS_TB_JUMP;
2456 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2458 gen_update_cc_op(s);
2459 gen_jmp_im(cur_eip);
2460 gen_helper_debug(cpu_env);
2461 s->is_jmp = DISAS_TB_JUMP;
2464 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2466 if ((s->flags & mask) == 0) {
2467 TCGv_i32 t = tcg_temp_new_i32();
2468 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2469 tcg_gen_ori_i32(t, t, mask);
2470 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2471 tcg_temp_free_i32(t);
2476 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2478 if (s->flags & mask) {
2479 TCGv_i32 t = tcg_temp_new_i32();
2480 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2481 tcg_gen_andi_i32(t, t, ~mask);
2482 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2483 tcg_temp_free_i32(t);
2488 /* Clear BND registers during legacy branches. */
2489 static void gen_bnd_jmp(DisasContext *s)
2491 /* Clear the registers only if BND prefix is missing, MPX is enabled,
2492 and if the BNDREGs are known to be in use (non-zero) already.
2493 The helper itself will check BNDPRESERVE at runtime. */
2494 if ((s->prefix & PREFIX_REPNZ) == 0
2495 && (s->flags & HF_MPX_EN_MASK) != 0
2496 && (s->flags & HF_MPX_IU_MASK) != 0) {
2497 gen_helper_bnd_jmp(cpu_env);
2501 /* Generate an end of block. Trace exception is also generated if needed.
2502 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2503 If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2504 S->TF. This is used by the syscall/sysret insns. */
2505 static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2507 gen_update_cc_op(s);
2509 /* If several instructions disable interrupts, only the first does it. */
2510 if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2511 gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2513 gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2516 if (s->tb->flags & HF_RF_MASK) {
2517 gen_helper_reset_rf(cpu_env);
2519 if (s->singlestep_enabled) {
2520 gen_helper_debug(cpu_env);
2521 } else if (recheck_tf) {
2522 gen_helper_rechecking_single_step(cpu_env);
2525 gen_helper_single_step(cpu_env);
2529 s->is_jmp = DISAS_TB_JUMP;
2533 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
2534 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2536 gen_eob_worker(s, inhibit, false);
2539 /* End of block, resetting the inhibit irq flag. */
2540 static void gen_eob(DisasContext *s)
2542 gen_eob_worker(s, false, false);
2545 /* generate a jump to eip. No segment change must happen before as a
2546 direct call to the next block may occur */
2547 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2549 gen_update_cc_op(s);
2550 set_cc_op(s, CC_OP_DYNAMIC);
2552 gen_goto_tb(s, tb_num, eip);
2553 s->is_jmp = DISAS_TB_JUMP;
2560 static void gen_jmp(DisasContext *s, target_ulong eip)
2562 gen_jmp_tb(s, eip, 0);
2565 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2567 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2568 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2571 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2573 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2574 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2577 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2579 int mem_index = s->mem_index;
2580 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2581 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2582 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2583 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2584 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2587 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2589 int mem_index = s->mem_index;
2590 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2591 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2592 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2593 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2594 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2597 static inline void gen_op_movo(int d_offset, int s_offset)
2599 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2600 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2601 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2602 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2605 static inline void gen_op_movq(int d_offset, int s_offset)
2607 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2608 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2611 static inline void gen_op_movl(int d_offset, int s_offset)
2613 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2614 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2617 static inline void gen_op_movq_env_0(int d_offset)
2619 tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2620 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
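/* The helpers above copy between env slots: gen_op_movo() moves 128 bits,
   gen_op_movq() 64 bits, gen_op_movl() 32 bits, and gen_op_movq_env_0()
   clears a 64-bit lane (e.g. the high half of an XMM register after a
   scalar load).  */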
2623 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2624 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2625 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2626 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2627 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2628 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2630 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2631 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2634 #define SSE_SPECIAL ((void *)1)
2635 #define SSE_DUMMY ((void *)2)
2637 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2638 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2639 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
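/* sse_op_table1 below is indexed by the second opcode byte and by b1,
   which encodes the mandatory prefix: 0 = none, 1 = 0x66, 2 = 0xF3,
   3 = 0xF2.  For example SSE_FOP(add) expands to
     { gen_helper_addps, gen_helper_addpd, gen_helper_addss, gen_helper_addsd }
   and MMX_OP2(paddb) pairs the MMX form with its 0x66 (XMM) form.  */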
2641 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2642 /* 3DNow! extensions */
2643 [0x0e] = { SSE_DUMMY }, /* femms */
2644 [0x0f] = { SSE_DUMMY }, /* pf... */
2645 /* pure SSE operations */
2646 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2647 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2648 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2649 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2650 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2651 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2652 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2653 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2655 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2656 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2657 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2658 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2659 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2660 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2661 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2662 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2663 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2664 [0x51] = SSE_FOP(sqrt),
2665 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2666 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2667 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2668 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2669 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2670 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2671 [0x58] = SSE_FOP(add),
2672 [0x59] = SSE_FOP(mul),
2673 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2674 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2675 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2676 [0x5c] = SSE_FOP(sub),
2677 [0x5d] = SSE_FOP(min),
2678 [0x5e] = SSE_FOP(div),
2679 [0x5f] = SSE_FOP(max),
2681 [0xc2] = SSE_FOP(cmpeq),
2682 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2683 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2685 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2686 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2687 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2689 /* MMX ops and their SSE extensions */
2690 [0x60] = MMX_OP2(punpcklbw),
2691 [0x61] = MMX_OP2(punpcklwd),
2692 [0x62] = MMX_OP2(punpckldq),
2693 [0x63] = MMX_OP2(packsswb),
2694 [0x64] = MMX_OP2(pcmpgtb),
2695 [0x65] = MMX_OP2(pcmpgtw),
2696 [0x66] = MMX_OP2(pcmpgtl),
2697 [0x67] = MMX_OP2(packuswb),
2698 [0x68] = MMX_OP2(punpckhbw),
2699 [0x69] = MMX_OP2(punpckhwd),
2700 [0x6a] = MMX_OP2(punpckhdq),
2701 [0x6b] = MMX_OP2(packssdw),
2702 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2703 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2704 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2705 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2706 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2707 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2708 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2709 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2710 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2711 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2712 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2713 [0x74] = MMX_OP2(pcmpeqb),
2714 [0x75] = MMX_OP2(pcmpeqw),
2715 [0x76] = MMX_OP2(pcmpeql),
2716 [0x77] = { SSE_DUMMY }, /* emms */
2717 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2718 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2719 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2720 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2721 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2722 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2723 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2724 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2725 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2726 [0xd1] = MMX_OP2(psrlw),
2727 [0xd2] = MMX_OP2(psrld),
2728 [0xd3] = MMX_OP2(psrlq),
2729 [0xd4] = MMX_OP2(paddq),
2730 [0xd5] = MMX_OP2(pmullw),
2731 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2732 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2733 [0xd8] = MMX_OP2(psubusb),
2734 [0xd9] = MMX_OP2(psubusw),
2735 [0xda] = MMX_OP2(pminub),
2736 [0xdb] = MMX_OP2(pand),
2737 [0xdc] = MMX_OP2(paddusb),
2738 [0xdd] = MMX_OP2(paddusw),
2739 [0xde] = MMX_OP2(pmaxub),
2740 [0xdf] = MMX_OP2(pandn),
2741 [0xe0] = MMX_OP2(pavgb),
2742 [0xe1] = MMX_OP2(psraw),
2743 [0xe2] = MMX_OP2(psrad),
2744 [0xe3] = MMX_OP2(pavgw),
2745 [0xe4] = MMX_OP2(pmulhuw),
2746 [0xe5] = MMX_OP2(pmulhw),
2747 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2748 [0xe7] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntq, movntdq */
2749 [0xe8] = MMX_OP2(psubsb),
2750 [0xe9] = MMX_OP2(psubsw),
2751 [0xea] = MMX_OP2(pminsw),
2752 [0xeb] = MMX_OP2(por),
2753 [0xec] = MMX_OP2(paddsb),
2754 [0xed] = MMX_OP2(paddsw),
2755 [0xee] = MMX_OP2(pmaxsw),
2756 [0xef] = MMX_OP2(pxor),
2757 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2758 [0xf1] = MMX_OP2(psllw),
2759 [0xf2] = MMX_OP2(pslld),
2760 [0xf3] = MMX_OP2(psllq),
2761 [0xf4] = MMX_OP2(pmuludq),
2762 [0xf5] = MMX_OP2(pmaddwd),
2763 [0xf6] = MMX_OP2(psadbw),
2764 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2765 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2766 [0xf8] = MMX_OP2(psubb),
2767 [0xf9] = MMX_OP2(psubw),
2768 [0xfa] = MMX_OP2(psubl),
2769 [0xfb] = MMX_OP2(psubq),
2770 [0xfc] = MMX_OP2(paddb),
2771 [0xfd] = MMX_OP2(paddw),
2772 [0xfe] = MMX_OP2(paddl),
2775 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2776 [0 + 2] = MMX_OP2(psrlw),
2777 [0 + 4] = MMX_OP2(psraw),
2778 [0 + 6] = MMX_OP2(psllw),
2779 [8 + 2] = MMX_OP2(psrld),
2780 [8 + 4] = MMX_OP2(psrad),
2781 [8 + 6] = MMX_OP2(pslld),
2782 [16 + 2] = MMX_OP2(psrlq),
2783 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2784 [16 + 6] = MMX_OP2(psllq),
2785 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
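/* sse_op_table2 covers the immediate-count shifts 0F 71/72/73: the row is
   ((b - 1) & 3) * 8 plus the /reg field of the modrm byte, so e.g.
   "psrlw mm, imm8" (0F 71 /2) selects entry [0 + 2]; the column is again
   0 for MMX and 1 for the 0x66 (XMM) form.  */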
2788 static const SSEFunc_0_epi sse_op_table3ai[] = {
2789 gen_helper_cvtsi2ss,
2793 #ifdef TARGET_X86_64
2794 static const SSEFunc_0_epl sse_op_table3aq[] = {
2795 gen_helper_cvtsq2ss,
2800 static const SSEFunc_i_ep sse_op_table3bi[] = {
2801 gen_helper_cvttss2si,
2802 gen_helper_cvtss2si,
2803 gen_helper_cvttsd2si,
2807 #ifdef TARGET_X86_64
2808 static const SSEFunc_l_ep sse_op_table3bq[] = {
2809 gen_helper_cvttss2sq,
2810 gen_helper_cvtss2sq,
2811 gen_helper_cvttsd2sq,
2816 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2827 static const SSEFunc_0_epp sse_op_table5[256] = {
2828 [0x0c] = gen_helper_pi2fw,
2829 [0x0d] = gen_helper_pi2fd,
2830 [0x1c] = gen_helper_pf2iw,
2831 [0x1d] = gen_helper_pf2id,
2832 [0x8a] = gen_helper_pfnacc,
2833 [0x8e] = gen_helper_pfpnacc,
2834 [0x90] = gen_helper_pfcmpge,
2835 [0x94] = gen_helper_pfmin,
2836 [0x96] = gen_helper_pfrcp,
2837 [0x97] = gen_helper_pfrsqrt,
2838 [0x9a] = gen_helper_pfsub,
2839 [0x9e] = gen_helper_pfadd,
2840 [0xa0] = gen_helper_pfcmpgt,
2841 [0xa4] = gen_helper_pfmax,
2842 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2843 [0xa7] = gen_helper_movq, /* pfrsqit1 */
2844 [0xaa] = gen_helper_pfsubr,
2845 [0xae] = gen_helper_pfacc,
2846 [0xb0] = gen_helper_pfcmpeq,
2847 [0xb4] = gen_helper_pfmul,
2848 [0xb6] = gen_helper_movq, /* pfrcpit2 */
2849 [0xb7] = gen_helper_pmulhrw_mmx,
2850 [0xbb] = gen_helper_pswapd,
2851 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
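/* sse_op_table5 is indexed directly by the 3DNow! suffix byte, which is
   fetched after the modrm byte and displacement; see the 0x0f case in
   gen_sse() below.  */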
2854 struct SSEOpHelper_epp {
2855 SSEFunc_0_epp op[2];
2859 struct SSEOpHelper_eppi {
2860 SSEFunc_0_eppi op[2];
2864 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2865 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2866 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2867 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2868 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2869 CPUID_EXT_PCLMULQDQ }
2870 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
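/* Each entry in sse_op_table6/7 pairs the helper with the CPUID feature
   bit that must be present; the decoder checks it against
   s->cpuid_ext_features.  For example SSE41_OP(pblendvb) expands to
     { { NULL, gen_helper_pblendvb_xmm }, CPUID_EXT_SSE41 }
   i.e. the insn only exists in its 0x66 (XMM) form.  */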
2872 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2873 [0x00] = SSSE3_OP(pshufb),
2874 [0x01] = SSSE3_OP(phaddw),
2875 [0x02] = SSSE3_OP(phaddd),
2876 [0x03] = SSSE3_OP(phaddsw),
2877 [0x04] = SSSE3_OP(pmaddubsw),
2878 [0x05] = SSSE3_OP(phsubw),
2879 [0x06] = SSSE3_OP(phsubd),
2880 [0x07] = SSSE3_OP(phsubsw),
2881 [0x08] = SSSE3_OP(psignb),
2882 [0x09] = SSSE3_OP(psignw),
2883 [0x0a] = SSSE3_OP(psignd),
2884 [0x0b] = SSSE3_OP(pmulhrsw),
2885 [0x10] = SSE41_OP(pblendvb),
2886 [0x14] = SSE41_OP(blendvps),
2887 [0x15] = SSE41_OP(blendvpd),
2888 [0x17] = SSE41_OP(ptest),
2889 [0x1c] = SSSE3_OP(pabsb),
2890 [0x1d] = SSSE3_OP(pabsw),
2891 [0x1e] = SSSE3_OP(pabsd),
2892 [0x20] = SSE41_OP(pmovsxbw),
2893 [0x21] = SSE41_OP(pmovsxbd),
2894 [0x22] = SSE41_OP(pmovsxbq),
2895 [0x23] = SSE41_OP(pmovsxwd),
2896 [0x24] = SSE41_OP(pmovsxwq),
2897 [0x25] = SSE41_OP(pmovsxdq),
2898 [0x28] = SSE41_OP(pmuldq),
2899 [0x29] = SSE41_OP(pcmpeqq),
2900 [0x2a] = SSE41_SPECIAL, /* movntdqa */
2901 [0x2b] = SSE41_OP(packusdw),
2902 [0x30] = SSE41_OP(pmovzxbw),
2903 [0x31] = SSE41_OP(pmovzxbd),
2904 [0x32] = SSE41_OP(pmovzxbq),
2905 [0x33] = SSE41_OP(pmovzxwd),
2906 [0x34] = SSE41_OP(pmovzxwq),
2907 [0x35] = SSE41_OP(pmovzxdq),
2908 [0x37] = SSE42_OP(pcmpgtq),
2909 [0x38] = SSE41_OP(pminsb),
2910 [0x39] = SSE41_OP(pminsd),
2911 [0x3a] = SSE41_OP(pminuw),
2912 [0x3b] = SSE41_OP(pminud),
2913 [0x3c] = SSE41_OP(pmaxsb),
2914 [0x3d] = SSE41_OP(pmaxsd),
2915 [0x3e] = SSE41_OP(pmaxuw),
2916 [0x3f] = SSE41_OP(pmaxud),
2917 [0x40] = SSE41_OP(pmulld),
2918 [0x41] = SSE41_OP(phminposuw),
2919 [0xdb] = AESNI_OP(aesimc),
2920 [0xdc] = AESNI_OP(aesenc),
2921 [0xdd] = AESNI_OP(aesenclast),
2922 [0xde] = AESNI_OP(aesdec),
2923 [0xdf] = AESNI_OP(aesdeclast),
2926 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2927 [0x08] = SSE41_OP(roundps),
2928 [0x09] = SSE41_OP(roundpd),
2929 [0x0a] = SSE41_OP(roundss),
2930 [0x0b] = SSE41_OP(roundsd),
2931 [0x0c] = SSE41_OP(blendps),
2932 [0x0d] = SSE41_OP(blendpd),
2933 [0x0e] = SSE41_OP(pblendw),
2934 [0x0f] = SSSE3_OP(palignr),
2935 [0x14] = SSE41_SPECIAL, /* pextrb */
2936 [0x15] = SSE41_SPECIAL, /* pextrw */
2937 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2938 [0x17] = SSE41_SPECIAL, /* extractps */
2939 [0x20] = SSE41_SPECIAL, /* pinsrb */
2940 [0x21] = SSE41_SPECIAL, /* insertps */
2941 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2942 [0x40] = SSE41_OP(dpps),
2943 [0x41] = SSE41_OP(dppd),
2944 [0x42] = SSE41_OP(mpsadbw),
2945 [0x44] = PCLMULQDQ_OP(pclmulqdq),
2946 [0x60] = SSE42_OP(pcmpestrm),
2947 [0x61] = SSE42_OP(pcmpestri),
2948 [0x62] = SSE42_OP(pcmpistrm),
2949 [0x63] = SSE42_OP(pcmpistri),
2950 [0xdf] = AESNI_OP(aeskeygenassist),
2953 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2954 target_ulong pc_start, int rex_r)
2956 int b1, op1_offset, op2_offset, is_xmm, val;
2957 int modrm, mod, rm, reg;
2958 SSEFunc_0_epp sse_fn_epp;
2959 SSEFunc_0_eppi sse_fn_eppi;
2960 SSEFunc_0_ppi sse_fn_ppi;
2961 SSEFunc_0_eppt sse_fn_eppt;
2965 if (s->prefix & PREFIX_DATA)
2967 else if (s->prefix & PREFIX_REPZ)
2969 else if (s->prefix & PREFIX_REPNZ)
2973 sse_fn_epp = sse_op_table1[b][b1];
2977 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2987 /* simple MMX/SSE operation */
2988 if (s->flags & HF_TS_MASK) {
2989 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2992 if (s->flags & HF_EM_MASK) {
2994 gen_illegal_opcode(s);
2998 && !(s->flags & HF_OSFXSR_MASK)
2999 && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3003 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3004 /* If we were fully decoding this we might use illegal_op. */
3008 gen_helper_emms(cpu_env);
3013 gen_helper_emms(cpu_env);
3016 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3017 the static cpu state) */
3019 gen_helper_enter_mmx(cpu_env);
3022 modrm = cpu_ldub_code(env, s->pc++);
3023 reg = ((modrm >> 3) & 7);
3026 mod = (modrm >> 6) & 3;
3027 if (sse_fn_epp == SSE_SPECIAL) {
3030 case 0x0e7: /* movntq */
3034 gen_lea_modrm(env, s, modrm);
3035 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3037 case 0x1e7: /* movntdq */
3038 case 0x02b: /* movntps */
3039 case 0x12b: /* movntpd */
3042 gen_lea_modrm(env, s, modrm);
3043 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3045 case 0x3f0: /* lddqu */
3048 gen_lea_modrm(env, s, modrm);
3049 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3051 case 0x22b: /* movntss */
3052 case 0x32b: /* movntsd */
3055 gen_lea_modrm(env, s, modrm);
3057 gen_stq_env_A0(s, offsetof(CPUX86State,
3058 xmm_regs[reg].ZMM_Q(0)));
3060 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3061 xmm_regs[reg].ZMM_L(0)));
3062 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3065 case 0x6e: /* movd mm, ea */
3066 #ifdef TARGET_X86_64
3067 if (s->dflag == MO_64) {
3068 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3069 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3073 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3074 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3075 offsetof(CPUX86State,fpregs[reg].mmx));
3076 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3077 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3080 case 0x16e: /* movd xmm, ea */
3081 #ifdef TARGET_X86_64
3082 if (s->dflag == MO_64) {
3083 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3084 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3085 offsetof(CPUX86State,xmm_regs[reg]));
3086 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3090 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3091 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3092 offsetof(CPUX86State,xmm_regs[reg]));
3093 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3094 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3097 case 0x6f: /* movq mm, ea */
3099 gen_lea_modrm(env, s, modrm);
3100 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3103 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3104 offsetof(CPUX86State,fpregs[rm].mmx));
3105 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3106 offsetof(CPUX86State,fpregs[reg].mmx));
3109 case 0x010: /* movups */
3110 case 0x110: /* movupd */
3111 case 0x028: /* movaps */
3112 case 0x128: /* movapd */
3113 case 0x16f: /* movdqa xmm, ea */
3114 case 0x26f: /* movdqu xmm, ea */
3116 gen_lea_modrm(env, s, modrm);
3117 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3119 rm = (modrm & 7) | REX_B(s);
3120 gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3121 offsetof(CPUX86State,xmm_regs[rm]));
3124 case 0x210: /* movss xmm, ea */
3126 gen_lea_modrm(env, s, modrm);
3127 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3128 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3129 tcg_gen_movi_tl(cpu_T0, 0);
3130 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3131 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3132 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3134 rm = (modrm & 7) | REX_B(s);
3135 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3136 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3139 case 0x310: /* movsd xmm, ea */
3141 gen_lea_modrm(env, s, modrm);
3142 gen_ldq_env_A0(s, offsetof(CPUX86State,
3143 xmm_regs[reg].ZMM_Q(0)));
3144 tcg_gen_movi_tl(cpu_T0, 0);
3145 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3146 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3148 rm = (modrm & 7) | REX_B(s);
3149 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3150 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3153 case 0x012: /* movlps */
3154 case 0x112: /* movlpd */
3156 gen_lea_modrm(env, s, modrm);
3157 gen_ldq_env_A0(s, offsetof(CPUX86State,
3158 xmm_regs[reg].ZMM_Q(0)));
3161 rm = (modrm & 7) | REX_B(s);
3162 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3163 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3166 case 0x212: /* movsldup */
3168 gen_lea_modrm(env, s, modrm);
3169 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3171 rm = (modrm & 7) | REX_B(s);
3172 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3173 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3174 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3175 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3177 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3178 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3179 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3180 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3182 case 0x312: /* movddup */
3184 gen_lea_modrm(env, s, modrm);
3185 gen_ldq_env_A0(s, offsetof(CPUX86State,
3186 xmm_regs[reg].ZMM_Q(0)));
3188 rm = (modrm & 7) | REX_B(s);
3189 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3190 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3192 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3193 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3195 case 0x016: /* movhps */
3196 case 0x116: /* movhpd */
3198 gen_lea_modrm(env, s, modrm);
3199 gen_ldq_env_A0(s, offsetof(CPUX86State,
3200 xmm_regs[reg].ZMM_Q(1)));
3203 rm = (modrm & 7) | REX_B(s);
3204 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3205 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3208 case 0x216: /* movshdup */
3210 gen_lea_modrm(env, s, modrm);
3211 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3213 rm = (modrm & 7) | REX_B(s);
3214 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3215 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3216 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3217 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3219 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3220 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3221 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3222 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3227 int bit_index, field_length;
3229 if (b1 == 1 && reg != 0)
3231 field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3232 bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3233 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3234 offsetof(CPUX86State,xmm_regs[reg]));
3236 gen_helper_extrq_i(cpu_env, cpu_ptr0,
3237 tcg_const_i32(bit_index),
3238 tcg_const_i32(field_length));
3240 gen_helper_insertq_i(cpu_env, cpu_ptr0,
3241 tcg_const_i32(bit_index),
3242 tcg_const_i32(field_length));
3245 case 0x7e: /* movd ea, mm */
3246 #ifdef TARGET_X86_64
3247 if (s->dflag == MO_64) {
3248 tcg_gen_ld_i64(cpu_T0, cpu_env,
3249 offsetof(CPUX86State,fpregs[reg].mmx));
3250 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3254 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3255 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3256 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3259 case 0x17e: /* movd ea, xmm */
3260 #ifdef TARGET_X86_64
3261 if (s->dflag == MO_64) {
3262 tcg_gen_ld_i64(cpu_T0, cpu_env,
3263 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3264 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3268 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3269 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3270 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3273 case 0x27e: /* movq xmm, ea */
3275 gen_lea_modrm(env, s, modrm);
3276 gen_ldq_env_A0(s, offsetof(CPUX86State,
3277 xmm_regs[reg].ZMM_Q(0)));
3279 rm = (modrm & 7) | REX_B(s);
3280 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3281 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3283 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3285 case 0x7f: /* movq ea, mm */
3287 gen_lea_modrm(env, s, modrm);
3288 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3291 gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3292 offsetof(CPUX86State,fpregs[reg].mmx));
3295 case 0x011: /* movups */
3296 case 0x111: /* movupd */
3297 case 0x029: /* movaps */
3298 case 0x129: /* movapd */
3299 case 0x17f: /* movdqa ea, xmm */
3300 case 0x27f: /* movdqu ea, xmm */
3302 gen_lea_modrm(env, s, modrm);
3303 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3305 rm = (modrm & 7) | REX_B(s);
3306 gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3307 offsetof(CPUX86State,xmm_regs[reg]));
3310 case 0x211: /* movss ea, xmm */
3312 gen_lea_modrm(env, s, modrm);
3313 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3314 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3316 rm = (modrm & 7) | REX_B(s);
3317 gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3318 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3321 case 0x311: /* movsd ea, xmm */
3323 gen_lea_modrm(env, s, modrm);
3324 gen_stq_env_A0(s, offsetof(CPUX86State,
3325 xmm_regs[reg].ZMM_Q(0)));
3327 rm = (modrm & 7) | REX_B(s);
3328 gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3329 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3332 case 0x013: /* movlps */
3333 case 0x113: /* movlpd */
3335 gen_lea_modrm(env, s, modrm);
3336 gen_stq_env_A0(s, offsetof(CPUX86State,
3337 xmm_regs[reg].ZMM_Q(0)));
3342 case 0x017: /* movhps */
3343 case 0x117: /* movhpd */
3345 gen_lea_modrm(env, s, modrm);
3346 gen_stq_env_A0(s, offsetof(CPUX86State,
3347 xmm_regs[reg].ZMM_Q(1)));
3352 case 0x71: /* shift mm, im */
3355 case 0x171: /* shift xmm, im */
3361 val = cpu_ldub_code(env, s->pc++);
3363 tcg_gen_movi_tl(cpu_T0, val);
3364 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3365 tcg_gen_movi_tl(cpu_T0, 0);
3366 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3367 op1_offset = offsetof(CPUX86State,xmm_t0);
3369 tcg_gen_movi_tl(cpu_T0, val);
3370 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3371 tcg_gen_movi_tl(cpu_T0, 0);
3372 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3373 op1_offset = offsetof(CPUX86State,mmx_t0);
3375 sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3376 (((modrm >> 3)) & 7)][b1];
3381 rm = (modrm & 7) | REX_B(s);
3382 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3385 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3387 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3388 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3389 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3391 case 0x050: /* movmskps */
3392 rm = (modrm & 7) | REX_B(s);
3393 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3394 offsetof(CPUX86State,xmm_regs[rm]));
3395 gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3396 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3398 case 0x150: /* movmskpd */
3399 rm = (modrm & 7) | REX_B(s);
3400 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3401 offsetof(CPUX86State,xmm_regs[rm]));
3402 gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3403 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3405 case 0x02a: /* cvtpi2ps */
3406 case 0x12a: /* cvtpi2pd */
3407 gen_helper_enter_mmx(cpu_env);
3409 gen_lea_modrm(env, s, modrm);
3410 op2_offset = offsetof(CPUX86State,mmx_t0);
3411 gen_ldq_env_A0(s, op2_offset);
3414 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3416 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3417 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3418 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3421 gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3425 gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3429 case 0x22a: /* cvtsi2ss */
3430 case 0x32a: /* cvtsi2sd */
3431 ot = mo_64_32(s->dflag);
3432 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3433 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3434 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3436 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3437 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3438 sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3440 #ifdef TARGET_X86_64
3441 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3442 sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3448 case 0x02c: /* cvttps2pi */
3449 case 0x12c: /* cvttpd2pi */
3450 case 0x02d: /* cvtps2pi */
3451 case 0x12d: /* cvtpd2pi */
3452 gen_helper_enter_mmx(cpu_env);
3454 gen_lea_modrm(env, s, modrm);
3455 op2_offset = offsetof(CPUX86State,xmm_t0);
3456 gen_ldo_env_A0(s, op2_offset);
3458 rm = (modrm & 7) | REX_B(s);
3459 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3461 op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3462 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3463 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3466 gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3469 gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3472 gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3475 gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3479 case 0x22c: /* cvttss2si */
3480 case 0x32c: /* cvttsd2si */
3481 case 0x22d: /* cvtss2si */
3482 case 0x32d: /* cvtsd2si */
3483 ot = mo_64_32(s->dflag);
3485 gen_lea_modrm(env, s, modrm);
3487 gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3489 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3490 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3492 op2_offset = offsetof(CPUX86State,xmm_t0);
3494 rm = (modrm & 7) | REX_B(s);
3495 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3497 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3499 SSEFunc_i_ep sse_fn_i_ep =
3500 sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3501 sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3502 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3504 #ifdef TARGET_X86_64
3505 SSEFunc_l_ep sse_fn_l_ep =
3506 sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3507 sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3512 gen_op_mov_reg_v(ot, reg, cpu_T0);
3514 case 0xc4: /* pinsrw */
3517 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3518 val = cpu_ldub_code(env, s->pc++);
3521 tcg_gen_st16_tl(cpu_T0, cpu_env,
3522 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3525 tcg_gen_st16_tl(cpu_T0, cpu_env,
3526 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3529 case 0xc5: /* pextrw */
3533 ot = mo_64_32(s->dflag);
3534 val = cpu_ldub_code(env, s->pc++);
3537 rm = (modrm & 7) | REX_B(s);
3538 tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3539 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3543 tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3544 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3546 reg = ((modrm >> 3) & 7) | rex_r;
3547 gen_op_mov_reg_v(ot, reg, cpu_T0);
3549 case 0x1d6: /* movq ea, xmm */
3551 gen_lea_modrm(env, s, modrm);
3552 gen_stq_env_A0(s, offsetof(CPUX86State,
3553 xmm_regs[reg].ZMM_Q(0)));
3555 rm = (modrm & 7) | REX_B(s);
3556 gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3557 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3558 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3561 case 0x2d6: /* movq2dq */
3562 gen_helper_enter_mmx(cpu_env);
3564 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3565 offsetof(CPUX86State,fpregs[rm].mmx));
3566 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3568 case 0x3d6: /* movdq2q */
3569 gen_helper_enter_mmx(cpu_env);
3570 rm = (modrm & 7) | REX_B(s);
3571 gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3572 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3574 case 0xd7: /* pmovmskb */
3579 rm = (modrm & 7) | REX_B(s);
3580 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3581 gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3584 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3585 gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3587 reg = ((modrm >> 3) & 7) | rex_r;
3588 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3594 if ((b & 0xf0) == 0xf0) {
3597 modrm = cpu_ldub_code(env, s->pc++);
3599 reg = ((modrm >> 3) & 7) | rex_r;
3600 mod = (modrm >> 6) & 3;
3605 sse_fn_epp = sse_op_table6[b].op[b1];
3609 if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3613 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3615 op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3617 op2_offset = offsetof(CPUX86State,xmm_t0);
3618 gen_lea_modrm(env, s, modrm);
3620 case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3621 case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3622 case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3623 gen_ldq_env_A0(s, op2_offset +
3624 offsetof(ZMMReg, ZMM_Q(0)));
3626 case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3627 case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3628 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3629 s->mem_index, MO_LEUL);
3630 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3631 offsetof(ZMMReg, ZMM_L(0)));
3633 case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3634 tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3635 s->mem_index, MO_LEUW);
3636 tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3637 offsetof(ZMMReg, ZMM_W(0)));
3639 case 0x2a: /* movntdqa */
3640 gen_ldo_env_A0(s, op1_offset);
3643 gen_ldo_env_A0(s, op2_offset);
3647 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3649 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3651 op2_offset = offsetof(CPUX86State,mmx_t0);
3652 gen_lea_modrm(env, s, modrm);
3653 gen_ldq_env_A0(s, op2_offset);
3656 if (sse_fn_epp == SSE_SPECIAL) {
3660 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3661 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3662 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3665 set_cc_op(s, CC_OP_EFLAGS);
3672 /* Various integer extensions at 0f 38 f[0-f]. */
3673 b = modrm | (b1 << 8);
3674 modrm = cpu_ldub_code(env, s->pc++);
3675 reg = ((modrm >> 3) & 7) | rex_r;
3678 case 0x3f0: /* crc32 Gd,Eb */
3679 case 0x3f1: /* crc32 Gd,Ey */
3681 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3684 if ((b & 0xff) == 0xf0) {
3686 } else if (s->dflag != MO_64) {
3687 ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3692 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3693 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3694 gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3695 cpu_T0, tcg_const_i32(8 << ot));
3697 ot = mo_64_32(s->dflag);
3698 gen_op_mov_reg_v(ot, reg, cpu_T0);
3701 case 0x1f0: /* crc32 or movbe */
3703 /* For these insns, the f3 prefix is supposed to have priority
3704 over the 66 prefix, but that's not what we implement above
3706 if (s->prefix & PREFIX_REPNZ) {
3710 case 0x0f0: /* movbe Gy,My */
3711 case 0x0f1: /* movbe My,Gy */
3712 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3715 if (s->dflag != MO_64) {
3716 ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3721 gen_lea_modrm(env, s, modrm);
3723 tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3724 s->mem_index, ot | MO_BE);
3725 gen_op_mov_reg_v(ot, reg, cpu_T0);
3727 tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3728 s->mem_index, ot | MO_BE);
3732 case 0x0f2: /* andn Gy, By, Ey */
3733 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3734 || !(s->prefix & PREFIX_VEX)
3738 ot = mo_64_32(s->dflag);
3739 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3740 tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
3741 gen_op_mov_reg_v(ot, reg, cpu_T0);
3742 gen_op_update1_cc();
3743 set_cc_op(s, CC_OP_LOGICB + ot);
3746 case 0x0f7: /* bextr Gy, Ey, By */
3747 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3748 || !(s->prefix & PREFIX_VEX)
3752 ot = mo_64_32(s->dflag);
3756 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3757 /* Extract START, and shift the operand.
3758 Shifts larger than operand size get zeros. */
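/* The control word in the vvvv register packs START in bits 7:0 and LEN
   in bits 15:8; e.g. a control of 0x0408 extracts 4 bits starting at
   bit 8 of the source.  */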
3759 tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3760 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3762 bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3763 zero = tcg_const_tl(0);
3764 tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3766 tcg_temp_free(zero);
3768 /* Extract the LEN into a mask. Lengths larger than
3769 operand size get all ones. */
3770 tcg_gen_extract_tl(cpu_A0, cpu_regs[s->vex_v], 8, 8);
3771 tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3773 tcg_temp_free(bound);
3774 tcg_gen_movi_tl(cpu_T1, 1);
3775 tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3776 tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3777 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3779 gen_op_mov_reg_v(ot, reg, cpu_T0);
3780 gen_op_update1_cc();
3781 set_cc_op(s, CC_OP_LOGICB + ot);
3785 case 0x0f5: /* bzhi Gy, Ey, By */
3786 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3787 || !(s->prefix & PREFIX_VEX)
3791 ot = mo_64_32(s->dflag);
3792 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3793 tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3795 TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3796 /* Note that since we're using BMILG (in order to get O
3797 cleared) we need to store the inverse into C. */
3798 tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3800 tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
3801 bound, bound, cpu_T1);
3802 tcg_temp_free(bound);
3804 tcg_gen_movi_tl(cpu_A0, -1);
3805 tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3806 tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3807 gen_op_mov_reg_v(ot, reg, cpu_T0);
3808 gen_op_update1_cc();
3809 set_cc_op(s, CC_OP_BMILGB + ot);
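/* BZHI keeps bits START-1..0 of the source and clears the rest; the mask
   is built here as ~(-1 << START), so e.g. START = 5 applied to 0xff
   yields 0x1f.  */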
3812 case 0x3f6: /* mulx By, Gy, rdx, Ey */
3813 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3814 || !(s->prefix & PREFIX_VEX)
3818 ot = mo_64_32(s->dflag);
3819 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3822 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3823 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3824 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3825 cpu_tmp2_i32, cpu_tmp3_i32);
3826 tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3827 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3829 #ifdef TARGET_X86_64
3831 tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3832 cpu_T0, cpu_regs[R_EDX]);
3833 tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3834 tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3840 case 0x3f5: /* pdep Gy, By, Ey */
3841 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3842 || !(s->prefix & PREFIX_VEX)
3846 ot = mo_64_32(s->dflag);
3847 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3848 /* Note that by zero-extending the mask operand, we
3849 automatically handle zero-extending the result. */
3851 tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3853 tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3855 gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
3858 case 0x2f5: /* pext Gy, By, Ey */
3859 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3860 || !(s->prefix & PREFIX_VEX)
3864 ot = mo_64_32(s->dflag);
3865 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3866 /* Note that by zero-extending the mask operand, we
3867 automatically handle zero-extending the result. */
3869 tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3871 tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
3873 gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
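/* PDEP scatters the low-order source bits into the set bit positions of
   the mask and PEXT gathers them back, e.g. pdep(0b101, 0b11010) ==
   0b10010 and pext(0b10010, 0b11010) == 0b101.  */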
3876 case 0x1f6: /* adcx Gy, Ey */
3877 case 0x2f6: /* adox Gy, Ey */
3878 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3881 TCGv carry_in, carry_out, zero;
3884 ot = mo_64_32(s->dflag);
3885 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3887 /* Re-use the carry-out from a previous round. */
3888 TCGV_UNUSED(carry_in);
3889 carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3893 carry_in = cpu_cc_dst;
3894 end_op = CC_OP_ADCX;
3896 end_op = CC_OP_ADCOX;
3901 end_op = CC_OP_ADCOX;
3903 carry_in = cpu_cc_src2;
3904 end_op = CC_OP_ADOX;
3908 end_op = CC_OP_ADCOX;
3909 carry_in = carry_out;
3912 end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3915 /* If we can't reuse carry-out, get it out of EFLAGS. */
3916 if (TCGV_IS_UNUSED(carry_in)) {
3917 if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3918 gen_compute_eflags(s);
3920 carry_in = cpu_tmp0;
3921 tcg_gen_extract_tl(carry_in, cpu_cc_src,
3922 ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
3926 #ifdef TARGET_X86_64
3928 /* If we know TL is 64-bit, and we want a 32-bit
3929 result, just do everything in 64-bit arithmetic. */
3930 tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3931 tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3932 tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3933 tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
3934 tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
3935 tcg_gen_shri_i64(carry_out, cpu_T0, 32);
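/* e.g. 0xffffffff + 1 with carry-in 0 gives 0x100000000 in the 64-bit
   temporary: bits 31:0 are the 32-bit result (0) and bit 32, recovered
   by the shift, is the carry-out (1).  */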
3939 /* Otherwise compute the carry-out in two steps. */
3940 zero = tcg_const_tl(0);
3941 tcg_gen_add2_tl(cpu_T0, carry_out,
3944 tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3945 cpu_regs[reg], carry_out,
3947 tcg_temp_free(zero);
3950 set_cc_op(s, end_op);
3954 case 0x1f7: /* shlx Gy, Ey, By */
3955 case 0x2f7: /* sarx Gy, Ey, By */
3956 case 0x3f7: /* shrx Gy, Ey, By */
3957 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3958 || !(s->prefix & PREFIX_VEX)
3962 ot = mo_64_32(s->dflag);
3963 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3965 tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
3967 tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
3970 tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
3971 } else if (b == 0x2f7) {
3973 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
3975 tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
3978 tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3980 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
3982 gen_op_mov_reg_v(ot, reg, cpu_T0);
3988 case 0x3f3: /* Group 17 */
3989 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3990 || !(s->prefix & PREFIX_VEX)
3994 ot = mo_64_32(s->dflag);
3995 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3998 case 1: /* blsr By,Ey */
3999 tcg_gen_neg_tl(cpu_T1, cpu_T0);
4000 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
4001 gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
4002 gen_op_update2_cc();
4003 set_cc_op(s, CC_OP_BMILGB + ot);
4006 case 2: /* blsmsk By,Ey */
4007 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4008 tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
4009 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
4010 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4011 set_cc_op(s, CC_OP_BMILGB + ot);
4014 case 3: /* blsi By, Ey */
4015 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4016 tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
4017 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
4018 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4019 set_cc_op(s, CC_OP_BMILGB + ot);
4035 modrm = cpu_ldub_code(env, s->pc++);
4037 reg = ((modrm >> 3) & 7) | rex_r;
4038 mod = (modrm >> 6) & 3;
4043 sse_fn_eppi = sse_op_table7[b].op[b1];
4047 if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4050 if (sse_fn_eppi == SSE_SPECIAL) {
4051 ot = mo_64_32(s->dflag);
4052 rm = (modrm & 7) | REX_B(s);
4054 gen_lea_modrm(env, s, modrm);
4055 reg = ((modrm >> 3) & 7) | rex_r;
4056 val = cpu_ldub_code(env, s->pc++);
4058 case 0x14: /* pextrb */
4059 tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4060 xmm_regs[reg].ZMM_B(val & 15)));
4062 gen_op_mov_reg_v(ot, rm, cpu_T0);
4064 tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4065 s->mem_index, MO_UB);
4068 case 0x15: /* pextrw */
4069 tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4070 xmm_regs[reg].ZMM_W(val & 7)));
4072 gen_op_mov_reg_v(ot, rm, cpu_T0);
4074 tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4075 s->mem_index, MO_LEUW);
4079 if (ot == MO_32) { /* pextrd */
4080 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4081 offsetof(CPUX86State,
4082 xmm_regs[reg].ZMM_L(val & 3)));
4084 tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4086 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4087 s->mem_index, MO_LEUL);
4089 } else { /* pextrq */
4090 #ifdef TARGET_X86_64
4091 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4092 offsetof(CPUX86State,
4093 xmm_regs[reg].ZMM_Q(val & 1)));
4095 tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4097 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4098 s->mem_index, MO_LEQ);
4105 case 0x17: /* extractps */
4106 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4107 xmm_regs[reg].ZMM_L(val & 3)));
4109 gen_op_mov_reg_v(ot, rm, cpu_T0);
4111 tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4112 s->mem_index, MO_LEUL);
4115 case 0x20: /* pinsrb */
4117 gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4119 tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4120 s->mem_index, MO_UB);
4122 tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4123 xmm_regs[reg].ZMM_B(val & 15)));
4125 case 0x21: /* insertps */
4127 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4128 offsetof(CPUX86State,xmm_regs[rm]
4129 .ZMM_L((val >> 6) & 3)));
4131 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4132 s->mem_index, MO_LEUL);
4134 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4135 offsetof(CPUX86State,xmm_regs[reg]
4136 .ZMM_L((val >> 4) & 3)));
4138 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4139 cpu_env, offsetof(CPUX86State,
4140 xmm_regs[reg].ZMM_L(0)));
4142 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4143 cpu_env, offsetof(CPUX86State,
4144 xmm_regs[reg].ZMM_L(1)));
4146 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4147 cpu_env, offsetof(CPUX86State,
4148 xmm_regs[reg].ZMM_L(2)));
4150 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4151 cpu_env, offsetof(CPUX86State,
4152 xmm_regs[reg].ZMM_L(3)));
4155 if (ot == MO_32) { /* pinsrd */
4157 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4159 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4160 s->mem_index, MO_LEUL);
4162 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4163 offsetof(CPUX86State,
4164 xmm_regs[reg].ZMM_L(val & 3)));
4165 } else { /* pinsrq */
4166 #ifdef TARGET_X86_64
4168 gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4170 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4171 s->mem_index, MO_LEQ);
4173 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4174 offsetof(CPUX86State,
4175 xmm_regs[reg].ZMM_Q(val & 1)));
4186 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4188 op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4190 op2_offset = offsetof(CPUX86State,xmm_t0);
4191 gen_lea_modrm(env, s, modrm);
4192 gen_ldo_env_A0(s, op2_offset);
4195 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4197 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4199 op2_offset = offsetof(CPUX86State,mmx_t0);
4200 gen_lea_modrm(env, s, modrm);
4201 gen_ldq_env_A0(s, op2_offset);
4204 val = cpu_ldub_code(env, s->pc++);
4206 if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4207 set_cc_op(s, CC_OP_EFLAGS);
4209 if (s->dflag == MO_64) {
4210 /* The helper must use entire 64-bit gp registers */
4215 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4216 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4217 sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4221 /* Various integer extensions at 0f 3a f[0-f]. */
4222 b = modrm | (b1 << 8);
4223 modrm = cpu_ldub_code(env, s->pc++);
4224 reg = ((modrm >> 3) & 7) | rex_r;
4227 case 0x3f0: /* rorx Gy,Ey, Ib */
4228 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4229 || !(s->prefix & PREFIX_VEX)
4233 ot = mo_64_32(s->dflag);
4234 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4235 b = cpu_ldub_code(env, s->pc++);
4237 tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4239 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4240 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4241 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4243 gen_op_mov_reg_v(ot, reg, cpu_T0);
4253 gen_unknown_opcode(env, s);
4257 /* generic MMX or SSE operation */
4259 case 0x70: /* pshufx insn */
4260 case 0xc6: /* shufps, shufpd */
4261 case 0xc2: /* compare insns */
4268 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4272 gen_lea_modrm(env, s, modrm);
4273 op2_offset = offsetof(CPUX86State,xmm_t0);
4279 /* Most sse scalar operations. */
4282 } else if (b1 == 3) {
4287 case 0x2e: /* ucomis[sd] */
4288 case 0x2f: /* comis[sd] */
4300 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4301 tcg_gen_st32_tl(cpu_T0, cpu_env,
4302 offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4306 gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4309 /* 128 bit access */
4310 gen_ldo_env_A0(s, op2_offset);
4314 rm = (modrm & 7) | REX_B(s);
4315 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4318 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4320 gen_lea_modrm(env, s, modrm);
4321 op2_offset = offsetof(CPUX86State,mmx_t0);
4322 gen_ldq_env_A0(s, op2_offset);
4325 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4329 case 0x0f: /* 3DNow! data insns */
4330 val = cpu_ldub_code(env, s->pc++);
4331 sse_fn_epp = sse_op_table5[val];
4335 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4338 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4339 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4340 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4342 case 0x70: /* pshufx insn */
4343 case 0xc6: /* shufps, shufpd */
4344 val = cpu_ldub_code(env, s->pc++);
4345 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4346 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4347 /* XXX: introduce a new table? */
4348 sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4349 sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4353 val = cpu_ldub_code(env, s->pc++);
4356 sse_fn_epp = sse_op_table4[val][b1];
4358 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4359 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4360 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4363 /* maskmov : we must prepare A0 */
4366 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4367 gen_extu(s->aflag, cpu_A0);
4368 gen_add_A0_ds_seg(s);
4370 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4371 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4372 /* XXX: introduce a new table? */
4373 sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4374 sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4377 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4378 tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4379 sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4382 if (b == 0x2e || b == 0x2f) {
4383 set_cc_op(s, CC_OP_EFLAGS);
4388 /* convert one instruction. s->is_jmp is set if the translation must
4389 be stopped. Return the next pc value */
4390 static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4391 target_ulong pc_start)
4395 TCGMemOp ot, aflag, dflag;
4396 int modrm, reg, rm, mod, op, opreg, val;
4397 target_ulong next_eip, tval;
4400 s->pc_start = s->pc = pc_start;
4405 #ifdef TARGET_X86_64
4410 s->rip_offset = 0; /* for relative ip address */
4414 b = cpu_ldub_code(env, s->pc);
4416 /* Collect prefixes. */
4419 prefixes |= PREFIX_REPZ;
4422 prefixes |= PREFIX_REPNZ;
4425 prefixes |= PREFIX_LOCK;
4446 prefixes |= PREFIX_DATA;
4449 prefixes |= PREFIX_ADR;
4451 #ifdef TARGET_X86_64
4455 rex_w = (b >> 3) & 1;
4456 rex_r = (b & 0x4) << 1;
4457 s->rex_x = (b & 0x2) << 2;
4458 REX_B(s) = (b & 0x1) << 3;
4459 x86_64_hregs = 1; /* select uniform byte register addressing */
4464 case 0xc5: /* 2-byte VEX */
4465 case 0xc4: /* 3-byte VEX */
4466 /* VEX prefixes cannot be used except in 32-bit mode.
4467 Otherwise the instruction is LES or LDS. */
4468 if (s->code32 && !s->vm86) {
4469 static const int pp_prefix[4] = {
4470 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4472 int vex3, vex2 = cpu_ldub_code(env, s->pc);
4474 if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4475 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4476 otherwise the instruction is LES or LDS. */
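/* e.g. in 32-bit mode a second byte of 0xe2 (top two bits set) makes
   0xc4 a three-byte VEX prefix, while a second byte of 0x01 makes it
   "les eax, [ecx]" instead.  */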
4481 /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4482 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4483 | PREFIX_LOCK | PREFIX_DATA)) {
4486 #ifdef TARGET_X86_64
4491 rex_r = (~vex2 >> 4) & 8;
4494 b = cpu_ldub_code(env, s->pc++);
4496 #ifdef TARGET_X86_64
4497 s->rex_x = (~vex2 >> 3) & 8;
4498 s->rex_b = (~vex2 >> 2) & 8;
4500 vex3 = cpu_ldub_code(env, s->pc++);
4501 rex_w = (vex3 >> 7) & 1;
4502 switch (vex2 & 0x1f) {
4503 case 0x01: /* Implied 0f leading opcode bytes. */
4504 b = cpu_ldub_code(env, s->pc++) | 0x100;
4506 case 0x02: /* Implied 0f 38 leading opcode bytes. */
4509 case 0x03: /* Implied 0f 3a leading opcode bytes. */
4512 default: /* Reserved for future use. */
4516 s->vex_v = (~vex3 >> 3) & 0xf;
4517 s->vex_l = (vex3 >> 2) & 1;
4518 prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4523 /* Post-process prefixes. */
4525 /* In 64-bit mode, the default data size is 32-bit. Select 64-bit
4526 data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4527 over 0x66 if both are present. */
4528 dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4529 /* In 64-bit mode, 0x67 selects 32-bit addressing. */
4530 aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
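/* So in 64-bit code: no prefix gives 32-bit data, REX.W gives 64-bit,
   0x66 alone gives 16-bit, and REX.W plus 0x66 still gives 64-bit;
   0x67 selects 32-bit instead of 64-bit addressing.  */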
4532 /* In 16/32-bit mode, 0x66 selects the opposite data size. */
4533 if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4538 /* In 16/32-bit mode, 0x67 selects the opposite addressing. */
4539 if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4546 s->prefix = prefixes;
4550 /* now check op code */
4554 /**************************/
4555 /* extended op code */
4556 b = cpu_ldub_code(env, s->pc++) | 0x100;
4559 /**************************/
4574 ot = mo_b_d(b, dflag);
4577 case 0: /* OP Ev, Gv */
4578 modrm = cpu_ldub_code(env, s->pc++);
4579 reg = ((modrm >> 3) & 7) | rex_r;
4580 mod = (modrm >> 6) & 3;
4581 rm = (modrm & 7) | REX_B(s);
4583 gen_lea_modrm(env, s, modrm);
4585 } else if (op == OP_XORL && rm == reg) {
4587 /* xor reg, reg optimisation */
4588 set_cc_op(s, CC_OP_CLR);
4589 tcg_gen_movi_tl(cpu_T0, 0);
4590 gen_op_mov_reg_v(ot, reg, cpu_T0);
4595 gen_op_mov_v_reg(ot, cpu_T1, reg);
4596 gen_op(s, op, ot, opreg);
4598 case 1: /* OP Gv, Ev */
4599 modrm = cpu_ldub_code(env, s->pc++);
4600 mod = (modrm >> 6) & 3;
4601 reg = ((modrm >> 3) & 7) | rex_r;
4602 rm = (modrm & 7) | REX_B(s);
4604 gen_lea_modrm(env, s, modrm);
4605 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4606 } else if (op == OP_XORL && rm == reg) {
4609 gen_op_mov_v_reg(ot, cpu_T1, rm);
4611 gen_op(s, op, ot, reg);
4613 case 2: /* OP A, Iv */
4614 val = insn_get(env, s, ot);
4615 tcg_gen_movi_tl(cpu_T1, val);
4616 gen_op(s, op, ot, OR_EAX);
4625 case 0x80: /* GRP1 */
4631 ot = mo_b_d(b, dflag);
4633 modrm = cpu_ldub_code(env, s->pc++);
4634 mod = (modrm >> 6) & 3;
4635 rm = (modrm & 7) | REX_B(s);
4636 op = (modrm >> 3) & 7;
4642 s->rip_offset = insn_const_size(ot);
4643 gen_lea_modrm(env, s, modrm);
4654 val = insn_get(env, s, ot);
4657 val = (int8_t)insn_get(env, s, MO_8);
4660 tcg_gen_movi_tl(cpu_T1, val);
4661 gen_op(s, op, ot, opreg);
4665 /**************************/
4666 /* inc, dec, and other misc arith */
4667 case 0x40 ... 0x47: /* inc Gv */
4669 gen_inc(s, ot, OR_EAX + (b & 7), 1);
4671 case 0x48 ... 0x4f: /* dec Gv */
4673 gen_inc(s, ot, OR_EAX + (b & 7), -1);
4675 case 0xf6: /* GRP3 */
4677 ot = mo_b_d(b, dflag);
4679 modrm = cpu_ldub_code(env, s->pc++);
4680 mod = (modrm >> 6) & 3;
4681 rm = (modrm & 7) | REX_B(s);
4682 op = (modrm >> 3) & 7;
4685 s->rip_offset = insn_const_size(ot);
4687 gen_lea_modrm(env, s, modrm);
4688 /* For those below that handle locked memory, don't load here. */
4689 if (!(s->prefix & PREFIX_LOCK)
4691 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4694 gen_op_mov_v_reg(ot, cpu_T0, rm);
4699 val = insn_get(env, s, ot);
4700 tcg_gen_movi_tl(cpu_T1, val);
4701 gen_op_testl_T0_T1_cc();
4702 set_cc_op(s, CC_OP_LOGICB + ot);
4705 if (s->prefix & PREFIX_LOCK) {
4709 tcg_gen_movi_tl(cpu_T0, ~0);
4710 tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0,
4711 s->mem_index, ot | MO_LE);
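/* A locked NOT is expressed as an atomic XOR with all-ones, which
   flips every bit of the memory operand in place.  */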
4713 tcg_gen_not_tl(cpu_T0, cpu_T0);
4715 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4717 gen_op_mov_reg_v(ot, rm, cpu_T0);
4722 if (s->prefix & PREFIX_LOCK) {
4724 TCGv a0, t0, t1, t2;
4729 a0 = tcg_temp_local_new();
4730 t0 = tcg_temp_local_new();
4731 label1 = gen_new_label();
4733 tcg_gen_mov_tl(a0, cpu_A0);
4734 tcg_gen_mov_tl(t0, cpu_T0);
4736 gen_set_label(label1);
4737 t1 = tcg_temp_new();
4738 t2 = tcg_temp_new();
4739 tcg_gen_mov_tl(t2, t0);
4740 tcg_gen_neg_tl(t1, t0);
4741 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4742 s->mem_index, ot | MO_LE);
4744 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
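/* Locked NEG has no single atomic op, so loop: read the old value, try
   to cmpxchg it with its negation, and retry if the memory word changed
   in between.  */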
4748 tcg_gen_mov_tl(cpu_T0, t0);
4751 tcg_gen_neg_tl(cpu_T0, cpu_T0);
4753 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
4755 gen_op_mov_reg_v(ot, rm, cpu_T0);
4758 gen_op_update_neg_cc();
4759 set_cc_op(s, CC_OP_SUBB + ot);
4764 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4765 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
4766 tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
4767 /* XXX: use 32 bit mul which could be faster */
4768 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4769 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4770 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4771 tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
4772 set_cc_op(s, CC_OP_MULB);
4775 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4776 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4777 tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
4778 /* XXX: use 32 bit mul which could be faster */
4779 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4780 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4781 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4782 tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4783 gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4784 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
4785 set_cc_op(s, CC_OP_MULW);
4789 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4790 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4791 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4792 cpu_tmp2_i32, cpu_tmp3_i32);
4793 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4794 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4795 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4796 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4797 set_cc_op(s, CC_OP_MULL);
4799 #ifdef TARGET_X86_64
4801 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4802 cpu_T0, cpu_regs[R_EAX]);
4803 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4804 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4805 set_cc_op(s, CC_OP_MULQ);
4813 gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
4814 tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
4815 tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
4816 /* XXX: use 32 bit mul which could be faster */
4817 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4818 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4819 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4820 tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
4821 tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4822 set_cc_op(s, CC_OP_MULB);
4825 gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
4826 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
4827 tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
4828 /* XXX: use 32 bit mul which could be faster */
4829 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
4830 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
4831 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
4832 tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
4833 tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
4834 tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
4835 gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
4836 set_cc_op(s, CC_OP_MULW);
4840 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4841 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4842 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4843 cpu_tmp2_i32, cpu_tmp3_i32);
4844 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4845 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4846 tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4847 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4848 tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4849 tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4850 set_cc_op(s, CC_OP_MULL);
4852 #ifdef TARGET_X86_64
4854 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4855 cpu_T0, cpu_regs[R_EAX]);
4856 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4857 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4858 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4859 set_cc_op(s, CC_OP_MULQ);
4867 gen_helper_divb_AL(cpu_env, cpu_T0);
4870 gen_helper_divw_AX(cpu_env, cpu_T0);
4874 gen_helper_divl_EAX(cpu_env, cpu_T0);
4876 #ifdef TARGET_X86_64
4878 gen_helper_divq_EAX(cpu_env, cpu_T0);
4886 gen_helper_idivb_AL(cpu_env, cpu_T0);
4889 gen_helper_idivw_AX(cpu_env, cpu_T0);
4893 gen_helper_idivl_EAX(cpu_env, cpu_T0);
4895 #ifdef TARGET_X86_64
4897 gen_helper_idivq_EAX(cpu_env, cpu_T0);
4907 case 0xfe: /* GRP4 */
4908 case 0xff: /* GRP5 */
4909 ot = mo_b_d(b, dflag);
4911 modrm = cpu_ldub_code(env, s->pc++);
4912 mod = (modrm >> 6) & 3;
4913 rm = (modrm & 7) | REX_B(s);
4914 op = (modrm >> 3) & 7;
4915 if (op >= 2 && b == 0xfe) {
4919 if (op == 2 || op == 4) {
4920 /* operand size for jumps is 64 bit */
4922 } else if (op == 3 || op == 5) {
4923 ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4924 } else if (op == 6) {
4925 /* default push size is 64 bit */
4926 ot = mo_pushpop(s, dflag);
4930 gen_lea_modrm(env, s, modrm);
4931 if (op >= 2 && op != 3 && op != 5)
4932 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
4934 gen_op_mov_v_reg(ot, cpu_T0, rm);
4938 case 0: /* inc Ev */
4943 gen_inc(s, ot, opreg, 1);
4945 case 1: /* dec Ev */
4950 gen_inc(s, ot, opreg, -1);
4952 case 2: /* call Ev */
4953 /* XXX: optimize if memory (no 'and' is necessary) */
4954 if (dflag == MO_16) {
4955 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4957 next_eip = s->pc - s->cs_base;
4958 tcg_gen_movi_tl(cpu_T1, next_eip);
4959 gen_push_v(s, cpu_T1);
4960 gen_op_jmp_v(cpu_T0);
4964 case 3: /* lcall Ev */
4965 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4966 gen_add_A0_im(s, 1 << ot);
4967 gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4969 if (s->pe && !s->vm86) {
4970 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4971 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4972 tcg_const_i32(dflag - 1),
4973 tcg_const_tl(s->pc - s->cs_base));
4975 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4976 gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
4977 tcg_const_i32(dflag - 1),
4978 tcg_const_i32(s->pc - s->cs_base));
4982 case 4: /* jmp Ev */
4983 if (dflag == MO_16) {
4984 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
4986 gen_op_jmp_v(cpu_T0);
4990 case 5: /* ljmp Ev */
4991 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
4992 gen_add_A0_im(s, 1 << ot);
4993 gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
4995 if (s->pe && !s->vm86) {
4996 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4997 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
4998 tcg_const_tl(s->pc - s->cs_base));
5000 gen_op_movl_seg_T0_vm(R_CS);
5001 gen_op_jmp_v(cpu_T1);
5005 case 6: /* push Ev */
5006 gen_push_v(s, cpu_T0);
5013 case 0x84: /* test Ev, Gv */
5015 ot = mo_b_d(b, dflag);
5017 modrm = cpu_ldub_code(env, s->pc++);
5018 reg = ((modrm >> 3) & 7) | rex_r;
5020 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5021 gen_op_mov_v_reg(ot, cpu_T1, reg);
5022 gen_op_testl_T0_T1_cc();
5023 set_cc_op(s, CC_OP_LOGICB + ot);
5026 case 0xa8: /* test eAX, Iv */
5028 ot = mo_b_d(b, dflag);
5029 val = insn_get(env, s, ot);
5031 gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
5032 tcg_gen_movi_tl(cpu_T1, val);
5033 gen_op_testl_T0_T1_cc();
5034 set_cc_op(s, CC_OP_LOGICB + ot);
5037 case 0x98: /* CWDE/CBW */
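/* CBW/CWDE/CDQE: sign-extend the low half of the accumulator into the next wider size selected by the operand size. */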
5039 #ifdef TARGET_X86_64
5041 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5042 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5043 gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
5047 gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5048 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5049 gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
5052 gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
5053 tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5054 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
5060 case 0x99: /* CDQ/CWD */
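/* CWD/CDQ/CQO: replicate the accumulator's sign bit into DX/EDX/RDX with an arithmetic shift. */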
5062 #ifdef TARGET_X86_64
5064 gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
5065 tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
5066 gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
5070 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
5071 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
5072 tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
5073 gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
5076 gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
5077 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5078 tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
5079 gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
5085 case 0x1af: /* imul Gv, Ev */
5086 case 0x69: /* imul Gv, Ev, I */
5089 modrm = cpu_ldub_code(env, s->pc++);
5090 reg = ((modrm >> 3) & 7) | rex_r;
5092 s->rip_offset = insn_const_size(ot);
5095 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5097 val = insn_get(env, s, ot);
5098 tcg_gen_movi_tl(cpu_T1, val);
5099 } else if (b == 0x6b) {
5100 val = (int8_t)insn_get(env, s, MO_8);
5101 tcg_gen_movi_tl(cpu_T1, val);
5103 gen_op_mov_v_reg(ot, cpu_T1, reg);
5106 #ifdef TARGET_X86_64
5108 tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
5109 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5110 tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5111 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
5115 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
5116 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
5117 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5118 cpu_tmp2_i32, cpu_tmp3_i32);
5119 tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5120 tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5121 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5122 tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5123 tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5126 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5127 tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
5128 /* XXX: use 32 bit mul which could be faster */
5129 tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
5130 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
5131 tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
5132 tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
5133 gen_op_mov_reg_v(ot, reg, cpu_T0);
5136 set_cc_op(s, CC_OP_MULB + ot);
5139 case 0x1c1: /* xadd Ev, Gv */
5140 ot = mo_b_d(b, dflag);
5141 modrm = cpu_ldub_code(env, s->pc++);
5142 reg = ((modrm >> 3) & 7) | rex_r;
5143 mod = (modrm >> 6) & 3;
5144 gen_op_mov_v_reg(ot, cpu_T0, reg);
5146 rm = (modrm & 7) | REX_B(s);
5147 gen_op_mov_v_reg(ot, cpu_T1, rm);
5148 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5149 gen_op_mov_reg_v(ot, reg, cpu_T1);
5150 gen_op_mov_reg_v(ot, rm, cpu_T0);
5152 gen_lea_modrm(env, s, modrm);
5153 if (s->prefix & PREFIX_LOCK) {
5154 tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0,
5155 s->mem_index, ot | MO_LE);
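/* cpu_T1 now holds the value that was in memory before the add; redo the add locally so gen_op_update2_cc below sees the same T0/T1 pair as the non-LOCK path. */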
5156 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5158 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5159 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
5160 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5162 gen_op_mov_reg_v(ot, reg, cpu_T1);
5164 gen_op_update2_cc();
5165 set_cc_op(s, CC_OP_ADDB + ot);
5168 case 0x1b1: /* cmpxchg Ev, Gv */
5170 TCGv oldv, newv, cmpv;
5172 ot = mo_b_d(b, dflag);
5173 modrm = cpu_ldub_code(env, s->pc++);
5174 reg = ((modrm >> 3) & 7) | rex_r;
5175 mod = (modrm >> 6) & 3;
5176 oldv = tcg_temp_new();
5177 newv = tcg_temp_new();
5178 cmpv = tcg_temp_new();
5179 gen_op_mov_v_reg(ot, newv, reg);
5180 tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5182 if (s->prefix & PREFIX_LOCK) {
5186 gen_lea_modrm(env, s, modrm);
5187 tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
5188 s->mem_index, ot | MO_LE);
5189 gen_op_mov_reg_v(ot, R_EAX, oldv);
5192 rm = (modrm & 7) | REX_B(s);
5193 gen_op_mov_v_reg(ot, oldv, rm);
5195 gen_lea_modrm(env, s, modrm);
5196 gen_op_ld_v(s, ot, oldv, cpu_A0);
5197 rm = 0; /* avoid warning */
5201 /* store value = (old == cmp ? new : old); */
5202 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5204 gen_op_mov_reg_v(ot, R_EAX, oldv);
5205 gen_op_mov_reg_v(ot, rm, newv);
5207 /* Perform an unconditional store cycle like physical cpu;
5208 must be before changing accumulator to ensure
5209 idempotency if the store faults and the instruction is restarted. */
5211 gen_op_st_v(s, ot, newv, cpu_A0);
5212 gen_op_mov_reg_v(ot, R_EAX, oldv);
5215 tcg_gen_mov_tl(cpu_cc_src, oldv);
5216 tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
5217 tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5218 set_cc_op(s, CC_OP_SUBB + ot);
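/* The flag state written above is that of CMP accumulator, dest: CC_OP_SUB with srcT = the accumulator and dst = accumulator - old value. */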
5219 tcg_temp_free(oldv);
5220 tcg_temp_free(newv);
5221 tcg_temp_free(cmpv);
5224 case 0x1c7: /* cmpxchg8b */
5225 modrm = cpu_ldub_code(env, s->pc++);
5226 mod = (modrm >> 6) & 3;
5227 if ((mod == 3) || ((modrm & 0x38) != 0x8))
5229 #ifdef TARGET_X86_64
5230 if (dflag == MO_64) {
5231 if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5233 gen_lea_modrm(env, s, modrm);
5234 if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5235 gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5237 gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0);
5242 if (!(s->cpuid_features & CPUID_CX8))
5244 gen_lea_modrm(env, s, modrm);
5245 if ((s->prefix & PREFIX_LOCK) && parallel_cpus) {
5246 gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5248 gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0);
5251 set_cc_op(s, CC_OP_EFLAGS);
5254 /**************************/
5256 case 0x50 ... 0x57: /* push */
5257 gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
5258 gen_push_v(s, cpu_T0);
5260 case 0x58 ... 0x5f: /* pop */
5262 /* NOTE: order is important for pop %sp */
5263 gen_pop_update(s, ot);
5264 gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
5266 case 0x60: /* pusha */
5271 case 0x61: /* popa */
5276 case 0x68: /* push Iv */
5278 ot = mo_pushpop(s, dflag);
5280 val = insn_get(env, s, ot);
5282 val = (int8_t)insn_get(env, s, MO_8);
5283 tcg_gen_movi_tl(cpu_T0, val);
5284 gen_push_v(s, cpu_T0);
5286 case 0x8f: /* pop Ev */
5287 modrm = cpu_ldub_code(env, s->pc++);
5288 mod = (modrm >> 6) & 3;
5291 /* NOTE: order is important for pop %sp */
5292 gen_pop_update(s, ot);
5293 rm = (modrm & 7) | REX_B(s);
5294 gen_op_mov_reg_v(ot, rm, cpu_T0);
5296 /* NOTE: order is important too for MMU exceptions */
5297 s->popl_esp_hack = 1 << ot;
5298 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5299 s->popl_esp_hack = 0;
5300 gen_pop_update(s, ot);
5303 case 0xc8: /* enter */
5306 val = cpu_lduw_code(env, s->pc);
5308 level = cpu_ldub_code(env, s->pc++);
5309 gen_enter(s, val, level);
5312 case 0xc9: /* leave */
5315 case 0x06: /* push es */
5316 case 0x0e: /* push cs */
5317 case 0x16: /* push ss */
5318 case 0x1e: /* push ds */
5321 gen_op_movl_T0_seg(b >> 3);
5322 gen_push_v(s, cpu_T0);
5324 case 0x1a0: /* push fs */
5325 case 0x1a8: /* push gs */
5326 gen_op_movl_T0_seg((b >> 3) & 7);
5327 gen_push_v(s, cpu_T0);
5329 case 0x07: /* pop es */
5330 case 0x17: /* pop ss */
5331 case 0x1f: /* pop ds */
5336 gen_movl_seg_T0(s, reg);
5337 gen_pop_update(s, ot);
5338 /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
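/* A load into SS also inhibits interrupts and traps until after the next instruction, hence the SS case below ends the TB with the IRQ-inhibit flag set. */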
5340 gen_jmp_im(s->pc - s->cs_base);
5343 gen_eob_inhibit_irq(s, true);
5349 case 0x1a1: /* pop fs */
5350 case 0x1a9: /* pop gs */
5352 gen_movl_seg_T0(s, (b >> 3) & 7);
5353 gen_pop_update(s, ot);
5355 gen_jmp_im(s->pc - s->cs_base);
5360 /**************************/
5363 case 0x89: /* mov Gv, Ev */
5364 ot = mo_b_d(b, dflag);
5365 modrm = cpu_ldub_code(env, s->pc++);
5366 reg = ((modrm >> 3) & 7) | rex_r;
5368 /* generate a generic store */
5369 gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5372 case 0xc7: /* mov Ev, Iv */
5373 ot = mo_b_d(b, dflag);
5374 modrm = cpu_ldub_code(env, s->pc++);
5375 mod = (modrm >> 6) & 3;
5377 s->rip_offset = insn_const_size(ot);
5378 gen_lea_modrm(env, s, modrm);
5380 val = insn_get(env, s, ot);
5381 tcg_gen_movi_tl(cpu_T0, val);
5383 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5385 gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
5389 case 0x8b: /* mov Ev, Gv */
5390 ot = mo_b_d(b, dflag);
5391 modrm = cpu_ldub_code(env, s->pc++);
5392 reg = ((modrm >> 3) & 7) | rex_r;
5394 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5395 gen_op_mov_reg_v(ot, reg, cpu_T0);
5397 case 0x8e: /* mov seg, Gv */
5398 modrm = cpu_ldub_code(env, s->pc++);
5399 reg = (modrm >> 3) & 7;
5400 if (reg >= 6 || reg == R_CS)
5402 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5403 gen_movl_seg_T0(s, reg);
5404 /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */
5406 gen_jmp_im(s->pc - s->cs_base);
5409 gen_eob_inhibit_irq(s, true);
5415 case 0x8c: /* mov Gv, seg */
5416 modrm = cpu_ldub_code(env, s->pc++);
5417 reg = (modrm >> 3) & 7;
5418 mod = (modrm >> 6) & 3;
5421 gen_op_movl_T0_seg(reg);
5422 ot = mod == 3 ? dflag : MO_16;
5423 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5426 case 0x1b6: /* movzbS Gv, Eb */
5427 case 0x1b7: /* movzwS Gv, Eb */
5428 case 0x1be: /* movsbS Gv, Eb */
5429 case 0x1bf: /* movswS Gv, Eb */
5434 /* d_ot is the size of destination */
5436 /* ot is the size of source */
5437 ot = (b & 1) + MO_8;
5438 /* s_ot is the sign+size of source */
5439 s_ot = b & 8 ? MO_SIGN | ot : ot;
5441 modrm = cpu_ldub_code(env, s->pc++);
5442 reg = ((modrm >> 3) & 7) | rex_r;
5443 mod = (modrm >> 6) & 3;
5444 rm = (modrm & 7) | REX_B(s);
5447 if (s_ot == MO_SB && byte_reg_is_xH(rm)) {
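/* AH/CH/DH/BH live in bits 15:8 of the corresponding low register (rm - 4), so movsx from an xH register is a sign-extracting shift. */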
5448 tcg_gen_sextract_tl(cpu_T0, cpu_regs[rm - 4], 8, 8);
5450 gen_op_mov_v_reg(ot, cpu_T0, rm);
5453 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
5456 tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
5459 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
5463 tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
5467 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5469 gen_lea_modrm(env, s, modrm);
5470 gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
5471 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
5476 case 0x8d: /* lea */
5477 modrm = cpu_ldub_code(env, s->pc++);
5478 mod = (modrm >> 6) & 3;
5481 reg = ((modrm >> 3) & 7) | rex_r;
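/* LEA computes only the offset: no segment base is added (seg -1) and no memory access is performed. */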
5483 AddressParts a = gen_lea_modrm_0(env, s, modrm);
5484 TCGv ea = gen_lea_modrm_1(a);
5485 gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5486 gen_op_mov_reg_v(dflag, reg, cpu_A0);
5490 case 0xa0: /* mov EAX, Ov */
5492 case 0xa2: /* mov Ov, EAX */
5495 target_ulong offset_addr;
5497 ot = mo_b_d(b, dflag);
5499 #ifdef TARGET_X86_64
5501 offset_addr = cpu_ldq_code(env, s->pc);
5506 offset_addr = insn_get(env, s, s->aflag);
5509 tcg_gen_movi_tl(cpu_A0, offset_addr);
5510 gen_add_A0_ds_seg(s);
5512 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
5513 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
5515 gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
5516 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
5520 case 0xd7: /* xlat */
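/* XLAT: AL = [seg:(E/R)BX + ZeroExtend(AL)], with the sum truncated to the current address size. */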
5521 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5522 tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
5523 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
5524 gen_extu(s->aflag, cpu_A0);
5525 gen_add_A0_ds_seg(s);
5526 gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
5527 gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
5529 case 0xb0 ... 0xb7: /* mov R, Ib */
5530 val = insn_get(env, s, MO_8);
5531 tcg_gen_movi_tl(cpu_T0, val);
5532 gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
5534 case 0xb8 ... 0xbf: /* mov R, Iv */
5535 #ifdef TARGET_X86_64
5536 if (dflag == MO_64) {
5539 tmp = cpu_ldq_code(env, s->pc);
5541 reg = (b & 7) | REX_B(s);
5542 tcg_gen_movi_tl(cpu_T0, tmp);
5543 gen_op_mov_reg_v(MO_64, reg, cpu_T0);
5548 val = insn_get(env, s, ot);
5549 reg = (b & 7) | REX_B(s);
5550 tcg_gen_movi_tl(cpu_T0, val);
5551 gen_op_mov_reg_v(ot, reg, cpu_T0);
5555 case 0x91 ... 0x97: /* xchg R, EAX */
5558 reg = (b & 7) | REX_B(s);
5562 case 0x87: /* xchg Ev, Gv */
5563 ot = mo_b_d(b, dflag);
5564 modrm = cpu_ldub_code(env, s->pc++);
5565 reg = ((modrm >> 3) & 7) | rex_r;
5566 mod = (modrm >> 6) & 3;
5568 rm = (modrm & 7) | REX_B(s);
5570 gen_op_mov_v_reg(ot, cpu_T0, reg);
5571 gen_op_mov_v_reg(ot, cpu_T1, rm);
5572 gen_op_mov_reg_v(ot, rm, cpu_T0);
5573 gen_op_mov_reg_v(ot, reg, cpu_T1);
5575 gen_lea_modrm(env, s, modrm);
5576 gen_op_mov_v_reg(ot, cpu_T0, reg);
5577 /* for xchg, lock is implicit */
5578 tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0,
5579 s->mem_index, ot | MO_LE);
5580 gen_op_mov_reg_v(ot, reg, cpu_T1);
5583 case 0xc4: /* les Gv */
5584 /* In CODE64 this is VEX3; see above. */
5587 case 0xc5: /* lds Gv */
5588 /* In CODE64 this is VEX2; see above. */
5591 case 0x1b2: /* lss Gv */
5594 case 0x1b4: /* lfs Gv */
5597 case 0x1b5: /* lgs Gv */
5600 ot = dflag != MO_16 ? MO_32 : MO_16;
5601 modrm = cpu_ldub_code(env, s->pc++);
5602 reg = ((modrm >> 3) & 7) | rex_r;
5603 mod = (modrm >> 6) & 3;
5606 gen_lea_modrm(env, s, modrm);
5607 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
5608 gen_add_A0_im(s, 1 << ot);
5609 /* load the segment first to handle exceptions properly */
5610 gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
5611 gen_movl_seg_T0(s, op);
5612 /* then put the data */
5613 gen_op_mov_reg_v(ot, reg, cpu_T1);
5615 gen_jmp_im(s->pc - s->cs_base);
5620 /************************/
5628 ot = mo_b_d(b, dflag);
5629 modrm = cpu_ldub_code(env, s->pc++);
5630 mod = (modrm >> 6) & 3;
5631 op = (modrm >> 3) & 7;
5637 gen_lea_modrm(env, s, modrm);
5640 opreg = (modrm & 7) | REX_B(s);
5645 gen_shift(s, op, ot, opreg, OR_ECX);
5648 shift = cpu_ldub_code(env, s->pc++);
5650 gen_shifti(s, op, ot, opreg, shift);
5665 case 0x1a4: /* shld imm */
5669 case 0x1a5: /* shld cl */
5673 case 0x1ac: /* shrd imm */
5677 case 0x1ad: /* shrd cl */
5682 modrm = cpu_ldub_code(env, s->pc++);
5683 mod = (modrm >> 6) & 3;
5684 rm = (modrm & 7) | REX_B(s);
5685 reg = ((modrm >> 3) & 7) | rex_r;
5687 gen_lea_modrm(env, s, modrm);
5692 gen_op_mov_v_reg(ot, cpu_T1, reg);
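/* SHLD/SHRD shift the destination while feeding in bits from the register operand held in cpu_T1; the count is either an immediate byte or CL. */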
5695 TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5696 gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5699 gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5703 /************************/
5706 if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5707 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5708 /* XXX: what to do if illegal op? */
5709 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5712 modrm = cpu_ldub_code(env, s->pc++);
5713 mod = (modrm >> 6) & 3;
5715 op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5718 gen_lea_modrm(env, s, modrm);
5720 case 0x00 ... 0x07: /* fxxxs */
5721 case 0x10 ... 0x17: /* fixxxl */
5722 case 0x20 ... 0x27: /* fxxxl */
5723 case 0x30 ... 0x37: /* fixxx */
5730 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5731 s->mem_index, MO_LEUL);
5732 gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5735 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5736 s->mem_index, MO_LEUL);
5737 gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5740 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5741 s->mem_index, MO_LEQ);
5742 gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5746 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5747 s->mem_index, MO_LESW);
5748 gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5752 gen_helper_fp_arith_ST0_FT0(op1);
5754 /* fcomp needs pop */
5755 gen_helper_fpop(cpu_env);
5759 case 0x08: /* flds */
5760 case 0x0a: /* fsts */
5761 case 0x0b: /* fstps */
5762 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5763 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5764 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5769 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5770 s->mem_index, MO_LEUL);
5771 gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5774 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5775 s->mem_index, MO_LEUL);
5776 gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5779 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5780 s->mem_index, MO_LEQ);
5781 gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5785 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5786 s->mem_index, MO_LESW);
5787 gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5792 /* XXX: the corresponding CPUID bit must be tested! */
5795 gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5796 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5797 s->mem_index, MO_LEUL);
5800 gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5801 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5802 s->mem_index, MO_LEQ);
5806 gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5807 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5808 s->mem_index, MO_LEUW);
5811 gen_helper_fpop(cpu_env);
5816 gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5817 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5818 s->mem_index, MO_LEUL);
5821 gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5822 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5823 s->mem_index, MO_LEUL);
5826 gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5827 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5828 s->mem_index, MO_LEQ);
5832 gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5833 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5834 s->mem_index, MO_LEUW);
5838 gen_helper_fpop(cpu_env);
5842 case 0x0c: /* fldenv mem */
5843 gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5845 case 0x0d: /* fldcw mem */
5846 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5847 s->mem_index, MO_LEUW);
5848 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5850 case 0x0e: /* fnstenv mem */
5851 gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5853 case 0x0f: /* fnstcw mem */
5854 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5855 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5856 s->mem_index, MO_LEUW);
5858 case 0x1d: /* fldt mem */
5859 gen_helper_fldt_ST0(cpu_env, cpu_A0);
5861 case 0x1f: /* fstpt mem */
5862 gen_helper_fstt_ST0(cpu_env, cpu_A0);
5863 gen_helper_fpop(cpu_env);
5865 case 0x2c: /* frstor mem */
5866 gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5868 case 0x2e: /* fnsave mem */
5869 gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5871 case 0x2f: /* fnstsw mem */
5872 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5873 tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5874 s->mem_index, MO_LEUW);
5876 case 0x3c: /* fbld */
5877 gen_helper_fbld_ST0(cpu_env, cpu_A0);
5879 case 0x3e: /* fbstp */
5880 gen_helper_fbst_ST0(cpu_env, cpu_A0);
5881 gen_helper_fpop(cpu_env);
5883 case 0x3d: /* fildll */
5884 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5885 gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5887 case 0x3f: /* fistpll */
5888 gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5889 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5890 gen_helper_fpop(cpu_env);
5896 /* register float ops */
5900 case 0x08: /* fld sti */
5901 gen_helper_fpush(cpu_env);
5902 gen_helper_fmov_ST0_STN(cpu_env,
5903 tcg_const_i32((opreg + 1) & 7));
5905 case 0x09: /* fxchg sti */
5906 case 0x29: /* fxchg4 sti, undocumented op */
5907 case 0x39: /* fxchg7 sti, undocumented op */
5908 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5910 case 0x0a: /* grp d9/2 */
5913 /* check exceptions (FreeBSD FPU probe) */
5914 gen_helper_fwait(cpu_env);
5920 case 0x0c: /* grp d9/4 */
5923 gen_helper_fchs_ST0(cpu_env);
5926 gen_helper_fabs_ST0(cpu_env);
5929 gen_helper_fldz_FT0(cpu_env);
5930 gen_helper_fcom_ST0_FT0(cpu_env);
5933 gen_helper_fxam_ST0(cpu_env);
5939 case 0x0d: /* grp d9/5 */
5943 gen_helper_fpush(cpu_env);
5944 gen_helper_fld1_ST0(cpu_env);
5947 gen_helper_fpush(cpu_env);
5948 gen_helper_fldl2t_ST0(cpu_env);
5951 gen_helper_fpush(cpu_env);
5952 gen_helper_fldl2e_ST0(cpu_env);
5955 gen_helper_fpush(cpu_env);
5956 gen_helper_fldpi_ST0(cpu_env);
5959 gen_helper_fpush(cpu_env);
5960 gen_helper_fldlg2_ST0(cpu_env);
5963 gen_helper_fpush(cpu_env);
5964 gen_helper_fldln2_ST0(cpu_env);
5967 gen_helper_fpush(cpu_env);
5968 gen_helper_fldz_ST0(cpu_env);
5975 case 0x0e: /* grp d9/6 */
5978 gen_helper_f2xm1(cpu_env);
5981 gen_helper_fyl2x(cpu_env);
5984 gen_helper_fptan(cpu_env);
5986 case 3: /* fpatan */
5987 gen_helper_fpatan(cpu_env);
5989 case 4: /* fxtract */
5990 gen_helper_fxtract(cpu_env);
5992 case 5: /* fprem1 */
5993 gen_helper_fprem1(cpu_env);
5995 case 6: /* fdecstp */
5996 gen_helper_fdecstp(cpu_env);
5999 case 7: /* fincstp */
6000 gen_helper_fincstp(cpu_env);
6004 case 0x0f: /* grp d9/7 */
6007 gen_helper_fprem(cpu_env);
6009 case 1: /* fyl2xp1 */
6010 gen_helper_fyl2xp1(cpu_env);
6013 gen_helper_fsqrt(cpu_env);
6015 case 3: /* fsincos */
6016 gen_helper_fsincos(cpu_env);
6018 case 5: /* fscale */
6019 gen_helper_fscale(cpu_env);
6021 case 4: /* frndint */
6022 gen_helper_frndint(cpu_env);
6025 gen_helper_fsin(cpu_env);
6029 gen_helper_fcos(cpu_env);
6033 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6034 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6035 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6041 gen_helper_fp_arith_STN_ST0(op1, opreg);
6043 gen_helper_fpop(cpu_env);
6045 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6046 gen_helper_fp_arith_ST0_FT0(op1);
6050 case 0x02: /* fcom */
6051 case 0x22: /* fcom2, undocumented op */
6052 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6053 gen_helper_fcom_ST0_FT0(cpu_env);
6055 case 0x03: /* fcomp */
6056 case 0x23: /* fcomp3, undocumented op */
6057 case 0x32: /* fcomp5, undocumented op */
6058 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6059 gen_helper_fcom_ST0_FT0(cpu_env);
6060 gen_helper_fpop(cpu_env);
6062 case 0x15: /* da/5 */
6064 case 1: /* fucompp */
6065 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6066 gen_helper_fucom_ST0_FT0(cpu_env);
6067 gen_helper_fpop(cpu_env);
6068 gen_helper_fpop(cpu_env);
6076 case 0: /* feni (287 only, just do nop here) */
6078 case 1: /* fdisi (287 only, just do nop here) */
6081 gen_helper_fclex(cpu_env);
6083 case 3: /* fninit */
6084 gen_helper_fninit(cpu_env);
6086 case 4: /* fsetpm (287 only, just do nop here) */
6092 case 0x1d: /* fucomi */
6093 if (!(s->cpuid_features & CPUID_CMOV)) {
6096 gen_update_cc_op(s);
6097 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6098 gen_helper_fucomi_ST0_FT0(cpu_env);
6099 set_cc_op(s, CC_OP_EFLAGS);
6101 case 0x1e: /* fcomi */
6102 if (!(s->cpuid_features & CPUID_CMOV)) {
6105 gen_update_cc_op(s);
6106 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6107 gen_helper_fcomi_ST0_FT0(cpu_env);
6108 set_cc_op(s, CC_OP_EFLAGS);
6110 case 0x28: /* ffree sti */
6111 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6113 case 0x2a: /* fst sti */
6114 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6116 case 0x2b: /* fstp sti */
6117 case 0x0b: /* fstp1 sti, undocumented op */
6118 case 0x3a: /* fstp8 sti, undocumented op */
6119 case 0x3b: /* fstp9 sti, undocumented op */
6120 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6121 gen_helper_fpop(cpu_env);
6123 case 0x2c: /* fucom st(i) */
6124 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6125 gen_helper_fucom_ST0_FT0(cpu_env);
6127 case 0x2d: /* fucomp st(i) */
6128 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6129 gen_helper_fucom_ST0_FT0(cpu_env);
6130 gen_helper_fpop(cpu_env);
6132 case 0x33: /* de/3 */
6134 case 1: /* fcompp */
6135 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6136 gen_helper_fcom_ST0_FT0(cpu_env);
6137 gen_helper_fpop(cpu_env);
6138 gen_helper_fpop(cpu_env);
6144 case 0x38: /* ffreep sti, undocumented op */
6145 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6146 gen_helper_fpop(cpu_env);
6148 case 0x3c: /* df/4 */
6151 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6152 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
6153 gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
6159 case 0x3d: /* fucomip */
6160 if (!(s->cpuid_features & CPUID_CMOV)) {
6163 gen_update_cc_op(s);
6164 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6165 gen_helper_fucomi_ST0_FT0(cpu_env);
6166 gen_helper_fpop(cpu_env);
6167 set_cc_op(s, CC_OP_EFLAGS);
6169 case 0x3e: /* fcomip */
6170 if (!(s->cpuid_features & CPUID_CMOV)) {
6173 gen_update_cc_op(s);
6174 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6175 gen_helper_fcomi_ST0_FT0(cpu_env);
6176 gen_helper_fpop(cpu_env);
6177 set_cc_op(s, CC_OP_EFLAGS);
6179 case 0x10 ... 0x13: /* fcmovxx */
6184 static const uint8_t fcmov_cc[8] = {
6191 if (!(s->cpuid_features & CPUID_CMOV)) {
6194 op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6195 l1 = gen_new_label();
6196 gen_jcc1_noeob(s, op1, l1);
6197 gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6206 /************************/
6209 case 0xa4: /* movsS */
6211 ot = mo_b_d(b, dflag);
6212 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6213 gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6219 case 0xaa: /* stosS */
6221 ot = mo_b_d(b, dflag);
6222 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6223 gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6228 case 0xac: /* lodsS */
6230 ot = mo_b_d(b, dflag);
6231 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6232 gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6237 case 0xae: /* scasS */
6239 ot = mo_b_d(b, dflag);
6240 if (prefixes & PREFIX_REPNZ) {
6241 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6242 } else if (prefixes & PREFIX_REPZ) {
6243 gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6249 case 0xa6: /* cmpsS */
6251 ot = mo_b_d(b, dflag);
6252 if (prefixes & PREFIX_REPNZ) {
6253 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6254 } else if (prefixes & PREFIX_REPZ) {
6255 gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6260 case 0x6c: /* insS */
6262 ot = mo_b_d32(b, dflag);
6263 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6264 gen_check_io(s, ot, pc_start - s->cs_base,
6265 SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6266 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6267 gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6270 if (s->tb->cflags & CF_USE_ICOUNT) {
6271 gen_jmp(s, s->pc - s->cs_base);
6275 case 0x6e: /* outsS */
6277 ot = mo_b_d32(b, dflag);
6278 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6279 gen_check_io(s, ot, pc_start - s->cs_base,
6280 svm_is_rep(prefixes) | 4);
6281 if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6282 gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6285 if (s->tb->cflags & CF_USE_ICOUNT) {
6286 gen_jmp(s, s->pc - s->cs_base);
6291 /************************/
6296 ot = mo_b_d32(b, dflag);
6297 val = cpu_ldub_code(env, s->pc++);
6298 tcg_gen_movi_tl(cpu_T0, val);
6299 gen_check_io(s, ot, pc_start - s->cs_base,
6300 SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6301 if (s->tb->cflags & CF_USE_ICOUNT) {
6304 tcg_gen_movi_i32(cpu_tmp2_i32, val);
6305 gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6306 gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6307 gen_bpt_io(s, cpu_tmp2_i32, ot);
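/* With icount enabled the TB has to end right after an I/O access, so jump to the next insn. */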
6308 if (s->tb->cflags & CF_USE_ICOUNT) {
6310 gen_jmp(s, s->pc - s->cs_base);
6315 ot = mo_b_d32(b, dflag);
6316 val = cpu_ldub_code(env, s->pc++);
6317 tcg_gen_movi_tl(cpu_T0, val);
6318 gen_check_io(s, ot, pc_start - s->cs_base,
6319 svm_is_rep(prefixes));
6320 gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6322 if (s->tb->cflags & CF_USE_ICOUNT) {
6325 tcg_gen_movi_i32(cpu_tmp2_i32, val);
6326 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6327 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6328 gen_bpt_io(s, cpu_tmp2_i32, ot);
6329 if (s->tb->cflags & CF_USE_ICOUNT) {
6331 gen_jmp(s, s->pc - s->cs_base);
6336 ot = mo_b_d32(b, dflag);
6337 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6338 gen_check_io(s, ot, pc_start - s->cs_base,
6339 SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6340 if (s->tb->cflags & CF_USE_ICOUNT) {
6343 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6344 gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
6345 gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
6346 gen_bpt_io(s, cpu_tmp2_i32, ot);
6347 if (s->tb->cflags & CF_USE_ICOUNT) {
6349 gen_jmp(s, s->pc - s->cs_base);
6354 ot = mo_b_d32(b, dflag);
6355 tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
6356 gen_check_io(s, ot, pc_start - s->cs_base,
6357 svm_is_rep(prefixes));
6358 gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
6360 if (s->tb->cflags & CF_USE_ICOUNT) {
6363 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6364 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
6365 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6366 gen_bpt_io(s, cpu_tmp2_i32, ot);
6367 if (s->tb->cflags & CF_USE_ICOUNT) {
6369 gen_jmp(s, s->pc - s->cs_base);
6373 /************************/
6375 case 0xc2: /* ret im */
6376 val = cpu_ldsw_code(env, s->pc);
6379 gen_stack_update(s, val + (1 << ot));
6380 /* Note that gen_pop_T0 uses a zero-extending load. */
6381 gen_op_jmp_v(cpu_T0);
6385 case 0xc3: /* ret */
6387 gen_pop_update(s, ot);
6388 /* Note that gen_pop_T0 uses a zero-extending load. */
6389 gen_op_jmp_v(cpu_T0);
6393 case 0xca: /* lret im */
6394 val = cpu_ldsw_code(env, s->pc);
6397 if (s->pe && !s->vm86) {
6398 gen_update_cc_op(s);
6399 gen_jmp_im(pc_start - s->cs_base);
6400 gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6401 tcg_const_i32(val));
6405 gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6406 /* NOTE: keeping EIP updated is not a problem in case of exception */
6408 gen_op_jmp_v(cpu_T0);
6410 gen_add_A0_im(s, 1 << dflag);
6411 gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
6412 gen_op_movl_seg_T0_vm(R_CS);
6413 /* add stack offset */
6414 gen_stack_update(s, val + (2 << dflag));
6418 case 0xcb: /* lret */
6421 case 0xcf: /* iret */
6422 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6425 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6426 set_cc_op(s, CC_OP_EFLAGS);
6427 } else if (s->vm86) {
6429 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6431 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6432 set_cc_op(s, CC_OP_EFLAGS);
6435 gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6436 tcg_const_i32(s->pc - s->cs_base));
6437 set_cc_op(s, CC_OP_EFLAGS);
6439 /* TF handling for the syscall insn is different. The TF bit is checked
6440 after the syscall insn completes. This allows #DB to not be
6441 generated after one has entered CPL0 if TF is set in FMASK. */
6442 gen_eob_worker(s, false, true);
6444 case 0xe8: /* call im */
6446 if (dflag != MO_16) {
6447 tval = (int32_t)insn_get(env, s, MO_32);
6449 tval = (int16_t)insn_get(env, s, MO_16);
6451 next_eip = s->pc - s->cs_base;
6453 if (dflag == MO_16) {
6455 } else if (!CODE64(s)) {
6458 tcg_gen_movi_tl(cpu_T0, next_eip);
6459 gen_push_v(s, cpu_T0);
6464 case 0x9a: /* lcall im */
6466 unsigned int selector, offset;
6471 offset = insn_get(env, s, ot);
6472 selector = insn_get(env, s, MO_16);
6474 tcg_gen_movi_tl(cpu_T0, selector);
6475 tcg_gen_movi_tl(cpu_T1, offset);
6478 case 0xe9: /* jmp im */
6479 if (dflag != MO_16) {
6480 tval = (int32_t)insn_get(env, s, MO_32);
6482 tval = (int16_t)insn_get(env, s, MO_16);
6484 tval += s->pc - s->cs_base;
6485 if (dflag == MO_16) {
6487 } else if (!CODE64(s)) {
6493 case 0xea: /* ljmp im */
6495 unsigned int selector, offset;
6500 offset = insn_get(env, s, ot);
6501 selector = insn_get(env, s, MO_16);
6503 tcg_gen_movi_tl(cpu_T0, selector);
6504 tcg_gen_movi_tl(cpu_T1, offset);
6507 case 0xeb: /* jmp Jb */
6508 tval = (int8_t)insn_get(env, s, MO_8);
6509 tval += s->pc - s->cs_base;
6510 if (dflag == MO_16) {
6515 case 0x70 ... 0x7f: /* jcc Jb */
6516 tval = (int8_t)insn_get(env, s, MO_8);
6518 case 0x180 ... 0x18f: /* jcc Jv */
6519 if (dflag != MO_16) {
6520 tval = (int32_t)insn_get(env, s, MO_32);
6522 tval = (int16_t)insn_get(env, s, MO_16);
6525 next_eip = s->pc - s->cs_base;
6527 if (dflag == MO_16) {
6531 gen_jcc(s, b, tval, next_eip);
6534 case 0x190 ... 0x19f: /* setcc Gv */
6535 modrm = cpu_ldub_code(env, s->pc++);
6536 gen_setcc1(s, b, cpu_T0);
6537 gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6539 case 0x140 ... 0x14f: /* cmov Gv, Ev */
6540 if (!(s->cpuid_features & CPUID_CMOV)) {
6544 modrm = cpu_ldub_code(env, s->pc++);
6545 reg = ((modrm >> 3) & 7) | rex_r;
6546 gen_cmovcc1(env, s, ot, b, modrm, reg);
6549 /************************/
6551 case 0x9c: /* pushf */
6552 gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6553 if (s->vm86 && s->iopl != 3) {
6554 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6556 gen_update_cc_op(s);
6557 gen_helper_read_eflags(cpu_T0, cpu_env);
6558 gen_push_v(s, cpu_T0);
6561 case 0x9d: /* popf */
6562 gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
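/* POPF can always change the arithmetic flags; IF may be updated only when CPL <= IOPL, IOPL only at CPL 0, and VM86 with IOPL < 3 gets #GP instead. */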
6563 if (s->vm86 && s->iopl != 3) {
6564 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6568 if (dflag != MO_16) {
6569 gen_helper_write_eflags(cpu_env, cpu_T0,
6570 tcg_const_i32((TF_MASK | AC_MASK |
6575 gen_helper_write_eflags(cpu_env, cpu_T0,
6576 tcg_const_i32((TF_MASK | AC_MASK |
6578 IF_MASK | IOPL_MASK)
6582 if (s->cpl <= s->iopl) {
6583 if (dflag != MO_16) {
6584 gen_helper_write_eflags(cpu_env, cpu_T0,
6585 tcg_const_i32((TF_MASK |
6591 gen_helper_write_eflags(cpu_env, cpu_T0,
6592 tcg_const_i32((TF_MASK |
6600 if (dflag != MO_16) {
6601 gen_helper_write_eflags(cpu_env, cpu_T0,
6602 tcg_const_i32((TF_MASK | AC_MASK |
6603 ID_MASK | NT_MASK)));
6605 gen_helper_write_eflags(cpu_env, cpu_T0,
6606 tcg_const_i32((TF_MASK | AC_MASK |
6612 gen_pop_update(s, ot);
6613 set_cc_op(s, CC_OP_EFLAGS);
6614 /* abort translation because TF/AC flag may change */
6615 gen_jmp_im(s->pc - s->cs_base);
6619 case 0x9e: /* sahf */
6620 if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6622 gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
6623 gen_compute_eflags(s);
6624 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6625 tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6626 tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
6628 case 0x9f: /* lahf */
6629 if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6631 gen_compute_eflags(s);
6632 /* Note: gen_compute_eflags() only gives the condition codes */
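/* Bit 1 of FLAGS always reads as 1, hence the OR with 0x02. */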
6633 tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
6634 gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
6636 case 0xf5: /* cmc */
6637 gen_compute_eflags(s);
6638 tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6640 case 0xf8: /* clc */
6641 gen_compute_eflags(s);
6642 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6644 case 0xf9: /* stc */
6645 gen_compute_eflags(s);
6646 tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6648 case 0xfc: /* cld */
6649 tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6650 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6652 case 0xfd: /* std */
6653 tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6654 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6657 /************************/
6658 /* bit operations */
6659 case 0x1ba: /* bt/bts/btr/btc Gv, im */
6661 modrm = cpu_ldub_code(env, s->pc++);
6662 op = (modrm >> 3) & 7;
6663 mod = (modrm >> 6) & 3;
6664 rm = (modrm & 7) | REX_B(s);
6667 gen_lea_modrm(env, s, modrm);
6668 if (!(s->prefix & PREFIX_LOCK)) {
6669 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6672 gen_op_mov_v_reg(ot, cpu_T0, rm);
6675 val = cpu_ldub_code(env, s->pc++);
6676 tcg_gen_movi_tl(cpu_T1, val);
6681 case 0x1a3: /* bt Gv, Ev */
6684 case 0x1ab: /* bts */
6687 case 0x1b3: /* btr */
6690 case 0x1bb: /* btc */
6694 modrm = cpu_ldub_code(env, s->pc++);
6695 reg = ((modrm >> 3) & 7) | rex_r;
6696 mod = (modrm >> 6) & 3;
6697 rm = (modrm & 7) | REX_B(s);
6698 gen_op_mov_v_reg(MO_32, cpu_T1, reg);
6700 AddressParts a = gen_lea_modrm_0(env, s, modrm);
6701 /* specific case: we need to add a displacement */
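/* The bit offset is signed and may reach outside the addressed word: the arithmetic shift by 3 + ot yields a signed element index, shifting it back by ot turns that into a byte displacement, and the low 3 + ot bits (masked below) select the bit within the word. */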
6702 gen_exts(ot, cpu_T1);
6703 tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
6704 tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6705 tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0);
6706 gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
6707 if (!(s->prefix & PREFIX_LOCK)) {
6708 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6711 gen_op_mov_v_reg(ot, cpu_T0, rm);
6714 tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
6715 tcg_gen_movi_tl(cpu_tmp0, 1);
6716 tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
6717 if (s->prefix & PREFIX_LOCK) {
6720 /* Needs no atomic ops; we suppressed the normal
6721 memory load for LOCK above so do it now. */
6722 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
6725 tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0,
6726 s->mem_index, ot | MO_LE);
6729 tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
6730 tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0,
6731 s->mem_index, ot | MO_LE);
6735 tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0,
6736 s->mem_index, ot | MO_LE);
6739 tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6741 tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
6744 /* Data already loaded; nothing to do. */
6747 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
6750 tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
6754 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
6759 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
6761 gen_op_mov_reg_v(ot, rm, cpu_T0);
6766 /* Delay all CC updates until after the store above. Note that
6767 C is the result of the test, Z is unchanged, and the others
6768 are all undefined. */
6770 case CC_OP_MULB ... CC_OP_MULQ:
6771 case CC_OP_ADDB ... CC_OP_ADDQ:
6772 case CC_OP_ADCB ... CC_OP_ADCQ:
6773 case CC_OP_SUBB ... CC_OP_SUBQ:
6774 case CC_OP_SBBB ... CC_OP_SBBQ:
6775 case CC_OP_LOGICB ... CC_OP_LOGICQ:
6776 case CC_OP_INCB ... CC_OP_INCQ:
6777 case CC_OP_DECB ... CC_OP_DECQ:
6778 case CC_OP_SHLB ... CC_OP_SHLQ:
6779 case CC_OP_SARB ... CC_OP_SARQ:
6780 case CC_OP_BMILGB ... CC_OP_BMILGQ:
6781 /* Z was going to be computed from the non-zero status of CC_DST.
6782 We can get that same Z value (and the new C value) by leaving
6783 CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the same width. */
6785 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
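/* (s->cc_op - CC_OP_MULB) & 3 recovers the width index (B/W/L/Q) of the previous cc_op; the SAR helpers take CF from bit 0 of CC_SRC, which is exactly the bit tested above. */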
6786 set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6789 /* Otherwise, generate EFLAGS and replace the C bit. */
6790 gen_compute_eflags(s);
6791 tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6796 case 0x1bc: /* bsf / tzcnt */
6797 case 0x1bd: /* bsr / lzcnt */
6799 modrm = cpu_ldub_code(env, s->pc++);
6800 reg = ((modrm >> 3) & 7) | rex_r;
6801 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6802 gen_extu(ot, cpu_T0);
6804 /* Note that lzcnt and tzcnt are in different extensions. */
6805 if ((prefixes & PREFIX_REPZ)
6807 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6808 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6810 /* For lzcnt/tzcnt, the C bit is defined in terms of the input. */
6811 tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
6813 /* For lzcnt, reduce the target_ulong result by the
6814 number of zeros that we expect to find at the top. */
6815 tcg_gen_clzi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS);
6816 tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
6818 /* For tzcnt, a zero input must return the operand size. */
6819 tcg_gen_ctzi_tl(cpu_T0, cpu_T0, size);
6821 /* For lzcnt/tzcnt, the Z bit is defined in terms of the result. */
6822 gen_op_update1_cc();
6823 set_cc_op(s, CC_OP_BMILGB + ot);
6825 /* For bsr/bsf, only the Z bit is defined and it is related
6826 to the input and not the result. */
6827 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
6828 set_cc_op(s, CC_OP_LOGICB + ot);
6830 /* ??? The manual says that the output is undefined when the
6831 input is zero, but real hardware leaves it unchanged, and
6832 real programs appear to depend on that. Accomplish this
6833 by passing the output as the value to return upon zero. */
6835 /* For bsr, return the bit index of the first 1 bit,
6836 not the count of leading zeros. */
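/* Since TARGET_LONG_BITS - 1 is all ones, XOR with it computes (TARGET_LONG_BITS - 1) - clz; pre-XORing the fallback value makes a zero input leave the destination register unchanged after the final XOR. */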
6837 tcg_gen_xori_tl(cpu_T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6838 tcg_gen_clz_tl(cpu_T0, cpu_T0, cpu_T1);
6839 tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
6841 tcg_gen_ctz_tl(cpu_T0, cpu_T0, cpu_regs[reg]);
6844 gen_op_mov_reg_v(ot, reg, cpu_T0);
6846 /************************/
6848 case 0x27: /* daa */
6851 gen_update_cc_op(s);
6852 gen_helper_daa(cpu_env);
6853 set_cc_op(s, CC_OP_EFLAGS);
6855 case 0x2f: /* das */
6858 gen_update_cc_op(s);
6859 gen_helper_das(cpu_env);
6860 set_cc_op(s, CC_OP_EFLAGS);
6862 case 0x37: /* aaa */
6865 gen_update_cc_op(s);
6866 gen_helper_aaa(cpu_env);
6867 set_cc_op(s, CC_OP_EFLAGS);
6869 case 0x3f: /* aas */
6872 gen_update_cc_op(s);
6873 gen_helper_aas(cpu_env);
6874 set_cc_op(s, CC_OP_EFLAGS);
6876 case 0xd4: /* aam */
6879 val = cpu_ldub_code(env, s->pc++);
6881 gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6883 gen_helper_aam(cpu_env, tcg_const_i32(val));
6884 set_cc_op(s, CC_OP_LOGICB);
6887 case 0xd5: /* aad */
6890 val = cpu_ldub_code(env, s->pc++);
6891 gen_helper_aad(cpu_env, tcg_const_i32(val));
6892 set_cc_op(s, CC_OP_LOGICB);
6894 /************************/
6896 case 0x90: /* nop */
6897 /* XXX: correct lock test for all insn */
6898 if (prefixes & PREFIX_LOCK) {
6901 /* If REX_B is set, then this is xchg eax, r8d, not a nop. */
6903 goto do_xchg_reg_eax;
6905 if (prefixes & PREFIX_REPZ) {
6906 gen_update_cc_op(s);
6907 gen_jmp_im(pc_start - s->cs_base);
6908 gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6909 s->is_jmp = DISAS_TB_JUMP;
6912 case 0x9b: /* fwait */
6913 if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6914 (HF_MP_MASK | HF_TS_MASK)) {
6915 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6917 gen_helper_fwait(cpu_env);
6920 case 0xcc: /* int3 */
6921 gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6923 case 0xcd: /* int N */
6924 val = cpu_ldub_code(env, s->pc++);
6925 if (s->vm86 && s->iopl != 3) {
6926 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6928 gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6931 case 0xce: /* into */
6934 gen_update_cc_op(s);
6935 gen_jmp_im(pc_start - s->cs_base);
6936 gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6939 case 0xf1: /* icebp (undocumented, exits to external debugger) */
6940 gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6942 gen_debug(s, pc_start - s->cs_base);
6945 tb_flush(CPU(x86_env_get_cpu(env)));
6946 qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6950 case 0xfa: /* cli */
6952 if (s->cpl <= s->iopl) {
6953 gen_helper_cli(cpu_env);
6955 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6959 gen_helper_cli(cpu_env);
6961 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6965 case 0xfb: /* sti */
6966 if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
6967 gen_helper_sti(cpu_env);
6968 /* interrupts are enabled only after the first insn following sti */
6969 gen_jmp_im(s->pc - s->cs_base);
6970 gen_eob_inhibit_irq(s, true);
6972 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6975 case 0x62: /* bound */
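/* BOUND (invalid in 64-bit mode): the helper raises #BR if the signed index in the register falls outside the lower/upper pair loaded from the memory operand. */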
6979 modrm = cpu_ldub_code(env, s->pc++);
6980 reg = (modrm >> 3) & 7;
6981 mod = (modrm >> 6) & 3;
6984 gen_op_mov_v_reg(ot, cpu_T0, reg);
6985 gen_lea_modrm(env, s, modrm);
6986 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
6988 gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
6990 gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
6993 case 0x1c8 ... 0x1cf: /* bswap reg */
6994 reg = (b & 7) | REX_B(s);
6995 #ifdef TARGET_X86_64
6996 if (dflag == MO_64) {
6997 gen_op_mov_v_reg(MO_64, cpu_T0, reg);
6998 tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
6999 gen_op_mov_reg_v(MO_64, reg, cpu_T0);
7003 gen_op_mov_v_reg(MO_32, cpu_T0, reg);
7004 tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
7005 tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
7006 gen_op_mov_reg_v(MO_32, reg, cpu_T0);
7009 case 0xd6: /* salc */
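/* SALC (undocumented): AL = CF ? 0xFF : 0x00; negating the 0/1 carry value produces exactly that. */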
7012 gen_compute_eflags_c(s, cpu_T0);
7013 tcg_gen_neg_tl(cpu_T0, cpu_T0);
7014 gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
7016 case 0xe0: /* loopnz */
7017 case 0xe1: /* loopz */
7018 case 0xe2: /* loop */
7019 case 0xe3: /* jecxz */
7021 TCGLabel *l1, *l2, *l3;
7023 tval = (int8_t)insn_get(env, s, MO_8);
7024 next_eip = s->pc - s->cs_base;
7026 if (dflag == MO_16) {
7030 l1 = gen_new_label();
7031 l2 = gen_new_label();
7032 l3 = gen_new_label();
7035 case 0: /* loopnz */
7037 gen_op_add_reg_im(s->aflag, R_ECX, -1);
7038 gen_op_jz_ecx(s->aflag, l3);
7039 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7042 gen_op_add_reg_im(s->aflag, R_ECX, -1);
7043 gen_op_jnz_ecx(s->aflag, l1);
7047 gen_op_jz_ecx(s->aflag, l1);
7052 gen_jmp_im(next_eip);
7061 case 0x130: /* wrmsr */
7062 case 0x132: /* rdmsr */
7064 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7066 gen_update_cc_op(s);
7067 gen_jmp_im(pc_start - s->cs_base);
7069 gen_helper_rdmsr(cpu_env);
7071 gen_helper_wrmsr(cpu_env);
7075 case 0x131: /* rdtsc */
7076 gen_update_cc_op(s);
7077 gen_jmp_im(pc_start - s->cs_base);
7078 if (s->tb->cflags & CF_USE_ICOUNT) {
7081 gen_helper_rdtsc(cpu_env);
7082 if (s->tb->cflags & CF_USE_ICOUNT) {
7084 gen_jmp(s, s->pc - s->cs_base);
7087 case 0x133: /* rdpmc */
7088 gen_update_cc_op(s);
7089 gen_jmp_im(pc_start - s->cs_base);
7090 gen_helper_rdpmc(cpu_env);
7092 case 0x134: /* sysenter */
7093 /* For Intel, SYSENTER is valid in 64-bit mode */
7094 if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7097 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7099 gen_helper_sysenter(cpu_env);
7103 case 0x135: /* sysexit */
7104 /* For Intel, SYSEXIT is valid in 64-bit mode */
7105 if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7108 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7110 gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7114 #ifdef TARGET_X86_64
7115 case 0x105: /* syscall */
7116 /* XXX: is it usable in real mode? */
7117 gen_update_cc_op(s);
7118 gen_jmp_im(pc_start - s->cs_base);
7119 gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7122 case 0x107: /* sysret */
7124 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7126 gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7127 /* condition codes are modified only in long mode */
7129 set_cc_op(s, CC_OP_EFLAGS);
7131 /* TF handling for the sysret insn is different. The TF bit is
7132 checked after the sysret insn completes. This allows #DB to be
7133 generated "as if" the syscall insn in userspace has just completed. */
7135 gen_eob_worker(s, false, true);
7139 case 0x1a2: /* cpuid */
7140 gen_update_cc_op(s);
7141 gen_jmp_im(pc_start - s->cs_base);
7142 gen_helper_cpuid(cpu_env);
7144 case 0xf4: /* hlt */
7146 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7148 gen_update_cc_op(s);
7149 gen_jmp_im(pc_start - s->cs_base);
7150 gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7151 s->is_jmp = DISAS_TB_JUMP;
7155 modrm = cpu_ldub_code(env, s->pc++);
7156 mod = (modrm >> 6) & 3;
7157 op = (modrm >> 3) & 7;
7160 if (!s->pe || s->vm86)
7162 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7163 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7164 offsetof(CPUX86State, ldt.selector));
7165 ot = mod == 3 ? dflag : MO_16;
7166 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7169 if (!s->pe || s->vm86)
7172 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7174 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7175 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7176 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7177 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7181 if (!s->pe || s->vm86)
7183 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7184 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
7185 offsetof(CPUX86State, tr.selector));
7186 ot = mod == 3 ? dflag : MO_16;
7187 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7190 if (!s->pe || s->vm86)
7193 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7195 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7196 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7197 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
7198 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7203 if (!s->pe || s->vm86)
7205 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7206 gen_update_cc_op(s);
7208 gen_helper_verr(cpu_env, cpu_T0);
7210 gen_helper_verw(cpu_env, cpu_T0);
7212 set_cc_op(s, CC_OP_EFLAGS);
7220 modrm = cpu_ldub_code(env, s->pc++);
7222 CASE_MODRM_MEM_OP(0): /* sgdt */
7223 gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7224 gen_lea_modrm(env, s, modrm);
7225 tcg_gen_ld32u_tl(cpu_T0,
7226 cpu_env, offsetof(CPUX86State, gdt.limit));
7227 gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7228 gen_add_A0_im(s, 2);
7229 tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7230 if (dflag == MO_16) {
7231 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7233 gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7236 case 0xc8: /* monitor */
7237 if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7240 gen_update_cc_op(s);
7241 gen_jmp_im(pc_start - s->cs_base);
7242 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7243 gen_extu(s->aflag, cpu_A0);
7244 gen_add_A0_ds_seg(s);
7245 gen_helper_monitor(cpu_env, cpu_A0);
7248 case 0xc9: /* mwait */
7249 if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7252 gen_update_cc_op(s);
7253 gen_jmp_im(pc_start - s->cs_base);
7254 gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7258 case 0xca: /* clac */
7259 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7263 gen_helper_clac(cpu_env);
7264 gen_jmp_im(s->pc - s->cs_base);
7268 case 0xcb: /* stac */
7269 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7273 gen_helper_stac(cpu_env);
7274 gen_jmp_im(s->pc - s->cs_base);
7278 CASE_MODRM_MEM_OP(1): /* sidt */
7279 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7280 gen_lea_modrm(env, s, modrm);
7281 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
7282 gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
7283 gen_add_A0_im(s, 2);
7284 tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7285 if (dflag == MO_16) {
7286 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7288 gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7291 case 0xd0: /* xgetbv */
7292 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7293 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7294 | PREFIX_REPZ | PREFIX_REPNZ))) {
7297 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7298 gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7299 tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7302 case 0xd1: /* xsetbv */
7303 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7304 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7305 | PREFIX_REPZ | PREFIX_REPNZ))) {
7309 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7312 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7314 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7315 gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7316 /* End TB because translation flags may change. */
7317 gen_jmp_im(s->pc - s->cs_base);
7321 case 0xd8: /* VMRUN */
7322 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7326 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7329 gen_update_cc_op(s);
7330 gen_jmp_im(pc_start - s->cs_base);
7331 gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7332 tcg_const_i32(s->pc - pc_start));
7334 s->is_jmp = DISAS_TB_JUMP;
7337 case 0xd9: /* VMMCALL */
7338 if (!(s->flags & HF_SVME_MASK)) {
7341 gen_update_cc_op(s);
7342 gen_jmp_im(pc_start - s->cs_base);
7343 gen_helper_vmmcall(cpu_env);
7346 case 0xda: /* VMLOAD */
7347 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7351 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7354 gen_update_cc_op(s);
7355 gen_jmp_im(pc_start - s->cs_base);
7356 gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7359 case 0xdb: /* VMSAVE */
7360 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7364 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7367 gen_update_cc_op(s);
7368 gen_jmp_im(pc_start - s->cs_base);
7369 gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7372 case 0xdc: /* STGI */
7373 if ((!(s->flags & HF_SVME_MASK)
7374 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7379 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7382 gen_update_cc_op(s);
7383 gen_jmp_im(pc_start - s->cs_base);
7384 gen_helper_stgi(cpu_env);
7387 case 0xdd: /* CLGI */
7388 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7392 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7395 gen_update_cc_op(s);
7396 gen_jmp_im(pc_start - s->cs_base);
7397 gen_helper_clgi(cpu_env);
7400 case 0xde: /* SKINIT */
7401 if ((!(s->flags & HF_SVME_MASK)
7402 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7406 gen_update_cc_op(s);
7407 gen_jmp_im(pc_start - s->cs_base);
7408 gen_helper_skinit(cpu_env);
7411 case 0xdf: /* INVLPGA */
7412 if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7416 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7419 gen_update_cc_op(s);
7420 gen_jmp_im(pc_start - s->cs_base);
7421 gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7424 CASE_MODRM_MEM_OP(2): /* lgdt */
7426 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7429 gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7430 gen_lea_modrm(env, s, modrm);
7431 gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7432 gen_add_A0_im(s, 2);
7433 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
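/* With a 16-bit operand size only a 24-bit base is loaded; mask off the top byte. */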
7434 if (dflag == MO_16) {
7435 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7437 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
7438 tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7441 CASE_MODRM_MEM_OP(3): /* lidt */
7443 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7446 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7447 gen_lea_modrm(env, s, modrm);
7448 gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
7449 gen_add_A0_im(s, 2);
7450 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
7451 if (dflag == MO_16) {
7452 tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
7454 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
7455 tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
7458 CASE_MODRM_OP(4): /* smsw */
7459 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7460 tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
7462 mod = (modrm >> 6) & 3;
7463 ot = (mod != 3 ? MO_16 : s->dflag);
7467 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7469 case 0xee: /* rdpkru */
7470 if (prefixes & PREFIX_LOCK) {
7473 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7474 gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
7475 tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
7477 case 0xef: /* wrpkru */
7478 if (prefixes & PREFIX_LOCK) {
7481 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
7483 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
7484 gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
7486 CASE_MODRM_OP(6): /* lmsw */
7488 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7491 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7492 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7493 gen_helper_lmsw(cpu_env, cpu_T0);
7494 gen_jmp_im(s->pc - s->cs_base);
7498 CASE_MODRM_MEM_OP(7): /* invlpg */
7500 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7503 gen_update_cc_op(s);
7504 gen_jmp_im(pc_start - s->cs_base);
7505 gen_lea_modrm(env, s, modrm);
7506 gen_helper_invlpg(cpu_env, cpu_A0);
7507 gen_jmp_im(s->pc - s->cs_base);
7511 case 0xf8: /* swapgs */
7512 #ifdef TARGET_X86_64
7515 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
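/* Exchange the current GS base with the kernelgsbase MSR value, using cpu_T0 as scratch. */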
7517 tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
7518 tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7519 offsetof(CPUX86State, kernelgsbase));
7520 tcg_gen_st_tl(cpu_T0, cpu_env,
7521 offsetof(CPUX86State, kernelgsbase));
7528 case 0xf9: /* rdtscp */
7529 if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7532 gen_update_cc_op(s);
7533 gen_jmp_im(pc_start - s->cs_base);
7534 if (s->tb->cflags & CF_USE_ICOUNT) {
7537 gen_helper_rdtscp(cpu_env);
7538 if (s->tb->cflags & CF_USE_ICOUNT) {
7540 gen_jmp(s, s->pc - s->cs_base);
7549 case 0x108: /* invd */
7550 case 0x109: /* wbinvd */
7552 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7554 gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7558 case 0x63: /* arpl or movsxd (x86_64) */
7559 #ifdef TARGET_X86_64
7562 /* d_ot is the size of the destination */
7565 modrm = cpu_ldub_code(env, s->pc++);
7566 reg = ((modrm >> 3) & 7) | rex_r;
7567 mod = (modrm >> 6) & 3;
7568 rm = (modrm & 7) | REX_B(s);
7571 gen_op_mov_v_reg(MO_32, cpu_T0, rm);
7573 if (d_ot == MO_64) {
7574 tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
7576 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7578 gen_lea_modrm(env, s, modrm);
7579 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
7580 gen_op_mov_reg_v(d_ot, reg, cpu_T0);
7586 TCGv t0, t1, t2, a0;
7588 if (!s->pe || s->vm86)
7590 t0 = tcg_temp_local_new();
7591 t1 = tcg_temp_local_new();
7592 t2 = tcg_temp_local_new();
7594 modrm = cpu_ldub_code(env, s->pc++);
7595 reg = (modrm >> 3) & 7;
7596 mod = (modrm >> 6) & 3;
7599 gen_lea_modrm(env, s, modrm);
7600 gen_op_ld_v(s, ot, t0, cpu_A0);
7601 a0 = tcg_temp_local_new();
7602 tcg_gen_mov_tl(a0, cpu_A0);
7604 gen_op_mov_v_reg(ot, t0, rm);
7607 gen_op_mov_v_reg(ot, t1, reg);
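/* ARPL: if the destination RPL (bits 1:0) is below the source RPL, raise it to match and set ZF; otherwise leave the operand alone and clear ZF. */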
7608 tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7609 tcg_gen_andi_tl(t1, t1, 3);
7610 tcg_gen_movi_tl(t2, 0);
7611 label1 = gen_new_label();
7612 tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7613 tcg_gen_andi_tl(t0, t0, ~3);
7614 tcg_gen_or_tl(t0, t0, t1);
7615 tcg_gen_movi_tl(t2, CC_Z);
7616 gen_set_label(label1);
7618 gen_op_st_v(s, ot, t0, a0);
7621 gen_op_mov_reg_v(ot, rm, t0);
7623 gen_compute_eflags(s);
7624 tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7625 tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7631 case 0x102: /* lar */
7632 case 0x103: /* lsl */
7636 if (!s->pe || s->vm86)
7638 ot = dflag != MO_16 ? MO_32 : MO_16;
7639 modrm = cpu_ldub_code(env, s->pc++);
7640 reg = ((modrm >> 3) & 7) | rex_r;
7641 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7642 t0 = tcg_temp_local_new();
7643 gen_update_cc_op(s);
7645 gen_helper_lar(t0, cpu_env, cpu_T0);
7647 gen_helper_lsl(t0, cpu_env, cpu_T0);
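/* The helpers set CC_Z in cc_src on success; the result is written back only in that case. */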
7649 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7650 label1 = gen_new_label();
7651 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7652 gen_op_mov_reg_v(ot, reg, t0);
7653 gen_set_label(label1);
7654 set_cc_op(s, CC_OP_EFLAGS);
7659 modrm = cpu_ldub_code(env, s->pc++);
7660 mod = (modrm >> 6) & 3;
7661 op = (modrm >> 3) & 7;
7663 case 0: /* prefetchnta */
7664 case 1: /* prefetcht0 */
7665 case 2: /* prefetcht1 */
7666 case 3: /* prefetcht2 */
7669 gen_nop_modrm(env, s, modrm);
7670 /* nothing more to do */
7672 default: /* nop (multi byte) */
7673 gen_nop_modrm(env, s, modrm);
7678 modrm = cpu_ldub_code(env, s->pc++);
7679 if (s->flags & HF_MPX_EN_MASK) {
7680 mod = (modrm >> 6) & 3;
7681 reg = ((modrm >> 3) & 7) | rex_r;
7682 if (prefixes & PREFIX_REPZ) {
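/* bndcl: raise #BR if the address is below the lower bound. */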
7685 || (prefixes & PREFIX_LOCK)
7686 || s->aflag == MO_16) {
7689 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7690 } else if (prefixes & PREFIX_REPNZ) {
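/* bndcu: cpu_bndu holds the upper bound one's-complemented (as written by bndmk), so invert it back before the unsigned compare. */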
7693 || (prefixes & PREFIX_LOCK)
7694 || s->aflag == MO_16) {
7697 TCGv_i64 notu = tcg_temp_new_i64();
7698 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7699 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7700 tcg_temp_free_i64(notu);
7701 } else if (prefixes & PREFIX_DATA) {
7702 /* bndmov -- from reg/mem */
7703 if (reg >= 4 || s->aflag == MO_16) {
7707 int reg2 = (modrm & 7) | REX_B(s);
7708 if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7711 if (s->flags & HF_MPX_IU_MASK) {
7712 tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7713 tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7716 gen_lea_modrm(env, s, modrm);
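/* In 64-bit code each bound is 8 bytes; otherwise 4. */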
7718 tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7719 s->mem_index, MO_LEQ);
7720 tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7721 tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7722 s->mem_index, MO_LEQ);
7724 tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
7725 s->mem_index, MO_LEUL);
7726 tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7727 tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
7728 s->mem_index, MO_LEUL);
7730 /* bnd registers are now in-use */
7731 gen_set_hflag(s, HF_MPX_IU_MASK);
7733 } else if (mod != 3) {
7735 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7737 || (prefixes & PREFIX_LOCK)
7738 || s->aflag == MO_16
7743 tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7745 tcg_gen_movi_tl(cpu_A0, 0);
7747 gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7749 tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7751 tcg_gen_movi_tl(cpu_T0, 0);
7754 gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
7755 tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7756 offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7758 gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
7759 tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7760 tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7762 gen_set_hflag(s, HF_MPX_IU_MASK);
7765 gen_nop_modrm(env, s, modrm);
7768 modrm = cpu_ldub_code(env, s->pc++);
7769 if (s->flags & HF_MPX_EN_MASK) {
7770 mod = (modrm >> 6) & 3;
7771 reg = ((modrm >> 3) & 7) | rex_r;
7772 if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7775 || (prefixes & PREFIX_LOCK)
7776 || s->aflag == MO_16) {
7779 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7781 tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7783 tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7785 } else if (a.base == -1) {
7786 /* no base register: the lower bound is 0 */
7787 tcg_gen_movi_i64(cpu_bndl[reg], 0);
7789 /* rip-relative generates #ud */
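/* The upper bound is stored one's-complemented, matching the architectural BND register format. */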
7792 tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
7794 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
7796 tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
7797 /* bnd registers are now in-use */
7798 gen_set_hflag(s, HF_MPX_IU_MASK);
7800 } else if (prefixes & PREFIX_REPNZ) {
7803 || (prefixes & PREFIX_LOCK)
7804 || s->aflag == MO_16) {
7807 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7808 } else if (prefixes & PREFIX_DATA) {
7809 /* bndmov -- to reg/mem */
7810 if (reg >= 4 || s->aflag == MO_16) {
7814 int reg2 = (modrm & 7) | REX_B(s);
7815 if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7818 if (s->flags & HF_MPX_IU_MASK) {
7819 tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7820 tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7823 gen_lea_modrm(env, s, modrm);
7825 tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7826 s->mem_index, MO_LEQ);
7827 tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
7828 tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7829 s->mem_index, MO_LEQ);
7831 tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
7832 s->mem_index, MO_LEUL);
7833 tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
7834 tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
7835 s->mem_index, MO_LEUL);
7838 } else if (mod != 3) {
7840 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7842 || (prefixes & PREFIX_LOCK)
7843 || s->aflag == MO_16
7848 tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
7850 tcg_gen_movi_tl(cpu_A0, 0);
7852 gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
7854 tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
7856 tcg_gen_movi_tl(cpu_T0, 0);
7859 gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
7860 cpu_bndl[reg], cpu_bndu[reg]);
7862 gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
7863 cpu_bndl[reg], cpu_bndu[reg]);
7867 gen_nop_modrm(env, s, modrm);
7869 case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
7870 modrm = cpu_ldub_code(env, s->pc++);
7871 gen_nop_modrm(env, s, modrm);
7873 case 0x120: /* mov reg, crN */
7874 case 0x122: /* mov crN, reg */
7876 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7878 modrm = cpu_ldub_code(env, s->pc++);
7879 /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7880 * AMD documentation (24594.pdf) and testing of
7881 * Intel 386 and 486 processors all show that the mod bits
7882 * are assumed to be 1's, regardless of actual values.
7884 rm = (modrm & 7) | REX_B(s);
7885 reg = ((modrm >> 3) & 7) | rex_r;
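/* On AMD CPUs with CR8 legacy support, a LOCK prefix on MOV to/from CR0 selects CR8 instead. */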
7890 if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7891 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7900 gen_update_cc_op(s);
7901 gen_jmp_im(pc_start - s->cs_base);
7903 gen_op_mov_v_reg(ot, cpu_T0, rm);
7904 gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7906 gen_jmp_im(s->pc - s->cs_base);
7909 gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
7910 gen_op_mov_reg_v(ot, rm, cpu_T0);
7918 case 0x121: /* mov reg, drN */
7919 case 0x123: /* mov drN, reg */
7921 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7923 modrm = cpu_ldub_code(env, s->pc++);
7924 /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7925 * AMD documentation (24594.pdf) and testing of
7926 * Intel 386 and 486 processors all show that the mod bits
7927 * are assumed to be 1's, regardless of actual values.
7929 rm = (modrm & 7) | REX_B(s);
7930 reg = ((modrm >> 3) & 7) | rex_r;
7939 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7940 gen_op_mov_v_reg(ot, cpu_T0, rm);
7941 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7942 gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
7943 gen_jmp_im(s->pc - s->cs_base);
7946 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7947 tcg_gen_movi_i32(cpu_tmp2_i32, reg);
7948 gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
7949 gen_op_mov_reg_v(ot, rm, cpu_T0);
7953 case 0x106: /* clts */
7955 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7957 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7958 gen_helper_clts(cpu_env);
7959 /* abort block because static cpu state changed */
7960 gen_jmp_im(s->pc - s->cs_base);
7964 /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7965 case 0x1c3: /* MOVNTI reg, mem */
7966 if (!(s->cpuid_features & CPUID_SSE2))
7968 ot = mo_64_32(dflag);
7969 modrm = cpu_ldub_code(env, s->pc++);
7970 mod = (modrm >> 6) & 3;
7973 reg = ((modrm >> 3) & 7) | rex_r;
7974 /* generate a generic store */
7975 gen_ldst_modrm(env, s, modrm, ot, reg, 1);
7978 modrm = cpu_ldub_code(env, s->pc++);
7980 CASE_MODRM_MEM_OP(0): /* fxsave */
7981 if (!(s->cpuid_features & CPUID_FXSR)
7982 || (prefixes & PREFIX_LOCK)) {
7985 if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7986 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7989 gen_lea_modrm(env, s, modrm);
7990 gen_helper_fxsave(cpu_env, cpu_A0);
7993 CASE_MODRM_MEM_OP(1): /* fxrstor */
7994 if (!(s->cpuid_features & CPUID_FXSR)
7995 || (prefixes & PREFIX_LOCK)) {
7998 if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7999 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8002 gen_lea_modrm(env, s, modrm);
8003 gen_helper_fxrstor(cpu_env, cpu_A0);
8006 CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8007 if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8010 if (s->flags & HF_TS_MASK) {
8011 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8014 gen_lea_modrm(env, s, modrm);
8015 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
8016 gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
8019 CASE_MODRM_MEM_OP(3): /* stmxcsr */
8020 if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8023 if (s->flags & HF_TS_MASK) {
8024 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8027 gen_lea_modrm(env, s, modrm);
8028 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
8029 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
8032 CASE_MODRM_MEM_OP(4): /* xsave */
8033 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8034 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8035 | PREFIX_REPZ | PREFIX_REPNZ))) {
8038 gen_lea_modrm(env, s, modrm);
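/* The requested-feature bitmap for XSAVE is passed in EDX:EAX. */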
8039 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8041 gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
8044 CASE_MODRM_MEM_OP(5): /* xrstor */
8045 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8046 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8047 | PREFIX_REPZ | PREFIX_REPNZ))) {
8050 gen_lea_modrm(env, s, modrm);
8051 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8053 gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
8054 /* XRSTOR is how MPX is enabled, which changes how
8055 we translate. Thus we need to end the TB. */
8056 gen_update_cc_op(s);
8057 gen_jmp_im(s->pc - s->cs_base);
8061 CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8062 if (prefixes & PREFIX_LOCK) {
8065 if (prefixes & PREFIX_DATA) {
8067 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8070 gen_nop_modrm(env, s, modrm);
8073 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8074 || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8075 || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8078 gen_lea_modrm(env, s, modrm);
8079 tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
8081 gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
8085 CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8086 if (prefixes & PREFIX_LOCK) {
8089 if (prefixes & PREFIX_DATA) {
8091 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8096 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8097 || !(s->cpuid_features & CPUID_CLFLUSH)) {
8101 gen_nop_modrm(env, s, modrm);
8104 case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8105 case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8106 case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8107 case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8109 && (prefixes & PREFIX_REPZ)
8110 && !(prefixes & PREFIX_LOCK)
8111 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8112 TCGv base, treg, src, dst;
8114 /* Preserve hflags bits by testing CR4 at runtime. */
8115 tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
8116 gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
8118 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8119 treg = cpu_regs[(modrm & 7) | REX_B(s)];
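/* modrm bit 3 selects GS vs. FS; bit 4 selects the write (wr*base) vs. read (rd*base) direction. */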
8123 dst = base, src = treg;
8126 dst = treg, src = base;
8129 if (s->dflag == MO_32) {
8130 tcg_gen_ext32u_tl(dst, src);
8132 tcg_gen_mov_tl(dst, src);
8138 case 0xf8: /* sfence / pcommit */
8139 if (prefixes & PREFIX_DATA) {
8141 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8142 || (prefixes & PREFIX_LOCK)) {
8148 case 0xf9 ... 0xff: /* sfence */
8149 if (!(s->cpuid_features & CPUID_SSE)
8150 || (prefixes & PREFIX_LOCK)) {
8153 tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8155 case 0xe8 ... 0xef: /* lfence */
8156 if (!(s->cpuid_features & CPUID_SSE)
8157 || (prefixes & PREFIX_LOCK)) {
8160 tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8162 case 0xf0 ... 0xf7: /* mfence */
8163 if (!(s->cpuid_features & CPUID_SSE2)
8164 || (prefixes & PREFIX_LOCK)) {
8167 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8175 case 0x10d: /* 3DNow! prefetch(w) */
8176 modrm = cpu_ldub_code(env, s->pc++);
8177 mod = (modrm >> 6) & 3;
8180 gen_nop_modrm(env, s, modrm);
8182 case 0x1aa: /* rsm */
8183 gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8184 if (!(s->flags & HF_SMM_MASK))
8186 gen_update_cc_op(s);
8187 gen_jmp_im(s->pc - s->cs_base);
8188 gen_helper_rsm(cpu_env);
8191 case 0x1b8: /* SSE4.2 popcnt */
8192 if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8195 if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8198 modrm = cpu_ldub_code(env, s->pc++);
8199 reg = ((modrm >> 3) & 7) | rex_r;
8201 if (s->prefix & PREFIX_DATA) {
8204 ot = mo_64_32(dflag);
8207 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8208 gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot));
8209 gen_op_mov_reg_v(ot, reg, cpu_T0);
8211 set_cc_op(s, CC_OP_EFLAGS);
8213 case 0x10e ... 0x10f:
8214 /* 3DNow! instructions, ignore prefixes */
8215 s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8216 case 0x110 ... 0x117:
8217 case 0x128 ... 0x12f:
8218 case 0x138 ... 0x13a:
8219 case 0x150 ... 0x179:
8220 case 0x17c ... 0x17f:
8222 case 0x1c4 ... 0x1c6:
8223 case 0x1d0 ... 0x1fe:
8224 gen_sse(env, s, b, pc_start, rex_r);
8231 gen_illegal_opcode(s);
8234 gen_unknown_opcode(env, s);
8238 void tcg_x86_init(void)
8240 static const char reg_names[CPU_NB_REGS][4] = {
8241 #ifdef TARGET_X86_64
8269 static const char seg_base_names[6][8] = {
8277 static const char bnd_regl_names[4][8] = {
8278 "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8280 static const char bnd_regu_names[4][8] = {
8281 "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8284 static bool initialized;
8291 cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
8292 tcg_ctx.tcg_env = cpu_env;
8293 cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8294 offsetof(CPUX86State, cc_op), "cc_op");
8295 cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8297 cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8299 cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8302 for (i = 0; i < CPU_NB_REGS; ++i) {
8303 cpu_regs[i] = tcg_global_mem_new(cpu_env,
8304 offsetof(CPUX86State, regs[i]),
8308 for (i = 0; i < 6; ++i) {
8310 = tcg_global_mem_new(cpu_env,
8311 offsetof(CPUX86State, segs[i].base),
8315 for (i = 0; i < 4; ++i) {
8317 = tcg_global_mem_new_i64(cpu_env,
8318 offsetof(CPUX86State, bnd_regs[i].lb),
8321 = tcg_global_mem_new_i64(cpu_env,
8322 offsetof(CPUX86State, bnd_regs[i].ub),
8327 /* generate intermediate code for basic block 'tb'. */
8328 void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
8330 X86CPU *cpu = x86_env_get_cpu(env);
8331 CPUState *cs = CPU(cpu);
8332 DisasContext dc1, *dc = &dc1;
8333 target_ulong pc_ptr;
8335 target_ulong pc_start;
8336 target_ulong cs_base;
8340 /* generate intermediate code */
8342 cs_base = tb->cs_base;
8345 dc->pe = (flags >> HF_PE_SHIFT) & 1;
8346 dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8347 dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8348 dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8350 dc->vm86 = (flags >> VM_SHIFT) & 1;
8351 dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8352 dc->iopl = (flags >> IOPL_SHIFT) & 3;
8353 dc->tf = (flags >> TF_SHIFT) & 1;
8354 dc->singlestep_enabled = cs->singlestep_enabled;
8355 dc->cc_op = CC_OP_DYNAMIC;
8356 dc->cc_op_dirty = false;
8357 dc->cs_base = cs_base;
8359 dc->popl_esp_hack = 0;
8360 /* select memory access functions */
8362 #ifdef CONFIG_SOFTMMU
8363 dc->mem_index = cpu_mmu_index(env, false);
8365 dc->cpuid_features = env->features[FEAT_1_EDX];
8366 dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8367 dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8368 dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8369 dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8370 dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8371 #ifdef TARGET_X86_64
8372 dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8373 dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8376 dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
8377 (flags & HF_INHIBIT_IRQ_MASK));
8378 /* Do not optimize repz jumps at all in icount mode, because
8379 rep movsS instructions are executed along different paths
8380 in the !repz_opt and repz_opt cases. The first path used
8381 to be taken always, except in single-step mode. This setting
8382 disables the jump optimization so that the control paths
8383 become equivalent in normal and single-step execution.
8384 As a consequence there is no jump optimization for repz in
8385 record/replay modes, and there is always an
8386 additional step for ecx=0 when icount is enabled.
8388 dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
8390 /* check addseg logic */
8391 if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8392 printf("ERROR addseg\n");
8395 cpu_T0 = tcg_temp_new();
8396 cpu_T1 = tcg_temp_new();
8397 cpu_A0 = tcg_temp_new();
8399 cpu_tmp0 = tcg_temp_new();
8400 cpu_tmp1_i64 = tcg_temp_new_i64();
8401 cpu_tmp2_i32 = tcg_temp_new_i32();
8402 cpu_tmp3_i32 = tcg_temp_new_i32();
8403 cpu_tmp4 = tcg_temp_new();
8404 cpu_ptr0 = tcg_temp_new_ptr();
8405 cpu_ptr1 = tcg_temp_new_ptr();
8406 cpu_cc_srcT = tcg_temp_local_new();
8408 dc->is_jmp = DISAS_NEXT;
8411 max_insns = tb->cflags & CF_COUNT_MASK;
8412 if (max_insns == 0) {
8413 max_insns = CF_COUNT_MASK;
8415 if (max_insns > TCG_MAX_INSNS) {
8416 max_insns = TCG_MAX_INSNS;
8421 tcg_gen_insn_start(pc_ptr, dc->cc_op);
8424 /* If RF is set, suppress an internally generated breakpoint. */
8425 if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
8426 tb->flags & HF_RF_MASK
8427 ? BP_GDB : BP_ANY))) {
8428 gen_debug(dc, pc_ptr - dc->cs_base);
8429 /* The address covered by the breakpoint must be included in
8430 [tb->pc, tb->pc + tb->size) in order for it to be
8431 properly cleared -- thus we increment the PC here so that
8432 the logic setting tb->size below does the right thing. */
8434 goto done_generating;
8436 if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
8440 pc_ptr = disas_insn(env, dc, pc_ptr);
8441 /* stop translation if indicated */
8444 /* In single-step mode, we generate only one instruction and
8445 then generate an exception. */
8446 /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8447 the flag and abort the translation to give the irqs a
8448 chance to happen. */
8449 if (dc->tf || dc->singlestep_enabled ||
8450 (flags & HF_INHIBIT_IRQ_MASK)) {
8451 gen_jmp_im(pc_ptr - dc->cs_base);
8455 /* Do not cross a page boundary in icount mode, since
8456 that can raise an exception. Stop only when the boundary
8457 would be crossed by the first instruction in the block.
8458 If the current instruction has already crossed it, that is
8459 fine, because no exception has stopped this code.
8461 if ((tb->cflags & CF_USE_ICOUNT)
8462 && ((pc_ptr & TARGET_PAGE_MASK)
8463 != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
8464 || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
8465 gen_jmp_im(pc_ptr - dc->cs_base);
8469 /* if the translation grows too long, stop generating as well */
8470 if (tcg_op_buf_full() ||
8471 (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8472 num_insns >= max_insns) {
8473 gen_jmp_im(pc_ptr - dc->cs_base);
8478 gen_jmp_im(pc_ptr - dc->cs_base);
8483 if (tb->cflags & CF_LAST_IO)
8486 gen_tb_end(tb, num_insns);
8489 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
8490 && qemu_log_in_addr_range(pc_start)) {
8493 qemu_log("----------------\n");
8494 qemu_log("IN: %s\n", lookup_symbol(pc_start));
8495 #ifdef TARGET_X86_64
8500 disas_flags = !dc->code32;
8501 log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
8507 tb->size = pc_ptr - pc_start;
8508 tb->icount = num_insns;
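/* data[] holds the values recorded by tcg_gen_insn_start: data[0] is the PC (eip + cs_base), data[1] the cc_op. */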
8511 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8514 int cc_op = data[1];
8515 env->eip = data[0] - tb->cs_base;
8516 if (cc_op != CC_OP_DYNAMIC) {