1 /******************************************************************************
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 * Copyright (c) 2005 Keir Fraser
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privileged instructions:
11 * Copyright (C) 2006 Qumranet
12 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
17 * This work is licensed under the terms of the GNU GPL, version 2. See
18 * the COPYING file in the top-level directory.
20 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
23 #include <linux/kvm_host.h>
24 #include "kvm_cache_regs.h"
25 #include <asm/kvm_emulate.h>
26 #include <linux/stringify.h>
27 #include <asm/debugreg.h>
36 #define OpImplicit 1ull /* No generic decode */
37 #define OpReg 2ull /* Register */
38 #define OpMem 3ull /* Memory */
39 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
40 #define OpDI 5ull /* ES:DI/EDI/RDI */
41 #define OpMem64 6ull /* Memory, 64-bit */
42 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
43 #define OpDX 8ull /* DX register */
44 #define OpCL 9ull /* CL register (for shifts) */
45 #define OpImmByte 10ull /* 8-bit sign extended immediate */
46 #define OpOne 11ull /* Implied 1 */
47 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
48 #define OpMem16 13ull /* Memory operand (16-bit). */
49 #define OpMem32 14ull /* Memory operand (32-bit). */
50 #define OpImmU 15ull /* Immediate operand, zero extended */
51 #define OpSI 16ull /* SI/ESI/RSI */
52 #define OpImmFAddr 17ull /* Immediate far address */
53 #define OpMemFAddr 18ull /* Far address in memory */
54 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
55 #define OpES 20ull /* ES */
56 #define OpCS 21ull /* CS */
57 #define OpSS 22ull /* SS */
58 #define OpDS 23ull /* DS */
59 #define OpFS 24ull /* FS */
60 #define OpGS 25ull /* GS */
61 #define OpMem8 26ull /* 8-bit zero extended memory operand */
62 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
63 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
64 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
65 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
67 #define OpBits 5 /* Width of operand field */
68 #define OpMask ((1ull << OpBits) - 1)
71 * Opcode effective-address decode tables.
72 * Note that we only emulate instructions that have at least one memory
73 * operand (excluding implicit stack references). We assume that stack
74 * references and instruction fetches will never occur in special memory
75 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need not be handled.
79 /* Operand sizes: 8-bit operands or specified/overridden size. */
80 #define ByteOp (1<<0) /* 8-bit operands. */
81 /* Destination operand type. */
83 #define ImplicitOps (OpImplicit << DstShift)
84 #define DstReg (OpReg << DstShift)
85 #define DstMem (OpMem << DstShift)
86 #define DstAcc (OpAcc << DstShift)
87 #define DstDI (OpDI << DstShift)
88 #define DstMem64 (OpMem64 << DstShift)
89 #define DstMem16 (OpMem16 << DstShift)
90 #define DstImmUByte (OpImmUByte << DstShift)
91 #define DstDX (OpDX << DstShift)
92 #define DstAccLo (OpAccLo << DstShift)
93 #define DstMask (OpMask << DstShift)
94 /* Source operand type. */
96 #define SrcNone (OpNone << SrcShift)
97 #define SrcReg (OpReg << SrcShift)
98 #define SrcMem (OpMem << SrcShift)
99 #define SrcMem16 (OpMem16 << SrcShift)
100 #define SrcMem32 (OpMem32 << SrcShift)
101 #define SrcImm (OpImm << SrcShift)
102 #define SrcImmByte (OpImmByte << SrcShift)
103 #define SrcOne (OpOne << SrcShift)
104 #define SrcImmUByte (OpImmUByte << SrcShift)
105 #define SrcImmU (OpImmU << SrcShift)
106 #define SrcSI (OpSI << SrcShift)
107 #define SrcXLat (OpXLat << SrcShift)
108 #define SrcImmFAddr (OpImmFAddr << SrcShift)
109 #define SrcMemFAddr (OpMemFAddr << SrcShift)
110 #define SrcAcc (OpAcc << SrcShift)
111 #define SrcImmU16 (OpImmU16 << SrcShift)
112 #define SrcImm64 (OpImm64 << SrcShift)
113 #define SrcDX (OpDX << SrcShift)
114 #define SrcMem8 (OpMem8 << SrcShift)
115 #define SrcAccHi (OpAccHi << SrcShift)
116 #define SrcMask (OpMask << SrcShift)
117 #define BitOp (1<<11)
118 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
119 #define String (1<<13) /* String instruction (rep capable) */
120 #define Stack (1<<14) /* Stack instruction (push/pop) */
121 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
122 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
123 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
124 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
125 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
126 #define Escape (5<<15) /* Escape to coprocessor instruction */
127 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
128 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
129 #define Sse (1<<18) /* SSE Vector instruction */
130 /* Generic ModRM decode. */
131 #define ModRM (1<<19)
132 /* Destination is only written; never read. */
135 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
136 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
137 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
138 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
139 #define Undefined (1<<25) /* No Such Instruction */
140 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
141 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
143 #define PageTable (1 << 29) /* instruction used to write page table */
144 #define NotImpl (1 << 30) /* instruction is not implemented */
145 /* Source 2 operand type */
146 #define Src2Shift (31)
147 #define Src2None (OpNone << Src2Shift)
148 #define Src2Mem (OpMem << Src2Shift)
149 #define Src2CL (OpCL << Src2Shift)
150 #define Src2ImmByte (OpImmByte << Src2Shift)
151 #define Src2One (OpOne << Src2Shift)
152 #define Src2Imm (OpImm << Src2Shift)
153 #define Src2ES (OpES << Src2Shift)
154 #define Src2CS (OpCS << Src2Shift)
155 #define Src2SS (OpSS << Src2Shift)
156 #define Src2DS (OpDS << Src2Shift)
157 #define Src2FS (OpFS << Src2Shift)
158 #define Src2GS (OpGS << Src2Shift)
159 #define Src2Mask (OpMask << Src2Shift)
160 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
161 #define AlignMask ((u64)7 << 41)
162 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
163 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
164 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
165 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
166 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
167 #define NoWrite ((u64)1 << 45) /* No writeback */
168 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
169 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
170 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
171 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
172 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
173 #define NearBranch ((u64)1 << 52) /* Near branches */
174 #define No16 ((u64)1 << 53) /* No 16 bit operand */
175 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
177 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
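/*
 * Illustrative composition (not necessarily a literal entry from the
 * opcode tables): ByteOp | DstMem | SrcReg | ModRM | Lock describes a
 * lockable byte operation whose destination is the ModRM r/m operand and
 * whose source is the ModRM reg operand.  The decoder later extracts each
 * operand class as (ctxt->d >> DstShift) & OpMask, and likewise for
 * SrcShift and Src2Shift.
 */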
179 #define X2(x...) x, x
180 #define X3(x...) X2(x), x
181 #define X4(x...) X2(x), X2(x)
182 #define X5(x...) X4(x), x
183 #define X6(x...) X4(x), X2(x)
184 #define X7(x...) X4(x), X3(x)
185 #define X8(x...) X4(x), X4(x)
186 #define X16(x...) X8(x), X8(x)
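/*
 * The X<n> helpers above just repeat their argument list so that runs of
 * identical table entries can be written once; for example X4(DstReg)
 * expands to "DstReg, DstReg, DstReg, DstReg".
 */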
188 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
189 #define FASTOP_SIZE 8
192 * fastop functions have a special calling convention:
193 *
194 * dst: rax (in/out)
195 * src: rdx (in/out)
196 * src2: rcx (in)
197 * flags: rflags (in/out)
198 * ex: rsi (in:fastop pointer, out:zero if exception)
200 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
201 * different operand sizes can be reached by calculation, rather than a jump
202 * table (which would be bigger than the code).
204 * fastop functions are declared as taking a never-defined fastop parameter,
205 * so they can't be called from C directly.
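*
* Illustrative layout (as produced by the FASTOP* macros below): em_add+0
* is the 8-bit "addb" variant, em_add+FASTOP_SIZE the 16-bit one,
* em_add+2*FASTOP_SIZE the 32-bit one and em_add+3*FASTOP_SIZE the 64-bit
* one, so the variant for an operand of "bytes" bytes sits at
* em_add + ilog2(bytes) * FASTOP_SIZE.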
214 int (*execute)(struct x86_emulate_ctxt *ctxt);
215 const struct opcode *group;
216 const struct group_dual *gdual;
217 const struct gprefix *gprefix;
218 const struct escape *esc;
219 const struct instr_dual *idual;
220 const struct mode_dual *mdual;
221 void (*fastop)(struct fastop *fake);
223 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
227 struct opcode mod012[8];
228 struct opcode mod3[8];
232 struct opcode pfx_no;
233 struct opcode pfx_66;
234 struct opcode pfx_f2;
235 struct opcode pfx_f3;
240 struct opcode high[64];
244 struct opcode mod012;
249 struct opcode mode32;
250 struct opcode mode64;
253 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
255 enum x86_transfer_type {
257 X86_TRANSFER_CALL_JMP,
259 X86_TRANSFER_TASK_SWITCH,
262 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
264 if (!(ctxt->regs_valid & (1 << nr))) {
265 ctxt->regs_valid |= 1 << nr;
266 ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
268 return ctxt->_regs[nr];
271 static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
273 ctxt->regs_valid |= 1 << nr;
274 ctxt->regs_dirty |= 1 << nr;
275 return &ctxt->_regs[nr];
278 static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
281 return reg_write(ctxt, nr);
284 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
288 for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
289 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
292 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
294 ctxt->regs_dirty = 0;
295 ctxt->regs_valid = 0;
299 * These EFLAGS bits are restored from saved value during emulation, and
300 * any changes are written back to the saved value after emulation.
302 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
303 X86_EFLAGS_PF|X86_EFLAGS_CF)
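/*
 * Sketch of how the mask is meant to be applied around a flag-modifying
 * helper (the real dispatch lives in fastop(), declared below):
 *
 *	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
 */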
311 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
313 #define FOP_FUNC(name) \
314 ".align " __stringify(FASTOP_SIZE) " \n\t" \
315 ".type " name ", @function \n\t" \
318 #define FOP_RET "ret \n\t"
320 #define FOP_START(op) \
321 extern void em_##op(struct fastop *fake); \
322 asm(".pushsection .text, \"ax\" \n\t" \
323 ".global em_" #op " \n\t" \
330 FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
333 #define FOP1E(op, dst) \
334 FOP_FUNC(#op "_" #dst) \
335 "10: " #op " %" #dst " \n\t" FOP_RET
337 #define FOP1EEX(op, dst) \
338 FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
340 #define FASTOP1(op) \
345 ON64(FOP1E(op##q, rax)) \
348 /* 1-operand, using src2 (for MUL/DIV r/m) */
349 #define FASTOP1SRC2(op, name) \
354 ON64(FOP1E(op, rcx)) \
357 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
358 #define FASTOP1SRC2EX(op, name) \
363 ON64(FOP1EEX(op, rcx)) \
366 #define FOP2E(op, dst, src) \
367 FOP_FUNC(#op "_" #dst "_" #src) \
368 #op " %" #src ", %" #dst " \n\t" FOP_RET
370 #define FASTOP2(op) \
372 FOP2E(op##b, al, dl) \
373 FOP2E(op##w, ax, dx) \
374 FOP2E(op##l, eax, edx) \
375 ON64(FOP2E(op##q, rax, rdx)) \
378 /* 2 operand, word only */
379 #define FASTOP2W(op) \
382 FOP2E(op##w, ax, dx) \
383 FOP2E(op##l, eax, edx) \
384 ON64(FOP2E(op##q, rax, rdx)) \
387 /* 2 operand, src is CL */
388 #define FASTOP2CL(op) \
390 FOP2E(op##b, al, cl) \
391 FOP2E(op##w, ax, cl) \
392 FOP2E(op##l, eax, cl) \
393 ON64(FOP2E(op##q, rax, cl)) \
396 /* 2 operand, src and dest are reversed */
397 #define FASTOP2R(op, name) \
399 FOP2E(op##b, dl, al) \
400 FOP2E(op##w, dx, ax) \
401 FOP2E(op##l, edx, eax) \
402 ON64(FOP2E(op##q, rdx, rax)) \
405 #define FOP3E(op, dst, src, src2) \
406 FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
407 #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
409 /* 3-operand, word-only, src2=cl */
410 #define FASTOP3WCL(op) \
413 FOP3E(op##w, ax, dx, cl) \
414 FOP3E(op##l, eax, edx, cl) \
415 ON64(FOP3E(op##q, rax, rdx, cl)) \
418 /* Special case for SETcc - 1 instruction per cc */
419 #define FOP_SETCC(op) \
421 ".type " #op ", @function \n\t" \
426 asm(".global kvm_fastop_exception \n"
427 "kvm_fastop_exception: xor %esi, %esi; ret");
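/*
 * Fastop variants built with FOP1EEX (the DIV/IDIV family) carry an
 * exception-table entry pointing here: on a fault the handler above
 * clears %esi, i.e. the "ex" register of the fastop calling convention,
 * so the caller can tell that the operation raised an exception instead
 * of completing.
 */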
448 FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
452 * XXX: inoutclob user must know where the argument is being expanded.
453 * Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
455 #define asm_safe(insn, inoutclob...) \
459 asm volatile("1:" insn "\n" \
461 ".pushsection .fixup, \"ax\"\n" \
462 "3: movl $1, %[_fault]\n" \
465 _ASM_EXTABLE(1b, 3b) \
466 : [_fault] "+qm"(_fault) inoutclob ); \
468 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
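/*
 * Hypothetical usage sketch (not a call site from this file): wrap a
 * possibly-faulting instruction and turn the fault into an emulator
 * error code:
 *
 *	rc = asm_safe("fwait");
 *	if (rc != X86EMUL_CONTINUE)
 *		return rc;
 */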
471 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
472 enum x86_intercept intercept,
473 enum x86_intercept_stage stage)
475 struct x86_instruction_info info = {
476 .intercept = intercept,
477 .rep_prefix = ctxt->rep_prefix,
478 .modrm_mod = ctxt->modrm_mod,
479 .modrm_reg = ctxt->modrm_reg,
480 .modrm_rm = ctxt->modrm_rm,
481 .src_val = ctxt->src.val64,
482 .dst_val = ctxt->dst.val64,
483 .src_bytes = ctxt->src.bytes,
484 .dst_bytes = ctxt->dst.bytes,
485 .ad_bytes = ctxt->ad_bytes,
486 .next_rip = ctxt->eip,
489 return ctxt->ops->intercept(ctxt, &info, stage);
492 static void assign_masked(ulong *dest, ulong src, ulong mask)
494 *dest = (*dest & ~mask) | (src & mask);
497 static void assign_register(unsigned long *reg, u64 val, int bytes)
499 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
502 *(u8 *)reg = (u8)val;
505 *(u16 *)reg = (u16)val;
509 break; /* 64b: zero-extend */
516 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
518 return (1UL << (ctxt->ad_bytes << 3)) - 1;
521 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
524 struct desc_struct ss;
526 if (ctxt->mode == X86EMUL_MODE_PROT64)
528 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
529 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
532 static int stack_size(struct x86_emulate_ctxt *ctxt)
534 return (__fls(stack_mask(ctxt)) + 1) >> 3;
537 /* Access/update address held in a register, based on addressing mode. */
538 static inline unsigned long
539 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
541 if (ctxt->ad_bytes == sizeof(unsigned long))
544 return reg & ad_mask(ctxt);
547 static inline unsigned long
548 register_address(struct x86_emulate_ctxt *ctxt, int reg)
550 return address_mask(ctxt, reg_read(ctxt, reg));
553 static void masked_increment(ulong *reg, ulong mask, int inc)
555 assign_masked(reg, *reg + inc, mask);
559 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
561 ulong *preg = reg_rmw(ctxt, reg);
563 assign_register(preg, *preg + inc, ctxt->ad_bytes);
566 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
568 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
571 static u32 desc_limit_scaled(struct desc_struct *desc)
573 u32 limit = get_desc_limit(desc);
575 return desc->g ? (limit << 12) | 0xfff : limit;
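/*
 * Example: with desc->g set and a raw limit of 0xfffff, the scaled limit
 * above becomes 0xffffffff (4 GiB - 1).
 */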
578 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
580 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
583 return ctxt->ops->get_cached_segment_base(ctxt, seg);
586 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
587 u32 error, bool valid)
590 ctxt->exception.vector = vec;
591 ctxt->exception.error_code = error;
592 ctxt->exception.error_code_valid = valid;
593 return X86EMUL_PROPAGATE_FAULT;
596 static int emulate_db(struct x86_emulate_ctxt *ctxt)
598 return emulate_exception(ctxt, DB_VECTOR, 0, false);
601 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
603 return emulate_exception(ctxt, GP_VECTOR, err, true);
606 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
608 return emulate_exception(ctxt, SS_VECTOR, err, true);
611 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
613 return emulate_exception(ctxt, UD_VECTOR, 0, false);
616 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
618 return emulate_exception(ctxt, TS_VECTOR, err, true);
621 static int emulate_de(struct x86_emulate_ctxt *ctxt)
623 return emulate_exception(ctxt, DE_VECTOR, 0, false);
626 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
628 return emulate_exception(ctxt, NM_VECTOR, 0, false);
631 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
634 struct desc_struct desc;
636 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
640 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
645 struct desc_struct desc;
647 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
648 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
652 * x86 defines three classes of vector instructions: explicitly
653 * aligned, explicitly unaligned, and the rest, which change behaviour
654 * depending on whether they're AVX encoded or not.
656 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
657 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
658 * 512 bytes of data must be aligned to a 16 byte boundary.
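*
* For example, a 16-byte access by an Aligned instruction such as MOVDQA
* at linear address 0x1008 faults with #GP(0) in __linearize(), because
* insn_alignment() returns 16 and 0x1008 & 15 != 0; the same access via
* MOVDQU (Unaligned) is allowed, and accesses smaller than 16 bytes are
* never checked here.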
660 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
662 u64 alignment = ctxt->d & AlignMask;
664 if (likely(size < 16))
679 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
680 struct segmented_address addr,
681 unsigned *max_size, unsigned size,
682 bool write, bool fetch,
683 enum x86emul_mode mode, ulong *linear)
685 struct desc_struct desc;
691 la = seg_base(ctxt, addr.seg) + addr.ea;
694 case X86EMUL_MODE_PROT64:
696 if (is_noncanonical_address(la))
699 *max_size = min_t(u64, ~0u, (1ull << 48) - la);
700 if (size > *max_size)
704 *linear = la = (u32)la;
705 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
709 /* code segment in protected mode or read-only data segment */
710 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
711 || !(desc.type & 2)) && write)
713 /* unreadable code segment */
714 if (!fetch && (desc.type & 8) && !(desc.type & 2))
716 lim = desc_limit_scaled(&desc);
717 if (!(desc.type & 8) && (desc.type & 4)) {
718 /* expand-down segment */
721 lim = desc.d ? 0xffffffff : 0xffff;
725 if (lim == 0xffffffff)
728 *max_size = (u64)lim + 1 - addr.ea;
729 if (size > *max_size)
734 if (la & (insn_alignment(ctxt, size) - 1))
735 return emulate_gp(ctxt, 0);
736 return X86EMUL_CONTINUE;
738 if (addr.seg == VCPU_SREG_SS)
739 return emulate_ss(ctxt, 0);
741 return emulate_gp(ctxt, 0);
744 static int linearize(struct x86_emulate_ctxt *ctxt,
745 struct segmented_address addr,
746 unsigned size, bool write,
750 return __linearize(ctxt, addr, &max_size, size, write, false,
754 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
755 enum x86emul_mode mode)
760 struct segmented_address addr = { .seg = VCPU_SREG_CS,
763 if (ctxt->op_bytes != sizeof(unsigned long))
764 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
765 rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
766 if (rc == X86EMUL_CONTINUE)
767 ctxt->_eip = addr.ea;
771 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
773 return assign_eip(ctxt, dst, ctxt->mode);
776 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
777 const struct desc_struct *cs_desc)
779 enum x86emul_mode mode = ctxt->mode;
783 if (ctxt->mode >= X86EMUL_MODE_PROT16) {
787 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
789 mode = X86EMUL_MODE_PROT64;
791 mode = X86EMUL_MODE_PROT32; /* temporary value */
794 if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
795 mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
796 rc = assign_eip(ctxt, dst, mode);
797 if (rc == X86EMUL_CONTINUE)
802 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
804 return assign_eip_near(ctxt, ctxt->_eip + rel);
807 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
808 struct segmented_address addr,
815 rc = linearize(ctxt, addr, size, false, &linear);
816 if (rc != X86EMUL_CONTINUE)
818 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
821 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
822 struct segmented_address addr,
829 rc = linearize(ctxt, addr, size, true, &linear);
830 if (rc != X86EMUL_CONTINUE)
832 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
836 * Prefetch the remaining bytes of the instruction without crossing a page
837 * boundary if they are not in the fetch_cache yet.
839 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
842 unsigned size, max_size;
843 unsigned long linear;
844 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
845 struct segmented_address addr = { .seg = VCPU_SREG_CS,
846 .ea = ctxt->eip + cur_size };
849 * We do not know exactly how many bytes will be needed, and
850 * __linearize is expensive, so fetch as much as possible. We
851 * just have to avoid going beyond the 15 byte limit, the end
852 * of the segment, or the end of the page.
854 * __linearize is called with size 0 so that it does not do any
855 * boundary check itself. Instead, we use max_size to check against op_size.
858 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
860 if (unlikely(rc != X86EMUL_CONTINUE))
863 size = min_t(unsigned, 15UL ^ cur_size, max_size);
864 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
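/*
 * Since cur_size never exceeds 15 here, "15UL ^ cur_size" above is simply
 * 15 - cur_size: the room left before the architectural 15-byte
 * instruction length limit.
 */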
867 * One instruction can only straddle two pages,
868 * and one has been loaded at the beginning of
869 * x86_decode_insn. So, if there still are not enough bytes, we
870 * must have hit the 15-byte instruction length limit.
872 if (unlikely(size < op_size))
873 return emulate_gp(ctxt, 0);
875 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
876 size, &ctxt->exception);
877 if (unlikely(rc != X86EMUL_CONTINUE))
879 ctxt->fetch.end += size;
880 return X86EMUL_CONTINUE;
883 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
886 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
888 if (unlikely(done_size < size))
889 return __do_insn_fetch_bytes(ctxt, size - done_size);
891 return X86EMUL_CONTINUE;
894 /* Fetch next part of the instruction being emulated. */
895 #define insn_fetch(_type, _ctxt) \
898 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
899 if (rc != X86EMUL_CONTINUE) \
901 ctxt->_eip += sizeof(_type); \
902 _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
903 ctxt->fetch.ptr += sizeof(_type); \
907 #define insn_fetch_arr(_arr, _size, _ctxt) \
909 rc = do_insn_fetch_bytes(_ctxt, _size); \
910 if (rc != X86EMUL_CONTINUE) \
912 ctxt->_eip += (_size); \
913 memcpy(_arr, ctxt->fetch.ptr, _size); \
914 ctxt->fetch.ptr += (_size); \
918 * Given the 'reg' portion of a ModRM byte, and a register block, return a
919 * pointer into the block that addresses the relevant register.
920 * AH, CH, DH and BH are decoded only for byte operands when there is no REX prefix.
922 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
926 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
928 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
929 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
931 p = reg_rmw(ctxt, modrm_reg);
935 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
936 struct segmented_address addr,
937 u16 *size, unsigned long *address, int op_bytes)
944 rc = segmented_read_std(ctxt, addr, size, 2);
945 if (rc != X86EMUL_CONTINUE)
948 rc = segmented_read_std(ctxt, addr, address, op_bytes);
962 FASTOP1SRC2(mul, mul_ex);
963 FASTOP1SRC2(imul, imul_ex);
964 FASTOP1SRC2EX(div, div_ex);
965 FASTOP1SRC2EX(idiv, idiv_ex);
994 FASTOP2R(cmp, cmp_r);
996 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
998 /* If src is zero, do not writeback, but update flags */
999 if (ctxt->src.val == 0)
1000 ctxt->dst.type = OP_NONE;
1001 return fastop(ctxt, em_bsf);
1004 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1006 /* If src is zero, do not writeback, but update flags */
1007 if (ctxt->src.val == 0)
1008 ctxt->dst.type = OP_NONE;
1009 return fastop(ctxt, em_bsr);
1012 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1015 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1017 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1018 asm("push %[flags]; popf; call *%[fastop]"
1019 : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
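/*
 * Each SETcc stub emitted by FOP_SETCC occupies 4 bytes (a 3-byte
 * "setcc %al" plus a 1-byte "ret"), which is why the table is indexed
 * above as em_setcc + 4 * cc.
 */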
1023 static void fetch_register_operand(struct operand *op)
1025 switch (op->bytes) {
1027 op->val = *(u8 *)op->addr.reg;
1030 op->val = *(u16 *)op->addr.reg;
1033 op->val = *(u32 *)op->addr.reg;
1036 op->val = *(u64 *)op->addr.reg;
1041 static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
1043 ctxt->ops->get_fpu(ctxt);
1045 case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
1046 case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
1047 case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
1048 case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
1049 case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
1050 case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
1051 case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
1052 case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
1053 #ifdef CONFIG_X86_64
1054 case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
1055 case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
1056 case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
1057 case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
1058 case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
1059 case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
1060 case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
1061 case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
1065 ctxt->ops->put_fpu(ctxt);
1068 static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
1071 ctxt->ops->get_fpu(ctxt);
1073 case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
1074 case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
1075 case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
1076 case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
1077 case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
1078 case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
1079 case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
1080 case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
1081 #ifdef CONFIG_X86_64
1082 case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
1083 case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
1084 case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
1085 case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
1086 case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
1087 case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
1088 case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
1089 case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
1093 ctxt->ops->put_fpu(ctxt);
1096 static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1098 ctxt->ops->get_fpu(ctxt);
1100 case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
1101 case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
1102 case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
1103 case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
1104 case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
1105 case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
1106 case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
1107 case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
1110 ctxt->ops->put_fpu(ctxt);
1113 static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1115 ctxt->ops->get_fpu(ctxt);
1117 case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
1118 case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
1119 case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
1120 case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
1121 case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
1122 case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
1123 case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
1124 case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
1127 ctxt->ops->put_fpu(ctxt);
1130 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1132 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1133 return emulate_nm(ctxt);
1135 ctxt->ops->get_fpu(ctxt);
1136 asm volatile("fninit");
1137 ctxt->ops->put_fpu(ctxt);
1138 return X86EMUL_CONTINUE;
1141 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1145 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1146 return emulate_nm(ctxt);
1148 ctxt->ops->get_fpu(ctxt);
1149 asm volatile("fnstcw %0": "+m"(fcw));
1150 ctxt->ops->put_fpu(ctxt);
1152 ctxt->dst.val = fcw;
1154 return X86EMUL_CONTINUE;
1157 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1161 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1162 return emulate_nm(ctxt);
1164 ctxt->ops->get_fpu(ctxt);
1165 asm volatile("fnstsw %0": "+m"(fsw));
1166 ctxt->ops->put_fpu(ctxt);
1168 ctxt->dst.val = fsw;
1170 return X86EMUL_CONTINUE;
1173 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1176 unsigned reg = ctxt->modrm_reg;
1178 if (!(ctxt->d & ModRM))
1179 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1181 if (ctxt->d & Sse) {
1185 read_sse_reg(ctxt, &op->vec_val, reg);
1188 if (ctxt->d & Mmx) {
1197 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1198 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1200 fetch_register_operand(op);
1201 op->orig_val = op->val;
1204 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1206 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1207 ctxt->modrm_seg = VCPU_SREG_SS;
1210 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1214 int index_reg, base_reg, scale;
1215 int rc = X86EMUL_CONTINUE;
1218 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1219 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1220 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1222 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1223 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1224 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1225 ctxt->modrm_seg = VCPU_SREG_DS;
1227 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1229 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1230 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1232 if (ctxt->d & Sse) {
1235 op->addr.xmm = ctxt->modrm_rm;
1236 read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
1239 if (ctxt->d & Mmx) {
1242 op->addr.mm = ctxt->modrm_rm & 7;
1245 fetch_register_operand(op);
1251 if (ctxt->ad_bytes == 2) {
1252 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1253 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1254 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1255 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1257 /* 16-bit ModR/M decode. */
1258 switch (ctxt->modrm_mod) {
1260 if (ctxt->modrm_rm == 6)
1261 modrm_ea += insn_fetch(u16, ctxt);
1264 modrm_ea += insn_fetch(s8, ctxt);
1267 modrm_ea += insn_fetch(u16, ctxt);
1270 switch (ctxt->modrm_rm) {
1272 modrm_ea += bx + si;
1275 modrm_ea += bx + di;
1278 modrm_ea += bp + si;
1281 modrm_ea += bp + di;
1290 if (ctxt->modrm_mod != 0)
1297 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1298 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1299 ctxt->modrm_seg = VCPU_SREG_SS;
1300 modrm_ea = (u16)modrm_ea;
1302 /* 32/64-bit ModR/M decode. */
1303 if ((ctxt->modrm_rm & 7) == 4) {
1304 sib = insn_fetch(u8, ctxt);
1305 index_reg |= (sib >> 3) & 7;
1306 base_reg |= sib & 7;
1309 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1310 modrm_ea += insn_fetch(s32, ctxt);
1312 modrm_ea += reg_read(ctxt, base_reg);
1313 adjust_modrm_seg(ctxt, base_reg);
1314 /* Increment ESP on POP [ESP] */
1315 if ((ctxt->d & IncSP) &&
1316 base_reg == VCPU_REGS_RSP)
1317 modrm_ea += ctxt->op_bytes;
1320 modrm_ea += reg_read(ctxt, index_reg) << scale;
1321 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1322 modrm_ea += insn_fetch(s32, ctxt);
1323 if (ctxt->mode == X86EMUL_MODE_PROT64)
1324 ctxt->rip_relative = 1;
1326 base_reg = ctxt->modrm_rm;
1327 modrm_ea += reg_read(ctxt, base_reg);
1328 adjust_modrm_seg(ctxt, base_reg);
1330 switch (ctxt->modrm_mod) {
1332 modrm_ea += insn_fetch(s8, ctxt);
1335 modrm_ea += insn_fetch(s32, ctxt);
1339 op->addr.mem.ea = modrm_ea;
1340 if (ctxt->ad_bytes != 8)
1341 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1347 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1350 int rc = X86EMUL_CONTINUE;
1353 switch (ctxt->ad_bytes) {
1355 op->addr.mem.ea = insn_fetch(u16, ctxt);
1358 op->addr.mem.ea = insn_fetch(u32, ctxt);
1361 op->addr.mem.ea = insn_fetch(u64, ctxt);
1368 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1372 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1373 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1375 if (ctxt->src.bytes == 2)
1376 sv = (s16)ctxt->src.val & (s16)mask;
1377 else if (ctxt->src.bytes == 4)
1378 sv = (s32)ctxt->src.val & (s32)mask;
1380 sv = (s64)ctxt->src.val & (s64)mask;
1382 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1383 ctxt->dst.addr.mem.ea + (sv >> 3));
1386 /* only subword offset */
1387 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
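/*
 * Worked example for the adjustment above: "bt %cx, mem" with a 16-bit
 * destination and CX = 35 gives mask = ~15, sv = 35 & ~15 = 32, so the
 * memory ea is advanced by sv >> 3 = 4 bytes and src.val is reduced to
 * 35 & 15 = 3, the bit position within the new 16-bit word.
 */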
1390 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1391 unsigned long addr, void *dest, unsigned size)
1394 struct read_cache *mc = &ctxt->mem_read;
1396 if (mc->pos < mc->end)
1399 WARN_ON((mc->end + size) >= sizeof(mc->data));
1401 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1403 if (rc != X86EMUL_CONTINUE)
1409 memcpy(dest, mc->data + mc->pos, size);
1411 return X86EMUL_CONTINUE;
1414 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1415 struct segmented_address addr,
1422 rc = linearize(ctxt, addr, size, false, &linear);
1423 if (rc != X86EMUL_CONTINUE)
1425 return read_emulated(ctxt, linear, data, size);
1428 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1429 struct segmented_address addr,
1436 rc = linearize(ctxt, addr, size, true, &linear);
1437 if (rc != X86EMUL_CONTINUE)
1439 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1443 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1444 struct segmented_address addr,
1445 const void *orig_data, const void *data,
1451 rc = linearize(ctxt, addr, size, true, &linear);
1452 if (rc != X86EMUL_CONTINUE)
1454 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1455 size, &ctxt->exception);
1458 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1459 unsigned int size, unsigned short port,
1462 struct read_cache *rc = &ctxt->io_read;
1464 if (rc->pos == rc->end) { /* refill pio read ahead */
1465 unsigned int in_page, n;
1466 unsigned int count = ctxt->rep_prefix ?
1467 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1468 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1469 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1470 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1471 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1474 rc->pos = rc->end = 0;
1475 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1480 if (ctxt->rep_prefix && (ctxt->d & String) &&
1481 !(ctxt->eflags & X86_EFLAGS_DF)) {
1482 ctxt->dst.data = rc->data + rc->pos;
1483 ctxt->dst.type = OP_MEM_STR;
1484 ctxt->dst.count = (rc->end - rc->pos) / size;
1487 memcpy(dest, rc->data + rc->pos, size);
1493 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1494 u16 index, struct desc_struct *desc)
1499 ctxt->ops->get_idt(ctxt, &dt);
1501 if (dt.size < index * 8 + 7)
1502 return emulate_gp(ctxt, index << 3 | 0x2);
1504 addr = dt.address + index * 8;
1505 return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
1509 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1510 u16 selector, struct desc_ptr *dt)
1512 const struct x86_emulate_ops *ops = ctxt->ops;
1515 if (selector & 1 << 2) {
1516 struct desc_struct desc;
1519 memset (dt, 0, sizeof *dt);
1520 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1524 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1525 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1527 ops->get_gdt(ctxt, dt);
1530 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1531 u16 selector, ulong *desc_addr_p)
1534 u16 index = selector >> 3;
1537 get_descriptor_table_ptr(ctxt, selector, &dt);
1539 if (dt.size < index * 8 + 7)
1540 return emulate_gp(ctxt, selector & 0xfffc);
1542 addr = dt.address + index * 8;
1544 #ifdef CONFIG_X86_64
1545 if (addr >> 32 != 0) {
1548 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1549 if (!(efer & EFER_LMA))
1554 *desc_addr_p = addr;
1555 return X86EMUL_CONTINUE;
1558 /* allowed just for 8-byte segment descriptors */
1559 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1560 u16 selector, struct desc_struct *desc,
1565 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1566 if (rc != X86EMUL_CONTINUE)
1569 return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
1573 /* allowed just for 8-byte segment descriptors */
1574 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1575 u16 selector, struct desc_struct *desc)
1580 rc = get_descriptor_ptr(ctxt, selector, &addr);
1581 if (rc != X86EMUL_CONTINUE)
1584 return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
1588 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1589 u16 selector, int seg, u8 cpl,
1590 enum x86_transfer_type transfer,
1591 struct desc_struct *desc)
1593 struct desc_struct seg_desc, old_desc;
1595 unsigned err_vec = GP_VECTOR;
1597 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1603 memset(&seg_desc, 0, sizeof seg_desc);
1605 if (ctxt->mode == X86EMUL_MODE_REAL) {
1606 /* set real mode segment descriptor (keep limit etc. for unreal mode) */
1608 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1609 set_desc_base(&seg_desc, selector << 4);
1611 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1612 /* VM86 needs a clean new segment descriptor */
1613 set_desc_base(&seg_desc, selector << 4);
1614 set_desc_limit(&seg_desc, 0xffff);
1624 /* TR should be in GDT only */
1625 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1628 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1629 if (null_selector) {
1630 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1633 if (seg == VCPU_SREG_SS) {
1634 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1638 * ctxt->ops->set_segment expects the CPL to be in
1639 * SS.DPL, so fake an expand-up 32-bit data segment.
1649 /* Skip all following checks */
1653 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1654 if (ret != X86EMUL_CONTINUE)
1657 err_code = selector & 0xfffc;
1658 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1661 /* can't load system descriptor into segment selector */
1662 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1663 if (transfer == X86_TRANSFER_CALL_JMP)
1664 return X86EMUL_UNHANDLEABLE;
1669 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1678 * segment is not a writable data segment, or segment
1679 * selector's RPL != CPL, or segment descriptor's DPL != CPL
1681 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1685 if (!(seg_desc.type & 8))
1688 if (seg_desc.type & 4) {
1694 if (rpl > cpl || dpl != cpl)
1697 /* in long-mode d/b must be clear if l is set */
1698 if (seg_desc.d && seg_desc.l) {
1701 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1702 if (efer & EFER_LMA)
1706 /* CS(RPL) <- CPL */
1707 selector = (selector & 0xfffc) | cpl;
1710 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1712 old_desc = seg_desc;
1713 seg_desc.type |= 2; /* busy */
1714 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1715 sizeof(seg_desc), &ctxt->exception);
1716 if (ret != X86EMUL_CONTINUE)
1719 case VCPU_SREG_LDTR:
1720 if (seg_desc.s || seg_desc.type != 2)
1723 default: /* DS, ES, FS, or GS */
1725 * segment is not a data or readable code segment or
1726 * ((segment is a data or nonconforming code segment)
1727 * and (both RPL and CPL > DPL))
1729 if ((seg_desc.type & 0xa) == 0x8 ||
1730 (((seg_desc.type & 0xc) != 0xc) &&
1731 (rpl > dpl && cpl > dpl)))
1737 /* mark segment as accessed */
1738 if (!(seg_desc.type & 1)) {
1740 ret = write_segment_descriptor(ctxt, selector,
1742 if (ret != X86EMUL_CONTINUE)
1745 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1746 ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
1747 sizeof(base3), &ctxt->exception);
1748 if (ret != X86EMUL_CONTINUE)
1750 if (is_noncanonical_address(get_desc_base(&seg_desc) |
1751 ((u64)base3 << 32)))
1752 return emulate_gp(ctxt, 0);
1755 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1758 return X86EMUL_CONTINUE;
1760 return emulate_exception(ctxt, err_vec, err_code, true);
1763 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1764 u16 selector, int seg)
1766 u8 cpl = ctxt->ops->cpl(ctxt);
1769 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1770 * they can load it at CPL<3 (Intel's manual says only LSS can, but it's wrong).
1773 * However, the Intel manual says that putting IST=1/DPL=3 in
1774 * an interrupt gate will result in SS=3 (the AMD manual instead
1775 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1776 * and only forbid it here.
1778 if (seg == VCPU_SREG_SS && selector == 3 &&
1779 ctxt->mode == X86EMUL_MODE_PROT64)
1780 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1782 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1783 X86_TRANSFER_NONE, NULL);
1786 static void write_register_operand(struct operand *op)
1788 return assign_register(op->addr.reg, op->val, op->bytes);
1791 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1795 write_register_operand(op);
1798 if (ctxt->lock_prefix)
1799 return segmented_cmpxchg(ctxt,
1805 return segmented_write(ctxt,
1811 return segmented_write(ctxt,
1814 op->bytes * op->count);
1817 write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
1820 write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
1828 return X86EMUL_CONTINUE;
1831 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1833 struct segmented_address addr;
1835 rsp_increment(ctxt, -bytes);
1836 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1837 addr.seg = VCPU_SREG_SS;
1839 return segmented_write(ctxt, addr, data, bytes);
1842 static int em_push(struct x86_emulate_ctxt *ctxt)
1844 /* Disable writeback. */
1845 ctxt->dst.type = OP_NONE;
1846 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1849 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1850 void *dest, int len)
1853 struct segmented_address addr;
1855 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1856 addr.seg = VCPU_SREG_SS;
1857 rc = segmented_read(ctxt, addr, dest, len);
1858 if (rc != X86EMUL_CONTINUE)
1861 rsp_increment(ctxt, len);
1865 static int em_pop(struct x86_emulate_ctxt *ctxt)
1867 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1870 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1871 void *dest, int len)
1874 unsigned long val, change_mask;
1875 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1876 int cpl = ctxt->ops->cpl(ctxt);
1878 rc = emulate_pop(ctxt, &val, len);
1879 if (rc != X86EMUL_CONTINUE)
1882 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1883 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1884 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1885 X86_EFLAGS_AC | X86_EFLAGS_ID;
1887 switch(ctxt->mode) {
1888 case X86EMUL_MODE_PROT64:
1889 case X86EMUL_MODE_PROT32:
1890 case X86EMUL_MODE_PROT16:
1892 change_mask |= X86_EFLAGS_IOPL;
1894 change_mask |= X86_EFLAGS_IF;
1896 case X86EMUL_MODE_VM86:
1898 return emulate_gp(ctxt, 0);
1899 change_mask |= X86_EFLAGS_IF;
1901 default: /* real mode */
1902 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1906 *(unsigned long *)dest =
1907 (ctxt->eflags & ~change_mask) | (val & change_mask);
1912 static int em_popf(struct x86_emulate_ctxt *ctxt)
1914 ctxt->dst.type = OP_REG;
1915 ctxt->dst.addr.reg = &ctxt->eflags;
1916 ctxt->dst.bytes = ctxt->op_bytes;
1917 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1920 static int em_enter(struct x86_emulate_ctxt *ctxt)
1923 unsigned frame_size = ctxt->src.val;
1924 unsigned nesting_level = ctxt->src2.val & 31;
1928 return X86EMUL_UNHANDLEABLE;
1930 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1931 rc = push(ctxt, &rbp, stack_size(ctxt));
1932 if (rc != X86EMUL_CONTINUE)
1934 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1936 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1937 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1939 return X86EMUL_CONTINUE;
1942 static int em_leave(struct x86_emulate_ctxt *ctxt)
1944 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1946 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1949 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1951 int seg = ctxt->src2.val;
1953 ctxt->src.val = get_segment_selector(ctxt, seg);
1954 if (ctxt->op_bytes == 4) {
1955 rsp_increment(ctxt, -2);
1959 return em_push(ctxt);
1962 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1964 int seg = ctxt->src2.val;
1965 unsigned long selector;
1968 rc = emulate_pop(ctxt, &selector, 2);
1969 if (rc != X86EMUL_CONTINUE)
1972 if (ctxt->modrm_reg == VCPU_SREG_SS)
1973 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1974 if (ctxt->op_bytes > 2)
1975 rsp_increment(ctxt, ctxt->op_bytes - 2);
1977 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1981 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1983 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1984 int rc = X86EMUL_CONTINUE;
1985 int reg = VCPU_REGS_RAX;
1987 while (reg <= VCPU_REGS_RDI) {
1988 ctxt->src.val = (reg == VCPU_REGS_RSP) ?
1989 old_esp : reg_read(ctxt, reg);
1992 if (rc != X86EMUL_CONTINUE)
2001 static int em_pushf(struct x86_emulate_ctxt *ctxt)
2003 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
2004 return em_push(ctxt);
2007 static int em_popa(struct x86_emulate_ctxt *ctxt)
2009 int rc = X86EMUL_CONTINUE;
2010 int reg = VCPU_REGS_RDI;
2013 while (reg >= VCPU_REGS_RAX) {
2014 if (reg == VCPU_REGS_RSP) {
2015 rsp_increment(ctxt, ctxt->op_bytes);
2019 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2020 if (rc != X86EMUL_CONTINUE)
2022 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2028 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2030 const struct x86_emulate_ops *ops = ctxt->ops;
2037 /* TODO: Add limit checks */
2038 ctxt->src.val = ctxt->eflags;
2040 if (rc != X86EMUL_CONTINUE)
2043 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2045 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2047 if (rc != X86EMUL_CONTINUE)
2050 ctxt->src.val = ctxt->_eip;
2052 if (rc != X86EMUL_CONTINUE)
2055 ops->get_idt(ctxt, &dt);
2057 eip_addr = dt.address + (irq << 2);
2058 cs_addr = dt.address + (irq << 2) + 2;
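/*
 * Worked example: for irq 8 the IVT entry sits at dt.address + 32;
 * bytes 0-1 hold the new IP and bytes 2-3 the new CS selector, which are
 * read below.
 */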
2060 rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
2061 if (rc != X86EMUL_CONTINUE)
2064 rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
2065 if (rc != X86EMUL_CONTINUE)
2068 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2069 if (rc != X86EMUL_CONTINUE)
2077 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2081 invalidate_registers(ctxt);
2082 rc = __emulate_int_real(ctxt, irq);
2083 if (rc == X86EMUL_CONTINUE)
2084 writeback_registers(ctxt);
2088 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2090 switch(ctxt->mode) {
2091 case X86EMUL_MODE_REAL:
2092 return __emulate_int_real(ctxt, irq);
2093 case X86EMUL_MODE_VM86:
2094 case X86EMUL_MODE_PROT16:
2095 case X86EMUL_MODE_PROT32:
2096 case X86EMUL_MODE_PROT64:
2098 /* Protected mode interrupts are not implemented yet */
2099 return X86EMUL_UNHANDLEABLE;
2103 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2105 int rc = X86EMUL_CONTINUE;
2106 unsigned long temp_eip = 0;
2107 unsigned long temp_eflags = 0;
2108 unsigned long cs = 0;
2109 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2110 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2111 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2112 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2113 X86_EFLAGS_AC | X86_EFLAGS_ID |
2115 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2118 /* TODO: Add stack limit check */
2120 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2122 if (rc != X86EMUL_CONTINUE)
2125 if (temp_eip & ~0xffff)
2126 return emulate_gp(ctxt, 0);
2128 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2130 if (rc != X86EMUL_CONTINUE)
2133 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2135 if (rc != X86EMUL_CONTINUE)
2138 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2140 if (rc != X86EMUL_CONTINUE)
2143 ctxt->_eip = temp_eip;
2145 if (ctxt->op_bytes == 4)
2146 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2147 else if (ctxt->op_bytes == 2) {
2148 ctxt->eflags &= ~0xffff;
2149 ctxt->eflags |= temp_eflags;
2152 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2153 ctxt->eflags |= X86_EFLAGS_FIXED;
2154 ctxt->ops->set_nmi_mask(ctxt, false);
2159 static int em_iret(struct x86_emulate_ctxt *ctxt)
2161 switch(ctxt->mode) {
2162 case X86EMUL_MODE_REAL:
2163 return emulate_iret_real(ctxt);
2164 case X86EMUL_MODE_VM86:
2165 case X86EMUL_MODE_PROT16:
2166 case X86EMUL_MODE_PROT32:
2167 case X86EMUL_MODE_PROT64:
2169 /* IRET from protected mode is not implemented yet */
2170 return X86EMUL_UNHANDLEABLE;
2174 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2178 struct desc_struct new_desc;
2179 u8 cpl = ctxt->ops->cpl(ctxt);
2181 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2183 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2184 X86_TRANSFER_CALL_JMP,
2186 if (rc != X86EMUL_CONTINUE)
2189 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2190 /* Error handling is not implemented. */
2191 if (rc != X86EMUL_CONTINUE)
2192 return X86EMUL_UNHANDLEABLE;
2197 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2199 return assign_eip_near(ctxt, ctxt->src.val);
2202 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2207 old_eip = ctxt->_eip;
2208 rc = assign_eip_near(ctxt, ctxt->src.val);
2209 if (rc != X86EMUL_CONTINUE)
2211 ctxt->src.val = old_eip;
2216 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2218 u64 old = ctxt->dst.orig_val64;
2220 if (ctxt->dst.bytes == 16)
2221 return X86EMUL_UNHANDLEABLE;
2223 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2224 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2225 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2226 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2227 ctxt->eflags &= ~X86_EFLAGS_ZF;
2229 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2230 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2232 ctxt->eflags |= X86_EFLAGS_ZF;
2234 return X86EMUL_CONTINUE;
2237 static int em_ret(struct x86_emulate_ctxt *ctxt)
2242 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2243 if (rc != X86EMUL_CONTINUE)
2246 return assign_eip_near(ctxt, eip);
2249 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2252 unsigned long eip, cs;
2253 int cpl = ctxt->ops->cpl(ctxt);
2254 struct desc_struct new_desc;
2256 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2257 if (rc != X86EMUL_CONTINUE)
2259 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2260 if (rc != X86EMUL_CONTINUE)
2262 /* Outer-privilege level return is not implemented */
2263 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2264 return X86EMUL_UNHANDLEABLE;
2265 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2268 if (rc != X86EMUL_CONTINUE)
2270 rc = assign_eip_far(ctxt, eip, &new_desc);
2271 /* Error handling is not implemented. */
2272 if (rc != X86EMUL_CONTINUE)
2273 return X86EMUL_UNHANDLEABLE;
2278 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2282 rc = em_ret_far(ctxt);
2283 if (rc != X86EMUL_CONTINUE)
2285 rsp_increment(ctxt, ctxt->src.val);
2286 return X86EMUL_CONTINUE;
2289 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2291 /* Save real source value, then compare EAX against destination. */
2292 ctxt->dst.orig_val = ctxt->dst.val;
2293 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2294 ctxt->src.orig_val = ctxt->src.val;
2295 ctxt->src.val = ctxt->dst.orig_val;
2296 fastop(ctxt, em_cmp);
2298 if (ctxt->eflags & X86_EFLAGS_ZF) {
2299 /* Success: write back to memory; no update of EAX */
2300 ctxt->src.type = OP_NONE;
2301 ctxt->dst.val = ctxt->src.orig_val;
2303 /* Failure: write the value we saw to EAX. */
2304 ctxt->src.type = OP_REG;
2305 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2306 ctxt->src.val = ctxt->dst.orig_val;
2307 /* Create write-cycle to dest by writing the same value */
2308 ctxt->dst.val = ctxt->dst.orig_val;
2310 return X86EMUL_CONTINUE;
2313 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2315 int seg = ctxt->src2.val;
2319 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2321 rc = load_segment_descriptor(ctxt, sel, seg);
2322 if (rc != X86EMUL_CONTINUE)
2325 ctxt->dst.val = ctxt->src.val;
2329 static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2331 u32 eax, ebx, ecx, edx;
2335 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2336 return edx & bit(X86_FEATURE_LM);
2339 #define GET_SMSTATE(type, smbase, offset) \
2342 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
2344 if (r != X86EMUL_CONTINUE) \
2345 return X86EMUL_UNHANDLEABLE; \
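/*
 * GET_SMSTATE reads a single field of the SMRAM state-save area via
 * ->read_phys() and makes the calling function bail out with
 * X86EMUL_UNHANDLEABLE on failure; e.g. rsm_load_state_32() below uses
 * GET_SMSTATE(u32, smbase, 0x7ffc) to fetch the saved CR0.
 */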
2349 static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2351 desc->g = (flags >> 23) & 1;
2352 desc->d = (flags >> 22) & 1;
2353 desc->l = (flags >> 21) & 1;
2354 desc->avl = (flags >> 20) & 1;
2355 desc->p = (flags >> 15) & 1;
2356 desc->dpl = (flags >> 13) & 3;
2357 desc->s = (flags >> 12) & 1;
2358 desc->type = (flags >> 8) & 15;
2361 static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2363 struct desc_struct desc;
2367 selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
2370 offset = 0x7f84 + n * 12;
2372 offset = 0x7f2c + (n - 3) * 12;
2374 set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
2375 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
2376 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
2377 ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2378 return X86EMUL_CONTINUE;
2381 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2383 struct desc_struct desc;
2388 offset = 0x7e00 + n * 16;
2390 selector = GET_SMSTATE(u16, smbase, offset);
2391 rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
2392 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
2393 set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
2394 base3 = GET_SMSTATE(u32, smbase, offset + 12);
2396 ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2397 return X86EMUL_CONTINUE;
2400 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2406 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2407 * Then enable protected mode. However, PCID cannot be enabled
2408 * if EFER.LMA=0, so set it separately.
2410 bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2412 return X86EMUL_UNHANDLEABLE;
2414 bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2416 return X86EMUL_UNHANDLEABLE;
2418 if (cr4 & X86_CR4_PCIDE) {
2419 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2421 return X86EMUL_UNHANDLEABLE;
2424 return X86EMUL_CONTINUE;
2427 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2429 struct desc_struct desc;
2435 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2436 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
2437 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2438 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2440 for (i = 0; i < 8; i++)
2441 *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
2443 val = GET_SMSTATE(u32, smbase, 0x7fcc);
2444 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2445 val = GET_SMSTATE(u32, smbase, 0x7fc8);
2446 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2448 selector = GET_SMSTATE(u32, smbase, 0x7fc4);
2449 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
2450 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
2451 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
2452 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2454 selector = GET_SMSTATE(u32, smbase, 0x7fc0);
2455 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
2456 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
2457 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
2458 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2460 dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
2461 dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
2462 ctxt->ops->set_gdt(ctxt, &dt);
2464 dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
2465 dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
2466 ctxt->ops->set_idt(ctxt, &dt);
2468 for (i = 0; i < 6; i++) {
2469 int r = rsm_load_seg_32(ctxt, smbase, i);
2470 if (r != X86EMUL_CONTINUE)
2474 cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
2476 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2478 return rsm_enter_protected_mode(ctxt, cr0, cr4);
2481 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2483 struct desc_struct desc;
2490 for (i = 0; i < 16; i++)
2491 *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
2493 ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
2494 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
2496 val = GET_SMSTATE(u32, smbase, 0x7f68);
2497 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2498 val = GET_SMSTATE(u32, smbase, 0x7f60);
2499 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2501 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2502 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
2503 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2504 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2505 val = GET_SMSTATE(u64, smbase, 0x7ed0);
2506 ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
2508 selector = GET_SMSTATE(u32, smbase, 0x7e90);
2509 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
2510 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
2511 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
2512 base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
2513 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2515 dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
2516 dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
2517 ctxt->ops->set_idt(ctxt, &dt);
2519 selector = GET_SMSTATE(u32, smbase, 0x7e70);
2520 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
2521 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
2522 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
2523 base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
2524 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2526 dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
2527 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2528 ctxt->ops->set_gdt(ctxt, &dt);
2530 r = rsm_enter_protected_mode(ctxt, cr0, cr4);
2531 if (r != X86EMUL_CONTINUE)
2534 for (i = 0; i < 6; i++) {
2535 r = rsm_load_seg_64(ctxt, smbase, i);
2536 if (r != X86EMUL_CONTINUE)
2540 return X86EMUL_CONTINUE;
2543 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2545 unsigned long cr0, cr4, efer;
2549 if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
2550 return emulate_ud(ctxt);
2553 * Get back to real mode, to prepare a safe state in which to load
2554 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2555 * supports long mode.
2557 cr4 = ctxt->ops->get_cr(ctxt, 4);
2558 if (emulator_has_longmode(ctxt)) {
2559 struct desc_struct cs_desc;
2561 /* Zero CR4.PCIDE before CR0.PG. */
2562 if (cr4 & X86_CR4_PCIDE) {
2563 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2564 cr4 &= ~X86_CR4_PCIDE;
2567 /* A 32-bit code segment is required to clear EFER.LMA. */
2568 memset(&cs_desc, 0, sizeof(cs_desc));
2570 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2571 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2574 /* For the 64-bit case, this will clear EFER.LMA. */
2575 cr0 = ctxt->ops->get_cr(ctxt, 0);
2576 if (cr0 & X86_CR0_PE)
2577 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2579 /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
2580 if (cr4 & X86_CR4_PAE)
2581 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2583 /* And finally go back to 32-bit mode. */
2585 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2587 smbase = ctxt->ops->get_smbase(ctxt);
2588 if (emulator_has_longmode(ctxt))
2589 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
2591 ret = rsm_load_state_32(ctxt, smbase + 0x8000);
2593 if (ret != X86EMUL_CONTINUE) {
2594 /* FIXME: should triple fault */
2595 return X86EMUL_UNHANDLEABLE;
2598 if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2599 ctxt->ops->set_nmi_mask(ctxt, false);
2601 ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
2602 ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
2603 return X86EMUL_CONTINUE;
2607 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2608 struct desc_struct *cs, struct desc_struct *ss)
2610 cs->l = 0; /* will be adjusted later */
2611 set_desc_base(cs, 0); /* flat segment */
2612 cs->g = 1; /* 4kb granularity */
2613 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2614 cs->type = 0x0b; /* Read, Execute, Accessed */
2616 cs->dpl = 0; /* will be adjusted later */
2621 set_desc_base(ss, 0); /* flat segment */
2622 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2623 ss->g = 1; /* 4kb granularity */
2625 ss->type = 0x03; /* Read/Write, Accessed */
2626 ss->d = 1; /* 32bit stack segment */
2633 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2635 u32 eax, ebx, ecx, edx;
2638 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2639 return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2640 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
2641 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
2644 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2646 const struct x86_emulate_ops *ops = ctxt->ops;
2647 u32 eax, ebx, ecx, edx;
2650 * syscall should always be enabled in long mode - so only fall back to
2651 * vendor-specific (CPUID) checks if other modes are active...
2653 if (ctxt->mode == X86EMUL_MODE_PROT64)
2658 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2660 * Intel ("GenuineIntel")
2661 * remark: Intel CPUs only support "syscall" in 64-bit
2662 * long mode. A 64-bit guest running a 32-bit
2663 * compat app will therefore #UD. While this
2664 * behaviour could be emulated as the AMD
2665 * response, real AMD CPUs cannot behave like Intel.
2667 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
2668 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
2669 edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
2672 /* AMD ("AuthenticAMD") */
2673 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
2674 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
2675 edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
2678 /* AMD ("AMDisbetter!") */
2679 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
2680 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
2681 edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
2684 /* default: (not Intel, not AMD), apply Intel's stricter rules... */
2688 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2690 const struct x86_emulate_ops *ops = ctxt->ops;
2691 struct desc_struct cs, ss;
2696 /* syscall is not available in real mode */
2697 if (ctxt->mode == X86EMUL_MODE_REAL ||
2698 ctxt->mode == X86EMUL_MODE_VM86)
2699 return emulate_ud(ctxt);
2701 if (!(em_syscall_is_enabled(ctxt)))
2702 return emulate_ud(ctxt);
2704 ops->get_msr(ctxt, MSR_EFER, &efer);
2705 setup_syscalls_segments(ctxt, &cs, &ss);
2707 if (!(efer & EFER_SCE))
2708 return emulate_ud(ctxt);
2710 ops->get_msr(ctxt, MSR_STAR, &msr_data);
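/*
 * STAR[47:32] holds the SYSCALL CS selector (with RPL forced to 0);
 * SS is loaded with that value + 8. STAR[63:48] is used by SYSRET.
 */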
2712 cs_sel = (u16)(msr_data & 0xfffc);
2713 ss_sel = (u16)(msr_data + 8);
2715 if (efer & EFER_LMA) {
2719 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2720 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2722 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2723 if (efer & EFER_LMA) {
2724 #ifdef CONFIG_X86_64
2725 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2728 ctxt->mode == X86EMUL_MODE_PROT64 ?
2729 MSR_LSTAR : MSR_CSTAR, &msr_data);
2730 ctxt->_eip = msr_data;
2732 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2733 ctxt->eflags &= ~msr_data;
2734 ctxt->eflags |= X86_EFLAGS_FIXED;
2738 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2739 ctxt->_eip = (u32)msr_data;
2741 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2744 return X86EMUL_CONTINUE;
2747 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2749 const struct x86_emulate_ops *ops = ctxt->ops;
2750 struct desc_struct cs, ss;
2755 ops->get_msr(ctxt, MSR_EFER, &efer);
2756 /* inject #GP if in real mode */
2757 if (ctxt->mode == X86EMUL_MODE_REAL)
2758 return emulate_gp(ctxt, 0);
2761 * Not recognized on AMD in compat mode (but is recognized in legacy mode)
2764 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2765 && !vendor_intel(ctxt))
2766 return emulate_ud(ctxt);
2768 /* sysenter/sysexit have not been tested in 64bit mode. */
2769 if (ctxt->mode == X86EMUL_MODE_PROT64)
2770 return X86EMUL_UNHANDLEABLE;
2772 setup_syscalls_segments(ctxt, &cs, &ss);
2774 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2775 if ((msr_data & 0xfffc) == 0x0)
2776 return emulate_gp(ctxt, 0);
2778 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2779 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2780 ss_sel = cs_sel + 8;
2781 if (efer & EFER_LMA) {
2786 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2787 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2789 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2790 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2792 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2793 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2796 return X86EMUL_CONTINUE;
2799 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2801 const struct x86_emulate_ops *ops = ctxt->ops;
2802 struct desc_struct cs, ss;
2803 u64 msr_data, rcx, rdx;
2805 u16 cs_sel = 0, ss_sel = 0;
2807 /* inject #GP if in real mode or Virtual 8086 mode */
2808 if (ctxt->mode == X86EMUL_MODE_REAL ||
2809 ctxt->mode == X86EMUL_MODE_VM86)
2810 return emulate_gp(ctxt, 0);
2812 setup_syscalls_segments(ctxt, &cs, &ss);
2814 if ((ctxt->rex_prefix & 0x8) != 0x0)
2815 usermode = X86EMUL_MODE_PROT64;
2817 usermode = X86EMUL_MODE_PROT32;
2819 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2820 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2824 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
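/*
 * SYSEXIT derives its selectors from IA32_SYSENTER_CS: a 32-bit return
 * uses CS = SYSENTER_CS + 16 and SS = CS + 8, a 64-bit return uses
 * CS = SYSENTER_CS + 32 and SS = CS + 8.
 */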
2826 case X86EMUL_MODE_PROT32:
2827 cs_sel = (u16)(msr_data + 16);
2828 if ((msr_data & 0xfffc) == 0x0)
2829 return emulate_gp(ctxt, 0);
2830 ss_sel = (u16)(msr_data + 24);
2834 case X86EMUL_MODE_PROT64:
2835 cs_sel = (u16)(msr_data + 32);
2836 if (msr_data == 0x0)
2837 return emulate_gp(ctxt, 0);
2838 ss_sel = cs_sel + 8;
2841 if (is_noncanonical_address(rcx) ||
2842 is_noncanonical_address(rdx))
2843 return emulate_gp(ctxt, 0);
2846 cs_sel |= SEGMENT_RPL_MASK;
2847 ss_sel |= SEGMENT_RPL_MASK;
2849 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2850 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2853 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2855 return X86EMUL_CONTINUE;
2858 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2861 if (ctxt->mode == X86EMUL_MODE_REAL)
2863 if (ctxt->mode == X86EMUL_MODE_VM86)
2865 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2866 return ctxt->ops->cpl(ctxt) > iopl;
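/*
 * The helper below implements the TSS I/O permission bitmap check: the
 * 16-bit bitmap offset is read from byte 102 of the TSS (hence the
 * limit check against 103), and two bytes covering the port are
 * fetched so that accesses crossing a byte boundary work. For
 * illustration, port 0x3f9 with len 2 reads the two bytes at
 * io_bitmap_ptr + 0x7f and is allowed only if bits 1 and 2 of that
 * value are clear.
 */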
2869 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2872 const struct x86_emulate_ops *ops = ctxt->ops;
2873 struct desc_struct tr_seg;
2876 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2877 unsigned mask = (1 << len) - 1;
2880 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2883 if (desc_limit_scaled(&tr_seg) < 103)
2885 base = get_desc_base(&tr_seg);
2886 #ifdef CONFIG_X86_64
2887 base |= ((u64)base3) << 32;
2889 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2890 if (r != X86EMUL_CONTINUE)
2892 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2894 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2895 if (r != X86EMUL_CONTINUE)
2897 if ((perm >> bit_idx) & mask)
2902 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2908 if (emulator_bad_iopl(ctxt))
2909 if (!emulator_io_port_access_allowed(ctxt, port, len))
2912 ctxt->perm_ok = true;
2917 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2920 * Intel CPUs mask the counter and pointers in a quite strange
2921 * manner when ECX is zero due to REP-string optimizations.
2923 #ifdef CONFIG_X86_64
2924 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2927 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2930 case 0xa4: /* movsb */
2931 case 0xa5: /* movsd/w */
2932 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2934 case 0xaa: /* stosb */
2935 case 0xab: /* stosd/w */
2936 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2941 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2942 struct tss_segment_16 *tss)
2944 tss->ip = ctxt->_eip;
2945 tss->flag = ctxt->eflags;
2946 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2947 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2948 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2949 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2950 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2951 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2952 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2953 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2955 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2956 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2957 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2958 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2959 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2962 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2963 struct tss_segment_16 *tss)
2968 ctxt->_eip = tss->ip;
2969 ctxt->eflags = tss->flag | 2;
2970 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2971 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2972 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2973 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2974 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2975 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2976 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2977 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2980 * SDM says that segment selectors are loaded before segment descriptors.
2983 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2984 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2985 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2986 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2987 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2992 * Now load the segment descriptors. If a fault happens at this stage,
2993 * it is handled in the context of the new task.
2995 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2996 X86_TRANSFER_TASK_SWITCH, NULL);
2997 if (ret != X86EMUL_CONTINUE)
2999 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3000 X86_TRANSFER_TASK_SWITCH, NULL);
3001 if (ret != X86EMUL_CONTINUE)
3003 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3004 X86_TRANSFER_TASK_SWITCH, NULL);
3005 if (ret != X86EMUL_CONTINUE)
3007 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3008 X86_TRANSFER_TASK_SWITCH, NULL);
3009 if (ret != X86EMUL_CONTINUE)
3011 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3012 X86_TRANSFER_TASK_SWITCH, NULL);
3013 if (ret != X86EMUL_CONTINUE)
3016 return X86EMUL_CONTINUE;
3019 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
3020 u16 tss_selector, u16 old_tss_sel,
3021 ulong old_tss_base, struct desc_struct *new_desc)
3023 const struct x86_emulate_ops *ops = ctxt->ops;
3024 struct tss_segment_16 tss_seg;
3026 u32 new_tss_base = get_desc_base(new_desc);
3028 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3030 if (ret != X86EMUL_CONTINUE)
3033 save_state_to_tss16(ctxt, &tss_seg);
3035 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3037 if (ret != X86EMUL_CONTINUE)
3040 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3042 if (ret != X86EMUL_CONTINUE)
3045 if (old_tss_sel != 0xffff) {
3046 tss_seg.prev_task_link = old_tss_sel;
3048 ret = ops->write_std(ctxt, new_tss_base,
3049 &tss_seg.prev_task_link,
3050 sizeof tss_seg.prev_task_link,
3052 if (ret != X86EMUL_CONTINUE)
3056 return load_state_from_tss16(ctxt, &tss_seg);
3059 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3060 struct tss_segment_32 *tss)
3062 /* CR3 and the LDT selector are intentionally not saved */
3063 tss->eip = ctxt->_eip;
3064 tss->eflags = ctxt->eflags;
3065 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3066 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3067 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3068 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3069 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3070 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3071 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3072 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3074 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3075 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3076 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3077 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3078 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3079 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3082 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3083 struct tss_segment_32 *tss)
3088 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3089 return emulate_gp(ctxt, 0);
3090 ctxt->_eip = tss->eip;
3091 ctxt->eflags = tss->eflags | 2;
3093 /* General purpose registers */
3094 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3095 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3096 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3097 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3098 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3099 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3100 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3101 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3104 * SDM says that segment selectors are loaded before segment
3105 * descriptors. This is important because CPL checks will use CS.RPL.
3108 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3109 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3110 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3111 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3112 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3113 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3114 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3117 * If we're switching between Protected Mode and VM86, we need to make
3118 * sure to update the mode before loading the segment descriptors so
3119 * that the selectors are interpreted correctly.
3121 if (ctxt->eflags & X86_EFLAGS_VM) {
3122 ctxt->mode = X86EMUL_MODE_VM86;
3125 ctxt->mode = X86EMUL_MODE_PROT32;
3130 * Now load the segment descriptors. If a fault happens at this stage,
3131 * it is handled in the context of the new task.
3133 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3134 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3135 if (ret != X86EMUL_CONTINUE)
3137 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3138 X86_TRANSFER_TASK_SWITCH, NULL);
3139 if (ret != X86EMUL_CONTINUE)
3141 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3142 X86_TRANSFER_TASK_SWITCH, NULL);
3143 if (ret != X86EMUL_CONTINUE)
3145 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3146 X86_TRANSFER_TASK_SWITCH, NULL);
3147 if (ret != X86EMUL_CONTINUE)
3149 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3150 X86_TRANSFER_TASK_SWITCH, NULL);
3151 if (ret != X86EMUL_CONTINUE)
3153 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3154 X86_TRANSFER_TASK_SWITCH, NULL);
3155 if (ret != X86EMUL_CONTINUE)
3157 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3158 X86_TRANSFER_TASK_SWITCH, NULL);
3163 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3164 u16 tss_selector, u16 old_tss_sel,
3165 ulong old_tss_base, struct desc_struct *new_desc)
3167 const struct x86_emulate_ops *ops = ctxt->ops;
3168 struct tss_segment_32 tss_seg;
3170 u32 new_tss_base = get_desc_base(new_desc);
3171 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3172 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3174 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3176 if (ret != X86EMUL_CONTINUE)
3179 save_state_to_tss32(ctxt, &tss_seg);
3181 /* Only GP registers and segment selectors are saved */
3182 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3183 ldt_sel_offset - eip_offset, &ctxt->exception);
3184 if (ret != X86EMUL_CONTINUE)
3187 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3189 if (ret != X86EMUL_CONTINUE)
3192 if (old_tss_sel != 0xffff) {
3193 tss_seg.prev_task_link = old_tss_sel;
3195 ret = ops->write_std(ctxt, new_tss_base,
3196 &tss_seg.prev_task_link,
3197 sizeof tss_seg.prev_task_link,
3199 if (ret != X86EMUL_CONTINUE)
3203 return load_state_from_tss32(ctxt, &tss_seg);
3206 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3207 u16 tss_selector, int idt_index, int reason,
3208 bool has_error_code, u32 error_code)
3210 const struct x86_emulate_ops *ops = ctxt->ops;
3211 struct desc_struct curr_tss_desc, next_tss_desc;
3213 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3214 ulong old_tss_base =
3215 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3217 ulong desc_addr, dr7;
3219 /* FIXME: old_tss_base == ~0 ? */
3221 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3222 if (ret != X86EMUL_CONTINUE)
3224 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3225 if (ret != X86EMUL_CONTINUE)
3228 /* FIXME: check that next_tss_desc is tss */
3231 * Check privileges. The three cases are task switch caused by...
3233 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3234 * 2. Exception/IRQ/iret: No check is performed
3235 * 3. jmp/call to TSS/task-gate: No check is performed since the
3236 * hardware checks it before exiting.
3238 if (reason == TASK_SWITCH_GATE) {
3239 if (idt_index != -1) {
3240 /* Software interrupts */
3241 struct desc_struct task_gate_desc;
3244 ret = read_interrupt_descriptor(ctxt, idt_index,
3246 if (ret != X86EMUL_CONTINUE)
3249 dpl = task_gate_desc.dpl;
3250 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3251 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3255 desc_limit = desc_limit_scaled(&next_tss_desc);
3256 if (!next_tss_desc.p ||
3257 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3258 desc_limit < 0x2b)) {
3259 return emulate_ts(ctxt, tss_selector & 0xfffc);
3262 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3263 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3264 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3267 if (reason == TASK_SWITCH_IRET)
3268 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3270 /* Set the back link to the previous task only if the NT bit is set in
3271 EFLAGS; note that old_tss_sel is not used after this point. */
3272 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3273 old_tss_sel = 0xffff;
3275 if (next_tss_desc.type & 8)
3276 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3277 old_tss_base, &next_tss_desc);
3279 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3280 old_tss_base, &next_tss_desc);
3281 if (ret != X86EMUL_CONTINUE)
3284 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3285 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3287 if (reason != TASK_SWITCH_IRET) {
3288 next_tss_desc.type |= (1 << 1); /* set busy flag */
3289 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3292 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3293 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3295 if (has_error_code) {
3296 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3297 ctxt->lock_prefix = 0;
3298 ctxt->src.val = (unsigned long) error_code;
3299 ret = em_push(ctxt);
3302 ops->get_dr(ctxt, 7, &dr7);
3303 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3308 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3309 u16 tss_selector, int idt_index, int reason,
3310 bool has_error_code, u32 error_code)
3314 invalidate_registers(ctxt);
3315 ctxt->_eip = ctxt->eip;
3316 ctxt->dst.type = OP_NONE;
3318 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3319 has_error_code, error_code);
3321 if (rc == X86EMUL_CONTINUE) {
3322 ctxt->eip = ctxt->_eip;
3323 writeback_registers(ctxt);
3326 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3329 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3332 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3334 register_address_increment(ctxt, reg, df * op->bytes);
3335 op->addr.mem.ea = register_address(ctxt, reg);
3338 static int em_das(struct x86_emulate_ctxt *ctxt)
3341 bool af, cf, old_cf;
3343 cf = ctxt->eflags & X86_EFLAGS_CF;
3349 af = ctxt->eflags & X86_EFLAGS_AF;
3350 if ((al & 0x0f) > 9 || af) {
3352 cf = old_cf | (al >= 250);
3357 if (old_al > 0x99 || old_cf) {
3363 /* Set PF, ZF, SF */
3364 ctxt->src.type = OP_IMM;
3366 ctxt->src.bytes = 1;
3367 fastop(ctxt, em_or);
3368 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3370 ctxt->eflags |= X86_EFLAGS_CF;
3372 ctxt->eflags |= X86_EFLAGS_AF;
3373 return X86EMUL_CONTINUE;
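/*
 * Worked example: after "sub al, 0x35" with al == 0x23 the result is
 * al = 0xee with CF = AF = 1. DAS then subtracts 6 (low nibble 0xe > 9)
 * giving 0xe8, and subtracts 0x60 (old al > 0x99) giving 0x88 with CF
 * set, the packed BCD encoding of 23 - 35 = -12 (88 with a borrow).
 */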
3376 static int em_aam(struct x86_emulate_ctxt *ctxt)
3380 if (ctxt->src.val == 0)
3381 return emulate_de(ctxt);
3383 al = ctxt->dst.val & 0xff;
3384 ah = al / ctxt->src.val;
3385 al %= ctxt->src.val;
3387 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3389 /* Set PF, ZF, SF */
3390 ctxt->src.type = OP_IMM;
3392 ctxt->src.bytes = 1;
3393 fastop(ctxt, em_or);
3395 return X86EMUL_CONTINUE;
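/*
 * Worked example: multiplying the BCD digits 7 and 9 leaves al = 63
 * (0x3f); "aam" with the default base 10 then yields ah = 6, al = 3.
 * A zero immediate is a divide error, hence the emulate_de() above.
 */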
3398 static int em_aad(struct x86_emulate_ctxt *ctxt)
3400 u8 al = ctxt->dst.val & 0xff;
3401 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3403 al = (al + (ah * ctxt->src.val)) & 0xff;
3405 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3407 /* Set PF, ZF, SF */
3408 ctxt->src.type = OP_IMM;
3410 ctxt->src.bytes = 1;
3411 fastop(ctxt, em_or);
3413 return X86EMUL_CONTINUE;
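/*
 * Worked example: with ah = 6, al = 3 and the default base 10, "aad"
 * produces al = 6 * 10 + 3 = 63 (0x3f) and clears ah, since only the
 * low byte is merged back into the destination above.
 */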
3416 static int em_call(struct x86_emulate_ctxt *ctxt)
3419 long rel = ctxt->src.val;
3421 ctxt->src.val = (unsigned long)ctxt->_eip;
3422 rc = jmp_rel(ctxt, rel);
3423 if (rc != X86EMUL_CONTINUE)
3425 return em_push(ctxt);
3428 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3433 struct desc_struct old_desc, new_desc;
3434 const struct x86_emulate_ops *ops = ctxt->ops;
3435 int cpl = ctxt->ops->cpl(ctxt);
3436 enum x86emul_mode prev_mode = ctxt->mode;
3438 old_eip = ctxt->_eip;
3439 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3441 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3442 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3443 X86_TRANSFER_CALL_JMP, &new_desc);
3444 if (rc != X86EMUL_CONTINUE)
3447 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3448 if (rc != X86EMUL_CONTINUE)
3451 ctxt->src.val = old_cs;
3453 if (rc != X86EMUL_CONTINUE)
3456 ctxt->src.val = old_eip;
3458 /* If we failed, we tainted the memory, but at the very least we should restore the original segment state. */
3460 if (rc != X86EMUL_CONTINUE) {
3461 pr_warn_once("faulting far call emulation tainted memory\n");
3466 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3467 ctxt->mode = prev_mode;
3472 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3477 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3478 if (rc != X86EMUL_CONTINUE)
3480 rc = assign_eip_near(ctxt, eip);
3481 if (rc != X86EMUL_CONTINUE)
3483 rsp_increment(ctxt, ctxt->src.val);
3484 return X86EMUL_CONTINUE;
3487 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3489 /* Write back the register source. */
3490 ctxt->src.val = ctxt->dst.val;
3491 write_register_operand(&ctxt->src);
3493 /* Write back the memory destination with implicit LOCK prefix. */
3494 ctxt->dst.val = ctxt->src.orig_val;
3495 ctxt->lock_prefix = 1;
3496 return X86EMUL_CONTINUE;
3499 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3501 ctxt->dst.val = ctxt->src2.val;
3502 return fastop(ctxt, em_imul);
3505 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3507 ctxt->dst.type = OP_REG;
3508 ctxt->dst.bytes = ctxt->src.bytes;
3509 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3510 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3512 return X86EMUL_CONTINUE;
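/*
 * The expression above broadcasts the sign bit of the source: if the
 * top bit is set, (1 - 1) inverted gives all ones, otherwise (0 - 1)
 * inverted gives zero. E.g. "cwd" with ax = 0x8000 sets dx = 0xffff,
 * while ax = 0x1234 sets dx = 0.
 */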
3515 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3519 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3520 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3521 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3522 return X86EMUL_CONTINUE;
3525 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3529 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3530 return emulate_gp(ctxt, 0);
3531 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3532 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3533 return X86EMUL_CONTINUE;
3536 static int em_mov(struct x86_emulate_ctxt *ctxt)
3538 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3539 return X86EMUL_CONTINUE;
3542 #define FFL(x) bit(X86_FEATURE_##x)
3544 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3546 u32 ebx, ecx, edx, eax = 1;
3550 * Check MOVBE is set in the guest-visible CPUID leaf.
3552 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3553 if (!(ecx & FFL(MOVBE)))
3554 return emulate_ud(ctxt);
3556 switch (ctxt->op_bytes) {
3559 * From MOVBE definition: "...When the operand size is 16 bits,
3560 * the upper word of the destination register remains unchanged
3563 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3564 * rules, so we have to do the operation almost by hand.
3566 tmp = (u16)ctxt->src.val;
3567 ctxt->dst.val &= ~0xffffUL;
3568 ctxt->dst.val |= (unsigned long)swab16(tmp);
3571 ctxt->dst.val = swab32((u32)ctxt->src.val);
3574 ctxt->dst.val = swab64(ctxt->src.val);
3579 return X86EMUL_CONTINUE;
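/*
 * For illustration, a 32-bit movbe load of the value 0x12345678 stores
 * 0x78563412 in the destination register.
 */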
3582 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3584 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3585 return emulate_gp(ctxt, 0);
3587 /* Disable writeback. */
3588 ctxt->dst.type = OP_NONE;
3589 return X86EMUL_CONTINUE;
3592 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3596 if (ctxt->mode == X86EMUL_MODE_PROT64)
3597 val = ctxt->src.val & ~0ULL;
3599 val = ctxt->src.val & ~0U;
3601 /* #UD condition is already handled. */
3602 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3603 return emulate_gp(ctxt, 0);
3605 /* Disable writeback. */
3606 ctxt->dst.type = OP_NONE;
3607 return X86EMUL_CONTINUE;
3610 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3614 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3615 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3616 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3617 return emulate_gp(ctxt, 0);
3619 return X86EMUL_CONTINUE;
3622 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3626 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3627 return emulate_gp(ctxt, 0);
3629 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3630 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3631 return X86EMUL_CONTINUE;
3634 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3636 if (ctxt->modrm_reg > VCPU_SREG_GS)
3637 return emulate_ud(ctxt);
3639 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3640 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3641 ctxt->dst.bytes = 2;
3642 return X86EMUL_CONTINUE;
3645 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3647 u16 sel = ctxt->src.val;
3649 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3650 return emulate_ud(ctxt);
3652 if (ctxt->modrm_reg == VCPU_SREG_SS)
3653 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3655 /* Disable writeback. */
3656 ctxt->dst.type = OP_NONE;
3657 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3660 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3662 u16 sel = ctxt->src.val;
3664 /* Disable writeback. */
3665 ctxt->dst.type = OP_NONE;
3666 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3669 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3671 u16 sel = ctxt->src.val;
3673 /* Disable writeback. */
3674 ctxt->dst.type = OP_NONE;
3675 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3678 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3683 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3684 if (rc == X86EMUL_CONTINUE)
3685 ctxt->ops->invlpg(ctxt, linear);
3686 /* Disable writeback. */
3687 ctxt->dst.type = OP_NONE;
3688 return X86EMUL_CONTINUE;
3691 static int em_clts(struct x86_emulate_ctxt *ctxt)
3695 cr0 = ctxt->ops->get_cr(ctxt, 0);
3697 ctxt->ops->set_cr(ctxt, 0, cr0);
3698 return X86EMUL_CONTINUE;
3701 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3703 int rc = ctxt->ops->fix_hypercall(ctxt);
3705 if (rc != X86EMUL_CONTINUE)
3708 /* Let the processor re-execute the fixed hypercall */
3709 ctxt->_eip = ctxt->eip;
3710 /* Disable writeback. */
3711 ctxt->dst.type = OP_NONE;
3712 return X86EMUL_CONTINUE;
3715 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3716 void (*get)(struct x86_emulate_ctxt *ctxt,
3717 struct desc_ptr *ptr))
3719 struct desc_ptr desc_ptr;
3721 if (ctxt->mode == X86EMUL_MODE_PROT64)
3723 get(ctxt, &desc_ptr);
3724 if (ctxt->op_bytes == 2) {
3726 desc_ptr.address &= 0x00ffffff;
3728 /* Disable writeback. */
3729 ctxt->dst.type = OP_NONE;
3730 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3731 &desc_ptr, 2 + ctxt->op_bytes);
3734 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3736 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3739 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3741 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3744 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3746 struct desc_ptr desc_ptr;
3749 if (ctxt->mode == X86EMUL_MODE_PROT64)
3751 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3752 &desc_ptr.size, &desc_ptr.address,
3754 if (rc != X86EMUL_CONTINUE)
3756 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3757 is_noncanonical_address(desc_ptr.address))
3758 return emulate_gp(ctxt, 0);
3760 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3762 ctxt->ops->set_idt(ctxt, &desc_ptr);
3763 /* Disable writeback. */
3764 ctxt->dst.type = OP_NONE;
3765 return X86EMUL_CONTINUE;
3768 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3770 return em_lgdt_lidt(ctxt, true);
3773 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3775 return em_lgdt_lidt(ctxt, false);
3778 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3780 if (ctxt->dst.type == OP_MEM)
3781 ctxt->dst.bytes = 2;
3782 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3783 return X86EMUL_CONTINUE;
3786 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3788 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3789 | (ctxt->src.val & 0x0f));
3790 ctxt->dst.type = OP_NONE;
3791 return X86EMUL_CONTINUE;
3794 static int em_loop(struct x86_emulate_ctxt *ctxt)
3796 int rc = X86EMUL_CONTINUE;
3798 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
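/*
 * Opcodes 0xe0 (loopne), 0xe1 (loope) and 0xe2 (loop) differ only in
 * their ZF condition; XORing the opcode with 0x5 turns 0xe0/0xe1 into
 * condition codes 5 (ZF clear) and 4 (ZF set) for test_cc(), while
 * plain loop is handled by the explicit 0xe2 check.
 */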
3799 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3800 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3801 rc = jmp_rel(ctxt, ctxt->src.val);
3806 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3808 int rc = X86EMUL_CONTINUE;
3810 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3811 rc = jmp_rel(ctxt, ctxt->src.val);
3816 static int em_in(struct x86_emulate_ctxt *ctxt)
3818 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3820 return X86EMUL_IO_NEEDED;
3822 return X86EMUL_CONTINUE;
3825 static int em_out(struct x86_emulate_ctxt *ctxt)
3827 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3829 /* Disable writeback. */
3830 ctxt->dst.type = OP_NONE;
3831 return X86EMUL_CONTINUE;
3834 static int em_cli(struct x86_emulate_ctxt *ctxt)
3836 if (emulator_bad_iopl(ctxt))
3837 return emulate_gp(ctxt, 0);
3839 ctxt->eflags &= ~X86_EFLAGS_IF;
3840 return X86EMUL_CONTINUE;
3843 static int em_sti(struct x86_emulate_ctxt *ctxt)
3845 if (emulator_bad_iopl(ctxt))
3846 return emulate_gp(ctxt, 0);
3848 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3849 ctxt->eflags |= X86_EFLAGS_IF;
3850 return X86EMUL_CONTINUE;
3853 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3855 u32 eax, ebx, ecx, edx;
3857 eax = reg_read(ctxt, VCPU_REGS_RAX);
3858 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3859 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3860 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3861 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3862 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3863 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3864 return X86EMUL_CONTINUE;
3867 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3871 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3873 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3875 ctxt->eflags &= ~0xffUL;
3876 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3877 return X86EMUL_CONTINUE;
3880 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3882 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3883 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3884 return X86EMUL_CONTINUE;
3887 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3889 switch (ctxt->op_bytes) {
3890 #ifdef CONFIG_X86_64
3892 asm("bswap %0" : "+r"(ctxt->dst.val));
3896 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3899 return X86EMUL_CONTINUE;
3902 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3904 /* emulating clflush regardless of cpuid */
3905 return X86EMUL_CONTINUE;
3908 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3910 ctxt->dst.val = (s32) ctxt->src.val;
3911 return X86EMUL_CONTINUE;
3914 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3916 u32 eax = 1, ebx, ecx = 0, edx;
3918 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3919 if (!(edx & FFL(FXSR)))
3920 return emulate_ud(ctxt);
3922 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3923 return emulate_nm(ctxt);
3926 * Rather than work around the lack of fxsave64/fxrstor64 on old
3927 * compilers, don't emulate a case that should never be hit.
3929 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3930 return X86EMUL_UNHANDLEABLE;
3932 return X86EMUL_CONTINUE;
3936 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3937 * 1) 16 bit mode
3938 * 2) 32 bit mode
3939 * - like (1), but FIP and FDP are only 16 bit. At least Intel CPUs
3940 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3941 * this.
3942 * 3) 64-bit mode with REX.W prefix
3943 * - like (2), but XMM 8-15 are being saved and restored
3944 * 4) 64-bit mode without REX.W prefix
3945 * - like (3), but FIP and FDP are 64 bit
3947 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3948 * desired result. (4) is not emulated.
3950 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3951 * and FPU DS) should match.
3953 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3955 struct fxregs_state fx_state;
3959 rc = check_fxsr(ctxt);
3960 if (rc != X86EMUL_CONTINUE)
3963 ctxt->ops->get_fpu(ctxt);
3965 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3967 ctxt->ops->put_fpu(ctxt);
3969 if (rc != X86EMUL_CONTINUE)
3972 if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
3973 size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
3975 size = offsetof(struct fxregs_state, xmm_space[0]);
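/*
 * In the standard FXSAVE image the XMM area starts at byte 160, so the
 * write below covers the 160-byte x87/MMX state plus, when CR4.OSFXSR
 * is set, the first eight 16-byte XMM slots (288 bytes in total).
 */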
3977 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3980 static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
3981 struct fxregs_state *new)
3983 int rc = X86EMUL_CONTINUE;
3984 struct fxregs_state old;
3986 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
3987 if (rc != X86EMUL_CONTINUE)
3991 * 64 bit host will restore XMM 8-15, which is not correct on non-64
3992 * bit guests. Load the current values in order to preserve 64 bit
3993 * XMMs after fxrstor.
3995 #ifdef CONFIG_X86_64
3996 /* XXX: accessing XMM 8-15 very awkwardly */
3997 memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
4001 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
4002 * does save and restore MXCSR.
4004 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
4005 memcpy(new->xmm_space, old.xmm_space, 8 * 16);
4010 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4012 struct fxregs_state fx_state;
4015 rc = check_fxsr(ctxt);
4016 if (rc != X86EMUL_CONTINUE)
4019 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
4020 if (rc != X86EMUL_CONTINUE)
4023 if (fx_state.mxcsr >> 16)
4024 return emulate_gp(ctxt, 0);
4026 ctxt->ops->get_fpu(ctxt);
4028 if (ctxt->mode < X86EMUL_MODE_PROT64)
4029 rc = fxrstor_fixup(ctxt, &fx_state);
4031 if (rc == X86EMUL_CONTINUE)
4032 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4034 ctxt->ops->put_fpu(ctxt);
4039 static bool valid_cr(int nr)
4051 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
4053 if (!valid_cr(ctxt->modrm_reg))
4054 return emulate_ud(ctxt);
4056 return X86EMUL_CONTINUE;
4059 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
4061 u64 new_val = ctxt->src.val64;
4062 int cr = ctxt->modrm_reg;
4065 static u64 cr_reserved_bits[] = {
4066 0xffffffff00000000ULL,
4067 0, 0, 0, /* CR3 checked later */
4074 return emulate_ud(ctxt);
4076 if (new_val & cr_reserved_bits[cr])
4077 return emulate_gp(ctxt, 0);
4082 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
4083 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
4084 return emulate_gp(ctxt, 0);
4086 cr4 = ctxt->ops->get_cr(ctxt, 4);
4087 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4089 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
4090 !(cr4 & X86_CR4_PAE))
4091 return emulate_gp(ctxt, 0);
4098 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4099 if (efer & EFER_LMA)
4100 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
4103 return emulate_gp(ctxt, 0);
4108 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4110 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
4111 return emulate_gp(ctxt, 0);
4117 return X86EMUL_CONTINUE;
4120 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4124 ctxt->ops->get_dr(ctxt, 7, &dr7);
4126 /* Check if DR7.GD (general detect enable, bit 13) is set */
4127 return dr7 & (1 << 13);
4130 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4132 int dr = ctxt->modrm_reg;
4136 return emulate_ud(ctxt);
4138 cr4 = ctxt->ops->get_cr(ctxt, 4);
4139 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4140 return emulate_ud(ctxt);
4142 if (check_dr7_gd(ctxt)) {
4145 ctxt->ops->get_dr(ctxt, 6, &dr6);
4147 dr6 |= DR6_BD | DR6_RTM;
4148 ctxt->ops->set_dr(ctxt, 6, dr6);
4149 return emulate_db(ctxt);
4152 return X86EMUL_CONTINUE;
4155 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4157 u64 new_val = ctxt->src.val64;
4158 int dr = ctxt->modrm_reg;
4160 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4161 return emulate_gp(ctxt, 0);
4163 return check_dr_read(ctxt);
4166 static int check_svme(struct x86_emulate_ctxt *ctxt)
4170 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4172 if (!(efer & EFER_SVME))
4173 return emulate_ud(ctxt);
4175 return X86EMUL_CONTINUE;
4178 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4180 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4182 /* Valid physical address? */
4183 if (rax & 0xffff000000000000ULL)
4184 return emulate_gp(ctxt, 0);
4186 return check_svme(ctxt);
4189 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4191 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4193 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4194 return emulate_ud(ctxt);
4196 return X86EMUL_CONTINUE;
4199 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4201 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4202 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4204 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4205 ctxt->ops->check_pmc(ctxt, rcx))
4206 return emulate_gp(ctxt, 0);
4208 return X86EMUL_CONTINUE;
4211 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4213 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4214 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4215 return emulate_gp(ctxt, 0);
4217 return X86EMUL_CONTINUE;
4220 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4222 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4223 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4224 return emulate_gp(ctxt, 0);
4226 return X86EMUL_CONTINUE;
4229 #define D(_y) { .flags = (_y) }
4230 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4231 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4232 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4233 #define N D(NotImpl)
4234 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4235 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4236 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4237 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4238 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4239 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4240 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4241 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4242 #define II(_f, _e, _i) \
4243 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4244 #define IIP(_f, _e, _i, _p) \
4245 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4246 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4247 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4249 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4250 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4251 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4252 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4253 #define I2bvIP(_f, _e, _i, _p) \
4254 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4256 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4257 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4258 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
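/*
 * As an example of how these helpers compose: F6ALU(Lock, em_add), used
 * for opcodes 0x00-0x05 below, expands via F2bv() and F() into the six
 * table entries of an ALU row: byte and full-size "r/m, reg" forms
 * (lockable), "reg, r/m" forms, and "AL/eAX, imm" forms, all
 * dispatching to the em_add fastop.
 */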
4260 static const struct opcode group7_rm0[] = {
4262 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4266 static const struct opcode group7_rm1[] = {
4267 DI(SrcNone | Priv, monitor),
4268 DI(SrcNone | Priv, mwait),
4272 static const struct opcode group7_rm3[] = {
4273 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4274 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4275 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4276 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4277 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4278 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4279 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4280 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4283 static const struct opcode group7_rm7[] = {
4285 DIP(SrcNone, rdtscp, check_rdtsc),
4289 static const struct opcode group1[] = {
4291 F(Lock | PageTable, em_or),
4294 F(Lock | PageTable, em_and),
4300 static const struct opcode group1A[] = {
4301 I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
4304 static const struct opcode group2[] = {
4305 F(DstMem | ModRM, em_rol),
4306 F(DstMem | ModRM, em_ror),
4307 F(DstMem | ModRM, em_rcl),
4308 F(DstMem | ModRM, em_rcr),
4309 F(DstMem | ModRM, em_shl),
4310 F(DstMem | ModRM, em_shr),
4311 F(DstMem | ModRM, em_shl),
4312 F(DstMem | ModRM, em_sar),
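/* The duplicate em_shl entry above handles /6 (SAL), an alias of SHL. */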
4315 static const struct opcode group3[] = {
4316 F(DstMem | SrcImm | NoWrite, em_test),
4317 F(DstMem | SrcImm | NoWrite, em_test),
4318 F(DstMem | SrcNone | Lock, em_not),
4319 F(DstMem | SrcNone | Lock, em_neg),
4320 F(DstXacc | Src2Mem, em_mul_ex),
4321 F(DstXacc | Src2Mem, em_imul_ex),
4322 F(DstXacc | Src2Mem, em_div_ex),
4323 F(DstXacc | Src2Mem, em_idiv_ex),
4326 static const struct opcode group4[] = {
4327 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4328 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4332 static const struct opcode group5[] = {
4333 F(DstMem | SrcNone | Lock, em_inc),
4334 F(DstMem | SrcNone | Lock, em_dec),
4335 I(SrcMem | NearBranch, em_call_near_abs),
4336 I(SrcMemFAddr | ImplicitOps, em_call_far),
4337 I(SrcMem | NearBranch, em_jmp_abs),
4338 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4339 I(SrcMem | Stack, em_push), D(Undefined),
4342 static const struct opcode group6[] = {
4343 DI(Prot | DstMem, sldt),
4344 DI(Prot | DstMem, str),
4345 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4346 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4350 static const struct group_dual group7 = { {
4351 II(Mov | DstMem, em_sgdt, sgdt),
4352 II(Mov | DstMem, em_sidt, sidt),
4353 II(SrcMem | Priv, em_lgdt, lgdt),
4354 II(SrcMem | Priv, em_lidt, lidt),
4355 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4356 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4357 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4361 N, EXT(0, group7_rm3),
4362 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4363 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4367 static const struct opcode group8[] = {
4369 F(DstMem | SrcImmByte | NoWrite, em_bt),
4370 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4371 F(DstMem | SrcImmByte | Lock, em_btr),
4372 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4375 static const struct group_dual group9 = { {
4376 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4378 N, N, N, N, N, N, N, N,
4381 static const struct opcode group11[] = {
4382 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4386 static const struct gprefix pfx_0f_ae_7 = {
4387 I(SrcMem | ByteOp, em_clflush), N, N, N,
4390 static const struct group_dual group15 = { {
4391 I(ModRM | Aligned16, em_fxsave),
4392 I(ModRM | Aligned16, em_fxrstor),
4393 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4395 N, N, N, N, N, N, N, N,
4398 static const struct gprefix pfx_0f_6f_0f_7f = {
4399 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4402 static const struct instr_dual instr_dual_0f_2b = {
4406 static const struct gprefix pfx_0f_2b = {
4407 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4410 static const struct gprefix pfx_0f_28_0f_29 = {
4411 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4414 static const struct gprefix pfx_0f_e7 = {
4415 N, I(Sse, em_mov), N, N,
4418 static const struct escape escape_d9 = { {
4419 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4422 N, N, N, N, N, N, N, N,
4424 N, N, N, N, N, N, N, N,
4426 N, N, N, N, N, N, N, N,
4428 N, N, N, N, N, N, N, N,
4430 N, N, N, N, N, N, N, N,
4432 N, N, N, N, N, N, N, N,
4434 N, N, N, N, N, N, N, N,
4436 N, N, N, N, N, N, N, N,
4439 static const struct escape escape_db = { {
4440 N, N, N, N, N, N, N, N,
4443 N, N, N, N, N, N, N, N,
4445 N, N, N, N, N, N, N, N,
4447 N, N, N, N, N, N, N, N,
4449 N, N, N, N, N, N, N, N,
4451 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4453 N, N, N, N, N, N, N, N,
4455 N, N, N, N, N, N, N, N,
4457 N, N, N, N, N, N, N, N,
4460 static const struct escape escape_dd = { {
4461 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4464 N, N, N, N, N, N, N, N,
4466 N, N, N, N, N, N, N, N,
4468 N, N, N, N, N, N, N, N,
4470 N, N, N, N, N, N, N, N,
4472 N, N, N, N, N, N, N, N,
4474 N, N, N, N, N, N, N, N,
4476 N, N, N, N, N, N, N, N,
4478 N, N, N, N, N, N, N, N,
4481 static const struct instr_dual instr_dual_0f_c3 = {
4482 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4485 static const struct mode_dual mode_dual_63 = {
4486 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4489 static const struct opcode opcode_table[256] = {
4491 F6ALU(Lock, em_add),
4492 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4493 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4495 F6ALU(Lock | PageTable, em_or),
4496 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4499 F6ALU(Lock, em_adc),
4500 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4501 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4503 F6ALU(Lock, em_sbb),
4504 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4505 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4507 F6ALU(Lock | PageTable, em_and), N, N,
4509 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4511 F6ALU(Lock, em_xor), N, N,
4513 F6ALU(NoWrite, em_cmp), N, N,
4515 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4517 X8(I(SrcReg | Stack, em_push)),
4519 X8(I(DstReg | Stack, em_pop)),
4521 I(ImplicitOps | Stack | No64, em_pusha),
4522 I(ImplicitOps | Stack | No64, em_popa),
4523 N, MD(ModRM, &mode_dual_63),
4526 I(SrcImm | Mov | Stack, em_push),
4527 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4528 I(SrcImmByte | Mov | Stack, em_push),
4529 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4530 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4531 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4533 X16(D(SrcImmByte | NearBranch)),
4535 G(ByteOp | DstMem | SrcImm, group1),
4536 G(DstMem | SrcImm, group1),
4537 G(ByteOp | DstMem | SrcImm | No64, group1),
4538 G(DstMem | SrcImmByte, group1),
4539 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4540 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4542 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4543 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4544 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4545 D(ModRM | SrcMem | NoAccess | DstReg),
4546 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4549 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4551 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4552 I(SrcImmFAddr | No64, em_call_far), N,
4553 II(ImplicitOps | Stack, em_pushf, pushf),
4554 II(ImplicitOps | Stack, em_popf, popf),
4555 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4557 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4558 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4559 I2bv(SrcSI | DstDI | Mov | String, em_mov),
4560 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
4562 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4563 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4564 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4565 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4567 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4569 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4571 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4572 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4573 I(ImplicitOps | NearBranch, em_ret),
4574 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4575 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4576 G(ByteOp, group11), G(0, group11),
4578 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4579 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4580 I(ImplicitOps, em_ret_far),
4581 D(ImplicitOps), DI(SrcImmByte, intn),
4582 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4584 G(Src2One | ByteOp, group2), G(Src2One, group2),
4585 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4586 I(DstAcc | SrcImmUByte | No64, em_aam),
4587 I(DstAcc | SrcImmUByte | No64, em_aad),
4588 F(DstAcc | ByteOp | No64, em_salc),
4589 I(DstAcc | SrcXLat | ByteOp, em_mov),
4591 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4593 X3(I(SrcImmByte | NearBranch, em_loop)),
4594 I(SrcImmByte | NearBranch, em_jcxz),
4595 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4596 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4598 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4599 I(SrcImmFAddr | No64, em_jmp_far),
4600 D(SrcImmByte | ImplicitOps | NearBranch),
4601 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4602 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4604 N, DI(ImplicitOps, icebp), N, N,
4605 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4606 G(ByteOp, group3), G(0, group3),
4608 D(ImplicitOps), D(ImplicitOps),
4609 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4610 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4613 static const struct opcode twobyte_table[256] = {
4615 G(0, group6), GD(0, &group7), N, N,
4616 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4617 II(ImplicitOps | Priv, em_clts, clts), N,
4618 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4619 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4621 N, N, N, N, N, N, N, N,
4622 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4623 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
4625 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
4626 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4627 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4629 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4632 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4633 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4634 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4637 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4638 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4639 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4640 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4641 I(ImplicitOps | EmulateOnUD, em_sysenter),
4642 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4644 N, N, N, N, N, N, N, N,
4646 X16(D(DstReg | SrcMem | ModRM)),
4648 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4653 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4658 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4660 X16(D(SrcImm | NearBranch)),
4662 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4664 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4665 II(ImplicitOps, em_cpuid, cpuid),
4666 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4667 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4668 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4670 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4671 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4672 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4673 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4674 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4675 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4677 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4678 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4679 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4680 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4681 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4682 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4686 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4687 I(DstReg | SrcMem | ModRM, em_bsf_c),
4688 I(DstReg | SrcMem | ModRM, em_bsr_c),
4689 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4691 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4692 N, ID(0, &instr_dual_0f_c3),
4693 N, N, N, GD(0, &group9),
4695 X8(I(DstReg, em_bswap)),
4697 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4699 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4700 N, N, N, N, N, N, N, N,
4702 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
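/*
 * MOVBE dispatch (0F 38 F0/F1): the gprefix tables below select on the
 * mandatory SIMD prefix (only the prefix-less form is emulated) and the
 * instr_dual entries reject the register-only encoding, since MOVBE
 * always takes a memory operand.
 */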
4705 static const struct instr_dual instr_dual_0f_38_f0 = {
4706 I(DstReg | SrcMem | Mov, em_movbe), N
4709 static const struct instr_dual instr_dual_0f_38_f1 = {
4710 I(DstMem | SrcReg | Mov, em_movbe), N
4713 static const struct gprefix three_byte_0f_38_f0 = {
4714 ID(0, &instr_dual_0f_38_f0), N, N, N
4717 static const struct gprefix three_byte_0f_38_f1 = {
4718 ID(0, &instr_dual_0f_38_f1), N, N, N
4722 * Insns below are selected by the prefix, which is indexed by the third opcode
4725 static const struct opcode opcode_map_0f_38[256] = {
4727 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4729 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4731 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4732 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
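/* Number of immediate bytes implied by the decode flags and operand size. */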
4753 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4757 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
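/*
 * Fetch an immediate of 'size' bytes from the instruction stream at
 * _eip.  The value is sign-extended; when 'sign_extension' is false it
 * is masked back down to 'size' bytes afterwards.
 */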
4763 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4764 unsigned size, bool sign_extension)
4766 int rc = X86EMUL_CONTINUE;
4770 op->addr.mem.ea = ctxt->_eip;
4771 /* NB. Immediates are sign-extended as necessary. */
4772 switch (op->bytes) {
4774 op->val = insn_fetch(s8, ctxt);
4777 op->val = insn_fetch(s16, ctxt);
4780 op->val = insn_fetch(s32, ctxt);
4783 op->val = insn_fetch(s64, ctxt);
4786 if (!sign_extension) {
4787 switch (op->bytes) {
4795 op->val &= 0xffffffff;
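/*
 * Decode a single operand according to its 5-bit Op* selector taken
 * from ctxt->d.  Memory operands that come from ModRM reuse
 * ctxt->memop, which decode_modrm() filled in earlier.
 */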
4803 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4806 int rc = X86EMUL_CONTINUE;
4810 decode_register_operand(ctxt, op);
4813 rc = decode_imm(ctxt, op, 1, false);
4816 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4820 if (ctxt->d & BitOp)
4821 fetch_bit_operand(ctxt);
4822 op->orig_val = op->val;
4825 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4829 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4830 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4831 fetch_register_operand(op);
4832 op->orig_val = op->val;
4836 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4837 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4838 fetch_register_operand(op);
4839 op->orig_val = op->val;
4842 if (ctxt->d & ByteOp) {
4847 op->bytes = ctxt->op_bytes;
4848 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4849 fetch_register_operand(op);
4850 op->orig_val = op->val;
4854 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4856 register_address(ctxt, VCPU_REGS_RDI);
4857 op->addr.mem.seg = VCPU_SREG_ES;
4864 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4865 fetch_register_operand(op);
4870 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4873 rc = decode_imm(ctxt, op, 1, true);
4881 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4884 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4887 ctxt->memop.bytes = 1;
4888 if (ctxt->memop.type == OP_REG) {
4889 ctxt->memop.addr.reg = decode_register(ctxt,
4890 ctxt->modrm_rm, true);
4891 fetch_register_operand(&ctxt->memop);
4895 ctxt->memop.bytes = 2;
4898 ctxt->memop.bytes = 4;
4901 rc = decode_imm(ctxt, op, 2, false);
4904 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4908 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4910 register_address(ctxt, VCPU_REGS_RSI);
4911 op->addr.mem.seg = ctxt->seg_override;
4917 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4920 reg_read(ctxt, VCPU_REGS_RBX) +
4921 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4922 op->addr.mem.seg = ctxt->seg_override;
4927 op->addr.mem.ea = ctxt->_eip;
4928 op->bytes = ctxt->op_bytes + 2;
4929 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4932 ctxt->memop.bytes = ctxt->op_bytes + 2;
4936 op->val = VCPU_SREG_ES;
4940 op->val = VCPU_SREG_CS;
4944 op->val = VCPU_SREG_SS;
4948 op->val = VCPU_SREG_DS;
4952 op->val = VCPU_SREG_FS;
4956 op->val = VCPU_SREG_GS;
4959 /* Special instructions do their own operand decoding. */
4961 op->type = OP_NONE; /* Disable writeback. */
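/*
 * Decode one instruction starting at ctxt->eip: legacy and REX
 * prefixes, opcode byte(s), ModRM/SIB, and finally the source, second
 * source and destination operands.
 */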
4969 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4971 int rc = X86EMUL_CONTINUE;
4972 int mode = ctxt->mode;
4973 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4974 bool op_prefix = false;
4975 bool has_seg_override = false;
4976 struct opcode opcode;
4978 ctxt->memop.type = OP_NONE;
4979 ctxt->memopp = NULL;
4980 ctxt->_eip = ctxt->eip;
4981 ctxt->fetch.ptr = ctxt->fetch.data;
4982 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4983 ctxt->opcode_len = 1;
4985 memcpy(ctxt->fetch.data, insn, insn_len);
4987 rc = __do_insn_fetch_bytes(ctxt, 1);
4988 if (rc != X86EMUL_CONTINUE)
4993 case X86EMUL_MODE_REAL:
4994 case X86EMUL_MODE_VM86:
4995 case X86EMUL_MODE_PROT16:
4996 def_op_bytes = def_ad_bytes = 2;
4998 case X86EMUL_MODE_PROT32:
4999 def_op_bytes = def_ad_bytes = 4;
5001 #ifdef CONFIG_X86_64
5002 case X86EMUL_MODE_PROT64:
5008 return EMULATION_FAILED;
5011 ctxt->op_bytes = def_op_bytes;
5012 ctxt->ad_bytes = def_ad_bytes;
5014 /* Legacy prefixes. */
5016 switch (ctxt->b = insn_fetch(u8, ctxt)) {
5017 case 0x66: /* operand-size override */
5019 /* switch between 2/4 bytes */
5020 ctxt->op_bytes = def_op_bytes ^ 6;
5022 case 0x67: /* address-size override */
5023 if (mode == X86EMUL_MODE_PROT64)
5024 /* switch between 4/8 bytes */
5025 ctxt->ad_bytes = def_ad_bytes ^ 12;
5027 /* switch between 2/4 bytes */
5028 ctxt->ad_bytes = def_ad_bytes ^ 6;
5030 case 0x26: /* ES override */
5031 case 0x2e: /* CS override */
5032 case 0x36: /* SS override */
5033 case 0x3e: /* DS override */
5034 has_seg_override = true;
5035 ctxt->seg_override = (ctxt->b >> 3) & 3;
5037 case 0x64: /* FS override */
5038 case 0x65: /* GS override */
5039 has_seg_override = true;
5040 ctxt->seg_override = ctxt->b & 7;
5042 case 0x40 ... 0x4f: /* REX */
5043 if (mode != X86EMUL_MODE_PROT64)
5045 ctxt->rex_prefix = ctxt->b;
5047 case 0xf0: /* LOCK */
5048 ctxt->lock_prefix = 1;
5050 case 0xf2: /* REPNE/REPNZ */
5051 case 0xf3: /* REP/REPE/REPZ */
5052 ctxt->rep_prefix = ctxt->b;
5058 /* Any legacy prefix after a REX prefix nullifies its effect. */
5060 ctxt->rex_prefix = 0;
5066 if (ctxt->rex_prefix & 8)
5067 ctxt->op_bytes = 8; /* REX.W */
5069 /* Opcode byte(s). */
5070 opcode = opcode_table[ctxt->b];
5071 /* Two-byte opcode? */
5072 if (ctxt->b == 0x0f) {
5073 ctxt->opcode_len = 2;
5074 ctxt->b = insn_fetch(u8, ctxt);
5075 opcode = twobyte_table[ctxt->b];
5077 /* 0F_38 opcode map */
5078 if (ctxt->b == 0x38) {
5079 ctxt->opcode_len = 3;
5080 ctxt->b = insn_fetch(u8, ctxt);
5081 opcode = opcode_map_0f_38[ctxt->b];
5084 ctxt->d = opcode.flags;
5086 if (ctxt->d & ModRM)
5087 ctxt->modrm = insn_fetch(u8, ctxt);
5089 /* VEX-prefixed instructions are not implemented */
5090 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5091 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
5095 while (ctxt->d & GroupMask) {
5096 switch (ctxt->d & GroupMask) {
5098 goffset = (ctxt->modrm >> 3) & 7;
5099 opcode = opcode.u.group[goffset];
5102 goffset = (ctxt->modrm >> 3) & 7;
5103 if ((ctxt->modrm >> 6) == 3)
5104 opcode = opcode.u.gdual->mod3[goffset];
5106 opcode = opcode.u.gdual->mod012[goffset];
5109 goffset = ctxt->modrm & 7;
5110 opcode = opcode.u.group[goffset];
5113 if (ctxt->rep_prefix && op_prefix)
5114 return EMULATION_FAILED;
5115 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5116 switch (simd_prefix) {
5117 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5118 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5119 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5120 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5124 if (ctxt->modrm > 0xbf)
5125 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
5127 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5130 if ((ctxt->modrm >> 6) == 3)
5131 opcode = opcode.u.idual->mod3;
5133 opcode = opcode.u.idual->mod012;
5136 if (ctxt->mode == X86EMUL_MODE_PROT64)
5137 opcode = opcode.u.mdual->mode64;
5139 opcode = opcode.u.mdual->mode32;
5142 return EMULATION_FAILED;
5145 ctxt->d &= ~(u64)GroupMask;
5146 ctxt->d |= opcode.flags;
5151 return EMULATION_FAILED;
5153 ctxt->execute = opcode.u.execute;
5155 if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
5156 return EMULATION_FAILED;
5158 if (unlikely(ctxt->d &
5159 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5162 * These are copied unconditionally here, and checked unconditionally
5163 * in x86_emulate_insn.
5165 ctxt->check_perm = opcode.check_perm;
5166 ctxt->intercept = opcode.intercept;
5168 if (ctxt->d & NotImpl)
5169 return EMULATION_FAILED;
5171 if (mode == X86EMUL_MODE_PROT64) {
5172 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5174 else if (ctxt->d & NearBranch)
5178 if (ctxt->d & Op3264) {
5179 if (mode == X86EMUL_MODE_PROT64)
5185 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5189 ctxt->op_bytes = 16;
5190 else if (ctxt->d & Mmx)
5194 /* ModRM and SIB bytes. */
5195 if (ctxt->d & ModRM) {
5196 rc = decode_modrm(ctxt, &ctxt->memop);
5197 if (!has_seg_override) {
5198 has_seg_override = true;
5199 ctxt->seg_override = ctxt->modrm_seg;
5201 } else if (ctxt->d & MemAbs)
5202 rc = decode_abs(ctxt, &ctxt->memop);
5203 if (rc != X86EMUL_CONTINUE)
5206 if (!has_seg_override)
5207 ctxt->seg_override = VCPU_SREG_DS;
5209 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5212 * Decode and fetch the source operand: register, memory
5215 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5216 if (rc != X86EMUL_CONTINUE)
5220 * Decode and fetch the second source operand: register, memory
5223 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5224 if (rc != X86EMUL_CONTINUE)
5227 /* Decode and fetch the destination operand: register or memory. */
5228 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5230 if (ctxt->rip_relative && likely(ctxt->memopp))
5231 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5232 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5235 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
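/* True if the decoded instruction is one that may write a page table. */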
5238 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5240 return ctxt->d & PageTable;
5243 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5245 /* The second termination condition applies only to REPE
5246 * and REPNE.  If the repeat string operation prefix is
5247 * REPE/REPZ or REPNE/REPNZ, check the corresponding
5248 * termination condition:
5249 * - if REPE/REPZ and ZF = 0 then done
5250 * - if REPNE/REPNZ and ZF = 1 then done
5252 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5253 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5254 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5255 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5256 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5257 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
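/*
 * Deliver any pending x87 exception as #MF before MMX state is
 * touched, mirroring what fwait does on real hardware.
 */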
5263 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5267 ctxt->ops->get_fpu(ctxt);
5268 rc = asm_safe("fwait");
5269 ctxt->ops->put_fpu(ctxt);
5271 if (unlikely(rc != X86EMUL_CONTINUE))
5272 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5274 return X86EMUL_CONTINUE;
5277 static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
5280 if (op->type == OP_MM)
5281 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
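/*
 * Run a fastop stub: load the guest's arithmetic flags, advance 'fop'
 * to the stub matching the destination operand size, call it with
 * dst/src/src2 in registers, and fold the resulting flags back into
 * ctxt->eflags.  A faulting stub comes back with 'fop' cleared and is
 * reported as #DE.
 */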
5284 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
5286 register void *__sp asm(_ASM_SP);
5287 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5289 if (!(ctxt->d & ByteOp))
5290 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5292 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
5293 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5294 [fastop]"+S"(fop), "+r"(__sp)
5295 : "c"(ctxt->src2.val));
5297 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5298 if (!fop) /* exception is returned in fop variable */
5299 return emulate_de(ctxt);
5300 return X86EMUL_CONTINUE;
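/*
 * Reset the per-instruction decode state: the context fields laid out
 * between rip_relative and modrm, plus the I/O and memory read caches.
 */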
5303 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5305 memset(&ctxt->rip_relative, 0,
5306 (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
5308 ctxt->io_read.pos = 0;
5309 ctxt->io_read.end = 0;
5310 ctxt->mem_read.end = 0;
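/*
 * Execute a previously decoded instruction: check LOCK, privilege and
 * mode restrictions plus intercepts, read the memory operands, dispatch
 * to the execute callback (or the opcode switches below), then write
 * back the results and advance RIP.
 */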
5313 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5315 const struct x86_emulate_ops *ops = ctxt->ops;
5316 int rc = X86EMUL_CONTINUE;
5317 int saved_dst_type = ctxt->dst.type;
5319 ctxt->mem_read.pos = 0;
5321 /* LOCK prefix is allowed only with some instructions */
5322 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5323 rc = emulate_ud(ctxt);
5327 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5328 rc = emulate_ud(ctxt);
5332 if (unlikely(ctxt->d &
5333 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5334 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5335 (ctxt->d & Undefined)) {
5336 rc = emulate_ud(ctxt);
5340 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5341 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5342 rc = emulate_ud(ctxt);
5346 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5347 rc = emulate_nm(ctxt);
5351 if (ctxt->d & Mmx) {
5352 rc = flush_pending_x87_faults(ctxt);
5353 if (rc != X86EMUL_CONTINUE)
5356 * Now that we know the fpu is exception safe, we can fetch
5359 fetch_possible_mmx_operand(ctxt, &ctxt->src);
5360 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
5361 if (!(ctxt->d & Mov))
5362 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
5365 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
5366 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5367 X86_ICPT_PRE_EXCEPT);
5368 if (rc != X86EMUL_CONTINUE)
5372 /* Instruction can only be executed in protected mode */
5373 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5374 rc = emulate_ud(ctxt);
5378 /* Privileged instructions can be executed only at CPL 0 */
5379 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5380 if (ctxt->d & PrivUD)
5381 rc = emulate_ud(ctxt);
5383 rc = emulate_gp(ctxt, 0);
5387 /* Do instruction-specific permission checks */
5388 if (ctxt->d & CheckPerm) {
5389 rc = ctxt->check_perm(ctxt);
5390 if (rc != X86EMUL_CONTINUE)
5394 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5395 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5396 X86_ICPT_POST_EXCEPT);
5397 if (rc != X86EMUL_CONTINUE)
5401 if (ctxt->rep_prefix && (ctxt->d & String)) {
5402 /* All REP prefixes have the same first termination condition */
5403 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5404 string_registers_quirk(ctxt);
5405 ctxt->eip = ctxt->_eip;
5406 ctxt->eflags &= ~X86_EFLAGS_RF;
5412 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5413 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5414 ctxt->src.valptr, ctxt->src.bytes);
5415 if (rc != X86EMUL_CONTINUE)
5417 ctxt->src.orig_val64 = ctxt->src.val64;
5420 if (ctxt->src2.type == OP_MEM) {
5421 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5422 &ctxt->src2.val, ctxt->src2.bytes);
5423 if (rc != X86EMUL_CONTINUE)
5427 if ((ctxt->d & DstMask) == ImplicitOps)
5431 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5432 /* optimisation - avoid slow emulated read if Mov */
5433 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5434 &ctxt->dst.val, ctxt->dst.bytes);
5435 if (rc != X86EMUL_CONTINUE) {
5436 if (!(ctxt->d & NoWrite) &&
5437 rc == X86EMUL_PROPAGATE_FAULT &&
5438 ctxt->exception.vector == PF_VECTOR)
5439 ctxt->exception.error_code |= PFERR_WRITE_MASK;
5443 /* Copy full 64-bit value for CMPXCHG8B. */
5444 ctxt->dst.orig_val64 = ctxt->dst.val64;
5448 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5449 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5450 X86_ICPT_POST_MEMACCESS);
5451 if (rc != X86EMUL_CONTINUE)
5455 if (ctxt->rep_prefix && (ctxt->d & String))
5456 ctxt->eflags |= X86_EFLAGS_RF;
5458 ctxt->eflags &= ~X86_EFLAGS_RF;
5460 if (ctxt->execute) {
5461 if (ctxt->d & Fastop) {
5462 void (*fop)(struct fastop *) = (void *)ctxt->execute;
5463 rc = fastop(ctxt, fop);
5464 if (rc != X86EMUL_CONTINUE)
5468 rc = ctxt->execute(ctxt);
5469 if (rc != X86EMUL_CONTINUE)
5474 if (ctxt->opcode_len == 2)
5476 else if (ctxt->opcode_len == 3)
5477 goto threebyte_insn;
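/* One-byte opcodes that have no ->execute callback are handled here. */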
5480 case 0x70 ... 0x7f: /* jcc (short) */
5481 if (test_cc(ctxt->b, ctxt->eflags))
5482 rc = jmp_rel(ctxt, ctxt->src.val);
5484 case 0x8d: /* lea r16/r32, m */
5485 ctxt->dst.val = ctxt->src.addr.mem.ea;
5487 case 0x90 ... 0x97: /* nop / xchg reg, rax */
5488 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5489 ctxt->dst.type = OP_NONE;
5493 case 0x98: /* cbw/cwde/cdqe */
5494 switch (ctxt->op_bytes) {
5495 case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5496 case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5497 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5500 case 0xcc: /* int3 */
5501 rc = emulate_int(ctxt, 3);
5503 case 0xcd: /* int n */
5504 rc = emulate_int(ctxt, ctxt->src.val);
5506 case 0xce: /* into */
5507 if (ctxt->eflags & X86_EFLAGS_OF)
5508 rc = emulate_int(ctxt, 4);
5510 case 0xe9: /* jmp rel */
5511 case 0xeb: /* jmp rel short */
5512 rc = jmp_rel(ctxt, ctxt->src.val);
5513 ctxt->dst.type = OP_NONE; /* Disable writeback. */
5515 case 0xf4: /* hlt */
5516 ctxt->ops->halt(ctxt);
5518 case 0xf5: /* cmc */
5519 /* complement the carry flag in the eflags reg */
5520 ctxt->eflags ^= X86_EFLAGS_CF;
5522 case 0xf8: /* clc */
5523 ctxt->eflags &= ~X86_EFLAGS_CF;
5525 case 0xf9: /* stc */
5526 ctxt->eflags |= X86_EFLAGS_CF;
5528 case 0xfc: /* cld */
5529 ctxt->eflags &= ~X86_EFLAGS_DF;
5531 case 0xfd: /* std */
5532 ctxt->eflags |= X86_EFLAGS_DF;
5535 goto cannot_emulate;
5538 if (rc != X86EMUL_CONTINUE)
5542 if (ctxt->d & SrcWrite) {
5543 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5544 rc = writeback(ctxt, &ctxt->src);
5545 if (rc != X86EMUL_CONTINUE)
5548 if (!(ctxt->d & NoWrite)) {
5549 rc = writeback(ctxt, &ctxt->dst);
5550 if (rc != X86EMUL_CONTINUE)
5555 * restore dst type in case the decode is reused
5556 * (happens for string instructions)
5558 ctxt->dst.type = saved_dst_type;
5560 if ((ctxt->d & SrcMask) == SrcSI)
5561 string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5563 if ((ctxt->d & DstMask) == DstDI)
5564 string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5566 if (ctxt->rep_prefix && (ctxt->d & String)) {
5568 struct read_cache *r = &ctxt->io_read;
5569 if ((ctxt->d & SrcMask) == SrcSI)
5570 count = ctxt->src.count;
5572 count = ctxt->dst.count;
5573 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5575 if (!string_insn_completed(ctxt)) {
5577 * Re-enter the guest when the pio read-ahead buffer is empty
5578 * or, if it is not used, after every 1024 iterations.
5580 if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5581 (r->end == 0 || r->end != r->pos)) {
5583 * Reset the read cache.  This usually happens
5584 * before decode, but since the instruction is
5585 * restarted we have to do it here.
5587 ctxt->mem_read.end = 0;
5588 writeback_registers(ctxt);
5589 return EMULATION_RESTART;
5591 goto done; /* skip rip writeback */
5593 ctxt->eflags &= ~X86_EFLAGS_RF;
5596 ctxt->eip = ctxt->_eip;
5599 if (rc == X86EMUL_PROPAGATE_FAULT) {
5600 WARN_ON(ctxt->exception.vector > 0x1f);
5601 ctxt->have_exception = true;
5603 if (rc == X86EMUL_INTERCEPTED)
5604 return EMULATION_INTERCEPTED;
5606 if (rc == X86EMUL_CONTINUE)
5607 writeback_registers(ctxt);
5609 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
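/* Two-byte (0x0F) opcodes that have no ->execute callback are handled here. */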
5613 case 0x09: /* wbinvd */
5614 (ctxt->ops->wbinvd)(ctxt);
5616 case 0x08: /* invd */
5617 case 0x0d: /* GrpP (prefetch) */
5618 case 0x18: /* Grp16 (prefetch/nop) */
5619 case 0x1f: /* nop */
5621 case 0x20: /* mov cr, reg */
5622 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5624 case 0x21: /* mov from dr to reg */
5625 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5627 case 0x40 ... 0x4f: /* cmov */
5628 if (test_cc(ctxt->b, ctxt->eflags))
5629 ctxt->dst.val = ctxt->src.val;
5630 else if (ctxt->op_bytes != 4)
5631 ctxt->dst.type = OP_NONE; /* no writeback */
5633 case 0x80 ... 0x8f: /* jnz rel, etc*/
5634 if (test_cc(ctxt->b, ctxt->eflags))
5635 rc = jmp_rel(ctxt, ctxt->src.val);
5637 case 0x90 ... 0x9f: /* setcc r/m8 */
5638 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5640 case 0xb6 ... 0xb7: /* movzx */
5641 ctxt->dst.bytes = ctxt->op_bytes;
5642 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5643 : (u16) ctxt->src.val;
5645 case 0xbe ... 0xbf: /* movsx */
5646 ctxt->dst.bytes = ctxt->op_bytes;
5647 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5648 (s16) ctxt->src.val;
5651 goto cannot_emulate;
5656 if (rc != X86EMUL_CONTINUE)
5662 return EMULATION_FAILED;
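/* Thin wrappers used by the rest of KVM to manage the emulator's GPR cache. */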
5665 void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5667 invalidate_registers(ctxt);
5670 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5672 writeback_registers(ctxt);