1 // SPDX-License-Identifier: GPL-2.0-only
2 /******************************************************************************
5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
7 * Copyright (c) 2005 Keir Fraser
9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
10 * privileged instructions:
12 * Copyright (C) 2006 Qumranet
13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
20 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/kvm_host.h>
23 #include "kvm_cache_regs.h"
24 #include "kvm_emulate.h"
25 #include <linux/stringify.h>
26 #include <asm/debugreg.h>
27 #include <asm/nospec-branch.h>
39 #define OpImplicit 1ull /* No generic decode */
40 #define OpReg 2ull /* Register */
41 #define OpMem 3ull /* Memory */
42 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
43 #define OpDI 5ull /* ES:DI/EDI/RDI */
44 #define OpMem64 6ull /* Memory, 64-bit */
45 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
46 #define OpDX 8ull /* DX register */
47 #define OpCL 9ull /* CL register (for shifts) */
48 #define OpImmByte 10ull /* 8-bit sign extended immediate */
49 #define OpOne 11ull /* Implied 1 */
50 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
51 #define OpMem16 13ull /* Memory operand (16-bit). */
52 #define OpMem32 14ull /* Memory operand (32-bit). */
53 #define OpImmU 15ull /* Immediate operand, zero extended */
54 #define OpSI 16ull /* SI/ESI/RSI */
55 #define OpImmFAddr 17ull /* Immediate far address */
56 #define OpMemFAddr 18ull /* Far address in memory */
57 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
58 #define OpES 20ull /* ES */
59 #define OpCS 21ull /* CS */
60 #define OpSS 22ull /* SS */
61 #define OpDS 23ull /* DS */
62 #define OpFS 24ull /* FS */
63 #define OpGS 25ull /* GS */
64 #define OpMem8 26ull /* 8-bit zero extended memory operand */
65 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
66 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
67 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
68 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
70 #define OpBits 5 /* Width of operand field */
71 #define OpMask ((1ull << OpBits) - 1)
74 * Opcode effective-address decode tables.
75 * Note that we only emulate instructions that have at least one memory
76 * operand (excluding implicit stack references). We assume that stack
77 * references and instruction fetches will never occur in special memory
78 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need not be handled.
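/*
 * Illustrative example (not an actual table entry): an opcode table entry
 * describes an instruction as a bitwise OR of the operand/attribute bits
 * defined below, so a locked ALU op with a memory destination would look
 * roughly like (ByteOp | DstMem | SrcReg | ModRM | Lock).
 */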
82 /* Operand sizes: 8-bit operands or specified/overridden size. */
83 #define ByteOp (1<<0) /* 8-bit operands. */
84 /* Destination operand type. */
86 #define ImplicitOps (OpImplicit << DstShift)
87 #define DstReg (OpReg << DstShift)
88 #define DstMem (OpMem << DstShift)
89 #define DstAcc (OpAcc << DstShift)
90 #define DstDI (OpDI << DstShift)
91 #define DstMem64 (OpMem64 << DstShift)
92 #define DstMem16 (OpMem16 << DstShift)
93 #define DstImmUByte (OpImmUByte << DstShift)
94 #define DstDX (OpDX << DstShift)
95 #define DstAccLo (OpAccLo << DstShift)
96 #define DstMask (OpMask << DstShift)
97 /* Source operand type. */
99 #define SrcNone (OpNone << SrcShift)
100 #define SrcReg (OpReg << SrcShift)
101 #define SrcMem (OpMem << SrcShift)
102 #define SrcMem16 (OpMem16 << SrcShift)
103 #define SrcMem32 (OpMem32 << SrcShift)
104 #define SrcImm (OpImm << SrcShift)
105 #define SrcImmByte (OpImmByte << SrcShift)
106 #define SrcOne (OpOne << SrcShift)
107 #define SrcImmUByte (OpImmUByte << SrcShift)
108 #define SrcImmU (OpImmU << SrcShift)
109 #define SrcSI (OpSI << SrcShift)
110 #define SrcXLat (OpXLat << SrcShift)
111 #define SrcImmFAddr (OpImmFAddr << SrcShift)
112 #define SrcMemFAddr (OpMemFAddr << SrcShift)
113 #define SrcAcc (OpAcc << SrcShift)
114 #define SrcImmU16 (OpImmU16 << SrcShift)
115 #define SrcImm64 (OpImm64 << SrcShift)
116 #define SrcDX (OpDX << SrcShift)
117 #define SrcMem8 (OpMem8 << SrcShift)
118 #define SrcAccHi (OpAccHi << SrcShift)
119 #define SrcMask (OpMask << SrcShift)
120 #define BitOp (1<<11)
121 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
122 #define String (1<<13) /* String instruction (rep capable) */
123 #define Stack (1<<14) /* Stack instruction (push/pop) */
124 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
125 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
126 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
127 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
128 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
129 #define Escape (5<<15) /* Escape to coprocessor instruction */
130 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
131 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
132 #define Sse (1<<18) /* SSE Vector instruction */
133 /* Generic ModRM decode. */
134 #define ModRM (1<<19)
135 /* Destination is only written; never read. */
138 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
139 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
140 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
141 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
142 #define Undefined (1<<25) /* No Such Instruction */
143 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
144 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
146 #define PageTable (1 << 29) /* instruction used to write page table */
147 #define NotImpl (1 << 30) /* instruction is not implemented */
148 /* Source 2 operand type */
149 #define Src2Shift (31)
150 #define Src2None (OpNone << Src2Shift)
151 #define Src2Mem (OpMem << Src2Shift)
152 #define Src2CL (OpCL << Src2Shift)
153 #define Src2ImmByte (OpImmByte << Src2Shift)
154 #define Src2One (OpOne << Src2Shift)
155 #define Src2Imm (OpImm << Src2Shift)
156 #define Src2ES (OpES << Src2Shift)
157 #define Src2CS (OpCS << Src2Shift)
158 #define Src2SS (OpSS << Src2Shift)
159 #define Src2DS (OpDS << Src2Shift)
160 #define Src2FS (OpFS << Src2Shift)
161 #define Src2GS (OpGS << Src2Shift)
162 #define Src2Mask (OpMask << Src2Shift)
163 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
164 #define AlignMask ((u64)7 << 41)
165 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
166 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
167 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
168 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
169 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
170 #define NoWrite ((u64)1 << 45) /* No writeback */
171 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
172 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
173 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
174 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
175 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
176 #define NearBranch ((u64)1 << 52) /* Near branches */
177 #define No16 ((u64)1 << 53) /* No 16 bit operand */
178 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
179 #define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operands */
180 #define IsBranch ((u64)1 << 56) /* Instruction is considered a branch. */
182 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
184 #define X2(x...) x, x
185 #define X3(x...) X2(x), x
186 #define X4(x...) X2(x), X2(x)
187 #define X5(x...) X4(x), x
188 #define X6(x...) X4(x), X2(x)
189 #define X7(x...) X4(x), X3(x)
190 #define X8(x...) X4(x), X4(x)
191 #define X16(x...) X8(x), X8(x)
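/*
 * For example, the eight "mov reg, imm" opcodes 0xb8-0xbf can be listed in
 * the opcode tables as a single X8(...) entry rather than eight identical
 * lines; the X16() form covers a full row of sixteen opcodes.
 */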
198 int (*execute)(struct x86_emulate_ctxt *ctxt);
199 const struct opcode *group;
200 const struct group_dual *gdual;
201 const struct gprefix *gprefix;
202 const struct escape *esc;
203 const struct instr_dual *idual;
204 const struct mode_dual *mdual;
205 void (*fastop)(struct fastop *fake);
207 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
211 struct opcode mod012[8];
212 struct opcode mod3[8];
216 struct opcode pfx_no;
217 struct opcode pfx_66;
218 struct opcode pfx_f2;
219 struct opcode pfx_f3;
224 struct opcode high[64];
228 struct opcode mod012;
233 struct opcode mode32;
234 struct opcode mode64;
237 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
239 enum x86_transfer_type {
241 X86_TRANSFER_CALL_JMP,
243 X86_TRANSFER_TASK_SWITCH,
246 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
248 unsigned long dirty = ctxt->regs_dirty;
251 for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
252 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
255 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
257 ctxt->regs_dirty = 0;
258 ctxt->regs_valid = 0;
262 * These EFLAGS bits are restored from saved value during emulation, and
263 * any changes are written back to the saved value after emulation.
265 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
266 X86_EFLAGS_PF|X86_EFLAGS_CF)
275 * fastop functions have a special calling convention:
280 * flags: rflags (in/out)
281 * ex: rsi (in:fastop pointer, out:zero if exception)
283 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
284 * different operand sizes can be reached by calculation, rather than a jump
285 * table (which would be bigger than the code).
287 * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
288 * and 1 for the straight line speculation INT3, leaves 7 bytes for the
289 * body of the function. Currently none is larger than 4.
291 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
293 #define FASTOP_SIZE 16
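/*
 * Dispatch sketch: because every size variant of an em_<op> block is exactly
 * FASTOP_SIZE bytes, the fastop() helper declared above selects the variant
 * by arithmetic rather than a jump table, roughly:
 *
 *	if (!(ctxt->d & ByteOp))
 *		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 *
 * i.e. +16 for 16-bit, +32 for 32-bit and +48 for 64-bit operands, with the
 * byte variant at offset 0.
 */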
295 #define __FOP_FUNC(name) \
296 ".align " __stringify(FASTOP_SIZE) " \n\t" \
297 ".type " name ", @function \n\t" \
302 #define FOP_FUNC(name) \
305 #define __FOP_RET(name) \
307 ".size " name ", .-" name "\n\t"
309 #define FOP_RET(name) \
312 #define __FOP_START(op, align) \
313 extern void em_##op(struct fastop *fake); \
314 asm(".pushsection .text, \"ax\" \n\t" \
315 ".global em_" #op " \n\t" \
316 ".align " __stringify(align) " \n\t" \
319 #define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
324 #define __FOPNOP(name) \
329 __FOPNOP(__stringify(__UNIQUE_ID(nop)))
331 #define FOP1E(op, dst) \
332 __FOP_FUNC(#op "_" #dst) \
333 "10: " #op " %" #dst " \n\t" \
334 __FOP_RET(#op "_" #dst)
336 #define FOP1EEX(op, dst) \
337 FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
339 #define FASTOP1(op) \
344 ON64(FOP1E(op##q, rax)) \
347 /* 1-operand, using src2 (for MUL/DIV r/m) */
348 #define FASTOP1SRC2(op, name) \
353 ON64(FOP1E(op, rcx)) \
356 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
357 #define FASTOP1SRC2EX(op, name) \
362 ON64(FOP1EEX(op, rcx)) \
365 #define FOP2E(op, dst, src) \
366 __FOP_FUNC(#op "_" #dst "_" #src) \
367 #op " %" #src ", %" #dst " \n\t" \
368 __FOP_RET(#op "_" #dst "_" #src)
370 #define FASTOP2(op) \
372 FOP2E(op##b, al, dl) \
373 FOP2E(op##w, ax, dx) \
374 FOP2E(op##l, eax, edx) \
375 ON64(FOP2E(op##q, rax, rdx)) \
378 /* 2 operand, word only */
379 #define FASTOP2W(op) \
382 FOP2E(op##w, ax, dx) \
383 FOP2E(op##l, eax, edx) \
384 ON64(FOP2E(op##q, rax, rdx)) \
387 /* 2 operand, src is CL */
388 #define FASTOP2CL(op) \
390 FOP2E(op##b, al, cl) \
391 FOP2E(op##w, ax, cl) \
392 FOP2E(op##l, eax, cl) \
393 ON64(FOP2E(op##q, rax, cl)) \
396 /* 2 operand, src and dest are reversed */
397 #define FASTOP2R(op, name) \
399 FOP2E(op##b, dl, al) \
400 FOP2E(op##w, dx, ax) \
401 FOP2E(op##l, edx, eax) \
402 ON64(FOP2E(op##q, rdx, rax)) \
405 #define FOP3E(op, dst, src, src2) \
406 __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
407 #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
408 __FOP_RET(#op "_" #dst "_" #src "_" #src2)
410 /* 3-operand, word-only, src2=cl */
411 #define FASTOP3WCL(op) \
414 FOP3E(op##w, ax, dx, cl) \
415 FOP3E(op##l, eax, edx, cl) \
416 ON64(FOP3E(op##q, rax, rdx, cl)) \
419 /* Special case for SETcc - 1 instruction per cc */
420 #define FOP_SETCC(op) \
446 "pushf; sbb %al, %al; popf \n\t"
451 * XXX: inoutclob user must know where the argument is being expanded.
452 * Using asm goto would allow us to remove _fault.
454 #define asm_safe(insn, inoutclob...) \
458 asm volatile("1:" insn "\n" \
460 _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
461 : [_fault] "+r"(_fault) inoutclob ); \
463 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
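/*
 * Minimal usage sketch (hypothetical instruction, for illustration only):
 *
 *	rc = asm_safe("fwait");
 *
 * A fault raised by the wrapped instruction is caught via the exception
 * table entry and reported as X86EMUL_UNHANDLEABLE instead of escaping
 * into the host.
 */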
466 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
467 enum x86_intercept intercept,
468 enum x86_intercept_stage stage)
470 struct x86_instruction_info info = {
471 .intercept = intercept,
472 .rep_prefix = ctxt->rep_prefix,
473 .modrm_mod = ctxt->modrm_mod,
474 .modrm_reg = ctxt->modrm_reg,
475 .modrm_rm = ctxt->modrm_rm,
476 .src_val = ctxt->src.val64,
477 .dst_val = ctxt->dst.val64,
478 .src_bytes = ctxt->src.bytes,
479 .dst_bytes = ctxt->dst.bytes,
480 .ad_bytes = ctxt->ad_bytes,
481 .next_rip = ctxt->eip,
484 return ctxt->ops->intercept(ctxt, &info, stage);
487 static void assign_masked(ulong *dest, ulong src, ulong mask)
489 *dest = (*dest & ~mask) | (src & mask);
492 static void assign_register(unsigned long *reg, u64 val, int bytes)
494 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
497 *(u8 *)reg = (u8)val;
500 *(u16 *)reg = (u16)val;
504 break; /* 64b: zero-extend */
511 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
513 return (1UL << (ctxt->ad_bytes << 3)) - 1;
516 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
519 struct desc_struct ss;
521 if (ctxt->mode == X86EMUL_MODE_PROT64)
523 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
524 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
527 static int stack_size(struct x86_emulate_ctxt *ctxt)
529 return (__fls(stack_mask(ctxt)) + 1) >> 3;
532 /* Access/update address held in a register, based on addressing mode. */
533 static inline unsigned long
534 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
536 if (ctxt->ad_bytes == sizeof(unsigned long))
539 return reg & ad_mask(ctxt);
542 static inline unsigned long
543 register_address(struct x86_emulate_ctxt *ctxt, int reg)
545 return address_mask(ctxt, reg_read(ctxt, reg));
548 static void masked_increment(ulong *reg, ulong mask, int inc)
550 assign_masked(reg, *reg + inc, mask);
554 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
556 ulong *preg = reg_rmw(ctxt, reg);
558 assign_register(preg, *preg + inc, ctxt->ad_bytes);
561 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
563 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
566 static u32 desc_limit_scaled(struct desc_struct *desc)
568 u32 limit = get_desc_limit(desc);
570 return desc->g ? (limit << 12) | 0xfff : limit;
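/*
 * Example: with the granularity bit set, a raw limit of 0xfffff scales to
 * (0xfffff << 12) | 0xfff == 0xffffffff, i.e. a 4GB segment.
 */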
573 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
575 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
578 return ctxt->ops->get_cached_segment_base(ctxt, seg);
581 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
582 u32 error, bool valid)
584 if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
585 return X86EMUL_UNHANDLEABLE;
587 ctxt->exception.vector = vec;
588 ctxt->exception.error_code = error;
589 ctxt->exception.error_code_valid = valid;
590 return X86EMUL_PROPAGATE_FAULT;
593 static int emulate_db(struct x86_emulate_ctxt *ctxt)
595 return emulate_exception(ctxt, DB_VECTOR, 0, false);
598 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
600 return emulate_exception(ctxt, GP_VECTOR, err, true);
603 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
605 return emulate_exception(ctxt, SS_VECTOR, err, true);
608 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
610 return emulate_exception(ctxt, UD_VECTOR, 0, false);
613 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
615 return emulate_exception(ctxt, TS_VECTOR, err, true);
618 static int emulate_de(struct x86_emulate_ctxt *ctxt)
620 return emulate_exception(ctxt, DE_VECTOR, 0, false);
623 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
625 return emulate_exception(ctxt, NM_VECTOR, 0, false);
628 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
631 struct desc_struct desc;
633 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
637 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
642 struct desc_struct desc;
644 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
645 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
648 static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
650 return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
653 static inline bool emul_is_noncanonical_address(u64 la,
654 struct x86_emulate_ctxt *ctxt)
656 return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
660 * x86 defines three classes of vector instructions: explicitly
661 * aligned, explicitly unaligned, and the rest, which change behaviour
662 * depending on whether they're AVX encoded or not.
664 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
665 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
666 * 512 bytes of data must be aligned to a 16 byte boundary.
668 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
670 u64 alignment = ctxt->d & AlignMask;
672 if (likely(size < 16))
687 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
688 struct segmented_address addr,
689 unsigned *max_size, unsigned size,
690 enum x86emul_mode mode, ulong *linear,
693 struct desc_struct desc;
700 la = seg_base(ctxt, addr.seg) + addr.ea;
703 case X86EMUL_MODE_PROT64:
704 *linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
705 va_bits = ctxt_virt_addr_bits(ctxt);
706 if (!__is_canonical_address(la, va_bits))
709 *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
710 if (size > *max_size)
714 *linear = la = (u32)la;
715 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
719 /* code segment in protected mode or read-only data segment */
720 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
721 (flags & X86EMUL_F_WRITE))
723 /* unreadable code segment */
724 if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
726 lim = desc_limit_scaled(&desc);
727 if (!(desc.type & 8) && (desc.type & 4)) {
728 /* expand-down segment */
731 lim = desc.d ? 0xffffffff : 0xffff;
735 if (lim == 0xffffffff)
738 *max_size = (u64)lim + 1 - addr.ea;
739 if (size > *max_size)
744 if (la & (insn_alignment(ctxt, size) - 1))
745 return emulate_gp(ctxt, 0);
746 return X86EMUL_CONTINUE;
748 if (addr.seg == VCPU_SREG_SS)
749 return emulate_ss(ctxt, 0);
751 return emulate_gp(ctxt, 0);
754 static int linearize(struct x86_emulate_ctxt *ctxt,
755 struct segmented_address addr,
756 unsigned size, bool write,
760 return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
761 write ? X86EMUL_F_WRITE : 0);
764 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
769 struct segmented_address addr = { .seg = VCPU_SREG_CS,
772 if (ctxt->op_bytes != sizeof(unsigned long))
773 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
774 rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
776 if (rc == X86EMUL_CONTINUE)
777 ctxt->_eip = addr.ea;
781 static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
784 struct desc_struct cs;
788 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
790 if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
791 /* Real mode. cpu must not have long mode active */
793 return X86EMUL_UNHANDLEABLE;
794 ctxt->mode = X86EMUL_MODE_REAL;
795 return X86EMUL_CONTINUE;
798 if (ctxt->eflags & X86_EFLAGS_VM) {
799 /* Protected/VM86 mode. cpu must not have long mode active */
801 return X86EMUL_UNHANDLEABLE;
802 ctxt->mode = X86EMUL_MODE_VM86;
803 return X86EMUL_CONTINUE;
806 if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
807 return X86EMUL_UNHANDLEABLE;
809 if (efer & EFER_LMA) {
811 /* Proper long mode */
812 ctxt->mode = X86EMUL_MODE_PROT64;
814 /* 32-bit compatibility mode */
815 ctxt->mode = X86EMUL_MODE_PROT32;
817 ctxt->mode = X86EMUL_MODE_PROT16;
820 /* Legacy 32 bit / 16 bit mode */
821 ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
824 return X86EMUL_CONTINUE;
827 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
829 return assign_eip(ctxt, dst);
832 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
834 int rc = emulator_recalc_and_set_mode(ctxt);
836 if (rc != X86EMUL_CONTINUE)
839 return assign_eip(ctxt, dst);
842 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
844 return assign_eip_near(ctxt, ctxt->_eip + rel);
847 static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
848 void *data, unsigned size)
850 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
853 static int linear_write_system(struct x86_emulate_ctxt *ctxt,
854 ulong linear, void *data,
857 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
860 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
861 struct segmented_address addr,
868 rc = linearize(ctxt, addr, size, false, &linear);
869 if (rc != X86EMUL_CONTINUE)
871 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
874 static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
875 struct segmented_address addr,
882 rc = linearize(ctxt, addr, size, true, &linear);
883 if (rc != X86EMUL_CONTINUE)
885 return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
889 * Prefetch the remaining bytes of the instruction without crossing page
890 * boundary if they are not in fetch_cache yet.
892 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
895 unsigned size, max_size;
896 unsigned long linear;
897 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
898 struct segmented_address addr = { .seg = VCPU_SREG_CS,
899 .ea = ctxt->eip + cur_size };
902 * We do not know exactly how many bytes will be needed, and
903 * __linearize is expensive, so fetch as much as possible. We
904 * just have to avoid going beyond the 15 byte limit, the end
905 * of the segment, or the end of the page.
907 * __linearize is called with size 0 so that it does not do any
908 * boundary check itself. Instead, we use max_size to check against op_size.
911 rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
913 if (unlikely(rc != X86EMUL_CONTINUE))
916 size = min_t(unsigned, 15UL ^ cur_size, max_size);
917 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
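/*
 * Note: "15UL ^ cur_size" above equals 15 - cur_size (cur_size never
 * exceeds 15), capping the fetch at the architectural 15-byte maximum
 * instruction length before the page-boundary clamp is applied.
 */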
920 * One instruction can only straddle two pages,
921 * and one has been loaded at the beginning of
922 * x86_decode_insn. So, if there still are not enough bytes,
923 * we must have hit the 15-byte instruction-length limit.
925 if (unlikely(size < op_size))
926 return emulate_gp(ctxt, 0);
928 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
929 size, &ctxt->exception);
930 if (unlikely(rc != X86EMUL_CONTINUE))
932 ctxt->fetch.end += size;
933 return X86EMUL_CONTINUE;
936 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
939 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
941 if (unlikely(done_size < size))
942 return __do_insn_fetch_bytes(ctxt, size - done_size);
944 return X86EMUL_CONTINUE;
947 /* Fetch next part of the instruction being emulated. */
948 #define insn_fetch(_type, _ctxt) \
951 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
952 if (rc != X86EMUL_CONTINUE) \
954 ctxt->_eip += sizeof(_type); \
955 memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
956 ctxt->fetch.ptr += sizeof(_type); \
960 #define insn_fetch_arr(_arr, _size, _ctxt) \
962 rc = do_insn_fetch_bytes(_ctxt, _size); \
963 if (rc != X86EMUL_CONTINUE) \
965 ctxt->_eip += (_size); \
966 memcpy(_arr, ctxt->fetch.ptr, _size); \
967 ctxt->fetch.ptr += (_size); \
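/*
 * Typical use in the decoder (sketch): opcode, ModRM and immediate bytes
 * are pulled through these helpers, e.g.
 *
 *	ctxt->b = insn_fetch(u8, ctxt);
 *	ctxt->modrm = insn_fetch(u8, ctxt);
 *
 * with any fetch failure propagated via rc.
 */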
971 * Given the 'reg' portion of a ModRM byte, and a register block, return a
972 * pointer into the block that addresses the relevant register.
973 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
975 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
979 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
981 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
982 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
984 p = reg_rmw(ctxt, modrm_reg);
988 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
989 struct segmented_address addr,
990 u16 *size, unsigned long *address, int op_bytes)
997 rc = segmented_read_std(ctxt, addr, size, 2);
998 if (rc != X86EMUL_CONTINUE)
1001 rc = segmented_read_std(ctxt, addr, address, op_bytes);
1015 FASTOP1SRC2(mul, mul_ex);
1016 FASTOP1SRC2(imul, imul_ex);
1017 FASTOP1SRC2EX(div, div_ex);
1018 FASTOP1SRC2EX(idiv, idiv_ex);
1047 FASTOP2R(cmp, cmp_r);
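/*
 * For example, FASTOP1SRC2(mul, mul_ex) above emits an em_mul_ex block whose
 * "mul %cl", "mul %cx", "mul %ecx" and (on 64-bit) "mul %rcx" bodies sit
 * FASTOP_SIZE bytes apart, so the size-based fastop dispatch described
 * earlier picks the right width arithmetically.
 */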
1049 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1051 /* If src is zero, do not write back, but update flags */
1052 if (ctxt->src.val == 0)
1053 ctxt->dst.type = OP_NONE;
1054 return fastop(ctxt, em_bsf);
1057 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1059 /* If src is zero, do not write back, but update flags */
1060 if (ctxt->src.val == 0)
1061 ctxt->dst.type = OP_NONE;
1062 return fastop(ctxt, em_bsr);
1065 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1068 void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
1070 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1071 asm("push %[flags]; popf; " CALL_NOSPEC
1072 : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
1076 static void fetch_register_operand(struct operand *op)
1078 switch (op->bytes) {
1080 op->val = *(u8 *)op->addr.reg;
1083 op->val = *(u16 *)op->addr.reg;
1086 op->val = *(u32 *)op->addr.reg;
1089 op->val = *(u64 *)op->addr.reg;
1094 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1096 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1097 return emulate_nm(ctxt);
1100 asm volatile("fninit");
1102 return X86EMUL_CONTINUE;
1105 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1109 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1110 return emulate_nm(ctxt);
1113 asm volatile("fnstcw %0": "+m"(fcw));
1116 ctxt->dst.val = fcw;
1118 return X86EMUL_CONTINUE;
1121 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1125 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1126 return emulate_nm(ctxt);
1129 asm volatile("fnstsw %0": "+m"(fsw));
1132 ctxt->dst.val = fsw;
1134 return X86EMUL_CONTINUE;
1137 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1142 if (ctxt->d & ModRM)
1143 reg = ctxt->modrm_reg;
1145 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1147 if (ctxt->d & Sse) {
1151 kvm_read_sse_reg(reg, &op->vec_val);
1154 if (ctxt->d & Mmx) {
1163 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1164 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1166 fetch_register_operand(op);
1167 op->orig_val = op->val;
1170 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1172 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1173 ctxt->modrm_seg = VCPU_SREG_SS;
1176 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1180 int index_reg, base_reg, scale;
1181 int rc = X86EMUL_CONTINUE;
1184 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1185 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1186 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1188 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1189 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1190 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1191 ctxt->modrm_seg = VCPU_SREG_DS;
1193 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1195 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1196 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1198 if (ctxt->d & Sse) {
1201 op->addr.xmm = ctxt->modrm_rm;
1202 kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1205 if (ctxt->d & Mmx) {
1208 op->addr.mm = ctxt->modrm_rm & 7;
1211 fetch_register_operand(op);
1217 if (ctxt->ad_bytes == 2) {
1218 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1219 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1220 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1221 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1223 /* 16-bit ModR/M decode. */
1224 switch (ctxt->modrm_mod) {
1226 if (ctxt->modrm_rm == 6)
1227 modrm_ea += insn_fetch(u16, ctxt);
1230 modrm_ea += insn_fetch(s8, ctxt);
1233 modrm_ea += insn_fetch(u16, ctxt);
1236 switch (ctxt->modrm_rm) {
1238 modrm_ea += bx + si;
1241 modrm_ea += bx + di;
1244 modrm_ea += bp + si;
1247 modrm_ea += bp + di;
1256 if (ctxt->modrm_mod != 0)
1263 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1264 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1265 ctxt->modrm_seg = VCPU_SREG_SS;
1266 modrm_ea = (u16)modrm_ea;
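/*
 * Worked example: mod=01, rm=2 encodes [BP+SI+disp8]; the effective address
 * is bp + si plus the sign-extended displacement fetched above, truncated
 * to 16 bits, with SS as the default segment.
 */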
1268 /* 32/64-bit ModR/M decode. */
1269 if ((ctxt->modrm_rm & 7) == 4) {
1270 sib = insn_fetch(u8, ctxt);
1271 index_reg |= (sib >> 3) & 7;
1272 base_reg |= sib & 7;
1275 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1276 modrm_ea += insn_fetch(s32, ctxt);
1278 modrm_ea += reg_read(ctxt, base_reg);
1279 adjust_modrm_seg(ctxt, base_reg);
1280 /* Increment ESP on POP [ESP] */
1281 if ((ctxt->d & IncSP) &&
1282 base_reg == VCPU_REGS_RSP)
1283 modrm_ea += ctxt->op_bytes;
1286 modrm_ea += reg_read(ctxt, index_reg) << scale;
1287 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1288 modrm_ea += insn_fetch(s32, ctxt);
1289 if (ctxt->mode == X86EMUL_MODE_PROT64)
1290 ctxt->rip_relative = 1;
1292 base_reg = ctxt->modrm_rm;
1293 modrm_ea += reg_read(ctxt, base_reg);
1294 adjust_modrm_seg(ctxt, base_reg);
1296 switch (ctxt->modrm_mod) {
1298 modrm_ea += insn_fetch(s8, ctxt);
1301 modrm_ea += insn_fetch(s32, ctxt);
1305 op->addr.mem.ea = modrm_ea;
1306 if (ctxt->ad_bytes != 8)
1307 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1313 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1316 int rc = X86EMUL_CONTINUE;
1319 switch (ctxt->ad_bytes) {
1321 op->addr.mem.ea = insn_fetch(u16, ctxt);
1324 op->addr.mem.ea = insn_fetch(u32, ctxt);
1327 op->addr.mem.ea = insn_fetch(u64, ctxt);
1334 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1338 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1339 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1341 if (ctxt->src.bytes == 2)
1342 sv = (s16)ctxt->src.val & (s16)mask;
1343 else if (ctxt->src.bytes == 4)
1344 sv = (s32)ctxt->src.val & (s32)mask;
1346 sv = (s64)ctxt->src.val & (s64)mask;
1348 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1349 ctxt->dst.addr.mem.ea + (sv >> 3));
1352 /* only subword offset */
1353 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
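/*
 * Worked example: a 16-bit "bt [mem], reg" with a bit offset of 35 yields
 * sv = 32, so the memory address is advanced by 32 / 8 = 4 bytes and the
 * remaining in-word bit offset becomes 35 & 15 = 3.
 */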
1356 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1357 unsigned long addr, void *dest, unsigned size)
1360 struct read_cache *mc = &ctxt->mem_read;
1362 if (mc->pos < mc->end)
1365 if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
1366 return X86EMUL_UNHANDLEABLE;
1368 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1370 if (rc != X86EMUL_CONTINUE)
1376 memcpy(dest, mc->data + mc->pos, size);
1378 return X86EMUL_CONTINUE;
1381 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1382 struct segmented_address addr,
1389 rc = linearize(ctxt, addr, size, false, &linear);
1390 if (rc != X86EMUL_CONTINUE)
1392 return read_emulated(ctxt, linear, data, size);
1395 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1396 struct segmented_address addr,
1403 rc = linearize(ctxt, addr, size, true, &linear);
1404 if (rc != X86EMUL_CONTINUE)
1406 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1410 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1411 struct segmented_address addr,
1412 const void *orig_data, const void *data,
1418 rc = linearize(ctxt, addr, size, true, &linear);
1419 if (rc != X86EMUL_CONTINUE)
1421 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1422 size, &ctxt->exception);
1425 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1426 unsigned int size, unsigned short port,
1429 struct read_cache *rc = &ctxt->io_read;
1431 if (rc->pos == rc->end) { /* refill pio read ahead */
1432 unsigned int in_page, n;
1433 unsigned int count = ctxt->rep_prefix ?
1434 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1435 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1436 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1437 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1438 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1441 rc->pos = rc->end = 0;
1442 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1447 if (ctxt->rep_prefix && (ctxt->d & String) &&
1448 !(ctxt->eflags & X86_EFLAGS_DF)) {
1449 ctxt->dst.data = rc->data + rc->pos;
1450 ctxt->dst.type = OP_MEM_STR;
1451 ctxt->dst.count = (rc->end - rc->pos) / size;
1454 memcpy(dest, rc->data + rc->pos, size);
1460 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1461 u16 index, struct desc_struct *desc)
1466 ctxt->ops->get_idt(ctxt, &dt);
1468 if (dt.size < index * 8 + 7)
1469 return emulate_gp(ctxt, index << 3 | 0x2);
1471 addr = dt.address + index * 8;
1472 return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1475 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1476 u16 selector, struct desc_ptr *dt)
1478 const struct x86_emulate_ops *ops = ctxt->ops;
1481 if (selector & 1 << 2) {
1482 struct desc_struct desc;
1485 memset(dt, 0, sizeof(*dt));
1486 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1490 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1491 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1493 ops->get_gdt(ctxt, dt);
1496 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1497 u16 selector, ulong *desc_addr_p)
1500 u16 index = selector >> 3;
1503 get_descriptor_table_ptr(ctxt, selector, &dt);
1505 if (dt.size < index * 8 + 7)
1506 return emulate_gp(ctxt, selector & 0xfffc);
1508 addr = dt.address + index * 8;
1510 #ifdef CONFIG_X86_64
1511 if (addr >> 32 != 0) {
1514 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1515 if (!(efer & EFER_LMA))
1520 *desc_addr_p = addr;
1521 return X86EMUL_CONTINUE;
1524 /* allowed only for 8-byte segment descriptors */
1525 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1526 u16 selector, struct desc_struct *desc,
1531 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1532 if (rc != X86EMUL_CONTINUE)
1535 return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1538 /* allowed only for 8-byte segment descriptors */
1539 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1540 u16 selector, struct desc_struct *desc)
1545 rc = get_descriptor_ptr(ctxt, selector, &addr);
1546 if (rc != X86EMUL_CONTINUE)
1549 return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1552 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1553 u16 selector, int seg, u8 cpl,
1554 enum x86_transfer_type transfer,
1555 struct desc_struct *desc)
1557 struct desc_struct seg_desc, old_desc;
1559 unsigned err_vec = GP_VECTOR;
1561 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1567 memset(&seg_desc, 0, sizeof(seg_desc));
1569 if (ctxt->mode == X86EMUL_MODE_REAL) {
1570 /* set real mode segment descriptor (keep limit etc. for unreal mode) */
1572 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1573 set_desc_base(&seg_desc, selector << 4);
1575 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1576 /* VM86 needs a clean new segment descriptor */
1577 set_desc_base(&seg_desc, selector << 4);
1578 set_desc_limit(&seg_desc, 0xffff);
1588 /* TR should be in GDT only */
1589 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1592 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1593 if (null_selector) {
1594 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1597 if (seg == VCPU_SREG_SS) {
1598 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1602 * ctxt->ops->set_segment expects the CPL to be in
1603 * SS.DPL, so fake an expand-up 32-bit data segment.
1613 /* Skip all following checks */
1617 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1618 if (ret != X86EMUL_CONTINUE)
1621 err_code = selector & 0xfffc;
1622 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1625 /* can't load a system descriptor into a segment register */
1626 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1627 if (transfer == X86_TRANSFER_CALL_JMP)
1628 return X86EMUL_UNHANDLEABLE;
1637 * segment is not a writable data segment or segment
1638 * selector's RPL != CPL or DPL != CPL
1640 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1645 * KVM uses "none" when loading CS as part of emulating Real
1646 * Mode exceptions and IRET (handled above). In all other
1647 * cases, loading CS without a control transfer is a KVM bug.
1649 if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1652 if (!(seg_desc.type & 8))
1655 if (transfer == X86_TRANSFER_RET) {
1656 /* RET can never return to an inner privilege level. */
1659 /* Outer-privilege level return is not implemented */
1661 return X86EMUL_UNHANDLEABLE;
1663 if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
1664 if (seg_desc.type & 4) {
1673 } else { /* X86_TRANSFER_CALL_JMP */
1674 if (seg_desc.type & 4) {
1680 if (rpl > cpl || dpl != cpl)
1684 /* in long-mode d/b must be clear if l is set */
1685 if (seg_desc.d && seg_desc.l) {
1688 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1689 if (efer & EFER_LMA)
1693 /* CS(RPL) <- CPL */
1694 selector = (selector & 0xfffc) | cpl;
1697 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1700 case VCPU_SREG_LDTR:
1701 if (seg_desc.s || seg_desc.type != 2)
1704 default: /* DS, ES, FS, or GS */
1706 * segment is not a data or readable code segment or
1707 * ((segment is a data or nonconforming code segment)
1708 * and ((RPL > DPL) or (CPL > DPL)))
1710 if ((seg_desc.type & 0xa) == 0x8 ||
1711 (((seg_desc.type & 0xc) != 0xc) &&
1712 (rpl > dpl || cpl > dpl)))
1718 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1723 /* mark segment as accessed */
1724 if (!(seg_desc.type & 1)) {
1726 ret = write_segment_descriptor(ctxt, selector,
1728 if (ret != X86EMUL_CONTINUE)
1731 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1732 ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1733 if (ret != X86EMUL_CONTINUE)
1735 if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1736 ((u64)base3 << 32), ctxt))
1737 return emulate_gp(ctxt, err_code);
1740 if (seg == VCPU_SREG_TR) {
1741 old_desc = seg_desc;
1742 seg_desc.type |= 2; /* busy */
1743 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1744 sizeof(seg_desc), &ctxt->exception);
1745 if (ret != X86EMUL_CONTINUE)
1749 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1752 return X86EMUL_CONTINUE;
1754 return emulate_exception(ctxt, err_vec, err_code, true);
1757 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1758 u16 selector, int seg)
1760 u8 cpl = ctxt->ops->cpl(ctxt);
1763 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1764 * they can load it at CPL<3 (Intel's manual says only LSS can, but it's wrong).
1767 * However, the Intel manual says that putting IST=1/DPL=3 in
1768 * an interrupt gate will result in SS=3 (the AMD manual instead
1769 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1770 * and only forbid it here.
1772 if (seg == VCPU_SREG_SS && selector == 3 &&
1773 ctxt->mode == X86EMUL_MODE_PROT64)
1774 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1776 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1777 X86_TRANSFER_NONE, NULL);
1780 static void write_register_operand(struct operand *op)
1782 return assign_register(op->addr.reg, op->val, op->bytes);
1785 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1789 write_register_operand(op);
1792 if (ctxt->lock_prefix)
1793 return segmented_cmpxchg(ctxt,
1799 return segmented_write(ctxt,
1804 return segmented_write(ctxt,
1807 op->bytes * op->count);
1809 kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1812 kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1820 return X86EMUL_CONTINUE;
1823 static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
1825 struct segmented_address addr;
1827 rsp_increment(ctxt, -len);
1828 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1829 addr.seg = VCPU_SREG_SS;
1831 return segmented_write(ctxt, addr, data, len);
1834 static int em_push(struct x86_emulate_ctxt *ctxt)
1836 /* Disable writeback. */
1837 ctxt->dst.type = OP_NONE;
1838 return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1841 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1842 void *dest, int len)
1845 struct segmented_address addr;
1847 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1848 addr.seg = VCPU_SREG_SS;
1849 rc = segmented_read(ctxt, addr, dest, len);
1850 if (rc != X86EMUL_CONTINUE)
1853 rsp_increment(ctxt, len);
1857 static int em_pop(struct x86_emulate_ctxt *ctxt)
1859 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1862 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1863 void *dest, int len)
1866 unsigned long val = 0;
1867 unsigned long change_mask;
1868 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1869 int cpl = ctxt->ops->cpl(ctxt);
1871 rc = emulate_pop(ctxt, &val, len);
1872 if (rc != X86EMUL_CONTINUE)
1875 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1876 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1877 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1878 X86_EFLAGS_AC | X86_EFLAGS_ID;
1880 switch(ctxt->mode) {
1881 case X86EMUL_MODE_PROT64:
1882 case X86EMUL_MODE_PROT32:
1883 case X86EMUL_MODE_PROT16:
1885 change_mask |= X86_EFLAGS_IOPL;
1887 change_mask |= X86_EFLAGS_IF;
1889 case X86EMUL_MODE_VM86:
1891 return emulate_gp(ctxt, 0);
1892 change_mask |= X86_EFLAGS_IF;
1894 default: /* real mode */
1895 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1899 *(unsigned long *)dest =
1900 (ctxt->eflags & ~change_mask) | (val & change_mask);
1905 static int em_popf(struct x86_emulate_ctxt *ctxt)
1907 ctxt->dst.type = OP_REG;
1908 ctxt->dst.addr.reg = &ctxt->eflags;
1909 ctxt->dst.bytes = ctxt->op_bytes;
1910 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1913 static int em_enter(struct x86_emulate_ctxt *ctxt)
1916 unsigned frame_size = ctxt->src.val;
1917 unsigned nesting_level = ctxt->src2.val & 31;
1921 return X86EMUL_UNHANDLEABLE;
1923 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1924 rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
1925 if (rc != X86EMUL_CONTINUE)
1927 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1929 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1930 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1932 return X86EMUL_CONTINUE;
1935 static int em_leave(struct x86_emulate_ctxt *ctxt)
1937 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1939 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1942 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1944 int seg = ctxt->src2.val;
1946 ctxt->src.val = get_segment_selector(ctxt, seg);
1947 if (ctxt->op_bytes == 4) {
1948 rsp_increment(ctxt, -2);
1952 return em_push(ctxt);
1955 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1957 int seg = ctxt->src2.val;
1958 unsigned long selector = 0;
1961 rc = emulate_pop(ctxt, &selector, 2);
1962 if (rc != X86EMUL_CONTINUE)
1965 if (seg == VCPU_SREG_SS)
1966 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1967 if (ctxt->op_bytes > 2)
1968 rsp_increment(ctxt, ctxt->op_bytes - 2);
1970 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1974 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1976 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1977 int rc = X86EMUL_CONTINUE;
1978 int reg = VCPU_REGS_RAX;
1980 while (reg <= VCPU_REGS_RDI) {
1981 (reg == VCPU_REGS_RSP) ?
1982 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
1985 if (rc != X86EMUL_CONTINUE)
1994 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1996 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1997 return em_push(ctxt);
2000 static int em_popa(struct x86_emulate_ctxt *ctxt)
2002 int rc = X86EMUL_CONTINUE;
2003 int reg = VCPU_REGS_RDI;
2006 while (reg >= VCPU_REGS_RAX) {
2007 if (reg == VCPU_REGS_RSP) {
2008 rsp_increment(ctxt, ctxt->op_bytes);
2012 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
2013 if (rc != X86EMUL_CONTINUE)
2015 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
2021 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2023 const struct x86_emulate_ops *ops = ctxt->ops;
2030 /* TODO: Add limit checks */
2031 ctxt->src.val = ctxt->eflags;
2033 if (rc != X86EMUL_CONTINUE)
2036 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2038 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2040 if (rc != X86EMUL_CONTINUE)
2043 ctxt->src.val = ctxt->_eip;
2045 if (rc != X86EMUL_CONTINUE)
2048 ops->get_idt(ctxt, &dt);
2050 eip_addr = dt.address + (irq << 2);
2051 cs_addr = dt.address + (irq << 2) + 2;
2053 rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2054 if (rc != X86EMUL_CONTINUE)
2057 rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2058 if (rc != X86EMUL_CONTINUE)
2061 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2062 if (rc != X86EMUL_CONTINUE)
2070 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2074 invalidate_registers(ctxt);
2075 rc = __emulate_int_real(ctxt, irq);
2076 if (rc == X86EMUL_CONTINUE)
2077 writeback_registers(ctxt);
2081 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2083 switch(ctxt->mode) {
2084 case X86EMUL_MODE_REAL:
2085 return __emulate_int_real(ctxt, irq);
2086 case X86EMUL_MODE_VM86:
2087 case X86EMUL_MODE_PROT16:
2088 case X86EMUL_MODE_PROT32:
2089 case X86EMUL_MODE_PROT64:
2091 /* Protected mode interrupts are not implemented yet */
2092 return X86EMUL_UNHANDLEABLE;
2096 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2098 int rc = X86EMUL_CONTINUE;
2099 unsigned long temp_eip = 0;
2100 unsigned long temp_eflags = 0;
2101 unsigned long cs = 0;
2102 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2103 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2104 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2105 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2106 X86_EFLAGS_AC | X86_EFLAGS_ID |
2108 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2111 /* TODO: Add stack limit check */
2113 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2115 if (rc != X86EMUL_CONTINUE)
2118 if (temp_eip & ~0xffff)
2119 return emulate_gp(ctxt, 0);
2121 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2123 if (rc != X86EMUL_CONTINUE)
2126 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2128 if (rc != X86EMUL_CONTINUE)
2131 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2133 if (rc != X86EMUL_CONTINUE)
2136 ctxt->_eip = temp_eip;
2138 if (ctxt->op_bytes == 4)
2139 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2140 else if (ctxt->op_bytes == 2) {
2141 ctxt->eflags &= ~0xffff;
2142 ctxt->eflags |= temp_eflags;
2145 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2146 ctxt->eflags |= X86_EFLAGS_FIXED;
2147 ctxt->ops->set_nmi_mask(ctxt, false);
2152 static int em_iret(struct x86_emulate_ctxt *ctxt)
2154 switch(ctxt->mode) {
2155 case X86EMUL_MODE_REAL:
2156 return emulate_iret_real(ctxt);
2157 case X86EMUL_MODE_VM86:
2158 case X86EMUL_MODE_PROT16:
2159 case X86EMUL_MODE_PROT32:
2160 case X86EMUL_MODE_PROT64:
2162 /* iret from protected mode is not implemented yet */
2163 return X86EMUL_UNHANDLEABLE;
2167 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2171 struct desc_struct new_desc;
2172 u8 cpl = ctxt->ops->cpl(ctxt);
2174 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2176 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2177 X86_TRANSFER_CALL_JMP,
2179 if (rc != X86EMUL_CONTINUE)
2182 rc = assign_eip_far(ctxt, ctxt->src.val);
2183 /* Error handling is not implemented. */
2184 if (rc != X86EMUL_CONTINUE)
2185 return X86EMUL_UNHANDLEABLE;
2190 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2192 return assign_eip_near(ctxt, ctxt->src.val);
2195 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2200 old_eip = ctxt->_eip;
2201 rc = assign_eip_near(ctxt, ctxt->src.val);
2202 if (rc != X86EMUL_CONTINUE)
2204 ctxt->src.val = old_eip;
2209 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2211 u64 old = ctxt->dst.orig_val64;
2213 if (ctxt->dst.bytes == 16)
2214 return X86EMUL_UNHANDLEABLE;
2216 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2217 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2218 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2219 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2220 ctxt->eflags &= ~X86_EFLAGS_ZF;
2222 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2223 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2225 ctxt->eflags |= X86_EFLAGS_ZF;
2227 return X86EMUL_CONTINUE;
2230 static int em_ret(struct x86_emulate_ctxt *ctxt)
2233 unsigned long eip = 0;
2235 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2236 if (rc != X86EMUL_CONTINUE)
2239 return assign_eip_near(ctxt, eip);
2242 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2245 unsigned long eip = 0;
2246 unsigned long cs = 0;
2247 int cpl = ctxt->ops->cpl(ctxt);
2248 struct desc_struct new_desc;
2250 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2251 if (rc != X86EMUL_CONTINUE)
2253 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2254 if (rc != X86EMUL_CONTINUE)
2256 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2259 if (rc != X86EMUL_CONTINUE)
2261 rc = assign_eip_far(ctxt, eip);
2262 /* Error handling is not implemented. */
2263 if (rc != X86EMUL_CONTINUE)
2264 return X86EMUL_UNHANDLEABLE;
2269 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2273 rc = em_ret_far(ctxt);
2274 if (rc != X86EMUL_CONTINUE)
2276 rsp_increment(ctxt, ctxt->src.val);
2277 return X86EMUL_CONTINUE;
2280 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2282 /* Save real source value, then compare EAX against destination. */
2283 ctxt->dst.orig_val = ctxt->dst.val;
2284 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2285 ctxt->src.orig_val = ctxt->src.val;
2286 ctxt->src.val = ctxt->dst.orig_val;
2287 fastop(ctxt, em_cmp);
2289 if (ctxt->eflags & X86_EFLAGS_ZF) {
2290 /* Success: write back to memory; no update of EAX */
2291 ctxt->src.type = OP_NONE;
2292 ctxt->dst.val = ctxt->src.orig_val;
2294 /* Failure: write the value we saw to EAX. */
2295 ctxt->src.type = OP_REG;
2296 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2297 ctxt->src.val = ctxt->dst.orig_val;
2298 /* Create write-cycle to dest by writing the same value */
2299 ctxt->dst.val = ctxt->dst.orig_val;
2301 return X86EMUL_CONTINUE;
2304 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2306 int seg = ctxt->src2.val;
2310 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2312 rc = load_segment_descriptor(ctxt, sel, seg);
2313 if (rc != X86EMUL_CONTINUE)
2316 ctxt->dst.val = ctxt->src.val;
2320 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2322 if (!ctxt->ops->is_smm(ctxt))
2323 return emulate_ud(ctxt);
2325 if (ctxt->ops->leave_smm(ctxt))
2326 ctxt->ops->triple_fault(ctxt);
2328 return emulator_recalc_and_set_mode(ctxt);
2332 setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
2334 cs->l = 0; /* will be adjusted later */
2335 set_desc_base(cs, 0); /* flat segment */
2336 cs->g = 1; /* 4kb granularity */
2337 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2338 cs->type = 0x0b; /* Read, Execute, Accessed */
2340 cs->dpl = 0; /* will be adjusted later */
2345 set_desc_base(ss, 0); /* flat segment */
2346 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2347 ss->g = 1; /* 4kb granularity */
2349 ss->type = 0x03; /* Read/Write, Accessed */
2350 ss->d = 1; /* 32bit stack segment */
2357 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2359 u32 eax, ebx, ecx, edx;
2362 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2363 return is_guest_vendor_intel(ebx, ecx, edx);
2366 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2368 const struct x86_emulate_ops *ops = ctxt->ops;
2369 u32 eax, ebx, ecx, edx;
2372 * syscall should always be enabled in long mode, so it only becomes
2373 * vendor specific (via cpuid) when other modes are active...
2375 if (ctxt->mode == X86EMUL_MODE_PROT64)
2380 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2382 * Remark: Intel CPUs only support "syscall" in 64-bit long mode, so a
2383 * 32-bit compat app running in a 64-bit guest will #UD. While this
2384 * behaviour could be fixed by emulating the AMD response on Intel,
2385 * AMD CPUs can't be made to behave like Intel ones.
2387 if (is_guest_vendor_intel(ebx, ecx, edx))
2390 if (is_guest_vendor_amd(ebx, ecx, edx) ||
2391 is_guest_vendor_hygon(ebx, ecx, edx))
2395 * default: (not Intel, not AMD, not Hygon), apply Intel's stricter rules...
2401 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2403 const struct x86_emulate_ops *ops = ctxt->ops;
2404 struct desc_struct cs, ss;
2409 /* syscall is not available in real mode */
2410 if (ctxt->mode == X86EMUL_MODE_REAL ||
2411 ctxt->mode == X86EMUL_MODE_VM86)
2412 return emulate_ud(ctxt);
2414 if (!(em_syscall_is_enabled(ctxt)))
2415 return emulate_ud(ctxt);
2417 ops->get_msr(ctxt, MSR_EFER, &efer);
2418 if (!(efer & EFER_SCE))
2419 return emulate_ud(ctxt);
2421 setup_syscalls_segments(&cs, &ss);
2422 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2424 cs_sel = (u16)(msr_data & 0xfffc);
2425 ss_sel = (u16)(msr_data + 8);
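/*
 * Bits 47:32 of MSR_STAR hold the SYSCALL CS selector; the architecture
 * defines SS as that selector + 8, which is what is computed above.
 */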
2427 if (efer & EFER_LMA) {
2431 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2432 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2434 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2435 if (efer & EFER_LMA) {
2436 #ifdef CONFIG_X86_64
2437 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2440 ctxt->mode == X86EMUL_MODE_PROT64 ?
2441 MSR_LSTAR : MSR_CSTAR, &msr_data);
2442 ctxt->_eip = msr_data;
2444 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2445 ctxt->eflags &= ~msr_data;
2446 ctxt->eflags |= X86_EFLAGS_FIXED;
2450 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2451 ctxt->_eip = (u32)msr_data;
2453 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2456 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2457 return X86EMUL_CONTINUE;
2460 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2462 const struct x86_emulate_ops *ops = ctxt->ops;
2463 struct desc_struct cs, ss;
2468 ops->get_msr(ctxt, MSR_EFER, &efer);
2469 /* inject #GP if in real mode */
2470 if (ctxt->mode == X86EMUL_MODE_REAL)
2471 return emulate_gp(ctxt, 0);
2474 * Not recognized on AMD in compat mode (but is recognized in legacy mode, of course).
2477 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2478 && !vendor_intel(ctxt))
2479 return emulate_ud(ctxt);
2481 /* sysenter/sysexit have not been tested in 64bit mode. */
2482 if (ctxt->mode == X86EMUL_MODE_PROT64)
2483 return X86EMUL_UNHANDLEABLE;
2485 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2486 if ((msr_data & 0xfffc) == 0x0)
2487 return emulate_gp(ctxt, 0);
2489 setup_syscalls_segments(&cs, &ss);
2490 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2491 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2492 ss_sel = cs_sel + 8;
2493 if (efer & EFER_LMA) {
2498 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2499 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2501 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2502 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2504 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2505 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2507 if (efer & EFER_LMA)
2508 ctxt->mode = X86EMUL_MODE_PROT64;
2510 return X86EMUL_CONTINUE;
2513 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2515 const struct x86_emulate_ops *ops = ctxt->ops;
2516 struct desc_struct cs, ss;
2517 u64 msr_data, rcx, rdx;
2519 u16 cs_sel = 0, ss_sel = 0;
2521 /* inject #GP if in real mode or Virtual 8086 mode */
2522 if (ctxt->mode == X86EMUL_MODE_REAL ||
2523 ctxt->mode == X86EMUL_MODE_VM86)
2524 return emulate_gp(ctxt, 0);
2526 setup_syscalls_segments(&cs, &ss);
2528 if ((ctxt->rex_prefix & 0x8) != 0x0)
2529 usermode = X86EMUL_MODE_PROT64;
2531 usermode = X86EMUL_MODE_PROT32;
2533 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2534 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2538 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2540 case X86EMUL_MODE_PROT32:
2541 cs_sel = (u16)(msr_data + 16);
2542 if ((msr_data & 0xfffc) == 0x0)
2543 return emulate_gp(ctxt, 0);
2544 ss_sel = (u16)(msr_data + 24);
2548 case X86EMUL_MODE_PROT64:
2549 cs_sel = (u16)(msr_data + 32);
2550 if (msr_data == 0x0)
2551 return emulate_gp(ctxt, 0);
2552 ss_sel = cs_sel + 8;
2555 if (emul_is_noncanonical_address(rcx, ctxt) ||
2556 emul_is_noncanonical_address(rdx, ctxt))
2557 return emulate_gp(ctxt, 0);
2560 cs_sel |= SEGMENT_RPL_MASK;
2561 ss_sel |= SEGMENT_RPL_MASK;
2563 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2564 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2567 ctxt->mode = usermode;
2568 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2570 return X86EMUL_CONTINUE;
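
/*
 * Editor's illustration (not part of the emulator): the SYSEXIT return
 * selectors above are all fixed offsets from IA32_SYSENTER_CS, with RPL
 * forced to 3.  The helper name is made up for this example.
 */
static inline void example_sysexit_selectors(u16 sysenter_cs, bool rex_w,
					      u16 *cs_sel, u16 *ss_sel)
{
	u16 cs = sysenter_cs + (rex_w ? 32 : 16);	/* 64- vs 32-bit return */

	*cs_sel = cs | SEGMENT_RPL_MASK;	/* user CS, RPL = 3 */
	*ss_sel = (cs + 8) | SEGMENT_RPL_MASK;	/* user SS follows CS */
}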
2573 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2576 if (ctxt->mode == X86EMUL_MODE_REAL)
2578 if (ctxt->mode == X86EMUL_MODE_VM86)
2580 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2581 return ctxt->ops->cpl(ctxt) > iopl;
2584 #define VMWARE_PORT_VMPORT (0x5658)
2585 #define VMWARE_PORT_VMRPC (0x5659)
2587 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2590 const struct x86_emulate_ops *ops = ctxt->ops;
2591 struct desc_struct tr_seg;
2594 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2595 unsigned mask = (1 << len) - 1;
2599	 * VMware allows access to these ports even if they are denied
2600	 * by the TSS I/O permission bitmap. Mimic that behavior.
2602 if (enable_vmware_backdoor &&
2603 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2606 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2609 if (desc_limit_scaled(&tr_seg) < 103)
2611 base = get_desc_base(&tr_seg);
2612 #ifdef CONFIG_X86_64
2613 base |= ((u64)base3) << 32;
2615 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2616 if (r != X86EMUL_CONTINUE)
2618 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2620 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2621 if (r != X86EMUL_CONTINUE)
2623 if ((perm >> bit_idx) & mask)
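
/*
 * Editor's illustration (not part of the emulator): the TSS I/O permission
 * bitmap test above, reduced to its arithmetic.  A port access is permitted
 * only if every bit covering [port, port + len) is clear.  'bitmap' stands in
 * for the bytes fetched from the TSS with read_std().
 */
static inline bool example_io_bitmap_allows(const u8 *bitmap, u16 port,
					     unsigned int len)
{
	u16 perm = bitmap[port / 8] | ((u16)bitmap[port / 8 + 1] << 8);
	unsigned int mask = (1u << len) - 1;

	return !((perm >> (port & 0x7)) & mask);
}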
2628 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2634 if (emulator_bad_iopl(ctxt))
2635 if (!emulator_io_port_access_allowed(ctxt, port, len))
2638 ctxt->perm_ok = true;
2643 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2646	 * Intel CPUs mask the counter and pointers in a rather strange
2647	 * manner when ECX is zero, due to REP-string optimizations.
2649 #ifdef CONFIG_X86_64
2650 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2653 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2656 case 0xa4: /* movsb */
2657 case 0xa5: /* movsd/w */
2658 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2660 case 0xaa: /* stosb */
2661 case 0xab: /* stosd/w */
2662 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2667 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2668 struct tss_segment_16 *tss)
2670 tss->ip = ctxt->_eip;
2671 tss->flag = ctxt->eflags;
2672 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2673 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2674 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2675 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2676 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2677 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2678 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2679 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2681 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2682 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2683 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2684 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2685 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2688 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2689 struct tss_segment_16 *tss)
2694 ctxt->_eip = tss->ip;
2695 ctxt->eflags = tss->flag | 2;
2696 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2697 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2698 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2699 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2700 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2701 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2702 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2703 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2706	 * SDM says that segment selectors are loaded before segment descriptors.
2709 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2710 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2711 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2712 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2713 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2718	 * Now load the segment descriptors. If a fault happens at this stage,
2719	 * it is handled in the context of the new task.
2721 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2722 X86_TRANSFER_TASK_SWITCH, NULL);
2723 if (ret != X86EMUL_CONTINUE)
2725 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2726 X86_TRANSFER_TASK_SWITCH, NULL);
2727 if (ret != X86EMUL_CONTINUE)
2729 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2730 X86_TRANSFER_TASK_SWITCH, NULL);
2731 if (ret != X86EMUL_CONTINUE)
2733 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2734 X86_TRANSFER_TASK_SWITCH, NULL);
2735 if (ret != X86EMUL_CONTINUE)
2737 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2738 X86_TRANSFER_TASK_SWITCH, NULL);
2739 if (ret != X86EMUL_CONTINUE)
2742 return X86EMUL_CONTINUE;
2745 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2746 ulong old_tss_base, struct desc_struct *new_desc)
2748 struct tss_segment_16 tss_seg;
2750 u32 new_tss_base = get_desc_base(new_desc);
2752 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2753 if (ret != X86EMUL_CONTINUE)
2756 save_state_to_tss16(ctxt, &tss_seg);
2758 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2759 if (ret != X86EMUL_CONTINUE)
2762 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2763 if (ret != X86EMUL_CONTINUE)
2766 if (old_tss_sel != 0xffff) {
2767 tss_seg.prev_task_link = old_tss_sel;
2769 ret = linear_write_system(ctxt, new_tss_base,
2770 &tss_seg.prev_task_link,
2771 sizeof(tss_seg.prev_task_link));
2772 if (ret != X86EMUL_CONTINUE)
2776 return load_state_from_tss16(ctxt, &tss_seg);
2779 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2780 struct tss_segment_32 *tss)
2782	/* CR3 and the LDT selector are intentionally not saved */
2783 tss->eip = ctxt->_eip;
2784 tss->eflags = ctxt->eflags;
2785 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2786 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2787 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2788 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2789 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2790 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2791 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2792 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2794 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2795 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2796 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2797 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2798 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2799 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2802 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2803 struct tss_segment_32 *tss)
2808 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2809 return emulate_gp(ctxt, 0);
2810 ctxt->_eip = tss->eip;
2811 ctxt->eflags = tss->eflags | 2;
2813 /* General purpose registers */
2814 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2815 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2816 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2817 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2818 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2819 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2820 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2821 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2824	 * SDM says that segment selectors are loaded before segment
2825	 * descriptors. This is important because CPL checks will use CS.RPL.
2828 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2829 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2830 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2831 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2832 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2833 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2834 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2837 * If we're switching between Protected Mode and VM86, we need to make
2838 * sure to update the mode before loading the segment descriptors so
2839 * that the selectors are interpreted correctly.
2841 if (ctxt->eflags & X86_EFLAGS_VM) {
2842 ctxt->mode = X86EMUL_MODE_VM86;
2845 ctxt->mode = X86EMUL_MODE_PROT32;
2850	 * Now load the segment descriptors. If a fault happens at this stage,
2851	 * it is handled in the context of the new task.
2853 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2854 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2855 if (ret != X86EMUL_CONTINUE)
2857 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2858 X86_TRANSFER_TASK_SWITCH, NULL);
2859 if (ret != X86EMUL_CONTINUE)
2861 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2862 X86_TRANSFER_TASK_SWITCH, NULL);
2863 if (ret != X86EMUL_CONTINUE)
2865 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2866 X86_TRANSFER_TASK_SWITCH, NULL);
2867 if (ret != X86EMUL_CONTINUE)
2869 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2870 X86_TRANSFER_TASK_SWITCH, NULL);
2871 if (ret != X86EMUL_CONTINUE)
2873 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2874 X86_TRANSFER_TASK_SWITCH, NULL);
2875 if (ret != X86EMUL_CONTINUE)
2877 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2878 X86_TRANSFER_TASK_SWITCH, NULL);
2883 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2884 ulong old_tss_base, struct desc_struct *new_desc)
2886 struct tss_segment_32 tss_seg;
2888 u32 new_tss_base = get_desc_base(new_desc);
2889 u32 eip_offset = offsetof(struct tss_segment_32, eip);
2890 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2892 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2893 if (ret != X86EMUL_CONTINUE)
2896 save_state_to_tss32(ctxt, &tss_seg);
2898 /* Only GP registers and segment selectors are saved */
2899 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2900 ldt_sel_offset - eip_offset);
2901 if (ret != X86EMUL_CONTINUE)
2904 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2905 if (ret != X86EMUL_CONTINUE)
2908 if (old_tss_sel != 0xffff) {
2909 tss_seg.prev_task_link = old_tss_sel;
2911 ret = linear_write_system(ctxt, new_tss_base,
2912 &tss_seg.prev_task_link,
2913 sizeof(tss_seg.prev_task_link));
2914 if (ret != X86EMUL_CONTINUE)
2918 return load_state_from_tss32(ctxt, &tss_seg);
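
/*
 * Editor's summary of the task-switch emulation below: read the descriptors
 * of the old and new TSS, apply the privilege check for task-gate entry,
 * clear the old busy bit for JMP/IRET, save the outgoing state into the old
 * TSS, load the incoming state from the new TSS (16- or 32-bit layout), then
 * update NT, the new busy bit, CR0.TS and TR, and finally push the error
 * code if the switch was caused by an exception that delivers one.
 */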
2921 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2922 u16 tss_selector, int idt_index, int reason,
2923 bool has_error_code, u32 error_code)
2925 const struct x86_emulate_ops *ops = ctxt->ops;
2926 struct desc_struct curr_tss_desc, next_tss_desc;
2928 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2929 ulong old_tss_base =
2930 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2932 ulong desc_addr, dr7;
2934 /* FIXME: old_tss_base == ~0 ? */
2936 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2937 if (ret != X86EMUL_CONTINUE)
2939 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2940 if (ret != X86EMUL_CONTINUE)
2943 /* FIXME: check that next_tss_desc is tss */
2946	 * Check privileges. The three cases, distinguished by what caused the
	 *	task switch, are:
2948 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2949 * 2. Exception/IRQ/iret: No check is performed
2950 * 3. jmp/call to TSS/task-gate: No check is performed since the
2951 * hardware checks it before exiting.
2953 if (reason == TASK_SWITCH_GATE) {
2954 if (idt_index != -1) {
2955 /* Software interrupts */
2956 struct desc_struct task_gate_desc;
2959 ret = read_interrupt_descriptor(ctxt, idt_index,
2961 if (ret != X86EMUL_CONTINUE)
2964 dpl = task_gate_desc.dpl;
2965 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2966 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2970 desc_limit = desc_limit_scaled(&next_tss_desc);
2971 if (!next_tss_desc.p ||
2972 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2973 desc_limit < 0x2b)) {
2974 return emulate_ts(ctxt, tss_selector & 0xfffc);
2977 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2978 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2979 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2982 if (reason == TASK_SWITCH_IRET)
2983 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2985	/* Set the back link to the previous task only if the NT bit is set in
2986	   EFLAGS; note that old_tss_sel is not used after this point. */
2987 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2988 old_tss_sel = 0xffff;
2990 if (next_tss_desc.type & 8)
2991 ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
2993 ret = task_switch_16(ctxt, old_tss_sel,
2994 old_tss_base, &next_tss_desc);
2995 if (ret != X86EMUL_CONTINUE)
2998 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2999 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3001 if (reason != TASK_SWITCH_IRET) {
3002 next_tss_desc.type |= (1 << 1); /* set busy flag */
3003 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3006 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3007 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3009 if (has_error_code) {
3010 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3011 ctxt->lock_prefix = 0;
3012 ctxt->src.val = (unsigned long) error_code;
3013 ret = em_push(ctxt);
3016 dr7 = ops->get_dr(ctxt, 7);
3017 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3022 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3023 u16 tss_selector, int idt_index, int reason,
3024 bool has_error_code, u32 error_code)
3028 invalidate_registers(ctxt);
3029 ctxt->_eip = ctxt->eip;
3030 ctxt->dst.type = OP_NONE;
3032 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3033 has_error_code, error_code);
3035 if (rc == X86EMUL_CONTINUE) {
3036 ctxt->eip = ctxt->_eip;
3037 writeback_registers(ctxt);
3040 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3043 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3046 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3048 register_address_increment(ctxt, reg, df * op->bytes);
3049 op->addr.mem.ea = register_address(ctxt, reg);
3052 static int em_das(struct x86_emulate_ctxt *ctxt)
3055 bool af, cf, old_cf;
3057 cf = ctxt->eflags & X86_EFLAGS_CF;
3063 af = ctxt->eflags & X86_EFLAGS_AF;
3064 if ((al & 0x0f) > 9 || af) {
3066 cf = old_cf | (al >= 250);
3071 if (old_al > 0x99 || old_cf) {
3077 /* Set PF, ZF, SF */
3078 ctxt->src.type = OP_IMM;
3080 ctxt->src.bytes = 1;
3081 fastop(ctxt, em_or);
3082 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3084 ctxt->eflags |= X86_EFLAGS_CF;
3086 ctxt->eflags |= X86_EFLAGS_AF;
3087 return X86EMUL_CONTINUE;
3090 static int em_aam(struct x86_emulate_ctxt *ctxt)
3094 if (ctxt->src.val == 0)
3095 return emulate_de(ctxt);
3097 al = ctxt->dst.val & 0xff;
3098 ah = al / ctxt->src.val;
3099 al %= ctxt->src.val;
3101 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3103 /* Set PF, ZF, SF */
3104 ctxt->src.type = OP_IMM;
3106 ctxt->src.bytes = 1;
3107 fastop(ctxt, em_or);
3109 return X86EMUL_CONTINUE;
3112 static int em_aad(struct x86_emulate_ctxt *ctxt)
3114 u8 al = ctxt->dst.val & 0xff;
3115 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3117 al = (al + (ah * ctxt->src.val)) & 0xff;
3119 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3121 /* Set PF, ZF, SF */
3122 ctxt->src.type = OP_IMM;
3124 ctxt->src.bytes = 1;
3125 fastop(ctxt, em_or);
3127 return X86EMUL_CONTINUE;
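
/*
 * Editor's illustration (not part of the emulator): em_das/em_aam/em_aad
 * refresh PF, ZF and SF by OR-ing the result with an immediate 0 through the
 * fastop machinery.  For a byte result that is equivalent to the following
 * (hweight8() could replace the GCC builtin in kernel code).
 */
static inline unsigned long example_pf_zf_sf(u8 result)
{
	unsigned long flags = 0;

	if (!(__builtin_popcount(result) & 1))
		flags |= X86_EFLAGS_PF;		/* even number of set bits */
	if (!result)
		flags |= X86_EFLAGS_ZF;
	if (result & 0x80)
		flags |= X86_EFLAGS_SF;
	return flags;
}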
3130 static int em_call(struct x86_emulate_ctxt *ctxt)
3133 long rel = ctxt->src.val;
3135 ctxt->src.val = (unsigned long)ctxt->_eip;
3136 rc = jmp_rel(ctxt, rel);
3137 if (rc != X86EMUL_CONTINUE)
3139 return em_push(ctxt);
3142 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3147 struct desc_struct old_desc, new_desc;
3148 const struct x86_emulate_ops *ops = ctxt->ops;
3149 int cpl = ctxt->ops->cpl(ctxt);
3150 enum x86emul_mode prev_mode = ctxt->mode;
3152 old_eip = ctxt->_eip;
3153 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3155 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3156 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3157 X86_TRANSFER_CALL_JMP, &new_desc);
3158 if (rc != X86EMUL_CONTINUE)
3161 rc = assign_eip_far(ctxt, ctxt->src.val);
3162 if (rc != X86EMUL_CONTINUE)
3165 ctxt->src.val = old_cs;
3167 if (rc != X86EMUL_CONTINUE)
3170 ctxt->src.val = old_eip;
3172	/* If we failed, we tainted the memory, but at the very least we should
	   restore CS. */
3174 if (rc != X86EMUL_CONTINUE) {
3175 pr_warn_once("faulting far call emulation tainted memory\n");
3180 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3181 ctxt->mode = prev_mode;
3186 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3189 unsigned long eip = 0;
3191 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3192 if (rc != X86EMUL_CONTINUE)
3194 rc = assign_eip_near(ctxt, eip);
3195 if (rc != X86EMUL_CONTINUE)
3197 rsp_increment(ctxt, ctxt->src.val);
3198 return X86EMUL_CONTINUE;
3201 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3203 /* Write back the register source. */
3204 ctxt->src.val = ctxt->dst.val;
3205 write_register_operand(&ctxt->src);
3207 /* Write back the memory destination with implicit LOCK prefix. */
3208 ctxt->dst.val = ctxt->src.orig_val;
3209 ctxt->lock_prefix = 1;
3210 return X86EMUL_CONTINUE;
3213 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3215 ctxt->dst.val = ctxt->src2.val;
3216 return fastop(ctxt, em_imul);
3219 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3221 ctxt->dst.type = OP_REG;
3222 ctxt->dst.bytes = ctxt->src.bytes;
3223 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3224 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3226 return X86EMUL_CONTINUE;
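
/*
 * Editor's illustration (not part of the emulator): the expression in
 * em_cwd() spreads the sign bit of the accumulator across the destination.
 * Written out more conventionally:
 */
static inline u64 example_sign_fill(u64 acc, unsigned int op_bytes)
{
	return ((acc >> (op_bytes * 8 - 1)) & 1) ? ~0ULL : 0ULL;
}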
3229 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3233 if (!ctxt->ops->guest_has_rdpid(ctxt))
3234 return emulate_ud(ctxt);
3236 ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3237 ctxt->dst.val = tsc_aux;
3238 return X86EMUL_CONTINUE;
3241 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3245 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3246 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3247 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3248 return X86EMUL_CONTINUE;
3251 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3255 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3256 return emulate_gp(ctxt, 0);
3257 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3258 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3259 return X86EMUL_CONTINUE;
3262 static int em_mov(struct x86_emulate_ctxt *ctxt)
3264 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3265 return X86EMUL_CONTINUE;
3268 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3272 if (!ctxt->ops->guest_has_movbe(ctxt))
3273 return emulate_ud(ctxt);
3275 switch (ctxt->op_bytes) {
3278		 * From the MOVBE definition: "...When the operand size is 16 bits,
3279		 * the upper word of the destination register remains unchanged ..."
3282		 * Both casting ->valptr and ->val to u16 break strict aliasing
3283		 * rules, so we have to do the operation almost by hand.
3285 tmp = (u16)ctxt->src.val;
3286 ctxt->dst.val &= ~0xffffUL;
3287 ctxt->dst.val |= (unsigned long)swab16(tmp);
3290 ctxt->dst.val = swab32((u32)ctxt->src.val);
3293 ctxt->dst.val = swab64(ctxt->src.val);
3298 return X86EMUL_CONTINUE;
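
/*
 * Editor's illustration (not part of the emulator): the 16-bit MOVBE case
 * above byte-swaps only the low word and leaves the upper destination bits
 * untouched, matching the architectural note quoted in the comment.
 */
static inline unsigned long example_movbe16(unsigned long dst, u16 src)
{
	return (dst & ~0xffffUL) | swab16(src);
}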
3301 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3303 int cr_num = ctxt->modrm_reg;
3306 if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3307 return emulate_gp(ctxt, 0);
3309 /* Disable writeback. */
3310 ctxt->dst.type = OP_NONE;
3314	 * A CR0 write might have updated CR0.PE and/or CR0.PG,
3315	 * which can affect the CPU's execution mode.
3317 r = emulator_recalc_and_set_mode(ctxt);
3318 if (r != X86EMUL_CONTINUE)
3322 return X86EMUL_CONTINUE;
3325 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3329 if (ctxt->mode == X86EMUL_MODE_PROT64)
3330 val = ctxt->src.val & ~0ULL;
3332 val = ctxt->src.val & ~0U;
3334 /* #UD condition is already handled. */
3335 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3336 return emulate_gp(ctxt, 0);
3338 /* Disable writeback. */
3339 ctxt->dst.type = OP_NONE;
3340 return X86EMUL_CONTINUE;
3343 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3345 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3349 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3350 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3351 r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3353 if (r == X86EMUL_PROPAGATE_FAULT)
3354 return emulate_gp(ctxt, 0);
3359 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3361 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3365 r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3367 if (r == X86EMUL_PROPAGATE_FAULT)
3368 return emulate_gp(ctxt, 0);
3370 if (r == X86EMUL_CONTINUE) {
3371 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3372 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3377 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3379 if (segment > VCPU_SREG_GS &&
3380 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3381 ctxt->ops->cpl(ctxt) > 0)
3382 return emulate_gp(ctxt, 0);
3384 ctxt->dst.val = get_segment_selector(ctxt, segment);
3385 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3386 ctxt->dst.bytes = 2;
3387 return X86EMUL_CONTINUE;
3390 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3392 if (ctxt->modrm_reg > VCPU_SREG_GS)
3393 return emulate_ud(ctxt);
3395 return em_store_sreg(ctxt, ctxt->modrm_reg);
3398 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3400 u16 sel = ctxt->src.val;
3402 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3403 return emulate_ud(ctxt);
3405 if (ctxt->modrm_reg == VCPU_SREG_SS)
3406 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3408 /* Disable writeback. */
3409 ctxt->dst.type = OP_NONE;
3410 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3413 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3415 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3418 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3420 u16 sel = ctxt->src.val;
3422 /* Disable writeback. */
3423 ctxt->dst.type = OP_NONE;
3424 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3427 static int em_str(struct x86_emulate_ctxt *ctxt)
3429 return em_store_sreg(ctxt, VCPU_SREG_TR);
3432 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3434 u16 sel = ctxt->src.val;
3436 /* Disable writeback. */
3437 ctxt->dst.type = OP_NONE;
3438 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3441 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3445 unsigned int max_size;
3447 rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3448 &linear, X86EMUL_F_INVLPG);
3449 if (rc == X86EMUL_CONTINUE)
3450 ctxt->ops->invlpg(ctxt, linear);
3451 /* Disable writeback. */
3452 ctxt->dst.type = OP_NONE;
3453 return X86EMUL_CONTINUE;
3456 static int em_clts(struct x86_emulate_ctxt *ctxt)
3460 cr0 = ctxt->ops->get_cr(ctxt, 0);
3462 ctxt->ops->set_cr(ctxt, 0, cr0);
3463 return X86EMUL_CONTINUE;
3466 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3468 int rc = ctxt->ops->fix_hypercall(ctxt);
3470 if (rc != X86EMUL_CONTINUE)
3473 /* Let the processor re-execute the fixed hypercall */
3474 ctxt->_eip = ctxt->eip;
3475 /* Disable writeback. */
3476 ctxt->dst.type = OP_NONE;
3477 return X86EMUL_CONTINUE;
3480 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3481 void (*get)(struct x86_emulate_ctxt *ctxt,
3482 struct desc_ptr *ptr))
3484 struct desc_ptr desc_ptr;
3486 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3487 ctxt->ops->cpl(ctxt) > 0)
3488 return emulate_gp(ctxt, 0);
3490 if (ctxt->mode == X86EMUL_MODE_PROT64)
3492 get(ctxt, &desc_ptr);
3493 if (ctxt->op_bytes == 2) {
3495 desc_ptr.address &= 0x00ffffff;
3497 /* Disable writeback. */
3498 ctxt->dst.type = OP_NONE;
3499 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3500 &desc_ptr, 2 + ctxt->op_bytes);
3503 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3505 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3508 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3510 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3513 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3515 struct desc_ptr desc_ptr;
3518 if (ctxt->mode == X86EMUL_MODE_PROT64)
3520 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3521 &desc_ptr.size, &desc_ptr.address,
3523 if (rc != X86EMUL_CONTINUE)
3525 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3526 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3527 return emulate_gp(ctxt, 0);
3529 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3531 ctxt->ops->set_idt(ctxt, &desc_ptr);
3532 /* Disable writeback. */
3533 ctxt->dst.type = OP_NONE;
3534 return X86EMUL_CONTINUE;
3537 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3539 return em_lgdt_lidt(ctxt, true);
3542 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3544 return em_lgdt_lidt(ctxt, false);
3547 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3549 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3550 ctxt->ops->cpl(ctxt) > 0)
3551 return emulate_gp(ctxt, 0);
3553 if (ctxt->dst.type == OP_MEM)
3554 ctxt->dst.bytes = 2;
3555 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3556 return X86EMUL_CONTINUE;
3559 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3561 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3562 | (ctxt->src.val & 0x0f));
3563 ctxt->dst.type = OP_NONE;
3564 return X86EMUL_CONTINUE;
3567 static int em_loop(struct x86_emulate_ctxt *ctxt)
3569 int rc = X86EMUL_CONTINUE;
3571 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3572 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3573 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3574 rc = jmp_rel(ctxt, ctxt->src.val);
3579 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3581 int rc = X86EMUL_CONTINUE;
3583 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3584 rc = jmp_rel(ctxt, ctxt->src.val);
3589 static int em_in(struct x86_emulate_ctxt *ctxt)
3591 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3593 return X86EMUL_IO_NEEDED;
3595 return X86EMUL_CONTINUE;
3598 static int em_out(struct x86_emulate_ctxt *ctxt)
3600 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3602 /* Disable writeback. */
3603 ctxt->dst.type = OP_NONE;
3604 return X86EMUL_CONTINUE;
3607 static int em_cli(struct x86_emulate_ctxt *ctxt)
3609 if (emulator_bad_iopl(ctxt))
3610 return emulate_gp(ctxt, 0);
3612 ctxt->eflags &= ~X86_EFLAGS_IF;
3613 return X86EMUL_CONTINUE;
3616 static int em_sti(struct x86_emulate_ctxt *ctxt)
3618 if (emulator_bad_iopl(ctxt))
3619 return emulate_gp(ctxt, 0);
3621 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3622 ctxt->eflags |= X86_EFLAGS_IF;
3623 return X86EMUL_CONTINUE;
3626 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3628 u32 eax, ebx, ecx, edx;
3631 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3632 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3633 ctxt->ops->cpl(ctxt)) {
3634 return emulate_gp(ctxt, 0);
3637 eax = reg_read(ctxt, VCPU_REGS_RAX);
3638 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3639 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3640 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3641 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3642 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3643 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3644 return X86EMUL_CONTINUE;
3647 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3651 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3653 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3655 ctxt->eflags &= ~0xffUL;
3656 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3657 return X86EMUL_CONTINUE;
3660 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3662 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3663 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3664 return X86EMUL_CONTINUE;
3667 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3669 switch (ctxt->op_bytes) {
3670 #ifdef CONFIG_X86_64
3672 asm("bswap %0" : "+r"(ctxt->dst.val));
3676 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3679 return X86EMUL_CONTINUE;
3682 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3684 /* emulating clflush regardless of cpuid */
3685 return X86EMUL_CONTINUE;
3688 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3690 /* emulating clflushopt regardless of cpuid */
3691 return X86EMUL_CONTINUE;
3694 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3696 ctxt->dst.val = (s32) ctxt->src.val;
3697 return X86EMUL_CONTINUE;
3700 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3702 if (!ctxt->ops->guest_has_fxsr(ctxt))
3703 return emulate_ud(ctxt);
3705 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3706 return emulate_nm(ctxt);
3709 * Don't emulate a case that should never be hit, instead of working
3710 * around a lack of fxsave64/fxrstor64 on old compilers.
3712 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3713 return X86EMUL_UNHANDLEABLE;
3715 return X86EMUL_CONTINUE;
3719 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3720 * and restore MXCSR.
3722 static size_t __fxstate_size(int nregs)
3724 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3727 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3730 if (ctxt->mode == X86EMUL_MODE_PROT64)
3731 return __fxstate_size(16);
3733 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3734 return __fxstate_size(cr4_osfxsr ? 8 : 0);
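
/*
 * Editor's illustration (not part of the emulator): the concrete sizes
 * __fxstate_size() yields for the three cases used here.  The legacy FXSAVE
 * image places xmm_space[] at byte offset 160.
 */
static inline void example_fxstate_sizes(size_t *no_xmm, size_t *xmm0_7,
					  size_t *xmm0_15)
{
	*no_xmm  = __fxstate_size(0);	/* 160: header + x87 state only */
	*xmm0_7  = __fxstate_size(8);	/* 288: + XMM0-7 */
	*xmm0_15 = __fxstate_size(16);	/* 416: + XMM0-15 */
}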
3738 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode:
3739 *  1) 16 bit mode
3740 *  2) 32 bit mode
3741 *  - like (1), but FIP and FDP are only 16 bit. At least Intel CPUs
3742 *    preserve whole 32 bit values, though, so (1) and (2) are the same in
3743 *    this respect.
3744 * 3) 64-bit mode
3745 *  - like (2), but XMM 8-15 are also saved and restored
3746 * 4) 64-bit mode with REX.W prefix
3747 *  - like (3), but FIP and FDP are 64 bit
3749 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3750 * desired result. (4) is not emulated.
3752 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3753 * and FPU DS) should match.
3755 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3757 struct fxregs_state fx_state;
3760 rc = check_fxsr(ctxt);
3761 if (rc != X86EMUL_CONTINUE)
3766 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3770 if (rc != X86EMUL_CONTINUE)
3773 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3774 fxstate_size(ctxt));
3778 * FXRSTOR might restore XMM registers not provided by the guest. Fill
3779 * in the host registers (via FXSAVE) instead, so they won't be modified.
3780 * (preemption has to stay disabled until FXRSTOR).
3782 * Use noinline to keep the stack for other functions called by callers small.
3784 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3785 const size_t used_size)
3787 struct fxregs_state fx_tmp;
3790 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3791 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3792 __fxstate_size(16) - used_size);
3797 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3799 struct fxregs_state fx_state;
3803 rc = check_fxsr(ctxt);
3804 if (rc != X86EMUL_CONTINUE)
3807 size = fxstate_size(ctxt);
3808 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3809 if (rc != X86EMUL_CONTINUE)
3814 if (size < __fxstate_size(16)) {
3815 rc = fxregs_fixup(&fx_state, size);
3816 if (rc != X86EMUL_CONTINUE)
3820 if (fx_state.mxcsr >> 16) {
3821 rc = emulate_gp(ctxt, 0);
3825 if (rc == X86EMUL_CONTINUE)
3826 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3834 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3838 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3839 return emulate_ud(ctxt);
3841 eax = reg_read(ctxt, VCPU_REGS_RAX);
3842 edx = reg_read(ctxt, VCPU_REGS_RDX);
3843 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3845 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3846 return emulate_gp(ctxt, 0);
3848 return X86EMUL_CONTINUE;
3851 static bool valid_cr(int nr)
3863 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3865 if (!valid_cr(ctxt->modrm_reg))
3866 return emulate_ud(ctxt);
3868 return X86EMUL_CONTINUE;
3871 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3873 int dr = ctxt->modrm_reg;
3877 return emulate_ud(ctxt);
3879 cr4 = ctxt->ops->get_cr(ctxt, 4);
3880 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3881 return emulate_ud(ctxt);
3883 if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
3886 dr6 = ctxt->ops->get_dr(ctxt, 6);
3887 dr6 &= ~DR_TRAP_BITS;
3888 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3889 ctxt->ops->set_dr(ctxt, 6, dr6);
3890 return emulate_db(ctxt);
3893 return X86EMUL_CONTINUE;
3896 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3898 u64 new_val = ctxt->src.val64;
3899 int dr = ctxt->modrm_reg;
3901 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3902 return emulate_gp(ctxt, 0);
3904 return check_dr_read(ctxt);
3907 static int check_svme(struct x86_emulate_ctxt *ctxt)
3911 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3913 if (!(efer & EFER_SVME))
3914 return emulate_ud(ctxt);
3916 return X86EMUL_CONTINUE;
3919 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3921 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3923 /* Valid physical address? */
3924 if (rax & 0xffff000000000000ULL)
3925 return emulate_gp(ctxt, 0);
3927 return check_svme(ctxt);
3930 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3932 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3934 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3935 return emulate_gp(ctxt, 0);
3937 return X86EMUL_CONTINUE;
3940 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3942 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3943 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3946	 * VMware allows access to these pseudo-PMCs even when read via RDPMC
3947	 * in Ring3 when CR4.PCE=0.
3949 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3950 return X86EMUL_CONTINUE;
3953	 * If CR4.PCE is clear, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3954	 * check, however, is unnecessary because CPL is always 0 outside protected mode.
3957 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3958 ctxt->ops->check_rdpmc_early(ctxt, rcx))
3959 return emulate_gp(ctxt, 0);
3961 return X86EMUL_CONTINUE;
3964 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3966 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3967 if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3968 return emulate_gp(ctxt, 0);
3970 return X86EMUL_CONTINUE;
3973 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3975 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3976 if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3977 return emulate_gp(ctxt, 0);
3979 return X86EMUL_CONTINUE;
3982 #define D(_y) { .flags = (_y) }
3983 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3984 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3985 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3986 #define N D(NotImpl)
3987 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3988 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3989 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3990 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
3991 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
3992 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3993 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3994 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3995 #define II(_f, _e, _i) \
3996 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
3997 #define IIP(_f, _e, _i, _p) \
3998 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
3999 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4000 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4002 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4003 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4004 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4005 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4006 #define I2bvIP(_f, _e, _i, _p) \
4007 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4009 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4010 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4011 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
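
/*
 * Editor's note: the *2bv macros above emit two adjacent table entries, one
 * for the 8-bit form of an opcode and one for the 16/32/64-bit form.  For
 * example,
 *
 *	I2bv(DstMem | SrcReg | ModRM | Mov, em_mov)
 *
 * expands to
 *
 *	I(DstMem | SrcReg | ModRM | Mov | ByteOp, em_mov),
 *	I(DstMem | SrcReg | ModRM | Mov, em_mov)
 *
 * and F6ALU() builds the six classic ALU encodings (r/m,r; r,r/m; acc,imm)
 * in both widths from a single fastop.
 */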
4013 static const struct opcode group7_rm0[] = {
4015 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4019 static const struct opcode group7_rm1[] = {
4020 DI(SrcNone | Priv, monitor),
4021 DI(SrcNone | Priv, mwait),
4025 static const struct opcode group7_rm2[] = {
4027 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4031 static const struct opcode group7_rm3[] = {
4032 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4033 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4034 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4035 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4036 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4037 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4038 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4039 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4042 static const struct opcode group7_rm7[] = {
4044 DIP(SrcNone, rdtscp, check_rdtsc),
4048 static const struct opcode group1[] = {
4050 F(Lock | PageTable, em_or),
4053 F(Lock | PageTable, em_and),
4059 static const struct opcode group1A[] = {
4060 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4063 static const struct opcode group2[] = {
4064 F(DstMem | ModRM, em_rol),
4065 F(DstMem | ModRM, em_ror),
4066 F(DstMem | ModRM, em_rcl),
4067 F(DstMem | ModRM, em_rcr),
4068 F(DstMem | ModRM, em_shl),
4069 F(DstMem | ModRM, em_shr),
4070 F(DstMem | ModRM, em_shl),
4071 F(DstMem | ModRM, em_sar),
4074 static const struct opcode group3[] = {
4075 F(DstMem | SrcImm | NoWrite, em_test),
4076 F(DstMem | SrcImm | NoWrite, em_test),
4077 F(DstMem | SrcNone | Lock, em_not),
4078 F(DstMem | SrcNone | Lock, em_neg),
4079 F(DstXacc | Src2Mem, em_mul_ex),
4080 F(DstXacc | Src2Mem, em_imul_ex),
4081 F(DstXacc | Src2Mem, em_div_ex),
4082 F(DstXacc | Src2Mem, em_idiv_ex),
4085 static const struct opcode group4[] = {
4086 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4087 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4091 static const struct opcode group5[] = {
4092 F(DstMem | SrcNone | Lock, em_inc),
4093 F(DstMem | SrcNone | Lock, em_dec),
4094 I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
4095 I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
4096 I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4097 I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4098 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4101 static const struct opcode group6[] = {
4102 II(Prot | DstMem, em_sldt, sldt),
4103 II(Prot | DstMem, em_str, str),
4104 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4105 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4109 static const struct group_dual group7 = { {
4110 II(Mov | DstMem, em_sgdt, sgdt),
4111 II(Mov | DstMem, em_sidt, sidt),
4112 II(SrcMem | Priv, em_lgdt, lgdt),
4113 II(SrcMem | Priv, em_lidt, lidt),
4114 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4115 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4116 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4122 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4123 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4127 static const struct opcode group8[] = {
4129 F(DstMem | SrcImmByte | NoWrite, em_bt),
4130 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4131 F(DstMem | SrcImmByte | Lock, em_btr),
4132 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4136 * The "memory" destination is actually always a register, since we come
4137 * from the register case of group9.
4139 static const struct gprefix pfx_0f_c7_7 = {
4140 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4144 static const struct group_dual group9 = { {
4145 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4147 N, N, N, N, N, N, N,
4148 GP(0, &pfx_0f_c7_7),
4151 static const struct opcode group11[] = {
4152 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4156 static const struct gprefix pfx_0f_ae_7 = {
4157 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4160 static const struct group_dual group15 = { {
4161 I(ModRM | Aligned16, em_fxsave),
4162 I(ModRM | Aligned16, em_fxrstor),
4163 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4165 N, N, N, N, N, N, N, N,
4168 static const struct gprefix pfx_0f_6f_0f_7f = {
4169 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
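
/*
 * Editor's note: the four entries of a struct gprefix are selected by the
 * SIMD prefix in the order none, 0x66, 0xF2, 0xF3 (see the Prefix case in
 * x86_decode_insn()).  For 0F 6F/7F above that yields the MMX movq form,
 * movdqa (0x66, aligned), nothing for 0xF2, and movdqu (0xF3, unaligned).
 */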
4172 static const struct instr_dual instr_dual_0f_2b = {
4176 static const struct gprefix pfx_0f_2b = {
4177 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4180 static const struct gprefix pfx_0f_10_0f_11 = {
4181 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4184 static const struct gprefix pfx_0f_28_0f_29 = {
4185 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4188 static const struct gprefix pfx_0f_e7 = {
4189 N, I(Sse, em_mov), N, N,
4192 static const struct escape escape_d9 = { {
4193 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4196 N, N, N, N, N, N, N, N,
4198 N, N, N, N, N, N, N, N,
4200 N, N, N, N, N, N, N, N,
4202 N, N, N, N, N, N, N, N,
4204 N, N, N, N, N, N, N, N,
4206 N, N, N, N, N, N, N, N,
4208 N, N, N, N, N, N, N, N,
4210 N, N, N, N, N, N, N, N,
4213 static const struct escape escape_db = { {
4214 N, N, N, N, N, N, N, N,
4217 N, N, N, N, N, N, N, N,
4219 N, N, N, N, N, N, N, N,
4221 N, N, N, N, N, N, N, N,
4223 N, N, N, N, N, N, N, N,
4225 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4227 N, N, N, N, N, N, N, N,
4229 N, N, N, N, N, N, N, N,
4231 N, N, N, N, N, N, N, N,
4234 static const struct escape escape_dd = { {
4235 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4238 N, N, N, N, N, N, N, N,
4240 N, N, N, N, N, N, N, N,
4242 N, N, N, N, N, N, N, N,
4244 N, N, N, N, N, N, N, N,
4246 N, N, N, N, N, N, N, N,
4248 N, N, N, N, N, N, N, N,
4250 N, N, N, N, N, N, N, N,
4252 N, N, N, N, N, N, N, N,
4255 static const struct instr_dual instr_dual_0f_c3 = {
4256 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4259 static const struct mode_dual mode_dual_63 = {
4260 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4263 static const struct instr_dual instr_dual_8d = {
4264 D(DstReg | SrcMem | ModRM | NoAccess), N
4267 static const struct opcode opcode_table[256] = {
4269 F6ALU(Lock, em_add),
4270 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4271 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4273 F6ALU(Lock | PageTable, em_or),
4274 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4277 F6ALU(Lock, em_adc),
4278 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4279 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4281 F6ALU(Lock, em_sbb),
4282 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4283 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4285 F6ALU(Lock | PageTable, em_and), N, N,
4287 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4289 F6ALU(Lock, em_xor), N, N,
4291 F6ALU(NoWrite, em_cmp), N, N,
4293 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4295 X8(I(SrcReg | Stack, em_push)),
4297 X8(I(DstReg | Stack, em_pop)),
4299 I(ImplicitOps | Stack | No64, em_pusha),
4300 I(ImplicitOps | Stack | No64, em_popa),
4301 N, MD(ModRM, &mode_dual_63),
4304 I(SrcImm | Mov | Stack, em_push),
4305 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4306 I(SrcImmByte | Mov | Stack, em_push),
4307 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4308 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4309 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4311 X16(D(SrcImmByte | NearBranch | IsBranch)),
4313 G(ByteOp | DstMem | SrcImm, group1),
4314 G(DstMem | SrcImm, group1),
4315 G(ByteOp | DstMem | SrcImm | No64, group1),
4316 G(DstMem | SrcImmByte, group1),
4317 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4318 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4320 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4321 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4322 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4323 ID(0, &instr_dual_8d),
4324 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4327 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4329 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4330 I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
4331 II(ImplicitOps | Stack, em_pushf, pushf),
4332 II(ImplicitOps | Stack, em_popf, popf),
4333 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4335 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4336 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4337 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4338 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4340 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4341 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4342 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4343 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4345 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4347 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4349 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4350 I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
4351 I(ImplicitOps | NearBranch | IsBranch, em_ret),
4352 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4353 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4354 G(ByteOp, group11), G(0, group11),
4356 I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
4357 I(Stack | IsBranch, em_leave),
4358 I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
4359 I(ImplicitOps | IsBranch, em_ret_far),
4360 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
4361 D(ImplicitOps | No64 | IsBranch),
4362 II(ImplicitOps | IsBranch, em_iret, iret),
4364 G(Src2One | ByteOp, group2), G(Src2One, group2),
4365 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4366 I(DstAcc | SrcImmUByte | No64, em_aam),
4367 I(DstAcc | SrcImmUByte | No64, em_aad),
4368 F(DstAcc | ByteOp | No64, em_salc),
4369 I(DstAcc | SrcXLat | ByteOp, em_mov),
4371 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4373 X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4374 I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4375 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4376 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4378 I(SrcImm | NearBranch | IsBranch, em_call),
4379 D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4380 I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4381 D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4382 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4383 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4385 N, DI(ImplicitOps, icebp), N, N,
4386 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4387 G(ByteOp, group3), G(0, group3),
4389 D(ImplicitOps), D(ImplicitOps),
4390 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4391 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4394 static const struct opcode twobyte_table[256] = {
4396 G(0, group6), GD(0, &group7), N, N,
4397 N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
4398 II(ImplicitOps | Priv, em_clts, clts), N,
4399 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4400 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4402 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4403 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4405 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4406 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4407 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4408 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4409 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4410 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4412 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4413 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4414 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4416 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4419 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4420 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4421 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4424 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4425 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4426 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4427 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4428 I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
4429 I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
4431 N, N, N, N, N, N, N, N,
4433 X16(D(DstReg | SrcMem | ModRM)),
4435 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4440 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4445 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4447 X16(D(SrcImm | NearBranch | IsBranch)),
4449 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4451 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4452 II(ImplicitOps, em_cpuid, cpuid),
4453 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4454 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4455 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4457 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4458 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4459 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4460 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4461 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4462 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4464 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4465 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4466 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4467 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4468 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4469 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4473 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4474 I(DstReg | SrcMem | ModRM, em_bsf_c),
4475 I(DstReg | SrcMem | ModRM, em_bsr_c),
4476 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4478 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4479 N, ID(0, &instr_dual_0f_c3),
4480 N, N, N, GD(0, &group9),
4482 X8(I(DstReg, em_bswap)),
4484 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4486 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4487 N, N, N, N, N, N, N, N,
4489 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4492 static const struct instr_dual instr_dual_0f_38_f0 = {
4493 I(DstReg | SrcMem | Mov, em_movbe), N
4496 static const struct instr_dual instr_dual_0f_38_f1 = {
4497 I(DstMem | SrcReg | Mov, em_movbe), N
4500 static const struct gprefix three_byte_0f_38_f0 = {
4501 ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
4504 static const struct gprefix three_byte_0f_38_f1 = {
4505 ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
4509 * The insns below are indexed by the third opcode byte and then further
 * selected by the SIMD prefix.
4512 static const struct opcode opcode_map_0f_38[256] = {
4514 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4516 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4518 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4519 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4540 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4544 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4550 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4551 unsigned size, bool sign_extension)
4553 int rc = X86EMUL_CONTINUE;
4557 op->addr.mem.ea = ctxt->_eip;
4558 /* NB. Immediates are sign-extended as necessary. */
4559 switch (op->bytes) {
4561 op->val = insn_fetch(s8, ctxt);
4564 op->val = insn_fetch(s16, ctxt);
4567 op->val = insn_fetch(s32, ctxt);
4570 op->val = insn_fetch(s64, ctxt);
4573 if (!sign_extension) {
4574 switch (op->bytes) {
4582 op->val &= 0xffffffff;
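
/*
 * Editor's illustration (not part of the emulator): decode_imm() gets sign
 * extension for free by fetching the immediate through a signed type and
 * zero extension by masking, as above.  A standalone equivalent (relies on
 * arithmetic right shift of s64, as kernel code does):
 */
static inline u64 example_extend_imm(u64 raw, unsigned int bytes, bool sign)
{
	unsigned int shift = 64 - bytes * 8;	/* bytes is 1, 2, 4 or 8 */

	return sign ? (u64)(((s64)(raw << shift)) >> shift)
		    : (raw << shift) >> shift;
}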
4590 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4593 int rc = X86EMUL_CONTINUE;
4597 decode_register_operand(ctxt, op);
4600 rc = decode_imm(ctxt, op, 1, false);
4603 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4607 if (ctxt->d & BitOp)
4608 fetch_bit_operand(ctxt);
4609 op->orig_val = op->val;
4612 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4616 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4617 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4618 fetch_register_operand(op);
4619 op->orig_val = op->val;
4623 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4624 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4625 fetch_register_operand(op);
4626 op->orig_val = op->val;
4629 if (ctxt->d & ByteOp) {
4634 op->bytes = ctxt->op_bytes;
4635 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4636 fetch_register_operand(op);
4637 op->orig_val = op->val;
4641 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4643 register_address(ctxt, VCPU_REGS_RDI);
4644 op->addr.mem.seg = VCPU_SREG_ES;
4651 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4652 fetch_register_operand(op);
4657 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4660 rc = decode_imm(ctxt, op, 1, true);
4668 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4671 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4674 ctxt->memop.bytes = 1;
4675 if (ctxt->memop.type == OP_REG) {
4676 ctxt->memop.addr.reg = decode_register(ctxt,
4677 ctxt->modrm_rm, true);
4678 fetch_register_operand(&ctxt->memop);
4682 ctxt->memop.bytes = 2;
4685 ctxt->memop.bytes = 4;
4688 rc = decode_imm(ctxt, op, 2, false);
4691 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4695 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4697 register_address(ctxt, VCPU_REGS_RSI);
4698 op->addr.mem.seg = ctxt->seg_override;
4704 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4707 reg_read(ctxt, VCPU_REGS_RBX) +
4708 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4709 op->addr.mem.seg = ctxt->seg_override;
4714 op->addr.mem.ea = ctxt->_eip;
4715 op->bytes = ctxt->op_bytes + 2;
4716 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4719 ctxt->memop.bytes = ctxt->op_bytes + 2;
4723 op->val = VCPU_SREG_ES;
4727 op->val = VCPU_SREG_CS;
4731 op->val = VCPU_SREG_SS;
4735 op->val = VCPU_SREG_DS;
4739 op->val = VCPU_SREG_FS;
4743 op->val = VCPU_SREG_GS;
4746 /* Special instructions do their own operand decoding. */
4748 op->type = OP_NONE; /* Disable writeback. */

int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;
	u16 dummy;
	struct desc_struct desc;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	ctxt->intercept = x86_intercept_none;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
		def_op_bytes = def_ad_bytes = 2;
		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
		if (desc.d)
			def_op_bytes = def_ad_bytes = 4;
		break;
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}

	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_ES;
			break;
		case 0x2e:	/* CS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_CS;
			break;
		case 0x36:	/* SS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_SS;
			break;
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_DS;
			break;
		case 0x64:	/* FS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_FS;
			break;
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_GS;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
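
	/*
	 * Worked example (illustrative only): in 64-bit mode the byte stream
	 * 48 66 89 c8 is consumed as REX.W (0x48) followed by an operand-size
	 * override (0x66).  Because 0x66 is a legacy prefix that follows the
	 * REX prefix, the loop above clears rex_prefix again, so the
	 * instruction decodes as a 16-bit "mov ax, cx" rather than a 64-bit
	 * move.
	 */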

	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}

	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf) {
				size_t size = ARRAY_SIZE(opcode.u.esc->high);
				u32 index = array_index_nospec(
					ctxt->modrm - 0xc0, size);

				opcode = opcode.u.esc->high[index];
			} else {
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			}
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}
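
	/*
	 * Illustrative example: opcode 0xf6/0xf7 ("Grp3" in the one-byte
	 * opcode map) carries the Group flag, so the reg field of ModRM
	 * (bits 5:3) selects the concrete entry - e.g. /0 is TEST and /2 is
	 * NOT - and that entry's flags are merged into ctxt->d by the loop
	 * above.
	 */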

	ctxt->is_branch = opcode.flags & IsBranch;

	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
	    likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
		     (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
		      No16))) {
		/*
		 * These are copied unconditionally here, and checked unconditionally
		 * in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}

	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);

	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

	if (ctxt->rip_relative && likely(ctxt->memopp))
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}

bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/* The second termination condition only applies for REPE
	 * and REPNE. Test whether the repeat string operation prefix is
	 * REPE/REPZ or REPNE/REPNZ and, if so, check the corresponding
	 * termination condition according to:
	 *     - if REPE/REPZ and ZF = 0 then done
	 *     - if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}
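
/*
 * Example (illustrative): "rep movsb" only terminates when RCX reaches 0,
 * which x86_emulate_insn() checks before each iteration, whereas
 * "repe cmpsb" (f3 a6) additionally terminates here as soon as a compare
 * clears ZF.
 */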

static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	kvm_fpu_get();
	rc = asm_safe("fwait");
	kvm_fpu_put();

	if (unlikely(rc != X86EMUL_CONTINUE))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct operand *op)
{
	if (op->type == OP_MM)
		kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
}

static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
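
/*
 * Sketch of the dispatch above (illustrative): each fastop group lays out its
 * 1/2/4/8-byte variants FASTOP_SIZE bytes apart, so for a non-ByteOp
 * instruction with ctxt->dst.bytes == 4, __ffs(4) == 2 selects the third
 * (32-bit) variant.  The guest's arithmetic flags are loaded around the
 * CALL_NOSPEC call and the resulting flags are copied back into ctxt->eflags
 * afterwards.
 */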

void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	/* Clear fields that are set conditionally but read without a guard. */
	ctxt->rip_relative = false;
	ctxt->rex_prefix = 0;
	ctxt->lock_prefix = 0;
	ctxt->rep_prefix = 0;
	ctxt->regs_valid = 0;
	ctxt->regs_dirty = 0;

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;
	bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}

		if (unlikely(is_guest_mode) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				    &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		/* 2/4/8-byte operands: cbw, cwde and cdqe respectively. */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc:		/* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd:		/* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce:		/* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4:		/* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}

	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after every 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}

	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
			return EMULATION_FAILED;
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09:		/* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08:		/* invd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
	case 0x1f:		/* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
		break;
	case 0x40 ... 0x4f:	/* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f:	/* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf:	/* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}
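
/*
 * Note (summary of the exit paths above): EMULATION_RESTART asks the caller
 * to re-enter the emulator for the next string iteration,
 * EMULATION_INTERCEPTED reports a nested-guest intercept, EMULATION_FAILED
 * covers anything unhandleable, and everything else - including a queued
 * exception flagged via ctxt->have_exception - is reported as EMULATION_OK.
 */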

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}

bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)
		return false;

	return true;
}
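
/*
 * Rationale (assumption based on the checks above): rep-string and TwoMemOp
 * instructions touch more than one guest address per invocation, so the
 * caller cannot reuse a single cached GPA translation for them.
 */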