Git Repo - linux.git/commitdiff
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author: Linus Torvalds <[email protected]>
Fri, 6 Aug 2010 17:07:34 +0000 (10:07 -0700)
committer: Linus Torvalds <[email protected]>
Fri, 6 Aug 2010 17:07:34 +0000 (10:07 -0700)
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  um, x86: Cast to (u64 *) inside set_64bit()
  x86-32, asm: Directly access per-cpu GDT
  x86-64, asm: Directly access per-cpu IST
  x86, asm: Merge cmpxchg_486_u64() and cmpxchg8b_emu()
  x86, asm: Move cmpxchg emulation code to arch/x86/lib
  x86, asm: Clean up and simplify <asm/cmpxchg.h>
  x86, asm: Clean up and simplify set_64bit()
  x86: Add memory modify constraints to xchg() and cmpxchg()
  x86-64: Simplify loading initial_gs
  x86: Use symbolic MSR names
  x86: Remove redundant K6 MSRs

1  2 
arch/x86/include/asm/msr-index.h
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

index 2eeb2e692008e6615f7ee5b9c240dcd35cb8bbd5,6068e0e06e00dfd22cd2718a0714100e61fc8b01..65bbec2093aa71ffb5b0858abca890cee6b86447
@@@ -20,7 -20,6 +20,7 @@@
  #define _EFER_LMA             10 /* Long mode active (read-only) */
  #define _EFER_NX              11 /* No execute enable */
  #define _EFER_SVME            12 /* Enable virtualization */
 +#define _EFER_LMSLE           13 /* Long Mode Segment Limit Enable */
  #define _EFER_FFXSR           14 /* Enable Fast FXSAVE/FXRSTOR */
  
  #define EFER_SCE              (1<<_EFER_SCE)
@@@ -28,7 -27,6 +28,7 @@@
  #define EFER_LMA              (1<<_EFER_LMA)
  #define EFER_NX                       (1<<_EFER_NX)
  #define EFER_SVME             (1<<_EFER_SVME)
 +#define EFER_LMSLE            (1<<_EFER_LMSLE)
  #define EFER_FFXSR            (1<<_EFER_FFXSR)
  
  /* Intel MSRs. Some also available on other CPUs */
  #define MSR_K7_FID_VID_STATUS         0xc0010042
  
  /* K6 MSRs */
- #define MSR_K6_EFER                   0xc0000080
- #define MSR_K6_STAR                   0xc0000081
  #define MSR_K6_WHCR                   0xc0000082
  #define MSR_K6_UWCCR                  0xc0000085
  #define MSR_K6_EPMR                   0xc0000086
  #define MSR_IA32_THERM_CONTROL                0x0000019a
  #define MSR_IA32_THERM_INTERRUPT      0x0000019b
  
 -#define THERM_INT_LOW_ENABLE          (1 << 0)
 -#define THERM_INT_HIGH_ENABLE         (1 << 1)
 +#define THERM_INT_HIGH_ENABLE         (1 << 0)
 +#define THERM_INT_LOW_ENABLE          (1 << 1)
 +#define THERM_INT_PLN_ENABLE          (1 << 24)
  
  #define MSR_IA32_THERM_STATUS         0x0000019c
  
  #define THERM_STATUS_PROCHOT          (1 << 0)
 +#define THERM_STATUS_POWER_LIMIT      (1 << 10)
  
  #define MSR_THERM2_CTL                        0x0000019d
  
  
  #define MSR_IA32_TEMPERATURE_TARGET   0x000001a2
  
 +#define MSR_IA32_ENERGY_PERF_BIAS     0x000001b0
 +
 +#define MSR_IA32_PACKAGE_THERM_STATUS         0x000001b1
 +
 +#define PACKAGE_THERM_STATUS_PROCHOT          (1 << 0)
 +#define PACKAGE_THERM_STATUS_POWER_LIMIT      (1 << 10)
 +
 +#define MSR_IA32_PACKAGE_THERM_INTERRUPT      0x000001b2
 +
 +#define PACKAGE_THERM_INT_HIGH_ENABLE         (1 << 0)
 +#define PACKAGE_THERM_INT_LOW_ENABLE          (1 << 1)
 +#define PACKAGE_THERM_INT_PLN_ENABLE          (1 << 24)
 +
  /* MISC_ENABLE bits: architectural */
  #define MSR_IA32_MISC_ENABLE_FAST_STRING      (1ULL << 0)
  #define MSR_IA32_MISC_ENABLE_TCC              (1ULL << 1)
index 5e3a3512ba0511fc991e7721855dffdbecf68b14,c47c43914ba72949674d57e7c5e93294e9d70ed7..3f0ebe429a01c14e67d252e475f34252fcf0f105
@@@ -12,11 -12,11 +12,11 @@@ endi
  nostackp := $(call cc-option, -fno-stack-protector)
  CFLAGS_common.o               := $(nostackp)
  
 -obj-y                 := intel_cacheinfo.o addon_cpuid_features.o
 +obj-y                 := intel_cacheinfo.o scattered.o topology.o
  obj-y                 += proc.o capflags.o powerflags.o common.o
  obj-y                 += vmware.o hypervisor.o sched.o mshyperv.o
  
- obj-$(CONFIG_X86_32)  += bugs.o cmpxchg.o
+ obj-$(CONFIG_X86_32)  += bugs.o
  obj-$(CONFIG_X86_64)  += bugs_64.o
  
  obj-$(CONFIG_CPU_SUP_INTEL)           += intel.o
index 6b196834a0dd909c5988267ca516b793c6691316,233c5829e7ac066a636be430adfe19c236bf6f59..258e93fa26304d08fddcc5f9b75f5ff0757eafba
@@@ -611,14 -611,14 +611,14 @@@ ldt_ss
   * compensating for the offset by changing to the ESPFIX segment with
   * a base address that matches for the difference.
   */
+ #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
        mov %esp, %edx                  /* load kernel esp */
        mov PT_OLDESP(%esp), %eax       /* load userspace esp */
        mov %dx, %ax                    /* eax: new kernel esp */
        sub %eax, %edx                  /* offset (low word is 0) */
-       PER_CPU(gdt_page, %ebx)
        shr $16, %edx
-       mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
-       mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
+       mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
+       mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
        pushl $__ESPFIX_SS
        CFI_ADJUST_CFA_OFFSET 4
        push %eax                       /* new kernel esp */
@@@ -791,9 -791,8 +791,8 @@@ ptregs_clone
   * normal stack and adjusts ESP with the matching offset.
   */
        /* fixup the stack */
-       PER_CPU(gdt_page, %ebx)
-       mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
-       mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+       mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
+       mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
        shl $16, %eax
        addl %esp, %eax                 /* the adjusted stack pointer */
        pushl $__KERNEL_DS
@@@ -1166,9 -1165,6 +1165,9 @@@ ENTRY(xen_failsafe_callback
  .previous
  ENDPROC(xen_failsafe_callback)
  
 +BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
 +              xen_evtchn_do_upcall)
 +
  #endif        /* CONFIG_XEN */
  
  #ifdef CONFIG_FUNCTION_TRACER
index 649ed17f70095ca1b49d5a3a69da03465bfb340e,59af275b37a28576dbb350ef92009866c7edc267..c5ea5cdbe7b3d86a927f82835147c42c6d93fabf
@@@ -1065,6 -1065,7 +1065,7 @@@ ENTRY(\sym
  END(\sym)
  .endm
  
+ #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
  .macro paranoidzeroentry_ist sym do_sym ist
  ENTRY(\sym)
        INTR_FRAME
        TRACE_IRQS_OFF
        movq %rsp,%rdi          /* pt_regs pointer */
        xorl %esi,%esi          /* no error code */
-       PER_CPU(init_tss, %r12)
-       subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
+       subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
        call \do_sym
-       addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
+       addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
        jmp paranoid_exit       /* %ebx: no swapgs flag */
        CFI_ENDPROC
  END(\sym)
@@@ -1329,9 -1329,6 +1329,9 @@@ ENTRY(xen_failsafe_callback
        CFI_ENDPROC
  END(xen_failsafe_callback)
  
 +apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
 +      xen_hvm_callback_vector xen_evtchn_do_upcall
 +
  #endif /* CONFIG_XEN */
  
  /*
diff --combined arch/x86/kvm/svm.c
index 5c81daf3ef5782ffdd3337ff4536f046ac7fd094,24a2206962986dcb82326c4c810281a120831acd..bc5b9b8d4a33117259882835bfb884f4f8f37656
@@@ -4,7 -4,6 +4,7 @@@
   * AMD SVM support
   *
   * Copyright (C) 2006 Qumranet, Inc.
 + * Copyright 2010 Red Hat, Inc. and/or its affilates.
   *
   * Authors:
   *   Yaniv Kamay  <[email protected]>
@@@ -131,7 -130,7 +131,7 @@@ static struct svm_direct_access_msrs 
        u32 index;   /* Index of the MSR */
        bool always; /* True if intercept is always on */
  } direct_access_msrs[] = {
-       { .index = MSR_K6_STAR,                         .always = true  },
+       { .index = MSR_STAR,                            .always = true  },
        { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
  #ifdef CONFIG_X86_64
        { .index = MSR_GS_BASE,                         .always = true  },
@@@ -286,11 -285,11 +286,11 @@@ static inline void flush_guest_tlb(stru
  
  static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
  {
 +      vcpu->arch.efer = efer;
        if (!npt_enabled && !(efer & EFER_LMA))
                efer &= ~EFER_LME;
  
        to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
 -      vcpu->arch.efer = efer;
  }
  
  static int is_external_interrupt(u32 info)
@@@ -384,7 -383,8 +384,7 @@@ static void svm_init_erratum_383(void
        int err;
        u64 val;
  
 -      /* Only Fam10h is affected */
 -      if (boot_cpu_data.x86 != 0x10)
 +      if (!cpu_has_amd_erratum(amd_erratum_383))
                return;
  
        /* Use _safe variants to not break nested virtualization */
@@@ -640,7 -640,7 +640,7 @@@ static __init int svm_hardware_setup(vo
  
        if (nested) {
                printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
 -              kvm_enable_efer_bits(EFER_SVME);
 +              kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
        }
  
        for_each_possible_cpu(cpu) {
@@@ -806,7 -806,7 +806,7 @@@ static void init_vmcb(struct vcpu_svm *
         * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
         */
        svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
 -      kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
 +      (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
  
        save->cr4 = X86_CR4_PAE;
        /* rdx = ?? */
@@@ -903,18 -903,13 +903,18 @@@ static struct kvm_vcpu *svm_create_vcpu
        svm->asid_generation = 0;
        init_vmcb(svm);
  
 -      fx_init(&svm->vcpu);
 +      err = fx_init(&svm->vcpu);
 +      if (err)
 +              goto free_page4;
 +
        svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
        if (kvm_vcpu_is_bsp(&svm->vcpu))
                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
  
        return &svm->vcpu;
  
 +free_page4:
 +      __free_page(hsave_page);
  free_page3:
        __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
  free_page2:
@@@ -1493,7 -1488,7 +1493,7 @@@ static void svm_handle_mce(struct vcpu_
                 */
                pr_err("KVM: Guest triggered AMD Erratum 383\n");
  
 -              set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
 +              kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
  
                return;
        }
@@@ -1540,7 -1535,7 +1540,7 @@@ static int io_interception(struct vcpu_
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
        if (string || in)
 -              return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
 +              return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
  
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@@ -1962,7 -1957,7 +1962,7 @@@ static int nested_svm_vmexit(struct vcp
                svm->vmcb->save.cr3 = hsave->save.cr3;
                svm->vcpu.arch.cr3 = hsave->save.cr3;
        } else {
 -              kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
 +              (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
        }
        kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
@@@ -2085,7 -2080,7 +2085,7 @@@ static bool nested_svm_vmrun(struct vcp
                svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
                svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
        } else
 -              kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
 +              (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
  
        /* Guest paging mode is active - reset mmu */
        kvm_mmu_reset_context(&svm->vcpu);
@@@ -2391,12 -2386,16 +2391,12 @@@ static int iret_interception(struct vcp
  
  static int invlpg_interception(struct vcpu_svm *svm)
  {
 -      if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 -              pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 -      return 1;
 +      return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
  }
  
  static int emulate_on_interception(struct vcpu_svm *svm)
  {
 -      if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
 -              pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 -      return 1;
 +      return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
  }
  
  static int cr8_write_interception(struct vcpu_svm *svm)
@@@ -2432,7 -2431,7 +2432,7 @@@ static int svm_get_msr(struct kvm_vcpu 
                *data = tsc_offset + native_read_tsc();
                break;
        }
-       case MSR_K6_STAR:
+       case MSR_STAR:
                *data = svm->vmcb->save.star;
                break;
  #ifdef CONFIG_X86_64
@@@ -2556,7 -2555,7 +2556,7 @@@ static int svm_set_msr(struct kvm_vcpu 
  
                break;
        }
-       case MSR_K6_STAR:
+       case MSR_STAR:
                svm->vmcb->save.star = data;
                break;
  #ifdef CONFIG_X86_64
@@@ -2727,99 -2726,6 +2727,99 @@@ static int (*svm_exit_handlers[])(struc
        [SVM_EXIT_NPF]                          = pf_interception,
  };
  
 +void dump_vmcb(struct kvm_vcpu *vcpu)
 +{
 +      struct vcpu_svm *svm = to_svm(vcpu);
 +      struct vmcb_control_area *control = &svm->vmcb->control;
 +      struct vmcb_save_area *save = &svm->vmcb->save;
 +
 +      pr_err("VMCB Control Area:\n");
 +      pr_err("cr_read:            %04x\n", control->intercept_cr_read);
 +      pr_err("cr_write:           %04x\n", control->intercept_cr_write);
 +      pr_err("dr_read:            %04x\n", control->intercept_dr_read);
 +      pr_err("dr_write:           %04x\n", control->intercept_dr_write);
 +      pr_err("exceptions:         %08x\n", control->intercept_exceptions);
 +      pr_err("intercepts:         %016llx\n", control->intercept);
 +      pr_err("pause filter count: %d\n", control->pause_filter_count);
 +      pr_err("iopm_base_pa:       %016llx\n", control->iopm_base_pa);
 +      pr_err("msrpm_base_pa:      %016llx\n", control->msrpm_base_pa);
 +      pr_err("tsc_offset:         %016llx\n", control->tsc_offset);
 +      pr_err("asid:               %d\n", control->asid);
 +      pr_err("tlb_ctl:            %d\n", control->tlb_ctl);
 +      pr_err("int_ctl:            %08x\n", control->int_ctl);
 +      pr_err("int_vector:         %08x\n", control->int_vector);
 +      pr_err("int_state:          %08x\n", control->int_state);
 +      pr_err("exit_code:          %08x\n", control->exit_code);
 +      pr_err("exit_info1:         %016llx\n", control->exit_info_1);
 +      pr_err("exit_info2:         %016llx\n", control->exit_info_2);
 +      pr_err("exit_int_info:      %08x\n", control->exit_int_info);
 +      pr_err("exit_int_info_err:  %08x\n", control->exit_int_info_err);
 +      pr_err("nested_ctl:         %lld\n", control->nested_ctl);
 +      pr_err("nested_cr3:         %016llx\n", control->nested_cr3);
 +      pr_err("event_inj:          %08x\n", control->event_inj);
 +      pr_err("event_inj_err:      %08x\n", control->event_inj_err);
 +      pr_err("lbr_ctl:            %lld\n", control->lbr_ctl);
 +      pr_err("next_rip:           %016llx\n", control->next_rip);
 +      pr_err("VMCB State Save Area:\n");
 +      pr_err("es:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->es.selector, save->es.attrib,
 +              save->es.limit, save->es.base);
 +      pr_err("cs:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->cs.selector, save->cs.attrib,
 +              save->cs.limit, save->cs.base);
 +      pr_err("ss:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->ss.selector, save->ss.attrib,
 +              save->ss.limit, save->ss.base);
 +      pr_err("ds:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->ds.selector, save->ds.attrib,
 +              save->ds.limit, save->ds.base);
 +      pr_err("fs:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->fs.selector, save->fs.attrib,
 +              save->fs.limit, save->fs.base);
 +      pr_err("gs:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->gs.selector, save->gs.attrib,
 +              save->gs.limit, save->gs.base);
 +      pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->gdtr.selector, save->gdtr.attrib,
 +              save->gdtr.limit, save->gdtr.base);
 +      pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->ldtr.selector, save->ldtr.attrib,
 +              save->ldtr.limit, save->ldtr.base);
 +      pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->idtr.selector, save->idtr.attrib,
 +              save->idtr.limit, save->idtr.base);
 +      pr_err("tr:   s: %04x a: %04x l: %08x b: %016llx\n",
 +              save->tr.selector, save->tr.attrib,
 +              save->tr.limit, save->tr.base);
 +      pr_err("cpl:            %d                efer:         %016llx\n",
 +              save->cpl, save->efer);
 +      pr_err("cr0:            %016llx cr2:          %016llx\n",
 +              save->cr0, save->cr2);
 +      pr_err("cr3:            %016llx cr4:          %016llx\n",
 +              save->cr3, save->cr4);
 +      pr_err("dr6:            %016llx dr7:          %016llx\n",
 +              save->dr6, save->dr7);
 +      pr_err("rip:            %016llx rflags:       %016llx\n",
 +              save->rip, save->rflags);
 +      pr_err("rsp:            %016llx rax:          %016llx\n",
 +              save->rsp, save->rax);
 +      pr_err("star:           %016llx lstar:        %016llx\n",
 +              save->star, save->lstar);
 +      pr_err("cstar:          %016llx sfmask:       %016llx\n",
 +              save->cstar, save->sfmask);
 +      pr_err("kernel_gs_base: %016llx sysenter_cs:  %016llx\n",
 +              save->kernel_gs_base, save->sysenter_cs);
 +      pr_err("sysenter_esp:   %016llx sysenter_eip: %016llx\n",
 +              save->sysenter_esp, save->sysenter_eip);
 +      pr_err("gpat:           %016llx dbgctl:       %016llx\n",
 +              save->g_pat, save->dbgctl);
 +      pr_err("br_from:        %016llx br_to:        %016llx\n",
 +              save->br_from, save->br_to);
 +      pr_err("excp_from:      %016llx excp_to:      %016llx\n",
 +              save->last_excp_from, save->last_excp_to);
 +
 +}
 +
  static int handle_exit(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                kvm_run->fail_entry.hardware_entry_failure_reason
                        = svm->vmcb->control.exit_code;
 +              pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
 +              dump_vmcb(vcpu);
                return 0;
        }
  
@@@ -2922,6 -2826,9 +2922,6 @@@ static inline void svm_inject_irq(struc
  {
        struct vmcb_control_area *control;
  
 -      trace_kvm_inj_virq(irq);
 -
 -      ++svm->vcpu.stat.irq_injections;
        control = &svm->vmcb->control;
        control->int_vector = irq;
        control->int_ctl &= ~V_INTR_PRIO_MASK;
@@@ -2935,9 -2842,6 +2935,9 @@@ static void svm_set_irq(struct kvm_vcp
  
        BUG_ON(!(gif_set(svm)));
  
 +      trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
 +      ++vcpu->stat.irq_injections;
 +
        svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
                SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
  }
@@@ -3423,11 -3327,6 +3423,11 @@@ static bool svm_rdtscp_supported(void
        return false;
  }
  
 +static bool svm_has_wbinvd_exit(void)
 +{
 +      return true;
 +}
 +
  static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -3512,8 -3411,6 +3512,8 @@@ static struct kvm_x86_ops svm_x86_ops 
        .rdtscp_supported = svm_rdtscp_supported,
  
        .set_supported_cpuid = svm_set_supported_cpuid,
 +
 +      .has_wbinvd_exit = svm_has_wbinvd_exit,
  };
  
  static int __init svm_init(void)
diff --combined arch/x86/kvm/vmx.c
index 27a0222c29460d79e6e65992ac00fc2845f8f25b,b42ad25d56479840082243b8d4e6d6f688f6f84e..49b25eee25acc075538a411fc24c23a326f02fd4
@@@ -5,7 -5,6 +5,7 @@@
   * machines without emulation or binary translation.
   *
   * Copyright (C) 2006 Qumranet, Inc.
 + * Copyright 2010 Red Hat, Inc. and/or its affilates.
   *
   * Authors:
   *   Avi Kivity   <[email protected]>
@@@ -37,8 -36,6 +37,8 @@@
  #include <asm/vmx.h>
  #include <asm/virtext.h>
  #include <asm/mce.h>
 +#include <asm/i387.h>
 +#include <asm/xcr.h>
  
  #include "trace.h"
  
@@@ -66,9 -63,6 +66,9 @@@ module_param_named(unrestricted_guest
  static int __read_mostly emulate_invalid_guest_state = 0;
  module_param(emulate_invalid_guest_state, bool, S_IRUGO);
  
 +static int __read_mostly vmm_exclusive = 1;
 +module_param(vmm_exclusive, bool, S_IRUGO);
 +
  #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST                         \
        (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
  #define KVM_GUEST_CR0_MASK                                            \
@@@ -179,13 -173,10 +179,13 @@@ static inline struct vcpu_vmx *to_vmx(s
  
  static int init_rmode(struct kvm *kvm);
  static u64 construct_eptp(unsigned long root_hpa);
 +static void kvm_cpu_vmxon(u64 addr);
 +static void kvm_cpu_vmxoff(void);
  
  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
 +static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
  
  static unsigned long *vmx_io_bitmap_a;
  static unsigned long *vmx_io_bitmap_b;
@@@ -240,14 -231,14 +240,14 @@@ static u64 host_efer
  static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
  
  /*
-  * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
+  * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
   * away by decrementing the array size.
   */
  static const u32 vmx_msr_index[] = {
  #ifdef CONFIG_X86_64
        MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
  #endif
-       MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
+       MSR_EFER, MSR_TSC_AUX, MSR_STAR,
  };
  #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
  
@@@ -343,11 -334,6 +343,11 @@@ static inline bool cpu_has_vmx_ept_1g_p
        return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
  }
  
 +static inline bool cpu_has_vmx_ept_4levels(void)
 +{
 +      return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
 +}
 +
  static inline bool cpu_has_vmx_invept_individual_addr(void)
  {
        return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
@@@ -363,16 -349,6 +363,16 @@@ static inline bool cpu_has_vmx_invept_g
        return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
  }
  
 +static inline bool cpu_has_vmx_invvpid_single(void)
 +{
 +      return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
 +}
 +
 +static inline bool cpu_has_vmx_invvpid_global(void)
 +{
 +      return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
 +}
 +
  static inline bool cpu_has_vmx_ept(void)
  {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
@@@ -413,12 -389,6 +413,12 @@@ static inline bool cpu_has_virtual_nmis
        return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
  }
  
 +static inline bool cpu_has_vmx_wbinvd_exit(void)
 +{
 +      return vmcs_config.cpu_based_2nd_exec_ctrl &
 +              SECONDARY_EXEC_WBINVD_EXITING;
 +}
 +
  static inline bool report_flexpriority(void)
  {
        return flexpriority_enabled;
@@@ -483,19 -453,6 +483,19 @@@ static void vmcs_clear(struct vmcs *vmc
                       vmcs, phys_addr);
  }
  
 +static void vmcs_load(struct vmcs *vmcs)
 +{
 +      u64 phys_addr = __pa(vmcs);
 +      u8 error;
 +
 +      asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 +                      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 +                      : "cc", "memory");
 +      if (error)
 +              printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
 +                     vmcs, phys_addr);
 +}
 +
  static void __vcpu_clear(void *arg)
  {
        struct vcpu_vmx *vmx = arg;
@@@ -518,27 -475,12 +518,27 @@@ static void vcpu_clear(struct vcpu_vmx 
        smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
  }
  
 -static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
 +static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
  {
        if (vmx->vpid == 0)
                return;
  
 -      __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
 +      if (cpu_has_vmx_invvpid_single())
 +              __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
 +}
 +
 +static inline void vpid_sync_vcpu_global(void)
 +{
 +      if (cpu_has_vmx_invvpid_global())
 +              __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
 +}
 +
 +static inline void vpid_sync_context(struct vcpu_vmx *vmx)
 +{
 +      if (cpu_has_vmx_invvpid_single())
 +              vpid_sync_vcpu_single(vmx);
 +      else
 +              vpid_sync_vcpu_global();
  }
  
  static inline void ept_sync_global(void)
@@@ -870,9 -812,6 +870,9 @@@ static void __vmx_load_host_state(struc
                wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
        }
  #endif
 +      if (current_thread_info()->status & TS_USEDFPU)
 +              clts();
 +      load_gdt(&__get_cpu_var(host_gdt));
  }
  
  static void vmx_load_host_state(struct vcpu_vmx *vmx)
  static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 -      u64 phys_addr = __pa(vmx->vmcs);
        u64 tsc_this, delta, new_offset;
 +      u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
  
 -      if (vcpu->cpu != cpu) {
 +      if (!vmm_exclusive)
 +              kvm_cpu_vmxon(phys_addr);
 +      else if (vcpu->cpu != cpu)
                vcpu_clear(vmx);
 -              kvm_migrate_timers(vcpu);
 -              set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
 -              local_irq_disable();
 -              list_add(&vmx->local_vcpus_link,
 -                       &per_cpu(vcpus_on_cpu, cpu));
 -              local_irq_enable();
 -      }
  
        if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
 -              u8 error;
 -
                per_cpu(current_vmcs, cpu) = vmx->vmcs;
 -              asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 -                            : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 -                            : "cc");
 -              if (error)
 -                      printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
 -                             vmx->vmcs, phys_addr);
 +              vmcs_load(vmx->vmcs);
        }
  
        if (vcpu->cpu != cpu) {
                struct desc_ptr dt;
                unsigned long sysenter_esp;
  
 +              kvm_migrate_timers(vcpu);
 +              kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 +              local_irq_disable();
 +              list_add(&vmx->local_vcpus_link,
 +                       &per_cpu(vcpus_on_cpu, cpu));
 +              local_irq_enable();
 +
                vcpu->cpu = cpu;
                /*
                 * Linux uses per-cpu TSS and GDT, so set these when switching
  static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
  {
        __vmx_load_host_state(to_vmx(vcpu));
 +      if (!vmm_exclusive) {
 +              __vcpu_clear(to_vmx(vcpu));
 +              kvm_cpu_vmxoff();
 +      }
  }
  
  static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@@ -1117,10 -1057,10 +1117,10 @@@ static void setup_msrs(struct vcpu_vmx 
                if (index >= 0 && vmx->rdtscp_enabled)
                        move_msr_up(vmx, index, save_nmsrs++);
                /*
-                * MSR_K6_STAR is only needed on long mode guests, and only
+                * MSR_STAR is only needed on long mode guests, and only
                 * if efer.sce is enabled.
                 */
-               index = __find_msr_index(vmx, MSR_K6_STAR);
+               index = __find_msr_index(vmx, MSR_STAR);
                if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
                        move_msr_up(vmx, index, save_nmsrs++);
        }
@@@ -1346,13 -1286,6 +1346,13 @@@ static __init int vmx_disabled_by_bios(
        /* locked but not enabled */
  }
  
 +static void kvm_cpu_vmxon(u64 addr)
 +{
 +      asm volatile (ASM_VMX_VMXON_RAX
 +                      : : "a"(&addr), "m"(addr)
 +                      : "memory", "cc");
 +}
 +
  static int hardware_enable(void *garbage)
  {
        int cpu = raw_smp_processor_id();
                wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
        }
        write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
 -      asm volatile (ASM_VMX_VMXON_RAX
 -                    : : "a"(&phys_addr), "m"(phys_addr)
 -                    : "memory", "cc");
  
 -      ept_sync_global();
 +      if (vmm_exclusive) {
 +              kvm_cpu_vmxon(phys_addr);
 +              ept_sync_global();
 +      }
 +
 +      store_gdt(&__get_cpu_var(host_gdt));
  
        return 0;
  }
@@@ -1403,15 -1334,13 +1403,15 @@@ static void vmclear_local_vcpus(void
  static void kvm_cpu_vmxoff(void)
  {
        asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 -      write_cr4(read_cr4() & ~X86_CR4_VMXE);
  }
  
  static void hardware_disable(void *garbage)
  {
 -      vmclear_local_vcpus();
 -      kvm_cpu_vmxoff();
 +      if (vmm_exclusive) {
 +              vmclear_local_vcpus();
 +              kvm_cpu_vmxoff();
 +      }
 +      write_cr4(read_cr4() & ~X86_CR4_VMXE);
  }
  
  static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@@ -1610,8 -1539,7 +1610,8 @@@ static __init int hardware_setup(void
        if (!cpu_has_vmx_vpid())
                enable_vpid = 0;
  
 -      if (!cpu_has_vmx_ept()) {
 +      if (!cpu_has_vmx_ept() ||
 +          !cpu_has_vmx_ept_4levels()) {
                enable_ept = 0;
                enable_unrestricted_guest = 0;
        }
@@@ -1700,7 -1628,7 +1700,7 @@@ static gva_t rmode_tss_base(struct kvm 
                gfn_t base_gfn;
  
                slots = kvm_memslots(kvm);
 -              base_gfn = kvm->memslots->memslots[0].base_gfn +
 +              base_gfn = slots->memslots[0].base_gfn +
                                 kvm->memslots->memslots[0].npages - 3;
                return base_gfn << PAGE_SHIFT;
        }
@@@ -1831,12 -1759,9 +1831,12 @@@ static void exit_lmode(struct kvm_vcpu 
  
  static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
  {
 -      vpid_sync_vcpu_all(to_vmx(vcpu));
 -      if (enable_ept)
 +      vpid_sync_context(to_vmx(vcpu));
 +      if (enable_ept) {
 +              if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 +                      return;
                ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 +      }
  }
  
  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@@ -2582,7 -2507,7 +2582,7 @@@ static int vmx_vcpu_setup(struct vcpu_v
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
        vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
  
 -      vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
 +      vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
        vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
        vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
  
  
  static int init_rmode(struct kvm *kvm)
  {
 +      int idx, ret = 0;
 +
 +      idx = srcu_read_lock(&kvm->srcu);
        if (!init_rmode_tss(kvm))
 -              return 0;
 +              goto exit;
        if (!init_rmode_identity_map(kvm))
 -              return 0;
 -      return 1;
 +              goto exit;
 +
 +      ret = 1;
 +exit:
 +      srcu_read_unlock(&kvm->srcu, idx);
 +      return ret;
  }
  
  static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 msr;
 -      int ret, idx;
 +      int ret;
  
        vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
 -      idx = srcu_read_lock(&vcpu->kvm->srcu);
        if (!init_rmode(vmx->vcpu.kvm)) {
                ret = -ENOMEM;
                goto out;
                msr |= MSR_IA32_APICBASE_BSP;
        kvm_set_apic_base(&vmx->vcpu, msr);
  
 -      fx_init(&vmx->vcpu);
 +      ret = fx_init(&vmx->vcpu);
 +      if (ret != 0)
 +              goto out;
  
        seg_setup(VCPU_SREG_CS);
        /*
        vmx_fpu_activate(&vmx->vcpu);
        update_exception_bitmap(&vmx->vcpu);
  
 -      vpid_sync_vcpu_all(vmx);
 +      vpid_sync_context(vmx);
  
        ret = 0;
  
        vmx->emulation_required = 0;
  
  out:
 -      srcu_read_unlock(&vcpu->kvm->srcu, idx);
        return ret;
  }
  
@@@ -2908,7 -2826,9 +2908,7 @@@ static bool vmx_get_nmi_mask(struct kvm
  {
        if (!cpu_has_virtual_nmis())
                return to_vmx(vcpu)->soft_vnmi_blocked;
 -      else
 -              return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
 -                        GUEST_INTR_STATE_NMI);
 +      return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
  }
  
  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@@ -3150,7 -3070,7 +3150,7 @@@ static int handle_io(struct kvm_vcpu *v
        ++vcpu->stat.io_exits;
  
        if (string || in)
 -              return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
 +              return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
  
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
@@@ -3170,20 -3090,11 +3170,20 @@@ vmx_patch_hypercall(struct kvm_vcpu *vc
        hypercall[2] = 0xc1;
  }
  
 +static void complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 +{
 +      if (err)
 +              kvm_inject_gp(vcpu, 0);
 +      else
 +              skip_emulated_instruction(vcpu);
 +}
 +
  static int handle_cr(struct kvm_vcpu *vcpu)
  {
        unsigned long exit_qualification, val;
        int cr;
        int reg;
 +      int err;
  
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        cr = exit_qualification & 15;
                trace_kvm_cr_write(cr, val);
                switch (cr) {
                case 0:
 -                      kvm_set_cr0(vcpu, val);
 -                      skip_emulated_instruction(vcpu);
 +                      err = kvm_set_cr0(vcpu, val);
 +                      complete_insn_gp(vcpu, err);
                        return 1;
                case 3:
 -                      kvm_set_cr3(vcpu, val);
 -                      skip_emulated_instruction(vcpu);
 +                      err = kvm_set_cr3(vcpu, val);
 +                      complete_insn_gp(vcpu, err);
                        return 1;
                case 4:
 -                      kvm_set_cr4(vcpu, val);
 -                      skip_emulated_instruction(vcpu);
 +                      err = kvm_set_cr4(vcpu, val);
 +                      complete_insn_gp(vcpu, err);
                        return 1;
                case 8: {
                                u8 cr8_prev = kvm_get_cr8(vcpu);
@@@ -3410,25 -3321,30 +3410,25 @@@ static int handle_invlpg(struct kvm_vcp
  static int handle_wbinvd(struct kvm_vcpu *vcpu)
  {
        skip_emulated_instruction(vcpu);
 -      /* TODO: Add support for VT-d/pass-through device */
 +      kvm_emulate_wbinvd(vcpu);
        return 1;
  }
  
 -static int handle_apic_access(struct kvm_vcpu *vcpu)
 +static int handle_xsetbv(struct kvm_vcpu *vcpu)
  {
 -      unsigned long exit_qualification;
 -      enum emulation_result er;
 -      unsigned long offset;
 +      u64 new_bv = kvm_read_edx_eax(vcpu);
 +      u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
  
 -      exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 -      offset = exit_qualification & 0xffful;
 -
 -      er = emulate_instruction(vcpu, 0, 0, 0);
 -
 -      if (er !=  EMULATE_DONE) {
 -              printk(KERN_ERR
 -                     "Fail to handle apic access vmexit! Offset is 0x%lx\n",
 -                     offset);
 -              return -ENOEXEC;
 -      }
 +      if (kvm_set_xcr(vcpu, index, new_bv) == 0)
 +              skip_emulated_instruction(vcpu);
        return 1;
  }
  
 +static int handle_apic_access(struct kvm_vcpu *vcpu)
 +{
 +      return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
 +}
 +
  static int handle_task_switch(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@@ -3638,8 -3554,13 +3638,8 @@@ static int handle_invalid_guest_state(s
                        goto out;
                }
  
 -              if (err != EMULATE_DONE) {
 -                      vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 -                      vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 -                      vcpu->run->internal.ndata = 0;
 -                      ret = 0;
 -                      goto out;
 -              }
 +              if (err != EMULATE_DONE)
 +                      return 0;
  
                if (signal_pending(current))
                        goto out;
@@@ -3702,7 -3623,6 +3702,7 @@@ static int (*kvm_vmx_exit_handlers[])(s
        [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
        [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
        [EXIT_REASON_WBINVD]                  = handle_wbinvd,
 +      [EXIT_REASON_XSETBV]                  = handle_xsetbv,
        [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
        [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
        [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
@@@ -3736,13 -3656,6 +3736,13 @@@ static int vmx_handle_exit(struct kvm_v
        if (enable_ept && is_paging(vcpu))
                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
  
 +      if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
 +              vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 +              vcpu->run->fail_entry.hardware_entry_failure_reason
 +                      = exit_reason;
 +              return 0;
 +      }
 +
        if (unlikely(vmx->fail)) {
                vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
                vcpu->run->fail_entry.hardware_entry_failure_reason
@@@ -3948,6 -3861,11 +3948,6 @@@ static void vmx_vcpu_run(struct kvm_vcp
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                vmx_set_interrupt_shadow(vcpu, 0);
  
 -      /*
 -       * Loading guest fpu may have cleared host cr0.ts
 -       */
 -      vmcs_writel(HOST_CR0, read_cr0());
 -
        asm(
                /* Store host registers */
                "push %%"R"dx; push %%"R"bp;"
@@@ -4083,19 -4001,6 +4083,19 @@@ static void vmx_free_vcpu(struct kvm_vc
        kmem_cache_free(kvm_vcpu_cache, vmx);
  }
  
 +static inline void vmcs_init(struct vmcs *vmcs)
 +{
 +      u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id()));
 +
 +      if (!vmm_exclusive)
 +              kvm_cpu_vmxon(phys_addr);
 +
 +      vmcs_clear(vmcs);
 +
 +      if (!vmm_exclusive)
 +              kvm_cpu_vmxoff();
 +}
 +
  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
  {
        int err;
        if (!vmx->vmcs)
                goto free_msrs;
  
 -      vmcs_clear(vmx->vmcs);
 +      vmcs_init(vmx->vmcs);
  
        cpu = get_cpu();
        vmx_vcpu_load(&vmx->vcpu, cpu);
@@@ -4360,8 -4265,6 +4360,8 @@@ static struct kvm_x86_ops vmx_x86_ops 
        .rdtscp_supported = vmx_rdtscp_supported,
  
        .set_supported_cpuid = vmx_set_supported_cpuid,
 +
 +      .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
  };
  
  static int __init vmx_init(void)
diff --combined arch/x86/kvm/x86.c
index 97aab036dabfa1260a812f460345e58869bdd571,33f0a84cfd93c3b73735b5a32705cae248108e81..25f19078b3210424b091fea80bd8c82b0a9ef254
@@@ -6,7 -6,6 +6,7 @@@
   * Copyright (C) 2006 Qumranet, Inc.
   * Copyright (C) 2008 Qumranet, Inc.
   * Copyright IBM Corporation, 2008
 + * Copyright 2010 Red Hat, Inc. and/or its affilates.
   *
   * Authors:
   *   Avi Kivity   <[email protected]>
  #include <linux/srcu.h>
  #include <linux/slab.h>
  #include <linux/perf_event.h>
 +#include <linux/uaccess.h>
  #include <trace/events/kvm.h>
  
  #define CREATE_TRACE_POINTS
  #include "trace.h"
  
  #include <asm/debugreg.h>
 -#include <asm/uaccess.h>
  #include <asm/msr.h>
  #include <asm/desc.h>
  #include <asm/mtrr.h>
  #include <asm/mce.h>
 +#include <asm/i387.h>
 +#include <asm/xcr.h>
  
  #define MAX_IO_MSRS 256
  #define CR0_RESERVED_BITS                                             \
@@@ -65,7 -62,6 +65,7 @@@
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
 +                        | X86_CR4_OSXSAVE \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
  
  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@@ -151,13 -147,6 +151,13 @@@ struct kvm_stats_debugfs_item debugfs_e
        { NULL }
  };
  
 +u64 __read_mostly host_xcr0;
 +
 +static inline u32 bit(int bitno)
 +{
 +      return 1 << (bitno & 31);
 +}
 +
  static void kvm_on_user_return(struct user_return_notifier *urn)
  {
        unsigned slot;
@@@ -296,7 -285,7 +296,7 @@@ static void kvm_multiple_exception(stru
        prev_nr = vcpu->arch.exception.nr;
        if (prev_nr == DF_VECTOR) {
                /* triple fault -> shutdown */
 -              set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
 +              kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                return;
        }
        class1 = exception_class(prev_nr);
        return changed;
  }
  
 -void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 +int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
  {
 +      unsigned long old_cr0 = kvm_read_cr0(vcpu);
 +      unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
 +                                  X86_CR0_CD | X86_CR0_NW;
 +
        cr0 |= X86_CR0_ET;
  
  #ifdef CONFIG_X86_64
 -      if (cr0 & 0xffffffff00000000UL) {
 -              kvm_inject_gp(vcpu, 0);
 -              return;
 -      }
 +      if (cr0 & 0xffffffff00000000UL)
 +              return 1;
  #endif
  
        cr0 &= ~CR0_RESERVED_BITS;
  
 -      if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
 -              kvm_inject_gp(vcpu, 0);
 -              return;
 -      }
 +      if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
 +              return 1;
  
 -      if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
 -              kvm_inject_gp(vcpu, 0);
 -              return;
 -      }
 +      if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
 +              return 1;
  
        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
  #ifdef CONFIG_X86_64
                if ((vcpu->arch.efer & EFER_LME)) {
                        int cs_db, cs_l;
  
 -                      if (!is_pae(vcpu)) {
 -                              kvm_inject_gp(vcpu, 0);
 -                              return;
 -                      }
 +                      if (!is_pae(vcpu))
 +                              return 1;
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 -                      if (cs_l) {
 -                              kvm_inject_gp(vcpu, 0);
 -                              return;
 -
 -                      }
 +                      if (cs_l)
 +                              return 1;
                } else
  #endif
 -              if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
 -                      kvm_inject_gp(vcpu, 0);
 -                      return;
 -              }
 -
 +              if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3))
 +                      return 1;
        }
  
        kvm_x86_ops->set_cr0(vcpu, cr0);
  
 -      kvm_mmu_reset_context(vcpu);
 -      return;
 +      if ((cr0 ^ old_cr0) & update_bits)
 +              kvm_mmu_reset_context(vcpu);
 +      return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_set_cr0);
  
  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
  {
 -      kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
 +      (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
  }
  EXPORT_SYMBOL_GPL(kvm_lmsw);
  
 -void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 +int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
  {
 -      unsigned long old_cr4 = kvm_read_cr4(vcpu);
 -      unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 +      u64 xcr0;
  
 -      if (cr4 & CR4_RESERVED_BITS) {
 +      /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
 +      if (index != XCR_XFEATURE_ENABLED_MASK)
 +              return 1;
 +      xcr0 = xcr;
 +      if (kvm_x86_ops->get_cpl(vcpu) != 0)
 +              return 1;
 +      if (!(xcr0 & XSTATE_FP))
 +              return 1;
 +      if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
 +              return 1;
 +      if (xcr0 & ~host_xcr0)
 +              return 1;
 +      vcpu->arch.xcr0 = xcr0;
 +      vcpu->guest_xcr0_loaded = 0;
 +      return 0;
 +}
 +
 +int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 +{
 +      if (__kvm_set_xcr(vcpu, index, xcr)) {
                kvm_inject_gp(vcpu, 0);
 +              return 1;
 +      }
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(kvm_set_xcr);
 +
 +static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm_cpuid_entry2 *best;
 +
 +      best = kvm_find_cpuid_entry(vcpu, 1, 0);
 +      return best && (best->ecx & bit(X86_FEATURE_XSAVE));
 +}
 +
 +static void update_cpuid(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm_cpuid_entry2 *best;
 +
 +      best = kvm_find_cpuid_entry(vcpu, 1, 0);
 +      if (!best)
                return;
 +
 +      /* Update OSXSAVE bit */
 +      if (cpu_has_xsave && best->function == 0x1) {
 +              best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
 +              if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
 +                      best->ecx |= bit(X86_FEATURE_OSXSAVE);
        }
 +}
 +
 +int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 +{
 +      unsigned long old_cr4 = kvm_read_cr4(vcpu);
 +      unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 +
 +      if (cr4 & CR4_RESERVED_BITS)
 +              return 1;
 +
 +      if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
 +              return 1;
  
        if (is_long_mode(vcpu)) {
 -              if (!(cr4 & X86_CR4_PAE)) {
 -                      kvm_inject_gp(vcpu, 0);
 -                      return;
 -              }
 +              if (!(cr4 & X86_CR4_PAE))
 +                      return 1;
        } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
                   && ((cr4 ^ old_cr4) & pdptr_bits)
 -                 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
 -              kvm_inject_gp(vcpu, 0);
 -              return;
 -      }
 +                 && !load_pdptrs(vcpu, vcpu->arch.cr3))
 +              return 1;
 +
 +      if (cr4 & X86_CR4_VMXE)
 +              return 1;
  
 -      if (cr4 & X86_CR4_VMXE) {
 -              kvm_inject_gp(vcpu, 0);
 -              return;
 -      }
        kvm_x86_ops->set_cr4(vcpu, cr4);
 -      vcpu->arch.cr4 = cr4;
 -      kvm_mmu_reset_context(vcpu);
 +
 +      if ((cr4 ^ old_cr4) & pdptr_bits)
 +              kvm_mmu_reset_context(vcpu);
 +
 +      if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
 +              update_cpuid(vcpu);
 +
 +      return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_set_cr4);
  
 -void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 +int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  {
        if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
                kvm_mmu_sync_roots(vcpu);
                kvm_mmu_flush_tlb(vcpu);
 -              return;
 +              return 0;
        }
  
        if (is_long_mode(vcpu)) {
 -              if (cr3 & CR3_L_MODE_RESERVED_BITS) {
 -                      kvm_inject_gp(vcpu, 0);
 -                      return;
 -              }
 +              if (cr3 & CR3_L_MODE_RESERVED_BITS)
 +                      return 1;
        } else {
                if (is_pae(vcpu)) {
 -                      if (cr3 & CR3_PAE_RESERVED_BITS) {
 -                              kvm_inject_gp(vcpu, 0);
 -                              return;
 -                      }
 -                      if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
 -                              kvm_inject_gp(vcpu, 0);
 -                              return;
 -                      }
 +                      if (cr3 & CR3_PAE_RESERVED_BITS)
 +                              return 1;
 +                      if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3))
 +                              return 1;
                }
                /*
                 * We don't check reserved bits in nonpae mode, because
         * to debug) behavior on the guest side.
         */
        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
 -              kvm_inject_gp(vcpu, 0);
 -      else {
 -              vcpu->arch.cr3 = cr3;
 -              vcpu->arch.mmu.new_cr3(vcpu);
 -      }
 +              return 1;
 +      vcpu->arch.cr3 = cr3;
 +      vcpu->arch.mmu.new_cr3(vcpu);
 +      return 0;
  }
  EXPORT_SYMBOL_GPL(kvm_set_cr3);
  
 -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 +int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
  {
 -      if (cr8 & CR8_RESERVED_BITS) {
 -              kvm_inject_gp(vcpu, 0);
 -              return;
 -      }
 +      if (cr8 & CR8_RESERVED_BITS)
 +              return 1;
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_lapic_set_tpr(vcpu, cr8);
        else
                vcpu->arch.cr8 = cr8;
 +      return 0;
 +}
 +
 +void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 +{
 +      if (__kvm_set_cr8(vcpu, cr8))
 +              kvm_inject_gp(vcpu, 0);
  }
  EXPORT_SYMBOL_GPL(kvm_set_cr8);
  
@@@ -633,7 -576,7 +633,7 @@@ unsigned long kvm_get_cr8(struct kvm_vc
  }
  EXPORT_SYMBOL_GPL(kvm_get_cr8);
  
 -int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 +static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
  {
        switch (dr) {
        case 0 ... 3:
                        vcpu->arch.eff_db[dr] = val;
                break;
        case 4:
 -              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
 -                      kvm_queue_exception(vcpu, UD_VECTOR);
 -                      return 1;
 -              }
 +              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
 +                      return 1; /* #UD */
                /* fall through */
        case 6:
 -              if (val & 0xffffffff00000000ULL) {
 -                      kvm_inject_gp(vcpu, 0);
 -                      return 1;
 -              }
 +              if (val & 0xffffffff00000000ULL)
 +                      return -1; /* #GP */
                vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
                break;
        case 5:
 -              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
 -                      kvm_queue_exception(vcpu, UD_VECTOR);
 -                      return 1;
 -              }
 +              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
 +                      return 1; /* #UD */
                /* fall through */
        default: /* 7 */
 -              if (val & 0xffffffff00000000ULL) {
 -                      kvm_inject_gp(vcpu, 0);
 -                      return 1;
 -              }
 +              if (val & 0xffffffff00000000ULL)
 +                      return -1; /* #GP */
                vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
                if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
                        kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
  
        return 0;
  }
 +
 +int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 +{
 +      int res;
 +
 +      res = __kvm_set_dr(vcpu, dr, val);
 +      if (res > 0)
 +              kvm_queue_exception(vcpu, UD_VECTOR);
 +      else if (res < 0)
 +              kvm_inject_gp(vcpu, 0);
 +
 +      return res;
 +}
  EXPORT_SYMBOL_GPL(kvm_set_dr);
  
 -int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 +static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
  {
        switch (dr) {
        case 0 ... 3:
                *val = vcpu->arch.db[dr];
                break;
        case 4:
 -              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
 -                      kvm_queue_exception(vcpu, UD_VECTOR);
 +              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                        return 1;
 -              }
                /* fall through */
        case 6:
                *val = vcpu->arch.dr6;
                break;
        case 5:
 -              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
 -                      kvm_queue_exception(vcpu, UD_VECTOR);
 +              if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                        return 1;
 -              }
                /* fall through */
        default: /* 7 */
                *val = vcpu->arch.dr7;
  
        return 0;
  }
 -EXPORT_SYMBOL_GPL(kvm_get_dr);
  
 -static inline u32 bit(int bitno)
 +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
  {
 -      return 1 << (bitno & 31);
 +      if (_kvm_get_dr(vcpu, dr, val)) {
 +              kvm_queue_exception(vcpu, UD_VECTOR);
 +              return 1;
 +      }
 +      return 0;
  }
 +EXPORT_SYMBOL_GPL(kvm_get_dr);
  
  /*
   * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@@ -733,7 -671,7 +733,7 @@@ static u32 msrs_to_save[] = 
        HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
        HV_X64_MSR_APIC_ASSIST_PAGE,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
-       MSR_K6_STAR,
+       MSR_STAR,
  #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
  #endif
@@@ -744,14 -682,10 +744,14 @@@ static unsigned num_msrs_to_save
  
  static u32 emulated_msrs[] = {
        MSR_IA32_MISC_ENABLE,
 +      MSR_IA32_MCG_STATUS,
 +      MSR_IA32_MCG_CTL,
  };
  
  static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
  {
 +      u64 old_efer = vcpu->arch.efer;
 +
        if (efer & efer_reserved_bits)
                return 1;
  
  
        kvm_x86_ops->set_efer(vcpu, efer);
  
 -      vcpu->arch.efer = efer;
 -
        vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
        kvm_mmu_reset_context(vcpu);
  
 +      /* Update reserved bits */
 +      if ((efer ^ old_efer) & EFER_NX)
 +              kvm_mmu_reset_context(vcpu);
 +
        return 0;
  }
  
@@@ -950,7 -882,7 +950,7 @@@ static int kvm_request_guest_time_updat
  
        if (!vcpu->time_page)
                return 0;
 -      set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
 +      kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v);
        return 1;
  }
  
@@@ -1592,12 -1524,16 +1592,12 @@@ static int __msr_io(struct kvm_vcpu *vc
  {
        int i, idx;
  
 -      vcpu_load(vcpu);
 -
        idx = srcu_read_lock(&vcpu->kvm->srcu);
        for (i = 0; i < msrs->nmsrs; ++i)
                if (do_msr(vcpu, entries[i].index, &entries[i].data))
                        break;
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
  
 -      vcpu_put(vcpu);
 -
        return i;
  }
  
@@@ -1682,7 -1618,6 +1682,7 @@@ int kvm_dev_ioctl_check_extension(long 
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
 +      case KVM_CAP_XSAVE:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
        case KVM_CAP_MCE:
                r = KVM_MAX_MCE_BANKS;
                break;
 +      case KVM_CAP_XCRS:
 +              r = cpu_has_xsave;
 +              break;
        default:
                r = 0;
                break;
        return r;
  }
  
 +static void wbinvd_ipi(void *garbage)
 +{
 +      wbinvd();
 +}
 +
 +static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 +{
 +      return vcpu->kvm->arch.iommu_domain &&
 +              !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
 +}
 +
  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
 +      /* Address WBINVD may be executed by guest */
 +      if (need_emulate_wbinvd(vcpu)) {
 +              if (kvm_x86_ops->has_wbinvd_exit())
 +                      cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
 +              else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
 +                      smp_call_function_single(vcpu->cpu,
 +                                      wbinvd_ipi, NULL, 1);
 +      }
 +
        kvm_x86_ops->vcpu_load(vcpu, cpu);
        if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
                unsigned long khz = cpufreq_quick_get(cpu);
  
  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
 -      kvm_put_guest_fpu(vcpu);
        kvm_x86_ops->vcpu_put(vcpu);
 +      kvm_put_guest_fpu(vcpu);
  }
  
  static int is_efer_nx(void)
@@@ -1869,6 -1781,7 +1869,6 @@@ static int kvm_vcpu_ioctl_set_cpuid(str
        if (copy_from_user(cpuid_entries, entries,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry)))
                goto out_free;
 -      vcpu_load(vcpu);
        for (i = 0; i < cpuid->nent; i++) {
                vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
                vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
        r = 0;
        kvm_apic_set_version(vcpu);
        kvm_x86_ops->cpuid_update(vcpu);
 -      vcpu_put(vcpu);
 +      update_cpuid(vcpu);
  
  out_free:
        vfree(cpuid_entries);
@@@ -1907,10 -1820,11 +1907,10 @@@ static int kvm_vcpu_ioctl_set_cpuid2(st
        if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
                goto out;
 -      vcpu_load(vcpu);
        vcpu->arch.cpuid_nent = cpuid->nent;
        kvm_apic_set_version(vcpu);
        kvm_x86_ops->cpuid_update(vcpu);
 -      vcpu_put(vcpu);
 +      update_cpuid(vcpu);
        return 0;
  
  out:
@@@ -1923,6 -1837,7 +1923,6 @@@ static int kvm_vcpu_ioctl_get_cpuid2(st
  {
        int r;
  
 -      vcpu_load(vcpu);
        r = -E2BIG;
        if (cpuid->nent < vcpu->arch.cpuid_nent)
                goto out;
  
  out:
        cpuid->nent = vcpu->arch.cpuid_nent;
 -      vcpu_put(vcpu);
        return r;
  }
  
@@@ -1985,13 -1901,13 +1985,13 @@@ static void do_cpuid_ent(struct kvm_cpu
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_supported_word4_x86_features =
 -              F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
 +              F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
                0 /* DS-CPL, VMX, SMX, EST */ |
                0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
                0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
                0 /* Reserved, DCA */ | F(XMM4_1) |
                F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 -              0 /* Reserved, XSAVE, OSXSAVE */;
 +              0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);
        /* cpuid 0x80000001.ecx */
        const u32 kvm_supported_word6_x86_features =
                F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
  
        switch (function) {
        case 0:
 -              entry->eax = min(entry->eax, (u32)0xb);
 +              entry->eax = min(entry->eax, (u32)0xd);
                break;
        case 1:
                entry->edx &= kvm_supported_word0_x86_features;
                }
                break;
        }
 +      case 0xd: {
 +              int i;
 +
 +              entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 +              for (i = 1; *nent < maxnent; ++i) {
 +                      if (entry[i - 1].eax == 0 && i != 2)
 +                              break;
 +                      do_cpuid_1_ent(&entry[i], function, i);
 +                      entry[i].flags |=
 +                             KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 +                      ++*nent;
 +              }
 +              break;
 +      }
        case KVM_CPUID_SIGNATURE: {
                char signature[12] = "KVMKVMKVM\0\0";
                u32 *sigptr = (u32 *)signature;
@@@ -2179,7 -2081,9 +2179,7 @@@ out
  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
  {
 -      vcpu_load(vcpu);
        memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 -      vcpu_put(vcpu);
  
        return 0;
  }
  static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
  {
 -      vcpu_load(vcpu);
        memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
        kvm_apic_post_state_restore(vcpu);
        update_cr8_intercept(vcpu);
 -      vcpu_put(vcpu);
  
        return 0;
  }
@@@ -2201,15 -2107,20 +2201,15 @@@ static int kvm_vcpu_ioctl_interrupt(str
                return -EINVAL;
        if (irqchip_in_kernel(vcpu->kvm))
                return -ENXIO;
 -      vcpu_load(vcpu);
  
        kvm_queue_interrupt(vcpu, irq->irq, false);
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
  static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
  {
 -      vcpu_load(vcpu);
        kvm_inject_nmi(vcpu);
 -      vcpu_put(vcpu);
  
        return 0;
  }
@@@ -2229,6 -2140,7 +2229,6 @@@ static int kvm_vcpu_ioctl_x86_setup_mce
        int r;
        unsigned bank_num = mcg_cap & 0xff, bank;
  
 -      vcpu_load(vcpu);
        r = -EINVAL;
        if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
                goto out;
        for (bank = 0; bank < bank_num; bank++)
                vcpu->arch.mce_banks[bank*4] = ~(u64)0;
  out:
 -      vcpu_put(vcpu);
        return r;
  }
  
@@@ -2275,7 -2188,7 +2275,7 @@@ static int kvm_vcpu_ioctl_x86_set_mce(s
                        printk(KERN_DEBUG "kvm: set_mce: "
                               "injects mce exception while "
                               "previous one is in progress!\n");
 -                      set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
 +                      kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                        return 0;
                }
                if (banks[1] & MCI_STATUS_VAL)
  static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
                                               struct kvm_vcpu_events *events)
  {
 -      vcpu_load(vcpu);
 -
        events->exception.injected =
                vcpu->arch.exception.pending &&
                !kvm_exception_is_soft(vcpu->arch.exception.nr);
        events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
                         | KVM_VCPUEVENT_VALID_SIPI_VECTOR
                         | KVM_VCPUEVENT_VALID_SHADOW);
 -
 -      vcpu_put(vcpu);
  }
  
  static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                              | KVM_VCPUEVENT_VALID_SHADOW))
                return -EINVAL;
  
 -      vcpu_load(vcpu);
 -
        vcpu->arch.exception.pending = events->exception.injected;
        vcpu->arch.exception.nr = events->exception.nr;
        vcpu->arch.exception.has_error_code = events->exception.has_error_code;
        if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
                vcpu->arch.sipi_vector = events->sipi_vector;
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
  /*
   * Fill *dbgregs from the vCPU's cached debug register state: the db[] array
   * is copied from vcpu->arch.db, dr6/dr7 from vcpu->arch.dr6/dr7, and flags
   * is set to 0.
   *
   * The '-' lines drop the vcpu_load()/vcpu_put() pair from this helper.
   * NOTE(review): presumably the caller now keeps the vCPU loaded -- confirm.
   */
  static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
                                             struct kvm_debugregs *dbgregs)
  {
 -      vcpu_load(vcpu);
 -
        memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
        dbgregs->dr6 = vcpu->arch.dr6;
        dbgregs->dr7 = vcpu->arch.dr7;
        dbgregs->flags = 0;
 -
 -      vcpu_put(vcpu);
  }
  
  static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        if (dbgregs->flags)
                return -EINVAL;
  
 -      vcpu_load(vcpu);
 -
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        vcpu->arch.dr6 = dbgregs->dr6;
        vcpu->arch.dr7 = dbgregs->dr7;
  
 -      vcpu_put(vcpu);
 +      return 0;
 +}
 +
 +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 +                                       struct kvm_xsave *guest_xsave)
 +{
 +      if (cpu_has_xsave)
 +              memcpy(guest_xsave->region,
 +                      &vcpu->arch.guest_fpu.state->xsave,
 +                      sizeof(struct xsave_struct));
 +      else {
 +              memcpy(guest_xsave->region,
 +                      &vcpu->arch.guest_fpu.state->fxsave,
 +                      sizeof(struct i387_fxsave_struct));
 +              *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
 +                      XSTATE_FPSSE;
 +      }
 +}
 +
 +static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 +                                      struct kvm_xsave *guest_xsave)
 +{
 +      u64 xstate_bv =
 +              *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
  
 +      if (cpu_has_xsave)
 +              memcpy(&vcpu->arch.guest_fpu.state->xsave,
 +                      guest_xsave->region, sizeof(struct xsave_struct));
 +      else {
 +              if (xstate_bv & ~XSTATE_FPSSE)
 +                      return -EINVAL;
 +              memcpy(&vcpu->arch.guest_fpu.state->fxsave,
 +                      guest_xsave->region, sizeof(struct i387_fxsave_struct));
 +      }
        return 0;
  }
  
 +static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
 +                                      struct kvm_xcrs *guest_xcrs)
 +{
 +      if (!cpu_has_xsave) {
 +              guest_xcrs->nr_xcrs = 0;
 +              return;
 +      }
 +
 +      guest_xcrs->nr_xcrs = 1;
 +      guest_xcrs->flags = 0;
 +      guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
 +      guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
 +}
 +
 +static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 +                                     struct kvm_xcrs *guest_xcrs)
 +{
 +      int i, r = 0;
 +
 +      if (!cpu_has_xsave)
 +              return -EINVAL;
 +
 +      if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
 +              return -EINVAL;
 +
 +      for (i = 0; i < guest_xcrs->nr_xcrs; i++)
 +              /* Only support XCR0 currently */
 +              if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
 +                      r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
 +                              guest_xcrs->xcrs[0].value);
 +                      break;
 +              }
 +      if (r)
 +              r = -EINVAL;
 +      return r;
 +}
 +
  long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
  {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r;
 -      struct kvm_lapic_state *lapic = NULL;
 +      union {
 +              struct kvm_lapic_state *lapic;
 +              struct kvm_xsave *xsave;
 +              struct kvm_xcrs *xcrs;
 +              void *buffer;
 +      } u;
  
 +      u.buffer = NULL;
        switch (ioctl) {
        case KVM_GET_LAPIC: {
                r = -EINVAL;
                if (!vcpu->arch.apic)
                        goto out;
 -              lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
 +              u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
  
                r = -ENOMEM;
 -              if (!lapic)
 +              if (!u.lapic)
                        goto out;
 -              r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
 +              r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
                if (r)
                        goto out;
                r = -EFAULT;
 -              if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
 +              if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
                        goto out;
                r = 0;
                break;
                r = -EINVAL;
                if (!vcpu->arch.apic)
                        goto out;
 -              lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
 +              u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
                r = -ENOMEM;
 -              if (!lapic)
 +              if (!u.lapic)
                        goto out;
                r = -EFAULT;
 -              if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
 +              if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
                        goto out;
 -              r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
 +              r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
                if (r)
                        goto out;
                r = 0;
                r = -EFAULT;
                if (copy_from_user(&mce, argp, sizeof mce))
                        goto out;
 -              vcpu_load(vcpu);
                r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
 -              vcpu_put(vcpu);
                break;
        }
        case KVM_GET_VCPU_EVENTS: {
                r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
                break;
        }
 +      case KVM_GET_XSAVE: {
 +              u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
 +              r = -ENOMEM;
 +              if (!u.xsave)
 +                      break;
 +
 +              kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
 +
 +              r = -EFAULT;
 +              if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
 +                      break;
 +              r = 0;
 +              break;
 +      }
 +      case KVM_SET_XSAVE: {
 +              u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
 +              r = -ENOMEM;
 +              if (!u.xsave)
 +                      break;
 +
 +              r = -EFAULT;
 +              if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
 +                      break;
 +
 +              r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
 +              break;
 +      }
 +      case KVM_GET_XCRS: {
 +              u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
 +              r = -ENOMEM;
 +              if (!u.xcrs)
 +                      break;
 +
 +              kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
 +
 +              r = -EFAULT;
 +              if (copy_to_user(argp, u.xcrs,
 +                               sizeof(struct kvm_xcrs)))
 +                      break;
 +              r = 0;
 +              break;
 +      }
 +      case KVM_SET_XCRS: {
 +              u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
 +              r = -ENOMEM;
 +              if (!u.xcrs)
 +                      break;
 +
 +              r = -EFAULT;
 +              if (copy_from_user(u.xcrs, argp,
 +                                 sizeof(struct kvm_xcrs)))
 +                      break;
 +
 +              r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
 +              break;
 +      }
        default:
                r = -EINVAL;
        }
  out:
 -      kfree(lapic);
 +      kfree(u.buffer);
        return r;
  }
  
@@@ -2762,6 -2560,115 +2762,6 @@@ static int kvm_vm_ioctl_get_nr_mmu_page
        return kvm->arch.n_alloc_mmu_pages;
  }
  
 -gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
 -{
 -      int i;
 -      struct kvm_mem_alias *alias;
 -      struct kvm_mem_aliases *aliases;
 -
 -      aliases = kvm_aliases(kvm);
 -
 -      for (i = 0; i < aliases->naliases; ++i) {
 -              alias = &aliases->aliases[i];
 -              if (alias->flags & KVM_ALIAS_INVALID)
 -                      continue;
 -              if (gfn >= alias->base_gfn
 -                  && gfn < alias->base_gfn + alias->npages)
 -                      return alias->target_gfn + gfn - alias->base_gfn;
 -      }
 -      return gfn;
 -}
 -
 -gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 -{
 -      int i;
 -      struct kvm_mem_alias *alias;
 -      struct kvm_mem_aliases *aliases;
 -
 -      aliases = kvm_aliases(kvm);
 -
 -      for (i = 0; i < aliases->naliases; ++i) {
 -              alias = &aliases->aliases[i];
 -              if (gfn >= alias->base_gfn
 -                  && gfn < alias->base_gfn + alias->npages)
 -                      return alias->target_gfn + gfn - alias->base_gfn;
 -      }
 -      return gfn;
 -}
 -
 -/*
 - * Set a new alias region.  Aliases map a portion of physical memory into
 - * another portion.  This is useful for memory windows, for example the PC
 - * VGA region.
 - */
 -static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 -                                       struct kvm_memory_alias *alias)
 -{
 -      int r, n;
 -      struct kvm_mem_alias *p;
 -      struct kvm_mem_aliases *aliases, *old_aliases;
 -
 -      r = -EINVAL;
 -      /* General sanity checks */
 -      if (alias->memory_size & (PAGE_SIZE - 1))
 -              goto out;
 -      if (alias->guest_phys_addr & (PAGE_SIZE - 1))
 -              goto out;
 -      if (alias->slot >= KVM_ALIAS_SLOTS)
 -              goto out;
 -      if (alias->guest_phys_addr + alias->memory_size
 -          < alias->guest_phys_addr)
 -              goto out;
 -      if (alias->target_phys_addr + alias->memory_size
 -          < alias->target_phys_addr)
 -              goto out;
 -
 -      r = -ENOMEM;
 -      aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
 -      if (!aliases)
 -              goto out;
 -
 -      mutex_lock(&kvm->slots_lock);
 -
 -      /* invalidate any gfn reference in case of deletion/shrinking */
 -      memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
 -      aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
 -      old_aliases = kvm->arch.aliases;
 -      rcu_assign_pointer(kvm->arch.aliases, aliases);
 -      synchronize_srcu_expedited(&kvm->srcu);
 -      kvm_mmu_zap_all(kvm);
 -      kfree(old_aliases);
 -
 -      r = -ENOMEM;
 -      aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
 -      if (!aliases)
 -              goto out_unlock;
 -
 -      memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
 -
 -      p = &aliases->aliases[alias->slot];
 -      p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
 -      p->npages = alias->memory_size >> PAGE_SHIFT;
 -      p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
 -      p->flags &= ~(KVM_ALIAS_INVALID);
 -
 -      for (n = KVM_ALIAS_SLOTS; n > 0; --n)
 -              if (aliases->aliases[n - 1].npages)
 -                      break;
 -      aliases->naliases = n;
 -
 -      old_aliases = kvm->arch.aliases;
 -      rcu_assign_pointer(kvm->arch.aliases, aliases);
 -      synchronize_srcu_expedited(&kvm->srcu);
 -      kfree(old_aliases);
 -      r = 0;
 -
 -out_unlock:
 -      mutex_unlock(&kvm->slots_lock);
 -out:
 -      return r;
 -}
 -
  static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
  {
        int r;
@@@ -2890,6 -2797,7 +2890,6 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
        struct kvm_memory_slot *memslot;
        unsigned long n;
        unsigned long is_dirty = 0;
 -      unsigned long *dirty_bitmap = NULL;
  
        mutex_lock(&kvm->slots_lock);
  
  
        n = kvm_dirty_bitmap_bytes(memslot);
  
 -      r = -ENOMEM;
 -      dirty_bitmap = vmalloc(n);
 -      if (!dirty_bitmap)
 -              goto out;
 -      memset(dirty_bitmap, 0, n);
 -
        for (i = 0; !is_dirty && i < n/sizeof(long); i++)
                is_dirty = memslot->dirty_bitmap[i];
  
        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
                struct kvm_memslots *slots, *old_slots;
 +              unsigned long *dirty_bitmap;
  
                spin_lock(&kvm->mmu_lock);
                kvm_mmu_slot_remove_write_access(kvm, log->slot);
                spin_unlock(&kvm->mmu_lock);
  
 -              slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 -              if (!slots)
 -                      goto out_free;
 +              r = -ENOMEM;
 +              dirty_bitmap = vmalloc(n);
 +              if (!dirty_bitmap)
 +                      goto out;
 +              memset(dirty_bitmap, 0, n);
  
 +              r = -ENOMEM;
 +              slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 +              if (!slots) {
 +                      vfree(dirty_bitmap);
 +                      goto out;
 +              }
                memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
                slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
  
                synchronize_srcu_expedited(&kvm->srcu);
                dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
                kfree(old_slots);
 +
 +              r = -EFAULT;
 +              if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
 +                      vfree(dirty_bitmap);
 +                      goto out;
 +              }
 +              vfree(dirty_bitmap);
 +      } else {
 +              r = -EFAULT;
 +              if (clear_user(log->dirty_bitmap, n))
 +                      goto out;
        }
  
        r = 0;
 -      if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
 -              r = -EFAULT;
 -out_free:
 -      vfree(dirty_bitmap);
  out:
        mutex_unlock(&kvm->slots_lock);
        return r;
@@@ -2969,6 -2867,7 +2969,6 @@@ long kvm_arch_vm_ioctl(struct file *fil
        union {
                struct kvm_pit_state ps;
                struct kvm_pit_state2 ps2;
 -              struct kvm_memory_alias alias;
                struct kvm_pit_config pit_config;
        } u;
  
                        goto out;
                break;
        }
 -      case KVM_SET_MEMORY_REGION: {
 -              struct kvm_memory_region kvm_mem;
 -              struct kvm_userspace_memory_region kvm_userspace_mem;
 -
 -              r = -EFAULT;
 -              if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
 -                      goto out;
 -              kvm_userspace_mem.slot = kvm_mem.slot;
 -              kvm_userspace_mem.flags = kvm_mem.flags;
 -              kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
 -              kvm_userspace_mem.memory_size = kvm_mem.memory_size;
 -              r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
 -              if (r)
 -                      goto out;
 -              break;
 -      }
        case KVM_SET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
                if (r)
        case KVM_GET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
                break;
 -      case KVM_SET_MEMORY_ALIAS:
 -              r = -EFAULT;
 -              if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
 -                      goto out;
 -              r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
 -              if (r)
 -                      goto out;
 -              break;
        case KVM_CREATE_IRQCHIP: {
                struct kvm_pic *vpic;
  
@@@ -3336,7 -3259,7 +3336,7 @@@ static int kvm_read_guest_virt_helper(g
                }
                ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
                if (ret < 0) {
 -                      r = X86EMUL_UNHANDLEABLE;
 +                      r = X86EMUL_IO_NEEDED;
                        goto out;
                }
  
@@@ -3392,7 -3315,7 +3392,7 @@@ static int kvm_write_guest_virt_system(
                }
                ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
                if (ret < 0) {
 -                      r = X86EMUL_UNHANDLEABLE;
 +                      r = X86EMUL_IO_NEEDED;
                        goto out;
                }
  
  static int emulator_read_emulated(unsigned long addr,
                                  void *val,
                                  unsigned int bytes,
 +                                unsigned int *error_code,
                                  struct kvm_vcpu *vcpu)
  {
        gpa_t                 gpa;
 -      u32 error_code;
  
        if (vcpu->mmio_read_completed) {
                memcpy(val, vcpu->mmio_data, bytes);
                return X86EMUL_CONTINUE;
        }
  
 -      gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
 +      gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
  
 -      if (gpa == UNMAPPED_GVA) {
 -              kvm_inject_page_fault(vcpu, addr, error_code);
 +      if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
 -      }
  
        /* For APIC access vmexit */
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@@ -3445,12 -3370,11 +3445,12 @@@ mmio
        trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
  
        vcpu->mmio_needed = 1;
 -      vcpu->mmio_phys_addr = gpa;
 -      vcpu->mmio_size = bytes;
 -      vcpu->mmio_is_write = 0;
 +      vcpu->run->exit_reason = KVM_EXIT_MMIO;
 +      vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
 +      vcpu->run->mmio.len = vcpu->mmio_size = bytes;
 +      vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
  
 -      return X86EMUL_UNHANDLEABLE;
 +      return X86EMUL_IO_NEEDED;
  }
  
  int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
  static int emulator_write_emulated_onepage(unsigned long addr,
                                           const void *val,
                                           unsigned int bytes,
 +                                         unsigned int *error_code,
                                           struct kvm_vcpu *vcpu)
  {
        gpa_t                 gpa;
 -      u32 error_code;
  
 -      gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
 +      gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
  
 -      if (gpa == UNMAPPED_GVA) {
 -              kvm_inject_page_fault(vcpu, addr, error_code);
 +      if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
 -      }
  
        /* For APIC access vmexit */
        if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@@ -3494,11 -3420,10 +3494,11 @@@ mmio
                return X86EMUL_CONTINUE;
  
        vcpu->mmio_needed = 1;
 -      vcpu->mmio_phys_addr = gpa;
 -      vcpu->mmio_size = bytes;
 -      vcpu->mmio_is_write = 1;
 -      memcpy(vcpu->mmio_data, val, bytes);
 +      vcpu->run->exit_reason = KVM_EXIT_MMIO;
 +      vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
 +      vcpu->run->mmio.len = vcpu->mmio_size = bytes;
 +      vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
 +      memcpy(vcpu->run->mmio.data, val, bytes);
  
        return X86EMUL_CONTINUE;
  }
  int emulator_write_emulated(unsigned long addr,
                            const void *val,
                            unsigned int bytes,
 +                          unsigned int *error_code,
                            struct kvm_vcpu *vcpu)
  {
        /* Crossing a page boundary? */
                int rc, now;
  
                now = -addr & ~PAGE_MASK;
 -              rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
 +              rc = emulator_write_emulated_onepage(addr, val, now, error_code,
 +                                                   vcpu);
                if (rc != X86EMUL_CONTINUE)
                        return rc;
                addr += now;
                val += now;
                bytes -= now;
        }
 -      return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
 +      return emulator_write_emulated_onepage(addr, val, bytes, error_code,
 +                                             vcpu);
  }
 -EXPORT_SYMBOL_GPL(emulator_write_emulated);
  
  /*
   * Run cmpxchg() on *ptr viewed as type t, with *old as the expected value and
   * *new as the replacement.  Evaluates to non-zero when the value returned by
   * cmpxchg() equals *old.  Note that `old` is dereferenced twice.
   */
  #define CMPXCHG_TYPE(t, ptr, old, new) \
        (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
@@@ -3540,7 -3463,6 +3540,7 @@@ static int emulator_cmpxchg_emulated(un
                                     const void *old,
                                     const void *new,
                                     unsigned int bytes,
 +                                   unsigned int *error_code,
                                     struct kvm_vcpu *vcpu)
  {
        gpa_t gpa;
                goto emul_write;
  
        page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 +      if (is_error_page(page)) {
 +              kvm_release_page_clean(page);
 +              goto emul_write;
 +      }
  
        kaddr = kmap_atomic(page, KM_USER0);
        kaddr += offset_in_page(gpa);
  emul_write:
        printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
  
 -      return emulator_write_emulated(addr, new, bytes, vcpu);
 +      return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
  }
  
  static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@@ -3686,38 -3604,42 +3686,38 @@@ int emulate_invlpg(struct kvm_vcpu *vcp
        return X86EMUL_CONTINUE;
  }
  
  /*
   * kvm_emulate_wbinvd() (the '+' side of this hunk): emulate a guest WBINVD.
   * Returns X86EMUL_CONTINUE immediately when need_emulate_wbinvd() is false.
   * Otherwise, if kvm_x86_ops->has_wbinvd_exit() reports true, wbinvd_ipi is
   * dispatched with smp_call_function_many() to the CPUs recorded in
   * vcpu->arch.wbinvd_dirty_mask and that mask is cleared; wbinvd() is then
   * called and X86EMUL_CONTINUE returned.
   *
   * The '-' lines are the old emulate_clts() body, which the diff displaces.
   */
 -int emulate_clts(struct kvm_vcpu *vcpu)
 +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
  {
 -      kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
 -      kvm_x86_ops->fpu_activate(vcpu);
 +      if (!need_emulate_wbinvd(vcpu))
 +              return X86EMUL_CONTINUE;
 +
 +      if (kvm_x86_ops->has_wbinvd_exit()) {
 +              smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
 +                              wbinvd_ipi, NULL, 1);
 +              cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
 +      }
 +      wbinvd();
        return X86EMUL_CONTINUE;
  }
 +EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
  
  /*
   * emulate_clts() (the '+' side of this hunk): write CR0 through
   * kvm_x86_ops->set_cr0() using the value kvm_read_cr0_bits() returns for the
   * mask ~X86_CR0_TS (i.e. CR0 with TS masked out), then call
   * kvm_x86_ops->fpu_activate().  Always returns X86EMUL_CONTINUE.
   *
   * The '-' lines are the old ctxt-based emulator_get_dr().
   */
 -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 +int emulate_clts(struct kvm_vcpu *vcpu)
  {
 -      return kvm_get_dr(ctxt->vcpu, dr, dest);
 +      kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
 +      kvm_x86_ops->fpu_activate(vcpu);
 +      return X86EMUL_CONTINUE;
  }
  
  /*
   * emulator_get_dr() (the '+' side of this hunk): thin wrapper that forwards
   * to _kvm_get_dr(vcpu, dr, dest) and returns its result.  It now takes the
   * vCPU directly instead of an x86_emulate_ctxt.
   *
   * The '-' lines are the old emulator_set_dr(), which masked the value by
   * emulation mode before calling kvm_set_dr().
   */
 -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 +int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
  {
 -      unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
 -
 -      return kvm_set_dr(ctxt->vcpu, dr, value & mask);
 +      return _kvm_get_dr(vcpu, dr, dest);
  }
  
  /*
   * emulator_set_dr() (the '+' side of this hunk): thin wrapper that forwards
   * to __kvm_set_dr(vcpu, dr, value) and returns its result; the value is
   * passed through unmodified.
   *
   * The '-' lines remove kvm_report_emulation_failure() and its export.
   */
 -void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 +int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
  {
 -      u8 opcodes[4];
 -      unsigned long rip = kvm_rip_read(vcpu);
 -      unsigned long rip_linear;
 -
 -      if (!printk_ratelimit())
 -              return;
  
 -      rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 -
 -      kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
 -
 -      printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
 -             context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
 +      return __kvm_set_dr(vcpu, dr, value);
  }
 -EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
  
  static u64 mk_cr_64(u64 curr_cr, u32 new_val)
  {
@@@ -3752,32 -3674,27 +3752,32 @@@ static unsigned long emulator_get_cr(in
        return value;
  }
  
  /*
   * Emulator callback: write guest control register number `cr` with `val`.
   *
   * CR0, CR3, CR4 and CR8 are forwarded to kvm_set_cr0(), kvm_set_cr3(),
   * kvm_set_cr4() and __kvm_set_cr8() respectively and the helper's result is
   * returned.  CR2 is stored directly in vcpu->arch.cr2 (result 0).  Any other
   * register number is logged via vcpu_printf() and reported as -1.
   *
   * The '+' lines change the return type from void to int so that the helper
   * results are no longer discarded.
   */
 -static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
 +static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
  {
 +      int res = 0;
 +
        switch (cr) {
        case 0:
 -              kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
 +              res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
                break;
        case 2:
                vcpu->arch.cr2 = val;
                break;
        case 3:
 -              kvm_set_cr3(vcpu, val);
 +              res = kvm_set_cr3(vcpu, val);
                break;
        case 4:
 -              kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
 +              res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
                break;
        case 8:
                /* Only the low four bits of val are passed on for CR8. */
 -              kvm_set_cr8(vcpu, val & 0xfUL);
 +              res = __kvm_set_cr8(vcpu, val & 0xfUL);
                break;
        default:
                vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 +              res = -1;
        }
 +
 +      return res;
  }
  
  static int emulator_get_cpl(struct kvm_vcpu *vcpu)
@@@ -3790,12 -3707,6 +3790,12 @@@ static void emulator_get_gdt(struct des
        kvm_x86_ops->get_gdt(vcpu, dt);
  }
  
 +static unsigned long emulator_get_cached_segment_base(int seg,
 +                                                    struct kvm_vcpu *vcpu)
 +{
 +      return get_segment_base(vcpu, seg);
 +}
 +
  static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
                                           struct kvm_vcpu *vcpu)
  {
@@@ -3868,6 -3779,11 +3868,6 @@@ static void emulator_set_segment_select
        kvm_set_segment(vcpu, &kvm_seg, seg);
  }
  
 -static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 -{
 -      kvm_x86_ops->set_rflags(vcpu, rflags);
 -}
 -
  static struct x86_emulate_ops emulate_ops = {
        .read_std            = kvm_read_guest_virt_system,
        .write_std           = kvm_write_guest_virt_system,
        .set_cached_descriptor = emulator_set_cached_descriptor,
        .get_segment_selector = emulator_get_segment_selector,
        .set_segment_selector = emulator_set_segment_selector,
 +      .get_cached_segment_base = emulator_get_cached_segment_base,
        .get_gdt             = emulator_get_gdt,
        .get_cr              = emulator_get_cr,
        .set_cr              = emulator_set_cr,
        .cpl                 = emulator_get_cpl,
 -      .set_rflags          = emulator_set_rflags,
 +      .get_dr              = emulator_get_dr,
 +      .set_dr              = emulator_set_dr,
 +      .set_msr             = kvm_set_msr,
 +      .get_msr             = kvm_get_msr,
  };
  
  static void cache_all_regs(struct kvm_vcpu *vcpu)
        vcpu->arch.regs_dirty = ~0;
  }
  
 +static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 +{
 +      u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
 +      /*
 +       * an sti; sti; sequence only disable interrupts for the first
 +       * instruction. So, if the last instruction, be it emulated or
 +       * not, left the system with the INT_STI flag enabled, it
 +       * means that the last instruction is an sti. We should not
 +       * leave the flag on in this case. The same goes for mov ss
 +       */
 +      if (!(int_shadow & mask))
 +              kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
 +}
 +
 +static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 +{
 +      struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 +      if (ctxt->exception == PF_VECTOR)
 +              kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
 +      else if (ctxt->error_code_valid)
 +              kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
 +      else
 +              kvm_queue_exception(vcpu, ctxt->exception);
 +}
 +
 +static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 +{
 +      ++vcpu->stat.insn_emulation_fail;
 +      trace_kvm_emulate_insn_failed(vcpu);
 +      vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 +      vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 +      vcpu->run->internal.ndata = 0;
 +      kvm_queue_exception(vcpu, UD_VECTOR);
 +      return EMULATE_FAIL;
 +}
 +
 +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 +{
 +      gpa_t gpa;
 +
 +      if (tdp_enabled)
 +              return false;
 +
 +      /*
 +       * if emulation was due to access to shadowed page table
 +       * and it failed try to unshadow page and re-entetr the
 +       * guest to let CPU execute the instruction.
 +       */
 +      if (kvm_mmu_unprotect_page_virt(vcpu, gva))
 +              return true;
 +
 +      gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
 +
 +      if (gpa == UNMAPPED_GVA)
 +              return true; /* let cpu generate fault */
 +
 +      if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
 +              return true;
 +
 +      return false;
 +}
 +
  int emulate_instruction(struct kvm_vcpu *vcpu,
                        unsigned long cr2,
                        u16 error_code,
                        int emulation_type)
  {
 -      int r, shadow_mask;
 -      struct decode_cache *c;
 -      struct kvm_run *run = vcpu->run;
 +      int r;
 +      struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
  
        kvm_clear_exception_queue(vcpu);
        vcpu->arch.mmio_fault_cr2 = cr2;
         */
        cache_all_regs(vcpu);
  
 -      vcpu->mmio_is_write = 0;
 -
        if (!(emulation_type & EMULTYPE_NO_DECODE)) {
                int cs_db, cs_l;
                kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
                        ? X86EMUL_MODE_VM86 : cs_l
                        ? X86EMUL_MODE_PROT64 : cs_db
                        ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 +              memset(c, 0, sizeof(struct decode_cache));
 +              memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
 +              vcpu->arch.emulate_ctxt.interruptibility = 0;
 +              vcpu->arch.emulate_ctxt.exception = -1;
  
                r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
                trace_kvm_emulate_insn_start(vcpu);
  
                /* Only allow emulation of specific instructions on #UD
                 * (namely VMMCALL, sysenter, sysexit, syscall)*/
 -              c = &vcpu->arch.emulate_ctxt.decode;
                if (emulation_type & EMULTYPE_TRAP_UD) {
                        if (!c->twobyte)
                                return EMULATE_FAIL;
  
                ++vcpu->stat.insn_emulation;
                if (r)  {
 -                      ++vcpu->stat.insn_emulation_fail;
 -                      trace_kvm_emulate_insn_failed(vcpu);
 -                      if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
 +                      if (reexecute_instruction(vcpu, cr2))
                                return EMULATE_DONE;
 -                      return EMULATE_FAIL;
 +                      if (emulation_type & EMULTYPE_SKIP)
 +                              return EMULATE_FAIL;
 +                      return handle_emulation_failure(vcpu);
                }
        }
  
                return EMULATE_DONE;
        }
  
 +      /* this is needed for vmware backdor interface to work since it
 +         changes registers values  during IO operation */
 +      memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
 +
  restart:
        r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
 -      shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
  
 -      if (r == 0)
 -              kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
 +      if (r) { /* emulation failed */
 +              if (reexecute_instruction(vcpu, cr2))
 +                      return EMULATE_DONE;
  
 -      if (vcpu->arch.pio.count) {
 -              if (!vcpu->arch.pio.in)
 -                      vcpu->arch.pio.count = 0;
 -              return EMULATE_DO_MMIO;
 +              return handle_emulation_failure(vcpu);
        }
  
 -      if (r || vcpu->mmio_is_write) {
 -              run->exit_reason = KVM_EXIT_MMIO;
 -              run->mmio.phys_addr = vcpu->mmio_phys_addr;
 -              memcpy(run->mmio.data, vcpu->mmio_data, 8);
 -              run->mmio.len = vcpu->mmio_size;
 -              run->mmio.is_write = vcpu->mmio_is_write;
 +      toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
 +      kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 +      memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 +      kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
 +
 +      if (vcpu->arch.emulate_ctxt.exception >= 0) {
 +              inject_emulated_exception(vcpu);
 +              return EMULATE_DONE;
        }
  
 -      if (r) {
 -              if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
 -                      goto done;
 -              if (!vcpu->mmio_needed) {
 -                      ++vcpu->stat.insn_emulation_fail;
 -                      trace_kvm_emulate_insn_failed(vcpu);
 -                      kvm_report_emulation_failure(vcpu, "mmio");
 -                      return EMULATE_FAIL;
 -              }
 +      if (vcpu->arch.pio.count) {
 +              if (!vcpu->arch.pio.in)
 +                      vcpu->arch.pio.count = 0;
                return EMULATE_DO_MMIO;
        }
  
 -      if (vcpu->mmio_is_write) {
 -              vcpu->mmio_needed = 0;
 +      if (vcpu->mmio_needed) {
 +              if (vcpu->mmio_is_write)
 +                      vcpu->mmio_needed = 0;
                return EMULATE_DO_MMIO;
        }
  
 -done:
 -      if (vcpu->arch.exception.pending)
 -              vcpu->arch.emulate_ctxt.restart = false;
 -
        if (vcpu->arch.emulate_ctxt.restart)
                goto restart;
  
@@@ -4252,9 -4108,6 +4252,9 @@@ int kvm_arch_init(void *opaque
  
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
  
 +      if (cpu_has_xsave)
 +              host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 +
        return 0;
  
  out:
@@@ -4417,7 -4270,7 +4417,7 @@@ int kvm_fix_hypercall(struct kvm_vcpu *
  
        kvm_x86_ops->patch_hypercall(vcpu, instruction);
  
 -      return emulator_write_emulated(rip, instruction, 3, vcpu);
 +      return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
  }
  
  void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
@@@ -4653,78 -4506,59 +4653,78 @@@ static void inject_pending_event(struc
        }
  }
  
 +static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
 +{
 +      if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
 +                      !vcpu->guest_xcr0_loaded) {
 +              /* kvm_set_xcr() also depends on this */
 +              xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 +              vcpu->guest_xcr0_loaded = 1;
 +      }
 +}
 +
 +static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 +{
 +      if (vcpu->guest_xcr0_loaded) {
 +              if (vcpu->arch.xcr0 != host_xcr0)
 +                      xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
 +              vcpu->guest_xcr0_loaded = 0;
 +      }
 +}
 +
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
        int r;
        bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
                vcpu->run->request_interrupt_window;
  
 -      if (vcpu->requests)
 -              if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 -                      kvm_mmu_unload(vcpu);
 -
 -      r = kvm_mmu_reload(vcpu);
 -      if (unlikely(r))
 -              goto out;
 -
        if (vcpu->requests) {
 -              if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 +              if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
 +                      kvm_mmu_unload(vcpu);
 +              if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
                        __kvm_migrate_timers(vcpu);
 -              if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
 +              if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu))
                        kvm_write_guest_time(vcpu);
 -              if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
 +              if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                        kvm_mmu_sync_roots(vcpu);
 -              if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 +              if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
                        kvm_x86_ops->tlb_flush(vcpu);
 -              if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
 -                                     &vcpu->requests)) {
 +              if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
                        goto out;
                }
 -              if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
 +              if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
                        r = 0;
                        goto out;
                }
 -              if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
 +              if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
                        vcpu->fpu_active = 0;
                        kvm_x86_ops->fpu_deactivate(vcpu);
                }
        }
  
 +      r = kvm_mmu_reload(vcpu);
 +      if (unlikely(r))
 +              goto out;
 +
        preempt_disable();
  
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
 +      kvm_load_guest_xcr0(vcpu);
  
 -      local_irq_disable();
 +      atomic_set(&vcpu->guest_mode, 1);
 +      smp_wmb();
  
 -      clear_bit(KVM_REQ_KICK, &vcpu->requests);
 -      smp_mb__after_clear_bit();
 +      local_irq_disable();
  
 -      if (vcpu->requests || need_resched() || signal_pending(current)) {
 -              set_bit(KVM_REQ_KICK, &vcpu->requests);
 +      if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
 +          || need_resched() || signal_pending(current)) {
 +              atomic_set(&vcpu->guest_mode, 0);
 +              smp_wmb();
                local_irq_enable();
                preempt_enable();
                r = 1;
        if (hw_breakpoint_active())
                hw_breakpoint_restore();
  
 -      set_bit(KVM_REQ_KICK, &vcpu->requests);
 +      atomic_set(&vcpu->guest_mode, 0);
 +      smp_wmb();
        local_irq_enable();
  
        ++vcpu->stat.exits;
@@@ -4832,7 -4665,7 +4832,7 @@@ static int __vcpu_run(struct kvm_vcpu *
                        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
                        kvm_vcpu_block(vcpu);
                        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 -                      if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
 +                      if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
                        {
                                switch(vcpu->arch.mp_state) {
                                case KVM_MP_STATE_HALTED:
@@@ -4884,6 -4717,8 +4884,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        int r;
        sigset_t sigsaved;
  
 -      vcpu_load(vcpu);
 -
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
  
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 -              if (r == EMULATE_DO_MMIO) {
 +              if (r != EMULATE_DONE) {
                        r = 0;
                        goto out;
                }
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  
 -      vcpu_put(vcpu);
        return r;
  }
  
  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
  {
 -      vcpu_load(vcpu);
 -
        regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
        regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        regs->rip = kvm_rip_read(vcpu);
        regs->rflags = kvm_get_rflags(vcpu);
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
  {
 -      vcpu_load(vcpu);
 -
        kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
        kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
        kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
  
        vcpu->arch.exception.pending = false;
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
@@@ -4998,6 -4842,8 +4998,6 @@@ int kvm_arch_vcpu_ioctl_get_sregs(struc
  {
        struct desc_ptr dt;
  
 -      vcpu_load(vcpu);
 -
        kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
                set_bit(vcpu->arch.interrupt.nr,
                        (unsigned long *)sregs->interrupt_bitmap);
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
  {
 -      vcpu_load(vcpu);
        mp_state->mp_state = vcpu->arch.mp_state;
 -      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
  {
 -      vcpu_load(vcpu);
        vcpu->arch.mp_state = mp_state->mp_state;
 -      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
                    bool has_error_code, u32 error_code)
  {
 +      struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
        int cs_db, cs_l, ret;
        cache_all_regs(vcpu);
  
                ? X86EMUL_MODE_VM86 : cs_l
                ? X86EMUL_MODE_PROT64 : cs_db
                ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 +      memset(c, 0, sizeof(struct decode_cache));
 +      memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
  
        ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
                                   tss_selector, reason, has_error_code,
        if (ret)
                return EMULATE_FAIL;
  
 +      memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
 +      kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
        kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
        return EMULATE_DONE;
  }
@@@ -5088,6 -4935,8 +5088,6 @@@ int kvm_arch_vcpu_ioctl_set_sregs(struc
        int pending_vec, max_bits;
        struct desc_ptr dt;
  
 -      vcpu_load(vcpu);
 -
        dt.size = sregs->idt.limit;
        dt.address = sregs->idt.base;
        kvm_x86_ops->set_idt(vcpu, &dt);
            !is_protmode(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
@@@ -5156,10 -5007,12 +5156,10 @@@ int kvm_arch_vcpu_ioctl_set_guest_debug
        unsigned long rflags;
        int i, r;
  
 -      vcpu_load(vcpu);
 -
        if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
                r = -EBUSY;
                if (vcpu->arch.exception.pending)
 -                      goto unlock_out;
 +                      goto out;
                if (dbg->control & KVM_GUESTDBG_INJECT_DB)
                        kvm_queue_exception(vcpu, DB_VECTOR);
                else
  
        r = 0;
  
 -unlock_out:
 -      vcpu_put(vcpu);
 +out:
  
        return r;
  }
  
 -/*
 - * fxsave fpu state.  Taken from x86_64/processor.h.  To be killed when
 - * we have asm/x86/processor.h
 - */
 -struct fxsave {
 -      u16     cwd;
 -      u16     swd;
 -      u16     twd;
 -      u16     fop;
 -      u64     rip;
 -      u64     rdp;
 -      u32     mxcsr;
 -      u32     mxcsr_mask;
 -      u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
 -#ifdef CONFIG_X86_64
 -      u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 256 bytes */
 -#else
 -      u32     xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
 -#endif
 -};
 -
  /*
   * Translate a guest virtual address to a guest physical address.
   */
@@@ -5216,6 -5091,7 +5216,6 @@@ int kvm_arch_vcpu_ioctl_translate(struc
        gpa_t gpa;
        int idx;
  
 -      vcpu_load(vcpu);
        idx = srcu_read_lock(&vcpu->kvm->srcu);
        gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        tr->valid = gpa != UNMAPPED_GVA;
        tr->writeable = 1;
        tr->usermode = 0;
 -      vcpu_put(vcpu);
  
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
 -
 -      vcpu_load(vcpu);
 +      struct i387_fxsave_struct *fxsave =
 +                      &vcpu->arch.guest_fpu.state->fxsave;
  
        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
        fpu->last_dp = fxsave->rdp;
        memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
 -
 -      vcpu_load(vcpu);
 +      struct i387_fxsave_struct *fxsave =
 +                      &vcpu->arch.guest_fpu.state->fxsave;
  
        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
        fxsave->rdp = fpu->last_dp;
        memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
  
 -      vcpu_put(vcpu);
 -
        return 0;
  }
  
 -void fx_init(struct kvm_vcpu *vcpu)
 +int fx_init(struct kvm_vcpu *vcpu)
  {
 -      unsigned after_mxcsr_mask;
 +      int err;
 +
 +      err = fpu_alloc(&vcpu->arch.guest_fpu);
 +      if (err)
 +              return err;
 +
 +      fpu_finit(&vcpu->arch.guest_fpu);
  
        /*
 -       * Touch the fpu the first time in non atomic context as if
 -       * this is the first fpu instruction the exception handler
 -       * will fire before the instruction returns and it'll have to
 -       * allocate ram with GFP_KERNEL.
 +       * Ensure guest xcr0 is valid for loading
         */
 -      if (!used_math())
 -              kvm_fx_save(&vcpu->arch.host_fx_image);
 -
 -      /* Initialize guest FPU by resetting ours and saving into guest's */
 -      preempt_disable();
 -      kvm_fx_save(&vcpu->arch.host_fx_image);
 -      kvm_fx_finit();
 -      kvm_fx_save(&vcpu->arch.guest_fx_image);
 -      kvm_fx_restore(&vcpu->arch.host_fx_image);
 -      preempt_enable();
 +      vcpu->arch.xcr0 = XSTATE_FP;
  
        vcpu->arch.cr0 |= X86_CR0_ET;
 -      after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
 -      vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
 -      memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
 -             0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
 +
 +      return 0;
  }
  EXPORT_SYMBOL_GPL(fx_init);
  
 +static void fx_free(struct kvm_vcpu *vcpu)
 +{
 +      fpu_free(&vcpu->arch.guest_fpu);
 +}
 +
  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
  {
        if (vcpu->guest_fpu_loaded)
                return;
  
 +      /*
 +       * Restore all possible states in the guest,
 +       * and assume host would use all available bits.
 +       * Guest xcr0 would be loaded later.
 +       */
 +      kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
 -      kvm_fx_save(&vcpu->arch.host_fx_image);
 -      kvm_fx_restore(&vcpu->arch.guest_fx_image);
 +      unlazy_fpu(current);
 +      fpu_restore_checking(&vcpu->arch.guest_fpu);
        trace_kvm_fpu(1);
  }
  
  void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
  {
 +      kvm_put_guest_xcr0(vcpu);
 +
        if (!vcpu->guest_fpu_loaded)
                return;
  
        vcpu->guest_fpu_loaded = 0;
 -      kvm_fx_save(&vcpu->arch.guest_fx_image);
 -      kvm_fx_restore(&vcpu->arch.host_fx_image);
 +      fpu_save_init(&vcpu->arch.guest_fpu);
        ++vcpu->stat.fpu_reload;
 -      set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 +      kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
        trace_kvm_fpu(0);
  }
  
@@@ -5325,8 -5204,6 +5325,8 @@@ void kvm_arch_vcpu_free(struct kvm_vcp
                vcpu->arch.time_page = NULL;
        }
  
 +      free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
 +      fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
  }
  
@@@ -5340,6 -5217,9 +5340,6 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
  {
        int r;
  
 -      /* We do fxsave: this must be aligned. */
 -      BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
 -
        vcpu->arch.mtrr_state.have_fixed = 1;
        vcpu_load(vcpu);
        r = kvm_arch_vcpu_reset(vcpu);
@@@ -5361,7 -5241,6 +5361,7 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
  
 +      fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
  }
  
@@@ -5455,12 -5334,7 +5455,12 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
        }
        vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
  
 +      if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
 +              goto fail_free_mce_banks;
 +
        return 0;
 +fail_free_mce_banks:
 +      kfree(vcpu->arch.mce_banks);
  fail_free_lapic:
        kvm_free_lapic(vcpu);
  fail_mmu_destroy:
@@@ -5490,6 -5364,12 +5490,6 @@@ struct  kvm *kvm_arch_create_vm(void
        if (!kvm)
                return ERR_PTR(-ENOMEM);
  
 -      kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
 -      if (!kvm->arch.aliases) {
 -              kfree(kvm);
 -              return ERR_PTR(-ENOMEM);
 -      }
 -
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
  
@@@ -5532,12 -5412,12 +5532,12 @@@ static void kvm_free_vcpus(struct kvm *
  void kvm_arch_sync_events(struct kvm *kvm)
  {
        kvm_free_all_assigned_devices(kvm);
 +      kvm_free_pit(kvm);
  }
  
  void kvm_arch_destroy_vm(struct kvm *kvm)
  {
        kvm_iommu_unmap_guest(kvm);
 -      kvm_free_pit(kvm);
        kfree(kvm->arch.vpic);
        kfree(kvm->arch.vioapic);
        kvm_free_vcpus(kvm);
        if (kvm->arch.ept_identity_pagetable)
                put_page(kvm->arch.ept_identity_pagetable);
        cleanup_srcu_struct(&kvm->srcu);
 -      kfree(kvm->arch.aliases);
        kfree(kvm);
  }
  
@@@ -5557,11 -5438,6 +5557,11 @@@ int kvm_arch_prepare_memory_region(stru
                                int user_alloc)
  {
        int npages = memslot->npages;
 +      int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
 +
 +      /* Prevent internal slot pages from being moved by fork()/COW. */
 +      if (memslot->id >= KVM_MEMORY_SLOTS)
 +              map_flags = MAP_SHARED | MAP_ANONYMOUS;
  
        /*To keep backward compatibility with older userspace,
         *x86 needs to hanlde !user_alloc case.
                        userspace_addr = do_mmap(NULL, 0,
                                                 npages * PAGE_SIZE,
                                                 PROT_READ | PROT_WRITE,
 -                                               MAP_PRIVATE | MAP_ANONYMOUS,
 +                                               map_flags,
                                                 0);
                        up_write(&current->mm->mmap_sem);
  
@@@ -5647,7 -5523,7 +5647,7 @@@ void kvm_vcpu_kick(struct kvm_vcpu *vcp
  
        me = get_cpu();
        if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
 -              if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
 +              if (atomic_xchg(&vcpu->guest_mode, 0))
                        smp_send_reschedule(cpu);
        put_cpu();
  }
This page took 0.196702 seconds and 4 git commands to generate.