unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
+ unsigned int efer_lma:1;
};
};
*
* Note that while the PKRU state lives inside the fpu registers,
* it is switched out separately at VMENTER and VMEXIT time. The
- * "guest_fpu" state here contains the guest FPU context, with the
+ * "guest_fpstate" state here contains the guest FPU context, with the
* host PKRU bits.
*/
- struct fpu *user_fpu;
- struct fpu *guest_fpu;
+ struct fpu_guest guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
struct kvm_pio_request pio;
void *pio_data;
- void *guest_ins_data;
+ void *sev_pio_data;
+ unsigned sev_pio_count;
u8 event_exit_inst_len;
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
-void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
-
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
unsigned bit;
bool wp;
- if (!is_cr4_pke(mmu)) {
- mmu->pkru_mask = 0;
+ mmu->pkru_mask = 0;
+
+ if (!is_cr4_pke(mmu))
return;
- }
wp = is_cr0_wp(mmu);
/* PKEY and LA57 are active iff long mode is active. */
ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
+ ext.efer_lma = ____is_efer_lma(regs);
}
ext.valid = 1;
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
-#include <asm/fpu/internal.h>
#include <asm/pkru.h>
#include <asm/trapnr.h>
+#include <asm/fpu/xcr.h>
#include "x86.h"
#include "svm.h"
return true;
}
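+/*
+ * SEV and SEV-ES ASIDs are accounted as separate misc cgroup resources;
+ * pick the resource type from the guest's SEV-ES state and charge or
+ * uncharge a single ASID against it.
+ */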
+static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
+{
+ enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+ return misc_cg_try_charge(type, sev->misc_cg, 1);
+}
+
+static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
+{
+ enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+ misc_cg_uncharge(type, sev->misc_cg, 1);
+}
+
static int sev_asid_new(struct kvm_sev_info *sev)
{
int asid, min_asid, max_asid, ret;
bool retry = true;
- enum misc_res_type type;
- type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
- ret = misc_cg_try_charge(type, sev->misc_cg, 1);
+ ret = sev_misc_cg_try_charge(sev);
if (ret) {
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
return asid;
e_uncharge:
- misc_cg_uncharge(type, sev->misc_cg, 1);
+ sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
return ret;
{
struct svm_cpu_data *sd;
int cpu;
- enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
mutex_unlock(&sev_bitmap_lock);
- type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
- misc_cg_uncharge(type, sev->misc_cg, 1);
+ sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
}
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- bool es_active = argp->id == KVM_SEV_ES_INIT;
int asid, ret;
if (kvm->created_vcpus)
if (unlikely(sev->active))
return ret;
- sev->es_active = es_active;
+ sev->active = true;
+ sev->es_active = argp->id == KVM_SEV_ES_INIT;
asid = sev_asid_new(sev);
if (asid < 0)
goto e_no_asid;
if (ret)
goto e_free;
- sev->active = true;
- sev->asid = asid;
INIT_LIST_HEAD(&sev->regions_list);
return 0;
sev->asid = 0;
e_no_asid:
sev->es_active = false;
+ sev->active = false;
return ret;
}
* traditional VMSA as it has been built so far (in prep
* for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
*/
- memcpy(svm->vmsa, save, sizeof(*save));
+ memcpy(svm->sev_es.vmsa, save, sizeof(*save));
return 0;
}
* the VMSA memory content (i.e. it will write the same memory region
* with the guest's key), so invalidate it first.
*/
- clflush_cache_range(svm->vmsa, PAGE_SIZE);
+ clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
vmsa.reserved = 0;
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
- vmsa.address = __sme_pa(svm->vmsa);
+ vmsa.address = __sme_pa(svm->sev_es.vmsa);
vmsa.len = PAGE_SIZE;
- return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+ if (ret)
+ return ret;
+
+ vcpu->arch.guest_state_protected = true;
+ return 0;
}
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_trans;
}
+ /*
+ * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA: the PSP
+ * encrypts the written data with the guest's key, and the cache may
+ * contain dirty, unencrypted data.
+ */
+ sev_clflush_pages(guest_page, n);
+
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
- static bool cmd_allowed_from_miror(u32 cmd_id)
+ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
/*
* Allow mirror VMs to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
return false;
}
+static int sev_lock_for_migration(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ /*
+ * Bail if this VM is already involved in a migration to avoid deadlock
+ * between two VMs trying to migrate to/from each other.
+ */
+ if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
+ return -EBUSY;
+
+ mutex_lock(&kvm->lock);
+
+ return 0;
+}
+
+static void sev_unlock_after_migration(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ mutex_unlock(&kvm->lock);
+ atomic_set_release(&sev->migration_in_progress, 0);
+}
+
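+/*
+ * Grab every vCPU mutex of the VM; if any vCPU cannot be locked, drop the
+ * mutexes already taken and bail out.
+ */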
+static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i, j;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (mutex_lock_killable(&vcpu->mutex))
+ goto out_unlock;
+ }
+
+ return 0;
+
+out_unlock:
+ kvm_for_each_vcpu(j, vcpu, kvm) {
+ if (i == j)
+ break;
+
+ mutex_unlock(&vcpu->mutex);
+ }
+ return -EINTR;
+}
+
+static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_unlock(&vcpu->mutex);
+ }
+}
+
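+/*
+ * Hand the SEV context (ASID, firmware handle, locked-pages accounting and
+ * the encrypted-regions list) over to the destination and deactivate the
+ * source.
+ */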
+static void sev_migrate_from(struct kvm_sev_info *dst,
+ struct kvm_sev_info *src)
+{
+ dst->active = true;
+ dst->asid = src->asid;
+ dst->handle = src->handle;
+ dst->pages_locked = src->pages_locked;
+
+ src->asid = 0;
+ src->active = false;
+ src->handle = 0;
+ src->pages_locked = 0;
+
+ INIT_LIST_HEAD(&dst->regions_list);
+ list_replace_init(&src->regions_list, &dst->regions_list);
+}
+
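+/*
+ * Move per-vCPU SEV-ES state from @src to @dst. Both VMs must have the
+ * same number of vCPUs and every source vCPU must already have protected
+ * guest state.
+ */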
+static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
+{
+ int i;
+ struct kvm_vcpu *dst_vcpu, *src_vcpu;
+ struct vcpu_svm *dst_svm, *src_svm;
+
+ if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ if (!src_vcpu->arch.guest_state_protected)
+ return -EINVAL;
+ }
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ src_svm = to_svm(src_vcpu);
+ dst_vcpu = kvm_get_vcpu(dst, i);
+ dst_svm = to_svm(dst_vcpu);
+
+ /*
+ * Transfer VMSA and GHCB state to the destination. Nullify and
+ * clear source fields as appropriate; the state now belongs to
+ * the destination.
+ */
+ memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
+ dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
+ dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
+ dst_vcpu->arch.guest_state_protected = true;
+
+ memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
+ src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
+ src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ src_vcpu->arch.guest_state_protected = false;
+ }
+ to_kvm_svm(src)->sev_info.es_active = false;
+ to_kvm_svm(dst)->sev_info.es_active = true;
+
+ return 0;
+}
+
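+/*
+ * Intra-host migration: move the SEV context from the VM referenced by
+ * @source_fd into @kvm, leaving the source VM dead.
+ */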
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
+{
+ struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *src_sev, *cg_cleanup_sev;
+ struct file *source_kvm_file;
+ struct kvm *source_kvm;
+ bool charged = false;
+ int ret;
+
+ ret = sev_lock_for_migration(kvm);
+ if (ret)
+ return ret;
+
+ if (sev_guest(kvm)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ source_kvm_file = fget(source_fd);
+ if (!file_is_kvm(source_kvm_file)) {
+ ret = -EBADF;
+ goto out_fput;
+ }
+
+ source_kvm = source_kvm_file->private_data;
+ ret = sev_lock_for_migration(source_kvm);
+ if (ret)
+ goto out_fput;
+
+ if (!sev_guest(source_kvm)) {
+ ret = -EINVAL;
+ goto out_source;
+ }
+
+ src_sev = &to_kvm_svm(source_kvm)->sev_info;
+ dst_sev->misc_cg = get_current_misc_cg();
+ cg_cleanup_sev = dst_sev;
+ if (dst_sev->misc_cg != src_sev->misc_cg) {
+ ret = sev_misc_cg_try_charge(dst_sev);
+ if (ret)
+ goto out_dst_cgroup;
+ charged = true;
+ }
+
+ ret = sev_lock_vcpus_for_migration(kvm);
+ if (ret)
+ goto out_dst_cgroup;
+ ret = sev_lock_vcpus_for_migration(source_kvm);
+ if (ret)
+ goto out_dst_vcpu;
+
+ if (sev_es_guest(source_kvm)) {
+ ret = sev_es_migrate_from(kvm, source_kvm);
+ if (ret)
+ goto out_source_vcpu;
+ }
+ sev_migrate_from(dst_sev, src_sev);
+ kvm_vm_dead(source_kvm);
+ cg_cleanup_sev = src_sev;
+ ret = 0;
+
+out_source_vcpu:
+ sev_unlock_vcpus_for_migration(source_kvm);
+out_dst_vcpu:
+ sev_unlock_vcpus_for_migration(kvm);
+out_dst_cgroup:
+ /* Operates on the source on success, on the destination on failure. */
+ if (charged)
+ sev_misc_cg_uncharge(cg_cleanup_sev);
+ put_misc_cg(cg_cleanup_sev->misc_cg);
+ cg_cleanup_sev->misc_cg = NULL;
+out_source:
+ sev_unlock_after_migration(source_kvm);
+out_fput:
+ if (source_kvm_file)
+ fput(source_kvm_file);
+out_unlock:
+ sev_unlock_after_migration(kvm);
+ return ret;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
/* Only the enc_context_owner handles some memory enc operations. */
if (is_mirroring_enc_context(kvm) &&
- !cmd_allowed_from_miror(sev_cmd.id)) {
+ !is_cmd_allowed_from_mirror(sev_cmd.id)) {
r = -EINVAL;
goto out;
}
mutex_unlock(&source_kvm->lock);
mutex_lock(&kvm->lock);
- if (sev_guest(kvm)) {
+ /*
+ * Disallow out-of-band SEV/SEV-ES init if the target is already an
+ * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
+ * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+ */
+ if (sev_guest(kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_mirror_unlock;
}
svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
- sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
- __free_page(virt_to_page(svm->vmsa));
+ sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+ __free_page(virt_to_page(svm->sev_es.vmsa));
- if (svm->ghcb_sa_free)
- kfree(svm->ghcb_sa);
+ if (svm->sev_es.ghcb_sa_free)
+ kfree(svm->sev_es.ghcb_sa);
}
static void dump_ghcb(struct vcpu_svm *svm)
{
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
unsigned int nbits;
/* Re-use the dump_invalid_vmcb module parameter */
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
/*
* The GHCB protocol so far allows for the following data
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
u64 exit_code;
/*
struct ghcb *ghcb;
u64 exit_code = 0;
- ghcb = svm->ghcb;
+ ghcb = svm->sev_es.ghcb;
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage)
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
- if (!svm->ghcb)
+ if (!svm->sev_es.ghcb)
return;
- if (svm->ghcb_sa_free) {
+ if (svm->sev_es.ghcb_sa_free) {
/*
* The scratch area lives outside the GHCB, so there is a
* buffer that, depending on the operation performed, may
* need to be synced, then freed.
*/
- if (svm->ghcb_sa_sync) {
+ if (svm->sev_es.ghcb_sa_sync) {
kvm_write_guest(svm->vcpu.kvm,
- ghcb_get_sw_scratch(svm->ghcb),
- svm->ghcb_sa, svm->ghcb_sa_len);
- svm->ghcb_sa_sync = false;
+ ghcb_get_sw_scratch(svm->sev_es.ghcb),
+ svm->sev_es.ghcb_sa,
+ svm->sev_es.ghcb_sa_len);
+ svm->sev_es.ghcb_sa_sync = false;
}
- kfree(svm->ghcb_sa);
- svm->ghcb_sa = NULL;
- svm->ghcb_sa_free = false;
+ kfree(svm->sev_es.ghcb_sa);
+ svm->sev_es.ghcb_sa = NULL;
+ svm->sev_es.ghcb_sa_free = false;
}
- trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+ trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
sev_es_sync_to_ghcb(svm);
- kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
- svm->ghcb = NULL;
+ kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
+ svm->sev_es.ghcb = NULL;
}
void pre_sev_run(struct vcpu_svm *svm, int cpu)
static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
struct vmcb_control_area *control = &svm->vmcb->control;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
u64 ghcb_scratch_beg, ghcb_scratch_end;
u64 scratch_gpa_beg, scratch_gpa_end;
void *scratch_va;
return false;
}
- scratch_va = (void *)svm->ghcb;
+ scratch_va = (void *)svm->sev_es.ghcb;
scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
} else {
/*
* the vCPU next time (i.e. a read was requested so the data
* must be written back to the guest memory).
*/
- svm->ghcb_sa_sync = sync;
- svm->ghcb_sa_free = true;
+ svm->sev_es.ghcb_sa_sync = sync;
+ svm->sev_es.ghcb_sa_free = true;
}
- svm->ghcb_sa = scratch_va;
- svm->ghcb_sa_len = len;
+ svm->sev_es.ghcb_sa = scratch_va;
+ svm->sev_es.ghcb_sa_len = len;
return true;
}
return -EINVAL;
}
- if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+ if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
/* Unable to map GHCB from guest */
vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
ghcb_gpa);
return -EINVAL;
}
- svm->ghcb = svm->ghcb_map.hva;
- ghcb = svm->ghcb_map.hva;
+ svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
+ ghcb = svm->sev_es.ghcb_map.hva;
trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
ret = kvm_sev_es_mmio_read(vcpu,
control->exit_info_1,
control->exit_info_2,
- svm->ghcb_sa);
+ svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_MMIO_WRITE:
if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
ret = kvm_sev_es_mmio_write(vcpu,
control->exit_info_1,
control->exit_info_2,
- svm->ghcb_sa);
+ svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
- if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+ int count;
+ int bytes;
+
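+ /*
+ * exit_info_2 holds the rep count from the GHCB; reject counts that do
+ * not fit in an int or whose total byte size would overflow.
+ */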
+ if (svm->vmcb->control.exit_info_2 > INT_MAX)
+ return -EINVAL;
+
+ count = svm->vmcb->control.exit_info_2;
+ if (unlikely(check_mul_overflow(count, size, &bytes)))
+ return -EINVAL;
+
+ if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
- return kvm_sev_es_string_io(&svm->vcpu, size, port,
- svm->ghcb_sa, svm->ghcb_sa_len / size, in);
+ return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
+ count, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
* VMCB page. Do not include the encryption mask on the VMSA physical
* address since hardware will access it using the guest key.
*/
- svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
struct vcpu_svm *svm = to_svm(vcpu);
/* First SIPI: Use the values as initially set by the VMM */
- if (!svm->received_first_sipi) {
- svm->received_first_sipi = true;
+ if (!svm->sev_es.received_first_sipi) {
+ svm->sev_es.received_first_sipi = true;
return;
}
* the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
* non-zero value.
*/
- if (!svm->ghcb)
+ if (!svm->sev_es.ghcb)
return;
- ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
+ atomic_t migration_in_progress;
};
struct kvm_svm {
bool initialized;
};
+struct vcpu_sev_es_state {
+ /* SEV-ES support */
+ struct vmcb_save_area *vmsa;
+ struct ghcb *ghcb;
+ struct kvm_host_map ghcb_map;
+ bool received_first_sipi;
+
+ /* SEV-ES scratch area support */
+ void *ghcb_sa;
+ u32 ghcb_sa_len;
+ bool ghcb_sa_sync;
+ bool ghcb_sa_free;
+};
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
} shadow_msr_intercept;
- /* SEV-ES support */
- struct vmcb_save_area *vmsa;
- struct ghcb *ghcb;
- struct kvm_host_map ghcb_map;
- bool received_first_sipi;
-
- /* SEV-ES scratch area support */
- void *ghcb_sa;
- u64 ghcb_sa_len;
- bool ghcb_sa_sync;
- bool ghcb_sa_free;
+ struct vcpu_sev_es_state sev_es;
bool guest_state_loaded;
};
void recalc_intercepts(struct vcpu_svm *svm);
-static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
+static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
return container_of(kvm, struct kvm_svm, kvm);
}
-static inline bool sev_guest(struct kvm *kvm)
+static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
#endif
}
-static inline bool sev_es_guest(struct kvm *kvm)
+static __always_inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- return sev_guest(kvm) && sev->es_active;
+ return sev->es_active && !WARN_ON_ONCE(!sev->active);
#else
return false;
#endif
return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
}
-static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
#include <asm/mce.h>
#include <asm/pkru.h>
#include <linux/kernel_stat.h>
-#include <asm/fpu/internal.h> /* Ugh! */
+#include <asm/fpu/api.h>
+#include <asm/fpu/xcr.h>
+#include <asm/fpu/xstate.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
u64 __read_mostly supported_xcr0;
EXPORT_SYMBOL_GPL(supported_xcr0);
-static struct kmem_cache *x86_fpu_cache;
-
static struct kmem_cache *x86_emulator_cache;
/*
}
st = (struct kvm_steal_time __user *)ghc->hva;
- if (!user_access_begin(st, sizeof(*st)))
- return;
-
/*
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
u8 st_preempted = 0;
int err = -EFAULT;
+ if (!user_access_begin(st, sizeof(*st)))
+ return;
+
asm volatile("1: xchgb %0, %2\n"
"xor %1, %1\n"
"2:\n"
_ASM_EXTABLE_UA(1b, 2b)
- : "+r" (st_preempted),
- "+&r" (err)
- : "m" (st->preempted));
+ : "+q" (st_preempted),
+ "+&r" (err),
+ "+m" (st->preempted));
if (err)
goto out;
if (!user_access_begin(st, sizeof(*st)))
goto dirty;
} else {
+ if (!user_access_begin(st, sizeof(*st)))
+ return;
+
unsafe_put_user(0, &st->preempted, out);
vcpu->arch.st.preempted = 0;
}
return 0;
}
-#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
-
-static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
-{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
- u64 xstate_bv = xsave->header.xfeatures;
- u64 valid;
-
- /*
- * Copy legacy XSAVE area, to avoid complications with CPUID
- * leaves 0 and 1 in the loop below.
- */
- memcpy(dest, xsave, XSAVE_HDR_OFFSET);
-
- /* Set XSTATE_BV */
- xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
- *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
-
- /*
- * Copy each region from the possibly compacted offset to the
- * non-compacted offset.
- */
- valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
- while (valid) {
- u32 size, offset, ecx, edx;
- u64 xfeature_mask = valid & -valid;
- int xfeature_nr = fls64(xfeature_mask) - 1;
- void *src;
-
- cpuid_count(XSTATE_CPUID, xfeature_nr,
- &size, &offset, &ecx, &edx);
-
- if (xfeature_nr == XFEATURE_PKRU) {
- memcpy(dest + offset, &vcpu->arch.pkru,
- sizeof(vcpu->arch.pkru));
- } else {
- src = get_xsave_addr(xsave, xfeature_nr);
- if (src)
- memcpy(dest + offset, src, size);
- }
-
- valid -= xfeature_mask;
- }
-}
-
-static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
-{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
- u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
- u64 valid;
-
- /*
- * Copy legacy XSAVE area, to avoid complications with CPUID
- * leaves 0 and 1 in the loop below.
- */
- memcpy(xsave, src, XSAVE_HDR_OFFSET);
-
- /* Set XSTATE_BV and possibly XCOMP_BV. */
- xsave->header.xfeatures = xstate_bv;
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
-
- /*
- * Copy each region from the non-compacted offset to the
- * possibly compacted offset.
- */
- valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
- while (valid) {
- u32 size, offset, ecx, edx;
- u64 xfeature_mask = valid & -valid;
- int xfeature_nr = fls64(xfeature_mask) - 1;
-
- cpuid_count(XSTATE_CPUID, xfeature_nr,
- &size, &offset, &ecx, &edx);
-
- if (xfeature_nr == XFEATURE_PKRU) {
- memcpy(&vcpu->arch.pkru, src + offset,
- sizeof(vcpu->arch.pkru));
- } else {
- void *dest = get_xsave_addr(xsave, xfeature_nr);
-
- if (dest)
- memcpy(dest, src + offset, size);
- }
-
- valid -= xfeature_mask;
- }
-}
-
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
- if (boot_cpu_has(X86_FEATURE_XSAVE)) {
- memset(guest_xsave, 0, sizeof(struct kvm_xsave));
- fill_xsave((u8 *) guest_xsave->region, vcpu);
- } else {
- memcpy(guest_xsave->region,
- &vcpu->arch.guest_fpu->state.fxsave,
- sizeof(struct fxregs_state));
- *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
- XFEATURE_MASK_FPSSE;
- }
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+ guest_xsave->region,
+ sizeof(guest_xsave->region),
+ vcpu->arch.pkru);
}
-#define XSAVE_MXCSR_OFFSET 24
-
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- u64 xstate_bv;
- u32 mxcsr;
-
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
- xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
- mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
-
- if (boot_cpu_has(X86_FEATURE_XSAVE)) {
- /*
- * Here we allow setting states that are not present in
- * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
- * with old userspace.
- */
- if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
- return -EINVAL;
- load_xsave(vcpu, (u8 *)guest_xsave->region);
- } else {
- if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
- mxcsr & ~mxcsr_feature_mask)
- return -EINVAL;
- memcpy(&vcpu->arch.guest_fpu->state.fxsave,
- guest_xsave->region, sizeof(struct fxregs_state));
- }
- return 0;
+ return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
+ guest_xsave->region,
+ supported_xcr0, &vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
if (kvm_x86_ops.vm_copy_enc_context_from)
r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
return r;
+ case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
+ r = -EINVAL;
+ if (kvm_x86_ops.vm_move_enc_context_from)
+ r = kvm_x86_ops.vm_move_enc_context_from(
+ kvm, cap->args[0]);
+ return r;
case KVM_CAP_EXIT_HYPERCALL:
if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
r = -EINVAL;
}
static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val,
+ unsigned short port,
unsigned int count, bool in)
{
vcpu->arch.pio.port = port;
vcpu->arch.pio.count = count;
vcpu->arch.pio.size = size;
- if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
- vcpu->arch.pio.count = 0;
+ if (!kernel_pio(vcpu, vcpu->arch.pio_data))
return 1;
- }
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
return 0;
}
-static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val, unsigned int count)
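+/* Start a new PIO-in: zero the pio_data staging buffer and issue the port read. */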
+static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, unsigned int count)
{
- int ret;
+ WARN_ON(vcpu->arch.pio.count);
+ memset(vcpu->arch.pio_data, 0, size * count);
+ return emulator_pio_in_out(vcpu, size, port, count, true);
+}
- if (vcpu->arch.pio.count)
- goto data_avail;
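+/* Copy completed PIO-in results from pio_data to @val and clear the pending count. */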
+static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
+{
+ int size = vcpu->arch.pio.size;
+ unsigned count = vcpu->arch.pio.count;
+ memcpy(val, vcpu->arch.pio_data, size * count);
+ trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
+ vcpu->arch.pio.count = 0;
+}
- memset(vcpu->arch.pio_data, 0, size * count);
+static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val, unsigned int count)
+{
+ if (vcpu->arch.pio.count) {
+ /* Complete previous iteration. */
+ } else {
+ int r = __emulator_pio_in(vcpu, size, port, count);
+ if (!r)
+ return r;
- ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
- if (ret) {
-data_avail:
- memcpy(val, vcpu->arch.pio_data, size * count);
- trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
- vcpu->arch.pio.count = 0;
- return 1;
+ /* Results already available, fall through. */
}
- return 0;
+ WARN_ON(count != vcpu->arch.pio.count);
+ complete_emulator_pio_in(vcpu, val);
+ return 1;
}
static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
unsigned short port, const void *val,
unsigned int count)
{
+ int ret;
+
memcpy(vcpu->arch.pio_data, val, size * count);
trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
- return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
+ ret = emulator_pio_in_out(vcpu, size, port, count, false);
+ if (ret)
+ vcpu->arch.pio.count = 0;
+
+ return ret;
}
static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
}
r = -ENOMEM;
- x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
- __alignof__(struct fpu), SLAB_ACCOUNT,
- NULL);
- if (!x86_fpu_cache) {
- printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
- goto out;
- }
x86_emulator_cache = kvm_alloc_emulator_cache();
if (!x86_emulator_cache) {
pr_err("kvm: failed to allocate cache for x86 emulator\n");
- goto out_free_x86_fpu_cache;
+ goto out;
}
user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
free_percpu(user_return_msrs);
out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
-out_free_x86_fpu_cache:
- kmem_cache_destroy(x86_fpu_cache);
out:
return r;
}
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
- kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
+
+ /*
+ * The call to kvm_ready_for_interrupt_injection() may end up in
+ * kvm_xen_has_interrupt() which may require the srcu lock to be
+ * held, to protect against changes in the vcpu_info address.
+ */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM;
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
- if (to_hv_vcpu(vcpu))
+ if (to_hv_vcpu(vcpu)) {
bitmap_or((ulong *)eoi_exit_bitmap,
vcpu->arch.ioapic_handled_vectors,
to_hv_synic(vcpu)->vec_bitmap, 256);
+ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ return;
+ }
- static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ static_call(kvm_x86_load_eoi_exitmap)(
+ vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
}
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
}
if (kvm_request_pending(vcpu)) {
- if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+ if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
r = -EIO;
goto out;
}
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
- if (unlikely(kvm_vcpu_exit_request(vcpu))) {
+ if (vcpu->arch.apicv_active)
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+
+ if (unlikely(kvm_vcpu_exit_request(vcpu))) {
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
break;
}
-
- if (vcpu->arch.apicv_active)
- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
- }
+ }
/*
* Do this here before restoring debug registers on the host. And
return 0;
}
-static void kvm_save_current_fpu(struct fpu *fpu)
-{
- /*
- * If the target FPU state is not resident in the CPU registers, just
- * memcpy() from current, else save CPU state directly to the target.
- */
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
- memcpy(&fpu->state, ¤t->thread.fpu.state,
- fpu_kernel_xstate_size);
- else
- save_fpregs_to_fpstate(fpu);
-}
-
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- fpregs_lock();
-
- kvm_save_current_fpu(vcpu->arch.user_fpu);
-
/*
- * Guests with protected state can't have it set by the hypervisor,
- * so skip trying to set it.
+ * Exclude PKRU from the restore, as it is restored separately in
+ * kvm_x86_ops.run().
*/
- if (vcpu->arch.guest_fpu)
- /* PKRU is separately restored in kvm_x86_ops.run. */
- __restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state,
- ~XFEATURE_MASK_PKRU);
-
- fpregs_mark_activate();
- fpregs_unlock();
-
+ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- fpregs_lock();
-
- /*
- * Guests with protected state can't have it read by the hypervisor,
- * so skip trying to save it.
- */
- if (vcpu->arch.guest_fpu)
- kvm_save_current_fpu(vcpu->arch.guest_fpu);
-
- restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state);
-
- fpregs_mark_activate();
- fpregs_unlock();
-
+ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
{
struct fxregs_state *fxsave;
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
vcpu_load(vcpu);
- fxsave = &vcpu->arch.guest_fpu->state.fxsave;
+ fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
{
struct fxregs_state *fxsave;
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
vcpu_load(vcpu);
- fxsave = &vcpu->arch.guest_fpu->state.fxsave;
+ fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
return 0;
}
-void kvm_free_guest_fpu(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.guest_fpu) {
- kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
- vcpu->arch.guest_fpu = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(kvm_free_guest_fpu);
-
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
if (!alloc_emulate_ctxt(vcpu))
goto free_wbinvd_dirty_mask;
- vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.user_fpu) {
- pr_err("kvm: failed to allocate userspace's fpu\n");
- goto free_emulate_ctxt;
- }
-
- vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.guest_fpu) {
+ if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
pr_err("kvm: failed to allocate vcpu's fpu\n");
- goto free_user_fpu;
+ goto free_emulate_ctxt;
}
- fpstate_init(&vcpu->arch.guest_fpu->state);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
- host_xcr0 | XSTATE_COMPACTION_ENABLED;
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
return 0;
free_guest_fpu:
- kvm_free_guest_fpu(vcpu);
-free_user_fpu:
- kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
+ fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
free_emulate_ctxt:
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_wbinvd_dirty_mask:
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
- kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
- kvm_free_guest_fpu(vcpu);
+ fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
kvm_hv_vcpu_uninit(vcpu);
kvm_pmu_destroy(vcpu);
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (vcpu->arch.guest_fpu && kvm_mpx_supported()) {
- void *mpx_state_buffer;
+ if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
+ struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
/*
* To avoid have the INIT path from kvm_apic_has_events() that be
*/
if (init_event)
kvm_put_guest_fpu(vcpu);
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_BNDREGS);
- if (mpx_state_buffer)
- memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_BNDCSR);
- if (mpx_state_buffer)
- memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+
+ fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
+ fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
+
if (init_event)
kvm_load_guest_fpu(vcpu);
}
}
EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
-static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+ unsigned int port);
+
+static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
{
- memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
- vcpu->arch.pio.count * vcpu->arch.pio.size);
- vcpu->arch.pio.count = 0;
+ int size = vcpu->arch.pio.size;
+ int port = vcpu->arch.pio.port;
+ vcpu->arch.pio.count = 0;
+ if (vcpu->arch.sev_pio_count)
+ return kvm_sev_es_outs(vcpu, size, port);
return 1;
}
static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
- unsigned int port, void *data, unsigned int count)
+ unsigned int port)
{
- int ret;
-
- ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
- data, count);
- if (ret)
- return ret;
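+ /* Emit the string in chunks of at most one pio_data page per iteration. */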
+ for (;;) {
+ unsigned int count =
+ min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+ int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
+
+ /* memcpy done already by emulator_pio_out. */
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+ if (!ret)
+ break;
- vcpu->arch.pio.count = 0;
+ /* Emulation done by the kernel. */
+ if (!vcpu->arch.sev_pio_count)
+ return 1;
+ }
+ vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
return 0;
}
static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
- unsigned int port, void *data, unsigned int count)
+ unsigned int port);
+
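+/*
+ * Consume the chunk that just completed: copy it to the SEV-ES buffer and
+ * advance the data pointer and remaining count.
+ */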
+static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
{
- int ret;
+ unsigned count = vcpu->arch.pio.count;
+ complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+}
- ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port,
- data, count);
- if (ret) {
- vcpu->arch.pio.count = 0;
- } else {
- vcpu->arch.guest_ins_data = data;
- vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+ int size = vcpu->arch.pio.size;
+ int port = vcpu->arch.pio.port;
+
+ advance_sev_es_emulated_ins(vcpu);
+ if (vcpu->arch.sev_pio_count)
+ return kvm_sev_es_ins(vcpu, size, port);
+ return 1;
+}
+
+static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
+ unsigned int port)
+{
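+ /* Read the string in chunks of at most one pio_data page per iteration. */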
+ for (;;) {
+ unsigned int count =
+ min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+ if (!__emulator_pio_in(vcpu, size, port, count))
+ break;
+
+ /* Emulation done by the kernel. */
+ advance_sev_es_emulated_ins(vcpu);
+ if (!vcpu->arch.sev_pio_count)
+ return 1;
}
+ vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
return 0;
}
unsigned int port, void *data, unsigned int count,
int in)
{
- return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
- : kvm_sev_es_outs(vcpu, size, port, data, count);
+ vcpu->arch.sev_pio_data = data;
+ vcpu->arch.sev_pio_count = count;
+ return in ? kvm_sev_es_ins(vcpu, size, port)
+ : kvm_sev_es_outs(vcpu, size, port);
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
state_entry_time = vx->runstate_entry_time;
state_entry_time |= XEN_RUNSTATE_UPDATE;
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
offsetof(struct compat_vcpu_runstate_info, state));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
- sizeof(((struct compat_vcpu_runstate_info *)0)->time));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
+ sizeof_field(struct compat_vcpu_runstate_info, time));
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
+ int err;
u8 rc = 0;
/*
BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
/*
* For efficiency, this mirrors the checks for using the valid
if (likely(slots->generation == ghc->generation &&
!kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
/* Fast path */
- __get_user(rc, (u8 __user *)ghc->hva + offset);
- } else {
- /* Slow path */
- kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
- sizeof(rc));
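+ /* Never fault here; fall back to the slow path below if the page is absent. */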
+ pagefault_disable();
+ err = __get_user(rc, (u8 __user *)ghc->hva + offset);
+ pagefault_enable();
+ if (!err)
+ return rc;
}
+ /* Slow path */
+
+ /*
+ * This function gets called from kvm_vcpu_block() after setting the
+ * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+ * from a HLT. So we really mustn't sleep. If the page ended up absent
+ * at that point, just return 1 in order to trigger an immediate wake,
+ * and we'll end up getting called again from a context where we *can*
+ * fault in the page and wait for it.
+ */
+ if (in_atomic() || !task_is_running(current))
+ return 1;
+
+ kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+ sizeof(rc));
+
return rc;
}
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
+ data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
r = 0;
break;
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
-#define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
unsigned int max_halt_poll_ns;
u32 dirty_ring_size;
bool vm_bugged;
+ bool vm_dead;
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
#define vcpu_err(vcpu, fmt, ...) \
kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
+static inline void kvm_vm_dead(struct kvm *kvm)
+{
+ kvm->vm_dead = true;
+ kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD);
+}
+
static inline void kvm_vm_bugged(struct kvm *kvm)
{
kvm->vm_bugged = true;
- kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+ kvm_vm_dead(kvm);
}
+
#define KVM_BUG(cond, kvm, fmt...) \
({ \
int __ret = (cond); \
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
- void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+ void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
- int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool atomic);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
- int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
}
EXPORT_SYMBOL_GPL(gfn_to_page);
- void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+ void kvm_release_pfn(kvm_pfn_t pfn, bool dirty)
{
if (pfn == 0)
return;
- if (cache)
- cache->pfn = cache->gfn = 0;
-
if (dirty)
kvm_release_pfn_dirty(pfn);
else
kvm_release_pfn_clean(pfn);
}
- static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
- struct gfn_to_pfn_cache *cache, u64 gen)
- {
- kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
- cache->pfn = gfn_to_pfn_memslot(slot, gfn);
- cache->gfn = gfn;
- cache->dirty = false;
- cache->generation = gen;
- }
-
- static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
- struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache,
- bool atomic)
+ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
{
kvm_pfn_t pfn;
void *hva = NULL;
struct page *page = KVM_UNMAPPED_PAGE;
- struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
- u64 gen = slots->generation;
if (!map)
return -EINVAL;
- if (cache) {
- if (!cache->pfn || cache->gfn != gfn ||
- cache->generation != gen) {
- if (atomic)
- return -EAGAIN;
- kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
- }
- pfn = cache->pfn;
- } else {
- if (atomic)
- return -EAGAIN;
- pfn = gfn_to_pfn_memslot(slot, gfn);
- }
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
if (is_error_noslot_pfn(pfn))
return -EINVAL;
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
- if (atomic)
- hva = kmap_atomic(page);
- else
- hva = kmap(page);
+ hva = kmap(page);
#ifdef CONFIG_HAS_IOMEM
- } else if (!atomic) {
- hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
} else {
- return -EINVAL;
+ hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
#endif
}
return 0;
}
-
- int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool atomic)
- {
- return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
- cache, atomic);
- }
- EXPORT_SYMBOL_GPL(kvm_map_gfn);
-
- int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
- {
- return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
- NULL, false);
- }
EXPORT_SYMBOL_GPL(kvm_vcpu_map);
- static void __kvm_unmap_gfn(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache,
- bool dirty, bool atomic)
+ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
{
if (!map)
return;
if (!map->hva)
return;
- if (map->page != KVM_UNMAPPED_PAGE) {
- if (atomic)
- kunmap_atomic(map->hva);
- else
- kunmap(map->page);
- }
+ if (map->page != KVM_UNMAPPED_PAGE)
+ kunmap(map->page);
#ifdef CONFIG_HAS_IOMEM
- else if (!atomic)
- memunmap(map->hva);
else
- WARN_ONCE(1, "Unexpected unmapping in atomic context");
+ memunmap(map->hva);
#endif
if (dirty)
- mark_page_dirty_in_slot(kvm, memslot, map->gfn);
+ kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
- if (cache)
- cache->dirty |= dirty;
- else
- kvm_release_pfn(map->pfn, dirty, NULL);
+ kvm_release_pfn(map->pfn, dirty);
map->hva = NULL;
map->page = NULL;
}
-
- int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
- struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
- {
- __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
- cache, dirty, atomic);
- return 0;
- }
- EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
-
- void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
- {
- __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
- map, NULL, dirty, false);
- }
EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
struct kvm_fpu *fpu = NULL;
struct kvm_sregs *kvm_sregs = NULL;
- if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+ if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
void __user *argp = compat_ptr(arg);
int r;
- if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+ if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
switch (ioctl) {
{
struct kvm_device *dev = filp->private_data;
- if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
+ if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
return -EIO;
switch (ioctl) {
void __user *argp = (void __user *)arg;
int r;
- if (kvm->mm != current->mm || kvm->vm_bugged)
+ if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
switch (ioctl) {
case KVM_CREATE_VCPU:
struct kvm *kvm = filp->private_data;
int r;
- if (kvm->mm != current->mm || kvm->vm_bugged)
+ if (kvm->mm != current->mm || kvm->vm_dead)
return -EIO;
switch (ioctl) {
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT