Merge tag 'kvmarm-fixes-5.11-2' into kvmarm-master/next

author Marc Zyngier <[email protected]>

Fri, 12 Feb 2021 14:07:39 +0000 (14:07 +0000)

committer Marc Zyngier <[email protected]>

Fri, 12 Feb 2021 14:07:39 +0000 (14:07 +0000)
author Marc Zyngier <[email protected]>
Fri, 12 Feb 2021 14:07:39 +0000 (14:07 +0000)
committer Marc Zyngier <[email protected]>
Fri, 12 Feb 2021 14:07:39 +0000 (14:07 +0000)
diff --combined Documentation/virt/kvm/api.rst

index c136e254b4960270863e86ecc01325771ab5ab6a,c347b7083abf4640db2eae3a8c27378a07c4dacc..a9bf7f2ab76f5c5ef3bc64a577e29a0ad4d297c9
--- 1/Documentation/virt/kvm/api.rst
--- 2/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@@ -262,18 -262,6 +262,18 @@@ The KVM_RUN ioctl (cf.) communicates wi
   memory region.  This ioctl returns the size of that region.  See the
   KVM_RUN documentation for details.
   
+ +Besides the size of the KVM_RUN communication region, other areas of
+ +the VCPU file descriptor can be mmap-ed, including:
+ +
+ +- if KVM_CAP_COALESCED_MMIO is available, a page at
+ +  KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE; for historical reasons,
+ +  this page is included in the result of KVM_GET_VCPU_MMAP_SIZE.
+ +  KVM_CAP_COALESCED_MMIO is not documented yet.
+ +
+ +- if KVM_CAP_DIRTY_LOG_RING is available, a number of pages at
+ +  KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE.  For more information on
+ +  KVM_CAP_DIRTY_LOG_RING, see section 8.3.
+ +
   
   4.6 KVM_SET_MEMORY_REGION
   -------------------------
@@@ -1281,6 -1269,9 +1281,9 @@@ field userspace_addr, which must point 
   the entire memory slot size.  Any object may back this memory, including
   anonymous memory, ordinary files, and hugetlbfs.
   
+ On architectures that support a form of address tagging, userspace_addr must
+ be an untagged address.
+ 
   It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
   be identical.  This allows large pages in the guest to be backed by large
   pages in the host.
@@@ -4472,9 -4463,9 +4475,9 @@@ that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 
   4.118 KVM_GET_SUPPORTED_HV_CPUID
   --------------------------------
   
- -:Capability: KVM_CAP_HYPERV_CPUID
+ +:Capability: KVM_CAP_HYPERV_CPUID (vcpu), KVM_CAP_SYS_HYPERV_CPUID (system)
   :Architectures: x86
- -:Type: vcpu ioctl
+ +:Type: system ioctl, vcpu ioctl
   :Parameters: struct kvm_cpuid2 (in/out)
   :Returns: 0 on success, -1 on error
   
@@@ -4519,6 -4510,9 +4522,6 @@@ Currently, the following list of CPUID 
    - HYPERV_CPUID_SYNDBG_INTERFACE
    - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
   
- -HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
- -enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
- -
   Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
   with the 'nent' field indicating the number of entries in the variable-size
   array 'entries'.  If the number of entries is too low to describe all Hyper-V
@@@ -4529,15 -4523,6 +4532,15 @@@ number of valid entries in the 'entries
   'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
   userspace should not expect to get any particular value there.
   
+ +Note, the vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated.
+ +Unlike the system ioctl, which exposes all supported feature bits
+ +unconditionally, the vcpu version has the following quirks:
+ +- HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED
+ +  feature bit are only exposed when Enlightened VMCS was previously enabled
+ +  on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
+ +- HV_STIMER_DIRECT_MODE_AVAILABLE bit is only exposed with in-kernel LAPIC.
+ +  (presumes KVM_CREATE_IRQCHIP has already been called).
+ +
   4.119 KVM_ARM_VCPU_FINALIZE
   ---------------------------
   
@@@ -6413,91 -6398,3 +6416,91 @@@ When enabled, KVM will disable paravirt
   guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
   (0x40000001). Otherwise, a guest may use the paravirtual features
   regardless of what has actually been exposed through the CPUID leaf.
+ +
+ +
+ +8.29 KVM_CAP_DIRTY_LOG_RING
+ +---------------------------
+ +
+ +:Architectures: x86
+ +:Parameters: args[0] - size of the dirty log ring
+ +
+ +KVM is capable of tracking dirty memory using ring buffers that are
+ +mmaped into userspace; there is one dirty ring per vcpu.
+ +
+ +The dirty ring is available to userspace as an array of
+ +``struct kvm_dirty_gfn``.  Each dirty entry is defined as::
+ +
+ +  struct kvm_dirty_gfn {
+ +          __u32 flags;
+ +          __u32 slot; /* as_id | slot_id */
+ +          __u64 offset;
+ +  };
+ +
+ +The following values are defined for the flags field to define the
+ +current state of the entry::
+ +
+ +  #define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+ +  #define KVM_DIRTY_GFN_F_RESET           BIT(1)
+ +  #define KVM_DIRTY_GFN_F_MASK            0x3
+ +
+ +Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
+ +ioctl to enable this capability for the new guest and set the size of
+ +the rings.  Enabling the capability is only allowed before creating any
+ +vCPU, and the size of the ring must be a power of two.  The larger the
+ +ring buffer, the less likely the ring is full and the VM is forced to
+ +exit to userspace. The optimal size depends on the workload, but it is
+ +recommended that it be at least 64 KiB (4096 entries).
+ +
+ +Just like for dirty page bitmaps, the buffer tracks writes to
+ +all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
+ +set in KVM_SET_USER_MEMORY_REGION.  Once a memory region is registered
+ +with the flag set, userspace can start harvesting dirty pages from the
+ +ring buffer.
+ +
+ +An entry in the ring buffer can be unused (flag bits ``00``),
+ +dirty (flag bits ``01``) or harvested (flag bits ``1X``).  The
+ +state machine for the entry is as follows::
+ +
+ +          dirtied         harvested        reset
+ +     00 -----------> 01 -------------> 1X -------+
+ +      ^                                          |
+ +      |                                          |
+ +      +------------------------------------------+
+ +
+ +To harvest the dirty pages, userspace accesses the mmaped ring buffer
+ +to read the dirty GFNs.  If the flags field has the DIRTY bit set (at this stage
+ +the RESET bit must be cleared), then it means this GFN is a dirty GFN.
+ +The userspace should harvest this GFN and mark the flags from state
+ +``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
+ +to show that this GFN is harvested and waiting for a reset), and move
+ +on to the next GFN.  The userspace should continue to do this until the
+ +flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
+ +all the dirty GFNs that were available.
+ +
+ +It's not necessary for userspace to harvest all the dirty GFNs at once.
+ +However it must collect the dirty GFNs in sequence, i.e., the userspace
+ +program cannot skip one dirty GFN to collect the one next to it.
+ +
+ +After processing one or more entries in the ring buffer, userspace
+ +calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
+ +it, so that the kernel will reprotect those collected GFNs.
+ +Therefore, the ioctl must be called *before* reading the content of
+ +the dirty pages.
+ +
+ +The dirty ring can get full.  When it happens, the KVM_RUN of the
+ +vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
+ +
+ +The dirty ring interface has a major difference compared to the
+ +KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
+ +userspace, it's still possible that the kernel has not yet flushed the
+ +processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
+ +flushing is done by the KVM_GET_DIRTY_LOG ioctl).  To achieve that, one
+ +needs to kick the vcpu out of KVM_RUN using a signal.  The resulting
+ +vmexit ensures that all dirty GFNs are flushed to the dirty rings.
+ +
+ +NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding
+ +ioctl KVM_RESET_DIRTY_RINGS are mutually exclusive with the existing ioctls
+ +KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG.  After enabling
+ +KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
+ +machine will switch to ring-buffer dirty page tracking and further
+ +KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
diff --combined virt/kvm/kvm_main.c

index fa9e3614d30edb150f6b8f7a071f0e351657dd08,a9abaf5f8e53c832e02bc06f5176a2efa3157432..8367d88ce39bf2b831d317c91ebb087cc034d096
--- 1/virt/kvm/kvm_main.c
--- 2/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@@ -63,8 -63,6 +63,8 @@@
   #define CREATE_TRACE_POINTS
   #include <trace/events/kvm.h>
   
+ +#include <linux/kvm_dirty_ring.h>
+ +
   /* Worst case buffer size needed for holding an integer. */
   #define ITOA_MAX_LEN 12
   
@@@ -417,7 -415,6 +417,7 @@@ static void kvm_vcpu_init(struct kvm_vc
   
   void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
   {
+ +      kvm_dirty_ring_free(&vcpu->dirty_ring);
         kvm_arch_vcpu_destroy(vcpu);
   
         /*
@@@ -485,8 -482,9 +485,8 @@@ static int kvm_mmu_notifier_invalidate_
         kvm->mmu_notifier_count++;
         need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
                                              range->flags);
- -      need_tlb_flush |= kvm->tlbs_dirty;
         /* we've to flush the tlb before the pages can be freed */
- -      if (need_tlb_flush)
+ +      if (need_tlb_flush || kvm->tlbs_dirty)
                 kvm_flush_remote_tlbs(kvm);
   
         spin_unlock(&kvm->mmu_lock);
@@@ -1292,6 -1290,7 +1292,7 @@@ int __kvm_set_memory_region(struct kvm 
                 return -EINVAL;
         /* We can read the guest memory with __xxx_user() later on. */
         if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+           (mem->userspace_addr != untagged_addr(mem->userspace_addr)) ||
              !access_ok((void __user *)(unsigned long)mem->userspace_addr,
                         mem->memory_size))
                 return -EINVAL;
@@@ -1364,7 -1363,7 +1365,7 @@@
         /* Allocate/free page dirty bitmap as needed */
         if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
                 new.dirty_bitmap = NULL;
- -      else if (!new.dirty_bitmap) {
+ +      else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
                 r = kvm_alloc_dirty_bitmap(&new);
                 if (r)
                         return r;
@@@ -1425,10 -1424,6 +1426,10 @@@ int kvm_get_dirty_log(struct kvm *kvm, 
         unsigned long n;
         unsigned long any = 0;
   
+ +      /* Dirty ring tracking is exclusive to dirty log tracking */
+ +      if (kvm->dirty_ring_size)
+ +              return -ENXIO;
+ +
         *memslot = NULL;
         *is_dirty = 0;
   
@@@ -1490,10 -1485,6 +1491,10 @@@ static int kvm_get_dirty_log_protect(st
         unsigned long *dirty_bitmap_buffer;
         bool flush;
   
+ +      /* Dirty ring tracking is exclusive to dirty log tracking */
+ +      if (kvm->dirty_ring_size)
+ +              return -ENXIO;
+ +
         as_id = log->slot >> 16;
         id = (u16)log->slot;
         if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@@ -1602,10 -1593,6 +1603,10 @@@ static int kvm_clear_dirty_log_protect(
         unsigned long *dirty_bitmap_buffer;
         bool flush;
   
+ +      /* Dirty ring tracking is exclusive to dirty log tracking */
+ +      if (kvm->dirty_ring_size)
+ +              return -ENXIO;
+ +
         as_id = log->slot >> 16;
         id = (u16)log->slot;
         if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@@ -2210,8 -2197,7 +2211,8 @@@ int kvm_vcpu_map(struct kvm_vcpu *vcpu
   }
   EXPORT_SYMBOL_GPL(kvm_vcpu_map);
   
- -static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+ +static void __kvm_unmap_gfn(struct kvm *kvm,
+ +                      struct kvm_memory_slot *memslot,
                         struct kvm_host_map *map,
                         struct gfn_to_pfn_cache *cache,
                         bool dirty, bool atomic)
@@@ -2236,7 -2222,7 +2237,7 @@@
   #endif
   
         if (dirty)
- -              mark_page_dirty_in_slot(memslot, map->gfn);
+ +              mark_page_dirty_in_slot(kvm, memslot, map->gfn);
   
         if (cache)
                 cache->dirty |= dirty;
@@@ -2250,7 -2236,7 +2251,7 @@@
   int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, 
                   struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
   {
- -      __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+ +      __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
                         cache, dirty, atomic);
         return 0;
   }
@@@ -2258,8 -2244,8 +2259,8 @@@ EXPORT_SYMBOL_GPL(kvm_unmap_gfn)
   
   void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
   {
- -      __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
- -                      dirty, false);
+ +      __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
+ +                      map, NULL, dirty, false);
   }
   EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
   
@@@ -2433,8 -2419,7 +2434,8 @@@ int kvm_vcpu_read_guest_atomic(struct k
   }
   EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
   
- -static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+ +static int __kvm_write_guest_page(struct kvm *kvm,
+ +                                struct kvm_memory_slot *memslot, gfn_t gfn,
                                   const void *data, int offset, int len)
   {
         int r;
@@@ -2446,7 -2431,7 +2447,7 @@@
         r = __copy_to_user((void __user *)addr + offset, data, len);
         if (r)
                 return -EFAULT;
- -      mark_page_dirty_in_slot(memslot, gfn);
+ +      mark_page_dirty_in_slot(kvm, memslot, gfn);
         return 0;
   }
   
@@@ -2455,7 -2440,7 +2456,7 @@@ int kvm_write_guest_page(struct kvm *kv
   {
         struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
   
- -      return __kvm_write_guest_page(slot, gfn, data, offset, len);
+ +      return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
   }
   EXPORT_SYMBOL_GPL(kvm_write_guest_page);
   
@@@ -2464,7 -2449,7 +2465,7 @@@ int kvm_vcpu_write_guest_page(struct kv
   {
         struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
   
- -      return __kvm_write_guest_page(slot, gfn, data, offset, len);
+ +      return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
   }
   EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
   
@@@ -2583,7 -2568,7 +2584,7 @@@ int kvm_write_guest_offset_cached(struc
         r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
         if (r)
                 return -EFAULT;
- -      mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
+ +      mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
   
         return 0;
   }
@@@ -2632,16 -2617,23 +2633,16 @@@ int kvm_read_guest_cached(struct kvm *k
   }
   EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
   
- -int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
- -{
- -      const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
- -
- -      return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
- -}
- -EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
- -
   int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
   {
+ +      const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
         gfn_t gfn = gpa >> PAGE_SHIFT;
         int seg;
         int offset = offset_in_page(gpa);
         int ret;
   
         while ((seg = next_segment(len, offset)) != 0) {
- -              ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
+ +              ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
                 if (ret < 0)
                         return ret;
                 offset = 0;
@@@ -2652,19 -2644,12 +2653,19 @@@
   }
   EXPORT_SYMBOL_GPL(kvm_clear_guest);
   
- -void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn)
+ +void mark_page_dirty_in_slot(struct kvm *kvm,
+ +                           struct kvm_memory_slot *memslot,
+ +                           gfn_t gfn)
   {
- -      if (memslot && memslot->dirty_bitmap) {
+ +      if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
                 unsigned long rel_gfn = gfn - memslot->base_gfn;
+ +              u32 slot = (memslot->as_id << 16) | memslot->id;
   
- -              set_bit_le(rel_gfn, memslot->dirty_bitmap);
+ +              if (kvm->dirty_ring_size)
+ +                      kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+ +                                          slot, rel_gfn);
+ +              else
+ +                      set_bit_le(rel_gfn, memslot->dirty_bitmap);
         }
   }
   EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
@@@ -2674,7 -2659,7 +2675,7 @@@ void mark_page_dirty(struct kvm *kvm, g
         struct kvm_memory_slot *memslot;
   
         memslot = gfn_to_memslot(kvm, gfn);
- -      mark_page_dirty_in_slot(memslot, gfn);
+ +      mark_page_dirty_in_slot(kvm, memslot, gfn);
   }
   EXPORT_SYMBOL_GPL(mark_page_dirty);
   
@@@ -2683,7 -2668,7 +2684,7 @@@ void kvm_vcpu_mark_page_dirty(struct kv
         struct kvm_memory_slot *memslot;
   
         memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
- -      mark_page_dirty_in_slot(memslot, gfn);
+ +      mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
   }
   EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
   
@@@ -3024,17 -3009,6 +3025,17 @@@ void kvm_vcpu_on_spin(struct kvm_vcpu *
   }
   EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
   
+ +static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
+ +{
+ +#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+ +      return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
+ +          (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
+ +           kvm->dirty_ring_size / PAGE_SIZE);
+ +#else
+ +      return false;
+ +#endif
+ +}
+ +
   static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
   {
         struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
@@@ -3050,10 -3024,6 +3051,10 @@@
         else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
                 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
   #endif
+ +      else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff))
+ +              page = kvm_dirty_ring_get_page(
+ +                  &vcpu->dirty_ring,
+ +                  vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
         else
                 return kvm_arch_vcpu_fault(vcpu, vmf);
         get_page(page);
@@@ -3067,14 -3037,6 +3068,14 @@@ static const struct vm_operations_struc
   
   static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
   {
+ +      struct kvm_vcpu *vcpu = file->private_data;
+ +      unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ +
+ +      if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) ||
+ +           kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) &&
+ +          ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED)))
+ +              return -EINVAL;
+ +
         vma->vm_ops = &kvm_vcpu_vm_ops;
         return 0;
   }
@@@ -3155,7 -3117,7 +3156,7 @@@ static int kvm_vm_ioctl_create_vcpu(str
         }
   
         BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
- -      page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ +      page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
         if (!page) {
                 r = -ENOMEM;
                 goto vcpu_free;
@@@ -3168,13 -3130,6 +3169,13 @@@
         if (r)
                 goto vcpu_free_run_page;
   
+ +      if (kvm->dirty_ring_size) {
+ +              r = kvm_dirty_ring_alloc(&vcpu->dirty_ring,
+ +                                       id, kvm->dirty_ring_size);
+ +              if (r)
+ +                      goto arch_vcpu_destroy;
+ +      }
+ +
         mutex_lock(&kvm->lock);
         if (kvm_get_vcpu_by_id(kvm, id)) {
                 r = -EEXIST;
@@@ -3208,8 -3163,6 +3209,8 @@@
   
   unlock_vcpu_destroy:
         mutex_unlock(&kvm->lock);
+ +      kvm_dirty_ring_free(&vcpu->dirty_ring);
+ +arch_vcpu_destroy:
         kvm_arch_vcpu_destroy(vcpu);
   vcpu_free_run_page:
         free_page((unsigned long)vcpu->run);
@@@ -3682,78 -3635,12 +3683,78 @@@ static long kvm_vm_ioctl_check_extensio
   #endif
         case KVM_CAP_NR_MEMSLOTS:
                 return KVM_USER_MEM_SLOTS;
+ +      case KVM_CAP_DIRTY_LOG_RING:
+ +#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+ +              return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
+ +#else
+ +              return 0;
+ +#endif
         default:
                 break;
         }
         return kvm_vm_ioctl_check_extension(kvm, arg);
   }
   
+ +static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+ +{
+ +      int r;
+ +
+ +      if (!KVM_DIRTY_LOG_PAGE_OFFSET)
+ +              return -EINVAL;
+ +
+ +      /* The size must be a non-zero power of 2 */
+ +      if (!size || (size & (size - 1)))
+ +              return -EINVAL;
+ +
+ +      /* Must be able to hold the reserved entries, and be at least a page */
+ +      if (size < kvm_dirty_ring_get_rsvd_entries() *
+ +          sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE)
+ +              return -EINVAL;
+ +
+ +      if (size > KVM_DIRTY_RING_MAX_ENTRIES *
+ +          sizeof(struct kvm_dirty_gfn))
+ +              return -E2BIG;
+ +
+ +      /* The ring size may only be set once per VM */
+ +      if (kvm->dirty_ring_size)
+ +              return -EINVAL;
+ +
+ +      mutex_lock(&kvm->lock);
+ +
+ +      if (kvm->created_vcpus) {
+ +              /* The size cannot be set once any vCPU has been created */
+ +              r = -EINVAL;
+ +      } else {
+ +              kvm->dirty_ring_size = size;
+ +              r = 0;
+ +      }
+ +
+ +      mutex_unlock(&kvm->lock);
+ +      return r;
+ +}
+ +
+ +static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
+ +{
+ +      int i;
+ +      struct kvm_vcpu *vcpu;
+ +      int cleared = 0;
+ +
+ +      if (!kvm->dirty_ring_size)
+ +              return -EINVAL;
+ +
+ +      mutex_lock(&kvm->slots_lock);
+ +
+ +      kvm_for_each_vcpu(i, vcpu, kvm)
+ +              cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
+ +
+ +      mutex_unlock(&kvm->slots_lock);
+ +
+ +      if (cleared)
+ +              kvm_flush_remote_tlbs(kvm);
+ +
+ +      return cleared;
+ +}
+ +
   int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                                                   struct kvm_enable_cap *cap)
   {
@@@ -3784,8 -3671,6 +3785,8 @@@ static int kvm_vm_ioctl_enable_cap_gene
                 kvm->max_halt_poll_ns = cap->args[0];
                 return 0;
         }
+ +      case KVM_CAP_DIRTY_LOG_RING:
+ +              return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
         default:
                 return kvm_vm_ioctl_enable_cap(kvm, cap);
         }
@@@ -3970,9 -3855,6 +3971,9 @@@ static long kvm_vm_ioctl(struct file *f
         case KVM_CHECK_EXTENSION:
                 r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
                 break;
+ +      case KVM_RESET_DIRTY_RINGS:
+ +              r = kvm_vm_ioctl_reset_dirty_pages(kvm);
+ +              break;
         default:
                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
         }
author	Marc Zyngier <[email protected]>
	Fri, 12 Feb 2021 14:07:39 +0000 (14:07 +0000)
committer	Marc Zyngier <[email protected]>
	Fri, 12 Feb 2021 14:07:39 +0000 (14:07 +0000)
		1	2
Documentation/virt/kvm/api.rst	patch \|	diff1 \|	diff2 \|	blob \| history
virt/kvm/kvm_main.c	patch \|	diff1 \|	diff2 \|	blob \| history