Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

author Linus Torvalds <[email protected]>
Wed, 8 Oct 2014 09:27:39 +0000 (05:27 -0400)
committer Linus Torvalds <[email protected]>
Wed, 8 Oct 2014 09:27:39 +0000 (05:27 -0400)
diff --combined arch/powerpc/kvm/book3s_64_mmu_hv.c

index 79294c4c5015ea83771e4ebc849a561bd376144d,81460c5359c0b9121a4c8dcbaf8ddeaa30bb97a6..d40770248b6a10dbb9846b52cbf76de25bfef1d0
--- 1/arch/powerpc/kvm/book3s_64_mmu_hv.c
--- 2/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@@ -62,10 -62,10 +62,10 @@@ long kvmppc_alloc_hpt(struct kvm *kvm, 
         }
   
         kvm->arch.hpt_cma_alloc = 0;
- -      page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+ +      page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT));
         if (page) {
                 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
- -              memset((void *)hpt, 0, (1 << order));
+ +              memset((void *)hpt, 0, (1ul << order));
                 kvm->arch.hpt_cma_alloc = 1;
         }
   
@@@ -1002,11 -1002,11 +1002,11 @@@ static int kvm_age_rmapp(struct kvm *kv
         return ret;
   }
   
- int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+ int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
   {
         if (!kvm->arch.using_mmu_notifiers)
                 return 0;
-       return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+       return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
   }
   
   static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
diff --combined arch/s390/include/asm/pgtable.h

index 5efb2fe186e78275faa6ecc223b94a8c96c86a70,0242588ded678b8f0e196579d5799c8a1415e9e4..b7054356cc9843ba4e80d17bc38614614f3361e3
--- 1/arch/s390/include/asm/pgtable.h
--- 2/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@@ -30,6 -30,7 +30,7 @@@
   #include <linux/sched.h>
   #include <linux/mm_types.h>
   #include <linux/page-flags.h>
+ #include <linux/radix-tree.h>
   #include <asm/bug.h>
   #include <asm/page.h>
   
@@@ -789,82 -790,67 +790,67 @@@ static inline pgste_t pgste_set_pte(pte
   
   /**
    * struct gmap_struct - guest address space
+  * @crst_list: list of all crst tables used in the guest address space
    * @mm: pointer to the parent mm_struct
+  * @guest_to_host: radix tree with guest to host address translation
+  * @host_to_guest: radix tree with pointer to segment table entries
+  * @guest_table_lock: spinlock to protect all entries in the guest page table
    * @table: pointer to the page directory
    * @asce: address space control element for gmap page table
-  * @crst_list: list of all crst tables used in the guest address space
    * @pfault_enabled: defines if pfaults are applicable for the guest
    */
   struct gmap {
         struct list_head list;
+       struct list_head crst_list;
         struct mm_struct *mm;
+       struct radix_tree_root guest_to_host;
+       struct radix_tree_root host_to_guest;
+       spinlock_t guest_table_lock;
         unsigned long *table;
         unsigned long asce;
+       unsigned long asce_end;
         void *private;
-       struct list_head crst_list;
         bool pfault_enabled;
   };
   
- /**
-  * struct gmap_rmap - reverse mapping for segment table entries
-  * @gmap: pointer to the gmap_struct
-  * @entry: pointer to a segment table entry
-  * @vmaddr: virtual address in the guest address space
-  */
- struct gmap_rmap {
-       struct list_head list;
-       struct gmap *gmap;
-       unsigned long *entry;
-       unsigned long vmaddr;
- };
- 
- /**
-  * struct gmap_pgtable - gmap information attached to a page table
-  * @vmaddr: address of the 1MB segment in the process virtual memory
-  * @mapper: list of segment table entries mapping a page table
-  */
- struct gmap_pgtable {
-       unsigned long vmaddr;
-       struct list_head mapper;
- };
- 
   /**
    * struct gmap_notifier - notify function block for page invalidation
    * @notifier_call: address of callback function
    */
   struct gmap_notifier {
         struct list_head list;
-       void (*notifier_call)(struct gmap *gmap, unsigned long address);
+       void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
   };
   
- struct gmap *gmap_alloc(struct mm_struct *mm);
+ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
   void gmap_free(struct gmap *gmap);
   void gmap_enable(struct gmap *gmap);
   void gmap_disable(struct gmap *gmap);
   int gmap_map_segment(struct gmap *gmap, unsigned long from,
                      unsigned long to, unsigned long len);
   int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
- unsigned long __gmap_translate(unsigned long address, struct gmap *);
- unsigned long gmap_translate(unsigned long address, struct gmap *);
- unsigned long __gmap_fault(unsigned long address, struct gmap *);
- unsigned long gmap_fault(unsigned long address, struct gmap *);
- void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
- void __gmap_zap(unsigned long address, struct gmap *);
+ unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+ unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+ int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+ void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+ void __gmap_zap(struct gmap *, unsigned long gaddr);
   bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
   
   
   void gmap_register_ipte_notifier(struct gmap_notifier *);
   void gmap_unregister_ipte_notifier(struct gmap_notifier *);
   int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
- void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
+ void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
   
   static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+                                       unsigned long addr,
                                         pte_t *ptep, pgste_t pgste)
   {
   #ifdef CONFIG_PGSTE
         if (pgste_val(pgste) & PGSTE_IN_BIT) {
                 pgste_val(pgste) &= ~PGSTE_IN_BIT;
-               gmap_do_ipte_notify(mm, ptep);
+               gmap_do_ipte_notify(mm, addr, ptep);
         }
   #endif
         return pgste;
@@@ -1110,7 -1096,7 +1096,7 @@@ static inline int ptep_test_and_clear_u
         pgste_val(pgste) &= ~PGSTE_UC_BIT;
         pte = *ptep;
         if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
-               pgste = pgste_ipte_notify(mm, ptep, pgste);
+               pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
                 __ptep_ipte(addr, ptep);
                 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
                         pte_val(pte) |= _PAGE_PROTECT;
@@@ -1127,21 -1113,20 +1113,21 @@@ static inline int ptep_test_and_clear_y
                                             unsigned long addr, pte_t *ptep)
   {
         pgste_t pgste;
- -      pte_t pte;
+ +      pte_t pte, oldpte;
         int young;
   
         if (mm_has_pgste(vma->vm_mm)) {
                 pgste = pgste_get_lock(ptep);
-               pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+               pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
         }
   
- -      pte = *ptep;
+ +      oldpte = pte = *ptep;
         ptep_flush_direct(vma->vm_mm, addr, ptep);
         young = pte_young(pte);
         pte = pte_mkold(pte);
   
         if (mm_has_pgste(vma->vm_mm)) {
+ +              pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
                 pgste = pgste_set_pte(ptep, pgste, pte);
                 pgste_set_unlock(ptep, pgste);
         } else
@@@ -1179,7 -1164,7 +1165,7 @@@ static inline pte_t ptep_get_and_clear(
   
         if (mm_has_pgste(mm)) {
                 pgste = pgste_get_lock(ptep);
-               pgste = pgste_ipte_notify(mm, ptep, pgste);
+               pgste = pgste_ipte_notify(mm, address, ptep, pgste);
         }
   
         pte = *ptep;
@@@ -1203,7 -1188,7 +1189,7 @@@ static inline pte_t ptep_modify_prot_st
   
         if (mm_has_pgste(mm)) {
                 pgste = pgste_get_lock(ptep);
-               pgste_ipte_notify(mm, ptep, pgste);
+               pgste_ipte_notify(mm, address, ptep, pgste);
         }
   
         pte = *ptep;
@@@ -1240,7 -1225,7 +1226,7 @@@ static inline pte_t ptep_clear_flush(st
   
         if (mm_has_pgste(vma->vm_mm)) {
                 pgste = pgste_get_lock(ptep);
-               pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+               pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
         }
   
         pte = *ptep;
@@@ -1274,7 -1259,7 +1260,7 @@@ static inline pte_t ptep_get_and_clear_
   
         if (!full && mm_has_pgste(mm)) {
                 pgste = pgste_get_lock(ptep);
-               pgste = pgste_ipte_notify(mm, ptep, pgste);
+               pgste = pgste_ipte_notify(mm, address, ptep, pgste);
         }
   
         pte = *ptep;
@@@ -1299,7 -1284,7 +1285,7 @@@ static inline pte_t ptep_set_wrprotect(
         if (pte_write(pte)) {
                 if (mm_has_pgste(mm)) {
                         pgste = pgste_get_lock(ptep);
-                       pgste = pgste_ipte_notify(mm, ptep, pgste);
+                       pgste = pgste_ipte_notify(mm, address, ptep, pgste);
                 }
   
                 ptep_flush_lazy(mm, address, ptep);
@@@ -1325,13 -1310,12 +1311,13 @@@ static inline int ptep_set_access_flags
                 return 0;
         if (mm_has_pgste(vma->vm_mm)) {
                 pgste = pgste_get_lock(ptep);
-               pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+               pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
         }
   
         ptep_flush_direct(vma->vm_mm, address, ptep);
   
         if (mm_has_pgste(vma->vm_mm)) {
+ +              pgste_set_key(ptep, pgste, entry, vma->vm_mm);
                 pgste = pgste_set_pte(ptep, pgste, entry);
                 pgste_set_unlock(ptep, pgste);
         } else
diff --combined arch/x86/include/asm/cpufeature.h

index 516903b98e06f5d9c20be87e07231a682714d287,2075e6c34c78ce9bc6ae4fcfc0f865ab34350626..094292a63e74b0c5b4378a7d94b2890c811e2da0
--- 1/arch/x86/include/asm/cpufeature.h
--- 2/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@@ -202,6 -202,7 +202,7 @@@
   #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
   #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
   #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
+ #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
   
   
   /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
@@@ -250,15 -251,8 +251,15 @@@
   #include <asm/asm.h>
   #include <linux/bitops.h>
   
+ +#ifdef CONFIG_X86_FEATURE_NAMES
   extern const char * const x86_cap_flags[NCAPINTS*32];
   extern const char * const x86_power_flags[32];
+ +#define X86_CAP_FMT "%s"
+ +#define x86_cap_flag(flag) x86_cap_flags[flag]
+ +#else
+ +#define X86_CAP_FMT "%d:%d"
+ +#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+ +#endif
   
   /*
    * In order to save room, we index into this array by doing
diff --combined virt/kvm/kvm_main.c

index 95519bc959edcaaefe27d1fd9e1d7a51aa52b6e2,39a02fbdb5725ef2d2145f17c6f638db1362152c..384eaa7b02fa993f77981d6c91e6dd7f039c1f34
--- 1/virt/kvm/kvm_main.c
--- 2/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@@ -52,11 -52,13 +52,13 @@@
   
   #include <asm/processor.h>
   #include <asm/io.h>
+ #include <asm/ioctl.h>
   #include <asm/uaccess.h>
   #include <asm/pgtable.h>
   
   #include "coalesced_mmio.h"
   #include "async_pf.h"
+ #include "vfio.h"
   
   #define CREATE_TRACE_POINTS
   #include <trace/events/kvm.h>
@@@ -95,8 -97,6 +97,6 @@@ static int hardware_enable_all(void)
   static void hardware_disable_all(void);
   
   static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
- static void update_memslots(struct kvm_memslots *slots,
-                           struct kvm_memory_slot *new, u64 last_generation);
   
   static void kvm_release_pfn_dirty(pfn_t pfn);
   static void mark_page_dirty_in_slot(struct kvm *kvm,
@@@ -110,7 -110,7 +110,7 @@@ static bool largepages_enabled = true
   bool kvm_is_mmio_pfn(pfn_t pfn)
   {
         if (pfn_valid(pfn))
- -              return PageReserved(pfn_to_page(pfn));
+ +              return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
   
         return true;
   }
@@@ -129,7 -129,8 +129,8 @@@ int vcpu_load(struct kvm_vcpu *vcpu
                 struct pid *oldpid = vcpu->pid;
                 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
                 rcu_assign_pointer(vcpu->pid, newpid);
-               synchronize_rcu();
+               if (oldpid)
+                       synchronize_rcu();
                 put_pid(oldpid);
         }
         cpu = get_cpu();
@@@ -152,7 -153,7 +153,7 @@@ static void ack_flush(void *_completed
   {
   }
   
- static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
   {
         int i, cpu, me;
         cpumask_var_t cpus;
@@@ -189,7 -190,7 +190,7 @@@ void kvm_flush_remote_tlbs(struct kvm *
         long dirty_count = kvm->tlbs_dirty;
   
         smp_mb();
-       if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+       if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                 ++kvm->stat.remote_tlb_flush;
         cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
   }
@@@ -197,17 -198,17 +198,17 @@@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs
   
   void kvm_reload_remote_mmus(struct kvm *kvm)
   {
-       make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
   }
   
   void kvm_make_mclock_inprogress_request(struct kvm *kvm)
   {
-       make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
   }
   
   void kvm_make_scan_ioapic_request(struct kvm *kvm)
   {
-       make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
   }
   
   int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@@ -295,6 -296,9 +296,9 @@@ static void kvm_mmu_notifier_invalidate
                 kvm_flush_remote_tlbs(kvm);
   
         spin_unlock(&kvm->mmu_lock);
+ 
+       kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+ 
         srcu_read_unlock(&kvm->srcu, idx);
   }
   
@@@ -368,7 -372,8 +372,8 @@@ static void kvm_mmu_notifier_invalidate
   
   static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
                                               struct mm_struct *mm,
-                                             unsigned long address)
+                                             unsigned long start,
+                                             unsigned long end)
   {
         struct kvm *kvm = mmu_notifier_to_kvm(mn);
         int young, idx;
@@@ -376,7 -381,7 +381,7 @@@
         idx = srcu_read_lock(&kvm->srcu);
         spin_lock(&kvm->mmu_lock);
   
-       young = kvm_age_hva(kvm, address);
+       young = kvm_age_hva(kvm, start, end);
         if (young)
                 kvm_flush_remote_tlbs(kvm);
   
@@@ -476,6 -481,13 +481,13 @@@ static struct kvm *kvm_create_vm(unsign
         kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
         if (!kvm->memslots)
                 goto out_err_no_srcu;
+ 
+       /*
+        * Init kvm generation close to the maximum to easily test the
+        * code of handling generation number wrap-around.
+        */
+       kvm->memslots->generation = -150;
+ 
         kvm_init_memslots_id(kvm);
         if (init_srcu_struct(&kvm->srcu))
                 goto out_err_no_srcu;
@@@ -687,8 -699,7 +699,7 @@@ static void sort_memslots(struct kvm_me
   }
   
   static void update_memslots(struct kvm_memslots *slots,
-                           struct kvm_memory_slot *new,
-                           u64 last_generation)
+                           struct kvm_memory_slot *new)
   {
         if (new) {
                 int id = new->id;
@@@ -699,15 -710,13 +710,13 @@@
                 if (new->npages != npages)
                         sort_memslots(slots);
         }
- 
-       slots->generation = last_generation + 1;
   }
   
   static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
   {
         u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
   
- #ifdef KVM_CAP_READONLY_MEM
+ #ifdef __KVM_HAVE_READONLY_MEM
         valid_flags |= KVM_MEM_READONLY;
   #endif
   
@@@ -722,10 -731,24 +731,24 @@@ static struct kvm_memslots *install_new
   {
         struct kvm_memslots *old_memslots = kvm->memslots;
   
-       update_memslots(slots, new, kvm->memslots->generation);
+       /*
+        * Set the low bit in the generation, which disables SPTE caching
+        * until the end of synchronize_srcu_expedited.
+        */
+       WARN_ON(old_memslots->generation & 1);
+       slots->generation = old_memslots->generation + 1;
+ 
+       update_memslots(slots, new);
         rcu_assign_pointer(kvm->memslots, slots);
         synchronize_srcu_expedited(&kvm->srcu);
   
+       /*
+        * Increment the new memslot generation a second time. This prevents
+        * vm exits that race with memslot updates from caching a memslot
+        * generation that will (potentially) be valid forever.
+        */
+       slots->generation++;
+ 
         kvm_arch_memslots_updated(kvm);
   
         return old_memslots;
@@@ -776,7 -799,6 +799,6 @@@ int __kvm_set_memory_region(struct kvm 
         base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
         npages = mem->memory_size >> PAGE_SHIFT;
   
-       r = -EINVAL;
         if (npages > KVM_MEM_MAX_NR_PAGES)
                 goto out;
   
@@@ -790,7 -812,6 +812,6 @@@
         new.npages = npages;
         new.flags = mem->flags;
   
-       r = -EINVAL;
         if (npages) {
                 if (!old.npages)
                         change = KVM_MR_CREATE;
@@@ -846,7 -867,6 +867,6 @@@
         }
   
         if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-               r = -ENOMEM;
                 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
                                 GFP_KERNEL);
                 if (!slots)
@@@ -1075,9 -1095,9 +1095,9 @@@ EXPORT_SYMBOL_GPL(gfn_to_hva)
    * If writable is set to false, the hva returned by this function is only
    * allowed to be read.
    */
- unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+                                     gfn_t gfn, bool *writable)
   {
-       struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
         unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
   
         if (!kvm_is_error_hva(hva) && writable)
@@@ -1086,6 -1106,13 +1106,13 @@@
         return hva;
   }
   
+ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+ {
+       struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ 
+       return gfn_to_hva_memslot_prot(slot, gfn, writable);
+ }
+ 
   static int kvm_read_hva(void *data, void __user *hva, int len)
   {
         return __copy_from_user(data, hva, len);
@@@ -1107,6 -1134,43 +1134,43 @@@ static int get_user_page_nowait(struct 
         return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
   }
   
+ int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+                        unsigned long addr, bool write_fault,
+                        struct page **pagep)
+ {
+       int npages;
+       int locked = 1;
+       int flags = FOLL_TOUCH | FOLL_HWPOISON |
+                   (pagep ? FOLL_GET : 0) |
+                   (write_fault ? FOLL_WRITE : 0);
+ 
+       /*
+        * If retrying the fault, we get here *not* having allowed the filemap
+        * to wait on the page lock. We should now allow waiting on the IO with
+        * the mmap semaphore released.
+        */
+       down_read(&mm->mmap_sem);
+       npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+                                 &locked);
+       if (!locked) {
+               VM_BUG_ON(npages);
+ 
+               if (!pagep)
+                       return 0;
+ 
+               /*
+                * The previous call has now waited on the IO. Now we can
+                * retry and complete. Pass TRIED to ensure we do not re
+                * schedule async IO (see e.g. filemap_fault).
+                */
+               down_read(&mm->mmap_sem);
+               npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+                                         pagep, NULL, NULL);
+       }
+       up_read(&mm->mmap_sem);
+       return npages;
+ }
+ 
   static inline int check_user_page_hwpoison(unsigned long addr)
   {
         int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@@ -1169,9 -1233,15 +1233,15 @@@ static int hva_to_pfn_slow(unsigned lon
                 npages = get_user_page_nowait(current, current->mm,
                                               addr, write_fault, page);
                 up_read(&current->mm->mmap_sem);
-       } else
-               npages = get_user_pages_fast(addr, 1, write_fault,
-                                            page);
+       } else {
+               /*
+                * By now we have tried gup_fast, and possibly async_pf, and we
+                * are certainly not atomic. Time to retry the gup, allowing
+                * mmap semaphore to be relinquished in the case of IO.
+                */
+               npages = kvm_get_user_page_io(current, current->mm, addr,
+                                             write_fault, page);
+       }
         if (npages != 1)
                 return npages;
   
@@@ -1725,7 -1795,7 +1795,7 @@@ int kvm_vcpu_yield_to(struct kvm_vcpu *
         rcu_read_lock();
         pid = rcu_dereference(target->pid);
         if (pid)
- -              task = get_pid_task(target->pid, PIDTYPE_PID);
+ +              task = get_pid_task(pid, PIDTYPE_PID);
         rcu_read_unlock();
         if (!task)
                 return ret;
@@@ -1768,8 -1838,7 +1838,7 @@@ static bool kvm_vcpu_eligible_for_direc
         bool eligible;
   
         eligible = !vcpu->spin_loop.in_spin_loop ||
-                       (vcpu->spin_loop.in_spin_loop &&
-                        vcpu->spin_loop.dy_eligible);
+                   vcpu->spin_loop.dy_eligible;
   
         if (vcpu->spin_loop.in_spin_loop)
                 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
@@@ -1975,6 -2044,9 +2044,9 @@@ static long kvm_vcpu_ioctl(struct file 
         if (vcpu->kvm->mm != current->mm)
                 return -EIO;
   
+       if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+               return -EINVAL;
+ 
   #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
         /*
          * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
@@@ -2259,6 -2331,29 +2331,29 @@@ struct kvm_device *kvm_device_from_filp
         return filp->private_data;
   }
   
+ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+ #ifdef CONFIG_KVM_MPIC
+       [KVM_DEV_TYPE_FSL_MPIC_20]      = &kvm_mpic_ops,
+       [KVM_DEV_TYPE_FSL_MPIC_42]      = &kvm_mpic_ops,
+ #endif
+ 
+ #ifdef CONFIG_KVM_XICS
+       [KVM_DEV_TYPE_XICS]             = &kvm_xics_ops,
+ #endif
+ };
+ 
+ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+ {
+       if (type >= ARRAY_SIZE(kvm_device_ops_table))
+               return -ENOSPC;
+ 
+       if (kvm_device_ops_table[type] != NULL)
+               return -EEXIST;
+ 
+       kvm_device_ops_table[type] = ops;
+       return 0;
+ }
+ 
   static int kvm_ioctl_create_device(struct kvm *kvm,
                                    struct kvm_create_device *cd)
   {
@@@ -2267,36 -2362,12 +2362,12 @@@
         bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
         int ret;
   
-       switch (cd->type) {
- #ifdef CONFIG_KVM_MPIC
-       case KVM_DEV_TYPE_FSL_MPIC_20:
-       case KVM_DEV_TYPE_FSL_MPIC_42:
-               ops = &kvm_mpic_ops;
-               break;
- #endif
- #ifdef CONFIG_KVM_XICS
-       case KVM_DEV_TYPE_XICS:
-               ops = &kvm_xics_ops;
-               break;
- #endif
- #ifdef CONFIG_KVM_VFIO
-       case KVM_DEV_TYPE_VFIO:
-               ops = &kvm_vfio_ops;
-               break;
- #endif
- #ifdef CONFIG_KVM_ARM_VGIC
-       case KVM_DEV_TYPE_ARM_VGIC_V2:
-               ops = &kvm_arm_vgic_v2_ops;
-               break;
- #endif
- #ifdef CONFIG_S390
-       case KVM_DEV_TYPE_FLIC:
-               ops = &kvm_flic_ops;
-               break;
- #endif
-       default:
+       if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+               return -ENODEV;
+ 
+       ops = kvm_device_ops_table[cd->type];
+       if (ops == NULL)
                 return -ENODEV;
-       }
   
         if (test)
                 return 0;
@@@ -2611,7 -2682,6 +2682,6 @@@ static long kvm_dev_ioctl(struct file *
   
         switch (ioctl) {
         case KVM_GET_API_VERSION:
-               r = -EINVAL;
                 if (arg)
                         goto out;
                 r = KVM_API_VERSION;
@@@ -2623,7 -2693,6 +2693,6 @@@
                 r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
                 break;
         case KVM_GET_VCPU_MMAP_SIZE:
-               r = -EINVAL;
                 if (arg)
                         goto out;
                 r = PAGE_SIZE;     /* struct kvm_run */
@@@ -2668,7 -2737,7 +2737,7 @@@ static void hardware_enable_nolock(voi
   
         cpumask_set_cpu(cpu, cpus_hardware_enabled);
   
-       r = kvm_arch_hardware_enable(NULL);
+       r = kvm_arch_hardware_enable();
   
         if (r) {
                 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@@ -2693,7 -2762,7 +2762,7 @@@ static void hardware_disable_nolock(voi
         if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
                 return;
         cpumask_clear_cpu(cpu, cpus_hardware_enabled);
-       kvm_arch_hardware_disable(NULL);
+       kvm_arch_hardware_disable();
   }
   
   static void hardware_disable(void)
@@@ -3123,6 -3192,8 +3192,8 @@@ static void kvm_sched_in(struct preempt
         if (vcpu->preempted)
                 vcpu->preempted = false;
   
+       kvm_arch_sched_in(vcpu, cpu);
+ 
         kvm_arch_vcpu_load(vcpu, cpu);
   }
   
@@@ -3214,6 -3285,9 +3285,9 @@@ int kvm_init(void *opaque, unsigned vcp
                 goto out_undebugfs;
         }
   
+       r = kvm_vfio_ops_init();
+       WARN_ON(r);
+ 
         return 0;
   
   out_undebugfs:
author	Linus Torvalds <[email protected]>
	Wed, 8 Oct 2014 09:27:39 +0000 (05:27 -0400)
committer	Linus Torvalds <[email protected]>
	Wed, 8 Oct 2014 09:27:39 +0000 (05:27 -0400)
Files changed (per-file links: patch | diff1 | diff2 | blob | history, where columns 1 and 2 refer to the two merge parents):
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/s390/include/asm/pgtable.h
arch/x86/include/asm/cpufeature.h
virt/kvm/kvm_main.c