Merge branch 'stable/xen-swiotlb-0.8.6' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <[email protected]>

Thu, 12 Aug 2010 16:09:41 +0000 (09:09 -0700)

committer Linus Torvalds <[email protected]>

Thu, 12 Aug 2010 16:09:41 +0000 (09:09 -0700)
author Linus Torvalds <[email protected]>
Thu, 12 Aug 2010 16:09:41 +0000 (09:09 -0700)
committer Linus Torvalds <[email protected]>
Thu, 12 Aug 2010 16:09:41 +0000 (09:09 -0700)
diff --combined arch/x86/xen/Makefile

index 930954685980bbdbbf58af280b4729c85211f596,32af238055c3fd98f287fdf9a1804ab45914694f..779385158915ee1055826205ea331ab7e973ff6a
--- 1/arch/x86/xen/Makefile
--- 2/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@@ -12,9 -12,10 +12,10 @@@ CFLAGS_mmu.o                        := $(nostackp
   
   obj-y         := enlighten.o setup.o multicalls.o mmu.o irq.o \
                         time.o xen-asm.o xen-asm_$(BITS).o \
- -                      grant-table.o suspend.o
+ +                      grant-table.o suspend.o platform-pci-unplug.o
   
   obj-$(CONFIG_SMP)             += smp.o
   obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
   obj-$(CONFIG_XEN_DEBUG_FS)    += debugfs.o
   
+ obj-$(CONFIG_SWIOTLB_XEN)     += pci-swiotlb-xen.o
diff --combined arch/x86/xen/enlighten.c

index d4ff5e83621d14d75e1fe3f7632fe09a5ab8670c,3254e8bc4cd7718507025526da9e65535bc80699..7d46c84414188bf401777e4d65a9652e5cc09fa5
--- 1/arch/x86/xen/enlighten.c
--- 2/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@@ -11,7 -11,6 +11,7 @@@
    * Jeremy Fitzhardinge <[email protected]>, XenSource Inc, 2007
    */
   
+ +#include <linux/cpu.h>
   #include <linux/kernel.h>
   #include <linux/init.h>
   #include <linux/smp.h>
@@@ -36,10 -35,8 +36,10 @@@
   #include <xen/interface/version.h>
   #include <xen/interface/physdev.h>
   #include <xen/interface/vcpu.h>
+ +#include <xen/interface/memory.h>
   #include <xen/features.h>
   #include <xen/page.h>
+ +#include <xen/hvm.h>
   #include <xen/hvc-console.h>
   
   #include <asm/paravirt.h>
@@@ -58,9 -55,7 +58,9 @@@
   #include <asm/pgtable.h>
   #include <asm/tlbflush.h>
   #include <asm/reboot.h>
+ +#include <asm/setup.h>
   #include <asm/stackprotector.h>
+ +#include <asm/hypervisor.h>
   
   #include "xen-ops.h"
   #include "mmu.h"
@@@ -81,10 -76,6 +81,10 @@@ struct shared_info xen_dummy_shared_inf
   
   void *xen_initial_gdt;
   
+ +RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
+ +__read_mostly int xen_have_vector_callback;
+ +EXPORT_SYMBOL_GPL(xen_have_vector_callback);
+ +
   /*
    * Point at some empty memory to start with. We map the real shared_info
    * page as soon as fixmap is up and running.
@@@ -106,14 -97,6 +106,14 @@@ struct shared_info *HYPERVISOR_shared_i
    */
   static int have_vcpu_info_placement = 1;
   
+ +static void clamp_max_cpus(void)
+ +{
+ +#ifdef CONFIG_SMP
+ +      if (setup_max_cpus > MAX_VIRT_CPUS)
+ +              setup_max_cpus = MAX_VIRT_CPUS;
+ +#endif
+ +}
+ +
   static void xen_vcpu_setup(int cpu)
   {
         struct vcpu_register_vcpu_info info;
@@@ -121,17 -104,13 +121,17 @@@
         struct vcpu_info *vcpup;
   
         BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
- -      per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
   
- -      if (!have_vcpu_info_placement)
- -              return;         /* already tested, not available */
+ +      if (cpu < MAX_VIRT_CPUS)
+ +              per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
   
- -      vcpup = &per_cpu(xen_vcpu_info, cpu);
+ +      if (!have_vcpu_info_placement) {
+ +              if (cpu >= MAX_VIRT_CPUS)
+ +                      clamp_max_cpus();
+ +              return;
+ +      }
   
+ +      vcpup = &per_cpu(xen_vcpu_info, cpu);
         info.mfn = arbitrary_virt_to_mfn(vcpup);
         info.offset = offset_in_page(vcpup);
   
@@@ -146,7 -125,6 +146,7 @@@
         if (err) {
                 printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
                 have_vcpu_info_placement = 0;
+ +              clamp_max_cpus();
         } else {
                 /* This cpu is using the registered vcpu info, even if
                    later ones fail to. */
@@@ -753,6 -731,7 +753,6 @@@ static void set_xen_basic_apic_ops(void
   
   #endif
   
- -
   static void xen_clts(void)
   {
         struct multicall_space mcs;
@@@ -947,6 -926,10 +947,6 @@@ static const struct pv_init_ops xen_ini
         .patch = xen_patch,
   };
   
- -static const struct pv_time_ops xen_time_ops __initdata = {
- -      .sched_clock = xen_sched_clock,
- -};
- -
   static const struct pv_cpu_ops xen_cpu_ops __initdata = {
         .cpuid = xen_cpuid,
   
@@@ -1045,23 -1028,6 +1045,23 @@@ static void xen_crash_shutdown(struct p
         xen_reboot(SHUTDOWN_crash);
   }
   
+ +static int
+ +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+ +{
+ +      xen_reboot(SHUTDOWN_crash);
+ +      return NOTIFY_DONE;
+ +}
+ +
+ +static struct notifier_block xen_panic_block = {
+ +      .notifier_call= xen_panic_event,
+ +};
+ +
+ +int xen_panic_handler_init(void)
+ +{
+ +      atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+ +      return 0;
+ +}
+ +
   static const struct machine_ops __initdata xen_machine_ops = {
         .restart = xen_restart,
         .halt = xen_machine_halt,
@@@ -1101,6 -1067,7 +1101,6 @@@ asmlinkage void __init xen_start_kernel
         /* Install Xen paravirt ops */
         pv_info = xen_info;
         pv_init_ops = xen_init_ops;
- -      pv_time_ops = xen_time_ops;
         pv_cpu_ops = xen_cpu_ops;
         pv_apic_ops = xen_apic_ops;
   
@@@ -1108,7 -1075,13 +1108,7 @@@
         x86_init.oem.arch_setup = xen_arch_setup;
         x86_init.oem.banner = xen_banner;
   
- -      x86_init.timers.timer_init = xen_time_init;
- -      x86_init.timers.setup_percpu_clockev = x86_init_noop;
- -      x86_cpuinit.setup_percpu_clockev = x86_init_noop;
- -
- -      x86_platform.calibrate_tsc = xen_tsc_khz;
- -      x86_platform.get_wallclock = xen_get_wallclock;
- -      x86_platform.set_wallclock = xen_set_wallclock;
+ +      xen_init_time_ops();
   
         /*
          * Set up some pagetable state before starting to set any ptes.
@@@ -1172,6 -1145,10 +1172,10 @@@
   
         pgd = (pgd_t *)xen_start_info->pt_base;
   
+       if (!xen_initial_domain())
+               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+ 
+       __supported_pte_mask |= _PAGE_IOMAP;
         /* Don't do the full vcpu_info placement stuff until we have a
            possible map and a non-dummy shared_info. */
         per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@@ -1233,139 -1210,3 +1237,139 @@@
         x86_64_start_reservations((char *)__pa_symbol(&boot_params));
   #endif
   }
+ +
+ +static uint32_t xen_cpuid_base(void)
+ +{
+ +      uint32_t base, eax, ebx, ecx, edx;
+ +      char signature[13];
+ +
+ +      for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ +              cpuid(base, &eax, &ebx, &ecx, &edx);
+ +              *(uint32_t *)(signature + 0) = ebx;
+ +              *(uint32_t *)(signature + 4) = ecx;
+ +              *(uint32_t *)(signature + 8) = edx;
+ +              signature[12] = 0;
+ +
+ +              if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
+ +                      return base;
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +static int init_hvm_pv_info(int *major, int *minor)
+ +{
+ +      uint32_t eax, ebx, ecx, edx, pages, msr, base;
+ +      u64 pfn;
+ +
+ +      base = xen_cpuid_base();
+ +      cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+ +
+ +      *major = eax >> 16;
+ +      *minor = eax & 0xffff;
+ +      printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
+ +
+ +      cpuid(base + 2, &pages, &msr, &ecx, &edx);
+ +
+ +      pfn = __pa(hypercall_page);
+ +      wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+ +
+ +      xen_setup_features();
+ +
+ +      pv_info = xen_info;
+ +      pv_info.kernel_rpl = 0;
+ +
+ +      xen_domain_type = XEN_HVM_DOMAIN;
+ +
+ +      return 0;
+ +}
+ +
+ +void xen_hvm_init_shared_info(void)
+ +{
+ +      int cpu;
+ +      struct xen_add_to_physmap xatp;
+ +      static struct shared_info *shared_info_page = 0;
+ +
+ +      if (!shared_info_page)
+ +              shared_info_page = (struct shared_info *)
+ +                      extend_brk(PAGE_SIZE, PAGE_SIZE);
+ +      xatp.domid = DOMID_SELF;
+ +      xatp.idx = 0;
+ +      xatp.space = XENMAPSPACE_shared_info;
+ +      xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ +      if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ +              BUG();
+ +
+ +      HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
+ +
+ +      /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+ +       * page, we use it in the event channel upcall and in some pvclock
+ +       * related functions. We don't need the vcpu_info placement
+ +       * optimizations because we don't use any pv_mmu or pv_irq op on
+ +       * HVM.
+ +       * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+ +       * online but xen_hvm_init_shared_info is run at resume time too and
+ +       * in that case multiple vcpus might be online. */
+ +      for_each_online_cpu(cpu) {
+ +              per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ +      }
+ +}
+ +
+ +#ifdef CONFIG_XEN_PVHVM
+ +static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
+ +                                  unsigned long action, void *hcpu)
+ +{
+ +      int cpu = (long)hcpu;
+ +      switch (action) {
+ +      case CPU_UP_PREPARE:
+ +              per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ +              break;
+ +      default:
+ +              break;
+ +      }
+ +      return NOTIFY_OK;
+ +}
+ +
+ +static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+ +      .notifier_call  = xen_hvm_cpu_notify,
+ +};
+ +
+ +static void __init xen_hvm_guest_init(void)
+ +{
+ +      int r;
+ +      int major, minor;
+ +
+ +      r = init_hvm_pv_info(&major, &minor);
+ +      if (r < 0)
+ +              return;
+ +
+ +      xen_hvm_init_shared_info();
+ +
+ +      if (xen_feature(XENFEAT_hvm_callback_vector))
+ +              xen_have_vector_callback = 1;
+ +      register_cpu_notifier(&xen_hvm_cpu_notifier);
+ +      xen_unplug_emulated_devices();
+ +      have_vcpu_info_placement = 0;
+ +      x86_init.irqs.intr_init = xen_init_IRQ;
+ +      xen_hvm_init_time_ops();
+ +      xen_hvm_init_mmu_ops();
+ +}
+ +
+ +static bool __init xen_hvm_platform(void)
+ +{
+ +      if (xen_pv_domain())
+ +              return false;
+ +
+ +      if (!xen_cpuid_base())
+ +              return false;
+ +
+ +      return true;
+ +}
+ +
+ +const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+ +      .name                   = "Xen HVM",
+ +      .detect                 = xen_hvm_platform,
+ +      .init_platform          = xen_hvm_guest_init,
+ +};
+ +EXPORT_SYMBOL(x86_hyper_xen_hvm);
+ +#endif
diff --combined arch/x86/xen/mmu.c

index 413b19b3d0fe5322ebe5a492d432eb132304aadf,ef5728dde8f39cf77ffa96d54cac44bba3130fcc..42086ac406af21da6d281687625684c0dddbe11c
--- 1/arch/x86/xen/mmu.c
--- 2/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@@ -42,6 -42,7 +42,7 @@@
   #include <linux/highmem.h>
   #include <linux/debugfs.h>
   #include <linux/bug.h>
+ #include <linux/vmalloc.h>
   #include <linux/module.h>
   #include <linux/gfp.h>
   
@@@ -51,15 -52,18 +52,19 @@@
   #include <asm/mmu_context.h>
   #include <asm/setup.h>
   #include <asm/paravirt.h>
+ #include <asm/e820.h>
   #include <asm/linkage.h>
+ #include <asm/page.h>
   
   #include <asm/xen/hypercall.h>
   #include <asm/xen/hypervisor.h>
   
+ #include <xen/xen.h>
   #include <xen/page.h>
   #include <xen/interface/xen.h>
+ +#include <xen/interface/hvm/hvm_op.h>
   #include <xen/interface/version.h>
+ #include <xen/interface/memory.h>
   #include <xen/hvc-console.h>
   
   #include "multicalls.h"
@@@ -68,6 -72,13 +73,13 @@@
   
   #define MMU_UPDATE_HISTO      30
   
+ /*
+  * Protects atomic reservation decrease/increase against concurrent increases.
+  * Also protects non-atomic updates of current_pages and driver_pages, and
+  * balloon lists.
+  */
+ DEFINE_SPINLOCK(xen_reservation_lock);
+ 
   #ifdef CONFIG_XEN_DEBUG_FS
   
   static struct {
@@@ -378,6 -389,28 +390,28 @@@ static bool xen_page_pinned(void *ptr
         return PagePinned(page);
   }
   
+ static bool xen_iomap_pte(pte_t pte)
+ {
+       return pte_flags(pte) & _PAGE_IOMAP;
+ }
+ 
+ static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
+ {
+       struct multicall_space mcs;
+       struct mmu_update *u;
+ 
+       mcs = xen_mc_entry(sizeof(*u));
+       u = mcs.args;
+ 
+       /* ptep might be kmapped when using 32-bit HIGHPTE */
+       u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+       u->val = pte_val_ma(pteval);
+ 
+       MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
+ 
+       xen_mc_issue(PARAVIRT_LAZY_MMU);
+ }
+ 
   static void xen_extend_mmu_update(const struct mmu_update *update)
   {
         struct multicall_space mcs;
@@@ -454,6 -487,11 +488,11 @@@ void set_pte_mfn(unsigned long vaddr, u
   void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pteval)
   {
+       if (xen_iomap_pte(pteval)) {
+               xen_set_iomap_pte(ptep, pteval);
+               goto out;
+       }
+ 
         ADD_STATS(set_pte_at, 1);
   //    ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
         ADD_STATS(set_pte_at_current, mm == current->mm);
@@@ -524,8 -562,25 +563,25 @@@ static pteval_t pte_pfn_to_mfn(pteval_
         return val;
   }
   
+ static pteval_t iomap_pte(pteval_t val)
+ {
+       if (val & _PAGE_PRESENT) {
+               unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+               pteval_t flags = val & PTE_FLAGS_MASK;
+ 
+               /* We assume the pte frame number is a MFN, so
+                  just use it as-is. */
+               val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
+       }
+ 
+       return val;
+ }
+ 
   pteval_t xen_pte_val(pte_t pte)
   {
+       if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
+               return pte.pte;
+ 
         return pte_mfn_to_pfn(pte.pte);
   }
   PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
@@@ -538,7 -593,22 +594,22 @@@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val)
   
   pte_t xen_make_pte(pteval_t pte)
   {
-       pte = pte_pfn_to_mfn(pte);
+       phys_addr_t addr = (pte & PTE_PFN_MASK);
+ 
+       /*
+        * Unprivileged domains are allowed to do IOMAPpings for
+        * PCI passthrough, but not map ISA space.  The ISA
+        * mappings are just dummy local mappings to keep other
+        * parts of the kernel happy.
+        */
+       if (unlikely(pte & _PAGE_IOMAP) &&
+           (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+               pte = iomap_pte(pte);
+       } else {
+               pte &= ~_PAGE_IOMAP;
+               pte = pte_pfn_to_mfn(pte);
+       }
+ 
         return native_make_pte(pte);
   }
   PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
@@@ -594,6 -664,11 +665,11 @@@ void xen_set_pud(pud_t *ptr, pud_t val
   
   void xen_set_pte(pte_t *ptep, pte_t pte)
   {
+       if (xen_iomap_pte(pte)) {
+               xen_set_iomap_pte(ptep, pte);
+               return;
+       }
+ 
         ADD_STATS(pte_update, 1);
   //    ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
         ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
@@@ -610,6 -685,11 +686,11 @@@
   #ifdef CONFIG_X86_PAE
   void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
   {
+       if (xen_iomap_pte(pte)) {
+               xen_set_iomap_pte(ptep, pte);
+               return;
+       }
+ 
         set_64bit((u64 *)ptep, native_pte_val(pte));
   }
   
@@@ -936,8 -1016,6 +1017,6 @@@ static int xen_pin_page(struct mm_struc
      read-only, and can be pinned. */
   static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
   {
-       vm_unmap_aliases();
- 
         xen_mc_batch();
   
         if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
@@@ -1501,7 -1579,6 +1580,6 @@@ static void xen_alloc_ptpage(struct mm_
         if (PagePinned(virt_to_page(mm->pgd))) {
                 SetPagePinned(page);
   
-               vm_unmap_aliases();
                 if (!PageHighMem(page)) {
                         make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
                         if (level == PT_PTE && USE_SPLIT_PTLOCKS)
@@@ -1812,9 -1889,16 +1890,16 @@@ static void xen_set_fixmap(unsigned idx
                 pte = pfn_pte(phys, prot);
                 break;
   
-       default:
+       case FIX_PARAVIRT_BOOTMAP:
+               /* This is an MFN, but it isn't an IO mapping from the
+                  IO domain */
                 pte = mfn_pte(phys, prot);
                 break;
+ 
+       default:
+               /* By default, set_fixmap is used for hardware mappings */
+               pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
+               break;
         }
   
         __native_set_fixmap(idx, pte);
@@@ -1940,42 -2024,206 +2025,240 @@@ void __init xen_init_mmu_ops(void
         x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
         x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
         pv_mmu_ops = xen_mmu_ops;
+ 
+       vmap_lazy_unmap = false;
+ }
+ 
+ /* Protected by xen_reservation_lock. */
+ #define MAX_CONTIG_ORDER 9 /* 2MB */
+ static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+ 
+ #define VOID_PTE (mfn_pte(0, __pgprot(0)))
+ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
+                               unsigned long *in_frames,
+                               unsigned long *out_frames)
+ {
+       int i;
+       struct multicall_space mcs;
+ 
+       xen_mc_batch();
+       for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
+               mcs = __xen_mc_entry(0);
+ 
+               if (in_frames)
+                       in_frames[i] = virt_to_mfn(vaddr);
+ 
+               MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
+               set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ 
+               if (out_frames)
+                       out_frames[i] = virt_to_pfn(vaddr);
+       }
+       xen_mc_issue(0);
+ }
+ 
+ /*
+  * Update the pfn-to-mfn mappings for a virtual address range, either to
+  * point to an array of mfns, or contiguously from a single starting
+  * mfn.
+  */
+ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
+                                    unsigned long *mfns,
+                                    unsigned long first_mfn)
+ {
+       unsigned i, limit;
+       unsigned long mfn;
+ 
+       xen_mc_batch();
+ 
+       limit = 1u << order;
+       for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
+               struct multicall_space mcs;
+               unsigned flags;
+ 
+               mcs = __xen_mc_entry(0);
+               if (mfns)
+                       mfn = mfns[i];
+               else
+                       mfn = first_mfn + i;
+ 
+               if (i < (limit - 1))
+                       flags = 0;
+               else {
+                       if (order == 0)
+                               flags = UVMF_INVLPG | UVMF_ALL;
+                       else
+                               flags = UVMF_TLB_FLUSH | UVMF_ALL;
+               }
+ 
+               MULTI_update_va_mapping(mcs.mc, vaddr,
+                               mfn_pte(mfn, PAGE_KERNEL), flags);
+ 
+               set_phys_to_machine(virt_to_pfn(vaddr), mfn);
+       }
+ 
+       xen_mc_issue(0);
+ }
+ 
+ /*
+  * Perform the hypercall to exchange a region of our pfns to point to
+  * memory with the required contiguous alignment.  Takes the pfns as
+  * input, and populates mfns as output.
+  *
+  * Returns a success code indicating whether the hypervisor was able to
+  * satisfy the request or not.
+  */
+ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
+                              unsigned long *pfns_in,
+                              unsigned long extents_out,
+                              unsigned int order_out,
+                              unsigned long *mfns_out,
+                              unsigned int address_bits)
+ {
+       long rc;
+       int success;
+ 
+       struct xen_memory_exchange exchange = {
+               .in = {
+                       .nr_extents   = extents_in,
+                       .extent_order = order_in,
+                       .extent_start = pfns_in,
+                       .domid        = DOMID_SELF
+               },
+               .out = {
+                       .nr_extents   = extents_out,
+                       .extent_order = order_out,
+                       .extent_start = mfns_out,
+                       .address_bits = address_bits,
+                       .domid        = DOMID_SELF
+               }
+       };
+ 
+       BUG_ON(extents_in << order_in != extents_out << order_out);
+ 
+       rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+       success = (exchange.nr_exchanged == extents_in);
+ 
+       BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
+       BUG_ON(success && (rc != 0));
+ 
+       return success;
+ }
+ 
+ int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
+                                unsigned int address_bits)
+ {
+       unsigned long *in_frames = discontig_frames, out_frame;
+       unsigned long  flags;
+       int            success;
+ 
+       /*
+        * Currently an auto-translated guest will not perform I/O, nor will
+        * it require PAE page directories below 4GB. Therefore any calls to
+        * this function are redundant and can be ignored.
+        */
+ 
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return 0;
+ 
+       if (unlikely(order > MAX_CONTIG_ORDER))
+               return -ENOMEM;
+ 
+       memset((void *) vstart, 0, PAGE_SIZE << order);
+ 
+       spin_lock_irqsave(&xen_reservation_lock, flags);
+ 
+       /* 1. Zap current PTEs, remembering MFNs. */
+       xen_zap_pfn_range(vstart, order, in_frames, NULL);
+ 
+       /* 2. Get a new contiguous memory extent. */
+       out_frame = virt_to_pfn(vstart);
+       success = xen_exchange_memory(1UL << order, 0, in_frames,
+                                     1, order, &out_frame,
+                                     address_bits);
+ 
+       /* 3. Map the new extent in place of old pages. */
+       if (success)
+               xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
+       else
+               xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
+ 
+       spin_unlock_irqrestore(&xen_reservation_lock, flags);
+ 
+       return success ? 0 : -ENOMEM;
+ }
+ EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
+ 
+ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+ {
+       unsigned long *out_frames = discontig_frames, in_frame;
+       unsigned long  flags;
+       int success;
+ 
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return;
+ 
+       if (unlikely(order > MAX_CONTIG_ORDER))
+               return;
+ 
+       memset((void *) vstart, 0, PAGE_SIZE << order);
+ 
+       spin_lock_irqsave(&xen_reservation_lock, flags);
+ 
+       /* 1. Find start MFN of contiguous extent. */
+       in_frame = virt_to_mfn(vstart);
+ 
+       /* 2. Zap current PTEs. */
+       xen_zap_pfn_range(vstart, order, NULL, out_frames);
+ 
+       /* 3. Do the exchange for non-contiguous MFNs. */
+       success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
+                                       0, out_frames, 0);
+ 
+       /* 4. Map new pages in place of old pages. */
+       if (success)
+               xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
+       else
+               xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
+ 
+       spin_unlock_irqrestore(&xen_reservation_lock, flags);
   }
+ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
   
+ +#ifdef CONFIG_XEN_PVHVM
+ +static void xen_hvm_exit_mmap(struct mm_struct *mm)
+ +{
+ +      struct xen_hvm_pagetable_dying a;
+ +      int rc;
+ +
+ +      a.domid = DOMID_SELF;
+ +      a.gpa = __pa(mm->pgd);
+ +      rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
+ +      WARN_ON_ONCE(rc < 0);
+ +}
+ +
+ +static int is_pagetable_dying_supported(void)
+ +{
+ +      struct xen_hvm_pagetable_dying a;
+ +      int rc = 0;
+ +
+ +      a.domid = DOMID_SELF;
+ +      a.gpa = 0x00;
+ +      rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
+ +      if (rc < 0) {
+ +              printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
+ +              return 0;
+ +      }
+ +      return 1;
+ +}
+ +
+ +void __init xen_hvm_init_mmu_ops(void)
+ +{
+ +      if (is_pagetable_dying_supported())
+ +              pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+ +}
+ +#endif
+ +
   #ifdef CONFIG_XEN_DEBUG_FS
   
   static struct dentry *d_mmu_debug;
diff --combined drivers/xen/Kconfig

index 0a882693663997634a553f76271f9cb287ea8fb2,97199c2a64a076ee47622c0f12987c4587c6a26c..60d71e9abe9fa369ce4a9deb99f8050ff6e855b3
--- 1/drivers/xen/Kconfig
--- 2/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@@ -62,13 -62,8 +62,18 @@@ config XEN_SYS_HYPERVISO
          virtual environment, /sys/hypervisor will still be present,
          but will have no xen contents.
   
+ +config XEN_PLATFORM_PCI
+ +      tristate "xen platform pci device driver"
+ +      depends on XEN_PVHVM
+ +      default m
+ +      help
+ +        Driver for the Xen PCI Platform device: it is responsible for
+ +        initializing xenbus and grant_table when running in a Xen HVM
+ +        domain. As a consequence this driver is required to run any Xen PV
+ +        frontend on Xen HVM.
++
+ config SWIOTLB_XEN
+       def_bool y
+       depends on SWIOTLB
+ 
   endmenu
diff --combined drivers/xen/Makefile

index e392fb776af365823e5b8985245dc5613c303a4c,85f84cff810469628702f6ed56563dd1b37ac1bf..fcaf838f54be26b6c37cf48ec174922ffda3cb1d
--- 1/drivers/xen/Makefile
--- 2/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@@ -10,4 -10,4 +10,5 @@@ obj-$(CONFIG_XEN_BALLOON)     += balloon.
   obj-$(CONFIG_XEN_DEV_EVTCHN)  += evtchn.o
   obj-$(CONFIG_XENFS)           += xenfs/
   obj-$(CONFIG_XEN_SYS_HYPERVISOR)      += sys-hypervisor.o
+ +obj-$(CONFIG_XEN_PLATFORM_PCI)        += platform-pci.o
+ obj-$(CONFIG_SWIOTLB_XEN)     += swiotlb-xen.o
diff --combined include/linux/vmalloc.h

index de05e96e0a70587efd6a3043852c610796a1a140,b840fdaf438c7814cadd0b3800aa5e9e14fcd913..01c2145118dc53bd831eaad32255e058b8db3dcd
--- 1/include/linux/vmalloc.h
--- 2/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@@ -7,6 -7,8 +7,8 @@@
   
   struct vm_area_struct;                /* vma defining user mapping in mm_types.h */
   
+ extern bool vmap_lazy_unmap;
+ 
   /* bits in flags of vmalloc's vm_struct below */
   #define VM_IOREMAP    0x00000001      /* ioremap() and friends */
   #define VM_ALLOC      0x00000002      /* vmalloc() */
@@@ -30,7 -32,7 +32,7 @@@ struct vm_struct 
         unsigned long           flags;
         struct page             **pages;
         unsigned int            nr_pages;
- -      unsigned long           phys_addr;
+ +      phys_addr_t             phys_addr;
         void                    *caller;
   };
   
diff --combined include/xen/xen-ops.h

index 46bc81ef74c67468a667e6f5d06026be1cc43f4c,d789c937c48aac9dc11e722201e7a573040eb8c3..351f4051f6d856d455d5f3d60b75bf3570209d9a
--- 1/include/xen/xen-ops.h
--- 2/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@@ -7,7 -7,6 +7,7 @@@ DECLARE_PER_CPU(struct vcpu_info *, xen
   
   void xen_pre_suspend(void);
   void xen_post_suspend(int suspend_cancelled);
+ +void xen_hvm_post_suspend(int suspend_cancelled);
   
   void xen_mm_pin_all(void);
   void xen_mm_unpin_all(void);
@@@ -15,6 -14,10 +15,12 @@@
   void xen_timer_resume(void);
   void xen_arch_resume(void);
   
+ +int xen_setup_shutdown_event(void);
+ +
+ extern unsigned long *xen_contiguous_bitmap;
+ int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
+                               unsigned int address_bits);
+ 
+ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
+ 
   #endif /* INCLUDE_XEN_OPS_H */
diff --combined mm/vmalloc.c

index 918c51335d64165348dc3b54775019f7a78bcaa7,7f35fe2cf9e7cbbef465fe1b985608e8c1f99d84..6b8889da69a60612301c2bd26244ae0f3e1e1966
--- 1/mm/vmalloc.c
--- 2/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@@ -31,6 -31,7 +31,7 @@@
   #include <asm/tlbflush.h>
   #include <asm/shmparam.h>
   
+ bool vmap_lazy_unmap __read_mostly = true;
   
   /*** Page table manipulation functions ***/
   
@@@ -502,6 -503,9 +503,9 @@@ static unsigned long lazy_max_pages(voi
   {
         unsigned int log;
   
+       if (!vmap_lazy_unmap)
+               return 0;
+ 
         log = fls(num_online_cpus());
   
         return log * (32UL * 1024 * 1024 / PAGE_SIZE);
@@@ -732,7 -736,7 +736,7 @@@ static struct vmap_block *new_vmap_bloc
                                         node, gfp_mask);
         if (unlikely(IS_ERR(va))) {
                 kfree(vb);
- -              return ERR_PTR(PTR_ERR(va));
+ +              return ERR_CAST(va);
         }
   
         err = radix_tree_preload(gfp_mask);
@@@ -2403,7 -2407,7 +2407,7 @@@ static int s_show(struct seq_file *m, v
                 seq_printf(m, " pages=%d", v->nr_pages);
   
         if (v->phys_addr)
- -              seq_printf(m, " phys=%lx", v->phys_addr);
+ +              seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
   
         if (v->flags & VM_IOREMAP)
                 seq_printf(m, " ioremap");
@@@ -2437,11 -2441,8 +2441,11 @@@ static int vmalloc_open(struct inode *i
         unsigned int *ptr = NULL;
         int ret;
   
- -      if (NUMA_BUILD)
+ +      if (NUMA_BUILD) {
                 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+ +              if (ptr == NULL)
+ +                      return -ENOMEM;
+ +      }
         ret = seq_open(file, &vmalloc_op);
         if (!ret) {
                 struct seq_file *m = file->private_data;
author	Linus Torvalds <[email protected]>
	Thu, 12 Aug 2010 16:09:41 +0000 (09:09 -0700)
committer	Linus Torvalds <[email protected]>
	Thu, 12 Aug 2010 16:09:41 +0000 (09:09 -0700)
		1	2
arch/x86/xen/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/enlighten.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/xen/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/xen/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/vmalloc.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/xen/xen-ops.h	patch \|	diff1 \|	diff2 \|	blob \| history
mm/vmalloc.c	patch \|	diff1 \|	diff2 \|	blob \| history