Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <[email protected]>
Tue, 2 May 2017 06:54:56 +0000 (23:54 -0700)
committer Linus Torvalds <[email protected]>
Tue, 2 May 2017 06:54:56 +0000 (23:54 -0700)
Pull x86 mm updates from Ingo Molnar:
 "The main x86 MM changes in this cycle were:

   - continued native kernel PCID support preparation patches to the TLB
     flushing code (Andy Lutomirski)

   - various fixes for 32-bit compat syscalls returning addresses above
     4 GB to applications launched from 64-bit binaries - motivated by
     C/R frameworks such as Virtuozzo. (Dmitry Safonov)

   - continued Intel 5-level paging enablement: in particular the
     conversion of x86 GUP to the generic GUP code. (Kirill A. Shutemov)

   - x86/mpx ABI corner case fixes/enhancements (Joerg Roedel)

   - ... plus misc updates, fixes and cleanups"
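
For a sense of scale on the 5-level paging work: with CONFIG_X86_5LEVEL the
virtual-address width grows from 48 to 57 bits (__VIRTUAL_MASK_SHIFT goes from
47 to 56). A minimal userspace sketch of the resulting sizes, with the usual
x86-64 constants assumed rather than taken from this merge:

    #include <stdio.h>

    int main(void)
    {
        /* __VIRTUAL_MASK_SHIFT: 47 with 4-level paging, 56 with 5-level (LA57). */
        const int shifts[] = { 47, 56 };

        for (int i = 0; i < 2; i++) {
            unsigned long long half = 1ULL << shifts[i]; /* bytes per canonical half */
            printf("%d-level paging: %d-bit VA, %llu TiB per half\n",
                   i ? 5 : 4, shifts[i] + 1, half >> 40);
        }
        return 0;
    }

This prints 128 TiB for the 4-level layout and 65536 TiB (64 PiB) for 5-level
paging, which is why asserts and masks hard-coded to bit 47 (see the x86/asm
and x86/boot commits below) had to become configurable.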

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits)
  mm, zone_device: Replace {get, put}_zone_device_page() with a single reference to fix pmem crash
  x86/mm: Fix flush_tlb_page() on Xen
  x86/mm: Make flush_tlb_mm_range() more predictable
  x86/mm: Remove flush_tlb() and flush_tlb_current_task()
  x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly()
  x86/mm/64: Fix crash in remove_pagetable()
  Revert "x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation"
  x86/boot/e820: Remove a redundant self assignment
  x86/mm: Fix dump pagetables for 4 levels of page tables
  x86/mpx, selftests: Only check bounds-vs-shadow when we keep shadow
  x86/mpx: Correctly report do_mpx_bt_fault() failures to user-space
  Revert "x86/mm/numa: Remove numa_nodemask_from_meminfo()"
  x86/espfix: Add support for 5-level paging
  x86/kasan: Extend KASAN to support 5-level paging
  x86/mm: Add basic defines/helpers for CONFIG_X86_5LEVEL=y
  x86/paravirt: Add 5-level support to the paravirt code
  x86/mm: Define virtual memory map for 5-level paging
  x86/asm: Remove __VIRTUAL_MASK_SHIFT==47 assert
  x86/boot: Detect 5-level paging support
  x86/mm/numa: Remove numa_nodemask_from_meminfo()
  ...

arch/x86/Kconfig
arch/x86/entry/entry_64.S
arch/x86/entry/vdso/vma.c
arch/x86/include/asm/elf.h
arch/x86/kernel/setup.c
arch/x86/kvm/vmx.c
arch/x86/platform/efi/efi_64.c
drivers/nvdimm/pmem.c
mm/swap.c

diff --combined arch/x86/Kconfig
index 2b899858532a9a7e6c0638e20fe7a3ed18eabe1c,2bde14451e549e308645fe793bbcb3bca453d45c..8d4f87e5bba31abcedcb34d1b35dadf8efa3d76b
@@@ -98,6 -98,7 +98,6 @@@ config X8
        select HAVE_ACPI_APEI_NMI               if ACPI
        select HAVE_ALIGNED_STRUCT_PAGE         if SLUB
        select HAVE_ARCH_AUDITSYSCALL
 -      select HAVE_ARCH_HARDENED_USERCOPY
        select HAVE_ARCH_HUGE_VMAP              if X86_64 || X86_PAE
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN                  if X86_64 && SPARSEMEM_VMEMMAP
        select HAVE_ARCH_KMEMCHECK
        select HAVE_ARCH_MMAP_RND_BITS          if MMU
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
+       select HAVE_ARCH_COMPAT_MMAP_BASES      if MMU && COMPAT
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
        select HAVE_EBPF_JIT                    if X86_64
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select HAVE_EXIT_THREAD
 -      select HAVE_FENTRY                      if X86_64
 +      select HAVE_FENTRY                      if X86_64 || DYNAMIC_FTRACE
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
@@@ -289,6 -291,7 +290,7 @@@ config ARCH_SUPPORTS_DEBUG_PAGEALLO
  config KASAN_SHADOW_OFFSET
        hex
        depends on KASAN
+       default 0xdff8000000000000 if X86_5LEVEL
        default 0xdffffc0000000000
  
  config HAVE_INTEL_TXT
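
The two KASAN_SHADOW_OFFSET defaults follow from the generic KASAN mapping
shadow(addr) = (addr >> 3) + offset. A quick check, with the kernel-half start
addresses assumed from the documented 4-level and 5-level x86-64 memory maps
(they are not part of this hunk):

    #include <stdio.h>

    int main(void)
    {
        /* Assumed start of the kernel half of the address space. */
        unsigned long long start4 = 0xffff800000000000ULL;   /* 4-level */
        unsigned long long start5 = 0xff00000000000000ULL;   /* 5-level */

        /* Generic KASAN: shadow byte address = (addr >> 3) + offset. */
        printf("4-level shadow start: %#llx\n",
               (start4 >> 3) + 0xdffffc0000000000ULL);   /* 0xffffec0000000000 */
        printf("5-level shadow start: %#llx\n",
               (start5 >> 3) + 0xdff8000000000000ULL);   /* 0xffd8000000000000 */
        return 0;
    }

Both results land inside the kernel half of the respective layouts, which is
the point of giving X86_5LEVEL its own default here.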
@@@ -1042,14 -1045,6 +1044,14 @@@ config X86_MC
          The action the kernel takes depends on the severity of the problem,
          ranging from warning messages to halting the machine.
  
 +config X86_MCELOG_LEGACY
 +      bool "Support for deprecated /dev/mcelog character device"
 +      depends on X86_MCE
 +      ---help---
 +        Enable support for /dev/mcelog which is needed by the old mcelog
 +        userspace logging daemon. Consider switching to the new generation
 +        rasdaemon solution.
 +
  config X86_MCE_INTEL
        def_bool y
        prompt "Intel MCE features"
@@@ -1079,7 -1074,7 +1081,7 @@@ config X86_MCE_THRESHOL
        def_bool y
  
  config X86_MCE_INJECT
 -      depends on X86_MCE && X86_LOCAL_APIC
 +      depends on X86_MCE && X86_LOCAL_APIC && X86_MCELOG_LEGACY
        tristate "Machine check injector support"
        ---help---
          Provide support for injecting machine checks for testing purposes.
@@@ -1973,7 -1968,7 +1975,7 @@@ config RELOCATABL
  config RANDOMIZE_BASE
        bool "Randomize the address of the kernel image (KASLR)"
        depends on RELOCATABLE
 -      default n
 +      default y
        ---help---
          In support of Kernel Address Space Layout Randomization (KASLR),
          this randomizes the physical address at which the kernel image
          theoretically possible, but the implementations are further
          limited due to memory layouts.
  
 -        If unsure, say N.
 +        If unsure, say Y.
  
  # Relocation on x86 needs some additional build support
  config X86_NEED_RELOCS
@@@ -2052,7 -2047,7 +2054,7 @@@ config RANDOMIZE_MEMOR
           configuration have in average 30,000 different possible virtual
           addresses for each memory section.
  
 -         If unsure, say N.
 +         If unsure, say Y.
  
  config RANDOMIZE_MEMORY_PHYSICAL_PADDING
        hex "Physical memory mapping padding" if EXPERT
index d2b2a2948ffe8ec3b045a64521d2703e7db3a445,f07b4efb34d5b0205db483f217c3c7e4d98b84e2..607d72c4a485cf25a3e582804cea0b7c031e1d32
@@@ -212,7 -212,7 +212,7 @@@ entry_SYSCALL_64_fastpath
         * If we see that no exit work is required (which we are required
         * to check with IRQs off), then we can go straight to SYSRET64.
         */
 -      DISABLE_INTERRUPTS(CLBR_NONE)
 +      DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
        movq    PER_CPU_VAR(current_task), %r11
        testl   $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
         * raise(3) will trigger this, for example.  IRQs are off.
         */
        TRACE_IRQS_ON
 -      ENABLE_INTERRUPTS(CLBR_NONE)
 +      ENABLE_INTERRUPTS(CLBR_ANY)
        SAVE_EXTRA_REGS
        movq    %rsp, %rdi
        call    syscall_return_slowpath /* returns with IRQs disabled */
@@@ -265,12 -265,9 +265,9 @@@ return_from_SYSCALL_64
         *
         * If width of "canonical tail" ever becomes variable, this will need
         * to be updated to remain correct on both old and new CPUs.
+        *
+        * Change top 16 bits to be the sign-extension of 47th bit
         */
-       .ifne __VIRTUAL_MASK_SHIFT - 47
-       .error "virtual address width changed -- SYSRET checks need update"
-       .endif
-       /* Change top 16 bits to be the sign-extension of 47th bit */
        shl     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
        sar     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
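
The shl/sar pair above is the standard trick for forcing an address into
canonical form: shift left so that bit __VIRTUAL_MASK_SHIFT becomes the sign
bit, then arithmetic-shift right to smear it across the upper bits. A C sketch
of the same operation, assuming the 4-level value of 47 (with 5-level paging
the macro becomes 56, which is why the hard-coded ==47 assert was dropped):

    #include <stdint.h>
    #include <stdio.h>

    #define VIRT_MASK_SHIFT 47   /* 4-level __VIRTUAL_MASK_SHIFT; 56 with 5-level */

    /* Make bit VIRT_MASK_SHIFT the sign bit, then smear it over the top bits.
     * (Right-shifting a negative value sign-extends on the compilers the
     * kernel supports.)
     */
    static uint64_t force_canonical(uint64_t addr)
    {
        int64_t tmp = (int64_t)(addr << (64 - (VIRT_MASK_SHIFT + 1)));

        return (uint64_t)(tmp >> (64 - (VIRT_MASK_SHIFT + 1)));
    }

    int main(void)
    {
        /* A canonical address survives; a non-canonical one gets folded. */
        printf("%#llx\n", (unsigned long long)force_canonical(0xffff812345678000ULL));
        printf("%#llx\n", (unsigned long long)force_canonical(0x0000912345678000ULL));
        return 0;
    }

If the operation changes the value, the return address was not canonical and
the exit path falls back to IRET rather than risking a SYSRET fault.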
  
@@@ -343,7 -340,7 +340,7 @@@ ENTRY(stub_ptregs_64
         * Called from fast path -- disable IRQs again, pop return address
         * and jump to slow path
         */
 -      DISABLE_INTERRUPTS(CLBR_NONE)
 +      DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
        popq    %rax
        jmp     entry_SYSCALL64_slow_path
@@@ -518,7 -515,7 +515,7 @@@ common_interrupt
        interrupt do_IRQ
        /* 0(%rsp): old RSP */
  ret_from_intr:
 -      DISABLE_INTERRUPTS(CLBR_NONE)
 +      DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
        decl    PER_CPU_VAR(irq_count)
  
@@@ -1051,7 -1048,7 +1048,7 @@@ END(paranoid_entry
   * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
   */
  ENTRY(paranoid_exit)
 -      DISABLE_INTERRUPTS(CLBR_NONE)
 +      DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF_DEBUG
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     paranoid_exit_no_swapgs
@@@ -1156,9 -1153,10 +1153,9 @@@ END(error_entry
   *   0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
   */
  ENTRY(error_exit)
 -      movl    %ebx, %eax
 -      DISABLE_INTERRUPTS(CLBR_NONE)
 +      DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
 -      testl   %eax, %eax
 +      testl   %ebx, %ebx
        jnz     retint_kernel
        jmp     retint_user
  END(error_exit)
index faf80fdeeacc3fac2e99804c855ec502d37db1d4,5c5d4d7618e6dac3ccd376fce7eb53a161de626d..139ad7726e10d58d739f21c16717e9553da3b2a2
@@@ -22,7 -22,6 +22,7 @@@
  #include <asm/page.h>
  #include <asm/desc.h>
  #include <asm/cpufeature.h>
 +#include <asm/mshyperv.h>
  
  #if defined(CONFIG_X86_64)
  unsigned int __read_mostly vdso64_enabled = 1;
@@@ -122,12 -121,6 +122,12 @@@ static int vvar_fault(const struct vm_s
                                vmf->address,
                                __pa(pvti) >> PAGE_SHIFT);
                }
 +      } else if (sym_offset == image->sym_hvclock_page) {
 +              struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
 +
 +              if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 +                      ret = vm_insert_pfn(vma, vmf->address,
 +                                          vmalloc_to_pfn(tsc_pg));
        }
  
        if (ret == 0 || ret == -EBUSY)
@@@ -361,7 -354,7 +361,7 @@@ static void vgetcpu_cpu_init(void *arg
        d.p = 1;                /* Present */
        d.d = 1;                /* 32-bit */
  
-       write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+       write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
  }
  
  static int vgetcpu_online(unsigned int cpu)
index 3762536619f8cb83c17777e60bbd56830d792cde,d4d3ed456cb7b1559a8aa3da27e355269ada35a1..e8ab9a46bc6890f802cbdc5bef85f4f16b221183
@@@ -287,14 -287,29 +287,29 @@@ struct task_struct
  
  #define       ARCH_DLINFO_IA32                                                \
  do {                                                                  \
 -      if (vdso32_enabled) {                                           \
 +      if (VDSO_CURRENT_BASE) {                                        \
                NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY);                    \
                NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);        \
        }                                                               \
  } while (0)
  
+ /*
+  * True on X86_32 or when emulating IA32 on X86_64
+  */
+ static inline int mmap_is_ia32(void)
+ {
+       return IS_ENABLED(CONFIG_X86_32) ||
+              (IS_ENABLED(CONFIG_COMPAT) &&
+               test_thread_flag(TIF_ADDR32));
+ }
+ extern unsigned long tasksize_32bit(void);
+ extern unsigned long tasksize_64bit(void);
+ extern unsigned long get_mmap_base(int is_legacy);
  #ifdef CONFIG_X86_32
  
+ #define __STACK_RND_MASK(is32bit) (0x7ff)
  #define STACK_RND_MASK (0x7ff)
  
  #define ARCH_DLINFO           ARCH_DLINFO_IA32
  #else /* CONFIG_X86_32 */
  
  /* 1GB for 64bit, 8MB for 32bit */
- #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+ #define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+ #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
  
  #define ARCH_DLINFO                                                   \
  do {                                                                  \
@@@ -348,16 -364,6 +364,6 @@@ extern int compat_arch_setup_additional
                                              int uses_interp);
  #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
  
- /*
-  * True on X86_32 or when emulating IA32 on X86_64
-  */
- static inline int mmap_is_ia32(void)
- {
-       return IS_ENABLED(CONFIG_X86_32) ||
-              (IS_ENABLED(CONFIG_COMPAT) &&
-               test_thread_flag(TIF_ADDR32));
- }
  /* Do not change the values. See get_align_mask() */
  enum align_flags {
        ALIGN_VA_32     = BIT(0),
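
The point of splitting STACK_RND_MASK into __STACK_RND_MASK(is32bit) is that
callers can pick the mask for the bitness of the syscall being handled rather
than always using the current task's flavour. The mask is consumed the way ELF
stack randomization normally consumes it; a userspace-style sketch, with
rand() standing in for the kernel's entropy source and 4 KiB pages assumed:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SHIFT 12                 /* assumed 4 KiB pages */

    /* Pick a page-aligned random stack offset from the relevant mask. */
    static uint64_t stack_rnd_offset(int is32bit)
    {
        uint64_t mask = is32bit ? 0x7ffULL : 0x3fffffULL;  /* __STACK_RND_MASK() */

        return ((uint64_t)rand() & mask) << PAGE_SHIFT;
    }

    int main(void)
    {
        printf("32-bit offset: %#llx\n", (unsigned long long)stack_rnd_offset(1));
        printf("64-bit offset: %#llx\n", (unsigned long long)stack_rnd_offset(0));
        return 0;
    }

The 32-bit mask spans 8 MiB of page-aligned offsets; the 64-bit mask allows a
much larger spread, which matters when a 32-bit image is set up from a 64-bit
process (the C/R case mentioned in the summary above).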
diff --combined arch/x86/kernel/setup.c
index 0b4d3c686b1ef94463c8caabce01e38404d1b8c1,21a3db1b9fecbfc17cac7795f5ac62bbd25d64dc..603a1669a2ecf2ba62f0b24bee93955296a9f97c
@@@ -70,7 -70,6 +70,7 @@@
  #include <linux/tboot.h>
  #include <linux/jiffies.h>
  
 +#include <linux/usb/xhci-dbgp.h>
  #include <video/edid.h>
  
  #include <asm/mtrr.h>
@@@ -812,26 -811,6 +812,26 @@@ dump_kernel_offset(struct notifier_bloc
        return 0;
  }
  
 +static void __init simple_udelay_calibration(void)
 +{
 +      unsigned int tsc_khz, cpu_khz;
 +      unsigned long lpj;
 +
 +      if (!boot_cpu_has(X86_FEATURE_TSC))
 +              return;
 +
 +      cpu_khz = x86_platform.calibrate_cpu();
 +      tsc_khz = x86_platform.calibrate_tsc();
 +
 +      tsc_khz = tsc_khz ? : cpu_khz;
 +      if (!tsc_khz)
 +              return;
 +
 +      lpj = tsc_khz * 1000;
 +      do_div(lpj, HZ);
 +      loops_per_jiffy = lpj;
 +}
 +
  /*
   * Determine if we were loaded by an EFI loader.  If so, then we have also been
   * passed the efi memmap, systab, etc., so we should use these data structures
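
The simple_udelay_calibration() helper above just seeds loops_per_jiffy from
the TSC: lpj = tsc_khz * 1000 / HZ. A worked example with assumed numbers (a
2.4 GHz TSC and HZ=250, neither of which appears in this diff):

    #include <stdio.h>

    int main(void)
    {
        unsigned long tsc_khz = 2400000;           /* assumed 2.4 GHz TSC   */
        unsigned long hz      = 250;               /* assumed CONFIG_HZ=250 */
        unsigned long lpj     = tsc_khz * 1000 / hz;

        printf("loops_per_jiffy = %lu\n", lpj);    /* 9600000 */
        return 0;
    }

The rough value only has to make udelay() usable earlier in boot; the full
delay calibration later on still refines it.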
@@@ -980,8 -959,6 +980,8 @@@ void __init setup_arch(char **cmdline_p
         */
        x86_configure_nx();
  
 +      simple_udelay_calibration();
 +
        parse_early_param();
  
  #ifdef CONFIG_MEMORY_HOTPLUG
        memblock_set_current_limit(ISA_END_ADDRESS);
        e820__memblock_setup();
  
 +      if (!early_xdbc_setup_hardware())
 +              early_xdbc_register_console();
 +
        reserve_bios_regions();
  
        if (efi_enabled(EFI_MEMMAP)) {
  
        kasan_init();
  
- #ifdef CONFIG_X86_32
-       /* sync back kernel address range */
-       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       KERNEL_PGD_PTRS);
-       /*
-        * sync back low identity map too.  It is used for example
-        * in the 32-bit EFI stub.
-        */
-       clone_pgd_range(initial_page_table,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- #endif
        tboot_probe();
  
        map_vsyscall();
diff --combined arch/x86/kvm/vmx.c
index 259e9b28ccf8e7ff2434d6911ffcfbfc4ef89f42,535cc065b844dc5be6d37b807eb1191c64cf84f3..1a471e5f963f8a31f45b6d971c2b262848876067
@@@ -935,7 -935,6 +935,6 @@@ static DEFINE_PER_CPU(struct vmcs *, cu
   * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
   */
  static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
- static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
  
  /*
   * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@@ -2057,14 -2056,13 +2056,13 @@@ static bool update_transition_efer(stru
   */
  static unsigned long segment_base(u16 selector)
  {
-       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
        struct desc_struct *table;
        unsigned long v;
  
        if (!(selector & ~SEGMENT_RPL_MASK))
                return 0;
  
-       table = (struct desc_struct *)gdt->address;
+       table = get_current_gdt_ro();
  
        if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
                u16 ldt_selector = kvm_read_ldt();
@@@ -2169,7 -2167,7 +2167,7 @@@ static void __vmx_load_host_state(struc
  #endif
        if (vmx->host_state.msr_host_bndcfgs)
                wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-       load_gdt(this_cpu_ptr(&host_gdt));
+       load_fixmap_gdt(raw_smp_processor_id());
  }
  
  static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@@ -2271,7 -2269,7 +2269,7 @@@ static void vmx_vcpu_load(struct kvm_vc
        }
  
        if (!already_loaded) {
-               struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+               void *gdt = get_current_gdt_ro();
                unsigned long sysenter_esp;
  
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                 */
                vmcs_writel(HOST_TR_BASE,
                            (unsigned long)this_cpu_ptr(&cpu_tss));
-               vmcs_writel(HOST_GDTR_BASE, gdt->address);
+               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
  
                /*
                 * VM exits change the host TR limit to 0x67 after a VM
@@@ -3471,8 -3469,6 +3469,6 @@@ static int hardware_enable(void
                ept_sync_global();
        }
  
-       native_store_gdt(this_cpu_ptr(&host_gdt));
        return 0;
  }
  
@@@ -8198,9 -8194,6 +8194,9 @@@ static bool nested_vmx_exit_handled(str
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
        case EXIT_REASON_PREEMPTION_TIMER:
                return false;
 +      case EXIT_REASON_PML_FULL:
 +              /* We don't expose PML support to L1. */
 +              return false;
        default:
                return true;
        }
@@@ -10270,18 -10263,6 +10266,18 @@@ static int prepare_vmcs02(struct kvm_vc
  
        }
  
 +      if (enable_pml) {
 +              /*
 +               * Conceptually we want to copy the PML address and index from
 +               * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
 +               * since we always flush the log on each vmexit, this happens
 +               * to be equivalent to simply resetting the fields in vmcs02.
 +               */
 +              ASSERT(vmx->pml_pg);
 +              vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 +              vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 +      }
 +
        if (nested_cpu_has_ept(vmcs12)) {
                kvm_mmu_unload(vcpu);
                nested_ept_init_mmu_context(vcpu);
index 642a8698ad61e22ceed781af013861c837368151,6b6b8e8d4ae79ac84f1e2f99c692edc85c239a87..c488625c9712de4fe150d01df5c260e650967265
@@@ -47,7 -47,7 +47,7 @@@
  #include <asm/pgalloc.h>
  
  /*
 - * We allocate runtime services regions bottom-up, starting from -4G, i.e.
 + * We allocate runtime services regions top-down, starting from -4G, i.e.
   * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
   */
  static u64 efi_va = EFI_VA_START;
@@@ -135,6 -135,7 +135,7 @@@ static pgd_t *efi_pgd
  int __init efi_alloc_page_tables(void)
  {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        gfp_t gfp_mask;
  
                return -ENOMEM;
  
        pgd = efi_pgd + pgd_index(EFI_VA_END);
+       p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
+       if (!p4d) {
+               free_page((unsigned long)efi_pgd);
+               return -ENOMEM;
+       }
  
-       pud = pud_alloc_one(NULL, 0);
+       pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
        if (!pud) {
+               if (CONFIG_PGTABLE_LEVELS > 4)
+                       free_page((unsigned long) pgd_page_vaddr(*pgd));
                free_page((unsigned long)efi_pgd);
                return -ENOMEM;
        }
  
-       pgd_populate(NULL, pgd, pud);
        return 0;
  }
  
@@@ -166,6 -172,7 +172,7 @@@ void efi_sync_low_kernel_mappings(void
  {
        unsigned num_entries;
        pgd_t *pgd_k, *pgd_efi;
+       p4d_t *p4d_k, *p4d_efi;
        pud_t *pud_k, *pud_efi;
  
        if (efi_enabled(EFI_OLD_MEMMAP))
        num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
        memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
  
+       /*
+        * As with PGDs, we share all P4D entries apart from the one entry
+        * that covers the EFI runtime mapping space.
+        */
+       BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
+       BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
+       pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
+       pgd_k = pgd_offset_k(EFI_VA_END);
+       p4d_efi = p4d_offset(pgd_efi, 0);
+       p4d_k = p4d_offset(pgd_k, 0);
+       num_entries = p4d_index(EFI_VA_END);
+       memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
        /*
         * We share all the PUD entries apart from those that map the
         * EFI regions. Copy around them.
        BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
        BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
  
-       pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
-       pud_efi = pud_offset(pgd_efi, 0);
-       pgd_k = pgd_offset_k(EFI_VA_END);
-       pud_k = pud_offset(pgd_k, 0);
+       p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
+       p4d_k = p4d_offset(pgd_k, EFI_VA_END);
+       pud_efi = pud_offset(p4d_efi, 0);
+       pud_k = pud_offset(p4d_k, 0);
  
        num_entries = pud_index(EFI_VA_END);
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
  
-       pud_efi = pud_offset(pgd_efi, EFI_VA_START);
-       pud_k = pud_offset(pgd_k, EFI_VA_START);
+       pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+       pud_k = pud_offset(p4d_k, EFI_VA_START);
  
        num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
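
The efi_64.c changes are a direct consequence of inserting the p4d level
between pgd and pud: every walk and every "share entries with init_mm" copy
gains one more step. The general shape of a kernel-address walk after this
series, sketched with the generic accessors (kernel-only code, assumes
<linux/mm.h>/<asm/pgtable.h>; huge pages and error paths not handled):

    static pte_t *walk_kernel_address(unsigned long addr)
    {
        pgd_t *pgd = pgd_offset_k(addr);
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        if (pgd_none(*pgd))
            return NULL;
        p4d = p4d_offset(pgd, addr);     /* new level; folds away on 4-level */
        if (p4d_none(*p4d))
            return NULL;
        pud = pud_offset(p4d, addr);     /* pud_offset() now takes a p4d_t * */
        if (pud_none(*pud))
            return NULL;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
            return NULL;
        return pte_offset_kernel(pmd, addr);
    }

On kernels built without CONFIG_X86_5LEVEL the p4d step is a no-op (the level
is folded into the pgd), so the same source serves both configurations.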
diff --combined drivers/nvdimm/pmem.c
index 5b536be5a12eb97023745a59f65283280b7b3675,fb7bbc79ac264421eba88de2120dbc2295b3b74d..fbc640bf06b01821c108711a89dea3564355357e
@@@ -25,6 -25,7 +25,7 @@@
  #include <linux/badblocks.h>
  #include <linux/memremap.h>
  #include <linux/vmalloc.h>
+ #include <linux/blk-mq.h>
  #include <linux/pfn_t.h>
  #include <linux/slab.h>
  #include <linux/pmem.h>
@@@ -231,6 -232,11 +232,11 @@@ static void pmem_release_queue(void *q
        blk_cleanup_queue(q);
  }
  
 -      blk_mq_freeze_queue_start(q);
+ static void pmem_freeze_queue(void *q)
+ {
++      blk_freeze_queue_start(q);
+ }
  static void pmem_release_disk(void *disk)
  {
        del_gendisk(disk);
@@@ -284,6 -290,9 +290,9 @@@ static int pmem_attach_disk(struct devi
        if (!q)
                return -ENOMEM;
  
+       if (devm_add_action_or_reset(dev, pmem_release_queue, q))
+               return -ENOMEM;
        pmem->pfn_flags = PFN_DEV;
        if (is_nd_pfn(dev)) {
                addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
                                pmem->size, ARCH_MEMREMAP_PMEM);
  
        /*
-        * At release time the queue must be dead before
+        * At release time the queue must be frozen before
         * devm_memremap_pages is unwound
         */
-       if (devm_add_action_or_reset(dev, pmem_release_queue, q))
+       if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
                return -ENOMEM;
  
        if (IS_ERR(addr))
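
The ordering works because devres actions run in reverse registration order:
pmem_release_queue() is registered first (so it runs last), the release hook
behind devm_memremap_pages() sits in the middle, and pmem_freeze_queue(),
registered last, runs first. A toy model of that LIFO unwinding (names
illustrative, not kernel code):

    #include <stdio.h>

    static void release_queue(void *data) { printf("3. blk_cleanup_queue()\n"); }
    static void unmap_pages(void *data)   { printf("2. devm_memremap_pages() unwind\n"); }
    static void freeze_queue(void *data)  { printf("1. blk_freeze_queue_start()\n"); }

    int main(void)
    {
        /* Registration order, as in pmem_attach_disk() above. */
        void (*actions[])(void *) = { release_queue, unmap_pages, freeze_queue };
        int n = sizeof(actions) / sizeof(actions[0]);

        /* Device release walks the list backwards. */
        for (int i = n - 1; i >= 0; i--)
            actions[i](NULL);
        return 0;
    }

So the queue is frozen while the memremap'd pages are torn down and only
cleaned up afterwards, which is exactly what the updated comment asks for.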
diff --combined mm/swap.c
index 5dabf444d724db98595567b0f7daed7d53fc877e,a4e6113276b5554dafd8d70ba9712f3bd28813b9..d8d9ee9e311a6f25520f304a01c77f24a3538584
+++ b/mm/swap.c
@@@ -97,6 -97,16 +97,16 @@@ static void __put_compound_page(struct 
  
  void __put_page(struct page *page)
  {
+       if (is_zone_device_page(page)) {
+               put_dev_pagemap(page->pgmap);
+               /*
+                * The page belongs to the device that created pgmap. Do
+                * not return it to page allocator.
+                */
+               return;
+       }
        if (unlikely(PageCompound(page)))
                __put_compound_page(page);
        else
@@@ -670,19 -680,30 +680,19 @@@ static void lru_add_drain_per_cpu(struc
  
  static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  
 -/*
 - * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
 - * workqueue, aiding in getting memory freed.
 - */
 -static struct workqueue_struct *lru_add_drain_wq;
 -
 -static int __init lru_init(void)
 -{
 -      lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
 -
 -      if (WARN(!lru_add_drain_wq,
 -              "Failed to create workqueue lru_add_drain_wq"))
 -              return -ENOMEM;
 -
 -      return 0;
 -}
 -early_initcall(lru_init);
 -
  void lru_add_drain_all(void)
  {
        static DEFINE_MUTEX(lock);
        static struct cpumask has_work;
        int cpu;
  
 +      /*
 +       * Make sure nobody triggers this path before mm_percpu_wq is fully
 +       * initialized.
 +       */
 +      if (WARN_ON(!mm_percpu_wq))
 +              return;
 +
        mutex_lock(&lock);
        get_online_cpus();
        cpumask_clear(&has_work);
                    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
                    need_activate_page_drain(cpu)) {
                        INIT_WORK(work, lru_add_drain_per_cpu);
 -                      queue_work_on(cpu, lru_add_drain_wq, work);
 +                      queue_work_on(cpu, mm_percpu_wq, work);
                        cpumask_set_cpu(cpu, &has_work);
                }
        }