Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <[email protected]>
Sat, 23 Dec 2017 19:53:04 +0000 (11:53 -0800)
committer Linus Torvalds <[email protected]>
Sat, 23 Dec 2017 19:53:04 +0000 (11:53 -0800)
diff --combined arch/powerpc/include/asm/mmu_context.h

index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a,44fdf4786638b1fe2f21b8c15927eea8c19ee47f..e2a2b8400490049143edee40316313a906ca6db7
--- 1/arch/powerpc/include/asm/mmu_context.h
--- 2/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@@ -78,52 -78,6 +78,52 @@@ extern void switch_cop(struct mm_struc
   extern int use_cop(unsigned long acop, struct mm_struct *mm);
   extern void drop_cop(unsigned long acop, struct mm_struct *mm);
   
+ +#ifdef CONFIG_PPC_BOOK3S_64
+ +static inline void inc_mm_active_cpus(struct mm_struct *mm)
+ +{
+ +      atomic_inc(&mm->context.active_cpus);
+ +}
+ +
+ +static inline void dec_mm_active_cpus(struct mm_struct *mm)
+ +{
+ +      atomic_dec(&mm->context.active_cpus);
+ +}
+ +
+ +static inline void mm_context_add_copro(struct mm_struct *mm)
+ +{
+ +      /*
+ +       * On hash, should only be called once over the lifetime of
+ +       * the context, as we can't decrement the active cpus count
+ +       * and flush properly for the time being.
+ +       */
+ +      inc_mm_active_cpus(mm);
+ +}
+ +
+ +static inline void mm_context_remove_copro(struct mm_struct *mm)
+ +{
+ +      /*
+ +       * Need to broadcast a global flush of the full mm before
+ +       * decrementing active_cpus count, as the next TLBI may be
+ +       * local and the nMMU and/or PSL need to be cleaned up.
+ +       * Should be rare enough so that it's acceptable.
+ +       *
+ +       * Skip on hash, as we don't know how to do the proper flush
+ +       * for the time being. Invalidations will remain global if
+ +       * used on hash.
+ +       */
+ +      if (radix_enabled()) {
+ +              flush_all_mm(mm);
+ +              dec_mm_active_cpus(mm);
+ +      }
+ +}
+ +#else
+ +static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+ +static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+ +static inline void mm_context_add_copro(struct mm_struct *mm) { }
+ +static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+ +#endif
+ +
+ +
   extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                                struct task_struct *tsk);
   
@@@ -160,18 -114,15 +160,19 @@@ static inline void enter_lazy_tlb(struc
   #endif
   }
   
- static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+ static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
   {
+       return 0;
   }
   
+ +#ifndef CONFIG_PPC_BOOK3S_64
   static inline void arch_exit_mmap(struct mm_struct *mm)
   {
   }
+ +#else
+ +extern void arch_exit_mmap(struct mm_struct *mm);
+ +#endif
   
   static inline void arch_unmap(struct mm_struct *mm,
                               struct vm_area_struct *vma,
diff --combined arch/x86/Kconfig

index 8eed3f94bfc774de5e3f344590f8889a999dea9c,cd5199de231e68a969984b47d4875fb1884a788c..d4fc98c50378c40bc901f6446d2bfff68151eb6a
--- 1/arch/x86/Kconfig
--- 2/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -56,7 -56,7 +56,7 @@@ config X8
         select ARCH_HAS_KCOV                    if X86_64
         select ARCH_HAS_PMEM_API                if X86_64
         # Causing hangs/crashes, see the commit that added this change for details.
- -      select ARCH_HAS_REFCOUNT                if BROKEN
+ +      select ARCH_HAS_REFCOUNT
         select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
         select ARCH_HAS_SET_MEMORY
         select ARCH_HAS_SG_CHAIN
@@@ -93,10 -93,8 +93,10 @@@
         select GENERIC_FIND_FIRST_BIT
         select GENERIC_IOMAP
         select GENERIC_IRQ_EFFECTIVE_AFF_MASK   if SMP
+ +      select GENERIC_IRQ_MATRIX_ALLOCATOR     if X86_LOCAL_APIC
         select GENERIC_IRQ_MIGRATION            if SMP
         select GENERIC_IRQ_PROBE
+ +      select GENERIC_IRQ_RESERVATION_MODE
         select GENERIC_IRQ_SHOW
         select GENERIC_PENDING_IRQ              if SMP
         select GENERIC_SMP_IDLE_THREAD
@@@ -112,6 -110,7 +112,6 @@@
         select HAVE_ARCH_JUMP_LABEL
         select HAVE_ARCH_KASAN                  if X86_64
         select HAVE_ARCH_KGDB
- -      select HAVE_ARCH_KMEMCHECK
         select HAVE_ARCH_MMAP_RND_BITS          if MMU
         select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
         select HAVE_ARCH_COMPAT_MMAP_BASES      if MMU && COMPAT
@@@ -926,7 -925,8 +926,8 @@@ config MAXSM
   config NR_CPUS
         int "Maximum number of CPUs" if SMP && !MAXSMP
         range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+       range 2 64 if SMP && X86_32 && X86_BIGSMP
+       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
         range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
         default "1" if !SMP
         default "8192" if MAXSMP
@@@ -1429,7 -1429,7 +1430,7 @@@ config ARCH_DMA_ADDR_T_64BI
   
   config X86_DIRECT_GBPAGES
         def_bool y
- -      depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
+ +      depends on X86_64 && !DEBUG_PAGEALLOC
         ---help---
           Certain kernel features effectively disable kernel
           linear 1 GB mappings (even if the CPU otherwise
@@@ -1803,22 -1803,6 +1804,22 @@@ config X86_SMA
   
           If unsure, say Y.
   
+ +config X86_INTEL_UMIP
+ +      def_bool y
+ +      depends on CPU_SUP_INTEL
+ +      prompt "Intel User Mode Instruction Prevention" if EXPERT
+ +      ---help---
+ +        The User Mode Instruction Prevention (UMIP) is a security
+ +        feature in newer Intel processors. If enabled, a general
+ +        protection fault is issued if the SGDT, SLDT, SIDT, SMSW
+ +        or STR instructions are executed in user mode. These instructions
+ +        unnecessarily expose information about the hardware state.
+ +
+ +        The vast majority of applications do not use these instructions.
+ +        For the very few that do, software emulation is provided in
+ +        specific cases in protected and virtual-8086 modes. Emulated
+ +        results are dummy.
+ +
   config X86_INTEL_MPX
         prompt "Intel MPX (Memory Protection Extensions)"
         def_bool n
diff --combined arch/x86/entry/entry_64.S

index 423885bee398c6c9cb80f3bd4a2ec8317e41062a,87cebe78bbefb9ba28fa3b2f9bf6f58f1134465f..3d19c830e1b1ab3c7e3115014039a35eb9607214
--- 1/arch/x86/entry/entry_64.S
--- 2/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@@ -51,19 -51,15 +51,19 @@@ ENTRY(native_usergs_sysret64
   END(native_usergs_sysret64)
   #endif /* CONFIG_PARAVIRT */
   
- -.macro TRACE_IRQS_IRETQ
+ +.macro TRACE_IRQS_FLAGS flags:req
   #ifdef CONFIG_TRACE_IRQFLAGS
- -      bt      $9, EFLAGS(%rsp)                /* interrupts off? */
+ +      bt      $9, \flags              /* interrupts off? */
         jnc     1f
         TRACE_IRQS_ON
   1:
   #endif
   .endm
   
+ +.macro TRACE_IRQS_IRETQ
+ +      TRACE_IRQS_FLAGS EFLAGS(%rsp)
+ +.endm
+ +
   /*
    * When dynamic function tracer is enabled it will add a breakpoint
    * to all locations that it is about to modify, sync CPUs, update
@@@ -158,8 -154,8 +158,8 @@@
         _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
   
   /* The top word of the SYSENTER stack is hot and is usable as scratch space. */
- #define RSP_SCRATCH   CPU_ENTRY_AREA_SYSENTER_stack + \
-                       SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+ #define RSP_SCRATCH   CPU_ENTRY_AREA_entry_stack + \
+                       SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
   
   ENTRY(entry_SYSCALL_64_trampoline)
         UNWIND_HINT_EMPTY
@@@ -210,6 -206,8 +210,6 @@@ ENTRY(entry_SYSCALL_64
         movq    %rsp, PER_CPU_VAR(rsp_scratch)
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
   
- -      TRACE_IRQS_OFF
- -
         /* Construct struct pt_regs on stack */
         pushq   $__USER_DS                      /* pt_regs->ss */
         pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
@@@ -230,8 -228,6 +230,8 @@@ GLOBAL(entry_SYSCALL_64_after_hwframe
         sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
         UNWIND_HINT_REGS extra=0
   
+ +      TRACE_IRQS_OFF
+ +
         /*
          * If we need to do entry work or if we guess we'll need to do
          * exit work, go straight to the slow path.
@@@ -1078,13 -1074,11 +1078,13 @@@ ENTRY(native_load_gs_index
         FRAME_BEGIN
         pushfq
         DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+ +      TRACE_IRQS_OFF
         SWAPGS
   .Lgs_change:
         movl    %edi, %gs
   2:    ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
         SWAPGS
+ +      TRACE_IRQS_FLAGS (%rsp)
         popfq
         FRAME_END
         ret
diff --combined arch/x86/include/asm/desc.h

index aab4fe9f49f868a03a5c2da5eeb788a6bb80c24d,bc359dd2f7f646a379840a758c20032a86273c4a..ec8be07c0cda5c9b240d351ca583409713c58406
--- 1/arch/x86/include/asm/desc.h
--- 2/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@@ -7,6 -7,7 +7,7 @@@
   #include <asm/mmu.h>
   #include <asm/fixmap.h>
   #include <asm/irq_vectors.h>
+ #include <asm/cpu_entry_area.h>
   
   #include <linux/smp.h>
   #include <linux/percpu.h>
@@@ -386,7 -387,7 +387,7 @@@ static inline void set_desc_limit(struc
   void update_intr_gate(unsigned int n, const void *addr);
   void alloc_intr_gate(unsigned int n, const void *addr);
   
- -extern unsigned long used_vectors[];
+ +extern unsigned long system_vectors[];
   
   #ifdef CONFIG_X86_64
   DECLARE_PER_CPU(u32, debug_idt_ctr);
diff --combined arch/x86/include/asm/processor.h

index 1f2434ee9f806c4355a38599ab4485140a8cd1df,9e482d8b0b9786d98d30db35c7e7341b526efd9f..cad8dab266bceefcd91a830371716d48679c7cc7
--- 1/arch/x86/include/asm/processor.h
--- 2/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@@ -132,7 -132,6 +132,7 @@@ struct cpuinfo_x86 
         /* Index into per_cpu list: */
         u16                     cpu_index;
         u32                     microcode;
+ +      unsigned                initialized : 1;
   } __randomize_layout;
   
   struct cpuid_regs {
@@@ -337,12 -336,12 +337,12 @@@ struct x86_hw_tss 
   #define IO_BITMAP_OFFSET              (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
   #define INVALID_IO_BITMAP_OFFSET      0x8000
   
- struct SYSENTER_stack {
+ struct entry_stack {
         unsigned long           words[64];
   };
   
- struct SYSENTER_stack_page {
-       struct SYSENTER_stack stack;
+ struct entry_stack_page {
+       struct entry_stack stack;
   } __aligned(PAGE_SIZE);
   
   struct tss_struct {
diff --combined arch/x86/include/asm/tlbflush.h

index 877b5c1a1b1247116e20e7272dbade77e1874fc4,171b429f43a266fc7851e8ccbf3bd49c08ada417..e1884cf35257b8133ca97f50d146ae3ebfcaa30f
--- 1/arch/x86/include/asm/tlbflush.h
--- 2/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@@ -9,70 -9,66 +9,66 @@@
   #include <asm/cpufeature.h>
   #include <asm/special_insns.h>
   #include <asm/smp.h>
+ #include <asm/invpcid.h>
   
- static inline void __invpcid(unsigned long pcid, unsigned long addr,
-                            unsigned long type)
+ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
   {
-       struct { u64 d[2]; } desc = { { pcid, addr } };
- 
         /*
-        * The memory clobber is because the whole point is to invalidate
-        * stale TLB entries and, especially if we're flushing global
-        * mappings, we don't want the compiler to reorder any subsequent
-        * memory accesses before the TLB flush.
-        *
-        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-        * invpcid (%rcx), %rax in long mode.
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
          */
-       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+       return atomic64_inc_return(&mm->context.tlb_gen);
   }
   
- #define INVPCID_TYPE_INDIV_ADDR               0
- #define INVPCID_TYPE_SINGLE_CTXT      1
- #define INVPCID_TYPE_ALL_INCL_GLOBAL  2
- #define INVPCID_TYPE_ALL_NON_GLOBAL   3
+ /* There are 12 bits of space for ASIDS in CR3 */
+ #define CR3_HW_ASID_BITS              12
+ /*
+  * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+  * user/kernel switches
+  */
+ #define PTI_CONSUMED_ASID_BITS                0
   
- /* Flush all mappings for a given pcid and addr, not including globals. */
- static inline void invpcid_flush_one(unsigned long pcid,
-                                    unsigned long addr)
- {
-       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
- }
+ #define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
+ /*
+  * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid.  -1 below to account
+  * for them being zero-based.  Another -1 is because ASID 0 is reserved for
+  * use by non-PCID-aware users.
+  */
+ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
   
- /* Flush all mappings for a given PCID, not including globals. */
- static inline void invpcid_flush_single_context(unsigned long pcid)
+ static inline u16 kern_pcid(u16 asid)
   {
-       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       /*
+        * If PCID is on, ASID-aware code paths put the ASID+1 into the
+        * PCID bits.  This serves two purposes.  It prevents a nasty
+        * situation in which PCID-unaware code saves CR3, loads some other
+        * value (with PCID == 0), and then restores CR3, thus corrupting
+        * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+        * that any bugs involving loading a PCID-enabled CR3 with
+        * CR4.PCIDE off will trigger deterministically.
+        */
+       return asid + 1;
   }
   
- /* Flush all mappings, including globals, for all PCIDs. */
- static inline void invpcid_flush_all(void)
+ struct pgd_t;
+ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
   {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+               return __sme_pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+               return __sme_pa(pgd);
+       }
   }
   
- /* Flush all mappings for all PCIDs except globals. */
- static inline void invpcid_flush_all_nonglobals(void)
+ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
   {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
- }
- 
- static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
- {
-       u64 new_tlb_gen;
- 
-       /*
-        * Bump the generation count.  This also serves as a full barrier
-        * that synchronizes with switch_mm(): callers are required to order
-        * their read of mm_cpumask after their writes to the paging
-        * structures.
-        */
-       smp_mb__before_atomic();
-       new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-       smp_mb__after_atomic();
- 
-       return new_tlb_gen;
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
   }
   
   #ifdef CONFIG_PARAVIRT
@@@ -173,43 -169,40 +169,43 @@@ static inline void cr4_init_shadow(void
         this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
   }
   
+ +static inline void __cr4_set(unsigned long cr4)
+ +{
+ +      lockdep_assert_irqs_disabled();
+ +      this_cpu_write(cpu_tlbstate.cr4, cr4);
+ +      __write_cr4(cr4);
+ +}
+ +
   /* Set in this cpu's CR4. */
   static inline void cr4_set_bits(unsigned long mask)
   {
- -      unsigned long cr4;
+ +      unsigned long cr4, flags;
   
+ +      local_irq_save(flags);
         cr4 = this_cpu_read(cpu_tlbstate.cr4);
- -      if ((cr4 | mask) != cr4) {
- -              cr4 |= mask;
- -              this_cpu_write(cpu_tlbstate.cr4, cr4);
- -              __write_cr4(cr4);
- -      }
+ +      if ((cr4 | mask) != cr4)
+ +              __cr4_set(cr4 | mask);
+ +      local_irq_restore(flags);
   }
   
   /* Clear in this cpu's CR4. */
   static inline void cr4_clear_bits(unsigned long mask)
   {
- -      unsigned long cr4;
+ +      unsigned long cr4, flags;
   
+ +      local_irq_save(flags);
         cr4 = this_cpu_read(cpu_tlbstate.cr4);
- -      if ((cr4 & ~mask) != cr4) {
- -              cr4 &= ~mask;
- -              this_cpu_write(cpu_tlbstate.cr4, cr4);
- -              __write_cr4(cr4);
- -      }
+ +      if ((cr4 & ~mask) != cr4)
+ +              __cr4_set(cr4 & ~mask);
+ +      local_irq_restore(flags);
   }
   
- -static inline void cr4_toggle_bits(unsigned long mask)
+ +static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
   {
         unsigned long cr4;
   
         cr4 = this_cpu_read(cpu_tlbstate.cr4);
- -      cr4 ^= mask;
- -      this_cpu_write(cpu_tlbstate.cr4, cr4);
- -      __write_cr4(cr4);
+ +      __cr4_set(cr4 ^ mask);
   }
   
   /* Read the CR4 shadow. */
@@@ -237,6 -230,9 +233,9 @@@ static inline void cr4_set_bits_and_upd
   
   extern void initialize_tlbstate_and_flush(void);
   
+ /*
+  * flush the entire current user mapping
+  */
   static inline void __native_flush_tlb(void)
   {
         /*
@@@ -249,20 -245,12 +248,12 @@@
         preempt_enable();
   }
   
- static inline void __native_flush_tlb_global_irq_disabled(void)
- {
-       unsigned long cr4;
- 
-       cr4 = this_cpu_read(cpu_tlbstate.cr4);
-       /* clear PGE */
-       native_write_cr4(cr4 & ~X86_CR4_PGE);
-       /* write old PGE again and flush TLBs */
-       native_write_cr4(cr4);
- }
- 
+ /*
+  * flush everything
+  */
   static inline void __native_flush_tlb_global(void)
   {
-       unsigned long flags;
+       unsigned long cr4, flags;
   
         if (static_cpu_has(X86_FEATURE_INVPCID)) {
                 /*
@@@ -280,22 -268,36 +271,36 @@@
          */
         raw_local_irq_save(flags);
   
-       __native_flush_tlb_global_irq_disabled();
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       /* toggle PGE */
+       native_write_cr4(cr4 ^ X86_CR4_PGE);
+       /* write old PGE again and flush TLBs */
+       native_write_cr4(cr4);
   
         raw_local_irq_restore(flags);
   }
   
+ /*
+  * flush one page in the user mapping
+  */
   static inline void __native_flush_tlb_single(unsigned long addr)
   {
         asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
   }
   
+ /*
+  * flush everything
+  */
   static inline void __flush_tlb_all(void)
   {
-       if (boot_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                 __flush_tlb_global();
-       else
+       } else {
+               /*
+                * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+                */
                 __flush_tlb();
+       }
   
         /*
          * Note: if we somehow had PCID but not PGE, then this wouldn't work --
@@@ -306,6 -308,9 +311,9 @@@
          */
   }
   
+ /*
+  * flush one page in the kernel mapping
+  */
   static inline void __flush_tlb_one(unsigned long addr)
   {
         count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
diff --combined arch/x86/kernel/cpu/common.c

index 7416da3ec4dfa0b0f275dd10a5f9bfa12b884022,8ddcfa4d4165bb92717137da51f174a50365456e..c9757f07d738af73ce3bd14c51780c71a512395f
--- 1/arch/x86/kernel/cpu/common.c
--- 2/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -329,30 -329,6 +329,30 @@@ static __always_inline void setup_smap(
         }
   }
   
+ +static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+ +{
+ +      /* Check the boot processor, plus build option for UMIP. */
+ +      if (!cpu_feature_enabled(X86_FEATURE_UMIP))
+ +              goto out;
+ +
+ +      /* Check the current processor's cpuid bits. */
+ +      if (!cpu_has(c, X86_FEATURE_UMIP))
+ +              goto out;
+ +
+ +      cr4_set_bits(X86_CR4_UMIP);
+ +
+ +      pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+ +
+ +      return;
+ +
+ +out:
+ +      /*
+ +       * Make sure UMIP is disabled in case it was enabled in a
+ +       * previous boot (e.g., via kexec).
+ +       */
+ +      cr4_clear_bits(X86_CR4_UMIP);
+ +}
+ +
   /*
    * Protection Keys are not available in 32-bit mode.
    */
@@@ -506,102 -482,8 +506,8 @@@ static const unsigned int exception_sta
           [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
           [DEBUG_STACK - 1]                     = DEBUG_STKSZ
   };
- 
- static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
- #endif
- 
- static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
-                                  SYSENTER_stack_storage);
- 
- static void __init
- set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
- {
-       for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
-               __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
- }
- 
- /* Setup the fixmap mappings only once per-processor */
- static void __init setup_cpu_entry_area(int cpu)
- {
- #ifdef CONFIG_X86_64
-       extern char _entry_trampoline[];
- 
-       /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
-       pgprot_t gdt_prot = PAGE_KERNEL_RO;
-       pgprot_t tss_prot = PAGE_KERNEL_RO;
- #else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-        * our double fault handler uses a task gate, and entering through
-        * a task gate needs to change an available TSS to busy.  If the
-        * GDT is read-only, that will triple fault.  The TSS cannot be
-        * read-only because the CPU writes to it on task switches.
-        *
-        * On Xen PV, the GDT must be read-only because the hypervisor
-        * requires it.
-        */
-       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
-       pgprot_t tss_prot = PAGE_KERNEL;
- #endif
- 
-       __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
-       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
-                               per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
-                               PAGE_KERNEL);
- 
-       /*
-        * The Intel SDM says (Volume 3, 7.2.1):
-        *
-        *  Avoid placing a page boundary in the part of the TSS that the
-        *  processor reads during a task switch (the first 104 bytes). The
-        *  processor may not correctly perform address translations if a
-        *  boundary occurs in this area. During a task switch, the processor
-        *  reads and writes into the first 104 bytes of each TSS (using
-        *  contiguous physical addresses beginning with the physical address
-        *  of the first byte of the TSS). So, after TSS access begins, if
-        *  part of the 104 bytes is not physically contiguous, the processor
-        *  will access incorrect information without generating a page-fault
-        *  exception.
-        *
-        * There are also a lot of errata involving the TSS spanning a page
-        * boundary.  Assert that we're not doing that.
-        */
-       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
-       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
-                               &per_cpu(cpu_tss_rw, cpu),
-                               sizeof(struct tss_struct) / PAGE_SIZE,
-                               tss_prot);
- 
- #ifdef CONFIG_X86_32
-       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
   #endif
   
- #ifdef CONFIG_X86_64
-       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
-       BUILD_BUG_ON(sizeof(exception_stacks) !=
-                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
-       set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
-                               &per_cpu(exception_stacks, cpu),
-                               sizeof(exception_stacks) / PAGE_SIZE,
-                               PAGE_KERNEL);
- 
-       __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
- #endif
- }
- 
- void __init setup_cpu_entry_areas(void)
- {
-       unsigned int cpu;
- 
-       for_each_possible_cpu(cpu)
-               setup_cpu_entry_area(cpu);
- }
- 
   /* Load the original GDT from the per-cpu structure */
   void load_direct_gdt(int cpu)
   {
@@@ -976,8 -858,8 +882,8 @@@ static void identify_cpu_without_cpuid(
    * cache alignment.
    * The others are not touched to avoid unwanted side effects.
    *
- - * WARNING: this function is only called on the BP.  Don't add code here
- - * that is supposed to run on all CPUs.
+ + * WARNING: this function is only called on the boot CPU.  Don't add code
+ + * here that is supposed to run on all CPUs.
    */
   static void __init early_identify_cpu(struct cpuinfo_x86 *c)
   {
@@@ -1260,10 -1142,9 +1166,10 @@@ static void identify_cpu(struct cpuinfo
         /* Disable the PN if appropriate */
         squash_the_stupid_serial_number(c);
   
- -      /* Set up SMEP/SMAP */
+ +      /* Set up SMEP/SMAP/UMIP */
         setup_smep(c);
         setup_smap(c);
+ +      setup_umip(c);
   
         /*
          * The vendor-specific functions might have changed features.
@@@ -1348,7 -1229,7 +1254,7 @@@ void enable_sep_cpu(void
   
         tss->x86_tss.ss1 = __KERNEL_CS;
         wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
+       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
         wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
   
         put_cpu();
@@@ -1465,7 -1346,7 +1371,7 @@@ void syscall_init(void
          * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
          */
         wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
         wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
   #else
         wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@@ -1680,7 -1561,7 +1586,7 @@@ void cpu_init(void
          */
         set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
         load_TR_desc();
-       load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
+       load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
   
         load_mm_ldt(&init_mm);
   
diff --combined arch/x86/kernel/smpboot.c

index 35cb20994e32d2bf05f0b1510ccc26cc7e7590a5,33d6000265aa75e6f4d41ed55d4a9ac13a1b6ba2..c5970efa85570ab324bd1cad2e57d464dba86f46
--- 1/arch/x86/kernel/smpboot.c
--- 2/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@@ -77,7 -77,6 +77,7 @@@
   #include <asm/i8259.h>
   #include <asm/realmode.h>
   #include <asm/misc.h>
+ +#include <asm/qspinlock.h>
   
   /* Number of siblings per CPU package */
   int smp_num_siblings = 1;
@@@ -101,12 -100,15 +101,12 @@@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuin
   EXPORT_PER_CPU_SYMBOL(cpu_info);
   
   /* Logical package management. We might want to allocate that dynamically */
- -static int *physical_to_logical_pkg __read_mostly;
- -static unsigned long *physical_package_map __read_mostly;;
- -static unsigned int max_physical_pkg_id __read_mostly;
   unsigned int __max_logical_packages __read_mostly;
   EXPORT_SYMBOL(__max_logical_packages);
   static unsigned int logical_packages __read_mostly;
   
   /* Maximum number of SMT threads on any online core */
- -int __max_smt_threads __read_mostly;
+ +int __read_mostly __max_smt_threads = 1;
   
   /* Flag to indicate if a complete sched domain rebuild is required */
   bool x86_topology_update;
@@@ -237,7 -239,7 +237,7 @@@ static void notrace start_secondary(voi
         load_cr3(swapper_pg_dir);
         __flush_tlb_all();
   #endif
- -
+ +      load_current_idt();
         cpu_init();
         x86_cpuinit.early_percpu_clock_init();
         preempt_disable();
@@@ -248,19 -250,19 +248,19 @@@
         /* otherwise gcc will move up smp_processor_id before the cpu_init */
         barrier();
         /*
- -       * Check TSC synchronization with the BP:
+ +       * Check TSC synchronization with the boot CPU:
          */
         check_tsc_sync_target();
   
         /*
- -       * Lock vector_lock and initialize the vectors on this cpu
- -       * before setting the cpu online. We must set it online with
- -       * vector_lock held to prevent a concurrent setup/teardown
- -       * from seeing a half valid vector space.
+ +       * Lock vector_lock, set CPU online and bring the vector
+ +       * allocator online. Online must be set with vector_lock held
+ +       * to prevent a concurrent irq setup/teardown from seeing a
+ +       * half valid vector space.
          */
         lock_vector_lock();
- -      setup_vector_irq(smp_processor_id());
         set_cpu_online(smp_processor_id(), true);
+ +      lapic_online();
         unlock_vector_lock();
         cpu_set_state_online(smp_processor_id());
         x86_platform.nmi_init();
@@@ -277,25 -279,6 +277,25 @@@
         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
   }
   
+ +/**
+ + * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ + *
+ + * Returns logical package id or -1 if not found
+ + */
+ +int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+ +{
+ +      int cpu;
+ +
+ +      for_each_possible_cpu(cpu) {
+ +              struct cpuinfo_x86 *c = &cpu_data(cpu);
+ +
+ +              if (c->initialized && c->phys_proc_id == phys_pkg)
+ +                      return c->logical_proc_id;
+ +      }
+ +      return -1;
+ +}
+ +EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+ +
   /**
    * topology_update_package_map - Update the physical to logical package map
    * @pkg:      The physical package id as retrieved via CPUID
@@@ -303,23 -286,102 +303,23 @@@
    */
   int topology_update_package_map(unsigned int pkg, unsigned int cpu)
   {
- -      unsigned int new;
+ +      int new;
   
- -      /* Called from early boot ? */
- -      if (!physical_package_map)
- -              return 0;
- -
- -      if (pkg >= max_physical_pkg_id)
- -              return -EINVAL;
- -
- -      /* Set the logical package id */
- -      if (test_and_set_bit(pkg, physical_package_map))
+ +      /* Already available somewhere? */
+ +      new = topology_phys_to_logical_pkg(pkg);
+ +      if (new >= 0)
                 goto found;
   
- -      if (logical_packages >= __max_logical_packages) {
- -              pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n",
- -                      logical_packages, cpu, __max_logical_packages);
- -              return -ENOSPC;
- -      }
- -
         new = logical_packages++;
         if (new != pkg) {
                 pr_info("CPU %u Converting physical %u to logical package %u\n",
                         cpu, pkg, new);
         }
- -      physical_to_logical_pkg[pkg] = new;
- -
   found:
- -      cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+ +      cpu_data(cpu).logical_proc_id = new;
         return 0;
   }
   
- -/**
- - * topology_phys_to_logical_pkg - Map a physical package id to a logical
- - *
- - * Returns logical package id or -1 if not found
- - */
- -int topology_phys_to_logical_pkg(unsigned int phys_pkg)
- -{
- -      if (phys_pkg >= max_physical_pkg_id)
- -              return -1;
- -      return physical_to_logical_pkg[phys_pkg];
- -}
- -EXPORT_SYMBOL(topology_phys_to_logical_pkg);
- -
- -static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu)
- -{
- -      unsigned int ncpus;
- -      size_t size;
- -
- -      /*
- -       * Today neither Intel nor AMD support heterogenous systems. That
- -       * might change in the future....
- -       *
- -       * While ideally we'd want '* smp_num_siblings' in the below @ncpus
- -       * computation, this won't actually work since some Intel BIOSes
- -       * report inconsistent HT data when they disable HT.
- -       *
- -       * In particular, they reduce the APIC-IDs to only include the cores,
- -       * but leave the CPUID topology to say there are (2) siblings.
- -       * This means we don't know how many threads there will be until
- -       * after the APIC enumeration.
- -       *
- -       * By not including this we'll sometimes over-estimate the number of
- -       * logical packages by the amount of !present siblings, but this is
- -       * still better than MAX_LOCAL_APIC.
- -       *
- -       * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
- -       * on the command line leading to a similar issue as the HT disable
- -       * problem because the hyperthreads are usually enumerated after the
- -       * primary cores.
- -       */
- -      ncpus = boot_cpu_data.x86_max_cores;
- -      if (!ncpus) {
- -              pr_warn("x86_max_cores == zero !?!?");
- -              ncpus = 1;
- -      }
- -
- -      __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
- -      logical_packages = 0;
- -
- -      /*
- -       * Possibly larger than what we need as the number of apic ids per
- -       * package can be smaller than the actual used apic ids.
- -       */
- -      max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
- -      size = max_physical_pkg_id * sizeof(unsigned int);
- -      physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
- -      memset(physical_to_logical_pkg, 0xff, size);
- -      size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
- -      physical_package_map = kzalloc(size, GFP_KERNEL);
- -
- -      pr_info("Max logical packages: %u\n", __max_logical_packages);
- -
- -      topology_update_package_map(c->phys_proc_id, cpu);
- -}
- -
   void __init smp_store_boot_cpu_info(void)
   {
         int id = 0; /* CPU 0 */
@@@ -327,8 -389,7 +327,8 @@@
   
         *c = boot_cpu_data;
         c->cpu_index = id;
- -      smp_init_package_map(c, id);
+ +      topology_update_package_map(c->phys_proc_id, id);
+ +      c->initialized = true;
   }
   
   /*
@@@ -339,16 -400,13 +339,16 @@@ void smp_store_cpu_info(int id
   {
         struct cpuinfo_x86 *c = &cpu_data(id);
   
- -      *c = boot_cpu_data;
+ +      /* Copy boot_cpu_data only on the first bringup */
+ +      if (!c->initialized)
+ +              *c = boot_cpu_data;
         c->cpu_index = id;
         /*
          * During boot time, CPU0 has this setup already. Save the info when
          * bringing up AP or offlined CPU0.
          */
         identify_secondary_cpu(c);
+ +      c->initialized = true;
   }
   
   static bool
@@@ -932,12 -990,8 +932,8 @@@ static int do_boot_cpu(int apicid, int 
         initial_code = (unsigned long)start_secondary;
         initial_stack  = idle->thread.sp;
   
-       /*
-        * Enable the espfix hack for this CPU
-       */
- #ifdef CONFIG_X86_ESPFIX64
+       /* Enable the espfix hack for this CPU */
         init_espfix_ap(cpu);
- #endif
   
         /* So we see what's up */
         announce_cpu(cpu, apicid);
@@@ -1036,7 -1090,7 +1032,7 @@@ int native_cpu_up(unsigned int cpu, str
         unsigned long flags;
         int err, ret = 0;
   
- -      WARN_ON(irqs_disabled());
+ +      lockdep_assert_irqs_enabled();
   
         pr_debug("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
   
@@@ -1132,10 -1186,17 +1128,10 @@@ static __init void disable_smp(void
         cpumask_set_cpu(0, topology_core_cpumask(0));
   }
   
- -enum {
- -      SMP_OK,
- -      SMP_NO_CONFIG,
- -      SMP_NO_APIC,
- -      SMP_FORCE_UP,
- -};
- -
   /*
    * Various sanity checks.
    */
- -static int __init smp_sanity_check(unsigned max_cpus)
+ +static void __init smp_sanity_check(void)
   {
         preempt_disable();
   
@@@ -1172,6 -1233,16 +1168,6 @@@
                 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
         }
   
- -      /*
- -       * If we couldn't find an SMP configuration at boot time,
- -       * get out of here now!
- -       */
- -      if (!smp_found_config && !acpi_lapic) {
- -              preempt_enable();
- -              pr_notice("SMP motherboard not detected\n");
- -              return SMP_NO_CONFIG;
- -      }
- -
         /*
          * Should not be necessary because the MP table should list the boot
          * CPU too, but we do it for the sake of robustness anyway.
@@@ -1182,6 -1253,29 +1178,6 @@@
                 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
         }
         preempt_enable();
- -
- -      /*
- -       * If we couldn't find a local APIC, then get out of here now!
- -       */
- -      if (APIC_INTEGRATED(boot_cpu_apic_version) &&
- -          !boot_cpu_has(X86_FEATURE_APIC)) {
- -              if (!disable_apic) {
- -                      pr_err("BIOS bug, local APIC #%d not detected!...\n",
- -                              boot_cpu_physical_apicid);
- -                      pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
- -              }
- -              return SMP_NO_APIC;
- -      }
- -
- -      /*
- -       * If SMP should be disabled, then really disable it!
- -       */
- -      if (!max_cpus) {
- -              pr_info("SMP mode deactivated\n");
- -              return SMP_FORCE_UP;
- -      }
- -
- -      return SMP_OK;
   }
   
   static void __init smp_cpu_index_default(void)
@@@ -1196,18 -1290,9 +1192,18 @@@
         }
   }
   
+ +static void __init smp_get_logical_apicid(void)
+ +{
+ +      if (x2apic_mode)
+ +              cpu0_logical_apicid = apic_read(APIC_LDR);
+ +      else
+ +              cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+ +}
+ +
   /*
- - * Prepare for SMP bootup.  The MP table or ACPI has been read
- - * earlier.  Just do some sanity checking here and enable APIC mode.
+ + * Prepare for SMP bootup.
+ + * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
+ + *            for common interface support.
    */
   void __init native_smp_prepare_cpus(unsigned int max_cpus)
   {
@@@ -1239,33 -1324,35 +1235,33 @@@
   
         set_cpu_sibling_map(0);
   
- -      switch (smp_sanity_check(max_cpus)) {
- -      case SMP_NO_CONFIG:
- -              disable_smp();
- -              if (APIC_init_uniprocessor())
- -                      pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
- -              return;
- -      case SMP_NO_APIC:
+ +      smp_sanity_check();
+ +
+ +      switch (apic_intr_mode) {
+ +      case APIC_PIC:
+ +      case APIC_VIRTUAL_WIRE_NO_CONFIG:
                 disable_smp();
                 return;
- -      case SMP_FORCE_UP:
+ +      case APIC_SYMMETRIC_IO_NO_ROUTING:
                 disable_smp();
- -              apic_bsp_setup(false);
+ +              /* Setup local timer */
+ +              x86_init.timers.setup_percpu_clockev();
                 return;
- -      case SMP_OK:
+ +      case APIC_VIRTUAL_WIRE:
+ +      case APIC_SYMMETRIC_IO:
                 break;
         }
   
- -      if (read_apic_id() != boot_cpu_physical_apicid) {
- -              panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- -                   read_apic_id(), boot_cpu_physical_apicid);
- -              /* Or can we switch back to PIC here? */
- -      }
+ +      /* Setup local timer */
+ +      x86_init.timers.setup_percpu_clockev();
   
- -      default_setup_apic_routing();
- -      cpu0_logical_apicid = apic_bsp_setup(false);
+ +      smp_get_logical_apicid();
   
         pr_info("CPU0: ");
         print_cpu_info(&cpu_data(0));
   
+ +      native_pv_lock_init();
+ +
         uv_system_init();
   
         set_mtrr_aps_delayed_init();
@@@ -1297,22 -1384,14 +1293,22 @@@ void __init native_smp_prepare_boot_cpu
   
   void __init native_smp_cpus_done(unsigned int max_cpus)
   {
+ +      int ncpus;
+ +
         pr_debug("Boot done\n");
+ +      /*
+ +       * Today neither Intel nor AMD support heterogenous systems so
+ +       * extrapolate the boot cpu's data to all packages.
+ +       */
+ +      ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
+ +      __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+ +      pr_info("Max logical packages: %u\n", __max_logical_packages);
   
         if (x86_has_numa_in_package)
                 set_sched_topology(x86_numa_in_package_topology);
   
         nmi_selftest();
         impress_friends();
- -      setup_ioapic_dest();
         mtrr_aps_init();
   }
   
@@@ -1471,14 -1550,13 +1467,14 @@@ void cpu_disable_common(void
         remove_cpu_from_maps(cpu);
         unlock_vector_lock();
         fixup_irqs();
+ +      lapic_offline();
   }
   
   int native_cpu_disable(void)
   {
         int ret;
   
- -      ret = check_irq_vectors_for_cpu_disable();
+ +      ret = lapic_can_unplug_cpu();
         if (ret)
                 return ret;
   
diff --combined arch/x86/kernel/traps.c

index e98f8b66a460b98b31d262cff23fa063be33ac5a,7c16fe0b60c247ff9ac4bfdefc5820d085c6163a..f69dbd47d7332f4af7e5f274bb6aa9736f3014bd
--- 1/arch/x86/kernel/traps.c
--- 2/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@@ -42,6 -42,7 +42,6 @@@
   #include <linux/edac.h>
   #endif
   
- -#include <asm/kmemcheck.h>
   #include <asm/stacktrace.h>
   #include <asm/processor.h>
   #include <asm/debugreg.h>
@@@ -51,6 -52,7 +51,7 @@@
   #include <asm/traps.h>
   #include <asm/desc.h>
   #include <asm/fpu/internal.h>
+ #include <asm/cpu_entry_area.h>
   #include <asm/mce.h>
   #include <asm/fixmap.h>
   #include <asm/mach_traps.h>
@@@ -59,7 -61,6 +60,7 @@@
   #include <asm/trace/mpx.h>
   #include <asm/mpx.h>
   #include <asm/vm86.h>
+ +#include <asm/umip.h>
   
   #ifdef CONFIG_X86_64
   #include <asm/x86_init.h>
@@@ -71,7 -72,7 +72,7 @@@
   #include <asm/proto.h>
   #endif
   
- -DECLARE_BITMAP(used_vectors, NR_VECTORS);
+ +DECLARE_BITMAP(system_vectors, NR_VECTORS);
   
   static inline void cond_local_irq_enable(struct pt_regs *regs)
   {
@@@ -536,11 -537,6 +537,11 @@@ do_general_protection(struct pt_regs *r
         RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
         cond_local_irq_enable(regs);
   
+ +      if (static_cpu_has(X86_FEATURE_UMIP)) {
+ +              if (user_mode(regs) && fixup_umip_exception(regs))
+ +                      return;
+ +      }
+ +
         if (v8086_mode(regs)) {
                 local_irq_enable();
                 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
@@@ -768,6 -764,10 +769,6 @@@ dotraplinkage void do_debug(struct pt_r
         if (!dr6 && user_mode(regs))
                 user_icebp = 1;
   
- -      /* Catch kmemcheck conditions! */
- -      if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
- -              goto exit;
- -
         /* Store the virtualized DR6 value */
         tsk->thread.debugreg6 = dr6;
   
@@@ -951,8 -951,9 +952,9 @@@ void __init trap_init(void
          * "sidt" instruction will not leak the location of the kernel, and
          * to defend the IDT against arbitrary memory write vulnerabilities.
          * It will be reloaded in cpu_init() */
-       __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
-       idt_descr.address = fix_to_virt(FIX_RO_IDT);
+       cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+                   PAGE_KERNEL_RO);
+       idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
   
         /*
          * Should be a barrier for any external CPU state:
diff --combined arch/x86/mm/Makefile

index 8e13b8cc6bedb0dc84eea64cd80ca6ae39037eaa,2e0017af8f9b068e4a9f6d49103af859b02064fd..52195ee3f6d50ebd2005aa040b1cf0023edd6b33
--- 1/arch/x86/mm/Makefile
--- 2/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@@ -10,7 -10,7 +10,7 @@@ CFLAGS_REMOVE_mem_encrypt.o   = -p
   endif
   
   obj-y :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
   
   # Make sure __phys_addr has no stackprotector
   nostackp := $(call cc-option, -fno-stack-protector)
@@@ -29,6 -29,8 +29,6 @@@ obj-$(CONFIG_X86_PTDUMP)      += debug_paget
   
   obj-$(CONFIG_HIGHMEM)         += highmem_32.o
   
- -obj-$(CONFIG_KMEMCHECK)               += kmemcheck/
- -
   KASAN_SANITIZE_kasan_init_$(BITS).o := n
   obj-$(CONFIG_KASAN)           += kasan_init_$(BITS).o
   
diff --combined arch/x86/xen/mmu_pv.c

index 69145ea5532c306ab4cd52fb9bd428a0435eaa47,a0e2b8c6e5c73c5332597cbed8c6dd298a250cb3..4d62c071b166f65c848a12ca07bfe44ca20e198a
--- 1/arch/x86/xen/mmu_pv.c
--- 2/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@@ -315,7 -315,7 +315,7 @@@ void xen_ptep_modify_prot_commit(struc
   static pteval_t pte_mfn_to_pfn(pteval_t val)
   {
         if (val & _PAGE_PRESENT) {
- -              unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+ +              unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
                 unsigned long pfn = mfn_to_pfn(mfn);
   
                 pteval_t flags = val & PTE_FLAGS_MASK;
@@@ -1721,7 -1721,7 +1721,7 @@@ static unsigned long __init m2p(phys_ad
   {
         phys_addr_t paddr;
   
- -      maddr &= PTE_PFN_MASK;
+ +      maddr &= XEN_PTE_MFN_MASK;
         paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
   
         return paddr;
@@@ -1902,18 -1902,6 +1902,18 @@@ void __init xen_setup_kernel_pagetable(
         /* Graft it onto L4[511][510] */
         copy_page(level2_kernel_pgt, l2);
   
+ +      /*
+ +       * Zap execute permission from the ident map. Due to the sharing of
+ +       * L1 entries we need to do this in the L2.
+ +       */
+ +      if (__supported_pte_mask & _PAGE_NX) {
+ +              for (i = 0; i < PTRS_PER_PMD; ++i) {
+ +                      if (pmd_none(level2_ident_pgt[i]))
+ +                              continue;
+ +                      level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+ +              }
+ +      }
+ +
         /* Copy the initial P->M table mappings if necessary. */
         i = pgd_index(xen_start_info->mfn_list);
         if (i && i < pgd_index(__START_KERNEL_map))
@@@ -2273,7 -2261,6 +2273,6 @@@ static void xen_set_fixmap(unsigned idx
   
         switch (idx) {
         case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-       case FIX_RO_IDT:
   #ifdef CONFIG_X86_32
         case FIX_WP_TEST:
   # ifdef CONFIG_HIGHMEM
@@@ -2284,7 -2271,6 +2283,6 @@@
   #endif
         case FIX_TEXT_POKE0:
         case FIX_TEXT_POKE1:
-       case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
                 /* All local page mappings */
                 pte = pfn_pte(phys, prot);
                 break;
diff --combined include/asm-generic/pgtable.h

index b234d54f2cb6e4c23a21db2af3b225264eccae2a,231b35a76dd9b6e85b75ec47857dd6a0f0ec4559..868e68561f913ecaec80ddf05f02816767ea5a17
--- 1/include/asm-generic/pgtable.h
--- 2/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@@ -805,23 -805,15 +805,23 @@@ static inline int pmd_trans_huge(pmd_t 
   {
         return 0;
   }
- -#ifndef __HAVE_ARCH_PMD_WRITE
+ +#ifndef pmd_write
   static inline int pmd_write(pmd_t pmd)
   {
         BUG();
         return 0;
   }
- -#endif /* __HAVE_ARCH_PMD_WRITE */
+ +#endif /* pmd_write */
   #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
   
+ +#ifndef pud_write
+ +static inline int pud_write(pud_t pud)
+ +{
+ +      BUG();
+ +      return 0;
+ +}
+ +#endif /* pud_write */
+ +
   #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
         (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
          !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
@@@ -1025,6 -1017,11 +1025,11 @@@ static inline int pmd_clear_huge(pmd_t 
   struct file;
   int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                         unsigned long size, pgprot_t *vma_prot);
+ 
+ #ifndef CONFIG_X86_ESPFIX64
+ static inline void init_espfix_bsp(void) { }
+ #endif
+ 
   #endif /* !__ASSEMBLY__ */
   
   #ifndef io_remap_pfn_range
diff --combined init/main.c

index e96e3a14533cda199963fe96b97dc78779c66037,8a390f60ec81e53a8422985a74bbf5505ea095ae..7b606fc4848264f3eb52a86bc2f6480585cb5654
--- 1/init/main.c
--- 2/init/main.c
+++ b/init/main.c
@@@ -46,7 -46,6 +46,7 @@@
   #include <linux/cgroup.h>
   #include <linux/efi.h>
   #include <linux/tick.h>
+ +#include <linux/sched/isolation.h>
   #include <linux/interrupt.h>
   #include <linux/taskstats_kern.h>
   #include <linux/delayacct.h>
@@@ -70,6 -69,7 +70,6 @@@
   #include <linux/kgdb.h>
   #include <linux/ftrace.h>
   #include <linux/async.h>
- -#include <linux/kmemcheck.h>
   #include <linux/sfi.h>
   #include <linux/shmem_fs.h>
   #include <linux/slab.h>
@@@ -504,6 -504,8 +504,8 @@@ static void __init mm_init(void
         pgtable_init();
         vmalloc_init();
         ioremap_huge_init();
+       /* Should be run before the first non-init thread is created */
+       init_espfix_bsp();
   }
   
   asmlinkage __visible void __init start_kernel(void)
@@@ -562,6 -564,7 +564,6 @@@
          * kmem_cache_init()
          */
         setup_log_buf(0);
- -      pidhash_init();
         vfs_caches_init_early();
         sort_main_extable();
         trap_init();
@@@ -588,12 -591,6 +590,12 @@@
                 local_irq_disable();
         radix_tree_init();
   
+ +      /*
+ +       * Set up housekeeping before setting up workqueues to allow the unbound
+ +       * workqueue to take non-housekeeping into account.
+ +       */
+ +      housekeeping_init();
+ +
         /*
          * Allow workqueue creation and work item queueing/cancelling
          * early.  Work item execution depends on kthreads and starts after
@@@ -669,19 -666,15 +671,15 @@@
         debug_objects_mem_init();
         setup_per_cpu_pageset();
         numa_policy_init();
+ +      acpi_early_init();
         if (late_time_init)
                 late_time_init();
         calibrate_delay();
- -      pidmap_init();
+ +      pid_idr_init();
         anon_vma_init();
- -      acpi_early_init();
   #ifdef CONFIG_X86
         if (efi_enabled(EFI_RUNTIME_SERVICES))
                 efi_enter_virtual_mode();
- #endif
- #ifdef CONFIG_X86_ESPFIX64
-       /* Should be run before the first non-init thread is created */
-       init_espfix_bsp();
   #endif
         thread_stack_cache_init();
         cred_init();
diff --combined kernel/fork.c

index 432eadf6b58c18d9de6a3d09f3fef36089b4b5a2,500ce64517d93e68ebfa856d244c51f148faa7ba..2295fc69717f6c3d877ef3cac15b55336d7746c6
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -469,7 -469,7 +469,7 @@@ void __init fork_init(void
         /* create a slab on which task_structs can be allocated */
         task_struct_cachep = kmem_cache_create("task_struct",
                         arch_task_struct_size, align,
- -                      SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
+ +                      SLAB_PANIC|SLAB_ACCOUNT, NULL);
   #endif
   
         /* do the arch specific task caches init */
@@@ -721,8 -721,7 +721,7 @@@ static __latent_entropy int dup_mmap(st
                         goto out;
         }
         /* a new mm has just been created */
-       arch_dup_mmap(oldmm, mm);
-       retval = 0;
+       retval = arch_dup_mmap(oldmm, mm);
   out:
         up_write(&mm->mmap_sem);
         flush_tlb_mm(oldmm);
@@@ -817,7 -816,8 +816,7 @@@ static struct mm_struct *mm_init(struc
         init_rwsem(&mm->mmap_sem);
         INIT_LIST_HEAD(&mm->mmlist);
         mm->core_state = NULL;
- -      atomic_long_set(&mm->nr_ptes, 0);
- -      mm_nr_pmds_init(mm);
+ +      mm_pgtables_bytes_init(mm);
         mm->map_count = 0;
         mm->locked_vm = 0;
         mm->pinned_vm = 0;
@@@ -871,9 -871,12 +870,9 @@@ static void check_mm(struct mm_struct *
                                           "mm:%p idx:%d val:%ld\n", mm, i, x);
         }
   
- -      if (atomic_long_read(&mm->nr_ptes))
- -              pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
- -                              atomic_long_read(&mm->nr_ptes));
- -      if (mm_nr_pmds(mm))
- -              pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
- -                              mm_nr_pmds(mm));
+ +      if (mm_pgtables_bytes(mm))
+ +              pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
+ +                              mm_pgtables_bytes(mm));
   
   #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
         VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
@@@ -1871,7 -1874,7 +1870,7 @@@ static __latent_entropy struct task_str
                 retval = -ERESTARTNOINTR;
                 goto bad_fork_cancel_cgroup;
         }
- -      if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+ +      if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
                 retval = -ENOMEM;
                 goto bad_fork_cancel_cgroup;
         }
@@@ -2205,18 -2208,18 +2204,18 @@@ void __init proc_caches_init(void
         sighand_cachep = kmem_cache_create("sighand_cache",
                         sizeof(struct sighand_struct), 0,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
- -                      SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor);
+ +                      SLAB_ACCOUNT, sighand_ctor);
         signal_cachep = kmem_cache_create("signal_cache",
                         sizeof(struct signal_struct), 0,
- -                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ +                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         NULL);
         files_cachep = kmem_cache_create("files_cache",
                         sizeof(struct files_struct), 0,
- -                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ +                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         NULL);
         fs_cachep = kmem_cache_create("fs_cache",
                         sizeof(struct fs_struct), 0,
- -                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ +                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         NULL);
         /*
          * FIXME! The "sizeof(struct mm_struct)" currently includes the
@@@ -2227,7 -2230,7 +2226,7 @@@
          */
         mm_cachep = kmem_cache_create("mm_struct",
                         sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
- -                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT,
+ +                      SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
                         NULL);
         vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
         mmap_init();
author	Linus Torvalds <[email protected]>
	Sat, 23 Dec 2017 19:53:04 +0000 (11:53 -0800)
committer	Linus Torvalds <[email protected]>
	Sat, 23 Dec 2017 19:53:04 +0000 (11:53 -0800)
		1	2
arch/powerpc/include/asm/mmu_context.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/entry/entry_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/desc.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/tlbflush.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/smpboot.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/traps.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/mmu_pv.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-generic/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history