Merge branch 'x86-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <[email protected]>
Mon, 31 Mar 2014 19:26:05 +0000 (12:26 -0700)
committer Linus Torvalds <[email protected]>
Mon, 31 Mar 2014 19:26:05 +0000 (12:26 -0700)
Pull x86 EFI changes from Ingo Molnar:
 "The main changes:

  - Add debug code to dump the EFI pagetable - Borislav Petkov

  - Make 1:1 runtime mapping robust when booting on machines with lots
    of memory - Borislav Petkov

  - Move the EFI facilities bits out of 'x86_efi_facility' and into
    efi.flags, which is the standard architecture-independent place to
    keep EFI state, by Matt Fleming.

  - Add 'EFI mixed mode' support: this allows 64-bit kernels to be
    booted from 32-bit firmware.  This needs a bootloader that supports
    the 'EFI handover protocol'.  By Matt Fleming"

* 'x86-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (31 commits)
  x86, efi: Abstract x86 efi_early calls
  x86/efi: Restore 'attr' argument to query_variable_info()
  x86/efi: Rip out phys_efi_get_time()
  x86/efi: Preserve segment registers in mixed mode
  x86/boot: Fix non-EFI build
  x86, tools: Fix up compiler warnings
  x86/efi: Re-disable interrupts after calling firmware services
  x86/boot: Don't overwrite cr4 when enabling PAE
  x86/efi: Wire up CONFIG_EFI_MIXED
  x86/efi: Add mixed runtime services support
  x86/efi: Firmware agnostic handover entry points
  x86/efi: Split the boot stub into 32/64 code paths
  x86/efi: Add early thunk code to go from 64-bit to 32-bit
  x86/efi: Build our own EFI services pointer table
  efi: Add separate 32-bit/64-bit definitions
  x86/efi: Delete dead code when checking for non-native
  x86/mm/pageattr: Always dump the right page table in an oops
  x86, tools: Consolidate #ifdef code
  x86/boot: Cleanup header.S by removing some #ifdefs
  efi: Use NULL instead of 0 for pointer
  ...

arch/x86/boot/header.S
arch/x86/include/asm/pgtable.h
arch/x86/mm/fault.c
arch/x86/mm/pageattr.c

diff --combined arch/x86/boot/header.S
index 46f5a220b0ade4aaa277d4fe44b80db00ab53521,256388260c8869c0a018dc39a7e2a78520838481..0ca9a5c362bc9681de69956f9c31ee9965d7bb10
@@@ -283,7 -283,7 +283,7 @@@ _start
        # Part 2 of the header, from the old setup.S
  
                .ascii  "HdrS"          # header signature
-               .word   0x020c          # header version number (>= 0x0105)
+               .word   0x020d          # header version number (>= 0x0105)
                                        # or else old loadlin-1.5 will fail)
                .globl realmode_swtch
  realmode_swtch:       .word   0, 0            # default_switch, SETUPSEG
@@@ -350,7 -350,7 +350,7 @@@ cmd_line_ptr:      .long   0               # (Header versio
                                        # can be located anywhere in
                                        # low memory 0x10000 or higher.
  
 -ramdisk_max:  .long 0x7fffffff
 +initrd_addr_max: .long 0x7fffffff
                                        # (Header version 0x0203 or later)
                                        # The highest safe address for
                                        # the contents of an initrd
@@@ -375,7 -375,8 +375,8 @@@ xloadflags
  # define XLF0 0
  #endif
  
- #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+ #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) && \
+       !defined(CONFIG_EFI_MIXED)
     /* kernel/boot_param/ramdisk could be loaded above 4g */
  # define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
  #else
  #endif
  
  #ifdef CONFIG_EFI_STUB
- # ifdef CONFIG_X86_64
- #  define XLF23 XLF_EFI_HANDOVER_64           /* 64-bit EFI handover ok */
+ # ifdef CONFIG_EFI_MIXED
+ #  define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
  # else
- #  define XLF23 XLF_EFI_HANDOVER_32           /* 32-bit EFI handover ok */
+ #  ifdef CONFIG_X86_64
+ #   define XLF23 XLF_EFI_HANDOVER_64          /* 64-bit EFI handover ok */
+ #  else
+ #   define XLF23 XLF_EFI_HANDOVER_32          /* 32-bit EFI handover ok */
+ #  endif
  # endif
  #else
  # define XLF23 0
@@@ -426,13 -431,7 +431,7 @@@ pref_address:             .quad LOAD_PHYSICAL_ADD
  #define INIT_SIZE VO_INIT_SIZE
  #endif
  init_size:            .long INIT_SIZE         # kernel initialization size
- handover_offset:
- #ifdef CONFIG_EFI_STUB
-                       .long 0x30              # offset to the handover
-                                               # protocol entry point
- #else
-                       .long 0
- #endif
+ handover_offset:      .long 0                 # Filled in by build.c
  
  # End of setup header #####################################################
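
The handover_offset field above is now left zero in the assembly and patched
at build time. A rough sketch of what such a build-time fixup amounts to,
assuming the field sits at byte offset 0x264 of the setup header as described
in Documentation/x86/boot.txt (protocol 2.11+); the helper below is
illustrative and is not the actual arch/x86/boot/tools/build.c code.

/*
 * Build-time sketch: record the EFI handover entry in the setup header.
 * Assumption: 'handover_offset' lives at offset 0x264 of the setup image.
 */
#include <stdint.h>

static void put_le32(uint8_t *p, uint32_t v)
{
	p[0] = v & 0xff;
	p[1] = (v >> 8) & 0xff;
	p[2] = (v >> 16) & 0xff;
	p[3] = (v >> 24) & 0xff;
}

/* buf points at the start of the setup (real-mode) image. */
static void patch_handover_offset(uint8_t *buf, uint32_t stub_entry)
{
	put_le32(buf + 0x264, stub_entry);
}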
  
diff --combined arch/x86/include/asm/pgtable.h
index bbc8b12fa443d47ee9a8faa59b36767e7aec866c,938ef1d0458ef2eb60873e464d9f99857743083a..b459ddf27d64149915fb2e2938f2392a8322d957
         : (prot))
  
  #ifndef __ASSEMBLY__
  #include <asm/x86_init.h>
  
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
  /*
   * ZERO_PAGE is a global shared page that is always zero: used
   * for zero-mapped memory areas etc..
@@@ -445,10 -446,20 +446,10 @@@ static inline int pte_same(pte_t a, pte
        return a.pte == b.pte;
  }
  
 -static inline int pteval_present(pteval_t pteval)
 -{
 -      /*
 -       * Yes Linus, _PAGE_PROTNONE == _PAGE_NUMA. Expressing it this
 -       * way clearly states that the intent is that protnone and numa
 -       * hinting ptes are considered present for the purposes of
 -       * pagetable operations like zapping, protection changes, gup etc.
 -       */
 -      return pteval & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_NUMA);
 -}
 -
  static inline int pte_present(pte_t a)
  {
 -      return pteval_present(pte_flags(a));
 +      return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
 +                             _PAGE_NUMA);
  }
  
  #define pte_accessible pte_accessible
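
The ptdump_walk_pgd_level() declaration added in this header is the hook used
by the new EFI page-table dumping debug code. A minimal caller sketch follows;
it assumes that passing a NULL seq_file makes the walker print to the kernel
log and that 'pgd' stands in for whatever private pgd (e.g. an EFI pgd) is
being inspected, neither of which is spelled out in this diff.

/*
 * Sketch: dump an arbitrary page-table hierarchy for debugging.
 */
#include <linux/seq_file.h>
#include <asm/pgtable.h>

static void debug_dump_pgd(pgd_t *pgd)
{
	if (pgd)
		ptdump_walk_pgd_level(NULL, pgd);
}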
diff --combined arch/x86/mm/fault.c
index a10c8c79216187d2faa5add449762710d51c759b,a92c4c999787b1af9b38eac757a8e2d0cc4f99a3..8e57229926779eb9db2afad3e5b277def75d4e0a
@@@ -584,8 -584,13 +584,13 @@@ show_fault_oops(struct pt_regs *regs, u
  
        if (error_code & PF_INSTR) {
                unsigned int level;
+               pgd_t *pgd;
+               pte_t *pte;
  
-               pte_t *pte = lookup_address(address, &level);
+               pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+               pgd += pgd_index(address);
+               pte = lookup_address_in_pgd(pgd, address, &level);
  
                if (pte && pte_present(*pte) && !pte_exec(*pte))
                        printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
@@@ -1020,12 -1025,8 +1025,12 @@@ static inline bool smap_violation(int e
   * This routine handles page faults.  It determines the address,
   * and the problem, and then passes it off to one of the appropriate
   * routines.
 + *
 + * This function must be noinline because both of its callers,
 + * {,trace_}do_page_fault(), are marked notrace.  Keeping this an actual
 + * function guarantees there is a function trace entry.
   */
 -static void __kprobes
 +static void __kprobes noinline
  __do_page_fault(struct pt_regs *regs, unsigned long error_code,
                unsigned long address)
  {
@@@ -1249,38 -1250,31 +1254,38 @@@ good_area
        up_read(&mm->mmap_sem);
  }
  
 -dotraplinkage void __kprobes
 +dotraplinkage void __kprobes notrace
  do_page_fault(struct pt_regs *regs, unsigned long error_code)
  {
 +      unsigned long address = read_cr2(); /* Get the faulting address */
        enum ctx_state prev_state;
 -      /* Get the faulting address: */
 -      unsigned long address = read_cr2();
 +
 +      /*
 +       * This function must be tagged with __kprobes and notrace, and it must
 +       * call read_cr2() before calling anything else, so that no tracing
 +       * machinery is invoked before we've observed the CR2 value.
 +       *
 +       * exception_{enter,exit}() contain all sorts of tracepoints.
 +       */
  
        prev_state = exception_enter();
        __do_page_fault(regs, error_code, address);
        exception_exit(prev_state);
  }
  
 -static void trace_page_fault_entries(struct pt_regs *regs,
 +#ifdef CONFIG_TRACING
 +static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
                                     unsigned long error_code)
  {
        if (user_mode(regs))
 -              trace_page_fault_user(read_cr2(), regs, error_code);
 +              trace_page_fault_user(address, regs, error_code);
        else
 -              trace_page_fault_kernel(read_cr2(), regs, error_code);
 +              trace_page_fault_kernel(address, regs, error_code);
  }
  
 -dotraplinkage void __kprobes
 +dotraplinkage void __kprobes notrace
  trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
  {
 -      enum ctx_state prev_state;
        /*
         * The exception_enter and tracepoint processing could
         * trigger another page faults (user space callchain
         * the faulting address now.
         */
        unsigned long address = read_cr2();
 +      enum ctx_state prev_state;
  
        prev_state = exception_enter();
 -      trace_page_fault_entries(regs, error_code);
 +      trace_page_fault_entries(address, regs, error_code);
        __do_page_fault(regs, error_code, address);
        exception_exit(prev_state);
  }
 +#endif /* CONFIG_TRACING */
diff --combined arch/x86/mm/pageattr.c
index cf125b301b692304da63fa72534a8d285ca0da3f,1585da3b9b8590c7d34fbba0db4bfc86f160a533..ae242a7c11c7473cfeb163b78d54fa62005b8e44
@@@ -126,8 -126,8 +126,8 @@@ within(unsigned long addr, unsigned lon
   * @vaddr:    virtual start address
   * @size:     number of bytes to flush
   *
 - * clflush is an unordered instruction which needs fencing with mfence
 - * to avoid ordering issues.
 + * clflushopt is an unordered instruction which needs fencing with mfence or
 + * sfence to avoid ordering issues.
   */
  void clflush_cache_range(void *vaddr, unsigned int size)
  {
        mb();
  
        for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
 -              clflush(vaddr);
 +              clflushopt(vaddr);
        /*
         * Flush any possible final partial cacheline:
         */
 -      clflush(vend);
 +      clflushopt(vend);
  
        mb();
  }
@@@ -323,8 -323,12 +323,12 @@@ static inline pgprot_t static_protectio
        return prot;
  }
  
- static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
-                                     unsigned int *level)
+ /*
+  * Lookup the page table entry for a virtual address in a specific pgd.
+  * Return a pointer to the entry and the level of the mapping.
+  */
+ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+                            unsigned int *level)
  {
        pud_t *pud;
        pmd_t *pmd;
   */
  pte_t *lookup_address(unsigned long address, unsigned int *level)
  {
-         return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
+         return lookup_address_in_pgd(pgd_offset_k(address), address, level);
  }
  EXPORT_SYMBOL_GPL(lookup_address);
  
@@@ -373,7 -377,7 +377,7 @@@ static pte_t *_lookup_address_cpa(struc
                                  unsigned int *level)
  {
          if (cpa->pgd)
-               return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
+               return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
                                               address, level);
  
          return lookup_address(address, level);
@@@ -692,6 -696,18 +696,18 @@@ static bool try_to_free_pmd_page(pmd_t 
        return true;
  }
  
+ static bool try_to_free_pud_page(pud_t *pud)
+ {
+       int i;
+       for (i = 0; i < PTRS_PER_PUD; i++)
+               if (!pud_none(pud[i]))
+                       return false;
+       free_page((unsigned long)pud);
+       return true;
+ }
  static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
  {
        pte_t *pte = pte_offset_kernel(pmd, start);
@@@ -805,6 -821,16 +821,16 @@@ static void unmap_pud_range(pgd_t *pgd
         */
  }
  
+ static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
+ {
+       pgd_t *pgd_entry = root + pgd_index(addr);
+       unmap_pud_range(pgd_entry, addr, end);
+       if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
+               pgd_clear(pgd_entry);
+ }
  static int alloc_pte_page(pmd_t *pmd)
  {
        pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
@@@ -999,9 -1025,8 +1025,8 @@@ static int populate_pud(struct cpa_dat
  static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
  {
        pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
-       bool allocd_pgd = false;
-       pgd_t *pgd_entry;
        pud_t *pud = NULL;      /* shut up gcc */
+       pgd_t *pgd_entry;
        int ret;
  
        pgd_entry = cpa->pgd + pgd_index(addr);
                        return -1;
  
                set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
-               allocd_pgd = true;
        }
  
        pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
  
        ret = populate_pud(cpa, addr, pgd_entry, pgprot);
        if (ret < 0) {
-               unmap_pud_range(pgd_entry, addr,
+               unmap_pgd_range(cpa->pgd, addr,
                                addr + (cpa->numpages << PAGE_SHIFT));
-               if (allocd_pgd) {
-                       /*
-                        * If I allocated this PUD page, I can just as well
-                        * free it in this error path.
-                        */
-                       pgd_clear(pgd_entry);
-                       free_page((unsigned long)pud);
-               }
                return ret;
        }
        cpa->numpages = ret;
        return 0;
  }
@@@ -1377,10 -1393,10 +1393,10 @@@ static int change_page_attr_set_clr(uns
        cache = cache_attr(mask_set);
  
        /*
 -       * On success we use clflush, when the CPU supports it to
 -       * avoid the wbindv. If the CPU does not support it and in the
 +       * On success we use CLFLUSH, when the CPU supports it to
 +       * avoid the WBINVD. If the CPU does not support it and in the
         * error case we fall back to cpa_flush_all (which uses
 -       * wbindv):
 +       * WBINVD):
         */
        if (!ret && cpu_has_clflush) {
                if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
        return retval;
  }
  
+ void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
+                              unsigned numpages)
+ {
+       unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
+ }
  /*
   * The testcases use internal knowledge of the implementation that shouldn't
   * be exposed to the rest of the kernel. Include these directly here.
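
The lookup_address_in_pgd()/kernel_unmap_pages_in_pgd() pair made available
above is aimed at callers that build and tear down their own page tables, the
EFI 1:1 mapping being the intended user. A hedged usage sketch follows: only
the two function signatures come from this diff, while 'efi_pgd', the address,
the page count and the pr_debug() are illustrative.

/*
 * Usage sketch for the interfaces introduced above.
 */
#include <linux/printk.h>
#include <asm/pgtable.h>

/* Prototypes as introduced in this series (normally from a header). */
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
				    unsigned int *level);
extern void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
				      unsigned numpages);

static void example_unmap(pgd_t *efi_pgd, unsigned long vaddr,
			  unsigned numpages)
{
	unsigned int level;
	pte_t *pte;

	/* Look the address up in the private pgd rather than init_mm. */
	pte = lookup_address_in_pgd(efi_pgd + pgd_index(vaddr), vaddr, &level);
	if (pte)
		pr_debug("mapping at %lx found at level %u\n", vaddr, level);

	/* Drop 'numpages' pages starting at vaddr from that pgd. */
	kernel_unmap_pages_in_pgd(efi_pgd, vaddr, numpages);
}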