# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020c # header version number (>= 0x0105)
+ .word 0x020d # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
# can be located anywhere in
# low memory 0x10000 or higher.
-ramdisk_max: .long 0x7fffffff
+initrd_addr_max: .long 0x7fffffff
# (Header version 0x0203 or later)
# The highest safe address for
# the contents of an initrd
# define XLF0 0
#endif
- #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+ #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) && \
+ !defined(CONFIG_EFI_MIXED)
/* kernel/boot_param/ramdisk could be loaded above 4g */
# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
#else
#endif
#ifdef CONFIG_EFI_STUB
- # ifdef CONFIG_X86_64
- # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
+ # ifdef CONFIG_EFI_MIXED
+ # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64)
# else
- # define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
+ # ifdef CONFIG_X86_64
+ # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
+ # else
+ # define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
+ # endif
# endif
#else
# define XLF23 0
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
- handover_offset:
- #ifdef CONFIG_EFI_STUB
- .long 0x30 # offset to the handover
- # protocol entry point
- #else
- .long 0
- #endif
+ handover_offset: .long 0 # Filled in by build.c
# End of setup header #####################################################
: (prot))
#ifndef __ASSEMBLY__
-
#include <asm/x86_init.h>
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
return a.pte == b.pte;
}
-static inline int pteval_present(pteval_t pteval)
-{
- /*
- * Yes Linus, _PAGE_PROTNONE == _PAGE_NUMA. Expressing it this
- * way clearly states that the intent is that protnone and numa
- * hinting ptes are considered present for the purposes of
- * pagetable operations like zapping, protection changes, gup etc.
- */
- return pteval & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_NUMA);
-}
-
static inline int pte_present(pte_t a)
{
- return pteval_present(pte_flags(a));
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
+ _PAGE_NUMA);
}
#define pte_accessible pte_accessible
if (error_code & PF_INSTR) {
unsigned int level;
+ pgd_t *pgd;
+ pte_t *pte;
- pte_t *pte = lookup_address(address, &level);
+ pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd += pgd_index(address);
+
+ pte = lookup_address_in_pgd(pgd, address, &level);
if (pte && pte_present(*pte) && !pte_exec(*pte))
printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
+ *
+ * This function must have noinline because both callers
+ * {,trace_}do_page_fault() have notrace on. Having this an actual function
+ * guarantees there's a function trace entry.
*/
-static void __kprobes
+static void __kprobes noinline
__do_page_fault(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
up_read(&mm->mmap_sem);
}
-dotraplinkage void __kprobes
+dotraplinkage void __kprobes notrace
do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
+ unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;
- /* Get the faulting address: */
- unsigned long address = read_cr2();
+
+ /*
+ * We must have this function tagged with __kprobes, notrace and call
+ * read_cr2() before calling anything else. To avoid calling any kind
+ * of tracing machinery before we've observed the CR2 value.
+ *
+ * exception_{enter,exit}() contain all sorts of tracepoints.
+ */
prev_state = exception_enter();
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
-static void trace_page_fault_entries(struct pt_regs *regs,
+#ifdef CONFIG_TRACING
+static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
if (user_mode(regs))
- trace_page_fault_user(read_cr2(), regs, error_code);
+ trace_page_fault_user(address, regs, error_code);
else
- trace_page_fault_kernel(read_cr2(), regs, error_code);
+ trace_page_fault_kernel(address, regs, error_code);
}
-dotraplinkage void __kprobes
+dotraplinkage void __kprobes notrace
trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- enum ctx_state prev_state;
/*
* The exception_enter and tracepoint processing could
* trigger another page faults (user space callchain
* the faulting address now.
*/
unsigned long address = read_cr2();
+ enum ctx_state prev_state;
prev_state = exception_enter();
- trace_page_fault_entries(regs, error_code);
+ trace_page_fault_entries(address, regs, error_code);
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
+#endif /* CONFIG_TRACING */
* @vaddr: virtual start address
* @size: number of bytes to flush
*
- * clflush is an unordered instruction which needs fencing with mfence
- * to avoid ordering issues.
+ * clflushopt is an unordered instruction which needs fencing with mfence or
+ * sfence to avoid ordering issues.
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
mb();
for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
- clflush(vaddr);
+ clflushopt(vaddr);
/*
* Flush any possible final partial cacheline:
*/
- clflush(vend);
+ clflushopt(vend);
mb();
}
return prot;
}
- static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
- unsigned int *level)
+ /*
+ * Lookup the page table entry for a virtual address in a specific pgd.
+ * Return a pointer to the entry and the level of the mapping.
+ */
+ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+ unsigned int *level)
{
pud_t *pud;
pmd_t *pmd;
*/
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
- return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
+ return lookup_address_in_pgd(pgd_offset_k(address), address, level);
}
EXPORT_SYMBOL_GPL(lookup_address);
unsigned int *level)
{
if (cpa->pgd)
- return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
+ return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
address, level);
return lookup_address(address, level);
return true;
}
+ static bool try_to_free_pud_page(pud_t *pud)
+ {
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ if (!pud_none(pud[i]))
+ return false;
+
+ free_page((unsigned long)pud);
+ return true;
+ }
+
static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, start);
*/
}
+ static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
+ {
+ pgd_t *pgd_entry = root + pgd_index(addr);
+
+ unmap_pud_range(pgd_entry, addr, end);
+
+ if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
+ pgd_clear(pgd_entry);
+ }
+
static int alloc_pte_page(pmd_t *pmd)
{
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
{
pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
- bool allocd_pgd = false;
- pgd_t *pgd_entry;
pud_t *pud = NULL; /* shut up gcc */
+ pgd_t *pgd_entry;
int ret;
pgd_entry = cpa->pgd + pgd_index(addr);
return -1;
set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
- allocd_pgd = true;
}
pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
ret = populate_pud(cpa, addr, pgd_entry, pgprot);
if (ret < 0) {
- unmap_pud_range(pgd_entry, addr,
+ unmap_pgd_range(cpa->pgd, addr,
addr + (cpa->numpages << PAGE_SHIFT));
-
- if (allocd_pgd) {
- /*
- * If I allocated this PUD page, I can just as well
- * free it in this error path.
- */
- pgd_clear(pgd_entry);
- free_page((unsigned long)pud);
- }
return ret;
}
+
cpa->numpages = ret;
return 0;
}
cache = cache_attr(mask_set);
/*
- * On success we use clflush, when the CPU supports it to
- * avoid the wbindv. If the CPU does not support it and in the
+ * On success we use CLFLUSH, when the CPU supports it to
+ * avoid the WBINVD. If the CPU does not support it and in the
* error case we fall back to cpa_flush_all (which uses
- * wbindv):
+ * WBINVD):
*/
if (!ret && cpu_has_clflush) {
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
return retval;
}
+ void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
+ unsigned numpages)
+ {
+ unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
+ }
+
/*
* The testcases use internal knowledge of the implementation that shouldn't
* be exposed to the rest of the kernel. Include these directly here.