select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_ASM_MODVERSIONS
select HAVE_CMPXCHG_DOUBLE
def_bool y
depends on X86_64 && SMP
- config X86_32_LAZY_GS
- def_bool y
- depends on X86_32 && !STACKPROTECTOR
-
config ARCH_SUPPORTS_UPROBES
def_bool y
default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC))
help
We have to make sure stack protector is unconditionally disabled if
- the compiler produces broken code.
+ the compiler produces broken code or if it does not let us control
+ the segment on 32-bit kernels.
menu "Processor type and features"
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on EFI
+ depends on KEXEC_CORE
depends on X86_X2APIC
depends on PCI
help
config HIGHMEM64G
bool "64GB"
- depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
+ depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
select X86_PAE
help
Select this if you have a 32-bit processor and more than 4
depends on CRYPTO_SHA256=y
select SRCU
select MMU_NOTIFIER
+ select NUMA_KEEP_MEMINFO if NUMA
help
Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
that can be used by applications to set aside private regions of code
REALMODE_CFLAGS += -fno-stack-protector
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
+REALMODE_CFLAGS += $(CLANG_FLAGS)
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
+
+ ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifeq ($(CONFIG_SMP),y)
+ KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
+ else
+ KBUILD_CFLAGS += -mstack-protector-guard=global
+ endif
+ endif
else
BITS := 64
UTS_MACHINE := x86_64
static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
char buffer[MAX_INSN_SIZE];
- enum es_result ret;
+ int ret;
memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
- insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
- insn_get_length(&ctxt->insn);
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
- ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
-
- return ret;
+ return ES_OK;
}
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
}
finish:
- if (result == ES_OK) {
+ if (result == ES_OK)
vc_finish_insn(&ctxt);
- } else if (result != ES_RETRY) {
- /*
- * For now, just halt the machine. That makes debugging easier,
- * later we just call sev_es_terminate() here.
- */
- while (true)
- asm volatile("hlt\n");
- }
+ else if (result != ES_RETRY)
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
}
* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
- * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
+ * 28(%esp) - unused -- was %gs on old stackprotector kernels
* 2C(%esp) - orig_eax
* 30(%esp) - %eip
* 34(%esp) - %cs
#define PTI_SWITCH_MASK (1 << PAGE_SHIFT)
- /*
- * User gs save/restore
- *
- * %gs is used for userland TLS and kernel only uses it for stack
- * canary which is required to be at %gs:20 by gcc. Read the comment
- * at the top of stackprotector.h for more info.
- *
- * Local labels 98 and 99 are used.
- */
- #ifdef CONFIG_X86_32_LAZY_GS
-
- /* unfortunately push/pop can't be no-op */
- .macro PUSH_GS
- pushl $0
- .endm
- .macro POP_GS pop=0
- addl $(4 + \pop), %esp
- .endm
- .macro POP_GS_EX
- .endm
-
- /* all the rest are no-op */
- .macro PTGS_TO_GS
- .endm
- .macro PTGS_TO_GS_EX
- .endm
- .macro GS_TO_REG reg
- .endm
- .macro REG_TO_PTGS reg
- .endm
- .macro SET_KERNEL_GS reg
- .endm
-
- #else /* CONFIG_X86_32_LAZY_GS */
-
- .macro PUSH_GS
- pushl %gs
- .endm
-
- .macro POP_GS pop=0
- 98: popl %gs
- .if \pop <> 0
- add $\pop, %esp
- .endif
- .endm
- .macro POP_GS_EX
- .pushsection .fixup, "ax"
- 99: movl $0, (%esp)
- jmp 98b
- .popsection
- _ASM_EXTABLE(98b, 99b)
- .endm
-
- .macro PTGS_TO_GS
- 98: mov PT_GS(%esp), %gs
- .endm
- .macro PTGS_TO_GS_EX
- .pushsection .fixup, "ax"
- 99: movl $0, PT_GS(%esp)
- jmp 98b
- .popsection
- _ASM_EXTABLE(98b, 99b)
- .endm
-
- .macro GS_TO_REG reg
- movl %gs, \reg
- .endm
- .macro REG_TO_PTGS reg
- movl \reg, PT_GS(%esp)
- .endm
- .macro SET_KERNEL_GS reg
- movl $(__KERNEL_STACK_CANARY), \reg
- movl \reg, %gs
- .endm
-
- #endif /* CONFIG_X86_32_LAZY_GS */
-
/* Unconditionally switch to user cr3 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
*
* Lets build a 5 entry IRET frame after that, such that struct pt_regs
* is complete and in particular regs->sp is correct. This gives us
- * the original 6 enties as gap:
+ * the original 6 entries as gap:
*
* 14*4(%esp) - <previous context>
* 13*4(%esp) - gap / flags
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
cld
.if \skip_gs == 0
- PUSH_GS
+ pushl $0
.endif
pushl %fs
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
- .if \skip_gs == 0
- SET_KERNEL_GS %edx
- .endif
/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
SWITCH_TO_KERNEL_STACK
1: popl %ds
2: popl %es
3: popl %fs
- POP_GS \pop
+ addl $(4 + \pop), %esp /* pop the unused "gs" slot */
IRET_FRAME
.pushsection .fixup, "ax"
4: movl $0, (%esp)
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 5b)
_ASM_EXTABLE(3b, 6b)
- POP_GS_EX
.endm
.macro RESTORE_ALL_NMI cr3_reg:req pop=0
#ifdef CONFIG_STACKPROTECTOR
movl TASK_stack_canary(%edx), %ebx
- movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+ movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif
#ifdef CONFIG_RETPOLINE
movl PT_EIP(%esp), %edx /* pt_regs->ip */
movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
1: mov PT_FS(%esp), %fs
- PTGS_TO_GS
popl %ebx /* pt_regs->bx */
addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
jmp 1b
.popsection
_ASM_EXTABLE(1b, 2b)
- PTGS_TO_GS_EX
.Lsysenter_fix_flags:
pushl $X86_EFLAGS_FIXED
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
- /* fixup %gs */
- GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
- REG_TO_PTGS %ecx
- SET_KERNEL_GS %ecx
/* fixup orig %eax */
movl PT_ORIG_EAX(%esp), %edx # get the error code
is_64bit = kernel_ip(to) || any_64bit_mode(regs);
#endif
insn_init(&insn, kaddr, size, is_64bit);
- insn_get_length(&insn);
+
/*
- * Make sure there was not a problem decoding the
- * instruction and getting the length. This is
- * doubly important because we have an infinite
- * loop if insn.length=0.
+ * Make sure there was not a problem decoding the instruction.
+ * This is doubly important because we have an infinite loop if
+ * insn.length=0.
*/
- if (!insn.length)
+ if (insn_get_length(&insn))
break;
to += insn.length;
*
* [-period, 0]
*
- * the difference between two consequtive reads is:
+ * the difference between two consecutive reads is:
*
* A) value2 - value1;
* when no overflows have happened in between,
/*
* The LBR logs any address in the IP, even if the IP just
* faulted. This means userspace can control the from address.
- * Ensure we don't blindy read any address by validating it is
+ * Ensure we don't blindly read any address by validating it is
* a known text address.
*/
if (kernel_text_address(from)) {
is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
#endif
insn_init(&insn, addr, bytes_read, is64);
- insn_get_opcode(&insn);
- if (!insn.opcode.got)
+ if (insn_get_opcode(&insn))
return X86_BR_ABORT;
switch (insn.opcode.bytes[0]) {
ret = X86_BR_INT;
break;
case 0xe8: /* call near rel */
- insn_get_immediate(&insn);
- if (insn.immediate1.value == 0) {
+ if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
/* zero length call */
ret = X86_BR_ZERO_CALL;
break;
ret = X86_BR_JMP;
break;
case 0xff: /* call near absolute, call far absolute ind */
- insn_get_modrm(&insn);
+ if (insn_get_modrm(&insn))
+ return X86_BR_ABORT;
+
ext = (insn.modrm.bytes[0] >> 3) & 0x7;
switch (ext) {
case 2: /* near ind call */
/* CPU types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
- #define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+ /* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
/*
* Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
+ * the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *%\reg
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
- __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
call *%\reg
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- "call __x86_retpoline_%V[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
-
- /*
- * We store cpu_current_top_of_stack in sp1 so it's always accessible.
- * Linux does not use ring 1, so sp1 is not otherwise needed.
- */
u64 sp1;
/*
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
-#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-#else
-/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
-#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
-#endif
#ifdef CONFIG_X86_64
struct fixed_percpu_data {
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
+ *
+ * Once we are willing to require -mstack-protector-guard-symbol=
+ * support for x86_64 stackprotector, we can get rid of this.
*/
char gs_base[40];
unsigned long stack_canary;
void current_save_fsgs(void);
#else /* X86_64 */
#ifdef CONFIG_STACKPROTECTOR
- /*
- * Make sure stack canary segment base is cached-aligned:
- * "For Intel Atom processors, avoid non zero segment base address
- * that is not aligned to cache line boundary at all cost."
- * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
- */
- struct stack_canary {
- char __pad[20]; /* canary at %gs:20 */
- unsigned long canary;
- };
- DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+ DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
#endif
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
struct io_bitmap *io_bitmap;
/*
- * IOPL. Priviledge level dependent I/O permission which is
+ * IOPL. Privilege level dependent I/O permission which is
* emulated via the I/O bitmap to prevent user space from disabling
* interrupts.
*/
[GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
[GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
- GDT_STACK_CANARY_INIT
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
if (pk)
pk->pkru = init_pkru_value;
/*
- * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
+ * Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
__loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
- load_stack_canary_segment();
}
#ifdef CONFIG_X86_32
cpu_set_bug_bits(c);
- cpu_set_core_cap_bits(c);
+ sld_setup(c);
fpu__init_system(c);
* where GS is unused by the prev and next threads.
*
* Since neither vendor documents this anywhere that I can see,
- * detect it directly instead of hardcoding the choice by
+ * detect it directly instead of hard-coding the choice by
* vendor.
*
* I've designated AMD's behavior as the "bug" because it's
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
+
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_STACKPROTECTOR
- DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+ DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif /* CONFIG_X86_64 */
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
),
MCESEV(
- KEEP, "Non signalled machine check",
+ KEEP, "Non signaled machine check",
SER, BITCLR(MCI_STATUS_S)
),
static bool is_copy_from_user(struct pt_regs *regs)
{
u8 insn_buf[MAX_INSN_SIZE];
- struct insn insn;
unsigned long addr;
+ struct insn insn;
+ int ret;
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return false;
- kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
- insn_get_opcode(&insn);
- if (!insn.opcode.got)
+ ret = insn_decode_kernel(&insn, insn_buf);
+ if (ret < 0)
return false;
switch (insn.opcode.value) {
case 0x8A: case 0x8B:
/* MOVZ mem,reg */
case 0xB60F: case 0xB70F:
- insn_get_modrm(&insn);
- insn_get_sib(&insn);
- if (!insn.modrm.got || !insn.sib.got)
- return false;
addr = (unsigned long)insn_get_addr_ref(&insn, regs);
break;
/* REP MOVS */
/*
* Range of the BSS area. The size of the BSS area is determined
- * at link time, with RESERVE_BRK*() facility reserving additional
+ * at link time, with RESERVE_BRK() facility reserving additional
* chunks.
*/
unsigned long _brk_start = (unsigned long)__brk_base;
printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
/*
- * Reserve all memory below the 1 MB mark that has not
- * already been reserved.
+ * SandyBridge integrated graphics devices have a bug that prevents
+ * them from accessing certain memory ranges, namely anything below
+ * 1M and in the pages listed in bad_pages[] above.
+ *
+ * To avoid these pages being ever accessed by SNB gfx devices
+ * reserve all memory below the 1 MB mark and bad_pages that have
+ * not already been reserved at boot time.
*/
memblock_reserve(0, 1<<20);
-
+
for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
if (memblock_reserve(bad_pages[i], PAGE_SIZE))
printk(KERN_WARNING "failed to reserve 0x%08lx\n",
}
}
-/*
- * Here we put platform-specific memory range workarounds, i.e.
- * memory known to be corrupt or otherwise in need to be reserved on
- * specific platforms.
- *
- * If this gets used more widely it could use a real dispatch mechanism.
- */
-static void __init trim_platform_memory_ranges(void)
-{
- trim_snb_memory();
-}
-
static void __init trim_bios_range(void)
{
/*
early_param("reservelow", parse_reservelow);
-static void __init trim_low_memory_range(void)
+static void __init early_reserve_memory(void)
{
+ /*
+ * Reserve the memory occupied by the kernel between _text and
+ * __end_of_kernel_reserve symbols. Any kernel sections after the
+ * __end_of_kernel_reserve symbol must be explicitly reserved with a
+ * separate memblock_reserve() or they will be discarded.
+ */
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
+
+ /*
+ * The first 4Kb of memory is a BIOS owned area, but generally it is
+ * not listed as such in the E820 table.
+ *
+ * Reserve the first memory page and typically some additional
+ * memory (64KiB by default) since some BIOSes are known to corrupt
+ * low memory. See the Kconfig help text for X86_RESERVE_LOW.
+ *
+ * In addition, make sure page 0 is always reserved because on
+ * systems with L1TF its contents can be leaked to user processes.
+ */
memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+
+ early_reserve_initrd();
+
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
+
+ memblock_x86_reserve_range_setup_data();
+
+ reserve_ibft_region();
+ reserve_bios_regions();
}
-
+
/*
* Dump out kernel offset information on panic.
*/
void __init setup_arch(char **cmdline_p)
{
- /*
- * Reserve the memory occupied by the kernel between _text and
- * __end_of_kernel_reserve symbols. Any kernel sections after the
- * __end_of_kernel_reserve symbol must be explicitly reserved with a
- * separate memblock_reserve() or they will be discarded.
- */
- memblock_reserve(__pa_symbol(_text),
- (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
-
- /*
- * Make sure page 0 is always reserved because on systems with
- * L1TF its contents can be leaked to user processes.
- */
- memblock_reserve(0, PAGE_SIZE);
-
- early_reserve_initrd();
-
- /*
- * At this point everything still needed from the boot loader
- * or BIOS or kernel text should be early reserved or marked not
- * RAM in e820. All other memory is free game.
- */
-
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
idt_setup_early_traps();
early_cpu_init();
- arch_init_ideal_nops();
jump_label_init();
static_call_init();
early_ioremap_init();
parse_early_param();
- if (efi_enabled(EFI_BOOT))
- efi_memblock_x86_reserve_range();
+ /*
+ * Do some memory reservations *before* memory is added to
+ * memblock, so memblock allocations won't overwrite it.
+ * Do it after early param, so we could get (unlikely) panic from
+ * serial.
+ *
+ * After this point everything still needed from the boot loader or
+ * firmware or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+ early_reserve_memory();
+
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
x86_report_nx();
- /* after early param, so could get panic from serial */
- memblock_x86_reserve_range_setup_data();
-
if (acpi_mps_check()) {
#ifdef CONFIG_X86_LOCAL_APIC
disable_apic = 1;
*/
find_smp_config();
- reserve_ibft_region();
-
early_alloc_pgt_buf();
/*
* Need to conclude brk, before e820__memblock_setup()
- * it could use memblock_find_in_range, could overlap with
- * brk area.
+ * it could use memblock_find_in_range, could overlap with
+ * brk area.
*/
reserve_brk();
*/
sev_setup_arch();
- reserve_bios_regions();
-
efi_fake_memmap();
efi_find_mirror();
efi_esrt_init();
reserve_real_mode();
- trim_platform_memory_ranges();
- trim_low_memory_range();
+ /*
+ * Reserving memory causing GPU hangs on Sandy Bridge integrated
+ * graphics devices should be done after we allocated memory under
+ * 1M for the real mode trampoline.
+ */
+ trim_snb_memory();
init_mem_mapping();
reserve_initrd();
acpi_table_upgrade();
+ /* Look for ACPI tables and reserve memory occupied by them. */
+ acpi_boot_table_init();
vsmp_init();
early_platform_quirks();
- /*
- * Parse the ACPI tables for possible boot-time SMP configuration.
- */
- acpi_boot_table_init();
-
early_acpi_boot_init();
initmem_init();
}
/*
- * This function handles the case when an NMI is raised in the #VC exception
- * handler entry code. In this case, the IST entry for #VC must be adjusted, so
- * that any subsequent #VC exception will not overwrite the stack contents of the
- * interrupted #VC handler.
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
*
* The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
- * sev_es_ist_exit() call may adjust back the IST entry too early.
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
*/
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
unsigned long old_ist, new_ist;
/* Read old IST entry */
- old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+ new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
- /* Make room on the IST stack */
+ /*
+ * If NMI happened while on the #VC IST stack, set the new IST
+ * value below regs->sp, so that the interrupted stack frame is
+ * not overwritten by subsequent #VC exceptions.
+ */
if (on_vc_stack(regs))
- new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
- else
- new_ist = old_ist - sizeof(old_ist);
+ new_ist = regs->sp;
- /* Store old IST entry */
+ /*
+ * Reserve additional 8 bytes and store old IST value so this
+ * adjustment can be unrolled in __sev_es_ist_exit().
+ */
+ new_ist -= sizeof(old_ist);
*(unsigned long *)new_ist = old_ist;
/* Set new IST entry */
return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}
- static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+ static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
char buffer[MAX_INSN_SIZE];
- enum es_result ret;
int res;
- if (user_mode(ctxt->regs)) {
- res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
- if (!res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
-
- if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
- return ES_DECODE_FAILED;
- } else {
- res = vc_fetch_insn_kernel(ctxt, buffer);
- if (res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
-
- insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
- insn_get_length(&ctxt->insn);
+ res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+ if (!res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ }
+
+ if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+ return ES_DECODE_FAILED;
+
+ if (ctxt->insn.immediate.got)
+ return ES_OK;
+ else
+ return ES_DECODE_FAILED;
+ }
+
+ static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+ {
+ char buffer[MAX_INSN_SIZE];
+ int res, ret;
+
+ res = vc_fetch_insn_kernel(ctxt, buffer);
+ if (res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
}
- ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
+ else
+ return ES_OK;
+ }
- return ret;
+ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+ {
+ if (user_mode(ctxt->regs))
+ return __vc_decode_user_insn(ctxt);
+ else
+ return __vc_decode_kern_insn(ctxt);
}
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
return false;
}
+ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+ {
+ if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id)
+ return true;
+ return false;
+ }
+
/*
- * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
+ * Unlike the other levels, we do not enforce keeping a
+ * multicore group inside a NUMA node. If this happens, we will
+ * discard the MC level of the topology later.
+ */
+ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+ {
+ if (c->phys_proc_id == o->phys_proc_id)
+ return true;
+ return false;
+ }
+
+ /*
+ * Define intel_cod_cpu[] for Intel COD (Cluster-on-Die) CPUs.
*
- * These are Intel CPUs that enumerate an LLC that is shared by
- * multiple NUMA nodes. The LLC on these systems is shared for
- * off-package data access but private to the NUMA node (half
- * of the package) for on-package access.
+ * Any Intel CPU that has multiple nodes per package and does not
+ * match intel_cod_cpu[] has the SNC (Sub-NUMA Cluster) topology.
*
- * CPUID (the source of the information about the LLC) can only
- * enumerate the cache as being shared *or* unshared, but not
- * this particular configuration. The CPU in this case enumerates
- * the cache to be shared across the entire package (spanning both
- * NUMA nodes).
+ * When in SNC mode, these CPUs enumerate an LLC that is shared
+ * by multiple NUMA nodes. The LLC is shared for off-package data
+ * access but private to the NUMA node (half of the package) for
+ * on-package access. CPUID (the source of the information about
+ * the LLC) can only enumerate the cache as shared or unshared,
+ * but not this particular configuration.
*/
- static const struct x86_cpu_id snc_cpu[] = {
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
+ static const struct x86_cpu_id intel_cod_cpu[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0), /* COD */
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0), /* COD */
+ X86_MATCH_INTEL_FAM6_MODEL(ANY, 1), /* SNC */
{}
};
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
+ const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+ bool intel_snc = id && id->driver_data;
/* Do not match if we do not have a valid APICID for cpu: */
if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
* means 'c' does not share the LLC of 'o'. This will be
* reflected to userspace.
*/
- if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
+ if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
return false;
return topology_sane(c, o, "llc");
}
- /*
- * Unlike the other levels, we do not enforce keeping a
- * multicore group inside a NUMA node. If this happens, we will
- * discard the MC level of the topology later.
- */
- static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
- {
- if (c->phys_proc_id == o->phys_proc_id)
- return true;
- return false;
- }
-
- static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
- {
- if ((c->phys_proc_id == o->phys_proc_id) &&
- (c->cpu_die_id == o->cpu_die_id))
- return true;
- return false;
- }
-
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
+ if (match_pkg(c, o) && !topology_same_node(c, o))
+ x86_has_numa_in_package = true;
+
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(topology_sibling_cpumask, cpu, i);
if ((i == cpu) || (has_mp && match_llc(c, o)))
link_mask(cpu_llc_shared_mask, cpu, i);
+ if ((i == cpu) || (has_mp && match_die(c, o)))
+ link_mask(topology_die_cpumask, cpu, i);
}
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
+
/*
* This needs a separate iteration over the cpus because we rely on all
* topology_sibling_cpumask links to be set-up.
/*
* Does this new cpu bringup a new core?
*/
- if (cpumask_weight(
- topology_sibling_cpumask(cpu)) == 1) {
+ if (threads == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
- if (match_pkg(c, o) && !topology_same_node(c, o))
- x86_has_numa_in_package = true;
-
- if ((i == cpu) || (has_mp && match_die(c, o)))
- link_mask(topology_die_cpumask, cpu, i);
}
-
- threads = cpumask_weight(topology_sibling_cpumask(cpu));
- if (threads > __max_smt_threads)
- __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
int ncpus;
/*
- * Today neither Intel nor AMD support heterogenous systems so
+ * Today neither Intel nor AMD support heterogeneous systems so
* extrapolate the boot cpu's data to all packages.
*/
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
local_irq_disable();
}
-static bool wakeup_cpu0(void)
+/**
+ * cond_wakeup_cpu0 - Wake up CPU0 if needed.
+ *
+ * If NMI wants to wake up CPU0, start CPU0.
+ */
+void cond_wakeup_cpu0(void)
{
if (smp_processor_id() == 0 && enable_start_cpu0)
- return true;
-
- return false;
+ start_cpu0();
}
+EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
/*
* We need to flush the caches before going to sleep, lest we have
__monitor(mwait_ptr, 0, 0);
mb();
__mwait(eax, 0);
- /*
- * If NMI wants to wake up CPU0, start CPU0.
- */
- if (wakeup_cpu0())
- start_cpu0();
+
+ cond_wakeup_cpu0();
}
}
while (1) {
native_halt();
- /*
- * If NMI wants to wake up CPU0, start CPU0.
- */
- if (wakeup_cpu0())
- start_cpu0();
+
+ cond_wakeup_cpu0();
}
}
/*
* Adjust our frame so that we return straight to the #GP
* vector with the expected RSP value. This is safe because
- * we won't enable interupts or schedule before we invoke
+ * we won't enable interrupts or schedule before we invoke
* general_protection, so nothing will clobber the stack
* frame we just set up.
*
{
u8 insn_buf[MAX_INSN_SIZE];
struct insn insn;
+ int ret;
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip,
MAX_INSN_SIZE))
return GP_NO_HINT;
- kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
- insn_get_modrm(&insn);
- insn_get_sib(&insn);
+ ret = insn_decode_kernel(&insn, insn_buf);
+ if (ret < 0)
+ return GP_NO_HINT;
*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
if (*addr == -1UL)
tsk->thread.trap_nr = X86_TRAP_GP;
if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
- return;
+ goto exit;
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
force_sig(SIGSEGV);
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;
- if (kprobe_debug_handler(regs))
- goto out;
-
/*
* The kernel doesn't use INT1
*/
goto out_irq;
}
+ /* #DB for bus lock can only be triggered from userspace. */
+ if (dr6 & DR_BUS_LOCK)
+ handle_bus_lock(regs);
+
/* Add the virtual_dr6 bits for signals. */
dr6 |= current->thread.virtual_dr6;
if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
goto exit;
if (fixup_vdso_exception(regs, trapnr, 0, 0))
- return;
+ goto exit;
force_sig_fault(SIGFPE, si_code,
(void __user *)uprobe_get_trap_addr(regs));
* by whether the operand is a register or a memory location.
* If operand is a register, return as many bytes as the operand
* size. If operand is memory, return only the two least
- * siginificant bytes.
+ * significant bytes.
*/
if (X86_MODRM_MOD(insn->modrm.value) == 3)
*data_size = insn->opnd_bytes;
if (!nr_copied)
return false;
- if (!insn_decode(&insn, regs, buf, nr_copied))
+ if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
return false;
umip_inst = identify_insn(&insn);
* resolve_seg_reg() - obtain segment register index
* @insn: Instruction with operands
* @regs: Register values as seen when entering kernel mode
- * @regoff: Operand offset, in pt_regs, used to deterimine segment register
+ * @regoff: Operand offset, in pt_regs, used to determine segment register
*
* Determine the segment register associated with the operands and, if
* applicable, prefixes and the instruction pointed by @insn.
case INAT_SEG_REG_FS:
return (unsigned short)(regs->fs & 0xffff);
case INAT_SEG_REG_GS:
- /*
- * GS may or may not be in regs as per CONFIG_X86_32_LAZY_GS.
- * The macro below takes care of both cases.
- */
return get_user_gs(regs);
case INAT_SEG_REG_IGNORE:
default:
* @insn: Instruction containing ModRM byte
* @regs: Register values as seen when entering kernel mode
* @offs1: Offset of the first operand register
- * @offs2: Offset of the second opeand register, if applicable
+ * @offs2: Offset of the second operand register, if applicable
*
* Obtain the offset, in pt_regs, of the registers indicated by the ModRM byte
* in @insn. This function is to be used with 16-bit address encodings. The
* If ModRM.mod is 0 and ModRM.rm is 110b, then we use displacement-
* only addressing. This means that no registers are involved in
* computing the effective address. Thus, ensure that the first
- * register offset is invalild. The second register offset is already
+ * register offset is invalid. The second register offset is already
* invalid under the aforementioned conditions.
*/
if ((X86_MODRM_MOD(insn->modrm.value) == 0) &&
static int get_eff_addr_reg(struct insn *insn, struct pt_regs *regs,
int *regoff, long *eff_addr)
{
- insn_get_modrm(insn);
+ int ret;
- if (!insn->modrm.nbytes)
- return -EINVAL;
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
if (X86_MODRM_MOD(insn->modrm.value) != 3)
return -EINVAL;
int *regoff, long *eff_addr)
{
long tmp;
+ int ret;
if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
return -EINVAL;
- insn_get_modrm(insn);
-
- if (!insn->modrm.nbytes)
- return -EINVAL;
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
if (X86_MODRM_MOD(insn->modrm.value) > 2)
return -EINVAL;
* @base_offset will have a register, as an offset from the base of pt_regs,
* that can be used to resolve the associated segment.
*
- * -EINVAL on error.
+ * Negative value on error.
*/
static int get_eff_addr_sib(struct insn *insn, struct pt_regs *regs,
int *base_offset, long *eff_addr)
{
long base, indx;
int indx_offset;
+ int ret;
if (insn->addr_bytes != 8 && insn->addr_bytes != 4)
return -EINVAL;
- insn_get_modrm(insn);
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
if (!insn->modrm.nbytes)
return -EINVAL;
if (X86_MODRM_MOD(insn->modrm.value) > 2)
return -EINVAL;
- insn_get_sib(insn);
+ ret = insn_get_sib(insn);
+ if (ret)
+ return ret;
if (!insn->sib.nbytes)
return -EINVAL;
short eff_addr;
long tmp;
- insn_get_modrm(insn);
- insn_get_displacement(insn);
+ if (insn_get_displacement(insn))
+ goto out;
if (insn->addr_bytes != 2)
goto out;
}
/**
- * insn_decode() - Decode an instruction
+ * insn_decode_from_regs() - Decode an instruction
* @insn: Structure to store decoded instruction
* @regs: Structure with register values as seen when entering kernel mode
* @buf: Buffer containing the instruction bytes
*
* True if instruction was decoded, False otherwise.
*/
- bool insn_decode(struct insn *insn, struct pt_regs *regs,
- unsigned char buf[MAX_INSN_SIZE], int buf_size)
+ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE], int buf_size)
{
int seg_defs;
insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
- insn_get_length(insn);
+ if (insn_get_length(insn))
+ return false;
+
if (buf_size < insn->length)
return false;
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
- memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+ memcpy(prog, x86_nops[5], cnt);
prog += cnt;
if (!ebpf_from_cbpf) {
if (tail_call_reachable && !is_subprog)
void *old_addr, void *new_addr,
const bool text_live)
{
- const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
+ const u8 *nop_insn = x86_nops[5];
u8 old_insn[X86_PATCH_SIZE];
u8 new_insn[X86_PATCH_SIZE];
u8 *prog;
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
- memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
/* out: */
noplen = ASM_NOP_MAX;
for (i = 0; i < noplen; i++)
- EMIT1(ideal_nops[noplen][i]);
+ EMIT1(x86_nops[noplen][i]);
len -= noplen;
}
if (is_imm8(jmp_offset)) {
if (jmp_padding) {
/* To keep the jmp_offset valid, the extra bytes are
- * padded before the jump insn, so we substract the
+ * padded before the jump insn, so we subtract the
* 2 bytes of jmp_cond insn from INSN_SZ_DIFF.
*
* If the previous pass already emits an imm8
if (jmp_padding) {
/* To avoid breaking jmp_offset, the extra bytes
* are padded before the actual jmp insn, so
- * 2 bytes is substracted from INSN_SZ_DIFF.
+ * 2 bytes is subtracted from INSN_SZ_DIFF.
*
* If the previous pass already emits an imm8
* jmp, there is nothing to pad (0 byte).
}
if (image) {
- if (unlikely(proglen + ilen > oldproglen)) {
+ /*
+ * When populating the image, assert that:
+ *
+ * i) We do not write beyond the allocated space, and
+ * ii) addrs[i] did not change from the prior run, in order
+ * to validate assumptions made for computing branch
+ * displacements.
+ */
+ if (unlikely(proglen + ilen > oldproglen ||
+ proglen + ilen != addrs[i])) {
pr_err("bpf_jit: fatal error\n");
return -EFAULT;
}
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
im->ip_after_call = prog;
- memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
}
* the boot start info structure.
* - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
* - `cr4`: all bits are cleared.
- * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’
- * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified.
+ * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
+ * and a limit of `0xFFFFFFFF`. The selector value is unspecified.
* - `ds`, `es`: must be a 32-bit read/write data segment with a base of
- * ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all
+ * `0` and a limit of `0xFFFFFFFF`. The selector values are all
* unspecified.
* - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
* of '0x67'.
#define PVH_GDT_ENTRY_CS 1
#define PVH_GDT_ENTRY_DS 2
- #define PVH_GDT_ENTRY_CANARY 3
#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8)
#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8)
- #define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8)
SYM_CODE_START_LOCAL(pvh_start_xen)
cld
#else /* CONFIG_X86_64 */
- /* Set base address in stack canary descriptor. */
- movl $_pa(gdt_start),%eax
- movl $_pa(canary),%ecx
- movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax)
- shrl $16, %ecx
- movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax)
- movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax)
-
- mov $PVH_CANARY_SEL,%eax
- mov %eax,%gs
-
call mk_early_pgtbl_32
mov $_pa(initial_page_table), %eax
.quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */
#endif
.quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */
- .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */
SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
.balign 16
/*
* segment registers
*/
- #ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
- #endif
#ifdef CONFIG_X86_64
- savesegment(gs, ctxt->gs);
savesegment(fs, ctxt->fs);
savesegment(ds, ctxt->ds);
savesegment(es, ctxt->es);
wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
#else
loadsegment(fs, __KERNEL_PERCPU);
- loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
/* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
*/
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
- #elif defined(CONFIG_X86_32_LAZY_GS)
+ #else
loadsegment(gs, ctxt->gs);
#endif
/*
* When bsp_check() is called in hibernate and suspend, cpu hotplug
- * is disabled already. So it's unnessary to handle race condition between
+ * is disabled already. So it's unnecessary to handle race condition between
* cpumask query and cpu hotplug.
*/
static int bsp_check(void)