[linux.git] blame: arch/x86/kernel/entry_64.S (Linux 4.0-rc5)
1da177e4
LT
1/*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <[email protected]>
1da177e4
LT
7 */
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
8b4777a4
AL
12 * Some of this is documented in Documentation/x86/entry_64.txt
13 *
1da177e4
LT
14 * NOTE: This code handles signal recognition, which happens every time
15 * after an interrupt and after each system call.
0bd7b798
AH
16 *
17 * Normal syscalls and interrupts don't save a full stack frame, this is
1da177e4 18 * only done for syscall tracing, signals or fork/exec et al.
0bd7b798
AH
19 *
20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack.
0d2eb44f 23 * - partial stack frame: partially saved registers up to R11.
0bd7b798 24 * - full stack frame: Like partial stack frame, but all registers saved.
2e91a17b
AK
25 *
26 * Some macro usage:
27 * - CFI macros are used to generate dwarf2 unwind information for better
28 * backtraces. They don't change any code.
29 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
30 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
31 * There are unfortunately lots of special cases where some registers
32 * are not touched. The macro is a big mess that should be cleaned up.
33 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
34 * Gives a full stack frame.
35 * - ENTRY/END Define functions in the symbol table.
36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
37 * frame that is otherwise undefined after a SYSCALL
38 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
cb5dd2c5 39 * - idtentry - Define exception entry points.
1da177e4
LT
40 */
41
1da177e4
LT
42#include <linux/linkage.h>
43#include <asm/segment.h>
1da177e4
LT
44#include <asm/cache.h>
45#include <asm/errno.h>
46#include <asm/dwarf2.h>
47#include <asm/calling.h>
e2d5df93 48#include <asm/asm-offsets.h>
1da177e4
LT
49#include <asm/msr.h>
50#include <asm/unistd.h>
51#include <asm/thread_info.h>
52#include <asm/hw_irq.h>
0341c14d 53#include <asm/page_types.h>
2601e64d 54#include <asm/irqflags.h>
72fe4858 55#include <asm/paravirt.h>
9939ddaf 56#include <asm/percpu.h>
d7abc0fa 57#include <asm/asm.h>
91d1aa43 58#include <asm/context_tracking.h>
63bcff2a 59#include <asm/smap.h>
3891a04a 60#include <asm/pgtable_types.h>
d7e7528b 61#include <linux/err.h>
1da177e4 62
86a1c34a
RM
63/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
64#include <linux/elf-em.h>
65#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
66#define __AUDIT_ARCH_64BIT 0x80000000
67#define __AUDIT_ARCH_LE 0x40000000
68
1da177e4 69 .code64
ea714547
JO
70 .section .entry.text, "ax"
71
16444a8a 72
dc37db4d 73#ifndef CONFIG_PREEMPT
1da177e4 74#define retint_kernel retint_restore_args
0bd7b798 75#endif
2601e64d 76
72fe4858 77#ifdef CONFIG_PARAVIRT
2be29982 78ENTRY(native_usergs_sysret64)
72fe4858
GOC
79 swapgs
80 sysretq
b3baaa13 81ENDPROC(native_usergs_sysret64)
72fe4858
GOC
82#endif /* CONFIG_PARAVIRT */
83
2601e64d
IM
84
85.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
86#ifdef CONFIG_TRACE_IRQFLAGS
87 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
88 jnc 1f
89 TRACE_IRQS_ON
901:
91#endif
92.endm
93
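The `bt $9` in TRACE_IRQS_IRETQ reads the saved RFLAGS image in the frame: bit 9 of RFLAGS is IF, the interrupt-enable flag. A minimal C sketch of the same predicate (X86_EFLAGS_IF is the kernel's name for this mask; the helper itself is illustrative):

    #define X86_EFLAGS_IF (1UL << 9)    /* RFLAGS.IF: interrupts enabled */

    /* Nonzero when the interrupted context had interrupts on. */
    static inline int saved_irqs_on(unsigned long saved_rflags)
    {
            return (saved_rflags & X86_EFLAGS_IF) != 0;
    }
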
5963e317
SR
94/*
95 * When dynamic function tracer is enabled it will add a breakpoint
96 * to all locations that it is about to modify, sync CPUs, update
97 * all the code, sync CPUs, then remove the breakpoints. During this time,
98 * if lockdep is enabled, it might jump back into the debug handler
99 * outside the updating of the IST protection (TRACE_IRQS_ON/OFF).
100 *
101 * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
102 * make sure the stack pointer does not get reset back to the top
103 * of the debug stack, and instead just reuses the current stack.
104 */
105#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
106
107.macro TRACE_IRQS_OFF_DEBUG
108 call debug_stack_set_zero
109 TRACE_IRQS_OFF
110 call debug_stack_reset
111.endm
112
113.macro TRACE_IRQS_ON_DEBUG
114 call debug_stack_set_zero
115 TRACE_IRQS_ON
116 call debug_stack_reset
117.endm
118
119.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
120 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
121 jnc 1f
122 TRACE_IRQS_ON_DEBUG
1231:
124.endm
125
126#else
127# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
128# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
129# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
130#endif
131
1da177e4 132/*
0bd7b798
AH
133 * C code is not supposed to know about undefined top of stack. Every time
134 * a C function with a pt_regs argument is called from the SYSCALL-based
1da177e4
LT
135 * fast path FIXUP_TOP_OF_STACK is needed.
136 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
137 * manipulation.
0bd7b798
AH
138 */
139
140 /* %rsp:at FRAMEEND */
c002a1e6 141 .macro FIXUP_TOP_OF_STACK tmp offset=0
3d1e42a7 142 movq PER_CPU_VAR(old_rsp),\tmp
c002a1e6
AH
143 movq \tmp,RSP+\offset(%rsp)
144 movq $__USER_DS,SS+\offset(%rsp)
145 movq $__USER_CS,CS+\offset(%rsp)
0fcedc86
AL
146 movq RIP+\offset(%rsp),\tmp /* get rip */
147 movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */
c002a1e6
AH
148 movq R11+\offset(%rsp),\tmp /* get eflags */
149 movq \tmp,EFLAGS+\offset(%rsp)
1da177e4
LT
150 .endm
151
c002a1e6
AH
152 .macro RESTORE_TOP_OF_STACK tmp offset=0
153 movq RSP+\offset(%rsp),\tmp
3d1e42a7 154 movq \tmp,PER_CPU_VAR(old_rsp)
c002a1e6
AH
155 movq EFLAGS+\offset(%rsp),\tmp
156 movq \tmp,R11+\offset(%rsp)
1da177e4
LT
157 .endm
158
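In C terms, FIXUP_TOP_OF_STACK fills in the pt_regs fields that the SYSCALL fast path never saved, using the per-cpu old_rsp and the SYSCALL register convention (return RIP in %rcx, RFLAGS in %r11); RESTORE_TOP_OF_STACK is the inverse. A rough sketch, assuming the usual struct pt_regs field names (not a drop-in replacement for the macro):

    static void fixup_top_of_stack(struct pt_regs *regs, unsigned long old_rsp)
    {
            regs->sp    = old_rsp;      /* user RSP stashed at syscall entry */
            regs->ss    = __USER_DS;
            regs->cs    = __USER_CS;
            regs->cx    = regs->ip;     /* SYSRET reloads RIP from %rcx */
            regs->flags = regs->r11;    /* ... and RFLAGS from %r11 */
    }
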
dcd072e2
AH
159/*
160 * empty frame state, used as the base for the interrupt/exception frames below
161 */
162 .macro EMPTY_FRAME start=1 offset=0
7effaa88 163 .if \start
dcd072e2 164 CFI_STARTPROC simple
adf14236 165 CFI_SIGNAL_FRAME
dcd072e2 166 CFI_DEF_CFA rsp,8+\offset
7effaa88 167 .else
dcd072e2 168 CFI_DEF_CFA_OFFSET 8+\offset
7effaa88 169 .endif
1da177e4 170 .endm
d99015b1
AH
171
172/*
dcd072e2 173 * initial frame state for interrupts (and exceptions without error code)
d99015b1 174 */
dcd072e2 175 .macro INTR_FRAME start=1 offset=0
e8a0e276
IM
176 EMPTY_FRAME \start, SS+8+\offset-RIP
177 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
178 CFI_REL_OFFSET rsp, RSP+\offset-RIP
179 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
180 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
181 CFI_REL_OFFSET rip, RIP+\offset-RIP
d99015b1
AH
182 .endm
183
d99015b1
AH
184/*
185 * initial frame state for exceptions with error code (and interrupts
186 * with vector already pushed)
187 */
dcd072e2 188 .macro XCPT_FRAME start=1 offset=0
e8a0e276 189 INTR_FRAME \start, RIP+\offset-ORIG_RAX
dcd072e2
AH
190 .endm
191
192/*
193 * frame that enables calling into C.
194 */
195 .macro PARTIAL_FRAME start=1 offset=0
e8a0e276
IM
196 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
197 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
198 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
199 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
200 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
201 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
202 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
203 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
204 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
205 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
dcd072e2
AH
206 .endm
207
208/*
209 * frame that enables passing a complete pt_regs to a C function.
210 */
211 .macro DEFAULT_FRAME start=1 offset=0
e8a0e276 212 PARTIAL_FRAME \start, R11+\offset-R15
dcd072e2
AH
213 CFI_REL_OFFSET rbx, RBX+\offset
214 CFI_REL_OFFSET rbp, RBP+\offset
215 CFI_REL_OFFSET r12, R12+\offset
216 CFI_REL_OFFSET r13, R13+\offset
217 CFI_REL_OFFSET r14, R14+\offset
218 CFI_REL_OFFSET r15, R15+\offset
219 .endm
d99015b1 220
e2f6bc25
AH
221ENTRY(save_paranoid)
222 XCPT_FRAME 1 RDI+8
223 cld
3bab13b0
JB
224 movq %rdi, RDI+8(%rsp)
225 movq %rsi, RSI+8(%rsp)
e2f6bc25
AH
226 movq_cfi rdx, RDX+8
227 movq_cfi rcx, RCX+8
228 movq_cfi rax, RAX+8
3bab13b0
JB
229 movq %r8, R8+8(%rsp)
230 movq %r9, R9+8(%rsp)
231 movq %r10, R10+8(%rsp)
232 movq %r11, R11+8(%rsp)
e2f6bc25 233 movq_cfi rbx, RBX+8
3bab13b0
JB
234 movq %rbp, RBP+8(%rsp)
235 movq %r12, R12+8(%rsp)
236 movq %r13, R13+8(%rsp)
237 movq %r14, R14+8(%rsp)
238 movq %r15, R15+8(%rsp)
e2f6bc25
AH
239 movl $1,%ebx
240 movl $MSR_GS_BASE,%ecx
241 rdmsr
242 testl %edx,%edx
243 js 1f /* negative -> in kernel */
244 SWAPGS
245 xorl %ebx,%ebx
2461: ret
247 CFI_ENDPROC
248END(save_paranoid)
249
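The rdmsr at the end of save_paranoid decides whether SWAPGS is needed: MSR_GS_BASE holds a kernel address (sign bit set in the high half) when the kernel's GS base is already loaded. A hedged C rendering, with swapgs() standing in for the instruction:

    /* Sketch: the return value is the "no swapgs on exit" flag kept in %ebx. */
    static int paranoid_gs_check(void)
    {
            u32 lo, hi;

            rdmsr(MSR_GS_BASE, lo, hi);
            if ((s32)hi < 0)        /* negative => already on kernel GS */
                    return 1;       /* don't swapgs on the way out */
            swapgs();               /* hypothetical wrapper for SWAPGS */
            return 0;               /* swapgs again before returning */
    }
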
1da177e4 250/*
5b3eec0c
IM
251 * A newly forked process directly context switches into this address.
252 *
253 * rdi: prev task we switched from
0bd7b798 254 */
1da177e4 255ENTRY(ret_from_fork)
dcd072e2 256 DEFAULT_FRAME
5b3eec0c 257
7106a5ab
BL
258 LOCK ; btr $TIF_FORK,TI_flags(%r8)
259
6eebdda3 260 pushq_cfi $0x0002
df5d1874 261 popfq_cfi # reset kernel eflags
5b3eec0c
IM
262
263 call schedule_tail # rdi: 'prev' task parameter
264
1da177e4 265 GET_THREAD_INFO(%rcx)
5b3eec0c 266
1da177e4 267 RESTORE_REST
5b3eec0c
IM
268
269 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
7076aada 270 jz 1f
5b3eec0c 271
956421fb
AL
272 /*
273 * By the time we get here, we have no idea whether our pt_regs,
274 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
275 * the slow path, or one of the ia32entry paths.
276 * Use int_ret_from_sys_call to return, since it can safely handle
277 * all of the above.
278 */
279 jmp int_ret_from_sys_call
5b3eec0c 280
7076aada 2811:
22e2430d 282 subq $REST_SKIP, %rsp # leave space for volatiles
7076aada
AV
283 CFI_ADJUST_CFA_OFFSET REST_SKIP
284 movq %rbp, %rdi
285 call *%rbx
22e2430d
AV
286 movl $0, RAX(%rsp)
287 RESTORE_REST
288 jmp int_ret_from_sys_call
1da177e4 289 CFI_ENDPROC
4b787e0b 290END(ret_from_fork)
1da177e4
LT
291
292/*
0d2eb44f 293 * System call entry. Up to 6 arguments in registers are supported.
1da177e4
LT
294 *
295 * SYSCALL does not save anything on the stack and does not change the
63bcff2a
PA
296 * stack pointer. However, it does mask the flags register for us, so
297 * CLD and CLAC are not needed.
1da177e4 298 */
0bd7b798 299
1da177e4 300/*
0bd7b798 301 * Register setup:
1da177e4
LT
302 * rax system call number
303 * rdi arg0
0bd7b798 304 * rcx return address for syscall/sysret, C arg3
1da177e4 305 * rsi arg1
0bd7b798 306 * rdx arg2
1da177e4
LT
307 * r10 arg3 (--> moved to rcx for C)
308 * r8 arg4
309 * r9 arg5
310 * r11 eflags for syscall/sysret, temporary for C
0bd7b798
AH
311 * r12-r15,rbp,rbx saved by C code, not touched.
312 *
1da177e4
LT
313 * Interrupts are off on entry.
314 * Only called from user space.
315 *
316 * XXX if we had a free scratch register we could save the RSP into the stack frame
317 * and report it properly in ps. Unfortunately we don't have one.
7bf36bbc
AK
318 *
319 * When the user can change the frames, always force IRET. That is because
320 * it deals with uncanonical addresses better. SYSRET has trouble
321 * with them due to bugs in both AMD and Intel CPUs.
0bd7b798 322 */
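The fast path below is a bounds-checked table dispatch. A C rendering of the same flow (sys_call_table and __NR_syscall_max are the real symbols; the pointer type is simplified):

    typedef long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long,
                                   unsigned long, unsigned long, unsigned long);
    extern sys_call_ptr_t sys_call_table[];

    static long dispatch_syscall(struct pt_regs *regs, unsigned long nr)
    {
            if (nr > __NR_syscall_max)
                    return -ENOSYS;  /* rax_enosys=1 preloaded this into RAX */
            /* SYSCALL passes arg3 in %r10; the C ABI wants it in %rcx */
            return sys_call_table[nr](regs->di, regs->si, regs->dx,
                                      regs->r10, regs->r8, regs->r9);
    }
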
1da177e4
LT
323
324ENTRY(system_call)
7effaa88 325 CFI_STARTPROC simple
adf14236 326 CFI_SIGNAL_FRAME
9af45651 327 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
7effaa88
JB
328 CFI_REGISTER rip,rcx
329 /*CFI_REGISTER rflags,r11*/
72fe4858
GOC
330 SWAPGS_UNSAFE_STACK
331 /*
332 * A hypervisor implementation might want to use a label
333 * after the swapgs, so that it can do the swapgs
334 * for the guest and jump here on syscall.
335 */
f6b2bc84 336GLOBAL(system_call_after_swapgs)
72fe4858 337
3d1e42a7 338 movq %rsp,PER_CPU_VAR(old_rsp)
9af45651 339 movq PER_CPU_VAR(kernel_stack),%rsp
2601e64d
IM
340 /*
341 * No need to follow this irqs off/on section - it's straight
342 * and short:
343 */
72fe4858 344 ENABLE_INTERRUPTS(CLBR_NONE)
54eea995
AL
345 SAVE_ARGS 8, 0, rax_enosys=1
346 movq_cfi rax,(ORIG_RAX-ARGOFFSET)
7effaa88
JB
347 movq %rcx,RIP-ARGOFFSET(%rsp)
348 CFI_REL_OFFSET rip,RIP-ARGOFFSET
46db09d3 349 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
1da177e4 350 jnz tracesys
86a1c34a 351system_call_fastpath:
fca460f9 352#if __SYSCALL_MASK == ~0
1da177e4 353 cmpq $__NR_syscall_max,%rax
fca460f9
PA
354#else
355 andl $__SYSCALL_MASK,%eax
356 cmpl $__NR_syscall_max,%eax
357#endif
54eea995 358 ja ret_from_sys_call /* and return regs->ax */
1da177e4
LT
359 movq %r10,%rcx
360 call *sys_call_table(,%rax,8) # XXX: rip relative
361 movq %rax,RAX-ARGOFFSET(%rsp)
362/*
363 * Syscall return path ending with SYSRET (fast path)
0bd7b798
AH
364 * Has incomplete stack frame and undefined top of stack.
365 */
1da177e4 366ret_from_sys_call:
96b6352c
AL
367 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
368 jnz int_ret_from_sys_call_fixup /* Go to the slow path */
369
10cd706d 370 LOCKDEP_SYS_EXIT
72fe4858 371 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 372 TRACE_IRQS_OFF
bcddc015 373 CFI_REMEMBER_STATE
2601e64d
IM
374 /*
375 * sysretq will re-enable interrupts:
376 */
377 TRACE_IRQS_ON
1da177e4 378 movq RIP-ARGOFFSET(%rsp),%rcx
7effaa88 379 CFI_REGISTER rip,rcx
838feb47 380 RESTORE_ARGS 1,-ARG_SKIP,0
7effaa88 381 /*CFI_REGISTER rflags,r11*/
3d1e42a7 382 movq PER_CPU_VAR(old_rsp), %rsp
2be29982 383 USERGS_SYSRET64
1da177e4 384
bcddc015 385 CFI_RESTORE_STATE
1da177e4 386
96b6352c 387int_ret_from_sys_call_fixup:
b60e714d 388 FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
96b6352c 389 jmp int_ret_from_sys_call
86a1c34a 390
1da177e4 391 /* Do syscall tracing */
0bd7b798 392tracesys:
1dcf74f6
AL
393 leaq -REST_SKIP(%rsp), %rdi
394 movq $AUDIT_ARCH_X86_64, %rsi
395 call syscall_trace_enter_phase1
396 test %rax, %rax
397 jnz tracesys_phase2 /* if needed, run the slow path */
398 LOAD_ARGS 0 /* else restore clobbered regs */
399 jmp system_call_fastpath /* and return to the fast path */
400
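tracesys uses the two-phase entry-tracing split: phase 1 can run on the partial frame and returns 0 when nothing (ptrace, seccomp, audit) needs slow-path handling, so the code above jumps straight back to the fast path; a nonzero result carries state into phase 2, which runs with full pt_regs. The calling convention, as used by the calls here (declarations sketched from this era's kernels):

    /* Phase 1: cheap check; may run with only a partial frame saved. */
    unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch);

    /* Phase 2: full-frame work; returns the syscall nr for the table lookup. */
    long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
                                    unsigned long phase1_result);
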
401tracesys_phase2:
1da177e4 402 SAVE_REST
1da177e4 403 FIXUP_TOP_OF_STACK %rdi
1dcf74f6
AL
404 movq %rsp, %rdi
405 movq $AUDIT_ARCH_X86_64, %rsi
406 movq %rax,%rdx
407 call syscall_trace_enter_phase2
408
d4d67150
RM
409 /*
410 * Reload arg registers from stack in case ptrace changed them.
1dcf74f6 411 * We don't reload %rax because syscall_trace_enter_phase2() returned
d4d67150
RM
412 * the value it wants us to use in the table lookup.
413 */
414 LOAD_ARGS ARGOFFSET, 1
1da177e4 415 RESTORE_REST
fca460f9 416#if __SYSCALL_MASK == ~0
1da177e4 417 cmpq $__NR_syscall_max,%rax
fca460f9
PA
418#else
419 andl $__SYSCALL_MASK,%eax
420 cmpl $__NR_syscall_max,%eax
421#endif
54eea995 422 ja int_ret_from_sys_call /* RAX(%rsp) is already set */
1da177e4
LT
423 movq %r10,%rcx /* fixup for C */
424 call *sys_call_table(,%rax,8)
a31f8dd7 425 movq %rax,RAX-ARGOFFSET(%rsp)
7bf36bbc 426 /* Use IRET because user could have changed frame */
0bd7b798
AH
427
428/*
1da177e4
LT
429 * Syscall return path ending with IRET.
430 * Has correct top of stack, but partial stack frame.
bcddc015 431 */
bc8b2b92 432GLOBAL(int_ret_from_sys_call)
72fe4858 433 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 434 TRACE_IRQS_OFF
1da177e4
LT
435 movl $_TIF_ALLWORK_MASK,%edi
436 /* edi: mask to check */
bc8b2b92 437GLOBAL(int_with_check)
10cd706d 438 LOCKDEP_SYS_EXIT_IRQ
1da177e4 439 GET_THREAD_INFO(%rcx)
26ccb8a7 440 movl TI_flags(%rcx),%edx
1da177e4
LT
441 andl %edi,%edx
442 jnz int_careful
26ccb8a7 443 andl $~TS_COMPAT,TI_status(%rcx)
1da177e4
LT
444 jmp retint_swapgs
445
446 /* Either reschedule or signal or syscall exit tracking needed. */
447 /* First do a reschedule test. */
448 /* edx: work, edi: workmask */
449int_careful:
450 bt $TIF_NEED_RESCHED,%edx
451 jnc int_very_careful
2601e64d 452 TRACE_IRQS_ON
72fe4858 453 ENABLE_INTERRUPTS(CLBR_NONE)
df5d1874 454 pushq_cfi %rdi
0430499c 455 SCHEDULE_USER
df5d1874 456 popq_cfi %rdi
72fe4858 457 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 458 TRACE_IRQS_OFF
1da177e4
LT
459 jmp int_with_check
460
461 /* handle signals and tracing -- both require a full stack frame */
462int_very_careful:
2601e64d 463 TRACE_IRQS_ON
72fe4858 464 ENABLE_INTERRUPTS(CLBR_NONE)
b60e714d 465int_check_syscall_exit_work:
1da177e4 466 SAVE_REST
0bd7b798 467 /* Check for syscall exit trace */
d4d67150 468 testl $_TIF_WORK_SYSCALL_EXIT,%edx
1da177e4 469 jz int_signal
df5d1874 470 pushq_cfi %rdi
0bd7b798 471 leaq 8(%rsp),%rdi # &ptregs -> arg1
1da177e4 472 call syscall_trace_leave
df5d1874 473 popq_cfi %rdi
d4d67150 474 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
1da177e4 475 jmp int_restore_rest
0bd7b798 476
1da177e4 477int_signal:
8f4d37ec 478 testl $_TIF_DO_NOTIFY_MASK,%edx
1da177e4
LT
479 jz 1f
480 movq %rsp,%rdi # &ptregs -> arg1
481 xorl %esi,%esi # oldset -> arg2
482 call do_notify_resume
eca91e78 4831: movl $_TIF_WORK_MASK,%edi
1da177e4
LT
484int_restore_rest:
485 RESTORE_REST
72fe4858 486 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 487 TRACE_IRQS_OFF
1da177e4
LT
488 jmp int_with_check
489 CFI_ENDPROC
bcddc015 490END(system_call)
0bd7b798 491
1d4b4b29
AV
492 .macro FORK_LIKE func
493ENTRY(stub_\func)
494 CFI_STARTPROC
495 popq %r11 /* save return address */
496 PARTIAL_FRAME 0
497 SAVE_REST
498 pushq %r11 /* put it back on stack */
499 FIXUP_TOP_OF_STACK %r11, 8
500 DEFAULT_FRAME 0 8 /* offset 8: return address */
501 call sys_\func
502 RESTORE_TOP_OF_STACK %r11, 8
503 ret $REST_SKIP /* pop extended registers */
504 CFI_ENDPROC
505END(stub_\func)
506 .endm
507
b3af11af
AV
508 .macro FIXED_FRAME label,func
509ENTRY(\label)
510 CFI_STARTPROC
511 PARTIAL_FRAME 0 8 /* offset 8: return address */
512 FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
513 call \func
514 RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
515 ret
516 CFI_ENDPROC
517END(\label)
518 .endm
519
1d4b4b29
AV
520 FORK_LIKE clone
521 FORK_LIKE fork
522 FORK_LIKE vfork
b3af11af 523 FIXED_FRAME stub_iopl, sys_iopl
1da177e4 524
1da177e4
LT
525ENTRY(stub_execve)
526 CFI_STARTPROC
e6b04b6b
JB
527 addq $8, %rsp
528 PARTIAL_FRAME 0
1da177e4 529 SAVE_REST
1da177e4
LT
530 FIXUP_TOP_OF_STACK %r11
531 call sys_execve
1da177e4
LT
532 movq %rax,RAX(%rsp)
533 RESTORE_REST
534 jmp int_ret_from_sys_call
535 CFI_ENDPROC
4b787e0b 536END(stub_execve)
0bd7b798 537
27d6ec7a
DD
538ENTRY(stub_execveat)
539 CFI_STARTPROC
540 addq $8, %rsp
541 PARTIAL_FRAME 0
542 SAVE_REST
543 FIXUP_TOP_OF_STACK %r11
544 call sys_execveat
545 RESTORE_TOP_OF_STACK %r11
546 movq %rax,RAX(%rsp)
547 RESTORE_REST
548 jmp int_ret_from_sys_call
549 CFI_ENDPROC
550END(stub_execveat)
551
1da177e4
LT
552/*
553 * sigreturn is special because it needs to restore all registers on return.
554 * This cannot be done with SYSRET, so use the IRET return path instead.
0bd7b798 555 */
1da177e4
LT
556ENTRY(stub_rt_sigreturn)
557 CFI_STARTPROC
7effaa88 558 addq $8, %rsp
e6b04b6b 559 PARTIAL_FRAME 0
1da177e4 560 SAVE_REST
1da177e4
LT
561 FIXUP_TOP_OF_STACK %r11
562 call sys_rt_sigreturn
563 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
564 RESTORE_REST
565 jmp int_ret_from_sys_call
566 CFI_ENDPROC
4b787e0b 567END(stub_rt_sigreturn)
1da177e4 568
c5a37394 569#ifdef CONFIG_X86_X32_ABI
c5a37394
PA
570ENTRY(stub_x32_rt_sigreturn)
571 CFI_STARTPROC
572 addq $8, %rsp
573 PARTIAL_FRAME 0
574 SAVE_REST
c5a37394
PA
575 FIXUP_TOP_OF_STACK %r11
576 call sys32_x32_rt_sigreturn
577 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
578 RESTORE_REST
579 jmp int_ret_from_sys_call
580 CFI_ENDPROC
581END(stub_x32_rt_sigreturn)
582
d1a797f3
PA
583ENTRY(stub_x32_execve)
584 CFI_STARTPROC
585 addq $8, %rsp
586 PARTIAL_FRAME 0
587 SAVE_REST
588 FIXUP_TOP_OF_STACK %r11
6783eaa2 589 call compat_sys_execve
d1a797f3
PA
590 RESTORE_TOP_OF_STACK %r11
591 movq %rax,RAX(%rsp)
592 RESTORE_REST
593 jmp int_ret_from_sys_call
594 CFI_ENDPROC
595END(stub_x32_execve)
596
27d6ec7a
DD
597ENTRY(stub_x32_execveat)
598 CFI_STARTPROC
599 addq $8, %rsp
600 PARTIAL_FRAME 0
601 SAVE_REST
602 FIXUP_TOP_OF_STACK %r11
603 call compat_sys_execveat
604 RESTORE_TOP_OF_STACK %r11
605 movq %rax,RAX(%rsp)
606 RESTORE_REST
607 jmp int_ret_from_sys_call
608 CFI_ENDPROC
609END(stub_x32_execveat)
610
c5a37394
PA
611#endif
612
939b7871
PA
613/*
614 * Build the entry stubs and pointer table with some assembler magic.
615 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
616 * single cache line on all modern x86 implementations.
617 */
618 .section .init.rodata,"a"
619ENTRY(interrupt)
ea714547 620 .section .entry.text
939b7871
PA
621 .p2align 5
622 .p2align CONFIG_X86_L1_CACHE_SHIFT
623ENTRY(irq_entries_start)
624 INTR_FRAME
625vector=FIRST_EXTERNAL_VECTOR
2414e021 626.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
939b7871
PA
627 .balign 32
628 .rept 7
2414e021 629 .if vector < FIRST_SYSTEM_VECTOR
8665596e 630 .if vector <> FIRST_EXTERNAL_VECTOR
939b7871
PA
631 CFI_ADJUST_CFA_OFFSET -8
632 .endif
df5d1874 6331: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
8665596e 634 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
939b7871
PA
635 jmp 2f
636 .endif
637 .previous
638 .quad 1b
ea714547 639 .section .entry.text
939b7871
PA
640vector=vector+1
641 .endif
642 .endr
6432: jmp common_interrupt
644.endr
645 CFI_ENDPROC
646END(irq_entries_start)
647
648.previous
649END(interrupt)
650.previous
651
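The 32-byte budget works out because a pushq of a signed-byte immediate is two bytes, which is exactly what the ~vector+0x80 encoding guarantees. Assuming the usual instruction encodings (a worked layout, not authoritative):

    6 stubs:  pushq imm8 (2 bytes) + short jmp 2f (2 bytes) = 6*4 = 24 bytes
    7th stub: pushq imm8, falls through to 2:               =        2 bytes
    shared:   2: jmp common_interrupt (rel32 jmp)           =        5 bytes
                                                      total =  31 <= 32 bytes
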
d99015b1 652/*
1da177e4
LT
653 * Interrupt entry/exit.
654 *
655 * Interrupt entry points save only callee-clobbered registers in the fast path.
d99015b1
AH
656 *
657 * Entry runs with interrupts off.
658 */
1da177e4 659
722024db 660/* 0(%rsp): ~(interrupt number) */
1da177e4 661 .macro interrupt func
625dbc3b
FW
662 /* reserve pt_regs for scratch regs and rbp */
663 subq $ORIG_RAX-RBP, %rsp
664 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
f6f64681
DV
665 cld
666 /* start from rbp in pt_regs and jump over */
667 movq_cfi rdi, (RDI-RBP)
668 movq_cfi rsi, (RSI-RBP)
669 movq_cfi rdx, (RDX-RBP)
670 movq_cfi rcx, (RCX-RBP)
671 movq_cfi rax, (RAX-RBP)
672 movq_cfi r8, (R8-RBP)
673 movq_cfi r9, (R9-RBP)
674 movq_cfi r10, (R10-RBP)
675 movq_cfi r11, (R11-RBP)
676
677 /* Save rbp so that we can unwind from get_irq_regs() */
678 movq_cfi rbp, 0
679
680 /* Save previous stack value */
681 movq %rsp, %rsi
682
683 leaq -RBP(%rsp),%rdi /* arg1 for handler */
684 testl $3, CS-RBP(%rsi)
685 je 1f
686 SWAPGS
687 /*
688 * irq_count is used to check if a CPU is already on an interrupt stack
689 * or not. While this is essentially redundant with preempt_count it is
690 * a little cheaper to use a separate counter in the PDA (short of
691 * moving irq_enter into assembly, which would be too much work)
692 */
6931: incl PER_CPU_VAR(irq_count)
694 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
695 CFI_DEF_CFA_REGISTER rsi
696
697 /* Store previous stack value */
698 pushq %rsi
699 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
700 0x77 /* DW_OP_breg7 */, 0, \
701 0x06 /* DW_OP_deref */, \
702 0x08 /* DW_OP_const1u */, SS+8-RBP, \
703 0x22 /* DW_OP_plus */
704 /* We entered an interrupt context - irqs are off: */
705 TRACE_IRQS_OFF
706
1da177e4
LT
707 call \func
708 .endm
709
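The incl/cmovzq pair above is the whole stack-switch policy: the per-cpu irq_count is biased so that the outermost interrupt's increment yields zero (setting ZF), and only then does cmovzq move %rsp onto the per-cpu IRQ stack; nested interrupts stay on the stack they arrived on. As C-like pseudocode:

    /* Sketch of the switch in the "interrupt" macro above. */
    static unsigned long switch_to_irq_stack(unsigned long rsp)
    {
            if (++irq_count == 0)           /* biased start; 0 => outermost */
                    rsp = irq_stack_ptr;    /* per-cpu hardirq stack */
            return rsp;
    }
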
722024db
AH
710 /*
711 * The interrupt stubs push (~vector+0x80) onto the stack and
712 * then jump to common_interrupt.
713 */
939b7871
PA
714 .p2align CONFIG_X86_L1_CACHE_SHIFT
715common_interrupt:
7effaa88 716 XCPT_FRAME
ee4eb87b 717 ASM_CLAC
722024db 718 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
1da177e4 719 interrupt do_IRQ
3d1e42a7 720 /* 0(%rsp): old_rsp-ARGOFFSET */
7effaa88 721ret_from_intr:
72fe4858 722 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 723 TRACE_IRQS_OFF
56895530 724 decl PER_CPU_VAR(irq_count)
625dbc3b 725
a2bbe750
FW
726 /* Restore saved previous stack */
727 popq %rsi
928282e4 728 CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
eab9e613 729 leaq ARGOFFSET-RBP(%rsi), %rsp
7effaa88 730 CFI_DEF_CFA_REGISTER rsp
eab9e613 731 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
625dbc3b 732
7effaa88 733exit_intr:
1da177e4
LT
734 GET_THREAD_INFO(%rcx)
735 testl $3,CS-ARGOFFSET(%rsp)
736 je retint_kernel
0bd7b798 737
1da177e4
LT
738 /* Interrupt came from user space */
739 /*
740 * Has a correct top of stack, but a partial stack frame
741 * %rcx: thread info. Interrupts off.
0bd7b798 742 */
1da177e4
LT
743retint_with_reschedule:
744 movl $_TIF_WORK_MASK,%edi
7effaa88 745retint_check:
10cd706d 746 LOCKDEP_SYS_EXIT_IRQ
26ccb8a7 747 movl TI_flags(%rcx),%edx
1da177e4 748 andl %edi,%edx
7effaa88 749 CFI_REMEMBER_STATE
1da177e4 750 jnz retint_careful
10cd706d
PZ
751
752retint_swapgs: /* return to user-space */
2601e64d
IM
753 /*
754 * The iretq could re-enable interrupts:
755 */
72fe4858 756 DISABLE_INTERRUPTS(CLBR_ANY)
2601e64d 757 TRACE_IRQS_IRETQ
2a23c6b8
AL
758
759 /*
760 * Try to use SYSRET instead of IRET if we're returning to
761 * a completely clean 64-bit userspace context.
762 */
763 movq (RCX-R11)(%rsp), %rcx
764 cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */
765 jne opportunistic_sysret_failed
766
767 /*
768 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
769 * in kernel space. This essentially lets the user take over
770 * the kernel, since userspace controls RSP. It's not worth
771 * testing for canonicalness exactly -- this check detects any
772 * of the 17 high bits set, which is true for non-canonical
773 * or kernel addresses. (This will pessimize vsyscall=native.
774 * Big deal.)
775 *
776 * If virtual addresses ever become wider, this will need
777 * to be updated to remain correct on both old and new CPUs.
778 */
779 .ifne __VIRTUAL_MASK_SHIFT - 47
780 .error "virtual address width changed -- sysret checks need update"
781 .endif
782 shr $__VIRTUAL_MASK_SHIFT, %rcx
783 jnz opportunistic_sysret_failed
784
785 cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */
786 jne opportunistic_sysret_failed
787
788 movq (R11-ARGOFFSET)(%rsp), %r11
789 cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */
790 jne opportunistic_sysret_failed
791
792 testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */
793 jnz opportunistic_sysret_failed
794
795 /* nothing to check for RSP */
796
797 cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */
798 jne opportunistic_sysret_failed
799
800 /*
801 * We win! This label is here just for ease of understanding
802 * perf profiles. Nothing jumps here.
803 */
804irq_return_via_sysret:
805 CFI_REMEMBER_STATE
806 RESTORE_ARGS 1,8,1
807 movq (RSP-RIP)(%rsp),%rsp
808 USERGS_SYSRET64
809 CFI_RESTORE_STATE
810
811opportunistic_sysret_failed:
72fe4858 812 SWAPGS
2601e64d
IM
813 jmp restore_args
814
10cd706d 815retint_restore_args: /* return to kernel space */
72fe4858 816 DISABLE_INTERRUPTS(CLBR_ANY)
2601e64d
IM
817 /*
818 * The iretq could re-enable interrupts:
819 */
820 TRACE_IRQS_IRETQ
821restore_args:
838feb47 822 RESTORE_ARGS 1,8,1
3701d863 823
f7f3d791 824irq_return:
7209a75d
AL
825 INTERRUPT_RETURN
826
827ENTRY(native_iret)
3891a04a
PA
828 /*
829 * Are we returning to a stack segment from the LDT? Note: in
830 * 64-bit mode SS:RSP on the exception stack is always valid.
831 */
34273f41 832#ifdef CONFIG_X86_ESPFIX64
3891a04a 833 testb $4,(SS-RIP)(%rsp)
7209a75d 834 jnz native_irq_return_ldt
34273f41 835#endif
3891a04a 836
af726f21 837.global native_irq_return_iret
7209a75d 838native_irq_return_iret:
b645af2d
AL
839 /*
840 * This may fault. Non-paranoid faults on return to userspace are
841 * handled by fixup_bad_iret. These include #SS, #GP, and #NP.
842 * Double-faults due to espfix64 are handled in do_double_fault.
843 * Other faults here are fatal.
844 */
1da177e4 845 iretq
3701d863 846
34273f41 847#ifdef CONFIG_X86_ESPFIX64
7209a75d 848native_irq_return_ldt:
3891a04a
PA
849 pushq_cfi %rax
850 pushq_cfi %rdi
851 SWAPGS
852 movq PER_CPU_VAR(espfix_waddr),%rdi
853 movq %rax,(0*8)(%rdi) /* RAX */
854 movq (2*8)(%rsp),%rax /* RIP */
855 movq %rax,(1*8)(%rdi)
856 movq (3*8)(%rsp),%rax /* CS */
857 movq %rax,(2*8)(%rdi)
858 movq (4*8)(%rsp),%rax /* RFLAGS */
859 movq %rax,(3*8)(%rdi)
860 movq (6*8)(%rsp),%rax /* SS */
861 movq %rax,(5*8)(%rdi)
862 movq (5*8)(%rsp),%rax /* RSP */
863 movq %rax,(4*8)(%rdi)
864 andl $0xffff0000,%eax
865 popq_cfi %rdi
866 orq PER_CPU_VAR(espfix_stack),%rax
867 SWAPGS
868 movq %rax,%rsp
869 popq_cfi %rax
7209a75d 870 jmp native_irq_return_iret
34273f41 871#endif
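After copying the five-word iret frame to espfix_waddr, the andl/orq pair splices the per-cpu espfix base into the stack pointer; a direct C rendering of just that computation (andl on %eax zero-extends, clearing the high 32 bits):

    unsigned long espfix_rsp(unsigned long user_rsp, unsigned long espfix_stack)
    {
            return espfix_stack | (user_rsp & 0xffff0000UL);
    }
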
3891a04a 872
7effaa88 873 /* edi: workmask, edx: work */
1da177e4 874retint_careful:
7effaa88 875 CFI_RESTORE_STATE
1da177e4
LT
876 bt $TIF_NEED_RESCHED,%edx
877 jnc retint_signal
2601e64d 878 TRACE_IRQS_ON
72fe4858 879 ENABLE_INTERRUPTS(CLBR_NONE)
df5d1874 880 pushq_cfi %rdi
0430499c 881 SCHEDULE_USER
df5d1874 882 popq_cfi %rdi
1da177e4 883 GET_THREAD_INFO(%rcx)
72fe4858 884 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 885 TRACE_IRQS_OFF
1da177e4 886 jmp retint_check
0bd7b798 887
1da177e4 888retint_signal:
8f4d37ec 889 testl $_TIF_DO_NOTIFY_MASK,%edx
10ffdbb8 890 jz retint_swapgs
2601e64d 891 TRACE_IRQS_ON
72fe4858 892 ENABLE_INTERRUPTS(CLBR_NONE)
1da177e4 893 SAVE_REST
0bd7b798 894 movq $-1,ORIG_RAX(%rsp)
3829ee6b 895 xorl %esi,%esi # oldset
1da177e4
LT
896 movq %rsp,%rdi # &pt_regs
897 call do_notify_resume
898 RESTORE_REST
72fe4858 899 DISABLE_INTERRUPTS(CLBR_NONE)
2601e64d 900 TRACE_IRQS_OFF
be9e6870 901 GET_THREAD_INFO(%rcx)
eca91e78 902 jmp retint_with_reschedule
1da177e4
LT
903
904#ifdef CONFIG_PREEMPT
905 /* Returning to kernel space. Check if we need preemption */
906 /* rcx: threadinfo. interrupts off. */
b06babac 907ENTRY(retint_kernel)
c2daa3be 908 cmpl $0,PER_CPU_VAR(__preempt_count)
1da177e4 909 jnz retint_restore_args
1da177e4
LT
910 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
911 jnc retint_restore_args
912 call preempt_schedule_irq
913 jmp exit_intr
0bd7b798 914#endif
1da177e4 915 CFI_ENDPROC
4b787e0b 916END(common_interrupt)
3891a04a 917
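The CONFIG_PREEMPT block in retint_kernel preempts only when the per-cpu __preempt_count is zero and the interrupted context had interrupts enabled; the jmp back to exit_intr re-runs the whole test. Roughly, in C:

    /* Sketch of retint_kernel's preemption loop. */
    static void maybe_preempt(struct pt_regs *regs)
    {
            while (__preempt_count == 0 &&
                   (regs->flags & X86_EFLAGS_IF))  /* IF is bit 9 */
                    preempt_schedule_irq();
    }
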
1da177e4
LT
918/*
919 * APIC interrupts.
0bd7b798 920 */
cf910e83 921.macro apicinterrupt3 num sym do_sym
322648d1 922ENTRY(\sym)
7effaa88 923 INTR_FRAME
ee4eb87b 924 ASM_CLAC
df5d1874 925 pushq_cfi $~(\num)
39e95433 926.Lcommon_\sym:
322648d1 927 interrupt \do_sym
1da177e4
LT
928 jmp ret_from_intr
929 CFI_ENDPROC
322648d1
AH
930END(\sym)
931.endm
1da177e4 932
cf910e83
SA
933#ifdef CONFIG_TRACING
934#define trace(sym) trace_##sym
935#define smp_trace(sym) smp_trace_##sym
936
937.macro trace_apicinterrupt num sym
938apicinterrupt3 \num trace(\sym) smp_trace(\sym)
939.endm
940#else
941.macro trace_apicinterrupt num sym do_sym
942.endm
943#endif
944
945.macro apicinterrupt num sym do_sym
946apicinterrupt3 \num \sym \do_sym
947trace_apicinterrupt \num \sym
948.endm
949
322648d1 950#ifdef CONFIG_SMP
cf910e83 951apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
322648d1 952 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
cf910e83 953apicinterrupt3 REBOOT_VECTOR \
4ef702c1 954 reboot_interrupt smp_reboot_interrupt
322648d1 955#endif
1da177e4 956
03b48632 957#ifdef CONFIG_X86_UV
cf910e83 958apicinterrupt3 UV_BAU_MESSAGE \
322648d1 959 uv_bau_message_intr1 uv_bau_message_interrupt
03b48632 960#endif
322648d1
AH
961apicinterrupt LOCAL_TIMER_VECTOR \
962 apic_timer_interrupt smp_apic_timer_interrupt
4a4de9c7
DS
963apicinterrupt X86_PLATFORM_IPI_VECTOR \
964 x86_platform_ipi smp_x86_platform_ipi
89b831ef 965
d78f2664 966#ifdef CONFIG_HAVE_KVM
cf910e83 967apicinterrupt3 POSTED_INTR_VECTOR \
d78f2664
YZ
968 kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
969#endif
970
33e5ff63 971#ifdef CONFIG_X86_MCE_THRESHOLD
322648d1 972apicinterrupt THRESHOLD_APIC_VECTOR \
7856f6cc 973 threshold_interrupt smp_threshold_interrupt
33e5ff63
SA
974#endif
975
976#ifdef CONFIG_X86_THERMAL_VECTOR
322648d1
AH
977apicinterrupt THERMAL_APIC_VECTOR \
978 thermal_interrupt smp_thermal_interrupt
33e5ff63 979#endif
1812924b 980
322648d1
AH
981#ifdef CONFIG_SMP
982apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
983 call_function_single_interrupt smp_call_function_single_interrupt
984apicinterrupt CALL_FUNCTION_VECTOR \
985 call_function_interrupt smp_call_function_interrupt
986apicinterrupt RESCHEDULE_VECTOR \
987 reschedule_interrupt smp_reschedule_interrupt
988#endif
1da177e4 989
322648d1
AH
990apicinterrupt ERROR_APIC_VECTOR \
991 error_interrupt smp_error_interrupt
992apicinterrupt SPURIOUS_APIC_VECTOR \
993 spurious_interrupt smp_spurious_interrupt
0bd7b798 994
e360adbe
PZ
995#ifdef CONFIG_IRQ_WORK
996apicinterrupt IRQ_WORK_VECTOR \
997 irq_work_interrupt smp_irq_work_interrupt
241771ef
IM
998#endif
999
1da177e4
LT
1000/*
1001 * Exception entry points.
0bd7b798 1002 */
577ed45e
AL
1003#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1004
1005.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
322648d1 1006ENTRY(\sym)
577ed45e
AL
1007 /* Sanity check */
1008 .if \shift_ist != -1 && \paranoid == 0
1009 .error "using shift_ist requires paranoid=1"
1010 .endif
1011
cb5dd2c5
AL
1012 .if \has_error_code
1013 XCPT_FRAME
1014 .else
7effaa88 1015 INTR_FRAME
cb5dd2c5 1016 .endif
1da177e4 1017
ee4eb87b 1018 ASM_CLAC
b8b1d08b 1019 PARAVIRT_ADJUST_EXCEPTION_FRAME
cb5dd2c5
AL
1020
1021 .ifeq \has_error_code
1022 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1023 .endif
1024
b1cccb1b
JB
1025 subq $ORIG_RAX-R15, %rsp
1026 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
cb5dd2c5
AL
1027
1028 .if \paranoid
48e08d0f
AL
1029 .if \paranoid == 1
1030 CFI_REMEMBER_STATE
1031 testl $3, CS(%rsp) /* If coming from userspace, switch */
1032 jnz 1f /* stacks. */
1033 .endif
b8b1d08b 1034 call save_paranoid
cb5dd2c5
AL
1035 .else
1036 call error_entry
1037 .endif
1038
1bd24efc 1039 DEFAULT_FRAME 0
cb5dd2c5
AL
1040
1041 .if \paranoid
577ed45e
AL
1042 .if \shift_ist != -1
1043 TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
1044 .else
b8b1d08b 1045 TRACE_IRQS_OFF
cb5dd2c5 1046 .endif
577ed45e 1047 .endif
cb5dd2c5
AL
1048
1049 movq %rsp,%rdi /* pt_regs pointer */
1050
1051 .if \has_error_code
1052 movq ORIG_RAX(%rsp),%rsi /* get error code */
1053 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1054 .else
1055 xorl %esi,%esi /* no error code */
1056 .endif
1057
577ed45e
AL
1058 .if \shift_ist != -1
1059 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
1060 .endif
1061
322648d1 1062 call \do_sym
cb5dd2c5 1063
577ed45e
AL
1064 .if \shift_ist != -1
1065 addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
1066 .endif
1067
cb5dd2c5
AL
1068 .if \paranoid
1069 jmp paranoid_exit /* %ebx: no swapgs flag */
1070 .else
1071 jmp error_exit /* %ebx: no swapgs flag */
1072 .endif
1073
48e08d0f
AL
1074 .if \paranoid == 1
1075 CFI_RESTORE_STATE
1076 /*
1077 * Paranoid entry from userspace. Switch stacks and treat it
1078 * as a normal entry. This means that paranoid handlers
1079 * run in real process context if user_mode(regs).
1080 */
10811:
1082 call error_entry
1083
1084 DEFAULT_FRAME 0
1085
1086 movq %rsp,%rdi /* pt_regs pointer */
1087 call sync_regs
1088 movq %rax,%rsp /* switch stack */
1089
1090 movq %rsp,%rdi /* pt_regs pointer */
1091
1092 .if \has_error_code
1093 movq ORIG_RAX(%rsp),%rsi /* get error code */
1094 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1095 .else
1096 xorl %esi,%esi /* no error code */
1097 .endif
1098
1099 call \do_sym
1100
1101 jmp error_exit /* %ebx: no swapgs flag */
1102 .endif
1103
b8b1d08b 1104 CFI_ENDPROC
ddeb8f21 1105END(\sym)
322648d1 1106.endm
b8b1d08b 1107
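Every do_sym wired up through idtentry has the same C shape: exceptions with a hardware error code receive it as the second argument (fetched from ORIG_RAX above), and the rest get a synthesized zero so the convention stays uniform. Sketched from this era's declarations (dotraplinkage marks these entry points):

    /* has_error_code=1: %rsi carries the hardware error code */
    dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);

    /* has_error_code=0: the stub pushes $-1 as ORIG_RAX and passes 0 */
    dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code);
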
25c74b10 1108#ifdef CONFIG_TRACING
cb5dd2c5
AL
1109.macro trace_idtentry sym do_sym has_error_code:req
1110idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
1111idtentry \sym \do_sym has_error_code=\has_error_code
25c74b10
SA
1112.endm
1113#else
cb5dd2c5
AL
1114.macro trace_idtentry sym do_sym has_error_code:req
1115idtentry \sym \do_sym has_error_code=\has_error_code
25c74b10
SA
1116.endm
1117#endif
1118
cb5dd2c5
AL
1119idtentry divide_error do_divide_error has_error_code=0
1120idtentry overflow do_overflow has_error_code=0
1121idtentry bounds do_bounds has_error_code=0
1122idtentry invalid_op do_invalid_op has_error_code=0
1123idtentry device_not_available do_device_not_available has_error_code=0
48e08d0f 1124idtentry double_fault do_double_fault has_error_code=1 paranoid=2
cb5dd2c5
AL
1125idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
1126idtentry invalid_TSS do_invalid_TSS has_error_code=1
1127idtentry segment_not_present do_segment_not_present has_error_code=1
1128idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0
1129idtentry coprocessor_error do_coprocessor_error has_error_code=0
1130idtentry alignment_check do_alignment_check has_error_code=1
1131idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
5cec93c2 1132
2601e64d 1133
9f1e87ea
CG
1134 /* Reload gs selector with exception handling */
1135 /* edi: new selector */
9f9d489a 1136ENTRY(native_load_gs_index)
7effaa88 1137 CFI_STARTPROC
df5d1874 1138 pushfq_cfi
b8aa287f 1139 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
9f1e87ea 1140 SWAPGS
0bd7b798 1141gs_change:
9f1e87ea 1142 movl %edi,%gs
1da177e4 11432: mfence /* workaround */
72fe4858 1144 SWAPGS
df5d1874 1145 popfq_cfi
9f1e87ea 1146 ret
7effaa88 1147 CFI_ENDPROC
6efdcfaf 1148END(native_load_gs_index)
0bd7b798 1149
d7abc0fa 1150 _ASM_EXTABLE(gs_change,bad_gs)
9f1e87ea 1151 .section .fixup,"ax"
1da177e4 1152 /* running with kernelgs */
0bd7b798 1153bad_gs:
72fe4858 1154 SWAPGS /* switch back to user gs */
1da177e4 1155 xorl %eax,%eax
9f1e87ea
CG
1156 movl %eax,%gs
1157 jmp 2b
1158 .previous
0bd7b798 1159
2699500b 1160/* Call softirq on interrupt stack. Interrupts are off. */
7d65f4a6 1161ENTRY(do_softirq_own_stack)
7effaa88 1162 CFI_STARTPROC
df5d1874 1163 pushq_cfi %rbp
2699500b
AK
1164 CFI_REL_OFFSET rbp,0
1165 mov %rsp,%rbp
1166 CFI_DEF_CFA_REGISTER rbp
56895530 1167 incl PER_CPU_VAR(irq_count)
26f80bd6 1168 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
2699500b 1169 push %rbp # backlink for old unwinder
ed6b676c 1170 call __do_softirq
2699500b 1171 leaveq
df5d1874 1172 CFI_RESTORE rbp
7effaa88 1173 CFI_DEF_CFA_REGISTER rsp
2699500b 1174 CFI_ADJUST_CFA_OFFSET -8
56895530 1175 decl PER_CPU_VAR(irq_count)
ed6b676c 1176 ret
7effaa88 1177 CFI_ENDPROC
7d65f4a6 1178END(do_softirq_own_stack)
75154f40 1179
3d75e1b8 1180#ifdef CONFIG_XEN
cb5dd2c5 1181idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
3d75e1b8
JF
1182
1183/*
9f1e87ea
CG
1184 * A note on the "critical region" in our callback handler.
1185 * We want to avoid stacking callback handlers due to events occurring
1186 * during handling of the last event. To do this, we keep events disabled
1187 * until we've done all processing. HOWEVER, we must enable events before
1188 * popping the stack frame (can't be done atomically) and so it would still
1189 * be possible to get enough handler activations to overflow the stack.
1190 * Although unlikely, bugs of that kind are hard to track down, so we'd
1191 * like to avoid the possibility.
1192 * So, on entry to the handler we detect whether we interrupted an
1193 * existing activation in its critical region -- if so, we pop the current
1194 * activation and restart the handler using the previous one.
1195 */
3d75e1b8
JF
1196ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1197 CFI_STARTPROC
9f1e87ea
CG
1198/*
1199 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
1200 * see the correct pointer to the pt_regs
1201 */
3d75e1b8
JF
1202 movq %rdi, %rsp # we don't return, adjust the stack frame
1203 CFI_ENDPROC
dcd072e2 1204 DEFAULT_FRAME
56895530 120511: incl PER_CPU_VAR(irq_count)
3d75e1b8
JF
1206 movq %rsp,%rbp
1207 CFI_DEF_CFA_REGISTER rbp
26f80bd6 1208 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
3d75e1b8
JF
1209 pushq %rbp # backlink for old unwinder
1210 call xen_evtchn_do_upcall
1211 popq %rsp
1212 CFI_DEF_CFA_REGISTER rsp
56895530 1213 decl PER_CPU_VAR(irq_count)
fdfd811d
DV
1214#ifndef CONFIG_PREEMPT
1215 call xen_maybe_preempt_hcall
1216#endif
3d75e1b8
JF
1217 jmp error_exit
1218 CFI_ENDPROC
371c394a 1219END(xen_do_hypervisor_callback)
3d75e1b8
JF
1220
1221/*
9f1e87ea
CG
1222 * Hypervisor uses this for application faults while it executes.
1223 * We get here for two reasons:
1224 * 1. Fault while reloading DS, ES, FS or GS
1225 * 2. Fault while executing IRET
1226 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1227 * registers that could be reloaded and zeroed the others.
1228 * Category 2 we fix up by killing the current process. We cannot use the
1229 * normal Linux return path in this case because if we use the IRET hypercall
1230 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1231 * We distinguish between categories by comparing each saved segment register
1232 * with its current contents: any discrepancy means we are in category 1.
1233 */
3d75e1b8 1234ENTRY(xen_failsafe_callback)
dcd072e2
AH
1235 INTR_FRAME 1 (6*8)
1236 /*CFI_REL_OFFSET gs,GS*/
1237 /*CFI_REL_OFFSET fs,FS*/
1238 /*CFI_REL_OFFSET es,ES*/
1239 /*CFI_REL_OFFSET ds,DS*/
1240 CFI_REL_OFFSET r11,8
1241 CFI_REL_OFFSET rcx,0
3d75e1b8
JF
1242 movw %ds,%cx
1243 cmpw %cx,0x10(%rsp)
1244 CFI_REMEMBER_STATE
1245 jne 1f
1246 movw %es,%cx
1247 cmpw %cx,0x18(%rsp)
1248 jne 1f
1249 movw %fs,%cx
1250 cmpw %cx,0x20(%rsp)
1251 jne 1f
1252 movw %gs,%cx
1253 cmpw %cx,0x28(%rsp)
1254 jne 1f
1255 /* All segments match their saved values => Category 2 (Bad IRET). */
1256 movq (%rsp),%rcx
1257 CFI_RESTORE rcx
1258 movq 8(%rsp),%r11
1259 CFI_RESTORE r11
1260 addq $0x30,%rsp
1261 CFI_ADJUST_CFA_OFFSET -0x30
14ae22ba
IM
1262 pushq_cfi $0 /* RIP */
1263 pushq_cfi %r11
1264 pushq_cfi %rcx
4a5c3e77 1265 jmp general_protection
3d75e1b8
JF
1266 CFI_RESTORE_STATE
12671: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1268 movq (%rsp),%rcx
1269 CFI_RESTORE rcx
1270 movq 8(%rsp),%r11
1271 CFI_RESTORE r11
1272 addq $0x30,%rsp
1273 CFI_ADJUST_CFA_OFFSET -0x30
a349e23d 1274 pushq_cfi $-1 /* orig_ax = -1 => not a system call */
3d75e1b8
JF
1275 SAVE_ALL
1276 jmp error_exit
1277 CFI_ENDPROC
3d75e1b8
JF
1278END(xen_failsafe_callback)
1279
cf910e83 1280apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
38e20b07
SY
1281 xen_hvm_callback_vector xen_evtchn_do_upcall
1282
3d75e1b8 1283#endif /* CONFIG_XEN */
ddeb8f21 1284
bc2b0331 1285#if IS_ENABLED(CONFIG_HYPERV)
cf910e83 1286apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
bc2b0331
S
1287 hyperv_callback_vector hyperv_vector_handler
1288#endif /* CONFIG_HYPERV */
1289
577ed45e
AL
1290idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1291idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
6f442be2 1292idtentry stack_segment do_stack_segment has_error_code=1
6cac5a92 1293#ifdef CONFIG_XEN
cb5dd2c5
AL
1294idtentry xen_debug do_debug has_error_code=0
1295idtentry xen_int3 do_int3 has_error_code=0
1296idtentry xen_stack_segment do_stack_segment has_error_code=1
6cac5a92 1297#endif
cb5dd2c5
AL
1298idtentry general_protection do_general_protection has_error_code=1
1299trace_idtentry page_fault do_page_fault has_error_code=1
631bc487 1300#ifdef CONFIG_KVM_GUEST
cb5dd2c5 1301idtentry async_page_fault do_async_page_fault has_error_code=1
631bc487 1302#endif
ddeb8f21 1303#ifdef CONFIG_X86_MCE
cb5dd2c5 1304idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
ddeb8f21
AH
1305#endif
1306
1307 /*
48e08d0f
AL
1308 * "Paranoid" exit path from exception stack. This is invoked
1309 * only on return from non-NMI IST interrupts that came
1310 * from kernel space.
ddeb8f21 1311 *
48e08d0f
AL
1312 * We may be returning to very strange contexts (e.g. very early
1313 * in syscall entry), so checking for preemption here would
1314 * be complicated. Fortunately, there's no good reason
1315 * to try to handle preemption here.
ddeb8f21
AH
1316 */
1317
1318 /* ebx: no swapgs flag */
1319ENTRY(paranoid_exit)
1f130a78 1320 DEFAULT_FRAME
ddeb8f21 1321 DISABLE_INTERRUPTS(CLBR_NONE)
5963e317 1322 TRACE_IRQS_OFF_DEBUG
ddeb8f21
AH
1323 testl %ebx,%ebx /* swapgs needed? */
1324 jnz paranoid_restore
ddeb8f21
AH
1325 TRACE_IRQS_IRETQ 0
1326 SWAPGS_UNSAFE_STACK
0300e7f1 1327 RESTORE_ALL 8
48e08d0f 1328 INTERRUPT_RETURN
ddeb8f21 1329paranoid_restore:
5963e317 1330 TRACE_IRQS_IRETQ_DEBUG 0
ddeb8f21 1331 RESTORE_ALL 8
48e08d0f 1332 INTERRUPT_RETURN
ddeb8f21
AH
1333 CFI_ENDPROC
1334END(paranoid_exit)
1335
1336/*
1337 * Exception entry point. This expects an error code/orig_rax on the stack.
1338 * It returns the "no swapgs" flag in %ebx.
1339 */
1340ENTRY(error_entry)
1341 XCPT_FRAME
1342 CFI_ADJUST_CFA_OFFSET 15*8
1343 /* oldrax contains error code */
1344 cld
3bab13b0
JB
1345 movq %rdi, RDI+8(%rsp)
1346 movq %rsi, RSI+8(%rsp)
1347 movq %rdx, RDX+8(%rsp)
1348 movq %rcx, RCX+8(%rsp)
1349 movq %rax, RAX+8(%rsp)
1350 movq %r8, R8+8(%rsp)
1351 movq %r9, R9+8(%rsp)
1352 movq %r10, R10+8(%rsp)
1353 movq %r11, R11+8(%rsp)
ddeb8f21 1354 movq_cfi rbx, RBX+8
3bab13b0
JB
1355 movq %rbp, RBP+8(%rsp)
1356 movq %r12, R12+8(%rsp)
1357 movq %r13, R13+8(%rsp)
1358 movq %r14, R14+8(%rsp)
1359 movq %r15, R15+8(%rsp)
ddeb8f21
AH
1360 xorl %ebx,%ebx
1361 testl $3,CS+8(%rsp)
1362 je error_kernelspace
1363error_swapgs:
1364 SWAPGS
1365error_sti:
1366 TRACE_IRQS_OFF
1367 ret
ddeb8f21
AH
1368
1369/*
1370 * There are two places in the kernel that can potentially fault with
b645af2d
AL
1371 * usergs. Handle them here. B stepping K8s sometimes report a
1372 * truncated RIP for IRET exceptions returning to compat mode. Check
1373 * for these here too.
ddeb8f21
AH
1374 */
1375error_kernelspace:
3bab13b0 1376 CFI_REL_OFFSET rcx, RCX+8
ddeb8f21 1377 incl %ebx
7209a75d 1378 leaq native_irq_return_iret(%rip),%rcx
ddeb8f21 1379 cmpq %rcx,RIP+8(%rsp)
b645af2d 1380 je error_bad_iret
ae24ffe5
BG
1381 movl %ecx,%eax /* zero extend */
1382 cmpq %rax,RIP+8(%rsp)
1383 je bstep_iret
ddeb8f21 1384 cmpq $gs_change,RIP+8(%rsp)
9f1e87ea 1385 je error_swapgs
ddeb8f21 1386 jmp error_sti
ae24ffe5
BG
1387
1388bstep_iret:
1389 /* Fix truncated RIP */
1390 movq %rcx,RIP+8(%rsp)
b645af2d
AL
1391 /* fall through */
1392
1393error_bad_iret:
1394 SWAPGS
1395 mov %rsp,%rdi
1396 call fixup_bad_iret
1397 mov %rax,%rsp
1398 decl %ebx /* Return to usergs */
1399 jmp error_sti
e6b04b6b 1400 CFI_ENDPROC
ddeb8f21
AH
1401END(error_entry)
1402
1403
1404/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1405ENTRY(error_exit)
1406 DEFAULT_FRAME
1407 movl %ebx,%eax
1408 RESTORE_REST
1409 DISABLE_INTERRUPTS(CLBR_NONE)
1410 TRACE_IRQS_OFF
1411 GET_THREAD_INFO(%rcx)
1412 testl %eax,%eax
1413 jne retint_kernel
1414 LOCKDEP_SYS_EXIT_IRQ
1415 movl TI_flags(%rcx),%edx
1416 movl $_TIF_WORK_MASK,%edi
1417 andl %edi,%edx
1418 jnz retint_careful
1419 jmp retint_swapgs
1420 CFI_ENDPROC
1421END(error_exit)
1422
3f3c8b8c
SR
1423/*
1424 * Test if a given stack is an NMI stack or not.
1425 */
1426 .macro test_in_nmi reg stack nmi_ret normal_ret
1427 cmpq %\reg, \stack
1428 ja \normal_ret
1429 subq $EXCEPTION_STKSZ, %\reg
1430 cmpq %\reg, \stack
1431 jb \normal_ret
1432 jmp \nmi_ret
1433 .endm
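test_in_nmi is an unsigned range check: given the top of the NMI stack in \reg, it asks whether the candidate RSP lies within the EXCEPTION_STKSZ bytes below it. Equivalent C:

    /* top = top of the NMI stack, sp = interrupted RSP (see the lea below). */
    static bool in_nmi_stack(unsigned long top, unsigned long sp)
    {
            return sp <= top && sp >= top - EXCEPTION_STKSZ;
    }
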
ddeb8f21
AH
1434
1435 /* runs on exception stack */
1436ENTRY(nmi)
1437 INTR_FRAME
1438 PARAVIRT_ADJUST_EXCEPTION_FRAME
3f3c8b8c
SR
1439 /*
1440 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1441 * the iretq it performs will take us out of NMI context.
1442 * This means that we can have nested NMIs where the next
1443 * NMI is using the top of the stack of the previous NMI. We
1444 * can't let it execute because the nested NMI will corrupt the
1445 * stack of the previous NMI. NMI handlers are not re-entrant
1446 * anyway.
1447 *
1448 * To handle this case we do the following:
1449 * Check a special location on the stack that contains
1450 * a variable that is set when NMIs are executing.
1451 * The interrupted task's stack is also checked to see if it
1452 * is an NMI stack.
1453 * If the variable is not set and the stack is not the NMI
1454 * stack then:
1455 * o Set the special variable on the stack
1456 * o Copy the interrupt frame into a "saved" location on the stack
1457 * o Copy the interrupt frame into a "copy" location on the stack
1458 * o Continue processing the NMI
1459 * If the variable is set or the previous stack is the NMI stack:
1460 * o Modify the "copy" location to jump to repeat_nmi
1461 * o return back to the first NMI
1462 *
1463 * Now on exit of the first NMI, we first clear the stack variable.
1464 * The NMI stack will tell any nested NMIs at that point that it is
1465 * nested. Then we pop the stack normally with iret, and if there was
1466 * a nested NMI that updated the copied interrupt stack frame, a
1467 * jump will be made to the repeat_nmi code that will handle the second
1468 * NMI.
1469 */
1470
1471 /* Use %rdx as our temp variable throughout */
1472 pushq_cfi %rdx
62610913 1473 CFI_REL_OFFSET rdx, 0
3f3c8b8c 1474
45d5a168
SR
1475 /*
1476 * If %cs was not the kernel segment, then the NMI triggered in user
1477 * space, which means it is definitely not nested.
1478 */
a38449ef 1479 cmpl $__KERNEL_CS, 16(%rsp)
45d5a168
SR
1480 jne first_nmi
1481
3f3c8b8c
SR
1482 /*
1483 * Check the special variable on the stack to see if NMIs are
1484 * executing.
1485 */
a38449ef 1486 cmpl $1, -8(%rsp)
3f3c8b8c
SR
1487 je nested_nmi
1488
1489 /*
1490 * Now test if the previous stack was an NMI stack.
1491 * We need the double check. We check the NMI stack to satisfy the
1492 * race when the first NMI clears the variable before returning.
1493 * We check the variable because the first NMI could be in a
1494 * breakpoint routine using a breakpoint stack.
1495 */
1496 lea 6*8(%rsp), %rdx
1497 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
62610913 1498 CFI_REMEMBER_STATE
3f3c8b8c
SR
1499
1500nested_nmi:
1501 /*
1502 * Do nothing if we interrupted the fixup in repeat_nmi.
1503 * It's about to repeat the NMI handler, so we are fine
1504 * with ignoring this one.
1505 */
1506 movq $repeat_nmi, %rdx
1507 cmpq 8(%rsp), %rdx
1508 ja 1f
1509 movq $end_repeat_nmi, %rdx
1510 cmpq 8(%rsp), %rdx
1511 ja nested_nmi_out
1512
15131:
1514 /* Set up the interrupted NMIs stack to jump to repeat_nmi */
28696f43 1515 leaq -1*8(%rsp), %rdx
3f3c8b8c 1516 movq %rdx, %rsp
28696f43
SQ
1517 CFI_ADJUST_CFA_OFFSET 1*8
1518 leaq -10*8(%rsp), %rdx
3f3c8b8c
SR
1519 pushq_cfi $__KERNEL_DS
1520 pushq_cfi %rdx
1521 pushfq_cfi
1522 pushq_cfi $__KERNEL_CS
1523 pushq_cfi $repeat_nmi
1524
1525 /* Put stack back */
28696f43
SQ
1526 addq $(6*8), %rsp
1527 CFI_ADJUST_CFA_OFFSET -6*8
3f3c8b8c
SR
1528
1529nested_nmi_out:
1530 popq_cfi %rdx
62610913 1531 CFI_RESTORE rdx
3f3c8b8c
SR
1532
1533 /* No need to check faults here */
1534 INTERRUPT_RETURN
1535
62610913 1536 CFI_RESTORE_STATE
3f3c8b8c
SR
1537first_nmi:
1538 /*
1539 * Because nested NMIs will use the pushed location that we
1540 * stored in rdx, we must keep that space available.
1541 * Here's what our stack frame will look like:
1542 * +-------------------------+
1543 * | original SS |
1544 * | original Return RSP |
1545 * | original RFLAGS |
1546 * | original CS |
1547 * | original RIP |
1548 * +-------------------------+
1549 * | temp storage for rdx |
1550 * +-------------------------+
1551 * | NMI executing variable |
1552 * +-------------------------+
3f3c8b8c
SR
1553 * | copied SS |
1554 * | copied Return RSP |
1555 * | copied RFLAGS |
1556 * | copied CS |
1557 * | copied RIP |
1558 * +-------------------------+
28696f43
SQ
1559 * | Saved SS |
1560 * | Saved Return RSP |
1561 * | Saved RFLAGS |
1562 * | Saved CS |
1563 * | Saved RIP |
1564 * +-------------------------+
3f3c8b8c
SR
1565 * | pt_regs |
1566 * +-------------------------+
1567 *
79fb4ad6
SR
1568 * The saved stack frame is used to fix up the copied stack frame
1569 * that a nested NMI may change to make the interrupted NMI iret jump
1570 * to the repeat_nmi. The original stack frame and the temp storage
3f3c8b8c
SR
1571 * is also used by nested NMIs and can not be trusted on exit.
1572 */
79fb4ad6 1573 /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
62610913
JB
1574 movq (%rsp), %rdx
1575 CFI_RESTORE rdx
1576
3f3c8b8c
SR
1577 /* Set the NMI executing variable on the stack. */
1578 pushq_cfi $1
1579
28696f43
SQ
1580 /*
1581 * Leave room for the "copied" frame
1582 */
1583 subq $(5*8), %rsp
444723dc 1584 CFI_ADJUST_CFA_OFFSET 5*8
28696f43 1585
3f3c8b8c
SR
1586 /* Copy the stack frame to the Saved frame */
1587 .rept 5
28696f43 1588 pushq_cfi 11*8(%rsp)
3f3c8b8c 1589 .endr
62610913
JB
1590 CFI_DEF_CFA_OFFSET SS+8-RIP
1591
79fb4ad6
SR
1592 /* Everything up to here is safe from nested NMIs */
1593
62610913
JB
1594 /*
1595 * If there was a nested NMI, the first NMI's iret will return
1596 * here. But NMIs are still enabled and we can take another
1597 * nested NMI. The nested NMI checks the interrupted RIP to see
1598 * if it is between repeat_nmi and end_repeat_nmi, and if so
1599 * it will just return, as we are about to repeat an NMI anyway.
1600 * This makes it safe to copy to the stack frame that a nested
1601 * NMI will update.
1602 */
1603repeat_nmi:
1604 /*
1605 * Update the stack variable to say we are still in NMI (the update
1606 * is benign for the non-repeat case, where 1 was pushed just above
1607 * to this very stack slot).
1608 */
28696f43 1609 movq $1, 10*8(%rsp)
3f3c8b8c
SR
1610
1611 /* Make another copy, this one may be modified by nested NMIs */
28696f43
SQ
1612 addq $(10*8), %rsp
1613 CFI_ADJUST_CFA_OFFSET -10*8
3f3c8b8c 1614 .rept 5
28696f43 1615 pushq_cfi -6*8(%rsp)
3f3c8b8c 1616 .endr
28696f43 1617 subq $(5*8), %rsp
62610913
JB
1618 CFI_DEF_CFA_OFFSET SS+8-RIP
1619end_repeat_nmi:
3f3c8b8c
SR
1620
1621 /*
1622 * Everything below this point can be preempted by a nested
79fb4ad6
SR
1623 * NMI if the first NMI took an exception and reset our iret stack
1624 * so that we repeat another NMI.
3f3c8b8c 1625 */
1fd466ef 1626 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
b1cccb1b
JB
1627 subq $ORIG_RAX-R15, %rsp
1628 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1fd466ef
SR
1629 /*
1630 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
1631 * as we should not be calling schedule in NMI context,
1632 * even with normal interrupts enabled. An NMI should not be
1633 * setting NEED_RESCHED or anything that normal interrupts and
1634 * exceptions might do.
1635 */
ddeb8f21
AH
1636 call save_paranoid
1637 DEFAULT_FRAME 0
7fbb98c5
SR
1638
1639 /*
1640 * Save off the CR2 register. If we take a page fault in the NMI then
1641 * it could corrupt the CR2 value. If the NMI preempts a page fault
1642 * handler before it was able to read the CR2 register, and then the
1643 * NMI itself takes a page fault, the page fault that was preempted
1644 * will read the information from the NMI page fault and not the
1645 * origin fault. Save it off and restore it if it changes.
1646 * Use the r12 callee-saved register.
1647 */
1648 movq %cr2, %r12
1649
ddeb8f21
AH
1650 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1651 movq %rsp,%rdi
1652 movq $-1,%rsi
1653 call do_nmi
7fbb98c5
SR
1654
1655 /* Did the NMI take a page fault? Restore cr2 if it did */
1656 movq %cr2, %rcx
1657 cmpq %rcx, %r12
1658 je 1f
1659 movq %r12, %cr2
16601:
1661
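Restating the CR2 protection around do_nmi in C (read_cr2/write_cr2 are the kernel's accessors; %r12 is used above because it is callee-saved across the call):

    static void nmi_with_cr2_protection(struct pt_regs *regs)
    {
            unsigned long cr2 = read_cr2();     /* kept in %r12 above */

            do_nmi(regs, -1);

            /* a page fault inside the NMI clobbered CR2: put it back */
            if (read_cr2() != cr2)
                    write_cr2(cr2);
    }
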
ddeb8f21
AH
1662 testl %ebx,%ebx /* swapgs needed? */
1663 jnz nmi_restore
ddeb8f21
AH
1664nmi_swapgs:
1665 SWAPGS_UNSAFE_STACK
1666nmi_restore:
444723dc
JB
1667 /* Pop the extra iret frame at once */
1668 RESTORE_ALL 6*8
28696f43 1669
3f3c8b8c 1670 /* Clear the NMI executing stack variable */
28696f43 1671 movq $0, 5*8(%rsp)
ddeb8f21 1672 jmp irq_return
9f1e87ea 1673 CFI_ENDPROC
ddeb8f21
AH
1674END(nmi)
1675
1676ENTRY(ignore_sysret)
1677 CFI_STARTPROC
1678 mov $-ENOSYS,%eax
1679 sysret
1680 CFI_ENDPROC
1681END(ignore_sysret)
1682