Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.9.
   1/*
   2 *  linux/arch/x86_64/entry.S
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
   6 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
   7 */
   8
   9/*
  10 * entry.S contains the system-call and fault low-level handling routines.
  11 *
  12 * Some of this is documented in Documentation/x86/entry_64.txt
  13 *
  14 * NOTE: This code handles signal-recognition, which happens every time
  15 * after an interrupt and after each system call.
  16 *
  17 * Normal syscalls and interrupts don't save a full stack frame, this is
  18 * only done for syscall tracing, signals or fork/exec et al.
  19 *
  20 * A note on terminology:
  21 * - top of stack: Architecture defined interrupt frame from SS to RIP
  22 * at the top of the kernel process stack.
  23 * - partial stack frame: partially saved registers up to R11.
  24 * - full stack frame: Like partial stack frame, but all registers saved.
  25 *
  26 * Some macro usage:
  27 * - CFI macros are used to generate dwarf2 unwind information for better
  28 * backtraces. They don't change any code.
  29 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  30 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
  31 * There are unfortunately lots of special cases where some registers
  32 * are not touched. The macro is a big mess that should be cleaned up.
  33 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  34 * Gives a full stack frame.
  35 * - ENTRY/END Define functions in the symbol table.
  36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  37 * frame that is otherwise undefined after a SYSCALL
  38 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  39 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  40 */
  41
  42#include <linux/linkage.h>
  43#include <asm/segment.h>
  44#include <asm/cache.h>
  45#include <asm/errno.h>
  46#include <asm/dwarf2.h>
  47#include <asm/calling.h>
  48#include <asm/asm-offsets.h>
  49#include <asm/msr.h>
  50#include <asm/unistd.h>
  51#include <asm/thread_info.h>
  52#include <asm/hw_irq.h>
  53#include <asm/page_types.h>
  54#include <asm/irqflags.h>
  55#include <asm/paravirt.h>
  56#include <asm/ftrace.h>
  57#include <asm/percpu.h>
  58#include <asm/asm.h>
  59#include <linux/err.h>
  60
  61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  62#include <linux/elf-em.h>
  63#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  64#define __AUDIT_ARCH_64BIT 0x80000000
  65#define __AUDIT_ARCH_LE	   0x40000000
  66
  67	.code64
  68	.section .entry.text, "ax"
  69
  70#ifdef CONFIG_FUNCTION_TRACER
  71#ifdef CONFIG_DYNAMIC_FTRACE
  72ENTRY(mcount)
	/* CONFIG_DYNAMIC_FTRACE variant: a stub that returns immediately. */
  73	retq
  74END(mcount)
  75
  76ENTRY(ftrace_caller)
	/*
	 * Tracer entry for the CONFIG_DYNAMIC_FTRACE build.  Goes straight to
	 * ftrace_stub (a bare retq) while function_trace_stop is non-zero.
	 * Otherwise it saves registers with MCOUNT_SAVE_FRAME, loads %rdi and
	 * %rsi and calls through the ftrace_call site.
	 */
  77	cmpl $0, function_trace_stop
  78	jne  ftrace_stub
  79
  80	MCOUNT_SAVE_FRAME
  81
	/*
	 * NOTE(review): 0x38(%rsp) is presumably the return address into the
	 * traced function and 8(%rbp) the return address into its caller.
	 * That depends on the MCOUNT_SAVE_FRAME layout, which is not defined
	 * in this file - confirm.
	 */
  82	movq 0x38(%rsp), %rdi
  83	movq 8(%rbp), %rsi
  84	subq $MCOUNT_INSN_SIZE, %rdi	/* step %rdi back by MCOUNT_INSN_SIZE */
  85
	/* Exported call site; as assembled here it calls ftrace_stub. */
  86GLOBAL(ftrace_call)
  87	call ftrace_stub
  88
  89	MCOUNT_RESTORE_FRAME
  90
  91#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* Second exported site; as assembled here it jumps to ftrace_stub. */
  92GLOBAL(ftrace_graph_call)
  93	jmp ftrace_stub
  94#endif
  95
	/* Shared do-nothing return target. */
  96GLOBAL(ftrace_stub)
  97	retq
  98END(ftrace_caller)
  99
 100#else /* ! CONFIG_DYNAMIC_FTRACE */
 101ENTRY(mcount)
	/*
	 * Non-dynamic tracer entry.  Returns through ftrace_stub unless
	 * function_trace_stop is zero and at least one hook pointer differs
	 * from its stub default.
	 */
 102	cmpl $0, function_trace_stop
 103	jne  ftrace_stub
 104
	/* A function tracer is installed when the hook is not ftrace_stub. */
 105	cmpq $ftrace_stub, ftrace_trace_function
 106	jnz trace
 107
 108#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* Either graph hook differing from its stub diverts to the graph caller. */
 109	cmpq $ftrace_stub, ftrace_graph_return
 110	jnz ftrace_graph_caller
 111
 112	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
 113	jnz ftrace_graph_caller
 114#endif
 115
	/* Shared do-nothing return target. */
 116GLOBAL(ftrace_stub)
 117	retq
 118
 119trace:
 120	MCOUNT_SAVE_FRAME
 121
	/*
	 * Load the two tracer arguments; %rdi is stepped back by
	 * MCOUNT_INSN_SIZE.  NOTE(review): the 0x38 offset depends on the
	 * MCOUNT_SAVE_FRAME layout, which is not defined in this file.
	 */
 122	movq 0x38(%rsp), %rdi
 123	movq 8(%rbp), %rsi
 124	subq $MCOUNT_INSN_SIZE, %rdi
 125
 126	call   *ftrace_trace_function	/* indirect call through the hook pointer */
 127
 128	MCOUNT_RESTORE_FRAME
 129
 130	jmp ftrace_stub
 131END(mcount)
 132#endif /* CONFIG_DYNAMIC_FTRACE */
 133#endif /* CONFIG_FUNCTION_TRACER */
 134
 135#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 136ENTRY(ftrace_graph_caller)
	/*
	 * Graph-tracer hook.  Unless function_trace_stop is set, saves
	 * registers, calls prepare_ftrace_return with three arguments taken
	 * from the current frame, restores registers and returns.
	 */
 137	cmpl $0, function_trace_stop
 138	jne ftrace_stub
 139
 140	MCOUNT_SAVE_FRAME
 141
 142	leaq 8(%rbp), %rdi		/* arg1: address of the slot at 8(%rbp) */
 143	movq 0x38(%rsp), %rsi		/* arg2: word at 0x38(%rsp), adjusted below */
 144	movq (%rbp), %rdx		/* arg3: word stored at (%rbp) */
 145	subq $MCOUNT_INSN_SIZE, %rsi
 146
 147	call	prepare_ftrace_return
 148
 149	MCOUNT_RESTORE_FRAME
 150
 151	retq
 152END(ftrace_graph_caller)
 153
 154GLOBAL(return_to_handler)
	/*
	 * Preserves %rax and %rdx across a call to ftrace_return_to_handler
	 * (which receives %rbp as its argument) and then jumps to the address
	 * that call returned in %rax.
	 */
 155	subq  $24, %rsp
 156
 157	/* Save the return values */
 158	movq %rax, (%rsp)
 159	movq %rdx, 8(%rsp)
 160	movq %rbp, %rdi
 161
 162	call ftrace_return_to_handler
 163
 164	movq %rax, %rdi		/* jump target handed back by the handler */
 165	movq 8(%rsp), %rdx
 166	movq (%rsp), %rax
 167	addq $24, %rsp
 168	jmp *%rdi
 169#endif
 170
 171
 172#ifndef CONFIG_PREEMPT
 173#define retint_kernel retint_restore_args
 174#endif
 175
 176#ifdef CONFIG_PARAVIRT
 177ENTRY(native_usergs_sysret64)
	/* Built only with CONFIG_PARAVIRT: plain swapgs followed by sysretq. */
 178	swapgs
 179	sysretq
 180ENDPROC(native_usergs_sysret64)
 181#endif /* CONFIG_PARAVIRT */
 182
 183
 184.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
	/*
	 * With CONFIG_TRACE_IRQFLAGS: run TRACE_IRQS_ON only when bit 9 of the
	 * saved EFLAGS word at EFLAGS-offset(%rsp) is set.  Expands to nothing
	 * when that option is off.
	 */
 185#ifdef CONFIG_TRACE_IRQFLAGS
 186	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
 187	jnc  1f
 188	TRACE_IRQS_ON
 1891:
 190#endif
 191.endm
 192
 193/*
 194 * When dynamic function tracer is enabled it will add a breakpoint
 195 * to all locations that it is about to modify, sync CPUs, update
 196 * all the code, sync CPUs, then remove the breakpoints. In this time
 197 * if lockdep is enabled, it might jump back into the debug handler
 198 * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
 199 *
 200 * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
 201 * make sure the stack pointer does not get reset back to the top
 202 * of the debug stack, and instead just reuses the current stack.
 203 */
 204#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
 205
 206.macro TRACE_IRQS_OFF_DEBUG
	/* TRACE_IRQS_OFF bracketed by debug_stack_set_zero / debug_stack_reset. */
 207	call debug_stack_set_zero
 208	TRACE_IRQS_OFF
 209	call debug_stack_reset
 210.endm
 211
 212.macro TRACE_IRQS_ON_DEBUG
	/* TRACE_IRQS_ON bracketed by debug_stack_set_zero / debug_stack_reset. */
 213	call debug_stack_set_zero
 214	TRACE_IRQS_ON
 215	call debug_stack_reset
 216.endm
 217
 218.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
	/* Runs TRACE_IRQS_ON_DEBUG only when bit 9 of the saved EFLAGS is set. */
 219	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
 220	jnc  1f
 221	TRACE_IRQS_ON_DEBUG
 2221:
 223.endm
 224
 225#else
 226# define TRACE_IRQS_OFF_DEBUG		TRACE_IRQS_OFF
 227# define TRACE_IRQS_ON_DEBUG		TRACE_IRQS_ON
 228# define TRACE_IRQS_IRETQ_DEBUG		TRACE_IRQS_IRETQ
 229#endif
 230
 231/*
 232 * C code is not supposed to know about undefined top of stack. Every time
 233 * a C function with a pt_regs argument is called from the SYSCALL based
 234 * fast path FIXUP_TOP_OF_STACK is needed.
 235 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 236 * manipulation.
 237 */
 238
 239	/* %rsp:at FRAMEEND */
 240	.macro FIXUP_TOP_OF_STACK tmp offset=0
	/*
	 * Fill in the RSP, SS, CS, RCX and EFLAGS slots of the frame located
	 * at offset(%rsp): RSP from the per-cpu old_rsp, SS and CS with the
	 * user selectors, RCX with -1, and EFLAGS copied from the frame's R11
	 * slot.  Clobbers the register passed as tmp.
	 */
 241	movq PER_CPU_VAR(old_rsp),\tmp
 242	movq \tmp,RSP+\offset(%rsp)
 243	movq $__USER_DS,SS+\offset(%rsp)
 244	movq $__USER_CS,CS+\offset(%rsp)
 245	movq $-1,RCX+\offset(%rsp)
 246	movq R11+\offset(%rsp),\tmp  /* get eflags */
 247	movq \tmp,EFLAGS+\offset(%rsp)
 248	.endm
 249
 250	.macro RESTORE_TOP_OF_STACK tmp offset=0
	/*
	 * Inverse direction of FIXUP_TOP_OF_STACK for the two values that
	 * are carried outside the frame: copy the frame RSP slot back to the
	 * per-cpu old_rsp and the frame EFLAGS slot back to the R11 slot.
	 * Clobbers the register passed as tmp.
	 */
 251	movq RSP+\offset(%rsp),\tmp
 252	movq \tmp,PER_CPU_VAR(old_rsp)
 253	movq EFLAGS+\offset(%rsp),\tmp
 254	movq \tmp,R11+\offset(%rsp)
 255	.endm
 256
 257	.macro FAKE_STACK_FRAME child_rip
	/*
	 * Push a six-word frame: ss, rsp, eflags, cs, rip and orig rax.
	 * %rax is zeroed first and supplies the rsp and orig-rax words;
	 * rip comes from the child_rip argument.  Clobbers %rax.
	 */
 258	/* push in order ss, rsp, eflags, cs, rip */
 259	xorl %eax, %eax
 260	pushq_cfi $__KERNEL_DS /* ss */
 261	/*CFI_REL_OFFSET	ss,0*/
 262	pushq_cfi %rax /* rsp */
 263	CFI_REL_OFFSET	rsp,0
 264	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
 265	/*CFI_REL_OFFSET	rflags,0*/
 266	pushq_cfi $__KERNEL_CS /* cs */
 267	/*CFI_REL_OFFSET	cs,0*/
 268	pushq_cfi \child_rip /* rip */
 269	CFI_REL_OFFSET	rip,0
 270	pushq_cfi %rax /* orig rax */
 271	.endm
 272
 273	.macro UNFAKE_STACK_FRAME
	/* Drop the six words pushed by FAKE_STACK_FRAME and fix up the CFA. */
 274	addq $8*6, %rsp
 275	CFI_ADJUST_CFA_OFFSET	-(6*8)
 276	.endm
 277
 278/*
 279 * initial frame state with only the CFA defined (no saved registers)
 280 */
 281	.macro EMPTY_FRAME start=1 offset=0
	/*
	 * start != 0: open a new CFI procedure (simple, signal frame) with
	 *             CFA = rsp + 8 + offset.
	 * start == 0: stay in the current procedure and only reset the CFA
	 *             offset to 8 + offset.
	 */
 282	.if \start
 283	CFI_STARTPROC simple
 284	CFI_SIGNAL_FRAME
 285	CFI_DEF_CFA rsp,8+\offset
 286	.else
 287	CFI_DEF_CFA_OFFSET 8+\offset
 288	.endif
 289	.endm
 290
 291/*
 292 * initial frame state for interrupts (and exceptions without error code)
 293 */
 294	.macro INTR_FRAME start=1 offset=0
	/*
	 * EMPTY_FRAME sized to cover the words from RIP up to SS, plus unwind
	 * locations for the saved rsp and rip.  The ss/rflags/cs annotations
	 * are present but commented out.
	 */
 295	EMPTY_FRAME \start, SS+8+\offset-RIP
 296	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
 297	CFI_REL_OFFSET rsp, RSP+\offset-RIP
 298	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
 299	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
 300	CFI_REL_OFFSET rip, RIP+\offset-RIP
 301	.endm
 302
 303/*
 304 * initial frame state for exceptions with error code (and interrupts
 305 * with vector already pushed)
 306 */
 307	.macro XCPT_FRAME start=1 offset=0
	/* INTR_FRAME widened by the distance between the ORIG_RAX and RIP slots. */
 308	INTR_FRAME \start, RIP+\offset-ORIG_RAX
 309	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
 310	.endm
 311
 312/*
 313 * frame that enables calling into C.
 314 */
 315	.macro PARTIAL_FRAME start=1 offset=0
	/*
	 * XCPT_FRAME extended down to ARGOFFSET, plus unwind locations for
	 * rdi, rsi, rdx, rcx, rax and r8-r11.
	 */
 316	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
 317	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
 318	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
 319	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
 320	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
 321	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
 322	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
 323	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
 324	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
 325	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
 326	.endm
 327
 328/*
 329 * frame that enables passing a complete pt_regs to a C function.
 330 */
 331	.macro DEFAULT_FRAME start=1 offset=0
	/*
	 * PARTIAL_FRAME extended down to the R15 slot, plus unwind locations
	 * for rbx, rbp and r12-r15.
	 */
 332	PARTIAL_FRAME \start, R11+\offset-R15
 333	CFI_REL_OFFSET rbx, RBX+\offset
 334	CFI_REL_OFFSET rbp, RBP+\offset
 335	CFI_REL_OFFSET r12, R12+\offset
 336	CFI_REL_OFFSET r13, R13+\offset
 337	CFI_REL_OFFSET r14, R14+\offset
 338	CFI_REL_OFFSET r15, R15+\offset
 339	.endm
 340
 341/* save partial stack frame */
 342	.macro SAVE_ARGS_IRQ
	/*
	 * Interrupt-entry register save.  Expects %rsp to point at the RBP
	 * slot of the frame (all offsets below are relative to RBP).  Stores
	 * rdi, rsi, rdx, rcx, rax, r8-r11 and rbp, leaves %rdi pointing at the
	 * start of the frame, does SWAPGS when the saved CS has either of its
	 * low two bits set, bumps the per-cpu irq_count, conditionally moves
	 * to the interrupt stack, and pushes the previous %rsp on whichever
	 * stack is now current.  Clobbers %rsi and %rdi.
	 */
 343	cld
 344	/* start from rbp in pt_regs and jump over */
 345	movq_cfi rdi, RDI-RBP
 346	movq_cfi rsi, RSI-RBP
 347	movq_cfi rdx, RDX-RBP
 348	movq_cfi rcx, RCX-RBP
 349	movq_cfi rax, RAX-RBP
 350	movq_cfi  r8,  R8-RBP
 351	movq_cfi  r9,  R9-RBP
 352	movq_cfi r10, R10-RBP
 353	movq_cfi r11, R11-RBP
 354
 355	/* Save rbp so that we can unwind from get_irq_regs() */
 356	movq_cfi rbp, 0
 357
 358	/* Save previous stack value */
 359	movq %rsp, %rsi
 360
 361	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
	/* Low two bits of the saved CS clear: skip the SWAPGS. */
 362	testl $3, CS-RBP(%rsi)
 363	je 1f
 364	SWAPGS
 365	/*
 366	 * irq_count is used to check if a CPU is already on an interrupt stack
 367	 * or not. While this is essentially redundant with preempt_count it is
 368	 * a little cheaper to use a separate counter in the PDA (short of
 369	 * moving irq_enter into assembly, which would be too much work)
 370	 */
 3711:	incl PER_CPU_VAR(irq_count)
	/* ZF comes from the incl: switch stacks only when the new count is 0. */
 372	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 373	CFI_DEF_CFA_REGISTER	rsi
 374
 375	/* Store previous stack value */
 376	pushq %rsi
 377	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
 378			0x77 /* DW_OP_breg7 */, 0, \
 379			0x06 /* DW_OP_deref */, \
 380			0x08 /* DW_OP_const1u */, SS+8-RBP, \
 381			0x22 /* DW_OP_plus */
 382	/* We entered an interrupt context - irqs are off: */
 383	TRACE_IRQS_OFF
 384	.endm
 385
 386ENTRY(save_rest)
	/*
	 * Stores rbx, rbp and r12-r15 into their frame slots (displaced by
	 * 16 bytes), copies the word at 5*8+16(%rsp) down to 8(%rsp) and runs
	 * FIXUP_TOP_OF_STACK on the frame at 16(%rsp).  Clobbers %r11.
	 */
 387	PARTIAL_FRAME 1 REST_SKIP+8
 388	movq 5*8+16(%rsp), %r11	/* save return address */
 389	movq_cfi rbx, RBX+16
 390	movq_cfi rbp, RBP+16
 391	movq_cfi r12, R12+16
 392	movq_cfi r13, R13+16
 393	movq_cfi r14, R14+16
 394	movq_cfi r15, R15+16
 395	movq %r11, 8(%rsp)	/* return address */
 396	FIXUP_TOP_OF_STACK %r11, 16
 397	ret
 398	CFI_ENDPROC
 399END(save_rest)
 400
 401/* save complete stack frame */
 402	.pushsection .kprobes.text, "ax"
 403ENTRY(save_paranoid)
	/*
	 * Stores all fifteen general registers into the frame at 8(%rsp)
	 * (the extra 8 is this routine's own return address), then decides
	 * from MSR_GS_BASE whether a SWAPGS is needed.
	 * Result in %ebx: 1 = no SWAPGS was done, 0 = SWAPGS was done.
	 * rdmsr also overwrites %eax, %ecx and %edx.
	 */
 404	XCPT_FRAME 1 RDI+8
 405	cld
 406	movq_cfi rdi, RDI+8
 407	movq_cfi rsi, RSI+8
 408	movq_cfi rdx, RDX+8
 409	movq_cfi rcx, RCX+8
 410	movq_cfi rax, RAX+8
 411	movq_cfi r8, R8+8
 412	movq_cfi r9, R9+8
 413	movq_cfi r10, R10+8
 414	movq_cfi r11, R11+8
 415	movq_cfi rbx, RBX+8
 416	movq_cfi rbp, RBP+8
 417	movq_cfi r12, R12+8
 418	movq_cfi r13, R13+8
 419	movq_cfi r14, R14+8
 420	movq_cfi r15, R15+8
 421	movl $1,%ebx
 422	movl $MSR_GS_BASE,%ecx
 423	rdmsr
	/* %edx holds the upper 32 bits of the MSR; test its sign bit. */
 424	testl %edx,%edx
 425	js 1f	/* negative -> in kernel */
 426	SWAPGS
 427	xorl %ebx,%ebx
 4281:	ret
 429	CFI_ENDPROC
 430END(save_paranoid)
 431	.popsection
 432
 433/*
 434 * A newly forked process directly context switches into this address.
 435 *
 436 * rdi: prev task we switched from
 437 */
 438ENTRY(ret_from_fork)
	/*
	 * Clears TIF_FORK in the thread_info addressed by %r8, reloads the
	 * flags register from kernel_eflags, calls schedule_tail and then
	 * picks one of three exits:
	 *   - saved CS has its low two bits clear  -> retint_restore_args
	 *   - _TIF_IA32 set in thread_info flags   -> int_ret_from_sys_call
	 *   - otherwise                            -> ret_from_sys_call
	 * NOTE(review): %r8 is assumed to hold the thread_info pointer on
	 * entry; that is set up by the context-switch code, not shown here.
	 */
 439	DEFAULT_FRAME
 440
 441	LOCK ; btr $TIF_FORK,TI_flags(%r8)
 442
 443	pushq_cfi kernel_eflags(%rip)
 444	popfq_cfi				# reset kernel eflags
 445
 446	call schedule_tail			# rdi: 'prev' task parameter
 447
 448	GET_THREAD_INFO(%rcx)
 449
 450	RESTORE_REST
 451
 452	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 453	jz   retint_restore_args
 454
 455	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 456	jnz  int_ret_from_sys_call
 457
 458	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 459	jmp ret_from_sys_call			# go to the SYSRET fastpath
 460
 461	CFI_ENDPROC
 462END(ret_from_fork)
 463
 464/*
 465 * System call entry. Up to 6 arguments in registers are supported.
 466 *
 467 * SYSCALL does not save anything on the stack and does not change the
 468 * stack pointer.
 469 */
 470
 471/*
 472 * Register setup:
 473 * rax  system call number
 474 * rdi  arg0
 475 * rcx  return address for syscall/sysret, C arg3
 476 * rsi  arg1
 477 * rdx  arg2
 478 * r10  arg3 	(--> moved to rcx for C)
 479 * r8   arg4
 480 * r9   arg5
 481 * r11  eflags for syscall/sysret, temporary for C
 482 * r12-r15,rbp,rbx saved by C code, not touched.
 483 *
 484 * Interrupts are off on entry.
 485 * Only called from user space.
 486 *
 487 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 488 *      and report it properly in ps. Unfortunately we haven't.
 489 *
 490 * When user can change the frames always force IRET. That is because
 491 * it deals with uncanonical addresses better. SYSRET has trouble
 492 * with them due to bugs in both AMD and Intel CPUs.
 493 */
 494
 495ENTRY(system_call)
	/*
	 * SYSCALL entry and fast path.  Parks the incoming %rsp in the
	 * per-cpu old_rsp, switches to the per-cpu kernel_stack, saves the
	 * argument registers, records the syscall number (ORIG_RAX) and the
	 * return address from %rcx (RIP) in the frame, and dispatches through
	 * sys_call_table.  Diverts to tracesys when any
	 * _TIF_WORK_SYSCALL_ENTRY flag is set, and to badsys when the
	 * syscall number is above __NR_syscall_max.
	 */
 496	CFI_STARTPROC	simple
 497	CFI_SIGNAL_FRAME
 498	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 499	CFI_REGISTER	rip,rcx
 500	/*CFI_REGISTER	rflags,r11*/
 501	SWAPGS_UNSAFE_STACK
 502	/*
 503	 * A hypervisor implementation might want to use a label
 504	 * after the swapgs, so that it can do the swapgs
 505	 * for the guest and jump here on syscall.
 506	 */
 507GLOBAL(system_call_after_swapgs)
 508
 509	movq	%rsp,PER_CPU_VAR(old_rsp)
 510	movq	PER_CPU_VAR(kernel_stack),%rsp
 511	/*
 512	 * No need to follow this irqs off/on section - it's straight
 513	 * and short:
 514	 */
 515	ENABLE_INTERRUPTS(CLBR_NONE)
 516	SAVE_ARGS 8,0
 517	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
 518	movq  %rcx,RIP-ARGOFFSET(%rsp)
 519	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 520	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 521	jnz tracesys
 522system_call_fastpath:
 523#if __SYSCALL_MASK == ~0
 524	cmpq $__NR_syscall_max,%rax
 525#else
 526	andl $__SYSCALL_MASK,%eax
 527	cmpl $__NR_syscall_max,%eax
 528#endif
 529	ja badsys		/* unsigned: number above the table maximum */
 530	movq %r10,%rcx		/* syscall arg3 arrives in r10; C expects it in rcx */
 531	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
 532	movq %rax,RAX-ARGOFFSET(%rsp)	/* store the return value in the frame */
 533/*
 534 * Syscall return path ending with SYSRET (fast path)
 535 * Has incomplete stack frame and undefined top of stack.
 536 */
 537ret_from_sys_call:
	/*
	 * SYSRET exit.  With interrupts disabled, AND the thread_info flags
	 * with the work mask in %edi; any bit left goes to sysret_careful.
	 * Otherwise reload %rcx from the RIP slot, restore the argument
	 * registers, switch back to the stack pointer parked in old_rsp and
	 * leave through USERGS_SYSRET64.
	 */
 538	movl $_TIF_ALLWORK_MASK,%edi
 539	/* edi:	flagmask */
 540sysret_check:
 541	LOCKDEP_SYS_EXIT
 542	DISABLE_INTERRUPTS(CLBR_NONE)
 543	TRACE_IRQS_OFF
 544	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
 545	andl %edi,%edx
 546	jnz  sysret_careful
 547	CFI_REMEMBER_STATE
 548	/*
 549	 * sysretq will re-enable interrupts:
 550	 */
 551	TRACE_IRQS_ON
 552	movq RIP-ARGOFFSET(%rsp),%rcx
 553	CFI_REGISTER	rip,rcx
 554	RESTORE_ARGS 1,-ARG_SKIP,0
 555	/*CFI_REGISTER	rflags,r11*/
 556	movq	PER_CPU_VAR(old_rsp), %rsp
 557	USERGS_SYSRET64
 558
 559	CFI_RESTORE_STATE
 560	/* Handle reschedules */
 561	/* edx:	work, edi: workmask */
 562sysret_careful:
	/*
	 * If TIF_NEED_RESCHED is among the pending work bits, re-enable
	 * interrupts, call schedule with the work mask (%rdi) preserved on
	 * the stack, and re-run sysret_check.  Any other work goes to
	 * sysret_signal.
	 */
 563	bt $TIF_NEED_RESCHED,%edx
 564	jnc sysret_signal
 565	TRACE_IRQS_ON
 566	ENABLE_INTERRUPTS(CLBR_NONE)
 567	pushq_cfi %rdi
 568	call schedule
 569	popq_cfi %rdi
 570	jmp sysret_check
 571
 572	/* Handle a signal */
 573sysret_signal:
	/*
	 * Pending work other than a reschedule.  With CONFIG_AUDITSYSCALL a
	 * set TIF_SYSCALL_AUDIT bit is handled by sysret_audit; everything
	 * else fixes up the top of stack and joins the IRET path at
	 * int_check_syscall_exit_work.
	 */
 574	TRACE_IRQS_ON
 575	ENABLE_INTERRUPTS(CLBR_NONE)
 576#ifdef CONFIG_AUDITSYSCALL
 577	bt $TIF_SYSCALL_AUDIT,%edx
 578	jc sysret_audit
 579#endif
 580	/*
 581	 * We have a signal, or exit tracing or single-step.
 582	 * These all wind up with the iret return path anyway,
 583	 * so just join that path right now.
 584	 */
 585	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
 586	jmp int_check_syscall_exit_work
 587
 588badsys:
	/* Out-of-range syscall number: report -ENOSYS and take the normal exit. */
 589	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 590	jmp ret_from_sys_call
 591
 592#ifdef CONFIG_AUDITSYSCALL
 593	/*
 594	 * Fast path for syscall audit without full syscall trace.
 595	 * We just call __audit_syscall_entry() directly, and then
 596	 * jump back to the normal fast path.
 597	 */
 598auditsys:
	/*
	 * Shuffle the syscall number and the first four syscall arguments
	 * into the C argument registers for __audit_syscall_entry.  The moves
	 * run from the last argument to the first so that no source register
	 * is overwritten before it has been read.
	 */
 599	movq %r10,%r9			/* 6th arg: 4th syscall arg */
 600	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
 601	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
 602	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 603	movq %rax,%rsi			/* 2nd arg: syscall number */
 604	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
 605	call __audit_syscall_entry
 606	LOAD_ARGS 0		/* reload call-clobbered registers */
 607	jmp system_call_fastpath
 608
 609	/*
 610	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
 611	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 612	 * masked off.
 613	 */
 614sysret_audit:
	/*
	 * Call __audit_syscall_exit(success, return_value) and rejoin
	 * sysret_check with TIF_SYSCALL_AUDIT removed from the work mask.
	 *
	 * success must be 0 for every return value in the error range
	 * [-MAX_ERRNO, -1].  The compare is unsigned, so that means strictly
	 * below -MAX_ERRNO: setb.  The previous setbe also reported the
	 * boundary value -MAX_ERRNO itself as a success.
	 */
 615	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
 616	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO (unsigned)? */
 617	setb %al		/* 1 if so, 0 if not */
 618	movzbl %al,%edi		/* zero-extend that into %edi */
 619	call __audit_syscall_exit
 620	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 621	jmp sysret_check
 622#endif	/* CONFIG_AUDITSYSCALL */
 623
 624	/* Do syscall tracing */
 625tracesys:
	/*
	 * Slow syscall entry.  When audit is the only entry work pending
	 * (CONFIG_AUDITSYSCALL), hand over to auditsys.  Otherwise build a
	 * full frame, preset the RAX slot to -ENOSYS, call
	 * syscall_trace_enter with the frame pointer, reload the argument
	 * registers and dispatch using the syscall number it returned.
	 */
 626#ifdef CONFIG_AUDITSYSCALL
 627	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 628	jz auditsys
 629#endif
 630	SAVE_REST
 631	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
 632	FIXUP_TOP_OF_STACK %rdi
 633	movq %rsp,%rdi
 634	call syscall_trace_enter
 635	/*
 636	 * Reload arg registers from stack in case ptrace changed them.
 637	 * We don't reload %rax because syscall_trace_enter() returned
 638	 * the value it wants us to use in the table lookup.
 639	 */
 640	LOAD_ARGS ARGOFFSET, 1
 641	RESTORE_REST
 642#if __SYSCALL_MASK == ~0
 643	cmpq $__NR_syscall_max,%rax
 644#else
 645	andl $__SYSCALL_MASK,%eax
 646	cmpl $__NR_syscall_max,%eax
 647#endif
 648	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
 649	movq %r10,%rcx	/* fixup for C */
 650	call *sys_call_table(,%rax,8)
 651	movq %rax,RAX-ARGOFFSET(%rsp)
 652	/* Use IRET because user could have changed frame */
 652	/* Use IRET because user could have changed frame */
 653
 654/*
 655 * Syscall return path ending with IRET.
 656 * Has correct top of stack, but partial stack frame.
 657 */
 658GLOBAL(int_ret_from_sys_call)
	/*
	 * IRET-based syscall exit.  With interrupts off, AND the thread_info
	 * flags with the mask in %edi.  Pending work goes to int_careful;
	 * otherwise clear TS_COMPAT in the status word and leave through
	 * retint_swapgs.
	 */
 659	DISABLE_INTERRUPTS(CLBR_NONE)
 660	TRACE_IRQS_OFF
 661	movl $_TIF_ALLWORK_MASK,%edi
 662	/* edi:	mask to check */
 663GLOBAL(int_with_check)
 664	LOCKDEP_SYS_EXIT_IRQ
 665	GET_THREAD_INFO(%rcx)
 666	movl TI_flags(%rcx),%edx
 667	andl %edi,%edx
 668	jnz   int_careful
 669	andl    $~TS_COMPAT,TI_status(%rcx)
 670	jmp   retint_swapgs
 671
 672	/* Either reschedule or signal or syscall exit tracking needed. */
 673	/* First do a reschedule test. */
 674	/* edx:	work, edi: workmask */
 675int_careful:
	/*
	 * Reschedule if TIF_NEED_RESCHED is pending: interrupts are enabled
	 * around the schedule call, the work mask in %rdi is preserved on
	 * the stack, and the flags are re-checked at int_with_check.  Other
	 * work goes to int_very_careful.
	 */
 676	bt $TIF_NEED_RESCHED,%edx
 677	jnc  int_very_careful
 678	TRACE_IRQS_ON
 679	ENABLE_INTERRUPTS(CLBR_NONE)
 680	pushq_cfi %rdi
 681	call schedule
 682	popq_cfi %rdi
 683	DISABLE_INTERRUPTS(CLBR_NONE)
 684	TRACE_IRQS_OFF
 685	jmp int_with_check
 686
 687	/* handle signals and tracing -- both require a full stack frame */
 688int_very_careful:
	/*
	 * Work that needs a full frame.  After SAVE_REST, any
	 * _TIF_WORK_SYSCALL_EXIT bit leads to a syscall_trace_leave call;
	 * those bits (and _TIF_SYSCALL_EMU) are then dropped from the work
	 * mask in %edi.  Without such bits, continue at int_signal.
	 */
 689	TRACE_IRQS_ON
 690	ENABLE_INTERRUPTS(CLBR_NONE)
 691int_check_syscall_exit_work:
 692	SAVE_REST
 693	/* Check for syscall exit trace */
 694	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 695	jz int_signal
 696	pushq_cfi %rdi
	/* The frame sits 8 bytes up because of the %rdi just pushed. */
 697	leaq 8(%rsp),%rdi	# &ptregs -> arg1
 698	call syscall_trace_leave
 699	popq_cfi %rdi
 700	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
 701	jmp int_restore_rest
 702
 703int_signal:
	/*
	 * Call do_notify_resume(frame, 0) when any _TIF_DO_NOTIFY_MASK bit
	 * is pending, then narrow the work mask to _TIF_WORK_MASK, drop the
	 * extra registers and loop back to int_with_check with interrupts
	 * off.
	 */
 704	testl $_TIF_DO_NOTIFY_MASK,%edx
 705	jz 1f
 706	movq %rsp,%rdi		# &ptregs -> arg1
 707	xorl %esi,%esi		# oldset -> arg2
 708	call do_notify_resume
 7091:	movl $_TIF_WORK_MASK,%edi
 710int_restore_rest:
 711	RESTORE_REST
 712	DISABLE_INTERRUPTS(CLBR_NONE)
 713	TRACE_IRQS_OFF
 714	jmp int_with_check
 715	CFI_ENDPROC
 716END(system_call)
 717
 718/*
 719 * Certain special system calls that need to save a complete full stack frame.
 720 */
 721	.macro PTREGSCALL label,func,arg
	/*
	 * Emit a stub named label that makes room for the extra registers
	 * (REST_SKIP bytes), fills them in via save_rest, passes the frame
	 * address in the register given as arg, calls func and finishes
	 * through ptregscall_common.
	 */
 722ENTRY(\label)
 723	PARTIAL_FRAME 1 8		/* offset 8: return address */
 724	subq $REST_SKIP, %rsp
 725	CFI_ADJUST_CFA_OFFSET REST_SKIP
 726	call save_rest
 727	DEFAULT_FRAME 0 8		/* offset 8: return address */
 728	leaq 8(%rsp), \arg	/* pt_regs pointer */
 729	call \func
 730	jmp ptregscall_common
 731	CFI_ENDPROC
 732END(\label)
 733	.endm
 734
 735	PTREGSCALL stub_clone, sys_clone, %r8
	/*
	 * Third operand on each line: the argument register that receives
	 * the pt_regs pointer for that handler.
	 */
 736	PTREGSCALL stub_fork, sys_fork, %rdi
 737	PTREGSCALL stub_vfork, sys_vfork, %rdi
 738	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 739	PTREGSCALL stub_iopl, sys_iopl, %rsi
 740
 741ENTRY(ptregscall_common)
	/*
	 * Common tail of the PTREGSCALL stubs: sync old_rsp and the R11 slot
	 * from the frame, reload rbx, rbp and r12-r15, and return while
	 * popping the REST_SKIP bytes the stub reserved.  Clobbers %r11.
	 */
 742	DEFAULT_FRAME 1 8	/* offset 8: return address */
 743	RESTORE_TOP_OF_STACK %r11, 8
 744	movq_cfi_restore R15+8, r15
 745	movq_cfi_restore R14+8, r14
 746	movq_cfi_restore R13+8, r13
 747	movq_cfi_restore R12+8, r12
 748	movq_cfi_restore RBP+8, rbp
 749	movq_cfi_restore RBX+8, rbx
 750	ret $REST_SKIP		/* pop extended registers */
 751	CFI_ENDPROC
 752END(ptregscall_common)
 753
 754ENTRY(stub_execve)
	/*
	 * Discards its own return address, builds a full frame, calls
	 * sys_execve with the frame pointer in %rcx (fourth C argument),
	 * stores the result in the RAX slot and leaves through the IRET path.
	 */
 755	CFI_STARTPROC
 756	addq $8, %rsp
 757	PARTIAL_FRAME 0
 758	SAVE_REST
 759	FIXUP_TOP_OF_STACK %r11
 760	movq %rsp, %rcx
 761	call sys_execve
 762	RESTORE_TOP_OF_STACK %r11
 763	movq %rax,RAX(%rsp)
 764	RESTORE_REST
 765	jmp int_ret_from_sys_call
 766	CFI_ENDPROC
 767END(stub_execve)
 768
 769/*
 770 * sigreturn is special because it needs to restore all registers on return.
 771 * This cannot be done with SYSRET, so use the IRET return path instead.
 772 */
 773ENTRY(stub_rt_sigreturn)
	/*
	 * Discards its own return address, builds a full frame, calls
	 * sys_rt_sigreturn with the frame pointer in %rdi, stores the result
	 * in the RAX slot and leaves through the IRET path.
	 */
 774	CFI_STARTPROC
 775	addq $8, %rsp
 776	PARTIAL_FRAME 0
 777	SAVE_REST
 778	movq %rsp,%rdi
 779	FIXUP_TOP_OF_STACK %r11
 780	call sys_rt_sigreturn
 781	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
 782	RESTORE_REST
 783	jmp int_ret_from_sys_call
 784	CFI_ENDPROC
 785END(stub_rt_sigreturn)
 786
 787#ifdef CONFIG_X86_X32_ABI
 788	PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx	/* pt_regs pointer in %rdx */
 789
 790ENTRY(stub_x32_rt_sigreturn)
	/*
	 * Same instruction sequence as stub_rt_sigreturn, calling
	 * sys32_x32_rt_sigreturn instead.
	 */
 791	CFI_STARTPROC
 792	addq $8, %rsp
 793	PARTIAL_FRAME 0
 794	SAVE_REST
 795	movq %rsp,%rdi
 796	FIXUP_TOP_OF_STACK %r11
 797	call sys32_x32_rt_sigreturn
 798	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
 799	RESTORE_REST
 800	jmp int_ret_from_sys_call
 801	CFI_ENDPROC
 802END(stub_x32_rt_sigreturn)
 803
 804ENTRY(stub_x32_execve)
	/* Same instruction sequence as stub_execve, calling sys32_execve instead. */
 805	CFI_STARTPROC
 806	addq $8, %rsp
 807	PARTIAL_FRAME 0
 808	SAVE_REST
 809	FIXUP_TOP_OF_STACK %r11
 810	movq %rsp, %rcx
 811	call sys32_execve
 812	RESTORE_TOP_OF_STACK %r11
 813	movq %rax,RAX(%rsp)
 814	RESTORE_REST
 815	jmp int_ret_from_sys_call
 816	CFI_ENDPROC
 817END(stub_x32_execve)
 818
 819#endif
 820
 821/*
 822 * Build the entry stubs and pointer table with some assembler magic.
 823 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 824 * single cache line on all modern x86 implementations.
 825 */
 826	.section .init.rodata,"a"
	/*
	 * Two things are generated in lock-step: the stub code in .entry.text
	 * and, in .init.rodata, the table named interrupt, which receives one
	 * .quad per stub holding that stub's address.  The generator switches
	 * between the two sections with .previous / .section.
	 */
 827ENTRY(interrupt)
 828	.section .entry.text
 829	.p2align 5
 830	.p2align CONFIG_X86_L1_CACHE_SHIFT
 831ENTRY(irq_entries_start)
 832	INTR_FRAME
 833vector=FIRST_EXTERNAL_VECTOR
	/* Outer loop: one 32-byte-aligned chunk per group of up to 7 vectors. */
 834.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
 835	.balign 32
 836  .rept	7
 837    .if vector < NR_VECTORS
	/* Every stub but the very first undoes the previous stub's CFA push. */
 838      .if vector <> FIRST_EXTERNAL_VECTOR
 839	CFI_ADJUST_CFA_OFFSET -8
 840      .endif
 8411:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
	/* The seventh stub of a chunk falls through to label 2 below. */
 842      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 843	jmp 2f
 844      .endif
	/* Record this stub's address in the table, then resume emitting code. */
 845      .previous
 846	.quad 1b
 847      .section .entry.text
 848vector=vector+1
 849    .endif
 850  .endr
 8512:	jmp common_interrupt
 852.endr
 853	CFI_ENDPROC
 854END(irq_entries_start)
 855
 856.previous
 857END(interrupt)
 858.previous
 859
 860/*
 861 * Interrupt entry/exit.
 862 *
 863 * Interrupt entry points save only callee clobbered registers in fast path.
 864 *
 865 * Entry runs with interrupts off.
 866 */
 867
 868/* 0(%rsp): ~(interrupt number) */
 869	.macro interrupt func
	/*
	 * Common interrupt prologue: reserve frame space below the word
	 * already on the stack, save registers and switch stacks with
	 * SAVE_ARGS_IRQ, then call func.
	 */
 870	/* reserve pt_regs for scratch regs and rbp */
 871	subq $ORIG_RAX-RBP, %rsp
 872	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 873	SAVE_ARGS_IRQ
 874	call \func
 875	.endm
 876
 877/*
 878 * Interrupt entry/exit should be protected against kprobes
 879 */
 880	.pushsection .kprobes.text, "ax"
 881	/*
 882	 * The interrupt stubs push (~vector+0x80) onto the stack and
 883	 * then jump to common_interrupt.
 884	 */
 885	.p2align CONFIG_X86_L1_CACHE_SHIFT
 886common_interrupt:
	/*
	 * Shared tail of the per-vector stubs.  Rebiases the word pushed by
	 * the stub, runs the interrupt prologue with do_IRQ as the handler,
	 * and afterwards (ret_from_intr) drops irq_count and returns to the
	 * stack pointer that SAVE_ARGS_IRQ pushed.
	 */
 887	XCPT_FRAME
 888	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 889	interrupt do_IRQ
 890	/* 0(%rsp): old_rsp-ARGOFFSET */
 891ret_from_intr:
 892	DISABLE_INTERRUPTS(CLBR_NONE)
 893	TRACE_IRQS_OFF
 894	decl PER_CPU_VAR(irq_count)
 895
 896	/* Restore saved previous stack */
 897	popq %rsi
 898	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
	/* %rsi points at the RBP slot; move %rsp to the ARGOFFSET base. */
 899	leaq ARGOFFSET-RBP(%rsi), %rsp
 900	CFI_DEF_CFA_REGISTER	rsp
 901	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
 902
 903exit_intr:
	/*
	 * Pick the return path from the saved CS: low two bits clear goes to
	 * retint_kernel, anything else checks the thread_info flags against
	 * _TIF_WORK_MASK.  Pending work goes to retint_careful; otherwise
	 * SWAPGS and restore registers.
	 */
 904	GET_THREAD_INFO(%rcx)
 905	testl $3,CS-ARGOFFSET(%rsp)
 906	je retint_kernel
 907
 908	/* Interrupt came from user space */
 909	/*
 910	 * Has a correct top of stack, but a partial stack frame
 911	 * %rcx: thread info. Interrupts off.
 912	 */
 913retint_with_reschedule:
 914	movl $_TIF_WORK_MASK,%edi
 915retint_check:
 916	LOCKDEP_SYS_EXIT_IRQ
 917	movl TI_flags(%rcx),%edx
 918	andl %edi,%edx
 919	CFI_REMEMBER_STATE
 920	jnz  retint_careful
 921
 922retint_swapgs:		/* return to user-space */
 923	/*
 924	 * The iretq could re-enable interrupts:
 925	 */
 926	DISABLE_INTERRUPTS(CLBR_ANY)
 927	TRACE_IRQS_IRETQ
 928	SWAPGS
 929	jmp restore_args
 930
 931retint_restore_args:	/* return to kernel space */
	/*
	 * No SWAPGS on this path.  Restore the argument registers and leave
	 * through INTERRUPT_RETURN; the exception-table entry sends a fault
	 * on that instruction to bad_iret.
	 */
 932	DISABLE_INTERRUPTS(CLBR_ANY)
 933	/*
 934	 * The iretq could re-enable interrupts:
 935	 */
 936	TRACE_IRQS_IRETQ
 937restore_args:
 938	RESTORE_ARGS 1,8,1
 939
 940irq_return:
 941	INTERRUPT_RETURN
 942	_ASM_EXTABLE(irq_return, bad_iret)
 943
 944#ifdef CONFIG_PARAVIRT
 945ENTRY(native_iret)
	/* Built only with CONFIG_PARAVIRT: a bare iretq; a fault here goes to bad_iret. */
 946	iretq
 947	_ASM_EXTABLE(native_iret, bad_iret)
 948#endif
 949
 950	.section .fixup,"ax"
 951bad_iret:
 952	/*
 953	 * The iret traps when the %cs or %ss being restored is bogus.
 954	 * We've lost the original trap vector and error code.
 955	 * #GPF is the most likely one to get for an invalid selector.
 956	 * So pretend we completed the iret and took the #GPF in user mode.
 957	 *
 958	 * We are now running with the kernel GS after exception recovery.
 959	 * But error_entry expects us to have user GS to match the user %cs,
 960	 * so swap back.
 961	 */
 962	pushq $0		/* stands in for the error code (pushed without CFI) */
 963
 964	SWAPGS
 965	jmp general_protection
 966
 967	.previous
 968
 969	/* edi: workmask, edx: work */
 970retint_careful:
	/*
	 * Reschedule if TIF_NEED_RESCHED is pending: interrupts are enabled
	 * around the schedule call and the work mask in %rdi is preserved on
	 * the stack.  %rcx is reloaded with thread_info before going back to
	 * retint_check.  Other work goes to retint_signal.
	 */
 971	CFI_RESTORE_STATE
 972	bt    $TIF_NEED_RESCHED,%edx
 973	jnc   retint_signal
 974	TRACE_IRQS_ON
 975	ENABLE_INTERRUPTS(CLBR_NONE)
 976	pushq_cfi %rdi
 977	call  schedule
 978	popq_cfi %rdi
 979	GET_THREAD_INFO(%rcx)
 980	DISABLE_INTERRUPTS(CLBR_NONE)
 981	TRACE_IRQS_OFF
 982	jmp retint_check
 983
 984retint_signal:
	/*
	 * With no _TIF_DO_NOTIFY_MASK bit pending, return through
	 * retint_swapgs.  Otherwise build a full frame, set ORIG_RAX to -1,
	 * call do_notify_resume(frame, 0) with interrupts enabled, and start
	 * the flag check over at retint_with_reschedule.
	 */
 985	testl $_TIF_DO_NOTIFY_MASK,%edx
 986	jz    retint_swapgs
 987	TRACE_IRQS_ON
 988	ENABLE_INTERRUPTS(CLBR_NONE)
 989	SAVE_REST
 990	movq $-1,ORIG_RAX(%rsp)
 991	xorl %esi,%esi		# oldset
 992	movq %rsp,%rdi		# &pt_regs
 993	call do_notify_resume
 994	RESTORE_REST
 995	DISABLE_INTERRUPTS(CLBR_NONE)
 996	TRACE_IRQS_OFF
 997	GET_THREAD_INFO(%rcx)
 998	jmp retint_with_reschedule
 999
1000#ifdef CONFIG_PREEMPT
1001	/* Returning to kernel space. Check if we need preemption */
1002	/* rcx:	 threadinfo. interrupts off. */
1003ENTRY(retint_kernel)
	/*
	 * Call preempt_schedule_irq only when all three hold: preempt_count
	 * is zero, TIF_NEED_RESCHED is set, and bit 9 of the saved EFLAGS is
	 * set.  Any failed test returns through retint_restore_args.
	 */
1004	cmpl $0,TI_preempt_count(%rcx)
1005	jnz  retint_restore_args
1006	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
1007	jnc  retint_restore_args
1008	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
1009	jnc  retint_restore_args
1010	call preempt_schedule_irq
1011	jmp exit_intr
1012#endif
1013
1014	CFI_ENDPROC
1015END(common_interrupt)
1016/*
1017 * End of kprobes section
1018 */
1019       .popsection
1020
1021/*
1022 * APIC interrupts.
1023 */
1024.macro apicinterrupt num sym do_sym
	/*
	 * Emit entry point sym: push the complemented vector number num, run
	 * the interrupt prologue with do_sym as the handler and return
	 * through ret_from_intr.  The local label .Lcommon_sym lets other
	 * stubs join after the push.
	 */
1025ENTRY(\sym)
1026	INTR_FRAME
1027	pushq_cfi $~(\num)
1028.Lcommon_\sym:
1029	interrupt \do_sym
1030	jmp ret_from_intr
1031	CFI_ENDPROC
1032END(\sym)
1033.endm
1034
1035#ifdef CONFIG_SMP
	/* Each instantiation below is: vector, entry symbol, handler called. */
1036apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
1037	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
1038apicinterrupt REBOOT_VECTOR \
1039	reboot_interrupt smp_reboot_interrupt
1040#endif
1041
1042#ifdef CONFIG_X86_UV
1043apicinterrupt UV_BAU_MESSAGE \
1044	uv_bau_message_intr1 uv_bau_message_interrupt
1045#endif
1046apicinterrupt LOCAL_TIMER_VECTOR \
1047	apic_timer_interrupt smp_apic_timer_interrupt
1048apicinterrupt X86_PLATFORM_IPI_VECTOR \
1049	x86_platform_ipi smp_x86_platform_ipi
1050
1051#ifdef CONFIG_SMP
	/*
	 * Entries invalidate_interrupt1 .. invalidate_interrupt31, limited by
	 * NUM_INVALIDATE_TLB_VECTORS.  Each pushes its own complemented
	 * vector and joins invalidate_interrupt0 (generated last, via
	 * apicinterrupt) at its .Lcommon_ label, just after that entry's
	 * own push.
	 */
1052	ALIGN
1053	INTR_FRAME
1054.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
1055	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1056.if NUM_INVALIDATE_TLB_VECTORS > \idx
1057ENTRY(invalidate_interrupt\idx)
1058	pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
1059	jmp .Lcommon_invalidate_interrupt0
1060	CFI_ADJUST_CFA_OFFSET -8
1061END(invalidate_interrupt\idx)
1062.endif
1063.endr
1064	CFI_ENDPROC
1065apicinterrupt INVALIDATE_TLB_VECTOR_START, \
1066	invalidate_interrupt0, smp_invalidate_interrupt
1067#endif
1068
1069apicinterrupt THRESHOLD_APIC_VECTOR \
1070	threshold_interrupt smp_threshold_interrupt
1071apicinterrupt THERMAL_APIC_VECTOR \
1072	thermal_interrupt smp_thermal_interrupt
1073
	/* Inter-processor entries, built only with CONFIG_SMP. */
1074#ifdef CONFIG_SMP
1075apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
1076	call_function_single_interrupt smp_call_function_single_interrupt
1077apicinterrupt CALL_FUNCTION_VECTOR \
1078	call_function_interrupt smp_call_function_interrupt
1079apicinterrupt RESCHEDULE_VECTOR \
1080	reschedule_interrupt smp_reschedule_interrupt
1081#endif
1082
1083apicinterrupt ERROR_APIC_VECTOR \
1084	error_interrupt smp_error_interrupt
1085apicinterrupt SPURIOUS_APIC_VECTOR \
1086	spurious_interrupt smp_spurious_interrupt
1087
1088#ifdef CONFIG_IRQ_WORK
1089apicinterrupt IRQ_WORK_VECTOR \
1090	irq_work_interrupt smp_irq_work_interrupt
1091#endif
1092
1093/*
1094 * Exception entry points.
1095 */
1096.macro zeroentry sym do_sym
	/*
	 * Entry point for an exception without an error code: push -1 as
	 * ORIG_RAX, reserve the rest of the frame, save registers via
	 * error_entry, call do_sym(frame, 0) and leave through error_exit.
	 */
1097ENTRY(\sym)
1098	INTR_FRAME
1099	PARAVIRT_ADJUST_EXCEPTION_FRAME
1100	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1101	subq $ORIG_RAX-R15, %rsp
1102	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1103	call error_entry
1104	DEFAULT_FRAME 0
1105	movq %rsp,%rdi		/* pt_regs pointer */
1106	xorl %esi,%esi		/* no error code */
1107	call \do_sym
1108	jmp error_exit		/* %ebx: no swapgs flag */
1109	CFI_ENDPROC
1110END(\sym)
1111.endm
1112
1113.macro paranoidzeroentry sym do_sym
	/*
	 * Like zeroentry, but registers are saved with save_paranoid (which
	 * decides about SWAPGS from MSR_GS_BASE) and the exit goes through
	 * paranoid_exit.
	 */
1114ENTRY(\sym)
1115	INTR_FRAME
1116	PARAVIRT_ADJUST_EXCEPTION_FRAME
1117	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1118	subq $ORIG_RAX-R15, %rsp
1119	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1120	call save_paranoid
1121	TRACE_IRQS_OFF
1122	movq %rsp,%rdi		/* pt_regs pointer */
1123	xorl %esi,%esi		/* no error code */
1124	call \do_sym
1125	jmp paranoid_exit	/* %ebx: no swapgs flag */
1126	CFI_ENDPROC
1127END(\sym)
1128.endm
1129
/*
 * INIT_TSS_IST(x): memory operand for the per-CPU init_tss field at
 * offset TSS_ist + (x - 1) * 8, i.e. x is a 1-based index into
 * consecutive 8-byte entries starting at TSS_ist.
 */
1130#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
/*
 * paranoidzeroentry_ist sym do_sym ist
 *
 * Like paranoidzeroentry, but additionally lowers the INIT_TSS_IST(\ist)
 * entry by EXCEPTION_STKSZ for the duration of the \do_sym call and
 * raises it back afterwards.
 * NOTE(review): presumably this makes a recursive exception on the same
 * IST index start on a different stack area than the one in use --
 * confirm against the IST setup code.
 */
1131.macro paranoidzeroentry_ist sym do_sym ist
1132ENTRY(\sym)
1133	INTR_FRAME
1134	PARAVIRT_ADJUST_EXCEPTION_FRAME
1135	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1136	subq $ORIG_RAX-R15, %rsp	/* room for the slots from R15 up to, not including, ORIG_RAX */
1137	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1138	call save_paranoid
1139	TRACE_IRQS_OFF_DEBUG
1140	movq %rsp,%rdi		/* pt_regs pointer */
1141	xorl %esi,%esi		/* no error code */
1142	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)	/* move the IST entry down while the handler runs */
1143	call \do_sym
1144	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)	/* undo the adjustment above */
1145	jmp paranoid_exit	/* %ebx: no swapgs flag */
1146	CFI_ENDPROC
1147END(\sym)
1148.endm
1149
/*
 * errorentry sym do_sym
 *
 * Emit entry point \sym for an exception whose error code is already on
 * the stack, where it occupies the ORIG_RAX slot. After error_entry has
 * saved the registers, the error code is moved into %rsi, the ORIG_RAX
 * slot is overwritten with -1, and \do_sym is called with
 * %rdi = pt_regs pointer and %rsi = error code.
 * Leaves through error_exit.
 */
1150.macro errorentry sym do_sym
1151ENTRY(\sym)
1152	XCPT_FRAME
1153	PARAVIRT_ADJUST_EXCEPTION_FRAME
1154	subq $ORIG_RAX-R15, %rsp	/* room for the slots from R15 up to, not including, ORIG_RAX */
1155	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1156	call error_entry
1157	DEFAULT_FRAME 0
1158	movq %rsp,%rdi			/* pt_regs pointer */
1159	movq ORIG_RAX(%rsp),%rsi	/* get error code */
1160	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
1161	call \do_sym
1162	jmp error_exit			/* %ebx: no swapgs flag */
1163	CFI_ENDPROC
1164END(\sym)
1165.endm
1166
1167	/* error code is on the stack already */
/*
 * paranoiderrorentry sym do_sym
 *
 * Same shape as errorentry (error code taken from the ORIG_RAX slot and
 * passed in %rsi, slot then set to -1), but the registers are saved by
 * save_paranoid instead of error_entry and the stub leaves through
 * paranoid_exit instead of error_exit.
 */
1168.macro paranoiderrorentry sym do_sym
1169ENTRY(\sym)
1170	XCPT_FRAME
1171	PARAVIRT_ADJUST_EXCEPTION_FRAME
1172	subq $ORIG_RAX-R15, %rsp	/* room for the slots from R15 up to, not including, ORIG_RAX */
1173	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1174	call save_paranoid
1175	DEFAULT_FRAME 0
1176	TRACE_IRQS_OFF
1177	movq %rsp,%rdi			/* pt_regs pointer */
1178	movq ORIG_RAX(%rsp),%rsi	/* get error code */
1179	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
1180	call \do_sym
1181	jmp paranoid_exit		/* %ebx: no swapgs flag */
1182	CFI_ENDPROC
1183END(\sym)
1184.endm
1185
/*
 * Instantiate the exception entry stubs: <macro> <entry symbol> <handler>.
 * zeroentry is used where the stub has to supply the -1 ORIG_RAX value
 * itself; errorentry/paranoiderrorentry are used where an error code is
 * already on the stack at entry.
 */
1186zeroentry divide_error do_divide_error
1187zeroentry overflow do_overflow
1188zeroentry bounds do_bounds
1189zeroentry invalid_op do_invalid_op
1190zeroentry device_not_available do_device_not_available
1191paranoiderrorentry double_fault do_double_fault
1192zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1193errorentry invalid_TSS do_invalid_TSS
1194errorentry segment_not_present do_segment_not_present
1195zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1196zeroentry coprocessor_error do_coprocessor_error
1197errorentry alignment_check do_alignment_check
1198zeroentry simd_coprocessor_error do_simd_coprocessor_error
1199
1200
1201	/* Reload gs selector with exception handling */
1202	/* edi:  new selector */
/*
 * native_load_gs_index: load the selector passed in %edi into %gs.
 *
 * The load is bracketed by two SWAPGS instructions and runs with
 * interrupts disabled; the caller's flags are saved on entry and
 * restored before returning. A fault on the load at gs_change is
 * redirected to bad_gs through the exception table entry below.
 */
1203ENTRY(native_load_gs_index)
1204	CFI_STARTPROC
1205	pushfq_cfi			/* save caller flags; restored by popfq_cfi below */
1206	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)	/* RDI excluded from the clobber mask: it holds the selector */
1207	SWAPGS
1208gs_change:
1209	movl %edi,%gs
12102:	mfence		/* workaround */
1211	SWAPGS
1212	popfq_cfi
1213	ret
1214	CFI_ENDPROC
1215END(native_load_gs_index)
1216
	/* Exception table entry: a fault at gs_change continues at bad_gs. */
1217	_ASM_EXTABLE(gs_change,bad_gs)
1218	.section .fixup,"ax"
1219	/* running with kernelgs */
1220bad_gs:
1221	SWAPGS			/* switch back to user gs */
1222	xorl %eax,%eax
1223	movl %eax,%gs		/* load selector 0 in place of the one that faulted */
1224	jmp  2b			/* rejoin the normal path after the faulting load */
1225	.previous
1226
/*
 * kernel_thread_helper: call the function whose address is in %rsi, then
 * pass its 32-bit return value (%eax) to do_exit as the first argument.
 * %rdi is not modified before the indirect call.
 * NOTE(review): presumably %rdi carries the argument for the function in
 * %rsi -- confirm against the code that sets up these registers.
 */
1227ENTRY(kernel_thread_helper)
1228	pushq $0		# fake return address
1229	CFI_STARTPROC
1230	/*
1231	 * Here we are in the child and the registers are set as they were
1232	 * at kernel_thread() invocation in the parent.
1233	 */
1234	call *%rsi
1235	# exit
1236	mov %eax, %edi		/* return value becomes the first argument of do_exit */
1237	call do_exit
1238	ud2			# padding for call trace
1239	CFI_ENDPROC
1240END(kernel_thread_helper)
1241
1242/*
1243 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1244 *
1245 * C extern interface:
1246 *	 extern long execve(const char *name, char **argv, char **envp)
1247 *
1248 * asm input arguments:
1249 *	rdi: name, rsi: argv, rdx: envp
1250 *
1251 * We want to fallback into:
1252 *	extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
1253 *
1254 * do_sys_execve asm fallback arguments:
1255 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1256 */
/*
 * kernel_execve: build a fake stack frame plus a full register save,
 * then call sys_execve with %rcx pointing at that frame (%rdi, %rsi and
 * %rdx are passed through untouched). A zero return value leaves through
 * int_ret_from_sys_call; any other value is returned to the caller in
 * %rax after the frame has been torn down.
 */
1257ENTRY(kernel_execve)
1258	CFI_STARTPROC
1259	FAKE_STACK_FRAME $0
1260	SAVE_ALL
1261	movq %rsp,%rcx			/* 4th argument: the frame built above */
1262	call sys_execve
1263	movq %rax, RAX(%rsp)		/* store the result in the frame's RAX slot */
1264	RESTORE_REST
1265	testq %rax,%rax
1266	je int_ret_from_sys_call	/* zero result: do not return to the caller */
1267	RESTORE_ARGS
1268	UNFAKE_STACK_FRAME
1269	ret				/* non-zero result: plain return, value in %rax */
1270	CFI_ENDPROC
1271END(kernel_execve)
1272
1273/* Call softirq on interrupt stack. Interrupts are off. */
/*
 * call_softirq: run __do_softirq, switching %rsp to the per-CPU
 * irq_stack_ptr when the increment of the per-CPU irq_count yields zero.
 * The previous stack pointer is kept in %rbp across the call and
 * restored with leaveq.
 * NOTE(review): the zero test implies irq_count rests at -1 when the irq
 * stack is not in use -- confirm against its definition.
 */
1274ENTRY(call_softirq)
1275	CFI_STARTPROC
1276	pushq_cfi %rbp
1277	CFI_REL_OFFSET rbp,0
1278	mov  %rsp,%rbp
1279	CFI_DEF_CFA_REGISTER rbp
1280	incl PER_CPU_VAR(irq_count)
1281	cmove PER_CPU_VAR(irq_stack_ptr),%rsp	/* uses ZF from the incl above */
1282	push  %rbp			# backlink for old unwinder
1283	call __do_softirq
1284	leaveq				/* %rsp = %rbp, then pop %rbp */
1285	CFI_RESTORE		rbp
1286	CFI_DEF_CFA_REGISTER	rsp
1287	CFI_ADJUST_CFA_OFFSET   -8
1288	decl PER_CPU_VAR(irq_count)
1289	ret
1290	CFI_ENDPROC
1291END(call_softirq)
1292
1293#ifdef CONFIG_XEN
/* Entry stub without error code that calls xen_do_hypervisor_callback. */
1294zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1295
1296/*
1297 * A note on the "critical region" in our callback handler.
1298 * We want to avoid stacking callback handlers due to events occurring
1299 * during handling of the last event. To do this, we keep events disabled
1300 * until we've done all processing. HOWEVER, we must enable events before
1301 * popping the stack frame (can't be done atomically) and so it would still
1302 * be possible to get enough handler activations to overflow the stack.
1303 * Although unlikely, bugs of that kind are hard to track down, so we'd
1304 * like to avoid the possibility.
1305 * So, on entry to the handler we detect whether we interrupted an
1306 * existing activation in its critical region -- if so, we pop the current
1307 * activation and restart the handler using the previous one.
1308 */
/*
 * xen_do_hypervisor_callback: called with %rdi = pt_regs pointer.
 * Resets %rsp to that pt_regs (the function never returns to its
 * caller), calls xen_evtchn_do_upcall -- on the per-CPU irq stack when
 * the irq_count increment yields zero -- and leaves through error_exit.
 */
1309ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
1310	CFI_STARTPROC
1311/*
1312 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct *pt_regs) will
1313 * see the correct pointer to the pt_regs
1314 */
1315	movq %rdi, %rsp            # we don't return, adjust the stack frame
1316	CFI_ENDPROC
1317	DEFAULT_FRAME
131811:	incl PER_CPU_VAR(irq_count)
1319	movq %rsp,%rbp
1320	CFI_DEF_CFA_REGISTER rbp
1321	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp	/* uses ZF from the incl above; movq leaves flags alone */
1322	pushq %rbp			# backlink for old unwinder
1323	call xen_evtchn_do_upcall
1324	popq %rsp			/* reload the stack pointer pushed as backlink */
1325	CFI_DEF_CFA_REGISTER rsp
1326	decl PER_CPU_VAR(irq_count)
1327	jmp  error_exit
1328	CFI_ENDPROC
1329END(xen_do_hypervisor_callback)
1330
1331/*
1332 * Hypervisor uses this for application faults while it executes.
1333 * We get here for two reasons:
1334 *  1. Fault while reloading DS, ES, FS or GS
1335 *  2. Fault while executing IRET
1336 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1337 * registers that could be reloaded and zeroed the others.
1338 * Category 2 we fix up by killing the current process. We cannot use the
1339 * normal Linux return path in this case because if we use the IRET hypercall
1340 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1341 * We distinguish between categories by comparing each saved segment register
1342 * with its current contents: any discrepancy means we are in category 1.
1343 */
/*
 * xen_failsafe_callback
 *
 * Words read from the stack at entry, by offset from %rsp:
 *   0x00 rcx, 0x08 r11, 0x10 ds, 0x18 es, 0x20 fs, 0x28 gs
 * Each saved selector is compared with the live segment register. These
 * six words (0x30 bytes) are dropped on both paths before continuing.
 */
1344ENTRY(xen_failsafe_callback)
1345	INTR_FRAME 1 (6*8)
1346	/*CFI_REL_OFFSET gs,GS*/
1347	/*CFI_REL_OFFSET fs,FS*/
1348	/*CFI_REL_OFFSET es,ES*/
1349	/*CFI_REL_OFFSET ds,DS*/
1350	CFI_REL_OFFSET r11,8
1351	CFI_REL_OFFSET rcx,0
1352	movw %ds,%cx
1353	cmpw %cx,0x10(%rsp)
1354	CFI_REMEMBER_STATE
1355	jne 1f
1356	movw %es,%cx
1357	cmpw %cx,0x18(%rsp)
1358	jne 1f
1359	movw %fs,%cx
1360	cmpw %cx,0x20(%rsp)
1361	jne 1f
1362	movw %gs,%cx
1363	cmpw %cx,0x28(%rsp)
1364	jne 1f
1365	/* All segments match their saved values => Category 2 (Bad IRET). */
1366	movq (%rsp),%rcx
1367	CFI_RESTORE rcx
1368	movq 8(%rsp),%r11
1369	CFI_RESTORE r11
1370	addq $0x30,%rsp
1371	CFI_ADJUST_CFA_OFFSET -0x30
	/*
	 * NOTE(review): the three pushes below presumably rebuild the words
	 * that general_protection expects on top of the remaining frame --
	 * confirm against PARAVIRT_ADJUST_EXCEPTION_FRAME for Xen.
	 */
1372	pushq_cfi $0	/* RIP */
1373	pushq_cfi %r11
1374	pushq_cfi %rcx
1375	jmp general_protection
1376	CFI_RESTORE_STATE
13771:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1378	movq (%rsp),%rcx
1379	CFI_RESTORE rcx
1380	movq 8(%rsp),%r11
1381	CFI_RESTORE r11
1382	addq $0x30,%rsp
1383	CFI_ADJUST_CFA_OFFSET -0x30
1384	pushq_cfi $0		/* one extra word pushed before the register save */
1385	SAVE_ALL
1386	jmp error_exit
1387	CFI_ENDPROC
1388END(xen_failsafe_callback)
1389
/*
 * Entry stub xen_hvm_callback_vector for XEN_HVM_EVTCHN_CALLBACK; it
 * names xen_evtchn_do_upcall, the same routine that
 * xen_do_hypervisor_callback calls, as its handler.
 */
1390apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
1391	xen_hvm_callback_vector xen_evtchn_do_upcall
1392
1393#endif /* CONFIG_XEN */
1394
1395/*
1396 * Some functions should be protected against kprobes
1397 */
/*
 * Everything from here to the matching .popsection is emitted into the
 * .kprobes.text section.
 */
1398	.pushsection .kprobes.text, "ax"
1399
/* debug and int3 both pass DEBUG_STACK as the IST index to adjust. */
1400paranoidzeroentry_ist debug do_debug DEBUG_STACK
1401paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1402paranoiderrorentry stack_segment do_stack_segment
/*
 * Xen variants: same handlers as the three stubs above, but built with
 * the non-paranoid zeroentry/errorentry macros.
 */
1403#ifdef CONFIG_XEN
1404zeroentry xen_debug do_debug
1405zeroentry xen_int3 do_int3
1406errorentry xen_stack_segment do_stack_segment
1407#endif
1408errorentry general_protection do_general_protection
1409errorentry page_fault do_page_fault
1410#ifdef CONFIG_KVM_GUEST
1411errorentry async_page_fault do_async_page_fault
1412#endif
/*
 * The handler operand is an indirect call target: the stub calls
 * through the pointer stored in machine_check_vector.
 */
1413#ifdef CONFIG_X86_MCE
1414paranoidzeroentry machine_check *machine_check_vector(%rip)
1415#endif
1416
1417	/*
1418	 * "Paranoid" exit path from exception stack.
1419	 * Paranoid because this is used by NMIs and cannot take
1420	 * any kernel state for granted.
1421	 * We don't do kernel preemption checks here, because only
1422	 * NMI should be common and it does not enable IRQs and
1423	 * cannot get reschedule ticks.
1424	 *
1425	 * "trace" is 0 for the NMI handler only, because irq-tracing
1426	 * is fundamentally NMI-unsafe. (we cannot change the soft and
1427	 * hard flags at once, atomically)
1428	 */
1429
1430	/* ebx:	no swapgs flag */
/*
 * paranoid_exit
 *
 * In: %ebx = "no swapgs" flag; %rsp = saved register frame.
 *  - %ebx != 0:                      restore registers and iret (no SWAPGS)
 *  - %ebx == 0, saved CS has RPL 0:  SWAPGS, restore registers and iret
 *  - %ebx == 0, saved CS has RPL !=0: handle pending _TIF_WORK_MASK work
 *    first (schedule or do_notify_resume), looping until no flag is
 *    left, then SWAPGS, restore and iret.
 */
1431ENTRY(paranoid_exit)
1432	DEFAULT_FRAME
1433	DISABLE_INTERRUPTS(CLBR_NONE)
1434	TRACE_IRQS_OFF_DEBUG
1435	testl %ebx,%ebx				/* swapgs needed? */
1436	jnz paranoid_restore
1437	testl $3,CS(%rsp)			/* privilege bits of the saved CS */
1438	jnz   paranoid_userspace
1439paranoid_swapgs:
1440	TRACE_IRQS_IRETQ 0
1441	SWAPGS_UNSAFE_STACK
1442	RESTORE_ALL 8
1443	jmp irq_return
1444paranoid_restore:
1445	TRACE_IRQS_IRETQ_DEBUG 0
1446	RESTORE_ALL 8
1447	jmp irq_return
1448paranoid_userspace:
1449	GET_THREAD_INFO(%rcx)
1450	movl TI_flags(%rcx),%ebx
1451	andl $_TIF_WORK_MASK,%ebx
1452	jz paranoid_swapgs			/* nothing pending: plain return */
1453	movq %rsp,%rdi			/* &pt_regs */
1454	call sync_regs
1455	movq %rax,%rsp			/* switch stack for scheduling */
1456	testl $_TIF_NEED_RESCHED,%ebx
1457	jnz paranoid_schedule
1458	movl %ebx,%edx			/* arg3: thread flags */
1459	TRACE_IRQS_ON
1460	ENABLE_INTERRUPTS(CLBR_NONE)
1461	xorl %esi,%esi 			/* arg2: oldset */
1462	movq %rsp,%rdi 			/* arg1: &pt_regs */
1463	call do_notify_resume
1464	DISABLE_INTERRUPTS(CLBR_NONE)
1465	TRACE_IRQS_OFF
1466	jmp paranoid_userspace		/* re-read the flags and check again */
1467paranoid_schedule:
1468	TRACE_IRQS_ON
1469	ENABLE_INTERRUPTS(CLBR_ANY)
1470	call schedule
1471	DISABLE_INTERRUPTS(CLBR_ANY)
1472	TRACE_IRQS_OFF
1473	jmp paranoid_userspace		/* re-read the flags and check again */
1474	CFI_ENDPROC
1475END(paranoid_exit)
1476
1477/*
1478 * Exception entry point. This expects an error code/orig_rax on the stack.
1479 * Returns the "no swapgs flag" in %ebx.
1480 */
/*
 * error_entry
 *
 * Called from the exception stubs with %rsp pointing at the return
 * address directly below the reserved R15 slot, which is why every frame
 * offset used here carries "+8". Stores all fifteen general-purpose
 * registers into their slots, clears the direction flag and returns
 * with %ebx = 0 when the saved CS has non-zero privilege bits (SWAPGS is
 * executed here) or %ebx = 1 when it does not. In the %ebx = 1 case
 * SWAPGS is still executed if the saved RIP is irq_return, a truncated
 * irq_return address, or gs_change.
 */
1481ENTRY(error_entry)
1482	XCPT_FRAME
1483	CFI_ADJUST_CFA_OFFSET 15*8
1484	/* oldrax contains error code */
1485	cld
1486	movq_cfi rdi, RDI+8
1487	movq_cfi rsi, RSI+8
1488	movq_cfi rdx, RDX+8
1489	movq_cfi rcx, RCX+8
1490	movq_cfi rax, RAX+8
1491	movq_cfi  r8,  R8+8
1492	movq_cfi  r9,  R9+8
1493	movq_cfi r10, R10+8
1494	movq_cfi r11, R11+8
1495	movq_cfi rbx, RBX+8
1496	movq_cfi rbp, RBP+8
1497	movq_cfi r12, R12+8
1498	movq_cfi r13, R13+8
1499	movq_cfi r14, R14+8
1500	movq_cfi r15, R15+8
1501	xorl %ebx,%ebx			/* flag = 0: swapgs needed on exit */
1502	testl $3,CS+8(%rsp)		/* privilege bits of the saved CS */
1503	je error_kernelspace
1504error_swapgs:
1505	SWAPGS
1506error_sti:
1507	TRACE_IRQS_OFF
1508	ret
1509
1510/*
1511 * There are two places in the kernel that can potentially fault with
1512 * usergs. Handle them here. The exception handlers after iret run with
1513 * kernel gs again, so don't set the user space flag. B stepping K8s
1514 * sometimes report a truncated RIP for IRET exceptions returning to
1515 * compat mode. Check for these here too.
1516 */
1517error_kernelspace:
1518	incl %ebx			/* flag = 1: no swapgs on exit */
1519	leaq irq_return(%rip),%rcx
1520	cmpq %rcx,RIP+8(%rsp)
1521	je error_swapgs
1522	movl %ecx,%eax	/* zero extend */
1523	cmpq %rax,RIP+8(%rsp)		/* saved RIP == low 32 bits of irq_return? */
1524	je bstep_iret
1525	cmpq $gs_change,RIP+8(%rsp)
1526	je error_swapgs
1527	jmp error_sti
1528
1529bstep_iret:
1530	/* Fix truncated RIP */
1531	movq %rcx,RIP+8(%rsp)
1532	jmp error_swapgs
1533	CFI_ENDPROC
1534END(error_entry)
1535
1536
1537/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
/*
 * error_exit
 *
 * In: %ebx = "no swapgs" flag as returned by error_entry.
 * A non-zero flag leaves through retint_kernel. Otherwise the thread
 * flags are masked with _TIF_WORK_MASK: any bit set leaves through
 * retint_careful, none set through retint_swapgs. At the jump %rcx holds
 * the GET_THREAD_INFO result, %edx the masked flags and %edi the mask.
 * All three retint_* labels are defined outside this chunk.
 */
1538ENTRY(error_exit)
1539	DEFAULT_FRAME
1540	movl %ebx,%eax			/* keep the flag; NOTE(review): RESTORE_REST presumably reloads %rbx */
1541	RESTORE_REST
1542	DISABLE_INTERRUPTS(CLBR_NONE)
1543	TRACE_IRQS_OFF
1544	GET_THREAD_INFO(%rcx)
1545	testl %eax,%eax
1546	jne retint_kernel
1547	LOCKDEP_SYS_EXIT_IRQ
1548	movl TI_flags(%rcx),%edx
1549	movl $_TIF_WORK_MASK,%edi
1550	andl %edi,%edx
1551	jnz retint_careful
1552	jmp retint_swapgs
1553	CFI_ENDPROC
1554END(error_exit)
1555
1556/*
1557 * Test if a given stack is an NMI stack or not.
1558 */
/*
 * test_in_nmi reg stack nmi_ret normal_ret
 *
 *   reg        register (named without '%') holding the upper bound
 *   stack      operand holding the stack pointer value to classify
 *   nmi_ret    jump target when  reg - EXCEPTION_STKSZ <= stack <= reg
 *   normal_ret jump target otherwise
 *
 * Comparisons are unsigned. The macro always ends in a jump, subtracts
 * EXCEPTION_STKSZ from \reg on the way and modifies the flags.
 */
1559	.macro test_in_nmi reg stack nmi_ret normal_ret
1560	cmpq %\reg, \stack
1561	ja \normal_ret			/* stack above the upper bound */
1562	subq $EXCEPTION_STKSZ, %\reg	/* reg now holds the lower bound */
1563	cmpq %\reg, \stack
1564	jb \normal_ret			/* stack below the lower bound */
1565	jmp \nmi_ret
1566	.endm
1567
1568	/* runs on exception stack */
/*
 * nmi: NMI entry point with detection of nested NMIs.
 *
 * Stack offsets used below are relative to %rsp after the initial push
 * of %rdx: 0 = saved rdx, 8 = RIP, 16 = CS, 24 = RFLAGS, 32 = RSP,
 * 40 = SS of the hardware frame; -8 is the "NMI executing" variable of
 * the layout drawn at first_nmi.
 */
1569ENTRY(nmi)
1570	INTR_FRAME
1571	PARAVIRT_ADJUST_EXCEPTION_FRAME
1572	/*
1573	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1574	 * the iretq it performs will take us out of NMI context.
1575	 * This means that we can have nested NMIs where the next
1576	 * NMI is using the top of the stack of the previous NMI. We
1577	 * can't let it execute because the nested NMI will corrupt the
1578	 * stack of the previous NMI. NMI handlers are not re-entrant
1579	 * anyway.
1580	 *
1581	 * To handle this case we do the following:
1582	 *  Check a special location on the stack that contains
1583	 *  a variable that is set when NMIs are executing.
1584	 *  The interrupted task's stack is also checked to see if it
1585	 *  is an NMI stack.
1586	 *  If the variable is not set and the stack is not the NMI
1587	 *  stack then:
1588	 *    o Set the special variable on the stack
1589	 *    o Copy the interrupt frame into a "saved" location on the stack
1590	 *    o Copy the interrupt frame into a "copy" location on the stack
1591	 *    o Continue processing the NMI
1592	 *  If the variable is set or the previous stack is the NMI stack:
1593	 *    o Modify the "copy" location to jump to the repeat_nmi
1594	 *    o return back to the first NMI
1595	 *
1596	 * Now on exit of the first NMI, we first clear the stack variable
1597	 * The NMI stack will tell any nested NMIs at that point that it is
1598	 * nested. Then we pop the stack normally with iret, and if there was
1599	 * a nested NMI that updated the copy interrupt stack frame, a
1600	 * jump will be made to the repeat_nmi code that will handle the second
1601	 * NMI.
1602	 */
1603
1604	/* Use %rdx as our temp variable throughout */
1605	pushq_cfi %rdx
1606	CFI_REL_OFFSET rdx, 0
1607
1608	/*
1609	 * If %cs was not the kernel segment, then the NMI triggered in user
1610	 * space, which means it is definitely not nested.
1611	 */
1612	cmpl $__KERNEL_CS, 16(%rsp)
1613	jne first_nmi
1614
1615	/*
1616	 * Check the special variable on the stack to see if NMIs are
1617	 * executing.
1618	 */
1619	cmpl $1, -8(%rsp)
1620	je nested_nmi
1621
1622	/*
1623	 * Now test if the previous stack was an NMI stack.
1624	 * We need the double check. We check the NMI stack to satisfy the
1625	 * race when the first NMI clears the variable before returning.
1626	 * We check the variable because the first NMI could be in a
1627	 * breakpoint routine using a breakpoint stack.
1628	 */
1629	lea 6*8(%rsp), %rdx		/* address just above the hardware frame */
1630	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi	/* 4*8(%rsp): interrupted RSP */
1631	CFI_REMEMBER_STATE
1632
1633nested_nmi:
1634	/*
1635	 * Do nothing if we interrupted the fixup in repeat_nmi.
1636	 * It's about to repeat the NMI handler, so we are fine
1637	 * with ignoring this one.
1638	 */
1639	movq $repeat_nmi, %rdx
1640	cmpq 8(%rsp), %rdx
1641	ja 1f				/* interrupted RIP below repeat_nmi */
1642	movq $end_repeat_nmi, %rdx
1643	cmpq 8(%rsp), %rdx
1644	ja nested_nmi_out		/* repeat_nmi <= interrupted RIP < end_repeat_nmi */
1645
16461:
1647	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
1648	leaq -6*8(%rsp), %rdx		/* address of the Saved RIP slot */
1649	movq %rdx, %rsp
1650	CFI_ADJUST_CFA_OFFSET 6*8
	/* The five pushes below overwrite the "copied" frame, SS first. */
1651	pushq_cfi $__KERNEL_DS
1652	pushq_cfi %rdx			/* copied RSP = address of the Saved RIP slot */
1653	pushfq_cfi
1654	pushq_cfi $__KERNEL_CS
1655	pushq_cfi $repeat_nmi
1656
1657	/* Put stack back */
1658	addq $(11*8), %rsp		/* undo the 6 slots skipped and the 5 pushed */
1659	CFI_ADJUST_CFA_OFFSET -11*8
1660
1661nested_nmi_out:
1662	popq_cfi %rdx
1663	CFI_RESTORE rdx
1664
1665	/* No need to check faults here */
1666	INTERRUPT_RETURN
1667
1668	CFI_RESTORE_STATE
1669first_nmi:
1670	/*
1671	 * Because nested NMIs will use the pushed location that we
1672	 * stored in rdx, we must keep that space available.
1673	 * Here's what our stack frame will look like:
1674	 * +-------------------------+
1675	 * | original SS             |
1676	 * | original Return RSP     |
1677	 * | original RFLAGS         |
1678	 * | original CS             |
1679	 * | original RIP            |
1680	 * +-------------------------+
1681	 * | temp storage for rdx    |
1682	 * +-------------------------+
1683	 * | NMI executing variable  |
1684	 * +-------------------------+
1685	 * | Saved SS                |
1686	 * | Saved Return RSP        |
1687	 * | Saved RFLAGS            |
1688	 * | Saved CS                |
1689	 * | Saved RIP               |
1690	 * +-------------------------+
1691	 * | copied SS               |
1692	 * | copied Return RSP       |
1693	 * | copied RFLAGS           |
1694	 * | copied CS               |
1695	 * | copied RIP              |
1696	 * +-------------------------+
1697	 * | pt_regs                 |
1698	 * +-------------------------+
1699	 *
1700	 * The saved stack frame is used to fix up the copied stack frame
1701	 * that a nested NMI may change to make the interrupted NMI iret jump
1702	 * to the repeat_nmi. The original stack frame and the temp storage
1703	 * is also used by nested NMIs and can not be trusted on exit.
1704	 */
1705	/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
1706	movq (%rsp), %rdx
1707	CFI_RESTORE rdx
1708
1709	/* Set the NMI executing variable on the stack. */
1710	pushq_cfi $1
1711
1712	/* Copy the stack frame to the Saved frame */
1713	.rept 5
1714	pushq_cfi 6*8(%rsp)		/* each push reads the original slot 6 words up */
1715	.endr
1716	CFI_DEF_CFA_OFFSET SS+8-RIP
1717
1718	/* Everything up to here is safe from nested NMIs */
1719
1720	/*
1721	 * If there was a nested NMI, the first NMI's iret will return
1722	 * here. But NMIs are still enabled and we can take another
1723	 * nested NMI. The nested NMI checks the interrupted RIP to see
1724	 * if it is between repeat_nmi and end_repeat_nmi, and if so
1725	 * it will just return, as we are about to repeat an NMI anyway.
1726	 * This makes it safe to copy to the stack frame that a nested
1727	 * NMI will update.
1728	 */
1729repeat_nmi:
1730	/*
1731	 * Update the stack variable to say we are still in NMI (the update
1732	 * is benign for the non-repeat case, where 1 was pushed just above
1733	 * to this very stack slot).
1734	 */
1735	movq $1, 5*8(%rsp)		/* %rsp is at Saved RIP; the variable sits 5 words up */
1736
1737	/* Make another copy, this one may be modified by nested NMIs */
1738	.rept 5
1739	pushq_cfi 4*8(%rsp)		/* each push reads the Saved slot 4 words up */
1740	.endr
1741	CFI_DEF_CFA_OFFSET SS+8-RIP
1742end_repeat_nmi:
1743
1744	/*
1745	 * Everything below this point can be preempted by a nested
1746	 * NMI if the first NMI took an exception and reset our iret stack
1747	 * so that we repeat another NMI.
1748	 */
1749	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1750	subq $ORIG_RAX-R15, %rsp
1751	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1752	/*
1753	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
1754	 * as we should not be calling schedule in NMI context.
1755	 * Even with normal interrupts enabled. An NMI should not be
1756	 * setting NEED_RESCHED or anything that normal interrupts and
1757	 * exceptions might do.
1758	 */
1759	call save_paranoid
1760	DEFAULT_FRAME 0
1761	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1762	movq %rsp,%rdi			/* arg1: pt_regs pointer */
1763	movq $-1,%rsi			/* arg2: -1 */
1764	call do_nmi
1765	testl %ebx,%ebx				/* swapgs needed? */
1766	jnz nmi_restore
1767nmi_swapgs:
1768	SWAPGS_UNSAFE_STACK
1769nmi_restore:
1770	RESTORE_ALL 8
1771	/* Clear the NMI executing stack variable */
	/*
	 * NOTE(review): the 10*8 offset assumes RESTORE_ALL 8 leaves %rsp at
	 * the copied RIP slot (5 copied + 5 saved words below the variable).
	 */
1772	movq $0, 10*8(%rsp)
1773	jmp irq_return
1774	CFI_ENDPROC
1775END(nmi)
1776
/*
 * ignore_sysret: load -ENOSYS into %eax and return with sysret; nothing
 * else is saved, restored or examined.
 */
1777ENTRY(ignore_sysret)
1778	CFI_STARTPROC
1779	mov $-ENOSYS,%eax
1780	sysret
1781	CFI_ENDPROC
1782END(ignore_sysret)
1783
1784/*
1785 * End of kprobes section
1786 */
1787	.popsection