   1/*
   2 *  linux/arch/x86_64/entry.S
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
   6 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
   7 */
   8
   9/*
  10 * entry.S contains the system-call and fault low-level handling routines.
  11 *
  12 * Some of this is documented in Documentation/x86/entry_64.txt
  13 *
   14 * NOTE: This code handles signal recognition, which happens after every
   15 * interrupt and after each system call.
  16 *
   17 * Normal syscalls and interrupts don't save a full stack frame; this is
   18 * only done for syscall tracing, signals, or fork/exec et al.
  19 *
  20 * A note on terminology:
  21 * - top of stack: Architecture defined interrupt frame from SS to RIP
  22 * at the top of the kernel process stack.
  23 * - partial stack frame: partially saved registers up to R11.
   24 * - full stack frame: Like partial stack frame, but all registers saved.
  25 *
  26 * Some macro usage:
  27 * - CFI macros are used to generate dwarf2 unwind information for better
  28 * backtraces. They don't change any code.
  29 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
  30 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
   31 * There are unfortunately lots of special cases where some registers are
   32 * not touched. The macro is a big mess that should be cleaned up.
  33 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
  34 * Gives a full stack frame.
   35 * - ENTRY/END - Define functions in the symbol table.
  36 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
  37 * frame that is otherwise undefined after a SYSCALL
   38 * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
  39 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
  40 */
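/*
 * Added note (a sketch, not part of the original file): the ENTRY/END pair
 * used throughout this file is assumed to expand along the lines of the
 * definitions in include/linux/linkage.h, i.e. roughly
 *
 *	ENTRY(name)  ->  .globl name ; ALIGN ; name:
 *	END(name)    ->  .size name, . - name
 *
 * so ENTRY emits an aligned global label and END records the symbol size for
 * the symbol table and backtrace tooling.
 */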
  41
  42#include <linux/linkage.h>
  43#include <asm/segment.h>
  44#include <asm/cache.h>
  45#include <asm/errno.h>
  46#include <asm/dwarf2.h>
  47#include <asm/calling.h>
  48#include <asm/asm-offsets.h>
  49#include <asm/msr.h>
  50#include <asm/unistd.h>
  51#include <asm/thread_info.h>
  52#include <asm/hw_irq.h>
  53#include <asm/page_types.h>
  54#include <asm/irqflags.h>
  55#include <asm/paravirt.h>
  56#include <asm/ftrace.h>
  57#include <asm/percpu.h>
  58
  59/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  60#include <linux/elf-em.h>
  61#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  62#define __AUDIT_ARCH_64BIT 0x80000000
  63#define __AUDIT_ARCH_LE	   0x40000000
  64
  65	.code64
  66	.section .entry.text, "ax"
  67
  68#ifdef CONFIG_FUNCTION_TRACER
  69#ifdef CONFIG_DYNAMIC_FTRACE
  70ENTRY(mcount)
  71	retq
  72END(mcount)
  73
  74ENTRY(ftrace_caller)
  75	cmpl $0, function_trace_stop
  76	jne  ftrace_stub
  77
  78	MCOUNT_SAVE_FRAME
  79
  80	movq 0x38(%rsp), %rdi
  81	movq 8(%rbp), %rsi
  82	subq $MCOUNT_INSN_SIZE, %rdi
  83
  84GLOBAL(ftrace_call)
  85	call ftrace_stub
  86
  87	MCOUNT_RESTORE_FRAME
  88
  89#ifdef CONFIG_FUNCTION_GRAPH_TRACER
  90GLOBAL(ftrace_graph_call)
  91	jmp ftrace_stub
  92#endif
  93
  94GLOBAL(ftrace_stub)
  95	retq
  96END(ftrace_caller)
  97
  98#else /* ! CONFIG_DYNAMIC_FTRACE */
  99ENTRY(mcount)
 100	cmpl $0, function_trace_stop
 101	jne  ftrace_stub
 102
 103	cmpq $ftrace_stub, ftrace_trace_function
 104	jnz trace
 105
 106#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 107	cmpq $ftrace_stub, ftrace_graph_return
 108	jnz ftrace_graph_caller
 109
 110	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
 111	jnz ftrace_graph_caller
 112#endif
 113
 114GLOBAL(ftrace_stub)
 115	retq
 116
 117trace:
 118	MCOUNT_SAVE_FRAME
 119
 120	movq 0x38(%rsp), %rdi
 121	movq 8(%rbp), %rsi
 122	subq $MCOUNT_INSN_SIZE, %rdi
 123
 124	call   *ftrace_trace_function
 125
 126	MCOUNT_RESTORE_FRAME
 127
 128	jmp ftrace_stub
 129END(mcount)
 130#endif /* CONFIG_DYNAMIC_FTRACE */
 131#endif /* CONFIG_FUNCTION_TRACER */
 132
 133#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 134ENTRY(ftrace_graph_caller)
 135	cmpl $0, function_trace_stop
 136	jne ftrace_stub
 137
 138	MCOUNT_SAVE_FRAME
 139
 140	leaq 8(%rbp), %rdi
 141	movq 0x38(%rsp), %rsi
 142	movq (%rbp), %rdx
 143	subq $MCOUNT_INSN_SIZE, %rsi
 144
 145	call	prepare_ftrace_return
 146
 147	MCOUNT_RESTORE_FRAME
 148
 149	retq
 150END(ftrace_graph_caller)
 151
 152GLOBAL(return_to_handler)
 153	subq  $24, %rsp
 154
 155	/* Save the return values */
 156	movq %rax, (%rsp)
 157	movq %rdx, 8(%rsp)
 158	movq %rbp, %rdi
 159
 160	call ftrace_return_to_handler
 161
 162	movq %rax, %rdi
 163	movq 8(%rsp), %rdx
 164	movq (%rsp), %rax
 165	addq $24, %rsp
 166	jmp *%rdi
 167#endif
 168
 169
 170#ifndef CONFIG_PREEMPT
 171#define retint_kernel retint_restore_args
 172#endif
 173
 174#ifdef CONFIG_PARAVIRT
 175ENTRY(native_usergs_sysret64)
 176	swapgs
 177	sysretq
 178ENDPROC(native_usergs_sysret64)
 179#endif /* CONFIG_PARAVIRT */
 180
 181
 182.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
 183#ifdef CONFIG_TRACE_IRQFLAGS
 184	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
 185	jnc  1f
 186	TRACE_IRQS_ON
 1871:
 188#endif
 189.endm
 190
 191/*
  192 * C code is not supposed to know about the undefined top of stack. Every time
  193 * a C function with a pt_regs argument is called from the SYSCALL-based
  194 * fast path, FIXUP_TOP_OF_STACK is needed.
 195 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 196 * manipulation.
 197 */
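/*
 * Added illustration (a sketch, not part of the original file): the "top of
 * stack" referred to above is the hardware interrupt frame
 *
 *	SS, RSP, EFLAGS, CS, RIP
 *
 * that the CPU pushes on interrupts and exceptions. SYSCALL pushes none of
 * it, so those slots are undefined on the fast path. FIXUP_TOP_OF_STACK
 * below fills them in: the user RSP from the old_rsp per-CPU variable,
 * __USER_DS/__USER_CS for SS/CS, and EFLAGS copied from the saved R11, so
 * that C code sees a consistent pt_regs. RESTORE_TOP_OF_STACK syncs RSP and
 * EFLAGS back into old_rsp and the R11 slot for the SYSRET fast path.
 */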
 198
 199	/* %rsp:at FRAMEEND */
 200	.macro FIXUP_TOP_OF_STACK tmp offset=0
 201	movq PER_CPU_VAR(old_rsp),\tmp
 202	movq \tmp,RSP+\offset(%rsp)
 203	movq $__USER_DS,SS+\offset(%rsp)
 204	movq $__USER_CS,CS+\offset(%rsp)
 205	movq $-1,RCX+\offset(%rsp)
 206	movq R11+\offset(%rsp),\tmp  /* get eflags */
 207	movq \tmp,EFLAGS+\offset(%rsp)
 208	.endm
 209
 210	.macro RESTORE_TOP_OF_STACK tmp offset=0
 211	movq RSP+\offset(%rsp),\tmp
 212	movq \tmp,PER_CPU_VAR(old_rsp)
 213	movq EFLAGS+\offset(%rsp),\tmp
 214	movq \tmp,R11+\offset(%rsp)
 215	.endm
 216
 217	.macro FAKE_STACK_FRAME child_rip
 218	/* push in order ss, rsp, eflags, cs, rip */
 219	xorl %eax, %eax
 220	pushq_cfi $__KERNEL_DS /* ss */
 221	/*CFI_REL_OFFSET	ss,0*/
 222	pushq_cfi %rax /* rsp */
 223	CFI_REL_OFFSET	rsp,0
 224	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
 225	/*CFI_REL_OFFSET	rflags,0*/
 226	pushq_cfi $__KERNEL_CS /* cs */
 227	/*CFI_REL_OFFSET	cs,0*/
 228	pushq_cfi \child_rip /* rip */
 229	CFI_REL_OFFSET	rip,0
 230	pushq_cfi %rax /* orig rax */
 231	.endm
 232
 233	.macro UNFAKE_STACK_FRAME
 234	addq $8*6, %rsp
 235	CFI_ADJUST_CFA_OFFSET	-(6*8)
 236	.endm
 237
 238/*
 239 * initial frame state for interrupts (and exceptions without error code)
 240 */
 241	.macro EMPTY_FRAME start=1 offset=0
 242	.if \start
 243	CFI_STARTPROC simple
 244	CFI_SIGNAL_FRAME
 245	CFI_DEF_CFA rsp,8+\offset
 246	.else
 247	CFI_DEF_CFA_OFFSET 8+\offset
 248	.endif
 249	.endm
 250
 251/*
 252 * initial frame state for interrupts (and exceptions without error code)
 253 */
 254	.macro INTR_FRAME start=1 offset=0
 255	EMPTY_FRAME \start, SS+8+\offset-RIP
 256	/*CFI_REL_OFFSET ss, SS+\offset-RIP*/
 257	CFI_REL_OFFSET rsp, RSP+\offset-RIP
 258	/*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
 259	/*CFI_REL_OFFSET cs, CS+\offset-RIP*/
 260	CFI_REL_OFFSET rip, RIP+\offset-RIP
 261	.endm
 262
 263/*
 264 * initial frame state for exceptions with error code (and interrupts
 265 * with vector already pushed)
 266 */
 267	.macro XCPT_FRAME start=1 offset=0
 268	INTR_FRAME \start, RIP+\offset-ORIG_RAX
 269	/*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
 270	.endm
 271
 272/*
 273 * frame that enables calling into C.
 274 */
 275	.macro PARTIAL_FRAME start=1 offset=0
 276	XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
 277	CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
 278	CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
 279	CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
 280	CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
 281	CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
 282	CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
 283	CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
 284	CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
 285	CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
 286	.endm
 287
 288/*
 289 * frame that enables passing a complete pt_regs to a C function.
 290 */
 291	.macro DEFAULT_FRAME start=1 offset=0
 292	PARTIAL_FRAME \start, R11+\offset-R15
 293	CFI_REL_OFFSET rbx, RBX+\offset
 294	CFI_REL_OFFSET rbp, RBP+\offset
 295	CFI_REL_OFFSET r12, R12+\offset
 296	CFI_REL_OFFSET r13, R13+\offset
 297	CFI_REL_OFFSET r14, R14+\offset
 298	CFI_REL_OFFSET r15, R15+\offset
 299	.endm
 300
 301/* save partial stack frame */
 302	.macro SAVE_ARGS_IRQ
 303	cld
 304	/* start from rbp in pt_regs and jump over */
 305	movq_cfi rdi, RDI-RBP
 306	movq_cfi rsi, RSI-RBP
 307	movq_cfi rdx, RDX-RBP
 308	movq_cfi rcx, RCX-RBP
 309	movq_cfi rax, RAX-RBP
 310	movq_cfi  r8,  R8-RBP
 311	movq_cfi  r9,  R9-RBP
 312	movq_cfi r10, R10-RBP
 313	movq_cfi r11, R11-RBP
 314
 315	/* Save rbp so that we can unwind from get_irq_regs() */
 316	movq_cfi rbp, 0
 317
 318	/* Save previous stack value */
 319	movq %rsp, %rsi
 320
 321	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
 322	testl $3, CS(%rdi)
 323	je 1f
 324	SWAPGS
 325	/*
 326	 * irq_count is used to check if a CPU is already on an interrupt stack
 327	 * or not. While this is essentially redundant with preempt_count it is
 328	 * a little cheaper to use a separate counter in the PDA (short of
 329	 * moving irq_enter into assembly, which would be too much work)
 330	 */
 3311:	incl PER_CPU_VAR(irq_count)
 332	jne 2f
 333	mov PER_CPU_VAR(irq_stack_ptr),%rsp
 334	EMPTY_FRAME 0
 335
 3362:	/* Store previous stack value */
 337	pushq %rsi
 338	/* We entered an interrupt context - irqs are off: */
 339	TRACE_IRQS_OFF
 340	.endm
 341
 342ENTRY(save_rest)
 343	PARTIAL_FRAME 1 REST_SKIP+8
 344	movq 5*8+16(%rsp), %r11	/* save return address */
 345	movq_cfi rbx, RBX+16
 346	movq_cfi rbp, RBP+16
 347	movq_cfi r12, R12+16
 348	movq_cfi r13, R13+16
 349	movq_cfi r14, R14+16
 350	movq_cfi r15, R15+16
 351	movq %r11, 8(%rsp)	/* return address */
 352	FIXUP_TOP_OF_STACK %r11, 16
 353	ret
 354	CFI_ENDPROC
 355END(save_rest)
 356
 357/* save complete stack frame */
 358	.pushsection .kprobes.text, "ax"
 359ENTRY(save_paranoid)
 360	XCPT_FRAME 1 RDI+8
 361	cld
 362	movq_cfi rdi, RDI+8
 363	movq_cfi rsi, RSI+8
 364	movq_cfi rdx, RDX+8
 365	movq_cfi rcx, RCX+8
 366	movq_cfi rax, RAX+8
 367	movq_cfi r8, R8+8
 368	movq_cfi r9, R9+8
 369	movq_cfi r10, R10+8
 370	movq_cfi r11, R11+8
 371	movq_cfi rbx, RBX+8
 372	movq_cfi rbp, RBP+8
 373	movq_cfi r12, R12+8
 374	movq_cfi r13, R13+8
 375	movq_cfi r14, R14+8
 376	movq_cfi r15, R15+8
 377	movl $1,%ebx
 378	movl $MSR_GS_BASE,%ecx
 379	rdmsr
 380	testl %edx,%edx
 381	js 1f	/* negative -> in kernel */
 382	SWAPGS
 383	xorl %ebx,%ebx
 3841:	ret
 385	CFI_ENDPROC
 386END(save_paranoid)
 387	.popsection
 388
 389/*
 390 * A newly forked process directly context switches into this address.
 391 *
 392 * rdi: prev task we switched from
 393 */
 394ENTRY(ret_from_fork)
 395	DEFAULT_FRAME
 396
 397	LOCK ; btr $TIF_FORK,TI_flags(%r8)
 398
 399	pushq_cfi kernel_eflags(%rip)
 400	popfq_cfi				# reset kernel eflags
 401
 402	call schedule_tail			# rdi: 'prev' task parameter
 403
 404	GET_THREAD_INFO(%rcx)
 405
 406	RESTORE_REST
 407
 408	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 409	je   int_ret_from_sys_call
 410
 411	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 412	jnz  int_ret_from_sys_call
 413
 414	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 415	jmp ret_from_sys_call			# go to the SYSRET fastpath
 416
 417	CFI_ENDPROC
 418END(ret_from_fork)
 419
 420/*
 421 * System call entry. Up to 6 arguments in registers are supported.
 422 *
 423 * SYSCALL does not save anything on the stack and does not change the
 424 * stack pointer.
 425 */
 426
 427/*
 428 * Register setup:
 429 * rax  system call number
 430 * rdi  arg0
 431 * rcx  return address for syscall/sysret, C arg3
 432 * rsi  arg1
 433 * rdx  arg2
 434 * r10  arg3 	(--> moved to rcx for C)
 435 * r8   arg4
 436 * r9   arg5
 437 * r11  eflags for syscall/sysret, temporary for C
 438 * r12-r15,rbp,rbx saved by C code, not touched.
 439 *
 440 * Interrupts are off on entry.
 441 * Only called from user space.
 442 *
 443 * XXX	if we had a free scratch register we could save the RSP into the stack frame
  444 *      and report it properly in ps. Unfortunately we don't have one.
 445 *
  446 * When the user can change the frame, always force IRET. That is because
  447 * IRET deals with non-canonical addresses better; SYSRET has trouble
  448 * with them due to bugs in both AMD and Intel CPUs.
 449 */
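/*
 * Added worked example (a sketch, not part of the original file): for a
 * user-space call such as write(fd, buf, count), the registers on entry to
 * system_call are assumed to look like
 *
 *	rax = __NR_write    rdi = fd    rsi = buf    rdx = count
 *	rcx = user RIP saved by SYSCALL    r11 = user RFLAGS
 *
 * For a syscall that also has a fourth argument, the fast path's
 * "movq %r10,%rcx" below moves arg3 into the register the C calling
 * convention expects before the indirect call through sys_call_table.
 */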
 450
 451ENTRY(system_call)
 452	CFI_STARTPROC	simple
 453	CFI_SIGNAL_FRAME
 454	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 455	CFI_REGISTER	rip,rcx
 456	/*CFI_REGISTER	rflags,r11*/
 457	SWAPGS_UNSAFE_STACK
 458	/*
 459	 * A hypervisor implementation might want to use a label
 460	 * after the swapgs, so that it can do the swapgs
 461	 * for the guest and jump here on syscall.
 462	 */
 463ENTRY(system_call_after_swapgs)
 464
 465	movq	%rsp,PER_CPU_VAR(old_rsp)
 466	movq	PER_CPU_VAR(kernel_stack),%rsp
 467	/*
 468	 * No need to follow this irqs off/on section - it's straight
 469	 * and short:
 470	 */
 471	ENABLE_INTERRUPTS(CLBR_NONE)
 472	SAVE_ARGS 8,0
 473	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
 474	movq  %rcx,RIP-ARGOFFSET(%rsp)
 475	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 476	GET_THREAD_INFO(%rcx)
 477	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
 478	jnz tracesys
 479system_call_fastpath:
 480	cmpq $__NR_syscall_max,%rax
 481	ja badsys
 482	movq %r10,%rcx
 483	call *sys_call_table(,%rax,8)  # XXX:	 rip relative
 484	movq %rax,RAX-ARGOFFSET(%rsp)
 485/*
 486 * Syscall return path ending with SYSRET (fast path)
 487 * Has incomplete stack frame and undefined top of stack.
 488 */
 489ret_from_sys_call:
 490	movl $_TIF_ALLWORK_MASK,%edi
 491	/* edi:	flagmask */
 492sysret_check:
 493	LOCKDEP_SYS_EXIT
 494	GET_THREAD_INFO(%rcx)
 495	DISABLE_INTERRUPTS(CLBR_NONE)
 496	TRACE_IRQS_OFF
 497	movl TI_flags(%rcx),%edx
 498	andl %edi,%edx
 499	jnz  sysret_careful
 500	CFI_REMEMBER_STATE
 501	/*
 502	 * sysretq will re-enable interrupts:
 503	 */
 504	TRACE_IRQS_ON
 505	movq RIP-ARGOFFSET(%rsp),%rcx
 506	CFI_REGISTER	rip,rcx
 507	RESTORE_ARGS 1,-ARG_SKIP,0
 508	/*CFI_REGISTER	rflags,r11*/
 509	movq	PER_CPU_VAR(old_rsp), %rsp
 510	USERGS_SYSRET64
 511
 512	CFI_RESTORE_STATE
 513	/* Handle reschedules */
 514	/* edx:	work, edi: workmask */
 515sysret_careful:
 516	bt $TIF_NEED_RESCHED,%edx
 517	jnc sysret_signal
 518	TRACE_IRQS_ON
 519	ENABLE_INTERRUPTS(CLBR_NONE)
 520	pushq_cfi %rdi
 521	call schedule
 522	popq_cfi %rdi
 523	jmp sysret_check
 524
 525	/* Handle a signal */
 526sysret_signal:
 527	TRACE_IRQS_ON
 528	ENABLE_INTERRUPTS(CLBR_NONE)
 529#ifdef CONFIG_AUDITSYSCALL
 530	bt $TIF_SYSCALL_AUDIT,%edx
 531	jc sysret_audit
 532#endif
 533	/*
 534	 * We have a signal, or exit tracing or single-step.
 535	 * These all wind up with the iret return path anyway,
 536	 * so just join that path right now.
 537	 */
 538	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
 539	jmp int_check_syscall_exit_work
 540
 541badsys:
 542	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 543	jmp ret_from_sys_call
 544
 545#ifdef CONFIG_AUDITSYSCALL
 546	/*
 547	 * Fast path for syscall audit without full syscall trace.
 548	 * We just call audit_syscall_entry() directly, and then
 549	 * jump back to the normal fast path.
 550	 */
 551auditsys:
 552	movq %r10,%r9			/* 6th arg: 4th syscall arg */
 553	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
 554	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
 555	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 556	movq %rax,%rsi			/* 2nd arg: syscall number */
 557	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
 558	call audit_syscall_entry
 559	LOAD_ARGS 0		/* reload call-clobbered registers */
 560	jmp system_call_fastpath
 561
 562	/*
 563	 * Return fast path for syscall audit.  Call audit_syscall_exit()
 564	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 565	 * masked off.
 566	 */
 567sysret_audit:
 568	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
 569	cmpq $0,%rsi		/* is it < 0? */
 570	setl %al		/* 1 if so, 0 if not */
 571	movzbl %al,%edi		/* zero-extend that into %edi */
 572	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
 573	call audit_syscall_exit
 574	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 575	jmp sysret_check
 576#endif	/* CONFIG_AUDITSYSCALL */
 577
 578	/* Do syscall tracing */
 579tracesys:
 580#ifdef CONFIG_AUDITSYSCALL
 581	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
 582	jz auditsys
 583#endif
 584	SAVE_REST
 585	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
 586	FIXUP_TOP_OF_STACK %rdi
 587	movq %rsp,%rdi
 588	call syscall_trace_enter
 589	/*
 590	 * Reload arg registers from stack in case ptrace changed them.
 591	 * We don't reload %rax because syscall_trace_enter() returned
 592	 * the value it wants us to use in the table lookup.
 593	 */
 594	LOAD_ARGS ARGOFFSET, 1
 595	RESTORE_REST
 596	cmpq $__NR_syscall_max,%rax
 597	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
 598	movq %r10,%rcx	/* fixup for C */
 599	call *sys_call_table(,%rax,8)
 600	movq %rax,RAX-ARGOFFSET(%rsp)
 601	/* Use IRET because user could have changed frame */
 602
 603/*
 604 * Syscall return path ending with IRET.
 605 * Has correct top of stack, but partial stack frame.
 606 */
 607GLOBAL(int_ret_from_sys_call)
 608	DISABLE_INTERRUPTS(CLBR_NONE)
 609	TRACE_IRQS_OFF
 610	testl $3,CS-ARGOFFSET(%rsp)
 611	je retint_restore_args
 612	movl $_TIF_ALLWORK_MASK,%edi
 613	/* edi:	mask to check */
 614GLOBAL(int_with_check)
 615	LOCKDEP_SYS_EXIT_IRQ
 616	GET_THREAD_INFO(%rcx)
 617	movl TI_flags(%rcx),%edx
 618	andl %edi,%edx
 619	jnz   int_careful
 620	andl    $~TS_COMPAT,TI_status(%rcx)
 621	jmp   retint_swapgs
 622
 623	/* Either reschedule or signal or syscall exit tracking needed. */
 624	/* First do a reschedule test. */
 625	/* edx:	work, edi: workmask */
 626int_careful:
 627	bt $TIF_NEED_RESCHED,%edx
 628	jnc  int_very_careful
 629	TRACE_IRQS_ON
 630	ENABLE_INTERRUPTS(CLBR_NONE)
 631	pushq_cfi %rdi
 632	call schedule
 633	popq_cfi %rdi
 634	DISABLE_INTERRUPTS(CLBR_NONE)
 635	TRACE_IRQS_OFF
 636	jmp int_with_check
 637
 638	/* handle signals and tracing -- both require a full stack frame */
 639int_very_careful:
 640	TRACE_IRQS_ON
 641	ENABLE_INTERRUPTS(CLBR_NONE)
 642int_check_syscall_exit_work:
 643	SAVE_REST
 644	/* Check for syscall exit trace */
 645	testl $_TIF_WORK_SYSCALL_EXIT,%edx
 646	jz int_signal
 647	pushq_cfi %rdi
 648	leaq 8(%rsp),%rdi	# &ptregs -> arg1
 649	call syscall_trace_leave
 650	popq_cfi %rdi
 651	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
 652	jmp int_restore_rest
 653
 654int_signal:
 655	testl $_TIF_DO_NOTIFY_MASK,%edx
 656	jz 1f
 657	movq %rsp,%rdi		# &ptregs -> arg1
 658	xorl %esi,%esi		# oldset -> arg2
 659	call do_notify_resume
 6601:	movl $_TIF_WORK_MASK,%edi
 661int_restore_rest:
 662	RESTORE_REST
 663	DISABLE_INTERRUPTS(CLBR_NONE)
 664	TRACE_IRQS_OFF
 665	jmp int_with_check
 666	CFI_ENDPROC
 667END(system_call)
 668
 669/*
  670 * Certain special system calls need to save a complete stack frame.
 671 */
 672	.macro PTREGSCALL label,func,arg
 673ENTRY(\label)
 674	PARTIAL_FRAME 1 8		/* offset 8: return address */
 675	subq $REST_SKIP, %rsp
 676	CFI_ADJUST_CFA_OFFSET REST_SKIP
 677	call save_rest
 678	DEFAULT_FRAME 0 8		/* offset 8: return address */
 679	leaq 8(%rsp), \arg	/* pt_regs pointer */
 680	call \func
 681	jmp ptregscall_common
 682	CFI_ENDPROC
 683END(\label)
 684	.endm
 685
 686	PTREGSCALL stub_clone, sys_clone, %r8
 687	PTREGSCALL stub_fork, sys_fork, %rdi
 688	PTREGSCALL stub_vfork, sys_vfork, %rdi
 689	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 690	PTREGSCALL stub_iopl, sys_iopl, %rsi
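/*
 * Added illustration (a sketch, not part of the original file): each
 * PTREGSCALL use above stamps out a small stub. For example,
 * "PTREGSCALL stub_clone, sys_clone, %r8" is expected to expand to roughly
 *
 *	ENTRY(stub_clone)
 *		subq $REST_SKIP, %rsp
 *		call save_rest		# complete the pt_regs frame
 *		leaq 8(%rsp), %r8	# pt_regs pointer in the \arg register
 *		call sys_clone
 *		jmp ptregscall_common	# restore callee-saved regs and return
 *	END(stub_clone)
 *
 * (CFI annotations omitted), funnelling the return through
 * ptregscall_common below.
 */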
 691
 692ENTRY(ptregscall_common)
 693	DEFAULT_FRAME 1 8	/* offset 8: return address */
 694	RESTORE_TOP_OF_STACK %r11, 8
 695	movq_cfi_restore R15+8, r15
 696	movq_cfi_restore R14+8, r14
 697	movq_cfi_restore R13+8, r13
 698	movq_cfi_restore R12+8, r12
 699	movq_cfi_restore RBP+8, rbp
 700	movq_cfi_restore RBX+8, rbx
 701	ret $REST_SKIP		/* pop extended registers */
 702	CFI_ENDPROC
 703END(ptregscall_common)
 704
 705ENTRY(stub_execve)
 706	CFI_STARTPROC
 707	addq $8, %rsp
 708	PARTIAL_FRAME 0
 709	SAVE_REST
 710	FIXUP_TOP_OF_STACK %r11
 711	movq %rsp, %rcx
 712	call sys_execve
 713	RESTORE_TOP_OF_STACK %r11
 714	movq %rax,RAX(%rsp)
 715	RESTORE_REST
 716	jmp int_ret_from_sys_call
 717	CFI_ENDPROC
 718END(stub_execve)
 719
 720/*
 721 * sigreturn is special because it needs to restore all registers on return.
 722 * This cannot be done with SYSRET, so use the IRET return path instead.
 723 */
 724ENTRY(stub_rt_sigreturn)
 725	CFI_STARTPROC
 726	addq $8, %rsp
 727	PARTIAL_FRAME 0
 728	SAVE_REST
 729	movq %rsp,%rdi
 730	FIXUP_TOP_OF_STACK %r11
 731	call sys_rt_sigreturn
 732	movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
 733	RESTORE_REST
 734	jmp int_ret_from_sys_call
 735	CFI_ENDPROC
 736END(stub_rt_sigreturn)
 737
 738/*
 739 * Build the entry stubs and pointer table with some assembler magic.
 740 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 741 * single cache line on all modern x86 implementations.
 742 */
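/*
 * Added illustration (a sketch, not part of the original file): for each
 * vector the .rept loop below emits a tiny stub in .entry.text plus a
 * pointer to it in the interrupt[] table in .init.rodata, roughly
 *
 *	1:	pushq $(~vector + 0x80)		# stub in .entry.text
 *		jmp 2f				# omitted for the 7th of a group
 *		...
 *	2:	jmp common_interrupt
 *
 * and, back in .init.rodata:
 *
 *		.quad 1b			# interrupt[] entry -> stub
 *
 * Seven consecutive stubs share one "jmp common_interrupt" tail so that each
 * 32-byte group stays within a cache line.
 */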
 743	.section .init.rodata,"a"
 744ENTRY(interrupt)
 745	.section .entry.text
 746	.p2align 5
 747	.p2align CONFIG_X86_L1_CACHE_SHIFT
 748ENTRY(irq_entries_start)
 749	INTR_FRAME
 750vector=FIRST_EXTERNAL_VECTOR
 751.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
 752	.balign 32
 753  .rept	7
 754    .if vector < NR_VECTORS
 755      .if vector <> FIRST_EXTERNAL_VECTOR
 756	CFI_ADJUST_CFA_OFFSET -8
 757      .endif
 7581:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
 759      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
 760	jmp 2f
 761      .endif
 762      .previous
 763	.quad 1b
 764      .section .entry.text
 765vector=vector+1
 766    .endif
 767  .endr
 7682:	jmp common_interrupt
 769.endr
 770	CFI_ENDPROC
 771END(irq_entries_start)
 772
 773.previous
 774END(interrupt)
 775.previous
 776
 777/*
 778 * Interrupt entry/exit.
 779 *
  780 * Interrupt entry points save only callee-clobbered registers in the fast path.
 781 *
 782 * Entry runs with interrupts off.
 783 */
 784
 785/* 0(%rsp): ~(interrupt number) */
 786	.macro interrupt func
 787	/* reserve pt_regs for scratch regs and rbp */
 788	subq $ORIG_RAX-RBP, %rsp
 789	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 790	SAVE_ARGS_IRQ
 791	PARTIAL_FRAME 0
 792	call \func
 793	.endm
 794
 795/*
 796 * Interrupt entry/exit should be protected against kprobes
 797 */
 798	.pushsection .kprobes.text, "ax"
 799	/*
 800	 * The interrupt stubs push (~vector+0x80) onto the stack and
 801	 * then jump to common_interrupt.
 802	 */
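	/*
	 * Added worked example (a sketch, not part of the original file):
	 * for a hypothetical vector 0x31 the stub pushes ~0x31 + 0x80 = 0x4e,
	 * which fits in a signed byte, so the push encodes compactly.  The
	 * "addq $-0x80" below turns that into 0x4e - 0x80 = -0x32 = ~0x31,
	 * i.e. the one's complement of the vector; that is why the saved
	 * orig_ax ends up in the [-256,-1] range and the C handler can
	 * recover the vector by complementing it again.
	 */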
 803	.p2align CONFIG_X86_L1_CACHE_SHIFT
 804common_interrupt:
 805	XCPT_FRAME
 806	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 807	interrupt do_IRQ
 808	/* 0(%rsp): old_rsp-ARGOFFSET */
 809ret_from_intr:
 810	DISABLE_INTERRUPTS(CLBR_NONE)
 811	TRACE_IRQS_OFF
 812	decl PER_CPU_VAR(irq_count)
 813
 814	/* Restore saved previous stack */
 815	popq %rsi
 816	leaq 16(%rsi), %rsp
 817
 818	CFI_DEF_CFA_REGISTER	rsp
 819	CFI_ADJUST_CFA_OFFSET	-16
 820
 821exit_intr:
 822	GET_THREAD_INFO(%rcx)
 823	testl $3,CS-ARGOFFSET(%rsp)
 824	je retint_kernel
 825
 826	/* Interrupt came from user space */
 827	/*
 828	 * Has a correct top of stack, but a partial stack frame
 829	 * %rcx: thread info. Interrupts off.
 830	 */
 831retint_with_reschedule:
 832	movl $_TIF_WORK_MASK,%edi
 833retint_check:
 834	LOCKDEP_SYS_EXIT_IRQ
 835	movl TI_flags(%rcx),%edx
 836	andl %edi,%edx
 837	CFI_REMEMBER_STATE
 838	jnz  retint_careful
 839
 840retint_swapgs:		/* return to user-space */
 841	/*
 842	 * The iretq could re-enable interrupts:
 843	 */
 844	DISABLE_INTERRUPTS(CLBR_ANY)
 845	TRACE_IRQS_IRETQ
 846	SWAPGS
 847	jmp restore_args
 848
 849retint_restore_args:	/* return to kernel space */
 850	DISABLE_INTERRUPTS(CLBR_ANY)
 851	/*
 852	 * The iretq could re-enable interrupts:
 853	 */
 854	TRACE_IRQS_IRETQ
 855restore_args:
 856	RESTORE_ARGS 1,8,1
 857
 858irq_return:
 859	INTERRUPT_RETURN
 860
 861	.section __ex_table, "a"
 862	.quad irq_return, bad_iret
 863	.previous
 864
 865#ifdef CONFIG_PARAVIRT
 866ENTRY(native_iret)
 867	iretq
 868
 869	.section __ex_table,"a"
 870	.quad native_iret, bad_iret
 871	.previous
 872#endif
 873
 874	.section .fixup,"ax"
 875bad_iret:
 876	/*
 877	 * The iret traps when the %cs or %ss being restored is bogus.
 878	 * We've lost the original trap vector and error code.
 879	 * #GPF is the most likely one to get for an invalid selector.
 880	 * So pretend we completed the iret and took the #GPF in user mode.
 881	 *
 882	 * We are now running with the kernel GS after exception recovery.
 883	 * But error_entry expects us to have user GS to match the user %cs,
 884	 * so swap back.
 885	 */
 886	pushq $0
 887
 888	SWAPGS
 889	jmp general_protection
 890
 891	.previous
 892
 893	/* edi: workmask, edx: work */
 894retint_careful:
 895	CFI_RESTORE_STATE
 896	bt    $TIF_NEED_RESCHED,%edx
 897	jnc   retint_signal
 898	TRACE_IRQS_ON
 899	ENABLE_INTERRUPTS(CLBR_NONE)
 900	pushq_cfi %rdi
 901	call  schedule
 902	popq_cfi %rdi
 903	GET_THREAD_INFO(%rcx)
 904	DISABLE_INTERRUPTS(CLBR_NONE)
 905	TRACE_IRQS_OFF
 906	jmp retint_check
 907
 908retint_signal:
 909	testl $_TIF_DO_NOTIFY_MASK,%edx
 910	jz    retint_swapgs
 911	TRACE_IRQS_ON
 912	ENABLE_INTERRUPTS(CLBR_NONE)
 913	SAVE_REST
 914	movq $-1,ORIG_RAX(%rsp)
 915	xorl %esi,%esi		# oldset
 916	movq %rsp,%rdi		# &pt_regs
 917	call do_notify_resume
 918	RESTORE_REST
 919	DISABLE_INTERRUPTS(CLBR_NONE)
 920	TRACE_IRQS_OFF
 921	GET_THREAD_INFO(%rcx)
 922	jmp retint_with_reschedule
 923
 924#ifdef CONFIG_PREEMPT
 925	/* Returning to kernel space. Check if we need preemption */
 926	/* rcx:	 threadinfo. interrupts off. */
 927ENTRY(retint_kernel)
 928	cmpl $0,TI_preempt_count(%rcx)
 929	jnz  retint_restore_args
 930	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
 931	jnc  retint_restore_args
 932	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
 933	jnc  retint_restore_args
 934	call preempt_schedule_irq
 935	jmp exit_intr
 936#endif
 937
 938	CFI_ENDPROC
 939END(common_interrupt)
 940/*
 941 * End of kprobes section
 942 */
 943       .popsection
 944
 945/*
 946 * APIC interrupts.
 947 */
 948.macro apicinterrupt num sym do_sym
 949ENTRY(\sym)
 950	INTR_FRAME
 951	pushq_cfi $~(\num)
 952	interrupt \do_sym
 953	jmp ret_from_intr
 954	CFI_ENDPROC
 955END(\sym)
 956.endm
 957
 958#ifdef CONFIG_SMP
 959apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 960	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
 961apicinterrupt REBOOT_VECTOR \
 962	reboot_interrupt smp_reboot_interrupt
 963#endif
 964
 965#ifdef CONFIG_X86_UV
 966apicinterrupt UV_BAU_MESSAGE \
 967	uv_bau_message_intr1 uv_bau_message_interrupt
 968#endif
 969apicinterrupt LOCAL_TIMER_VECTOR \
 970	apic_timer_interrupt smp_apic_timer_interrupt
 971apicinterrupt X86_PLATFORM_IPI_VECTOR \
 972	x86_platform_ipi smp_x86_platform_ipi
 973
 974#ifdef CONFIG_SMP
 975.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
 976	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 977.if NUM_INVALIDATE_TLB_VECTORS > \idx
 978apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
 979	invalidate_interrupt\idx smp_invalidate_interrupt
 980.endif
 981.endr
 982#endif
 983
 984apicinterrupt THRESHOLD_APIC_VECTOR \
 985	threshold_interrupt smp_threshold_interrupt
 986apicinterrupt THERMAL_APIC_VECTOR \
 987	thermal_interrupt smp_thermal_interrupt
 988
 989#ifdef CONFIG_SMP
 990apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
 991	call_function_single_interrupt smp_call_function_single_interrupt
 992apicinterrupt CALL_FUNCTION_VECTOR \
 993	call_function_interrupt smp_call_function_interrupt
 994apicinterrupt RESCHEDULE_VECTOR \
 995	reschedule_interrupt smp_reschedule_interrupt
 996#endif
 997
 998apicinterrupt ERROR_APIC_VECTOR \
 999	error_interrupt smp_error_interrupt
1000apicinterrupt SPURIOUS_APIC_VECTOR \
1001	spurious_interrupt smp_spurious_interrupt
1002
1003#ifdef CONFIG_IRQ_WORK
1004apicinterrupt IRQ_WORK_VECTOR \
1005	irq_work_interrupt smp_irq_work_interrupt
1006#endif
1007
1008/*
1009 * Exception entry points.
1010 */
1011.macro zeroentry sym do_sym
1012ENTRY(\sym)
1013	INTR_FRAME
1014	PARAVIRT_ADJUST_EXCEPTION_FRAME
1015	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1016	subq $ORIG_RAX-R15, %rsp
1017	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1018	call error_entry
1019	DEFAULT_FRAME 0
1020	movq %rsp,%rdi		/* pt_regs pointer */
1021	xorl %esi,%esi		/* no error code */
1022	call \do_sym
1023	jmp error_exit		/* %ebx: no swapgs flag */
1024	CFI_ENDPROC
1025END(\sym)
1026.endm
1027
1028.macro paranoidzeroentry sym do_sym
1029ENTRY(\sym)
1030	INTR_FRAME
1031	PARAVIRT_ADJUST_EXCEPTION_FRAME
1032	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1033	subq $ORIG_RAX-R15, %rsp
1034	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1035	call save_paranoid
1036	TRACE_IRQS_OFF
1037	movq %rsp,%rdi		/* pt_regs pointer */
1038	xorl %esi,%esi		/* no error code */
1039	call \do_sym
1040	jmp paranoid_exit	/* %ebx: no swapgs flag */
1041	CFI_ENDPROC
1042END(\sym)
1043.endm
1044
1045#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1046.macro paranoidzeroentry_ist sym do_sym ist
1047ENTRY(\sym)
1048	INTR_FRAME
1049	PARAVIRT_ADJUST_EXCEPTION_FRAME
1050	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
1051	subq $ORIG_RAX-R15, %rsp
1052	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1053	call save_paranoid
1054	TRACE_IRQS_OFF
1055	movq %rsp,%rdi		/* pt_regs pointer */
1056	xorl %esi,%esi		/* no error code */
1057	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1058	call \do_sym
1059	addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1060	jmp paranoid_exit	/* %ebx: no swapgs flag */
1061	CFI_ENDPROC
1062END(\sym)
1063.endm
1064
1065.macro errorentry sym do_sym
1066ENTRY(\sym)
1067	XCPT_FRAME
1068	PARAVIRT_ADJUST_EXCEPTION_FRAME
1069	subq $ORIG_RAX-R15, %rsp
1070	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1071	call error_entry
1072	DEFAULT_FRAME 0
1073	movq %rsp,%rdi			/* pt_regs pointer */
1074	movq ORIG_RAX(%rsp),%rsi	/* get error code */
1075	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
1076	call \do_sym
1077	jmp error_exit			/* %ebx: no swapgs flag */
1078	CFI_ENDPROC
1079END(\sym)
1080.endm
1081
1082	/* error code is on the stack already */
1083.macro paranoiderrorentry sym do_sym
1084ENTRY(\sym)
1085	XCPT_FRAME
1086	PARAVIRT_ADJUST_EXCEPTION_FRAME
1087	subq $ORIG_RAX-R15, %rsp
1088	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1089	call save_paranoid
1090	DEFAULT_FRAME 0
1091	TRACE_IRQS_OFF
1092	movq %rsp,%rdi			/* pt_regs pointer */
1093	movq ORIG_RAX(%rsp),%rsi	/* get error code */
1094	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
1095	call \do_sym
1096	jmp paranoid_exit		/* %ebx: no swapgs flag */
1097	CFI_ENDPROC
1098END(\sym)
1099.endm
1100
1101zeroentry divide_error do_divide_error
1102zeroentry overflow do_overflow
1103zeroentry bounds do_bounds
1104zeroentry invalid_op do_invalid_op
1105zeroentry device_not_available do_device_not_available
1106paranoiderrorentry double_fault do_double_fault
1107zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1108errorentry invalid_TSS do_invalid_TSS
1109errorentry segment_not_present do_segment_not_present
1110zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1111zeroentry coprocessor_error do_coprocessor_error
1112errorentry alignment_check do_alignment_check
1113zeroentry simd_coprocessor_error do_simd_coprocessor_error
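/*
 * Added note (a sketch, not part of the original file): each line above
 * stamps out a complete entry point from the macros defined earlier.  For
 * example, "errorentry general_protection do_general_protection" is expected
 * to produce an ENTRY(general_protection) that builds the pt_regs frame via
 * error_entry, loads %rdi = &pt_regs and %rsi = the error code from
 * ORIG_RAX, calls do_general_protection, and returns through error_exit.
 * The zeroentry variants push a dummy ORIG_RAX of -1 and pass a zero error
 * code instead.
 */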
1114
1115
1116	/* Reload gs selector with exception handling */
1117	/* edi:  new selector */
1118ENTRY(native_load_gs_index)
1119	CFI_STARTPROC
1120	pushfq_cfi
1121	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1122	SWAPGS
1123gs_change:
1124	movl %edi,%gs
11252:	mfence		/* workaround */
1126	SWAPGS
1127	popfq_cfi
1128	ret
1129	CFI_ENDPROC
1130END(native_load_gs_index)
1131
1132	.section __ex_table,"a"
1133	.align 8
1134	.quad gs_change,bad_gs
1135	.previous
1136	.section .fixup,"ax"
1137	/* running with kernelgs */
1138bad_gs:
1139	SWAPGS			/* switch back to user gs */
1140	xorl %eax,%eax
1141	movl %eax,%gs
1142	jmp  2b
1143	.previous
1144
1145ENTRY(kernel_thread_helper)
1146	pushq $0		# fake return address
1147	CFI_STARTPROC
1148	/*
1149	 * Here we are in the child and the registers are set as they were
1150	 * at kernel_thread() invocation in the parent.
1151	 */
1152	call *%rsi
1153	# exit
1154	mov %eax, %edi
1155	call do_exit
1156	ud2			# padding for call trace
1157	CFI_ENDPROC
1158END(kernel_thread_helper)
1159
1160/*
1161 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1162 *
1163 * C extern interface:
1164 *	 extern long execve(const char *name, char **argv, char **envp)
1165 *
1166 * asm input arguments:
1167 *	rdi: name, rsi: argv, rdx: envp
1168 *
 1169 * We want to fall back into:
 1170 *	extern long sys_execve(const char *name, char **argv, char **envp, struct pt_regs *regs)
 1171 *
 1172 * sys_execve asm fallback arguments:
 1173 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1174 */
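/*
 * Added usage sketch (an assumption, not part of the original file): an
 * in-kernel caller is expected to use the C interface above roughly as
 *
 *	char *argv[] = { "/sbin/init", NULL };
 *	char *envp[] = { "HOME=/", "TERM=linux", NULL };
 *	long ret = kernel_execve("/sbin/init", argv, envp);
 *
 * where a return value is only seen on failure; on success the
 * int_ret_from_sys_call path below IRETs straight into the freshly loaded
 * user-space image.
 */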
1175ENTRY(kernel_execve)
1176	CFI_STARTPROC
1177	FAKE_STACK_FRAME $0
1178	SAVE_ALL
1179	movq %rsp,%rcx
1180	call sys_execve
1181	movq %rax, RAX(%rsp)
1182	RESTORE_REST
1183	testq %rax,%rax
1184	je int_ret_from_sys_call
1185	RESTORE_ARGS
1186	UNFAKE_STACK_FRAME
1187	ret
1188	CFI_ENDPROC
1189END(kernel_execve)
1190
1191/* Call softirq on interrupt stack. Interrupts are off. */
1192ENTRY(call_softirq)
1193	CFI_STARTPROC
1194	pushq_cfi %rbp
1195	CFI_REL_OFFSET rbp,0
1196	mov  %rsp,%rbp
1197	CFI_DEF_CFA_REGISTER rbp
1198	incl PER_CPU_VAR(irq_count)
1199	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1200	push  %rbp			# backlink for old unwinder
1201	call __do_softirq
1202	leaveq
1203	CFI_RESTORE		rbp
1204	CFI_DEF_CFA_REGISTER	rsp
1205	CFI_ADJUST_CFA_OFFSET   -8
1206	decl PER_CPU_VAR(irq_count)
1207	ret
1208	CFI_ENDPROC
1209END(call_softirq)
1210
1211#ifdef CONFIG_XEN
1212zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1213
1214/*
1215 * A note on the "critical region" in our callback handler.
1216 * We want to avoid stacking callback handlers due to events occurring
1217 * during handling of the last event. To do this, we keep events disabled
1218 * until we've done all processing. HOWEVER, we must enable events before
1219 * popping the stack frame (can't be done atomically) and so it would still
1220 * be possible to get enough handler activations to overflow the stack.
1221 * Although unlikely, bugs of that kind are hard to track down, so we'd
1222 * like to avoid the possibility.
1223 * So, on entry to the handler we detect whether we interrupted an
1224 * existing activation in its critical region -- if so, we pop the current
1225 * activation and restart the handler using the previous one.
1226 */
 1227ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
1228	CFI_STARTPROC
1229/*
 1230 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will
 1231 * see the correct pointer to the pt_regs.
1232 */
1233	movq %rdi, %rsp            # we don't return, adjust the stack frame
1234	CFI_ENDPROC
1235	DEFAULT_FRAME
123611:	incl PER_CPU_VAR(irq_count)
1237	movq %rsp,%rbp
1238	CFI_DEF_CFA_REGISTER rbp
1239	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1240	pushq %rbp			# backlink for old unwinder
1241	call xen_evtchn_do_upcall
1242	popq %rsp
1243	CFI_DEF_CFA_REGISTER rsp
1244	decl PER_CPU_VAR(irq_count)
1245	jmp  error_exit
1246	CFI_ENDPROC
1247END(xen_do_hypervisor_callback)
1248
1249/*
1250 * Hypervisor uses this for application faults while it executes.
1251 * We get here for two reasons:
1252 *  1. Fault while reloading DS, ES, FS or GS
1253 *  2. Fault while executing IRET
1254 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1255 * registers that could be reloaded and zeroed the others.
1256 * Category 2 we fix up by killing the current process. We cannot use the
1257 * normal Linux return path in this case because if we use the IRET hypercall
1258 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1259 * We distinguish between categories by comparing each saved segment register
 1260 * with its current contents: any discrepancy means we are in category 1.
1261 */
1262ENTRY(xen_failsafe_callback)
1263	INTR_FRAME 1 (6*8)
1264	/*CFI_REL_OFFSET gs,GS*/
1265	/*CFI_REL_OFFSET fs,FS*/
1266	/*CFI_REL_OFFSET es,ES*/
1267	/*CFI_REL_OFFSET ds,DS*/
1268	CFI_REL_OFFSET r11,8
1269	CFI_REL_OFFSET rcx,0
1270	movw %ds,%cx
1271	cmpw %cx,0x10(%rsp)
1272	CFI_REMEMBER_STATE
1273	jne 1f
1274	movw %es,%cx
1275	cmpw %cx,0x18(%rsp)
1276	jne 1f
1277	movw %fs,%cx
1278	cmpw %cx,0x20(%rsp)
1279	jne 1f
1280	movw %gs,%cx
1281	cmpw %cx,0x28(%rsp)
1282	jne 1f
1283	/* All segments match their saved values => Category 2 (Bad IRET). */
1284	movq (%rsp),%rcx
1285	CFI_RESTORE rcx
1286	movq 8(%rsp),%r11
1287	CFI_RESTORE r11
1288	addq $0x30,%rsp
1289	CFI_ADJUST_CFA_OFFSET -0x30
1290	pushq_cfi $0	/* RIP */
1291	pushq_cfi %r11
1292	pushq_cfi %rcx
1293	jmp general_protection
1294	CFI_RESTORE_STATE
12951:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1296	movq (%rsp),%rcx
1297	CFI_RESTORE rcx
1298	movq 8(%rsp),%r11
1299	CFI_RESTORE r11
1300	addq $0x30,%rsp
1301	CFI_ADJUST_CFA_OFFSET -0x30
1302	pushq_cfi $0
1303	SAVE_ALL
1304	jmp error_exit
1305	CFI_ENDPROC
1306END(xen_failsafe_callback)
1307
1308apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
1309	xen_hvm_callback_vector xen_evtchn_do_upcall
1310
1311#endif /* CONFIG_XEN */
1312
1313/*
1314 * Some functions should be protected against kprobes
1315 */
1316	.pushsection .kprobes.text, "ax"
1317
1318paranoidzeroentry_ist debug do_debug DEBUG_STACK
1319paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1320paranoiderrorentry stack_segment do_stack_segment
1321#ifdef CONFIG_XEN
1322zeroentry xen_debug do_debug
1323zeroentry xen_int3 do_int3
1324errorentry xen_stack_segment do_stack_segment
1325#endif
1326errorentry general_protection do_general_protection
1327errorentry page_fault do_page_fault
1328#ifdef CONFIG_KVM_GUEST
1329errorentry async_page_fault do_async_page_fault
1330#endif
1331#ifdef CONFIG_X86_MCE
1332paranoidzeroentry machine_check *machine_check_vector(%rip)
1333#endif
1334
1335	/*
1336	 * "Paranoid" exit path from exception stack.
1337	 * Paranoid because this is used by NMIs and cannot take
1338	 * any kernel state for granted.
1339	 * We don't do kernel preemption checks here, because only
1340	 * NMI should be common and it does not enable IRQs and
1341	 * cannot get reschedule ticks.
1342	 *
1343	 * "trace" is 0 for the NMI handler only, because irq-tracing
1344	 * is fundamentally NMI-unsafe. (we cannot change the soft and
1345	 * hard flags at once, atomically)
1346	 */
1347
1348	/* ebx:	no swapgs flag */
1349ENTRY(paranoid_exit)
1350	DEFAULT_FRAME
1351	DISABLE_INTERRUPTS(CLBR_NONE)
1352	TRACE_IRQS_OFF
1353	testl %ebx,%ebx				/* swapgs needed? */
1354	jnz paranoid_restore
1355	testl $3,CS(%rsp)
1356	jnz   paranoid_userspace
1357paranoid_swapgs:
1358	TRACE_IRQS_IRETQ 0
1359	SWAPGS_UNSAFE_STACK
1360	RESTORE_ALL 8
1361	jmp irq_return
1362paranoid_restore:
1363	TRACE_IRQS_IRETQ 0
1364	RESTORE_ALL 8
1365	jmp irq_return
1366paranoid_userspace:
1367	GET_THREAD_INFO(%rcx)
1368	movl TI_flags(%rcx),%ebx
1369	andl $_TIF_WORK_MASK,%ebx
1370	jz paranoid_swapgs
1371	movq %rsp,%rdi			/* &pt_regs */
1372	call sync_regs
1373	movq %rax,%rsp			/* switch stack for scheduling */
1374	testl $_TIF_NEED_RESCHED,%ebx
1375	jnz paranoid_schedule
1376	movl %ebx,%edx			/* arg3: thread flags */
1377	TRACE_IRQS_ON
1378	ENABLE_INTERRUPTS(CLBR_NONE)
1379	xorl %esi,%esi 			/* arg2: oldset */
1380	movq %rsp,%rdi 			/* arg1: &pt_regs */
1381	call do_notify_resume
1382	DISABLE_INTERRUPTS(CLBR_NONE)
1383	TRACE_IRQS_OFF
1384	jmp paranoid_userspace
1385paranoid_schedule:
1386	TRACE_IRQS_ON
1387	ENABLE_INTERRUPTS(CLBR_ANY)
1388	call schedule
1389	DISABLE_INTERRUPTS(CLBR_ANY)
1390	TRACE_IRQS_OFF
1391	jmp paranoid_userspace
1392	CFI_ENDPROC
1393END(paranoid_exit)
1394
1395/*
1396 * Exception entry point. This expects an error code/orig_rax on the stack.
 1397 * and returns the "no swapgs" flag in %ebx.
1398 */
1399ENTRY(error_entry)
1400	XCPT_FRAME
1401	CFI_ADJUST_CFA_OFFSET 15*8
1402	/* oldrax contains error code */
1403	cld
1404	movq_cfi rdi, RDI+8
1405	movq_cfi rsi, RSI+8
1406	movq_cfi rdx, RDX+8
1407	movq_cfi rcx, RCX+8
1408	movq_cfi rax, RAX+8
1409	movq_cfi  r8,  R8+8
1410	movq_cfi  r9,  R9+8
1411	movq_cfi r10, R10+8
1412	movq_cfi r11, R11+8
1413	movq_cfi rbx, RBX+8
1414	movq_cfi rbp, RBP+8
1415	movq_cfi r12, R12+8
1416	movq_cfi r13, R13+8
1417	movq_cfi r14, R14+8
1418	movq_cfi r15, R15+8
1419	xorl %ebx,%ebx
1420	testl $3,CS+8(%rsp)
1421	je error_kernelspace
1422error_swapgs:
1423	SWAPGS
1424error_sti:
1425	TRACE_IRQS_OFF
1426	ret
1427
1428/*
1429 * There are two places in the kernel that can potentially fault with
1430 * usergs. Handle them here. The exception handlers after iret run with
 1431 * kernel gs again, so don't set the user space flag. B-stepping K8s
 1432 * sometimes report a truncated RIP for IRET exceptions returning to
1433 * compat mode. Check for these here too.
1434 */
1435error_kernelspace:
1436	incl %ebx
1437	leaq irq_return(%rip),%rcx
1438	cmpq %rcx,RIP+8(%rsp)
1439	je error_swapgs
1440	movl %ecx,%eax	/* zero extend */
1441	cmpq %rax,RIP+8(%rsp)
1442	je bstep_iret
1443	cmpq $gs_change,RIP+8(%rsp)
1444	je error_swapgs
1445	jmp error_sti
1446
1447bstep_iret:
1448	/* Fix truncated RIP */
1449	movq %rcx,RIP+8(%rsp)
1450	jmp error_swapgs
1451	CFI_ENDPROC
1452END(error_entry)
1453
1454
1455/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
1456ENTRY(error_exit)
1457	DEFAULT_FRAME
1458	movl %ebx,%eax
1459	RESTORE_REST
1460	DISABLE_INTERRUPTS(CLBR_NONE)
1461	TRACE_IRQS_OFF
1462	GET_THREAD_INFO(%rcx)
1463	testl %eax,%eax
1464	jne retint_kernel
1465	LOCKDEP_SYS_EXIT_IRQ
1466	movl TI_flags(%rcx),%edx
1467	movl $_TIF_WORK_MASK,%edi
1468	andl %edi,%edx
1469	jnz retint_careful
1470	jmp retint_swapgs
1471	CFI_ENDPROC
1472END(error_exit)
1473
1474
1475	/* runs on exception stack */
1476ENTRY(nmi)
1477	INTR_FRAME
1478	PARAVIRT_ADJUST_EXCEPTION_FRAME
1479	pushq_cfi $-1
1480	subq $ORIG_RAX-R15, %rsp
1481	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1482	call save_paranoid
1483	DEFAULT_FRAME 0
1484	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1485	movq %rsp,%rdi
1486	movq $-1,%rsi
1487	call do_nmi
1488#ifdef CONFIG_TRACE_IRQFLAGS
1489	/* paranoidexit; without TRACE_IRQS_OFF */
1490	/* ebx:	no swapgs flag */
1491	DISABLE_INTERRUPTS(CLBR_NONE)
1492	testl %ebx,%ebx				/* swapgs needed? */
1493	jnz nmi_restore
1494	testl $3,CS(%rsp)
1495	jnz nmi_userspace
1496nmi_swapgs:
1497	SWAPGS_UNSAFE_STACK
1498nmi_restore:
1499	RESTORE_ALL 8
1500	jmp irq_return
1501nmi_userspace:
1502	GET_THREAD_INFO(%rcx)
1503	movl TI_flags(%rcx),%ebx
1504	andl $_TIF_WORK_MASK,%ebx
1505	jz nmi_swapgs
1506	movq %rsp,%rdi			/* &pt_regs */
1507	call sync_regs
1508	movq %rax,%rsp			/* switch stack for scheduling */
1509	testl $_TIF_NEED_RESCHED,%ebx
1510	jnz nmi_schedule
1511	movl %ebx,%edx			/* arg3: thread flags */
1512	ENABLE_INTERRUPTS(CLBR_NONE)
1513	xorl %esi,%esi 			/* arg2: oldset */
1514	movq %rsp,%rdi 			/* arg1: &pt_regs */
1515	call do_notify_resume
1516	DISABLE_INTERRUPTS(CLBR_NONE)
1517	jmp nmi_userspace
1518nmi_schedule:
1519	ENABLE_INTERRUPTS(CLBR_ANY)
1520	call schedule
1521	DISABLE_INTERRUPTS(CLBR_ANY)
1522	jmp nmi_userspace
1523	CFI_ENDPROC
1524#else
1525	jmp paranoid_exit
1526	CFI_ENDPROC
1527#endif
1528END(nmi)
1529
1530ENTRY(ignore_sysret)
1531	CFI_STARTPROC
1532	mov $-ENOSYS,%eax
1533	sysret
1534	CFI_ENDPROC
1535END(ignore_sysret)
1536
1537/*
1538 * End of kprobes section
1539 */
1540	.popsection