   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 *  Copyright (C) 1991,1992  Linus Torvalds
   4 *
   5 * entry_32.S contains the system-call and low-level fault and trap handling routines.
   6 *
   7 * Stack layout while running C code:
   8 *	ptrace needs to have all registers on the stack.
   9 *	If the order here is changed, it needs to be
  10 *	updated in fork.c:copy_process(), signal.c:do_signal(),
  11 *	ptrace.c and ptrace.h
  12 *
  13 *	 0(%esp) - %ebx
  14 *	 4(%esp) - %ecx
  15 *	 8(%esp) - %edx
  16 *	 C(%esp) - %esi
  17 *	10(%esp) - %edi
  18 *	14(%esp) - %ebp
  19 *	18(%esp) - %eax
  20 *	1C(%esp) - %ds
  21 *	20(%esp) - %es
  22 *	24(%esp) - %fs
  23 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
  24 *	2C(%esp) - orig_eax
  25 *	30(%esp) - %eip
  26 *	34(%esp) - %cs
  27 *	38(%esp) - %eflags
  28 *	3C(%esp) - %oldesp
  29 *	40(%esp) - %oldss
  30 */
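
For reference, here is a minimal standalone C sketch of the frame the offsets above describe. It is illustrative only: the field names follow the kernel's 32-bit struct pt_regs (the last two slots are what the list above calls oldesp/oldss), while the fixed-width types and the sketch's struct name are this illustration's own, chosen so the offset checks hold when compiled on any host.

	#include <stdint.h>
	#include <stddef.h>

	/* One 4-byte stack word per slot; in the real struct pt_regs the segment
	 * slots are a 16-bit selector plus 16 spare bits (e.g. __csh, which the
	 * CS_FROM_* flags later in this file reuse). */
	struct pt_regs_32_sketch {
		uint32_t bx, cx, dx, si, di, bp, ax;	/* 0x00 .. 0x18 */
		uint32_t ds, es, fs, gs;		/* 0x1c .. 0x28 */
		uint32_t orig_ax;			/* 0x2c */
		uint32_t ip, cs, flags, sp, ss;		/* 0x30 .. 0x40, the iret frame */
	};

	_Static_assert(offsetof(struct pt_regs_32_sketch, gs) == 0x28, "gs at 0x28");
	_Static_assert(offsetof(struct pt_regs_32_sketch, ip) == 0x30, "eip at 0x30");
	_Static_assert(offsetof(struct pt_regs_32_sketch, ss) == 0x40, "oldss at 0x40");
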
  31
  32#include <linux/linkage.h>
  33#include <linux/err.h>
  34#include <asm/thread_info.h>
  35#include <asm/irqflags.h>
  36#include <asm/errno.h>
  37#include <asm/segment.h>
  38#include <asm/smp.h>
  39#include <asm/percpu.h>
  40#include <asm/processor-flags.h>
  41#include <asm/irq_vectors.h>
  42#include <asm/cpufeatures.h>
  43#include <asm/alternative-asm.h>
  44#include <asm/asm.h>
  45#include <asm/smap.h>
  46#include <asm/frame.h>
  47#include <asm/nospec-branch.h>
  48
  49#include "calling.h"
  50
  51	.section .entry.text, "ax"
  52
  53/*
  54 * We use macros for low-level operations which need to be overridden
  55 * for paravirtualization.  The following will never clobber any registers:
  56 *   INTERRUPT_RETURN (aka. "iret")
  57 *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
  58 *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  59 *
  60 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  61 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
  62 * Allowing a register to be clobbered can shrink the paravirt replacement
  63 * enough to patch inline, increasing performance.
  64 */
  65
  66#ifdef CONFIG_PREEMPTION
  67# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
  68#else
  69# define preempt_stop(clobbers)
  70#endif
  71
  72.macro TRACE_IRQS_IRET
  73#ifdef CONFIG_TRACE_IRQFLAGS
  74	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)     # interrupts off?
  75	jz	1f
  76	TRACE_IRQS_ON
  771:
  78#endif
  79.endm
  80
  81#define PTI_SWITCH_MASK         (1 << PAGE_SHIFT)
  82
  83/*
  84 * User gs save/restore
  85 *
   86 * %gs is used for userland TLS, and the kernel only uses it for the stack
   87 * canary, which gcc requires to be at %gs:20 (illustrated after these
   88 * macros).  Read the comment at the top of stackprotector.h for more info.
  89 *
  90 * Local labels 98 and 99 are used.
  91 */
  92#ifdef CONFIG_X86_32_LAZY_GS
  93
   94 /* unfortunately push/pop can't be a no-op */
  95.macro PUSH_GS
  96	pushl	$0
  97.endm
  98.macro POP_GS pop=0
  99	addl	$(4 + \pop), %esp
 100.endm
 101.macro POP_GS_EX
 102.endm
 103
 104 /* all the rest are no-op */
 105.macro PTGS_TO_GS
 106.endm
 107.macro PTGS_TO_GS_EX
 108.endm
 109.macro GS_TO_REG reg
 110.endm
 111.macro REG_TO_PTGS reg
 112.endm
 113.macro SET_KERNEL_GS reg
 114.endm
 115
 116#else	/* CONFIG_X86_32_LAZY_GS */
 117
 118.macro PUSH_GS
 119	pushl	%gs
 120.endm
 121
 122.macro POP_GS pop=0
 12398:	popl	%gs
 124  .if \pop <> 0
 125	add	$\pop, %esp
 126  .endif
 127.endm
 128.macro POP_GS_EX
 129.pushsection .fixup, "ax"
 13099:	movl	$0, (%esp)
 131	jmp	98b
 132.popsection
 133	_ASM_EXTABLE(98b, 99b)
 134.endm
 135
 136.macro PTGS_TO_GS
 13798:	mov	PT_GS(%esp), %gs
 138.endm
 139.macro PTGS_TO_GS_EX
 140.pushsection .fixup, "ax"
 14199:	movl	$0, PT_GS(%esp)
 142	jmp	98b
 143.popsection
 144	_ASM_EXTABLE(98b, 99b)
 145.endm
 146
 147.macro GS_TO_REG reg
 148	movl	%gs, \reg
 149.endm
 150.macro REG_TO_PTGS reg
 151	movl	\reg, PT_GS(%esp)
 152.endm
 153.macro SET_KERNEL_GS reg
 154	movl	$(__KERNEL_STACK_CANARY), \reg
 155	movl	\reg, %gs
 156.endm
 157
 158#endif /* CONFIG_X86_32_LAZY_GS */
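
To make the %gs:20 requirement above concrete: gcc's i386 stack protector reads the guard value through %gs at a fixed offset of 20, so whatever segment SET_KERNEL_GS installs must make exactly that load land on a valid canary. A hedged, userspace-style C illustration of the access pattern (assumes a 32-bit build; not kernel code):

	#include <stdint.h>

	/* Roughly the load gcc emits for -fstack-protector on i386: the guard is
	 * fetched from %gs:20, which is why the kernel keeps its stack canary
	 * reachable at that offset through the segment loaded above. */
	static inline uint32_t read_stack_canary(void)
	{
		uint32_t canary;

		asm volatile("movl %%gs:20, %0" : "=r" (canary));
		return canary;
	}
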
 159
 160/* Unconditionally switch to user cr3 */
 161.macro SWITCH_TO_USER_CR3 scratch_reg:req
 162	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 163
 164	movl	%cr3, \scratch_reg
 165	orl	$PTI_SWITCH_MASK, \scratch_reg
 166	movl	\scratch_reg, %cr3
 167.Lend_\@:
 168.endm
 169
 170.macro BUG_IF_WRONG_CR3 no_user_check=0
 171#ifdef CONFIG_DEBUG_ENTRY
 172	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 173	.if \no_user_check == 0
 174	/* coming from usermode? */
 175	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
 176	jz	.Lend_\@
 177	.endif
 178	/* On user-cr3? */
 179	movl	%cr3, %eax
 180	testl	$PTI_SWITCH_MASK, %eax
 181	jnz	.Lend_\@
 182	/* From userspace with kernel cr3 - BUG */
 183	ud2
 184.Lend_\@:
 185#endif
 186.endm
 187
 188/*
 189 * Switch to kernel cr3 if not already loaded and return current cr3 in
 190 * \scratch_reg
 191 */
 192.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
 193	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 194	movl	%cr3, \scratch_reg
 195	/* Test if we are already on kernel CR3 */
 196	testl	$PTI_SWITCH_MASK, \scratch_reg
 197	jz	.Lend_\@
 198	andl	$(~PTI_SWITCH_MASK), \scratch_reg
 199	movl	\scratch_reg, %cr3
 200	/* Return original CR3 in \scratch_reg */
 201	orl	$PTI_SWITCH_MASK, \scratch_reg
 202.Lend_\@:
 203.endm
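
A compact C model of what the two CR3-switching macros above do when PTI is enabled; the model_* and SKETCH_* names are this sketch's own. PTI_SWITCH_MASK being a single page-aligned bit reflects that the user copy of the page tables sits in the page next to the kernel copy, so the switch is just toggling that bit (the real SWITCH_TO_KERNEL_CR3 additionally skips the CR3 write when it is already on the kernel copy):

	#include <stdint.h>

	#define SKETCH_PAGE_SHIFT	12
	#define SKETCH_PTI_SWITCH_MASK	(1u << SKETCH_PAGE_SHIFT)	/* selects the user copy */

	/* Model of SWITCH_TO_KERNEL_CR3: clear the user bit, hand back the CR3
	 * value we entered with so the exit path knows which copy to restore. */
	static uint32_t model_switch_to_kernel_cr3(uint32_t cr3, uint32_t *entry_cr3)
	{
		*entry_cr3 = cr3;			/* may have the user bit set */
		return cr3 & ~SKETCH_PTI_SWITCH_MASK;
	}

	/* Model of SWITCH_TO_USER_CR3: unconditionally select the user copy. */
	static uint32_t model_switch_to_user_cr3(uint32_t cr3)
	{
		return cr3 | SKETCH_PTI_SWITCH_MASK;
	}
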
 204
 205#define CS_FROM_ENTRY_STACK	(1 << 31)
 206#define CS_FROM_USER_CR3	(1 << 30)
 207#define CS_FROM_KERNEL		(1 << 29)
 208
 209.macro FIXUP_FRAME
 210	/*
 211	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
 212	 * Clear them in case hardware didn't do this for us.
 213	 */
 214	andl	$0x0000ffff, 3*4(%esp)
 215
 216#ifdef CONFIG_VM86
 217	testl	$X86_EFLAGS_VM, 4*4(%esp)
 218	jnz	.Lfrom_usermode_no_fixup_\@
 219#endif
 220	testl	$SEGMENT_RPL_MASK, 3*4(%esp)
 221	jnz	.Lfrom_usermode_no_fixup_\@
 222
 223	orl	$CS_FROM_KERNEL, 3*4(%esp)
 224
 225	/*
  226	 * When we're here from kernel mode, the (exception) stack looks like:
 227	 *
 228	 *  5*4(%esp) - <previous context>
 229	 *  4*4(%esp) - flags
 230	 *  3*4(%esp) - cs
 231	 *  2*4(%esp) - ip
 232	 *  1*4(%esp) - orig_eax
 233	 *  0*4(%esp) - gs / function
 234	 *
  235	 * Let's build a 5-entry IRET frame after that, such that struct pt_regs
  236	 * is complete and in particular regs->sp is correct. This gives us
  237	 * the original 5 entries as a gap:
 238	 *
 239	 * 12*4(%esp) - <previous context>
 240	 * 11*4(%esp) - gap / flags
 241	 * 10*4(%esp) - gap / cs
 242	 *  9*4(%esp) - gap / ip
 243	 *  8*4(%esp) - gap / orig_eax
 244	 *  7*4(%esp) - gap / gs / function
 245	 *  6*4(%esp) - ss
 246	 *  5*4(%esp) - sp
 247	 *  4*4(%esp) - flags
 248	 *  3*4(%esp) - cs
 249	 *  2*4(%esp) - ip
 250	 *  1*4(%esp) - orig_eax
 251	 *  0*4(%esp) - gs / function
 252	 */
 253
 254	pushl	%ss		# ss
 255	pushl	%esp		# sp (points at ss)
 256	addl	$6*4, (%esp)	# point sp back at the previous context
 257	pushl	6*4(%esp)	# flags
 258	pushl	6*4(%esp)	# cs
 259	pushl	6*4(%esp)	# ip
 260	pushl	6*4(%esp)	# orig_eax
 261	pushl	6*4(%esp)	# gs / function
 262.Lfrom_usermode_no_fixup_\@:
 263.endm
 264
 265.macro IRET_FRAME
 266	testl $CS_FROM_KERNEL, 1*4(%esp)
 267	jz .Lfinished_frame_\@
 268
 269	/*
 270	 * Reconstruct the 3 entry IRET frame right after the (modified)
 271	 * regs->sp without lowering %esp in between, such that an NMI in the
 272	 * middle doesn't scribble our stack.
 273	 */
 274	pushl	%eax
 275	pushl	%ecx
 276	movl	5*4(%esp), %eax		# (modified) regs->sp
 277
 278	movl	4*4(%esp), %ecx		# flags
 279	movl	%ecx, -4(%eax)
 280
 281	movl	3*4(%esp), %ecx		# cs
 282	andl	$0x0000ffff, %ecx
 283	movl	%ecx, -8(%eax)
 284
 285	movl	2*4(%esp), %ecx		# ip
 286	movl	%ecx, -12(%eax)
 287
 288	movl	1*4(%esp), %ecx		# eax
 289	movl	%ecx, -16(%eax)
 290
 291	popl	%ecx
 292	lea	-16(%eax), %esp
 293	popl	%eax
 294.Lfinished_frame_\@:
 295.endm
 296
 297.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
 298	cld
 299.if \skip_gs == 0
 300	PUSH_GS
 301.endif
 302	FIXUP_FRAME
 303	pushl	%fs
 304	pushl	%es
 305	pushl	%ds
 306	pushl	\pt_regs_ax
 307	pushl	%ebp
 308	pushl	%edi
 309	pushl	%esi
 310	pushl	%edx
 311	pushl	%ecx
 312	pushl	%ebx
 313	movl	$(__USER_DS), %edx
 314	movl	%edx, %ds
 315	movl	%edx, %es
 316	movl	$(__KERNEL_PERCPU), %edx
 317	movl	%edx, %fs
 318.if \skip_gs == 0
 319	SET_KERNEL_GS %edx
 320.endif
 321	/* Switch to kernel stack if necessary */
 322.if \switch_stacks > 0
 323	SWITCH_TO_KERNEL_STACK
 324.endif
 325.endm
 326
 327.macro SAVE_ALL_NMI cr3_reg:req
 328	SAVE_ALL
 329
 330	BUG_IF_WRONG_CR3
 331
 332	/*
 333	 * Now switch the CR3 when PTI is enabled.
 334	 *
  335	 * We can enter with either user or kernel cr3; the code will
  336	 * store the old cr3 in \cr3_reg and switch to the kernel cr3
  337	 * if necessary.
 338	 */
 339	SWITCH_TO_KERNEL_CR3 scratch_reg=\cr3_reg
 340
 341.Lend_\@:
 342.endm
 343
 344.macro RESTORE_INT_REGS
 345	popl	%ebx
 346	popl	%ecx
 347	popl	%edx
 348	popl	%esi
 349	popl	%edi
 350	popl	%ebp
 351	popl	%eax
 352.endm
 353
 354.macro RESTORE_REGS pop=0
 355	RESTORE_INT_REGS
 3561:	popl	%ds
 3572:	popl	%es
 3583:	popl	%fs
 359	POP_GS \pop
 360.pushsection .fixup, "ax"
 3614:	movl	$0, (%esp)
 362	jmp	1b
 3635:	movl	$0, (%esp)
 364	jmp	2b
 3656:	movl	$0, (%esp)
 366	jmp	3b
 367.popsection
 368	_ASM_EXTABLE(1b, 4b)
 369	_ASM_EXTABLE(2b, 5b)
 370	_ASM_EXTABLE(3b, 6b)
 371	POP_GS_EX
 372.endm
 373
 374.macro RESTORE_ALL_NMI cr3_reg:req pop=0
 375	/*
 376	 * Now switch the CR3 when PTI is enabled.
 377	 *
 378	 * We enter with kernel cr3 and switch the cr3 to the value
  379	 * stored in \cr3_reg, which is either a user or a kernel cr3.
 380	 */
 381	ALTERNATIVE "jmp .Lswitched_\@", "", X86_FEATURE_PTI
 382
 383	testl	$PTI_SWITCH_MASK, \cr3_reg
 384	jz	.Lswitched_\@
 385
 386	/* User cr3 in \cr3_reg - write it to hardware cr3 */
 387	movl	\cr3_reg, %cr3
 388
 389.Lswitched_\@:
 390
 391	BUG_IF_WRONG_CR3
 392
 393	RESTORE_REGS pop=\pop
 394.endm
 395
 396.macro CHECK_AND_APPLY_ESPFIX
 397#ifdef CONFIG_X86_ESPFIX32
 398#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
 399
 400	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX
 401
 402	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
 403	/*
 404	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 405	 * are returning to the kernel.
 406	 * See comments in process.c:copy_thread() for details.
 407	 */
 408	movb	PT_OLDSS(%esp), %ah
 409	movb	PT_CS(%esp), %al
 410	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 411	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
 412	jne	.Lend_\@	# returning to user-space with LDT SS
 413
 414	/*
  415	 * Set up and switch to the ESPFIX stack
  416	 *
  417	 * We're returning to userspace with a 16-bit stack. The CPU will not
  418	 * restore the high word of ESP for us on executing iret... This is an
  419	 * "official" bug of all the x86-compatible CPUs, which we can work
  420	 * around to make dosemu and wine happy. We do this by preloading the
  421	 * high word of ESP with the high word of the userspace ESP while
  422	 * compensating for the offset by changing to the ESPFIX segment with
  423	 * a base address that accounts for the difference (sketched after this macro).
 424	 */
 425	mov	%esp, %edx			/* load kernel esp */
 426	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
 427	mov	%dx, %ax			/* eax: new kernel esp */
 428	sub	%eax, %edx			/* offset (low word is 0) */
 429	shr	$16, %edx
 430	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
 431	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
 432	pushl	$__ESPFIX_SS
 433	pushl	%eax				/* new kernel esp */
 434	/*
 435	 * Disable interrupts, but do not irqtrace this section: we
 436	 * will soon execute iret and the tracer was already set to
 437	 * the irqstate after the IRET:
 438	 */
 439	DISABLE_INTERRUPTS(CLBR_ANY)
 440	lss	(%esp), %esp			/* switch to espfix segment */
 441.Lend_\@:
 442#endif /* CONFIG_X86_ESPFIX32 */
 443.endm
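
To make the ESPFIX address arithmetic above concrete, a standalone C illustration with made-up values (not kernel code): the visible ESP keeps the user's high word while taking the kernel's low word, and the segment base absorbs the difference, so the linear address still reaches the kernel stack even though a later iret only restores the low 16 bits of ESP.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t kernel_esp = 0xc1234a10;	/* made-up kernel stack pointer */
		uint32_t user_esp   = 0x0000f00c;	/* made-up userspace ESP (16-bit stack) */

		/* mov %dx, %ax: user high word, kernel low word */
		uint32_t new_esp = (user_esp & 0xffff0000u) | (kernel_esp & 0x0000ffffu);

		/* sub %eax, %edx: the ESPFIX segment base hides the difference
		 * (its low word is zero; only bits 16..31 are written to the GDT) */
		uint32_t base = kernel_esp - new_esp;

		printf("base %#x + visible esp %#x = %#x (the kernel esp)\n",
		       base, new_esp, base + new_esp);
		return 0;
	}
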
 444
 445/*
 446 * Called with pt_regs fully populated and kernel segments loaded,
 447 * so we can access PER_CPU and use the integer registers.
 448 *
 449 * We need to be very careful here with the %esp switch, because an NMI
  450 * can happen anywhere. If the NMI handler finds itself on the
 451 * entry-stack, it will overwrite the task-stack and everything we
 452 * copied there. So allocate the stack-frame on the task-stack and
 453 * switch to it before we do any copying.
 454 */
 455
 456.macro SWITCH_TO_KERNEL_STACK
 457
 458	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
 459
 460	BUG_IF_WRONG_CR3
 461
 462	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
 463
 464	/*
 465	 * %eax now contains the entry cr3 and we carry it forward in
 466	 * that register for the time this macro runs
 467	 */
 468
 469	/* Are we on the entry stack? Bail out if not! */
 470	movl	PER_CPU_VAR(cpu_entry_area), %ecx
 471	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
 472	subl	%esp, %ecx	/* ecx = (end of entry_stack) - esp */
 473	cmpl	$SIZEOF_entry_stack, %ecx
 474	jae	.Lend_\@
 475
 476	/* Load stack pointer into %esi and %edi */
 477	movl	%esp, %esi
 478	movl	%esi, %edi
 479
 480	/* Move %edi to the top of the entry stack */
 481	andl	$(MASK_entry_stack), %edi
 482	addl	$(SIZEOF_entry_stack), %edi
 483
 484	/* Load top of task-stack into %edi */
 485	movl	TSS_entry2task_stack(%edi), %edi
 486
 487	/* Special case - entry from kernel mode via entry stack */
 488#ifdef CONFIG_VM86
 489	movl	PT_EFLAGS(%esp), %ecx		# mix EFLAGS and CS
 490	movb	PT_CS(%esp), %cl
 491	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %ecx
 492#else
 493	movl	PT_CS(%esp), %ecx
 494	andl	$SEGMENT_RPL_MASK, %ecx
 495#endif
 496	cmpl	$USER_RPL, %ecx
 497	jb	.Lentry_from_kernel_\@
 498
 499	/* Bytes to copy */
 500	movl	$PTREGS_SIZE, %ecx
 501
 502#ifdef CONFIG_VM86
 503	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esi)
 504	jz	.Lcopy_pt_regs_\@
 505
 506	/*
 507	 * Stack-frame contains 4 additional segment registers when
 508	 * coming from VM86 mode
 509	 */
 510	addl	$(4 * 4), %ecx
 511
 512#endif
 513.Lcopy_pt_regs_\@:
 514
 515	/* Allocate frame on task-stack */
 516	subl	%ecx, %edi
 517
 518	/* Switch to task-stack */
 519	movl	%edi, %esp
 520
 521	/*
 522	 * We are now on the task-stack and can safely copy over the
 523	 * stack-frame
 524	 */
 525	shrl	$2, %ecx
 526	cld
 527	rep movsl
 528
 529	jmp .Lend_\@
 530
 531.Lentry_from_kernel_\@:
 532
 533	/*
 534	 * This handles the case when we enter the kernel from
 535	 * kernel-mode and %esp points to the entry-stack. When this
 536	 * happens we need to switch to the task-stack to run C code,
 537	 * but switch back to the entry-stack again when we approach
 538	 * iret and return to the interrupted code-path. This usually
 539	 * happens when we hit an exception while restoring user-space
 540	 * segment registers on the way back to user-space or when the
 541	 * sysenter handler runs with eflags.tf set.
 542	 *
 543	 * When we switch to the task-stack here, we can't trust the
 544	 * contents of the entry-stack anymore, as the exception handler
 545	 * might be scheduled out or moved to another CPU. Therefore we
 546	 * copy the complete entry-stack to the task-stack and set a
 547	 * marker in the iret-frame (bit 31 of the CS dword) to detect
 548	 * what we've done on the iret path.
 549	 *
 550	 * On the iret path we copy everything back and switch to the
 551	 * entry-stack, so that the interrupted kernel code-path
 552	 * continues on the same stack it was interrupted with.
 553	 *
 554	 * Be aware that an NMI can happen anytime in this code.
 555	 *
 556	 * %esi: Entry-Stack pointer (same as %esp)
 557	 * %edi: Top of the task stack
 558	 * %eax: CR3 on kernel entry
 559	 */
 560
 561	/* Calculate number of bytes on the entry stack in %ecx */
 562	movl	%esi, %ecx
 563
 564	/* %ecx to the top of entry-stack */
 565	andl	$(MASK_entry_stack), %ecx
 566	addl	$(SIZEOF_entry_stack), %ecx
 567
 568	/* Number of bytes on the entry stack to %ecx */
 569	sub	%esi, %ecx
 570
 571	/* Mark stackframe as coming from entry stack */
 572	orl	$CS_FROM_ENTRY_STACK, PT_CS(%esp)
 573
 574	/*
 575	 * Test the cr3 used to enter the kernel and add a marker
 576	 * so that we can switch back to it before iret.
 577	 */
 578	testl	$PTI_SWITCH_MASK, %eax
 579	jz	.Lcopy_pt_regs_\@
 580	orl	$CS_FROM_USER_CR3, PT_CS(%esp)
 581
 582	/*
 583	 * %esi and %edi are unchanged, %ecx contains the number of
 584	 * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate
 585	 * the stack-frame on task-stack and copy everything over
 586	 */
 587	jmp .Lcopy_pt_regs_\@
 588
 589.Lend_\@:
 590.endm
 591
 592/*
 593 * Switch back from the kernel stack to the entry stack.
 594 *
 595 * The %esp register must point to pt_regs on the task stack. It will
 596 * first calculate the size of the stack-frame to copy, depending on
 597 * whether we return to VM86 mode or not. With that it uses 'rep movsl'
 598 * to copy the contents of the stack over to the entry stack.
 599 *
 600 * We must be very careful here, as we can't trust the contents of the
 601 * task-stack once we switched to the entry-stack. When an NMI happens
 602 * while on the entry-stack, the NMI handler will switch back to the top
  603 * of the task stack, overwriting the stack-frame we are about to copy.
 604 * Therefore we switch the stack only after everything is copied over.
 605 */
 606.macro SWITCH_TO_ENTRY_STACK
 607
 608	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
 609
 610	/* Bytes to copy */
 611	movl	$PTREGS_SIZE, %ecx
 612
 613#ifdef CONFIG_VM86
 614	testl	$(X86_EFLAGS_VM), PT_EFLAGS(%esp)
 615	jz	.Lcopy_pt_regs_\@
 616
 617	/* Additional 4 registers to copy when returning to VM86 mode */
 618	addl    $(4 * 4), %ecx
 619
 620.Lcopy_pt_regs_\@:
 621#endif
 622
 623	/* Initialize source and destination for movsl */
 624	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
 625	subl	%ecx, %edi
 626	movl	%esp, %esi
 627
 628	/* Save future stack pointer in %ebx */
 629	movl	%edi, %ebx
 630
 631	/* Copy over the stack-frame */
 632	shrl	$2, %ecx
 633	cld
 634	rep movsl
 635
 636	/*
 637	 * Switch to entry-stack - needs to happen after everything is
 638	 * copied because the NMI handler will overwrite the task-stack
 639	 * when on entry-stack
 640	 */
 641	movl	%ebx, %esp
 642
 643.Lend_\@:
 644.endm
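
A C-level sketch of the copy this macro performs on the common (non-VM86) path; the sketch_* names are hypothetical and this is not kernel code. It mirrors the copy-before-switch rule spelled out above: the frame is placed at its final spot just below TSS.sp0, and the returned pointer would become %esp only after the copy, because an NMI taken once we are on the entry stack may overwrite the task-stack source frame.

	#include <stdint.h>
	#include <string.h>

	#define SKETCH_PTREGS_BYTES	(17 * 4)	/* the pt_regs layout at the top of this file */

	/* Returns what the new stack pointer would be; the real macro loads it
	 * into %esp only after the rep movsl has finished. */
	static void *sketch_switch_to_entry_stack(const void *task_stack_frame,
						  void *entry_stack_top /* TSS.sp0 */)
	{
		uint8_t *dst = (uint8_t *)entry_stack_top - SKETCH_PTREGS_BYTES;

		memcpy(dst, task_stack_frame, SKETCH_PTREGS_BYTES);	/* rep movsl */
		return dst;				/* movl %ebx, %esp comes last */
	}
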
 645
 646/*
 647 * This macro handles the case when we return to kernel-mode on the iret
 648 * path and have to switch back to the entry stack and/or user-cr3
 649 *
 650 * See the comments below the .Lentry_from_kernel_\@ label in the
 651 * SWITCH_TO_KERNEL_STACK macro for more details.
 652 */
 653.macro PARANOID_EXIT_TO_KERNEL_MODE
 654
 655	/*
 656	 * Test if we entered the kernel with the entry-stack. Most
 657	 * likely we did not, because this code only runs on the
 658	 * return-to-kernel path.
 659	 */
 660	testl	$CS_FROM_ENTRY_STACK, PT_CS(%esp)
 661	jz	.Lend_\@
 662
 663	/* Unlikely slow-path */
 664
 665	/* Clear marker from stack-frame */
 666	andl	$(~CS_FROM_ENTRY_STACK), PT_CS(%esp)
 667
 668	/* Copy the remaining task-stack contents to entry-stack */
 669	movl	%esp, %esi
 670	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
 671
 672	/* Bytes on the task-stack to ecx */
 673	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx
 674	subl	%esi, %ecx
 675
 676	/* Allocate stack-frame on entry-stack */
 677	subl	%ecx, %edi
 678
 679	/*
 680	 * Save future stack-pointer, we must not switch until the
 681	 * copy is done, otherwise the NMI handler could destroy the
 682	 * contents of the task-stack we are about to copy.
 683	 */
 684	movl	%edi, %ebx
 685
 686	/* Do the copy */
 687	shrl	$2, %ecx
 688	cld
 689	rep movsl
 690
 691	/* Safe to switch to entry-stack now */
 692	movl	%ebx, %esp
 693
 694	/*
 695	 * We came from entry-stack and need to check if we also need to
 696	 * switch back to user cr3.
 697	 */
 698	testl	$CS_FROM_USER_CR3, PT_CS(%esp)
 699	jz	.Lend_\@
 700
 701	/* Clear marker from stack-frame */
 702	andl	$(~CS_FROM_USER_CR3), PT_CS(%esp)
 703
 704	SWITCH_TO_USER_CR3 scratch_reg=%eax
 705
 706.Lend_\@:
 707.endm
 708/*
 709 * %eax: prev task
 710 * %edx: next task
 711 */
 712ENTRY(__switch_to_asm)
 713	/*
 714	 * Save callee-saved registers
  715	 * This must match the order in struct inactive_task_frame (sketched after this function)
 716	 */
 717	pushl	%ebp
 718	pushl	%ebx
 719	pushl	%edi
 720	pushl	%esi
 721	pushfl
 722
 723	/* switch stack */
 724	movl	%esp, TASK_threadsp(%eax)
 725	movl	TASK_threadsp(%edx), %esp
 726
 727#ifdef CONFIG_STACKPROTECTOR
 728	movl	TASK_stack_canary(%edx), %ebx
 729	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 730#endif
 731
 732#ifdef CONFIG_RETPOLINE
 733	/*
 734	 * When switching from a shallower to a deeper call stack
 735	 * the RSB may either underflow or use entries populated
 736	 * with userspace addresses. On CPUs where those concerns
 737	 * exist, overwrite the RSB with entries which capture
 738	 * speculative execution to prevent attack.
 739	 */
 740	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 741#endif
 742
 743	/* restore callee-saved registers */
 744	popfl
 745	popl	%esi
 746	popl	%edi
 747	popl	%ebx
 748	popl	%ebp
 749
 750	jmp	__switch_to
 751END(__switch_to_asm)
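
Read together with the pushes above (and the return address pushed by the call into this routine), the saved words land in memory in reverse push order. A hedged sketch of that layout; the field names mirror the kernel's 32-bit struct inactive_task_frame, but the authoritative definition lives in asm/switch_to.h, not here:

	#include <stdint.h>

	/* Lowest address first: the last value pushed (EFLAGS) sits at the saved
	 * stack pointer recorded in TASK_threadsp. */
	struct inactive_task_frame_sketch {
		uint32_t flags;		/* pushfl */
		uint32_t si;		/* pushl %esi */
		uint32_t di;		/* pushl %edi */
		uint32_t bx;		/* pushl %ebx */
		uint32_t bp;		/* pushl %ebp */
		uint32_t ret_addr;	/* pushed by the call to __switch_to_asm */
	};
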
 752
 753/*
 754 * The unwinder expects the last frame on the stack to always be at the same
 755 * offset from the end of the page, which allows it to validate the stack.
  756 * Calling schedule_tail() directly would break that convention because it's an
  757 * asmlinkage function, so its argument has to be pushed on the stack.  This
 758 * wrapper creates a proper "end of stack" frame header before the call.
 759 */
 760ENTRY(schedule_tail_wrapper)
 761	FRAME_BEGIN
 762
 763	pushl	%eax
 764	call	schedule_tail
 765	popl	%eax
 766
 767	FRAME_END
 768	ret
 769ENDPROC(schedule_tail_wrapper)
 770/*
 771 * A newly forked process directly context switches into this address.
 772 *
 773 * eax: prev task we switched from
 774 * ebx: kernel thread func (NULL for user thread)
 775 * edi: kernel thread arg
 776 */
 777ENTRY(ret_from_fork)
 778	call	schedule_tail_wrapper
 779
 780	testl	%ebx, %ebx
 781	jnz	1f		/* kernel threads are uncommon */
 782
 7832:
 784	/* When we fork, we trace the syscall return in the child, too. */
 785	movl    %esp, %eax
 786	call    syscall_return_slowpath
 787	STACKLEAK_ERASE
 788	jmp     restore_all
 789
 790	/* kernel thread */
 7911:	movl	%edi, %eax
 792	CALL_NOSPEC %ebx
 793	/*
 794	 * A kernel thread is allowed to return here after successfully
 795	 * calling do_execve().  Exit to userspace to complete the execve()
 796	 * syscall.
 797	 */
 798	movl	$0, PT_EAX(%esp)
 799	jmp	2b
 800END(ret_from_fork)
 801
 802/*
 803 * Return to user mode is not as complex as all this looks,
 804 * but we want the default path for a system call return to
 805 * go as quickly as possible which is why some of this is
 806 * less clear than it otherwise should be.
 807 */
 808
 809	# userspace resumption stub bypassing syscall exit tracing
 810	ALIGN
 811ret_from_exception:
 812	preempt_stop(CLBR_ANY)
 813ret_from_intr:
 814#ifdef CONFIG_VM86
 815	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
 816	movb	PT_CS(%esp), %al
 817	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 818#else
 819	/*
  820	 * We can be coming here from a child spawned by kernel_thread().
 821	 */
 822	movl	PT_CS(%esp), %eax
 823	andl	$SEGMENT_RPL_MASK, %eax
 824#endif
 825	cmpl	$USER_RPL, %eax
 826	jb	restore_all_kernel		# not returning to v8086 or userspace
 827
 828ENTRY(resume_userspace)
 829	DISABLE_INTERRUPTS(CLBR_ANY)
 830	TRACE_IRQS_OFF
 831	movl	%esp, %eax
 832	call	prepare_exit_to_usermode
 833	jmp	restore_all
 834END(ret_from_exception)
 835
 836GLOBAL(__begin_SYSENTER_singlestep_region)
 837/*
 838 * All code from here through __end_SYSENTER_singlestep_region is subject
 839 * to being single-stepped if a user program sets TF and executes SYSENTER.
 840 * There is absolutely nothing that we can do to prevent this from happening
 841 * (thanks Intel!).  To keep our handling of this situation as simple as
 842 * possible, we handle TF just like AC and NT, except that our #DB handler
 843 * will ignore all of the single-step traps generated in this range.
 844 */
 845
 846#ifdef CONFIG_XEN_PV
 847/*
 848 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 849 * entry point expects, so fix it up before using the normal path.
 850 */
 851ENTRY(xen_sysenter_target)
 852	addl	$5*4, %esp			/* remove xen-provided frame */
 853	jmp	.Lsysenter_past_esp
 854#endif
 855
 856/*
 857 * 32-bit SYSENTER entry.
 858 *
 859 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 860 * if X86_FEATURE_SEP is available.  This is the preferred system call
 861 * entry on 32-bit systems.
 862 *
 863 * The SYSENTER instruction, in principle, should *only* occur in the
 864 * vDSO.  In practice, a small number of Android devices were shipped
 865 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 866 * never happened in any of Google's Bionic versions -- it only happened
 867 * in a narrow range of Intel-provided versions.
 868 *
 869 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 870 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 871 * SYSENTER does not save anything on the stack,
 872 * and does not save old EIP (!!!), ESP, or EFLAGS.
 873 *
 874 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 875 * user and/or vm86 state), we explicitly disable the SYSENTER
 876 * instruction in vm86 mode by reprogramming the MSRs.
 877 *
 878 * Arguments:
 879 * eax  system call number
 880 * ebx  arg1
 881 * ecx  arg2
 882 * edx  arg3
 883 * esi  arg4
 884 * edi  arg5
 885 * ebp  user stack
 886 * 0(%ebp) arg6
 887 */
 888ENTRY(entry_SYSENTER_32)
 889	/*
 890	 * On entry-stack with all userspace-regs live - save and
 891	 * restore eflags and %eax to use it as scratch-reg for the cr3
 892	 * switch.
 893	 */
 894	pushfl
 895	pushl	%eax
 896	BUG_IF_WRONG_CR3 no_user_check=1
 897	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
 898	popl	%eax
 899	popfl
 900
 901	/* Stack empty again, switch to task stack */
 902	movl	TSS_entry2task_stack(%esp), %esp
 903
 904.Lsysenter_past_esp:
 905	pushl	$__USER_DS		/* pt_regs->ss */
 906	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
 907	pushfl				/* pt_regs->flags (except IF = 0) */
 908	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
 909	pushl	$__USER_CS		/* pt_regs->cs */
 910	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
 911	pushl	%eax			/* pt_regs->orig_ax */
 912	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest, stack already switched */
 913
 914	/*
 915	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
 916	 * and TF ourselves.  To save a few cycles, we can check whether
  917	 * either was set instead of doing an unconditional popfl.
 918	 * This needs to happen before enabling interrupts so that
 919	 * we don't get preempted with NT set.
 920	 *
 921	 * If TF is set, we will single-step all the way to here -- do_debug
 922	 * will ignore all the traps.  (Yes, this is slow, but so is
 923	 * single-stepping in general.  This allows us to avoid having
  924	 * more complicated code to handle the case where a user program
 925	 * forces us to single-step through the SYSENTER entry code.)
 926	 *
 927	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
 928	 * out-of-line as an optimization: NT is unlikely to be set in the
 929	 * majority of the cases and instead of polluting the I$ unnecessarily,
 930	 * we're keeping that code behind a branch which will predict as
 931	 * not-taken and therefore its instructions won't be fetched.
 932	 */
 933	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
 934	jnz	.Lsysenter_fix_flags
 935.Lsysenter_flags_fixed:
 936
 937	/*
 938	 * User mode is traced as though IRQs are on, and SYSENTER
 939	 * turned them off.
 940	 */
 941	TRACE_IRQS_OFF
 942
 943	movl	%esp, %eax
 944	call	do_fast_syscall_32
 945	/* XEN PV guests always use IRET path */
 946	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
 947		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 948
 949	STACKLEAK_ERASE
 950
 951/* Opportunistic SYSEXIT */
 952	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
 953
 954	/*
  955	 * Set up the entry stack - we keep the pointer in %eax and do the
 956	 * switch after almost all user-state is restored.
 957	 */
 958
 959	/* Load entry stack pointer and allocate frame for eflags/eax */
 960	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %eax
 961	subl	$(2*4), %eax
 962
 963	/* Copy eflags and eax to entry stack */
 964	movl	PT_EFLAGS(%esp), %edi
 965	movl	PT_EAX(%esp), %esi
 966	movl	%edi, (%eax)
 967	movl	%esi, 4(%eax)
 968
 969	/* Restore user registers and segments */
 970	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
 971	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
 9721:	mov	PT_FS(%esp), %fs
 973	PTGS_TO_GS
 974
 975	popl	%ebx			/* pt_regs->bx */
 976	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
 977	popl	%esi			/* pt_regs->si */
 978	popl	%edi			/* pt_regs->di */
 979	popl	%ebp			/* pt_regs->bp */
 980
 981	/* Switch to entry stack */
 982	movl	%eax, %esp
 983
 984	/* Now ready to switch the cr3 */
 985	SWITCH_TO_USER_CR3 scratch_reg=%eax
 986
 987	/*
 988	 * Restore all flags except IF. (We restore IF separately because
 989	 * STI gives a one-instruction window in which we won't be interrupted,
 990	 * whereas POPF does not.)
 991	 */
 992	btrl	$X86_EFLAGS_IF_BIT, (%esp)
 993	BUG_IF_WRONG_CR3 no_user_check=1
 994	popfl
 995	popl	%eax
 996
 997	/*
  998	 * Return to the vDSO, which will pop ecx and edx.
 999	 * Don't bother with DS and ES (they already contain __USER_DS).
1000	 */
1001	sti
1002	sysexit
1003
1004.pushsection .fixup, "ax"
10052:	movl	$0, PT_FS(%esp)
1006	jmp	1b
1007.popsection
1008	_ASM_EXTABLE(1b, 2b)
1009	PTGS_TO_GS_EX
1010
1011.Lsysenter_fix_flags:
1012	pushl	$X86_EFLAGS_FIXED
1013	popfl
1014	jmp	.Lsysenter_flags_fixed
1015GLOBAL(__end_SYSENTER_singlestep_region)
1016ENDPROC(entry_SYSENTER_32)
1017
1018/*
1019 * 32-bit legacy system call entry.
1020 *
1021 * 32-bit x86 Linux system calls traditionally used the INT $0x80
1022 * instruction.  INT $0x80 lands here.
1023 *
 1024 * This entry point can be used by any 32-bit program to perform system calls.
1025 * Instances of INT $0x80 can be found inline in various programs and
1026 * libraries.  It is also used by the vDSO's __kernel_vsyscall
1027 * fallback for hardware that doesn't support a faster entry method.
1028 * Restarted 32-bit system calls also fall back to INT $0x80
1029 * regardless of what instruction was originally used to do the system
1030 * call.  (64-bit programs can use INT $0x80 as well, but they can
1031 * only run on 64-bit kernels and therefore land in
1032 * entry_INT80_compat.)
1033 *
1034 * This is considered a slow path.  It is not used by most libc
1035 * implementations on modern hardware except during process startup.
1036 *
1037 * Arguments:
1038 * eax  system call number
1039 * ebx  arg1
1040 * ecx  arg2
1041 * edx  arg3
1042 * esi  arg4
1043 * edi  arg5
1044 * ebp  arg6
1045 */
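
As a concrete illustration of the register convention listed above, here is a small standalone userspace program (not kernel code; build it as 32-bit, e.g. with gcc -m32) that enters the kernel through this path by issuing INT $0x80 directly. getpid(2) takes no arguments, so only eax is loaded:

	#include <stdio.h>

	#define NR_getpid_32	20	/* 32-bit x86 syscall number for getpid(2) */

	int main(void)
	{
		long ret;

		/* eax carries the syscall number in and the return value out;
		 * ebx..ebp would carry arg1..arg6 for calls that need them. */
		asm volatile("int $0x80"
			     : "=a" (ret)
			     : "0" (NR_getpid_32)
			     : "memory");

		printf("getpid() via INT $0x80 -> %ld\n", ret);
		return 0;
	}
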
1046ENTRY(entry_INT80_32)
1047	ASM_CLAC
1048	pushl	%eax			/* pt_regs->orig_ax */
1049
1050	SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1	/* save rest */
1051
1052	/*
1053	 * User mode is traced as though IRQs are on, and the interrupt gate
1054	 * turned them off.
1055	 */
1056	TRACE_IRQS_OFF
1057
1058	movl	%esp, %eax
1059	call	do_int80_syscall_32
1060.Lsyscall_32_done:
1061
1062	STACKLEAK_ERASE
1063
1064restore_all:
1065	TRACE_IRQS_IRET
1066	SWITCH_TO_ENTRY_STACK
1067.Lrestore_all_notrace:
1068	CHECK_AND_APPLY_ESPFIX
1069.Lrestore_nocheck:
1070	/* Switch back to user CR3 */
1071	SWITCH_TO_USER_CR3 scratch_reg=%eax
1072
1073	BUG_IF_WRONG_CR3
1074
1075	/* Restore user state */
1076	RESTORE_REGS pop=4			# skip orig_eax/error_code
1077.Lirq_return:
1078	IRET_FRAME
1079	/*
 1080	 * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
 1081	 * when returning from an IPI handler and when returning from the
 1082	 * scheduler to user-space.
1083	 */
1084	INTERRUPT_RETURN
1085
1086restore_all_kernel:
1087#ifdef CONFIG_PREEMPTION
1088	DISABLE_INTERRUPTS(CLBR_ANY)
1089	cmpl	$0, PER_CPU_VAR(__preempt_count)
1090	jnz	.Lno_preempt
1091	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
1092	jz	.Lno_preempt
1093	call	preempt_schedule_irq
1094.Lno_preempt:
1095#endif
1096	TRACE_IRQS_IRET
1097	PARANOID_EXIT_TO_KERNEL_MODE
1098	BUG_IF_WRONG_CR3
1099	RESTORE_REGS 4
1100	jmp	.Lirq_return
1101
1102.section .fixup, "ax"
 1103ENTRY(iret_exc)
1104	pushl	$0				# no error code
1105	pushl	$do_iret_error
1106
1107#ifdef CONFIG_DEBUG_ENTRY
1108	/*
 1109	 * The stack-frame here is the one that iret faulted on, so it's a
 1110	 * return-to-user frame. We are on kernel-cr3 because we come here from
 1111	 * the fixup code. This confuses the CR3 checker, so switch to user-cr3
 1112	 * as the checker expects.
1113	 */
1114	pushl	%eax
1115	SWITCH_TO_USER_CR3 scratch_reg=%eax
1116	popl	%eax
1117#endif
1118
1119	jmp	common_exception
1120.previous
1121	_ASM_EXTABLE(.Lirq_return, iret_exc)
1122ENDPROC(entry_INT80_32)
1123
1124.macro FIXUP_ESPFIX_STACK
1125/*
 1126 * Switch back from the ESPFIX stack to the normal zero-based stack
 1127 *
 1128 * We can't call C functions using the ESPFIX stack. This code reads
 1129 * the high word of the segment base from the GDT and switches to the
 1130 * normal stack, adjusting ESP by the matching offset.
1131 */
1132#ifdef CONFIG_X86_ESPFIX32
1133	/* fixup the stack */
1134	mov	GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
1135	mov	GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
1136	shl	$16, %eax
1137	addl	%esp, %eax			/* the adjusted stack pointer */
1138	pushl	$__KERNEL_DS
1139	pushl	%eax
1140	lss	(%esp), %esp			/* switch to the normal stack segment */
1141#endif
1142.endm
1143.macro UNWIND_ESPFIX_STACK
1144#ifdef CONFIG_X86_ESPFIX32
1145	movl	%ss, %eax
1146	/* see if on espfix stack */
1147	cmpw	$__ESPFIX_SS, %ax
1148	jne	27f
1149	movl	$__KERNEL_DS, %eax
1150	movl	%eax, %ds
1151	movl	%eax, %es
1152	/* switch to normal stack */
1153	FIXUP_ESPFIX_STACK
115427:
1155#endif
1156.endm
1157
1158/*
1159 * Build the entry stubs with some assembler magic.
 1160 * We pack 1 stub into every 8-byte block (worked example after common_interrupt).
1161 */
1162	.align 8
1163ENTRY(irq_entries_start)
1164    vector=FIRST_EXTERNAL_VECTOR
1165    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
1166	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
1167    vector=vector+1
1168	jmp	common_interrupt
1169	.align	8
1170    .endr
1171END(irq_entries_start)
1172
1173#ifdef CONFIG_X86_LOCAL_APIC
1174	.align 8
1175ENTRY(spurious_entries_start)
1176    vector=FIRST_SYSTEM_VECTOR
1177    .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
1178	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
1179    vector=vector+1
1180	jmp	common_spurious
1181	.align	8
1182    .endr
1183END(spurious_entries_start)
1184
1185common_spurious:
1186	ASM_CLAC
1187	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
1188	SAVE_ALL switch_stacks=1
1189	ENCODE_FRAME_POINTER
1190	TRACE_IRQS_OFF
1191	movl	%esp, %eax
1192	call	smp_spurious_interrupt
1193	jmp	ret_from_intr
1194ENDPROC(common_spurious)
1195#endif
1196
1197/*
1198 * the CPU automatically disables interrupts when executing an IRQ vector,
1199 * so IRQ-flags tracing has to follow that:
1200 */
1201	.p2align CONFIG_X86_L1_CACHE_SHIFT
1202common_interrupt:
1203	ASM_CLAC
1204	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
1205
1206	SAVE_ALL switch_stacks=1
1207	ENCODE_FRAME_POINTER
1208	TRACE_IRQS_OFF
1209	movl	%esp, %eax
1210	call	do_IRQ
1211	jmp	ret_from_intr
1212ENDPROC(common_interrupt)
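
A worked example of the vector encoding used by the stubs above (standalone, illustrative): pushing ~vector + 0x80 keeps the immediate inside a signed byte, which lets the assembler use the short push encoding so every stub fits its 8-byte slot, and the addl $-0x80 in common_interrupt/common_spurious recovers ~vector, i.e. -(vector + 1), always in [-256, -1].

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		for (unsigned int vector = 0x20; vector <= 0xff; vector += 0x40) {
			/* what the stub pushes: always fits in a signed byte */
			int8_t pushed = (int8_t)(~vector + 0x80);
			/* what is left on the stack after "addl $-0x80, (%esp)" */
			int on_stack = (int)pushed - 0x80;

			printf("vector %#4x -> pushed %4d -> on stack %4d (= -(vector + 1))\n",
			       vector, pushed, on_stack);
		}
		return 0;
	}
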
1213
1214#define BUILD_INTERRUPT3(name, nr, fn)			\
1215ENTRY(name)						\
1216	ASM_CLAC;					\
1217	pushl	$~(nr);					\
1218	SAVE_ALL switch_stacks=1;			\
1219	ENCODE_FRAME_POINTER;				\
1220	TRACE_IRQS_OFF					\
1221	movl	%esp, %eax;				\
1222	call	fn;					\
1223	jmp	ret_from_intr;				\
1224ENDPROC(name)
1225
1226#define BUILD_INTERRUPT(name, nr)		\
1227	BUILD_INTERRUPT3(name, nr, smp_##name);	\
1228
1229/* The include is where all of the SMP etc. interrupts come from */
1230#include <asm/entry_arch.h>
1231
1232ENTRY(coprocessor_error)
1233	ASM_CLAC
1234	pushl	$0
1235	pushl	$do_coprocessor_error
1236	jmp	common_exception
1237END(coprocessor_error)
1238
1239ENTRY(simd_coprocessor_error)
1240	ASM_CLAC
1241	pushl	$0
1242#ifdef CONFIG_X86_INVD_BUG
1243	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
1244	ALTERNATIVE "pushl	$do_general_protection",	\
1245		    "pushl	$do_simd_coprocessor_error",	\
1246		    X86_FEATURE_XMM
1247#else
1248	pushl	$do_simd_coprocessor_error
1249#endif
1250	jmp	common_exception
1251END(simd_coprocessor_error)
1252
1253ENTRY(device_not_available)
1254	ASM_CLAC
1255	pushl	$-1				# mark this as an int
1256	pushl	$do_device_not_available
1257	jmp	common_exception
1258END(device_not_available)
1259
1260#ifdef CONFIG_PARAVIRT
1261ENTRY(native_iret)
1262	iret
1263	_ASM_EXTABLE(native_iret, iret_exc)
1264END(native_iret)
1265#endif
1266
1267ENTRY(overflow)
1268	ASM_CLAC
1269	pushl	$0
1270	pushl	$do_overflow
1271	jmp	common_exception
1272END(overflow)
1273
1274ENTRY(bounds)
1275	ASM_CLAC
1276	pushl	$0
1277	pushl	$do_bounds
1278	jmp	common_exception
1279END(bounds)
1280
1281ENTRY(invalid_op)
1282	ASM_CLAC
1283	pushl	$0
1284	pushl	$do_invalid_op
1285	jmp	common_exception
1286END(invalid_op)
1287
1288ENTRY(coprocessor_segment_overrun)
1289	ASM_CLAC
1290	pushl	$0
1291	pushl	$do_coprocessor_segment_overrun
1292	jmp	common_exception
1293END(coprocessor_segment_overrun)
1294
1295ENTRY(invalid_TSS)
1296	ASM_CLAC
1297	pushl	$do_invalid_TSS
1298	jmp	common_exception
1299END(invalid_TSS)
1300
1301ENTRY(segment_not_present)
1302	ASM_CLAC
1303	pushl	$do_segment_not_present
1304	jmp	common_exception
1305END(segment_not_present)
1306
1307ENTRY(stack_segment)
1308	ASM_CLAC
1309	pushl	$do_stack_segment
1310	jmp	common_exception
1311END(stack_segment)
1312
1313ENTRY(alignment_check)
1314	ASM_CLAC
1315	pushl	$do_alignment_check
1316	jmp	common_exception
1317END(alignment_check)
1318
1319ENTRY(divide_error)
1320	ASM_CLAC
1321	pushl	$0				# no error code
1322	pushl	$do_divide_error
1323	jmp	common_exception
1324END(divide_error)
1325
1326#ifdef CONFIG_X86_MCE
1327ENTRY(machine_check)
1328	ASM_CLAC
1329	pushl	$0
1330	pushl	machine_check_vector
1331	jmp	common_exception
1332END(machine_check)
1333#endif
1334
1335ENTRY(spurious_interrupt_bug)
1336	ASM_CLAC
1337	pushl	$0
1338	pushl	$do_spurious_interrupt_bug
1339	jmp	common_exception
1340END(spurious_interrupt_bug)
1341
1342#ifdef CONFIG_XEN_PV
1343ENTRY(xen_hypervisor_callback)
1344	pushl	$-1				/* orig_ax = -1 => not a system call */
1345	SAVE_ALL
1346	ENCODE_FRAME_POINTER
1347	TRACE_IRQS_OFF
1348
1349	/*
1350	 * Check to see if we got the event in the critical
1351	 * region in xen_iret_direct, after we've reenabled
1352	 * events and checked for pending events.  This simulates
 1353	 * the iret instruction's behaviour where it delivers a
1354	 * pending interrupt when enabling interrupts:
1355	 */
1356	movl	PT_EIP(%esp), %eax
1357	cmpl	$xen_iret_start_crit, %eax
1358	jb	1f
1359	cmpl	$xen_iret_end_crit, %eax
1360	jae	1f
1361
1362	jmp	xen_iret_crit_fixup
1363
1364ENTRY(xen_do_upcall)
13651:	mov	%esp, %eax
1366	call	xen_evtchn_do_upcall
1367#ifndef CONFIG_PREEMPTION
1368	call	xen_maybe_preempt_hcall
1369#endif
1370	jmp	ret_from_intr
1371ENDPROC(xen_hypervisor_callback)
1372
1373/*
1374 * Hypervisor uses this for application faults while it executes.
1375 * We get here for two reasons:
1376 *  1. Fault while reloading DS, ES, FS or GS
1377 *  2. Fault while executing IRET
1378 * Category 1 we fix up by reattempting the load, and zeroing the segment
1379 * register if the load fails.
1380 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
1381 * normal Linux return path in this case because if we use the IRET hypercall
1382 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1383 * We distinguish between categories by maintaining a status value in EAX.
1384 */
1385ENTRY(xen_failsafe_callback)
1386	pushl	%eax
1387	movl	$1, %eax
13881:	mov	4(%esp), %ds
13892:	mov	8(%esp), %es
13903:	mov	12(%esp), %fs
13914:	mov	16(%esp), %gs
1392	/* EAX == 0 => Category 1 (Bad segment)
1393	   EAX != 0 => Category 2 (Bad IRET) */
1394	testl	%eax, %eax
1395	popl	%eax
1396	lea	16(%esp), %esp
1397	jz	5f
1398	jmp	iret_exc
13995:	pushl	$-1				/* orig_ax = -1 => not a system call */
1400	SAVE_ALL
1401	ENCODE_FRAME_POINTER
1402	jmp	ret_from_exception
1403
1404.section .fixup, "ax"
14056:	xorl	%eax, %eax
1406	movl	%eax, 4(%esp)
1407	jmp	1b
14087:	xorl	%eax, %eax
1409	movl	%eax, 8(%esp)
1410	jmp	2b
14118:	xorl	%eax, %eax
1412	movl	%eax, 12(%esp)
1413	jmp	3b
14149:	xorl	%eax, %eax
1415	movl	%eax, 16(%esp)
1416	jmp	4b
1417.previous
1418	_ASM_EXTABLE(1b, 6b)
1419	_ASM_EXTABLE(2b, 7b)
1420	_ASM_EXTABLE(3b, 8b)
1421	_ASM_EXTABLE(4b, 9b)
1422ENDPROC(xen_failsafe_callback)
1423#endif /* CONFIG_XEN_PV */
1424
1425#ifdef CONFIG_XEN_PVHVM
1426BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1427		 xen_evtchn_do_upcall)
1428#endif
1429
1430
1431#if IS_ENABLED(CONFIG_HYPERV)
1432
1433BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1434		 hyperv_vector_handler)
1435
1436BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
1437		 hyperv_reenlightenment_intr)
1438
1439BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
1440		 hv_stimer0_vector_handler)
1441
1442#endif /* CONFIG_HYPERV */
1443
1444ENTRY(page_fault)
1445	ASM_CLAC
1446	pushl	$do_page_fault
1447	jmp	common_exception_read_cr2
1448END(page_fault)
1449
1450common_exception_read_cr2:
1451	/* the function address is in %gs's slot on the stack */
1452	SAVE_ALL switch_stacks=1 skip_gs=1
1453
1454	ENCODE_FRAME_POINTER
1455	UNWIND_ESPFIX_STACK
1456
1457	/* fixup %gs */
1458	GS_TO_REG %ecx
1459	movl	PT_GS(%esp), %edi
1460	REG_TO_PTGS %ecx
1461	SET_KERNEL_GS %ecx
1462
1463	GET_CR2_INTO(%ecx)			# might clobber %eax
1464
1465	/* fixup orig %eax */
1466	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
1467	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
1468
1469	TRACE_IRQS_OFF
1470	movl	%esp, %eax			# pt_regs pointer
1471	CALL_NOSPEC %edi
1472	jmp	ret_from_exception
1473END(common_exception_read_cr2)
1474
1475common_exception:
1476	/* the function address is in %gs's slot on the stack */
1477	SAVE_ALL switch_stacks=1 skip_gs=1
1478	ENCODE_FRAME_POINTER
1479	UNWIND_ESPFIX_STACK
1480
1481	/* fixup %gs */
1482	GS_TO_REG %ecx
1483	movl	PT_GS(%esp), %edi		# get the function address
1484	REG_TO_PTGS %ecx
1485	SET_KERNEL_GS %ecx
1486
1487	/* fixup orig %eax */
1488	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
1489	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
1490
1491	TRACE_IRQS_OFF
1492	movl	%esp, %eax			# pt_regs pointer
1493	CALL_NOSPEC %edi
1494	jmp	ret_from_exception
1495END(common_exception)
1496
1497ENTRY(debug)
1498	/*
1499	 * Entry from sysenter is now handled in common_exception
1500	 */
1501	ASM_CLAC
1502	pushl	$-1				# mark this as an int
1503	pushl	$do_debug
1504	jmp	common_exception
1505END(debug)
1506
1507/*
1508 * NMI is doubly nasty.  It can happen on the first instruction of
1509 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
1510 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
1511 * switched stacks.  We handle both conditions by simply checking whether we
1512 * interrupted kernel code running on the SYSENTER stack.
1513 */
1514ENTRY(nmi)
1515	ASM_CLAC
1516
1517#ifdef CONFIG_X86_ESPFIX32
1518	pushl	%eax
1519	movl	%ss, %eax
1520	cmpw	$__ESPFIX_SS, %ax
1521	popl	%eax
1522	je	.Lnmi_espfix_stack
1523#endif
1524
1525	pushl	%eax				# pt_regs->orig_ax
1526	SAVE_ALL_NMI cr3_reg=%edi
1527	ENCODE_FRAME_POINTER
1528	xorl	%edx, %edx			# zero error code
1529	movl	%esp, %eax			# pt_regs pointer
1530
1531	/* Are we currently on the SYSENTER stack? */
1532	movl	PER_CPU_VAR(cpu_entry_area), %ecx
1533	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
1534	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
1535	cmpl	$SIZEOF_entry_stack, %ecx
1536	jb	.Lnmi_from_sysenter_stack
1537
1538	/* Not on SYSENTER stack. */
1539	call	do_nmi
1540	jmp	.Lnmi_return
1541
1542.Lnmi_from_sysenter_stack:
1543	/*
1544	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
1545	 * is using the thread stack right now, so it's safe for us to use it.
1546	 */
1547	movl	%esp, %ebx
1548	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
1549	call	do_nmi
1550	movl	%ebx, %esp
1551
1552.Lnmi_return:
1553	CHECK_AND_APPLY_ESPFIX
1554	RESTORE_ALL_NMI cr3_reg=%edi pop=4
1555	jmp	.Lirq_return
1556
1557#ifdef CONFIG_X86_ESPFIX32
1558.Lnmi_espfix_stack:
1559	/*
 1560	 * Create the ss:esp pointer that the later lss uses to switch back
1561	 */
1562	pushl	%ss
1563	pushl	%esp
1564	addl	$4, (%esp)
1565	/* copy the iret frame of 12 bytes */
1566	.rept 3
1567	pushl	16(%esp)
1568	.endr
1569	pushl	%eax
1570	SAVE_ALL_NMI cr3_reg=%edi
1571	ENCODE_FRAME_POINTER
1572	FIXUP_ESPFIX_STACK			# %eax == %esp
1573	xorl	%edx, %edx			# zero error code
1574	call	do_nmi
1575	RESTORE_ALL_NMI cr3_reg=%edi
1576	lss	12+4(%esp), %esp		# back to espfix stack
1577	jmp	.Lirq_return
1578#endif
1579END(nmi)
1580
1581ENTRY(int3)
1582	ASM_CLAC
1583	pushl	$-1				# mark this as an int
1584
1585	SAVE_ALL switch_stacks=1
1586	ENCODE_FRAME_POINTER
1587	TRACE_IRQS_OFF
1588	xorl	%edx, %edx			# zero error code
1589	movl	%esp, %eax			# pt_regs pointer
1590	call	do_int3
1591	jmp	ret_from_exception
1592END(int3)
1593
1594ENTRY(general_protection)
1595	pushl	$do_general_protection
1596	jmp	common_exception
1597END(general_protection)
1598
1599#ifdef CONFIG_KVM_GUEST
1600ENTRY(async_page_fault)
1601	ASM_CLAC
1602	pushl	$do_async_page_fault
1603	jmp	common_exception_read_cr2
1604END(async_page_fault)
1605#endif
1606
1607ENTRY(rewind_stack_do_exit)
1608	/* Prevent any naive code from trying to unwind to our caller. */
1609	xorl	%ebp, %ebp
1610
1611	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
1612	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
1613
1614	call	do_exit
16151:	jmp 1b
1616END(rewind_stack_do_exit)