Loading...
1/*
2 * linux/boot/head.S
3 *
4 * Copyright (C) 1991, 1992, 1993 Linus Torvalds
5 */
6
/*
 * head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc. elsewhere, on a compressed
 * kernel it will end up at 0x1000 + 1Mb. I hope so, as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there. This is also
 * useful for future device drivers that access the BIOS via VM86 mode.
 */
20
21/*
22 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
23 */
24 .code32
25 .text
26
27#include <linux/init.h>
28#include <linux/linkage.h>
29#include <asm/segment.h>
30#include <asm/boot.h>
31#include <asm/msr.h>
32#include <asm/processor-flags.h>
33#include <asm/asm-offsets.h>
34
35 __HEAD
36 .code32
ENTRY(startup_32)
	/*
	 * 32-bit entry point.  It sits at offset 0 of the image; that
	 * offset is ABI and immutable.
	 *
	 * In:	%esi = address of the real-mode data (boot_params); the
	 *	BP_* offsets below are all applied to it.
	 *
	 * If we come here directly from a bootloader,
	 * kernel(text+data+bss+brk), ramdisk, zero_page and command line
	 * all need to be under the 4G limit.
	 */
	cld
	/*
	 * Test the KEEP_SEGMENTS flag (bit 6 of loadflags) to see if the
	 * bootloader is asking us to not reload segments.
	 */
	testb	$(1<<6), BP_loadflags(%esi)
	jnz	1f

	cli
	movl	$(__BOOT_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
1:

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at.  This can only be done
 * with a short local call on x86.  Nothing else will tell us what
 * address we are running at.  The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) is used as the stack
 * for this calculation.  Only 4 bytes are needed.
 */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp			/* %ebp = run-time address of 1b */
	subl	$1b, %ebp		/* %ebp = load delta */

/* Set up a stack and make sure the cpu supports long mode. */
	movl	$boot_stack_end, %eax
	addl	%ebp, %eax
	movl	%eax, %esp

	call	verify_cpu
	testl	%eax, %eax		/* non-zero result: not usable */
	jnz	no_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
	/* %ebx = load address rounded up to kernel_alignment */
	movl	%ebp, %ebx
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	/*
	 * Addresses are unsigned: a signed comparison would treat any
	 * load address at or above 2G as negative and wrongly fall back
	 * to LOAD_PHYSICAL_ADDR.
	 */
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:

	/* Target address to relocate to for decompression */
	addl	$z_extract_offset, %ebx

/*
 * Prepare for entering 64 bit mode
 */

	/*
	 * Load new GDT with the 64bit segments using 32bit descriptor.
	 * The base field at gdt+2 is patched with the run-time address
	 * of gdt before the table is loaded.
	 */
	leal	gdt(%ebp), %eax
	movl	%eax, gdt+2(%ebp)
	lgdt	gdt(%ebp)

	/* Enable PAE mode */
	movl	%cr4, %eax
	orl	$X86_CR4_PAE, %eax
	movl	%eax, %cr4

 /*
  * Build early 4G boot pagetable
  */
	/* Initialize Page tables to 0 (6 pages, cleared 4 bytes at a time) */
	leal	pgtable(%ebx), %edi
	xorl	%eax, %eax
	movl	$((4096*6)/4), %ecx
	rep	stosl

	/* Build Level 4: one entry pointing at the level 3 page */
	leal	pgtable + 0(%ebx), %edi
	leal	0x1007 (%edi), %eax
	movl	%eax, 0(%edi)

	/* Build Level 3: four entries, one per level 2 page */
	leal	pgtable + 0x1000(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	$4, %ecx
1:	movl	%eax, 0x00(%edi)
	addl	$0x00001000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Build Level 2: 2048 entries stepping by 2M (4G in total) */
	leal	pgtable + 0x2000(%ebx), %edi
	movl	$0x00000183, %eax
	movl	$2048, %ecx
1:	movl	%eax, 0(%edi)
	addl	$0x00200000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Enable the boot page tables */
	leal	pgtable(%ebx), %eax
	movl	%eax, %cr3

	/* Enable Long mode in EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* After gdt is loaded */
	xorl	%eax, %eax
	lldt	%ax
	movl	$0x20, %eax		/* offset of the TS descriptor in gdt */
	ltr	%ax

	/*
	 * Setup for the jump to 64bit mode
	 *
	 * When the jump is performed we will be in long mode but
	 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
	 * (and in turn EFER.LMA = 1).  To jump into 64bit mode we use
	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
	 * We place all of the values on our mini stack so lret can be
	 * used to perform that far jump.
	 */
	pushl	$__KERNEL_CS
	leal	startup_64(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
	/* A non-zero first word of efi32_config selects handover_entry */
	movl	efi32_config(%ebp), %ebx
	testl	%ebx, %ebx
	jz	1f
	leal	handover_entry(%ebp), %eax
1:
#endif
	pushl	%eax

	/* Enter paged protected Mode, activating Long Mode */
	movl	$(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
	movl	%eax, %cr0

	/* Jump from 32bit compatibility mode into 64bit mode. */
	lret
ENDPROC(startup_32)
198
#ifdef CONFIG_EFI_MIXED
	.org 0x190
/*
 * efi32_stub_entry - 32-bit EFI entry, fixed at offset 0x190.
 *
 * The three words above the return address on the stack are popped into
 * %ecx, %edx and %esi.  %ecx and %edx are stored in the first two
 * quadword slots of efi32_config, the current GDTR is saved in
 * efi32_boot_gdt, efi_config is pointed at efi32_config and control
 * passes to startup_32 with %esi still holding the third word.
 */
ENTRY(efi32_stub_entry)
	addl	$4, %esp		/* drop the return address */
	popl	%ecx
	popl	%edx
	popl	%esi

	/* Borrow the scratch field of the real-mode data as a 4-byte stack */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp
	subl	$1b, %ebp		/* %ebp = load delta */

	movl	%ecx, efi32_config(%ebp)
	movl	%edx, efi32_config+8(%ebp)
	sgdtl	efi32_boot_gdt(%ebp)

	/* efi_config = run-time address of efi32_config */
	leal	efi32_config(%ebp), %eax
	movl	%eax, efi_config(%ebp)

	jmp	startup_32
ENDPROC(efi32_stub_entry)
#endif
222
223 .code64
224 .org 0x200
ENTRY(startup_64)
	/*
	 * 64bit entry is 0x200 and it is ABI so immutable!
	 * We come here either from startup_32 or directly from a
	 * 64bit bootloader.
	 *
	 * In:	%rsi = address of the real-mode data (boot_params).
	 *
	 * If we come here from a bootloader, kernel(text+data+bss+brk),
	 * ramdisk, zero_page, command line could be above 4G.
	 * We depend on an identity mapped page table being provided
	 * that maps our entire kernel(text+data+bss+brk), zero page
	 * and command line.
	 */
#ifdef CONFIG_EFI_STUB
	/*
	 * The entry point for the PE/COFF executable is efi_pe_entry, so
	 * only legacy boot loaders will execute this jmp.
	 */
	jmp	preferred_addr

ENTRY(efi_pe_entry)
	movq	%rcx, efi64_config(%rip)	/* Handle */
	movq	%rdx, efi64_config+8(%rip) /* EFI System table pointer */

	leaq	efi64_config(%rip), %rax
	movq	%rax, efi_config(%rip)

	/* %rbp = load delta (run-time minus link-time address) */
	call	1f
1:	popq	%rbp
	subq	$1b, %rbp

	/*
	 * Relocate efi_config->call().
	 */
	addq	%rbp, efi64_config+88(%rip)

	movq	%rax, %rdi		/* %rax still holds &efi64_config */
	call	make_boot_params
	testq	%rax, %rax		/* zero result: give up */
	je	fail
	mov	%rax, %rsi
	leaq	startup_32(%rip), %rax
	movl	%eax, BP_code32_start(%rsi)
	jmp	2f		/* Skip the relocation */

handover_entry:
	call	1f
1:	popq	%rbp
	subq	$1b, %rbp

	/*
	 * Relocate efi_config->call().
	 */
	movq	efi_config(%rip), %rax
	addq	%rbp, 88(%rax)
2:
	movq	efi_config(%rip), %rdi
	call	efi_main
	movq	%rax,%rsi
	testq	%rax, %rax
	jne	2f
fail:
	/* EFI init failed, so hang. */
	hlt
	jmp	fail
2:
	/*
	 * Use the full 64-bit pointer in %rsi: addressing through %esi
	 * would silently truncate it.
	 */
	movl	BP_code32_start(%rsi), %eax
	leaq	preferred_addr(%rax), %rax
	jmp	*%rax

preferred_addr:
#endif

	/* Setup data segments. */
	xorl	%eax, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/*
	 * Compute the decompressed kernel start address.  It is where
	 * we were loaded at aligned to a 2M boundary. %rbp contains the
	 * decompressed kernel start address.
	 *
	 * If it is a relocatable kernel then decompress and run the kernel
	 * from load address aligned to 2MB addr, otherwise decompress and
	 * run the kernel from LOAD_PHYSICAL_ADDR
	 *
	 * We cannot rely on the calculation done in 32-bit mode, since we
	 * may have been invoked via the 64-bit entry point.
	 */

	/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
	movl	BP_kernel_alignment(%rsi), %eax
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	/*
	 * Addresses are unsigned, so the comparison must be unsigned too;
	 * a signed test misorders addresses with the top bit set.
	 */
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:

	/* Target address to relocate to for decompression */
	leaq	z_extract_offset(%rbp), %rbx

	/* Set up the stack */
	leaq	boot_stack_end(%rbx), %rsp

	/* Zero EFLAGS */
	pushq	$0
	popfq

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.  The copy runs
 * backwards (std), starting from the last quadword before _bss.
 */
	pushq	%rsi
	leaq	(_bss-8)(%rip), %rsi
	leaq	(_bss-8)(%rbx), %rdi
	movq	$_bss /* - $startup_32 */, %rcx
	shrq	$3, %rcx
	std
	rep	movsq
	cld
	popq	%rsi

/*
 * Jump to the relocated address.
 */
	leaq	relocated(%rbx), %rax
	jmp	*%rax
360
#ifdef CONFIG_EFI_STUB
	.org 0x390
/*
 * efi64_stub_entry - 64-bit EFI entry, fixed at offset 0x390.
 *
 * In:	%rdi = handle, %rsi = EFI system table pointer,
 *	%rdx = value handed on to handover_entry in %rsi.
 *
 * Records the handle and system table in efi64_config, makes efi_config
 * point at efi64_config and continues at handover_entry.
 */
ENTRY(efi64_stub_entry)
	movq	%rdi, efi64_config(%rip)	/* Handle */
	movq	%rsi, efi64_config+8(%rip)	/* EFI System table pointer */

	leaq	efi64_config(%rip), %rax
	movq	%rax, efi_config(%rip)		/* efi_config = &efi64_config */

	movq	%rdx, %rsi
	jmp	handover_entry
ENDPROC(efi64_stub_entry)
#endif
374
375 .text
/*
 * relocated - runs from the copy of the image made by startup_64.
 *
 * In:	%rbx = address the image was relocated to,
 *	%rbp = output address for the decompressed kernel,
 *	%rsi = real mode data pointer.
 */
relocated:

/*
 * Zero the BSS, eight bytes at a time (the stack holds nothing yet).
 */
	xorl	%eax, %eax
	leaq	_bss(%rip), %rdi
	leaq	_ebss(%rip), %rcx
	subq	%rdi, %rcx
	shrq	$3, %rcx
	rep	stosq

/*
 * Fix up our own GOT: add %rbx to every 8-byte slot in [_got, _egot).
 */
	leaq	_got(%rip), %rdx
	leaq	_egot(%rip), %rcx
	jmp	.Lgot_check
.Lgot_fixup:
	addq	%rbx, (%rdx)
	addq	$8, %rdx
.Lgot_check:
	cmpq	%rcx, %rdx
	jb	.Lgot_fixup

/*
 * Decompress the kernel, then jump to it.
 */
	pushq	%rsi			/* keep the real mode pointer */
	movq	%rsi, %rdi		/* arg 1: real mode address */
	leaq	boot_heap(%rip), %rsi	/* arg 2: malloc area for uncompression */
	leaq	input_data(%rip), %rdx	/* arg 3: input_data */
	movl	$z_input_len, %ecx	/* arg 4: input_len */
	movq	%rbp, %r8		/* arg 5: output target address */
	movq	$z_output_len, %r9	/* arg 6: decompressed length */
	call	decompress_kernel	/* kernel location comes back in %rax */
	popq	%rsi

/*
 * Enter the decompressed kernel.
 */
	jmp	*%rax
418
419 .code32
/* Reached when verify_cpu reports failure: park the CPU forever. */
no_longmode:
	/* Not an x86-64 CPU; hlt again after every wake-up */
	hlt
	jmp	no_longmode
425
426#include "../../kernel/verify_cpu.S"
427
428 .data
/*
 * Boot GDT.  The first 8 bytes double as the pseudo-descriptor passed to
 * lgdt (16-bit limit followed by a 32-bit base) and occupy the table's
 * first slot.  startup_32 overwrites the base at gdt+2 with the run-time
 * address before loading it.
 *
 * The limit is the offset of the last valid byte of the table, so it is
 * the table size minus one.
 */
gdt:
	.word	gdt_end - gdt - 1
	.long	gdt
	.word	0
	.quad	0x0000000000000000	/* NULL descriptor */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
	.quad	0x0080890000000000	/* TS descriptor */
	.quad   0x0000000000000000	/* TS continued */
gdt_end:
439
#ifdef CONFIG_EFI_STUB
/* Pointer to whichever of the config blocks below is in use */
efi_config:
	.quad	0

/*
 * Each config block is 11 zeroed quadwords (the entry code stores the
 * handle in the first and the system table pointer in the second),
 * followed at offset 88 by the call pointer that the entry code
 * relocates, and a trailing one-byte flag.
 * NOTE(review): the flag looks like "is 64-bit" (0 here, 1 below) --
 * confirm against the C definition of the structure.
 */
#ifdef CONFIG_EFI_MIXED
	.global	efi32_config
efi32_config:
	.skip	11*8, 0
	.quad	efi64_thunk
	.byte	0
#endif

	.global	efi64_config
efi64_config:
	.skip	11*8, 0
	.quad	efi_call6
	.byte	1
#endif /* CONFIG_EFI_STUB */
458
459/*
460 * Stack and heap for uncompression
461 */
462 .bss
463 .balign 4
/* Heap handed to the decompressor as its malloc area */
boot_heap:
	.skip	BOOT_HEAP_SIZE, 0
/* Boot stack; the entry code loads the stack pointer from boot_stack_end */
boot_stack:
	.skip	BOOT_STACK_SIZE, 0
boot_stack_end:
469
470/*
471 * Space for page tables (not in .bss so not zeroed)
472 */
473 .section ".pgtable","a",@nobits
474 .balign 4096
/* Six 4K pages for the early boot page tables; startup_32 clears them */
pgtable:
	.skip	6*4096, 0
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * linux/boot/head.S
4 *
5 * Copyright (C) 1991, 1992, 1993 Linus Torvalds
6 */
7
/*
 * head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc. elsewhere, on a compressed
 * kernel it will end up at 0x1000 + 1Mb. I hope so, as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there. This is also
 * useful for future device drivers that access the BIOS via VM86 mode.
 */
21
22/*
23 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
24 */
25 .code32
26 .text
27
28#include <linux/init.h>
29#include <linux/linkage.h>
30#include <asm/segment.h>
31#include <asm/boot.h>
32#include <asm/msr.h>
33#include <asm/processor-flags.h>
34#include <asm/asm-offsets.h>
35#include <asm/bootparam.h>
36#include <asm/desc_defs.h>
37#include <asm/trapnr.h>
38#include "pgtable.h"
39
40/*
41 * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
42 * in assembly errors due to trying to move .org backward due to the excessive
43 * alignment.
44 */
45#undef __ALIGN
46#define __ALIGN .balign 16, 0x90
47
48/*
49 * Locally defined symbols should be marked hidden:
50 */
51 .hidden _bss
52 .hidden _ebss
53 .hidden _end
54
55 __HEAD
56
57/*
58 * This macro gives the relative virtual address of X, i.e. the offset of X
59 * from startup_32. This is the same as the link-time virtual address of X,
60 * since startup_32 is at 0, but defining it this way tells the
61 * assembler/linker that we do not want the actual run-time address of X. This
62 * prevents the linker from trying to create unwanted run-time relocation
63 * entries for the reference when the compressed kernel is linked as PIE.
64 *
65 * A reference X(%reg) will result in the link-time VA of X being stored with
66 * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
67 * adds the 64-bit base address where the kernel is loaded.
68 *
69 * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
70 * and no run-time relocation.
71 *
72 * The macro should be used as a displacement with a base register containing
73 * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
74 * [$ rva(X)].
75 *
76 * This macro can only be used from within the .head.text section, since the
77 * expression requires startup_32 to be in the same section as the code being
78 * assembled.
79 */
80#define rva(X) ((X) - startup_32)
81
82 .code32
SYM_FUNC_START(startup_32)
	/*
	 * The 32-bit entry point lives at offset 0; that offset is ABI and
	 * cannot change.  A boot loader that enters here directly must keep
	 * the kernel (text+data+bss+brk), ramdisk, zero page and command
	 * line below the 4G limit.
	 *
	 * %esi carries the address of the real-mode data (boot_params).
	 */
	cld
	cli

/*
 * Find out where we are actually running.  On x86 the only way to learn
 * the current address is a short local call, so the 4-byte scratch field
 * of the real-mode data (at 0x1e4) is borrowed as a stack for it.
 * Afterwards %ebp holds the run-time address of startup_32.
 */
	leal	(BP_scratch+4)(%esi), %esp
	call	1f
1:	popl	%ebp
	subl	$ rva(1b), %ebp

	/* Patch the GDT's own base field with its run-time address, load it */
	leal	rva(gdt)(%ebp), %eax
	movl	%eax, 2(%eax)
	lgdt	(%eax)

	/* Reload every data segment register from the new GDT */
	movl	$__BOOT_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs
	movl	%eax, %ss

	/* Switch to the real boot stack, then reload CS with a far return */
	leal	rva(boot_stack_end)(%ebp), %esp

	pushl	$__KERNEL32_CS
	leal	rva(1f)(%ebp), %eax
	pushl	%eax
	lretl
1:

	/* Exception handling needed for SEV-ES */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	call	startup32_load_idt
#endif

	/* Bail out unless the CPU can do long mode */
	call	verify_cpu
	testl	%eax, %eax
	jnz	.Lno_longmode

/*
 * Work out where the image has to be moved for safe in-place
 * decompression.
 *
 * %ebp is the address the boot loader put us at; %ebx ends up as the
 * address the kernel image is temporarily moved to.
 */

#ifdef CONFIG_RELOCATABLE
	movl	%ebp, %ebx

#ifdef CONFIG_EFI_STUB
/*
 * When the EFI LoadImage service loaded us, startup_32 sits at an offset
 * from the start of the space allocated for the image.  efi_pe_entry
 * records that offset in image_offset so the whole buffer can be used:
 *	image_offset = startup_32 - image_base
 * In every other case image_offset is zero and changes nothing.
 */
	subl	rva(image_offset)(%ebp), %ebx
#endif

	/* Round %ebx up to kernel_alignment; never go below LOAD_PHYSICAL_ADDR */
	movl	BP_kernel_alignment(%esi), %eax
	decl	%eax
	addl	%eax, %ebx
	notl	%eax
	andl	%eax, %ebx
	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
	jae	1f
#endif
	movl	$LOAD_PHYSICAL_ADDR, %ebx
1:

	/* Relocation target: output address + init_size - image size */
	addl	BP_init_size(%esi), %ebx
	subl	$ rva(_end), %ebx

/*
 * Get ready for 64-bit mode
 */

	/* Turn on PAE */
	movl	%cr4, %eax
	orl	$X86_CR4_PAE, %eax
	movl	%eax, %cr4

 /*
  * Construct the early 4G boot page table
  */
	/*
	 * With SEV active the encryption mask has to be set in the page
	 * table entries so that the kernel is copied and decompressed
	 * encrypted.  %edx carries that mask for the upper 32 bits of each
	 * entry (zero when SEV is not in use).
	 */
	xorl	%edx, %edx
#ifdef	CONFIG_AMD_MEM_ENCRYPT
	call	get_sev_encryption_bit
	xorl	%edx, %edx
	testl	%eax, %eax
	jz	1f
	subl	$32, %eax	/* the encryption bit always lies above bit 31 */
	bts	%eax, %edx	/* build the mask for the high dword */
	/*
	 * Flag MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
	 * startup32_check_sev_cbit() performs its check.  sev_enable()
	 * later fills sev_status with everything MSR_AMD_SEV_STATUS
	 * reports; for now only this one bit is required.
	 */
	movl	$1, rva(sev_status)(%ebp)
1:
#endif

	/* Clear the page table area */
	leal	rva(pgtable)(%ebx), %edi
	xorl	%eax, %eax
	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
	rep	stosl

	/* Level 4: a single entry pointing at the level 3 page */
	leal	rva(pgtable)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	%eax, (%edi)
	addl	%edx, 4(%edi)

	/* Level 3: four entries, one for each level 2 page */
	leal	rva(pgtable + 0x1000)(%ebx), %edi
	leal	0x1007(%edi), %eax
	movl	$4, %ecx
1:	movl	%eax, (%edi)
	addl	%edx, 4(%edi)
	addl	$0x00001000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Level 2: 2048 entries, each advancing by 2M */
	leal	rva(pgtable + 0x2000)(%ebx), %edi
	movl	$0x00000183, %eax
	movl	$2048, %ecx
1:	movl	%eax, (%edi)
	addl	%edx, 4(%edi)
	addl	$0x00200000, %eax
	addl	$8, %edi
	decl	%ecx
	jnz	1b

	/* Point CR3 at the boot page tables */
	leal	rva(pgtable)(%ebx), %eax
	movl	%eax, %cr3

	/* Set LME in EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	wrmsr

	/* The GDT is in place: clear LDTR and load the task register */
	xorl	%eax, %eax
	lldt	%ax
	movl	$__BOOT_TSS, %eax
	ltr	%ax

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/* With SEV active, verify that the C-bit position is right */
	call	startup32_check_sev_cbit
#endif

	/*
	 * Prepare the jump into 64-bit mode.
	 *
	 * Once paging is switched on below we are in long mode, but still
	 * in 32-bit compatibility mode: EFER.LME = 1, CS.L = 0, CS.D = 1
	 * (and therefore EFER.LMA = 1).  Real 64-bit mode needs
	 * __KERNEL_CS, whose descriptor has CS.L = 1.  Selector and target
	 * are pushed on our small stack so that lret can be used to
	 * perform the far jump.
	 */
	leal	rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
	cmpb	$1, rva(efi_is64)(%ebp)
	je	1f
	leal	rva(startup_64_mixed_mode)(%ebp), %eax
1:
#endif

	pushl	$__KERNEL_CS
	pushl	%eax

	/* Load CR0_STATE into CR0; this activates long mode */
	movl	$CR0_STATE, %eax
	movl	%eax, %cr0

	/* Far return from 32-bit compatibility mode into 64-bit mode */
	lret
SYM_FUNC_END(startup_32)
296
#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL)
	.org 0x190
/*
 * efi32_stub_entry - 32-bit EFI handover entry, fixed at offset 0x190.
 *
 * Drops the return address, pops the next three stack words into %ecx,
 * %edx and %esi, and continues in efi32_entry.
 */
SYM_FUNC_START(efi32_stub_entry)
	addl	$4, %esp		/* throw away the return address */
	popl	%ecx
	popl	%edx
	popl	%esi
	jmp	efi32_entry
SYM_FUNC_END(efi32_stub_entry)
#endif
307
308 .code64
309 .org 0x200
SYM_CODE_START(startup_64)
	/*
	 * The 64-bit entry point is at 0x200; that offset is ABI and cannot
	 * change.  We get here from startup_32 or straight from a 64-bit
	 * boot loader.
	 *
	 * A boot loader entering here may have placed the kernel
	 * (text+data+bss+brk), ramdisk, zero page and command line above
	 * 4G.  It must supply an identity-mapped page table covering the
	 * whole kernel (text+data+bss+brk), the zero page and the command
	 * line.
	 *
	 * %rsi carries the address of the real-mode data (boot_params).
	 */

	cld
	cli

	/* Null selectors in all data segment registers */
	xorl	%eax, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/*
	 * Determine where the decompressed kernel starts; the result is
	 * kept in %rbp.  It is the load address aligned to a 2M boundary.
	 *
	 * A relocatable kernel is decompressed to, and run from, the load
	 * address aligned to 2MB; otherwise LOAD_PHYSICAL_ADDR is used.
	 *
	 * The value computed in 32-bit mode cannot be reused, because we
	 * may have been entered through the 64-bit entry point.
	 */

	/* Begin with the address we are running at */
#ifdef CONFIG_RELOCATABLE
	leaq	startup_32(%rip) /* - $startup_32 */, %rbp

#ifdef CONFIG_EFI_STUB
/*
 * When the EFI LoadImage service loaded us, startup_32 sits at an offset
 * from the start of the space allocated for the image.  efi_pe_entry
 * records that offset in image_offset so the whole buffer can be used:
 *	image_offset = startup_32 - image_base
 * In every other case image_offset is zero and changes nothing.
 */
	movl    image_offset(%rip), %eax
	subq	%rax, %rbp
#endif

	/* Round %rbp up to kernel_alignment; never go below LOAD_PHYSICAL_ADDR */
	movl	BP_kernel_alignment(%rsi), %eax
	decl	%eax
	addq	%rax, %rbp
	notq	%rax
	andq	%rax, %rbp
	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
	jae	1f
#endif
	movq	$LOAD_PHYSICAL_ADDR, %rbp
1:

	/* Relocation target: %rbx = %rbp + init_size - image size */
	movl	BP_init_size(%rsi), %ebx
	subl	$ rva(_end), %ebx
	addq	%rbp, %rbx

	/* Stack at the relocated boot_stack_end */
	leaq	rva(boot_stack_end)(%rbx), %rsp

	/*
	 * We are now in long mode with 4-level paging, but 5-level paging
	 * may be wanted instead (or the other way round).
	 *
	 * That cannot be changed in place: setting or clearing CR4.LA57
	 * while in long mode raises #GP, so long mode and paging have to
	 * be switched off first.
	 *
	 * A trampoline in low memory is also required to go from 4- to
	 * 5-level paging when the boot loader placed the kernel above 4G
	 * without enabling 5-level paging for us.
	 *
	 * The same trampoline handles the 5- to 4-level direction, e.g.
	 * when a 4-level kernel is started through kexec() from a kernel
	 * that was running with 5-level paging.
	 *
	 * The trampoline needs its top-level page table in low memory,
	 * because a 64-bit value cannot be loaded into CR3 from 32-bit
	 * mode.
	 *
	 * The trampoline is taken even when the paging mode is already
	 * the desired one, so that this code is exercised on every boot.
	 */

	/* Install a GDT that contains a 32-bit code segment */
	leaq	gdt64(%rip), %rax
	addq	%rax, 2(%rax)
	lgdt	(%rax)

	/* Reload CS so that IRET returns to a CS that exists in the GDT */
	pushq	$__KERNEL_CS
	leaq	.Lon_kernel_cs(%rip), %rax
	pushq	%rax
	lretq

.Lon_kernel_cs:

	/* %rsi is kept on the stack across the call */
	pushq	%rsi
	call	load_stage1_idt
	popq	%rsi

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * With the stage1 interrupt handlers installed, #VC exceptions
	 * raised by CPUID instructions can be handled for SEV-ES guests.
	 *
	 * SEV-SNP additionally needs its CPUID table set up before any
	 * CPUID instruction is issued.  sev_enable() does that now and
	 * also performs the remaining SEV detection/setup, so that it is
	 * complete before any code that depends on it.
	 */
	pushq	%rsi
	movq	%rsi, %rdi		/* real mode address */
	call	sev_enable
	popq	%rsi
#endif

	/*
	 * paging_prepare() sets up the trampoline and decides whether
	 * 5-level paging has to be enabled.
	 *
	 * It returns a two-quadword structure in RDX:RAX:
	 *   - RAX is the address of the trampoline.
	 *   - a non-zero RDX means the trampoline must enable 5-level
	 *     paging.
	 *
	 * RSI holds the real mode data and must survive the call.
	 */
	pushq	%rsi
	movq	%rsi, %rdi		/* real mode address */
	call	paging_prepare
	popq	%rsi

	/* Keep the trampoline address in RCX */
	movq	%rax, %rcx

	/*
	 * RDI = address of trampoline_return(); the trampoline jumps
	 * there to get back to the main code.
	 */
	leaq	trampoline_return(%rip), %rdi

	/* Far return into compatibility mode (CS.L = 0, CS.D = 1) */
	pushq	$__KERNEL32_CS
	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
	pushq	%rax
	lretq
trampoline_return:
	/* The 32-bit trampoline ran on its own stack; restore ours */
	leaq	rva(boot_stack_end)(%rbx), %rsp

	/*
	 * cleanup_trampoline() restores the trampoline memory.
	 *
	 * RDI is the address of the page table to use in place of the one
	 * in trampoline memory (if required).
	 *
	 * RSI holds the real mode data and must survive the call.
	 */
	pushq	%rsi
	leaq	rva(top_pgtable)(%rbx), %rdi
	call	cleanup_trampoline
	popq	%rsi

	/* Clear EFLAGS */
	pushq	$0
	popfq

/*
 * Move the compressed kernel to the end of our buffer, where in-place
 * decompression is safe.  The copy runs backwards (std), starting from
 * the last quadword before _bss.
 */
	pushq	%rsi
	leaq	(_bss-8)(%rip), %rsi
	leaq	rva(_bss-8)(%rbx), %rdi
	movl	$(_bss - startup_32), %ecx
	shrl	$3, %ecx
	std
	rep	movsq
	cld
	popq	%rsi

	/*
	 * The GDT may be overwritten by the copy above or later by
	 * extract_kernel.  Repoint GDTR at the copied GDT to be safe.
	 */
	leaq	rva(gdt64)(%rbx), %rax
	leaq	rva(gdt)(%rbx), %rdx
	movq	%rdx, 2(%rax)
	lgdt	(%rax)

/*
 * Continue in the relocated copy.
 */
	leaq	rva(.Lrelocated)(%rbx), %rax
	jmp	*%rax
SYM_CODE_END(startup_64)
525
#ifdef CONFIG_EFI_STUB
#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
	.org 0x390
#endif
/*
 * efi64_stub_entry - 64-bit EFI stub entry (also exported under the
 * name efi_stub_entry).
 *
 * In:	%rdx = boot_params pointer; the other argument registers are
 *	passed through to efi_main untouched.
 *
 * Calls efi_main, then jumps to startup_64 at its offset from the
 * address efi_main returned, with %rsi = boot_params.
 */
SYM_FUNC_START(efi64_stub_entry)
	and	$~0xf, %rsp			/* 16-byte align the stack */
	movq	%rdx, %rbx			/* boot_params pointer, kept across the call */
	call	efi_main
	movq	%rbx,%rsi
	leaq	rva(startup_64)(%rax), %rax
	jmp	*%rax
SYM_FUNC_END(efi64_stub_entry)
SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
#endif
540
541 .text
/*
 * .Lrelocated - runs from the copy of the image made by startup_64.
 *
 * In:	%rsi = real mode data (boot_params) pointer,
 *	%rbp = output address for the decompressed kernel.
 */
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

/*
 * Zero the BSS, eight bytes at a time (the stack holds nothing yet).
 */
	xorl	%eax, %eax
	leaq    _bss(%rip), %rdi
	leaq    _ebss(%rip), %rcx
	subq	%rdi, %rcx
	shrq	$3, %rcx
	rep	stosq

	/* boot_params stays on the stack across both calls below */
	pushq	%rsi
	call	load_stage2_idt

	/* initialize_identity_maps() takes boot_params as its argument */
	movq	(%rsp), %rdi
	call	initialize_identity_maps
	popq	%rsi

/*
 * Extract the kernel, then jump to it.
 */
	pushq	%rsi			/* keep the real mode pointer */
	movq	%rsi, %rdi		/* arg 1: real mode address */
	leaq	boot_heap(%rip), %rsi	/* arg 2: malloc area for uncompression */
	leaq	input_data(%rip), %rdx  /* arg 3: input_data */
	movl	input_len(%rip), %ecx	/* arg 4: input_len */
	movq	%rbp, %r8		/* arg 5: output target address */
	movl	output_len(%rip), %r9d	/* arg 6: decompressed length, end of relocs */
	call	extract_kernel		/* kernel location comes back in %rax */
	popq	%rsi

/*
 * Enter the decompressed kernel.
 */
	jmp	*%rax
SYM_FUNC_END(.Lrelocated)
580
581 .code32
/*
 * Source of the 32-bit trampoline; it gets copied to low memory and is
 * executed there.
 *
 * In:	RDI = return address (possibly above 4G),
 *	ECX = base address of the trampoline memory,
 *	RDX = non-zero if the trampoline has to enable 5-level paging.
 */
SYM_CODE_START(trampoline_32bit_src)
	/* Data and stack segments */
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %ss

	/* Private stack inside the trampoline memory */
	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp

	/* Paging off */
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	/* Which paging mode is wanted once the trampoline is done? */
	testl	%edx, %edx
	jz	.Ltramp_want_4level

	/* 5-level wanted: leave CR3 alone if LA57 is already set */
	movl	%cr4, %eax
	testl	$X86_CR4_LA57, %eax
	jnz	.Ltramp_cr3_done
	jmp	.Ltramp_load_cr3
.Ltramp_want_4level:
	/* 4-level wanted: leave CR3 alone if LA57 is already clear */
	movl	%cr4, %eax
	testl	$X86_CR4_LA57, %eax
	jz	.Ltramp_cr3_done
.Ltramp_load_cr3:
	/* Use the new top-level page table kept in the trampoline memory */
	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
	movl	%eax, %cr3
.Ltramp_cr3_done:
	/* Set EFER.LME=1 as a precaution in case the hypervisor pulls the rug */
	pushl	%ecx			/* rdmsr/wrmsr use %ecx and %edx */
	pushl	%edx
	movl	$MSR_EFER, %ecx
	rdmsr
	btsl	$_EFER_LME, %eax
	/* Skip the EFER write when the bit was already set (for TDX guest) */
	jc	.Ltramp_efer_done
	wrmsr
.Ltramp_efer_done:
	popl	%edx
	popl	%ecx

#ifdef CONFIG_X86_MCE
	/*
	 * Keep CR4.MCE if the kernel is going to enable #MC support.
	 * Clearing MCE may fault in some environments (those also force
	 * #MC support).  A machine check arriving before #MC support is
	 * fully configured crashes the system whatever CR4.MCE is set to
	 * here.
	 */
	movl	%cr4, %eax
	andl	$X86_CR4_MCE, %eax
#else
	movl	$0, %eax
#endif

	/* Turn on PAE, plus LA57 when 5-level paging was requested */
	orl	$X86_CR4_PAE, %eax
	testl	%edx, %edx
	jz	.Ltramp_cr4_ready
	orl	$X86_CR4_LA57, %eax
.Ltramp_cr4_ready:
	movl	%eax, %cr4

	/* Address of paging_enabled() inside the running trampoline copy */
	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax

	/* Build the far-return frame that leads back to long mode */
	pushl	$__KERNEL_CS
	pushl	%eax

	/* Paging back on */
	movl	%cr0, %eax
	btsl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	lret
SYM_CODE_END(trampoline_32bit_src)
670
671 .code64
/* 64-bit tail of the trampoline: go back to the address held in %rdi */
SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
	/* Leave the trampoline */
	jmp	*%rdi
SYM_FUNC_END(.Lpaging_enabled)

	/*
	 * Size guard for the trampoline code: the build must fail if the
	 * code grows past TRAMPOLINE_32BIT_CODE_SIZE bytes.
	 */
	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
683
684 .code32
/* Reached when verify_cpu reports failure: park the CPU forever. */
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
	/* Not an x86-64 CPU: we cannot continue, so halt on purpose */
.Lno_longmode_halt:
	hlt
	jmp     .Lno_longmode_halt
SYM_FUNC_END(.Lno_longmode)
691
692 .globl verify_cpu
693#include "../../kernel/verify_cpu.S"
694
695 .data
/*
 * Pseudo-descriptor for the 64-bit lgdt: 16-bit limit followed by a
 * 64-bit base.  The base is stored as the offset of gdt from gdt64;
 * startup_64 adds the run-time address of gdt64 before loading it.
 */
SYM_DATA_START_LOCAL(gdt64)
	.word	gdt_end - gdt - 1
	.quad   gdt - gdt64
SYM_DATA_END(gdt64)
	.balign	8
/*
 * Boot GDT.  The first 8 bytes are the pseudo-descriptor used by the
 * 32-bit lgdt (limit, 32-bit base patched in by startup_32, padding)
 * and take up the table's first slot.
 */
SYM_DATA_START_LOCAL(gdt)
	.word	gdt_end - gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
	.quad	0x0080890000000000	/* TS descriptor */
	.quad   0x0000000000000000	/* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
711
/* IDT pseudo-descriptor: limit = table size - 1, base left as zero here */
SYM_DATA_START(boot_idt_desc)
	.word	boot_idt_end - boot_idt - 1
	.quad	0
SYM_DATA_END(boot_idt_desc)
	.balign 8
/* The IDT itself: BOOT_IDT_ENTRIES zeroed 16-byte gates */
SYM_DATA_START(boot_idt)
	.fill	BOOT_IDT_ENTRIES * 2, 8, 0
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
723
724/*
725 * Stack and heap for uncompression
726 */
727 .bss
728 .balign 4
/* Heap handed to extract_kernel as its malloc area */
SYM_DATA_START_LOCAL(boot_heap)
	.fill BOOT_HEAP_SIZE, 1, 0
SYM_DATA_END(boot_heap)

/* Boot stack; boot_stack_end is padded to a 16-byte boundary */
SYM_DATA_START_LOCAL(boot_stack)
	.fill BOOT_STACK_SIZE, 1, 0
	.balign 16
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
735
736/*
737 * Space for page tables (not in .bss so not zeroed)
738 */
739 .section ".pgtable","aw",@nobits
740 .balign 4096
/* Early boot page tables, BOOT_PGT_SIZE bytes */
SYM_DATA_START_LOCAL(pgtable)
	.fill BOOT_PGT_SIZE, 1, 0
SYM_DATA_END(pgtable)

/*
 * One page that takes over from the page table held in the trampoline
 * memory.
 */
SYM_DATA_START_LOCAL(top_pgtable)
	.fill PAGE_SIZE, 1, 0
SYM_DATA_END(top_pgtable)