Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 | // SPDX-License-Identifier: GPL-2.0 /* * This code is used on x86_64 to create page table identity mappings on * demand by building up a new set of page tables (or appending to the * existing ones), and then switching over to them when ready. * * Copyright (C) 2015-2016 Yinghai Lu * Copyright (C) 2016 Kees Cook */ /* No MITIGATION_PAGE_TABLE_ISOLATION support needed either: */ #undef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include "error.h" #include "misc.h" /* These actually do the work of building the kernel identity maps. */ #include <linux/pgtable.h> #include <asm/cmpxchg.h> #include <asm/trap_pf.h> #include <asm/trapnr.h> #include <asm/init.h> /* Use the static base for this part of the boot process */ #undef __PAGE_OFFSET #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" #define _SETUP #include <asm/setup.h> /* For COMMAND_LINE_SIZE */ #undef _SETUP extern unsigned long get_cmd_line_ptr(void); /* Used by PAGE_KERN* macros: */ pteval_t __default_kernel_pte_mask __read_mostly = ~0; /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; unsigned long pgt_buf_size; unsigned long pgt_buf_offset; }; /* * Allocates space for a page table entry, using struct alloc_pgt_data * above. Besides the local callers, this is used as the allocation * callback in mapping_info below. */ static void *alloc_pgt_page(void *context) { struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; unsigned char *entry; /* Validate there is space available for a new page. */ if (pages->pgt_buf_offset >= pages->pgt_buf_size) { debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); debug_putaddr(pages->pgt_buf_offset); debug_putaddr(pages->pgt_buf_size); return NULL; } /* Consumed more tables than expected? */ if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) { debug_putstr("pgt_buf running low in " __FILE__ "\n"); debug_putstr("Need to raise BOOT_PGT_SIZE?\n"); debug_putaddr(pages->pgt_buf_offset); debug_putaddr(pages->pgt_buf_size); } entry = pages->pgt_buf + pages->pgt_buf_offset; pages->pgt_buf_offset += PAGE_SIZE; return entry; } /* Used to track our allocated page tables. */ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long top_level_pgt; phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; /* * Mapping information structure passed to kernel_ident_mapping_init(). * Due to relocation, pointers must be assigned at run time not build time. */ static struct x86_mapping_info mapping_info; /* * Adds the specified range to the identity mappings. */ void kernel_add_identity_map(unsigned long start, unsigned long end) { int ret; /* Align boundary to 2M. */ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); if (start >= end) return; /* Build the mapping. */ ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); if (ret) error("Error: kernel_ident_mapping_init() failed\n"); } /* Locates and clears a region for a new top level page table. */ void initialize_identity_maps(void *rmode) { unsigned long cmdline; struct setup_data *sd; /* Exclude the encryption mask from __PHYSICAL_MASK */ physical_mask &= ~sme_me_mask; /* Init mapping_info with run-time function/buffer pointers. */ mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.context = &pgt_data; mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask; mapping_info.kernpg_flag = _KERNPG_TABLE; /* * It should be impossible for this not to already be true, * but since calling this a second time would rewind the other * counters, let's just make sure this is reset too. */ pgt_data.pgt_buf_offset = 0; /* * If we came here via startup_32(), cr3 will be _pgtable already * and we must append to the existing area instead of entirely * overwriting it. * * With 5-level paging, we use '_pgtable' to allocate the p4d page table, * the top-level page table is allocated separately. * * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level * cases. On 4-level paging it's equal to 'top_level_pgt'. */ top_level_pgt = read_cr3_pa(); if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); } else { pgt_data.pgt_buf = _pgtable; pgt_data.pgt_buf_size = BOOT_PGT_SIZE; memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); } /* * New page-table is set up - map the kernel image, boot_params and the * command line. The uncompressed kernel requires boot_params and the * command line to be mapped in the identity mapping. Map them * explicitly here in case the compressed kernel does not touch them, * or does not touch all the pages covering them. */ kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); boot_params_ptr = rmode; kernel_add_identity_map((unsigned long)boot_params_ptr, (unsigned long)(boot_params_ptr + 1)); cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); /* * Also map the setup_data entries passed via boot_params in case they * need to be accessed by uncompressed kernel via the identity mapping. */ sd = (struct setup_data *)boot_params_ptr->hdr.setup_data; while (sd) { unsigned long sd_addr = (unsigned long)sd; kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len); sd = (struct setup_data *)sd->next; } sev_prep_identity_maps(top_level_pgt); /* Load the new page-table. */ write_cr3(top_level_pgt); /* * Now that the required page table mappings are established and a * GHCB can be used, check for SNP guest/HV feature compatibility. */ snp_check_features(); } static pte_t *split_large_pmd(struct x86_mapping_info *info, pmd_t *pmdp, unsigned long __address) { unsigned long page_flags; unsigned long address; pte_t *pte; pmd_t pmd; int i; pte = (pte_t *)info->alloc_pgt_page(info->context); if (!pte) return NULL; address = __address & PMD_MASK; /* No large page - clear PSE flag */ page_flags = info->page_flag & ~_PAGE_PSE; /* Populate the PTEs */ for (i = 0; i < PTRS_PER_PMD; i++) { set_pte(&pte[i], __pte(address | page_flags)); address += PAGE_SIZE; } /* * Ideally we need to clear the large PMD first and do a TLB * flush before we write the new PMD. But the 2M range of the * PMD might contain the code we execute and/or the stack * we are on, so we can't do that. But that should be safe here * because we are going from large to small mappings and we are * also the only user of the page-table, so there is no chance * of a TLB multihit. */ pmd = __pmd((unsigned long)pte | info->kernpg_flag); set_pmd(pmdp, pmd); /* Flush TLB to establish the new PMD */ write_cr3(top_level_pgt); return pte + pte_index(__address); } static void clflush_page(unsigned long address) { unsigned int flush_size; char *cl, *start, *end; /* * Hardcode cl-size to 64 - CPUID can't be used here because that might * cause another #VC exception and the GHCB is not ready to use yet. */ flush_size = 64; start = (char *)(address & PAGE_MASK); end = start + PAGE_SIZE; /* * First make sure there are no pending writes on the cache-lines to * flush. */ asm volatile("mfence" : : : "memory"); for (cl = start; cl != end; cl += flush_size) clflush(cl); } static int set_clr_page_flags(struct x86_mapping_info *info, unsigned long address, pteval_t set, pteval_t clr) { pgd_t *pgdp = (pgd_t *)top_level_pgt; p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep, pte; /* * First make sure there is a PMD mapping for 'address'. * It should already exist, but keep things generic. * * To map the page just read from it and fault it in if there is no * mapping yet. kernel_add_identity_map() can't be called here because * that would unconditionally map the address on PMD level, destroying * any PTE-level mappings that might already exist. Use assembly here * so the access won't be optimized away. */ asm volatile("mov %[address], %%r9" :: [address] "g" (*(unsigned long *)address) : "r9", "memory"); /* * The page is mapped at least with PMD size - so skip checks and walk * directly to the PMD. */ p4dp = p4d_offset(pgdp, address); pudp = pud_offset(p4dp, address); pmdp = pmd_offset(pudp, address); if (pmd_leaf(*pmdp)) ptep = split_large_pmd(info, pmdp, address); else ptep = pte_offset_kernel(pmdp, address); if (!ptep) return -ENOMEM; /* * Changing encryption attributes of a page requires to flush it from * the caches. */ if ((set | clr) & _PAGE_ENC) { clflush_page(address); /* * If the encryption attribute is being cleared, change the page state * to shared in the RMP table. */ if (clr) snp_set_page_shared(__pa(address & PAGE_MASK)); } /* Update PTE */ pte = *ptep; pte = pte_set_flags(pte, set); pte = pte_clear_flags(pte, clr); set_pte(ptep, pte); /* * If the encryption attribute is being set, then change the page state to * private in the RMP entry. The page state change must be done after the PTE * is updated. */ if (set & _PAGE_ENC) snp_set_page_private(__pa(address & PAGE_MASK)); /* Flush TLB after changing encryption attribute */ write_cr3(top_level_pgt); return 0; } int set_page_decrypted(unsigned long address) { return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC); } int set_page_encrypted(unsigned long address) { return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0); } int set_page_non_present(unsigned long address) { return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT); } static void do_pf_error(const char *msg, unsigned long error_code, unsigned long address, unsigned long ip) { error_putstr(msg); error_putstr("\nError Code: "); error_puthex(error_code); error_putstr("\nCR2: 0x"); error_puthex(address); error_putstr("\nRIP relative to _head: 0x"); error_puthex(ip - (unsigned long)_head); error_putstr("\n"); error("Stopping.\n"); } void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) { unsigned long address = native_read_cr2(); unsigned long end; bool ghcb_fault; ghcb_fault = sev_es_check_ghcb_fault(address); address &= PMD_MASK; end = address + PMD_SIZE; /* * Check for unexpected error codes. Unexpected are: * - Faults on present pages * - User faults * - Reserved bits set */ if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD)) do_pf_error("Unexpected page-fault:", error_code, address, regs->ip); else if (ghcb_fault) do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip); /* * Error code is sane - now identity map the 2M region around * the faulting address. */ kernel_add_identity_map(address, end); } void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code) { spurious_nmi_count++; } |