// SPDX-License-Identifier: GPL-2.0
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

#define INVALID_PHYS_ADDR	(~(phys_addr_t)0)
struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
#define invalid_pg_dir		vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_LOWCORE,
	POPULATE_ABS_LOWCORE,
	POPULATE_IDENTITY,
	POPULATE_KERNEL,
#ifdef CONFIG_KASAN
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

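/*
 * Populate the KASAN shadow for the virtual range [start, end):
 * translate both ends with kasan_mem_to_shadow(), page align them and
 * hand the resulting shadow range to the generic populator.
 */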
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	start = PAGE_ALIGN_DOWN(__sha(start));
	end = PAGE_ALIGN(__sha(end));
	pgtable_populate(start, end, mode);
}

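/*
 * Initialize the shared early shadow tables (read-only zero shadow) and
 * populate the KASAN shadow: real shadow for usable physical memory and
 * the kernel image, zero shadow for regions that are never tracked, and
 * a shallow (top level only) population for the vmalloc/modules area
 * when CONFIG_KASAN_VMALLOC is enabled.
 */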
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long untracked_end;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	if (!machine.has_nx)
		pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate((unsigned long)__identity_va(start),
			       (unsigned long)__identity_va(end),
			       POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
			kasan_populate((unsigned long)__identity_va(memgap_start),
				       (unsigned long)__identity_va(start),
				       POPULATE_KASAN_ZERO_SHADOW);
		}
		memgap_start = end;
	}
	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
		untracked_end = VMALLOC_START;
		/* shallowly populate kasan shadow for vmalloc and modules */
		kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	} else {
		untracked_end = MODULES_VADDR;
	}
	/* populate kasan shadow for untracked memory */
	kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end,
		       POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

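/*
 * The helpers below handle POPULATE_KASAN_ZERO_SHADOW: for suitably
 * aligned and sized ranges they wire the entry to the shared early
 * shadow tables instead of allocating backing memory, and return true
 * when the range has been covered at the current level.
 */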
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}
#else

static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

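/*
 * Allocate a region/segment (crst) table from the RR_VMEM reserve,
 * preset all entries to @val and mark the backing pages as used for
 * DAT tables.
 */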
static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	/*
	 * handling pte_leftovers this way helps to avoid memory fragmentation
	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
	 */
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}

	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

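/*
 * Resolve the physical address that backs @addr for the given populate
 * mode. POPULATE_KASAN_MAP_SHADOW allocates and zeroes fresh shadow
 * memory instead; POPULATE_NONE and unknown modes yield
 * INVALID_PHYS_ADDR.
 */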
static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
					  enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return INVALID_PHYS_ADDR;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_LOWCORE:
		return __lowcore_pa(addr);
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	case POPULATE_KERNEL:
		return __kernel_pa(addr);
	case POPULATE_IDENTITY:
		return __identity_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		addr = physmem_alloc_top_down(RR_VMEM, size, size);
		memset((void *)addr, 0, size);
		return addr;
#endif
	default:
		return INVALID_PHYS_ADDR;
	}
}

static bool large_page_mapping_allowed(enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_DIRECT:
	case POPULATE_IDENTITY:
	case POPULATE_KERNEL:
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
#endif
		return true;
	default:
		return false;
	}
}

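/*
 * Return the physical address for a 2GB region (pud level) mapping if
 * EDAT2 is available, the mode allows large mappings and both the
 * virtual range and the resolved physical address are suitably aligned
 * and large enough; otherwise return INVALID_PHYS_ADDR.
 */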
static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!machine.has_edat2 || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PUD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

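/*
 * Same as above for 1MB segment (pmd level) mappings, which require
 * EDAT1.
 */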
static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!machine.has_edat1 || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PMD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

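/*
 * Create 4KB mappings for [addr, end) in the page table behind @pmd,
 * leaving already populated entries untouched. Identity mappings are
 * accounted in the direct mapping counters.
 */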
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			if (!machine.has_nx)
				entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

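/*
 * Walk the segment table behind @pud for [addr, end): map 1MB segments
 * directly where possible, otherwise allocate a page table and populate
 * it. Existing large entries are left alone.
 */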
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			pa = try_get_large_pmd_pa(pmd, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pmd(pa);
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				if (!machine.has_nx)
					entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

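/*
 * Same walk one level up: map 2GB regions where possible, otherwise
 * allocate a segment table and descend.
 */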
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			pa = try_get_large_pud_pa(pud, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pud(pa);
				entry = set_pud_bit(entry, REGION3_KERNEL);
				if (!machine.has_nx)
					entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

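/*
 * Top-level populator: walk init_mm's pgd for [addr, end) and create the
 * requested mappings, allocating intermediate tables on demand. For
 * POPULATE_KASAN_SHALLOW only the pgd level is populated.
 */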
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

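/*
 * Create the page tables used once DAT is enabled: lowcore, identity,
 * kernel image, amode31 and absolute lowcore mappings plus the KASAN
 * shadow, then load the kernel and invalid ASCEs into the control
 * registers.
 */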
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
	unsigned long lowcore_address = 0;
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	pgd_t *init_mm_pgd;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated, or kernel image builtin pages
	 * are marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	/*
	 * init_mm->pgd contains virtual address of swapper_pg_dir.
	 * It is unusable at this stage since DAT is yet off. Swap
	 * it for physical address of swapper_pg_dir and restore
	 * the virtual address after all page tables are created.
	 */
	init_mm_pgd = init_mm.pgd;
	init_mm.pgd = (pgd_t *)swapper_pg_dir;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	if (relocate_lowcore)
		lowcore_address = LOWCORE_ALT_ADDRESS;

	/*
	 * To allow prefixing the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0 first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
	pgtable_populate(lowcore_address,
			 lowcore_address + sizeof(struct lowcore),
			 POPULATE_LOWCORE);
	for_each_physmem_usable_range(i, &start, &end) {
		pgtable_populate((unsigned long)__identity_va(start),
				 (unsigned long)__identity_va(end),
				 POPULATE_IDENTITY);
	}

	/*
	 * [kernel_start..kernel_start + TEXT_OFFSET] region is never
	 * accessed as per the linker script:
	 *
	 *	. = TEXT_OFFSET;
	 *
	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
	 * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
	 */
	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));

	kasan_populate_shadow(kernel_start, kernel_end);

	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
	get_lowcore()->user_asce = s390_invalid_asce;

	local_ctl_load(1, &get_lowcore()->kernel_asce);
	local_ctl_load(7, &get_lowcore()->user_asce);
	local_ctl_load(13, &get_lowcore()->kernel_asce);

	init_mm.context.asce = get_lowcore()->kernel_asce.val;
	init_mm.pgd = init_mm_pgd;
}