Loading...
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * KVM/MIPS MMU handling in the KVM module.
7 *
8 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
9 * Authors: Sanjay Lal <sanjayl@kymasys.com>
10 */
11
12#include <linux/highmem.h>
13#include <linux/kvm_host.h>
14#include <linux/uaccess.h>
15#include <asm/mmu_context.h>
16#include <asm/pgalloc.h>
17
/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
 * for which pages need to be cached. It is one lower when the PMD level is
 * folded.
 */
#ifdef __PAGETABLE_PMD_FOLDED
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
27
28static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
29 int min, int max)
30{
31 void *page;
32
33 BUG_ON(max > KVM_NR_MEM_OBJS);
34 if (cache->nobjs >= min)
35 return 0;
36 while (cache->nobjs < max) {
37 page = (void *)__get_free_page(GFP_KERNEL);
38 if (!page)
39 return -ENOMEM;
40 cache->objects[cache->nobjs++] = page;
41 }
42 return 0;
43}
44
45static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
46{
47 while (mc->nobjs)
48 free_page((unsigned long)mc->objects[--mc->nobjs]);
49}
50
51static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
52{
53 void *p;
54
55 BUG_ON(!mc || !mc->nobjs);
56 p = mc->objects[--mc->nobjs];
57 return p;
58}
59
60void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
61{
62 mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
63}
64
65/**
66 * kvm_pgd_init() - Initialise KVM GPA page directory.
67 * @page: Pointer to page directory (PGD) for KVM GPA.
68 *
69 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
70 * representing no mappings. This is similar to pgd_init(), however it
71 * initialises all the page directory pointers, not just the ones corresponding
72 * to the userland address space (since it is for the guest physical address
73 * space rather than a virtual address space).
74 */
75static void kvm_pgd_init(void *page)
76{
77 unsigned long *p, *end;
78 unsigned long entry;
79
80#ifdef __PAGETABLE_PMD_FOLDED
81 entry = (unsigned long)invalid_pte_table;
82#else
83 entry = (unsigned long)invalid_pmd_table;
84#endif
85
86 p = (unsigned long *)page;
87 end = p + PTRS_PER_PGD;
88
89 do {
90 p[0] = entry;
91 p[1] = entry;
92 p[2] = entry;
93 p[3] = entry;
94 p[4] = entry;
95 p += 8;
96 p[-3] = entry;
97 p[-2] = entry;
98 p[-1] = entry;
99 } while (p != end);
100}
101
102/**
103 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
104 *
105 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
106 * to host physical page mappings.
107 *
108 * Returns: Pointer to new KVM GPA page directory.
109 * NULL on allocation failure.
110 */
111pgd_t *kvm_pgd_alloc(void)
112{
113 pgd_t *ret;
114
115 ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER);
116 if (ret)
117 kvm_pgd_init(ret);
118
119 return ret;
120}
121
122/**
123 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
124 * @pgd: Page directory pointer.
125 * @addr: Address to index page table using.
126 * @cache: MMU page cache to allocate new page tables from, or NULL.
127 *
128 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
129 * address @addr. If page tables don't exist for @addr, they will be created
130 * from the MMU cache if @cache is not NULL.
131 *
132 * Returns: Pointer to pte_t corresponding to @addr.
133 * NULL if a page table doesn't exist for @addr and !@cache.
134 * NULL if a page table allocation failed.
135 */
136static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
137 unsigned long addr)
138{
139 pud_t *pud;
140 pmd_t *pmd;
141
142 pgd += pgd_index(addr);
143 if (pgd_none(*pgd)) {
144 /* Not used on MIPS yet */
145 BUG();
146 return NULL;
147 }
148 pud = pud_offset(pgd, addr);
149 if (pud_none(*pud)) {
150 pmd_t *new_pmd;
151
152 if (!cache)
153 return NULL;
154 new_pmd = mmu_memory_cache_alloc(cache);
155 pmd_init((unsigned long)new_pmd,
156 (unsigned long)invalid_pte_table);
157 pud_populate(NULL, pud, new_pmd);
158 }
159 pmd = pmd_offset(pud, addr);
160 if (pmd_none(*pmd)) {
161 pte_t *new_pte;
162
163 if (!cache)
164 return NULL;
165 new_pte = mmu_memory_cache_alloc(cache);
166 clear_page(new_pte);
167 pmd_populate_kernel(NULL, pmd, new_pte);
168 }
169 return pte_offset(pmd, addr);
170}
171
172/* Caller must hold kvm->mm_lock */
173static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
174 struct kvm_mmu_memory_cache *cache,
175 unsigned long addr)
176{
177 return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
178}
179
180/*
181 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
182 * Flush a range of guest physical address space from the VM's GPA page tables.
183 */
184
185static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
186 unsigned long end_gpa)
187{
188 int i_min = __pte_offset(start_gpa);
189 int i_max = __pte_offset(end_gpa);
190 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
191 int i;
192
193 for (i = i_min; i <= i_max; ++i) {
194 if (!pte_present(pte[i]))
195 continue;
196
197 set_pte(pte + i, __pte(0));
198 }
199 return safe_to_remove;
200}
201
202static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
203 unsigned long end_gpa)
204{
205 pte_t *pte;
206 unsigned long end = ~0ul;
207 int i_min = __pmd_offset(start_gpa);
208 int i_max = __pmd_offset(end_gpa);
209 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
210 int i;
211
212 for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
213 if (!pmd_present(pmd[i]))
214 continue;
215
216 pte = pte_offset(pmd + i, 0);
217 if (i == i_max)
218 end = end_gpa;
219
220 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
221 pmd_clear(pmd + i);
222 pte_free_kernel(NULL, pte);
223 } else {
224 safe_to_remove = false;
225 }
226 }
227 return safe_to_remove;
228}
229
230static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
231 unsigned long end_gpa)
232{
233 pmd_t *pmd;
234 unsigned long end = ~0ul;
235 int i_min = __pud_offset(start_gpa);
236 int i_max = __pud_offset(end_gpa);
237 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
238 int i;
239
240 for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
241 if (!pud_present(pud[i]))
242 continue;
243
244 pmd = pmd_offset(pud + i, 0);
245 if (i == i_max)
246 end = end_gpa;
247
248 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
249 pud_clear(pud + i);
250 pmd_free(NULL, pmd);
251 } else {
252 safe_to_remove = false;
253 }
254 }
255 return safe_to_remove;
256}
257
258static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
259 unsigned long end_gpa)
260{
261 pud_t *pud;
262 unsigned long end = ~0ul;
263 int i_min = pgd_index(start_gpa);
264 int i_max = pgd_index(end_gpa);
265 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
266 int i;
267
268 for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
269 if (!pgd_present(pgd[i]))
270 continue;
271
272 pud = pud_offset(pgd + i, 0);
273 if (i == i_max)
274 end = end_gpa;
275
276 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
277 pgd_clear(pgd + i);
278 pud_free(NULL, pud);
279 } else {
280 safe_to_remove = false;
281 }
282 }
283 return safe_to_remove;
284}
285
286/**
287 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
288 * @kvm: KVM pointer.
289 * @start_gfn: Guest frame number of first page in GPA range to flush.
290 * @end_gfn: Guest frame number of last page in GPA range to flush.
291 *
292 * Flushes a range of GPA mappings from the GPA page tables.
293 *
294 * The caller must hold the @kvm->mmu_lock spinlock.
295 *
296 * Returns: Whether its safe to remove the top level page directory because
297 * all lower levels have been removed.
298 */
299bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
300{
301 return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
302 start_gfn << PAGE_SHIFT,
303 end_gfn << PAGE_SHIFT);
304}
305
/*
 * BUILD_PTE_RANGE_OP() - Generate page table walkers applying a PTE operation.
 * @name:	Infix of the generated kvm_mips_<name>_{pte,pmd,pud,pgd}().
 * @op:		Operation taking a pte_t and returning the updated pte_t.
 *
 * Each generated function applies @op to every present PTE mapping the
 * inclusive range [start, end] below the given table, and returns 1 if any PTE
 * was changed, 0 otherwise.
 */
#define BUILD_PTE_RANGE_OP(name, op)					\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = __pte_offset(start);				\
	int last = __pte_offset(end);					\
	int changed = 0;						\
	pte_t cur, upd;							\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++) {				\
		if (!pte_present(pte[idx]))				\
			continue;					\
									\
		cur = pte[idx];						\
		upd = op(cur);						\
		/* Only write back (and report) real changes */	\
		if (pte_val(upd) != pte_val(cur)) {			\
			set_pte(pte + idx, upd);			\
			changed = 1;					\
		}							\
	}								\
	return changed;							\
}									\
									\
/* returns true if anything was done */					\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = __pmd_offset(start);				\
	int last = __pmd_offset(end);					\
	unsigned long sub_end = ~0ul;					\
	int changed = 0;						\
	pte_t *pte;							\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++, start = 0) {		\
		if (!pmd_present(pmd[idx]))				\
			continue;					\
									\
		pte = pte_offset(pmd + idx, 0);				\
		if (idx == last)					\
			sub_end = end;					\
									\
		changed |= kvm_mips_##name##_pte(pte, start, sub_end);	\
	}								\
	return changed;							\
}									\
									\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = __pud_offset(start);				\
	int last = __pud_offset(end);					\
	unsigned long sub_end = ~0ul;					\
	int changed = 0;						\
	pmd_t *pmd;							\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++, start = 0) {		\
		if (!pud_present(pud[idx]))				\
			continue;					\
									\
		pmd = pmd_offset(pud + idx, 0);				\
		if (idx == last)					\
			sub_end = end;					\
									\
		changed |= kvm_mips_##name##_pmd(pmd, start, sub_end);	\
	}								\
	return changed;							\
}									\
									\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = pgd_index(start);					\
	int last = pgd_index(end);					\
	unsigned long sub_end = ~0ul;					\
	int changed = 0;						\
	pud_t *pud;							\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++, start = 0) {		\
		if (!pgd_present(pgd[idx]))				\
			continue;					\
									\
		pud = pud_offset(pgd + idx, 0);				\
		if (idx == last)					\
			sub_end = end;					\
									\
		changed |= kvm_mips_##name##_pud(pud, start, sub_end);	\
	}								\
	return changed;							\
}
399
400/*
401 * kvm_mips_mkclean_gpa_pt.
402 * Mark a range of guest physical address space clean (writes fault) in the VM's
403 * GPA page table to allow dirty page tracking.
404 */
405
406BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
407
408/**
409 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
410 * @kvm: KVM pointer.
411 * @start_gfn: Guest frame number of first page in GPA range to flush.
412 * @end_gfn: Guest frame number of last page in GPA range to flush.
413 *
414 * Make a range of GPA mappings clean so that guest writes will fault and
415 * trigger dirty page logging.
416 *
417 * The caller must hold the @kvm->mmu_lock spinlock.
418 *
419 * Returns: Whether any GPA mappings were modified, which would require
420 * derived mappings (GVA page tables & TLB enties) to be
421 * invalidated.
422 */
423int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
424{
425 return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
426 start_gfn << PAGE_SHIFT,
427 end_gfn << PAGE_SHIFT);
428}
429
430/**
431 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
432 * @kvm: The KVM pointer
433 * @slot: The memory slot associated with mask
434 * @gfn_offset: The gfn offset in memory slot
435 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
436 * slot to be write protected
437 *
438 * Walks bits set in mask write protects the associated pte's. Caller must
439 * acquire @kvm->mmu_lock.
440 */
441void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
442 struct kvm_memory_slot *slot,
443 gfn_t gfn_offset, unsigned long mask)
444{
445 gfn_t base_gfn = slot->base_gfn + gfn_offset;
446 gfn_t start = base_gfn + __ffs(mask);
447 gfn_t end = base_gfn + __fls(mask);
448
449 kvm_mips_mkclean_gpa_pt(kvm, start, end);
450}
451
452/*
453 * kvm_mips_mkold_gpa_pt.
454 * Mark a range of guest physical address space old (all accesses fault) in the
455 * VM's GPA page table to allow detection of commonly used pages.
456 */
457
458BUILD_PTE_RANGE_OP(mkold, pte_mkold)
459
460static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
461 gfn_t end_gfn)
462{
463 return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
464 start_gfn << PAGE_SHIFT,
465 end_gfn << PAGE_SHIFT);
466}
467
468static int handle_hva_to_gpa(struct kvm *kvm,
469 unsigned long start,
470 unsigned long end,
471 int (*handler)(struct kvm *kvm, gfn_t gfn,
472 gpa_t gfn_end,
473 struct kvm_memory_slot *memslot,
474 void *data),
475 void *data)
476{
477 struct kvm_memslots *slots;
478 struct kvm_memory_slot *memslot;
479 int ret = 0;
480
481 slots = kvm_memslots(kvm);
482
483 /* we only care about the pages that the guest sees */
484 kvm_for_each_memslot(memslot, slots) {
485 unsigned long hva_start, hva_end;
486 gfn_t gfn, gfn_end;
487
488 hva_start = max(start, memslot->userspace_addr);
489 hva_end = min(end, memslot->userspace_addr +
490 (memslot->npages << PAGE_SHIFT));
491 if (hva_start >= hva_end)
492 continue;
493
494 /*
495 * {gfn(page) | page intersects with [hva_start, hva_end)} =
496 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
497 */
498 gfn = hva_to_gfn_memslot(hva_start, memslot);
499 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
500
501 ret |= handler(kvm, gfn, gfn_end, memslot, data);
502 }
503
504 return ret;
505}
506
507
508static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
509 struct kvm_memory_slot *memslot, void *data)
510{
511 kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end);
512 return 1;
513}
514
515int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
516{
517 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
518
519 kvm_mips_callbacks->flush_shadow_all(kvm);
520 return 0;
521}
522
523static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
524 struct kvm_memory_slot *memslot, void *data)
525{
526 gpa_t gpa = gfn << PAGE_SHIFT;
527 pte_t hva_pte = *(pte_t *)data;
528 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
529 pte_t old_pte;
530
531 if (!gpa_pte)
532 return 0;
533
534 /* Mapping may need adjusting depending on memslot flags */
535 old_pte = *gpa_pte;
536 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
537 hva_pte = pte_mkclean(hva_pte);
538 else if (memslot->flags & KVM_MEM_READONLY)
539 hva_pte = pte_wrprotect(hva_pte);
540
541 set_pte(gpa_pte, hva_pte);
542
543 /* Replacing an absent or old page doesn't need flushes */
544 if (!pte_present(old_pte) || !pte_young(old_pte))
545 return 0;
546
547 /* Pages swapped, aged, moved, or cleaned require flushes */
548 return !pte_present(hva_pte) ||
549 !pte_young(hva_pte) ||
550 pte_pfn(old_pte) != pte_pfn(hva_pte) ||
551 (pte_dirty(old_pte) && !pte_dirty(hva_pte));
552}
553
554int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
555{
556 unsigned long end = hva + PAGE_SIZE;
557 int ret;
558
559 ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
560 if (ret)
561 kvm_mips_callbacks->flush_shadow_all(kvm);
562 return 0;
563}
564
565static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
566 struct kvm_memory_slot *memslot, void *data)
567{
568 return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end);
569}
570
571static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
572 struct kvm_memory_slot *memslot, void *data)
573{
574 gpa_t gpa = gfn << PAGE_SHIFT;
575 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
576
577 if (!gpa_pte)
578 return 0;
579 return pte_young(*gpa_pte);
580}
581
582int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
583{
584 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
585}
586
587int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
588{
589 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
590}
591
592/**
593 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
594 * @vcpu: VCPU pointer.
595 * @gpa: Guest physical address of fault.
596 * @write_fault: Whether the fault was due to a write.
597 * @out_entry: New PTE for @gpa (written on success unless NULL).
598 * @out_buddy: New PTE for @gpa's buddy (written on success unless
599 * NULL).
600 *
601 * Perform fast path GPA fault handling, doing all that can be done without
602 * calling into KVM. This handles marking old pages young (for idle page
603 * tracking), and dirtying of clean pages (for dirty page logging).
604 *
605 * Returns: 0 on success, in which case we can update derived mappings and
606 * resume guest execution.
607 * -EFAULT on failure due to absent GPA mapping or write to
608 * read-only page, in which case KVM must be consulted.
609 */
610static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
611 bool write_fault,
612 pte_t *out_entry, pte_t *out_buddy)
613{
614 struct kvm *kvm = vcpu->kvm;
615 gfn_t gfn = gpa >> PAGE_SHIFT;
616 pte_t *ptep;
617 kvm_pfn_t pfn = 0; /* silence bogus GCC warning */
618 bool pfn_valid = false;
619 int ret = 0;
620
621 spin_lock(&kvm->mmu_lock);
622
623 /* Fast path - just check GPA page table for an existing entry */
624 ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
625 if (!ptep || !pte_present(*ptep)) {
626 ret = -EFAULT;
627 goto out;
628 }
629
630 /* Track access to pages marked old */
631 if (!pte_young(*ptep)) {
632 set_pte(ptep, pte_mkyoung(*ptep));
633 pfn = pte_pfn(*ptep);
634 pfn_valid = true;
635 /* call kvm_set_pfn_accessed() after unlock */
636 }
637 if (write_fault && !pte_dirty(*ptep)) {
638 if (!pte_write(*ptep)) {
639 ret = -EFAULT;
640 goto out;
641 }
642
643 /* Track dirtying of writeable pages */
644 set_pte(ptep, pte_mkdirty(*ptep));
645 pfn = pte_pfn(*ptep);
646 mark_page_dirty(kvm, gfn);
647 kvm_set_pfn_dirty(pfn);
648 }
649
650 if (out_entry)
651 *out_entry = *ptep;
652 if (out_buddy)
653 *out_buddy = *ptep_buddy(ptep);
654
655out:
656 spin_unlock(&kvm->mmu_lock);
657 if (pfn_valid)
658 kvm_set_pfn_accessed(pfn);
659 return ret;
660}
661
662/**
663 * kvm_mips_map_page() - Map a guest physical page.
664 * @vcpu: VCPU pointer.
665 * @gpa: Guest physical address of fault.
666 * @write_fault: Whether the fault was due to a write.
667 * @out_entry: New PTE for @gpa (written on success unless NULL).
668 * @out_buddy: New PTE for @gpa's buddy (written on success unless
669 * NULL).
670 *
671 * Handle GPA faults by creating a new GPA mapping (or updating an existing
672 * one).
673 *
674 * This takes care of marking pages young or dirty (idle/dirty page tracking),
675 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
676 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
677 * caller.
678 *
679 * Returns: 0 on success, in which case the caller may use the @out_entry
680 * and @out_buddy PTEs to update derived mappings and resume guest
681 * execution.
682 * -EFAULT if there is no memory region at @gpa or a write was
683 * attempted to a read-only memory region. This is usually handled
684 * as an MMIO access.
685 */
686static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
687 bool write_fault,
688 pte_t *out_entry, pte_t *out_buddy)
689{
690 struct kvm *kvm = vcpu->kvm;
691 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
692 gfn_t gfn = gpa >> PAGE_SHIFT;
693 int srcu_idx, err;
694 kvm_pfn_t pfn;
695 pte_t *ptep, entry, old_pte;
696 bool writeable;
697 unsigned long prot_bits;
698 unsigned long mmu_seq;
699
700 /* Try the fast path to handle old / clean pages */
701 srcu_idx = srcu_read_lock(&kvm->srcu);
702 err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
703 out_buddy);
704 if (!err)
705 goto out;
706
707 /* We need a minimum of cached pages ready for page table creation */
708 err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
709 KVM_NR_MEM_OBJS);
710 if (err)
711 goto out;
712
713retry:
714 /*
715 * Used to check for invalidations in progress, of the pfn that is
716 * returned by pfn_to_pfn_prot below.
717 */
718 mmu_seq = kvm->mmu_notifier_seq;
719 /*
720 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
721 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
722 * risk the page we get a reference to getting unmapped before we have a
723 * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
724 *
725 * This smp_rmb() pairs with the effective smp_wmb() of the combination
726 * of the pte_unmap_unlock() after the PTE is zapped, and the
727 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
728 * mmu_notifier_seq is incremented.
729 */
730 smp_rmb();
731
732 /* Slow path - ask KVM core whether we can access this GPA */
733 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
734 if (is_error_noslot_pfn(pfn)) {
735 err = -EFAULT;
736 goto out;
737 }
738
739 spin_lock(&kvm->mmu_lock);
740 /* Check if an invalidation has taken place since we got pfn */
741 if (mmu_notifier_retry(kvm, mmu_seq)) {
742 /*
743 * This can happen when mappings are changed asynchronously, but
744 * also synchronously if a COW is triggered by
745 * gfn_to_pfn_prot().
746 */
747 spin_unlock(&kvm->mmu_lock);
748 kvm_release_pfn_clean(pfn);
749 goto retry;
750 }
751
752 /* Ensure page tables are allocated */
753 ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);
754
755 /* Set up the PTE */
756 prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
757 if (writeable) {
758 prot_bits |= _PAGE_WRITE;
759 if (write_fault) {
760 prot_bits |= __WRITEABLE;
761 mark_page_dirty(kvm, gfn);
762 kvm_set_pfn_dirty(pfn);
763 }
764 }
765 entry = pfn_pte(pfn, __pgprot(prot_bits));
766
767 /* Write the PTE */
768 old_pte = *ptep;
769 set_pte(ptep, entry);
770
771 err = 0;
772 if (out_entry)
773 *out_entry = *ptep;
774 if (out_buddy)
775 *out_buddy = *ptep_buddy(ptep);
776
777 spin_unlock(&kvm->mmu_lock);
778 kvm_release_pfn_clean(pfn);
779 kvm_set_pfn_accessed(pfn);
780out:
781 srcu_read_unlock(&kvm->srcu, srcu_idx);
782 return err;
783}
784
785static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
786 unsigned long addr)
787{
788 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
789 pgd_t *pgdp;
790 int ret;
791
792 /* We need a minimum of cached pages ready for page table creation */
793 ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
794 KVM_NR_MEM_OBJS);
795 if (ret)
796 return NULL;
797
798 if (KVM_GUEST_KERNEL_MODE(vcpu))
799 pgdp = vcpu->arch.guest_kernel_mm.pgd;
800 else
801 pgdp = vcpu->arch.guest_user_mm.pgd;
802
803 return kvm_mips_walk_pgd(pgdp, memcache, addr);
804}
805
806void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
807 bool user)
808{
809 pgd_t *pgdp;
810 pte_t *ptep;
811
812 addr &= PAGE_MASK << 1;
813
814 pgdp = vcpu->arch.guest_kernel_mm.pgd;
815 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
816 if (ptep) {
817 ptep[0] = pfn_pte(0, __pgprot(0));
818 ptep[1] = pfn_pte(0, __pgprot(0));
819 }
820
821 if (user) {
822 pgdp = vcpu->arch.guest_user_mm.pgd;
823 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
824 if (ptep) {
825 ptep[0] = pfn_pte(0, __pgprot(0));
826 ptep[1] = pfn_pte(0, __pgprot(0));
827 }
828 }
829}
830
/*
 * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest virtual address space from a GVA page table.
 */
835
836static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva,
837 unsigned long end_gva)
838{
839 int i_min = __pte_offset(start_gva);
840 int i_max = __pte_offset(end_gva);
841 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
842 int i;
843
844 /*
845 * There's no freeing to do, so there's no point clearing individual
846 * entries unless only part of the last level page table needs flushing.
847 */
848 if (safe_to_remove)
849 return true;
850
851 for (i = i_min; i <= i_max; ++i) {
852 if (!pte_present(pte[i]))
853 continue;
854
855 set_pte(pte + i, __pte(0));
856 }
857 return false;
858}
859
860static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
861 unsigned long end_gva)
862{
863 pte_t *pte;
864 unsigned long end = ~0ul;
865 int i_min = __pmd_offset(start_gva);
866 int i_max = __pmd_offset(end_gva);
867 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
868 int i;
869
870 for (i = i_min; i <= i_max; ++i, start_gva = 0) {
871 if (!pmd_present(pmd[i]))
872 continue;
873
874 pte = pte_offset(pmd + i, 0);
875 if (i == i_max)
876 end = end_gva;
877
878 if (kvm_mips_flush_gva_pte(pte, start_gva, end)) {
879 pmd_clear(pmd + i);
880 pte_free_kernel(NULL, pte);
881 } else {
882 safe_to_remove = false;
883 }
884 }
885 return safe_to_remove;
886}
887
888static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
889 unsigned long end_gva)
890{
891 pmd_t *pmd;
892 unsigned long end = ~0ul;
893 int i_min = __pud_offset(start_gva);
894 int i_max = __pud_offset(end_gva);
895 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
896 int i;
897
898 for (i = i_min; i <= i_max; ++i, start_gva = 0) {
899 if (!pud_present(pud[i]))
900 continue;
901
902 pmd = pmd_offset(pud + i, 0);
903 if (i == i_max)
904 end = end_gva;
905
906 if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) {
907 pud_clear(pud + i);
908 pmd_free(NULL, pmd);
909 } else {
910 safe_to_remove = false;
911 }
912 }
913 return safe_to_remove;
914}
915
916static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
917 unsigned long end_gva)
918{
919 pud_t *pud;
920 unsigned long end = ~0ul;
921 int i_min = pgd_index(start_gva);
922 int i_max = pgd_index(end_gva);
923 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
924 int i;
925
926 for (i = i_min; i <= i_max; ++i, start_gva = 0) {
927 if (!pgd_present(pgd[i]))
928 continue;
929
930 pud = pud_offset(pgd + i, 0);
931 if (i == i_max)
932 end = end_gva;
933
934 if (kvm_mips_flush_gva_pud(pud, start_gva, end)) {
935 pgd_clear(pgd + i);
936 pud_free(NULL, pud);
937 } else {
938 safe_to_remove = false;
939 }
940 }
941 return safe_to_remove;
942}
943
944void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags)
945{
946 if (flags & KMF_GPA) {
947 /* all of guest virtual address space could be affected */
948 if (flags & KMF_KERN)
949 /* useg, kseg0, seg2/3 */
950 kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff);
951 else
952 /* useg */
953 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
954 } else {
955 /* useg */
956 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
957
958 /* kseg2/3 */
959 if (flags & KMF_KERN)
960 kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff);
961 }
962}
963
964static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte)
965{
966 /*
967 * Don't leak writeable but clean entries from GPA page tables. We don't
968 * want the normal Linux tlbmod handler to handle dirtying when KVM
969 * accesses guest memory.
970 */
971 if (!pte_dirty(pte))
972 pte = pte_wrprotect(pte);
973
974 return pte;
975}
976
977static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
978{
979 /* Guest EntryLo overrides host EntryLo */
980 if (!(entrylo & ENTRYLO_D))
981 pte = pte_mkclean(pte);
982
983 return kvm_mips_gpa_pte_to_gva_unmapped(pte);
984}
985
#ifdef CONFIG_KVM_MIPS_VZ
/*
 * Handle a VZ root TLB fault at @badvaddr by mapping the page in the GPA page
 * tables and then invalidating the matching host TLB entry.
 *
 * Returns the error from kvm_mips_map_page() if mapping fails, otherwise the
 * result of kvm_vz_host_tlb_inv().
 */
int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
				      struct kvm_vcpu *vcpu,
				      bool write_fault)
{
	int err = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);

	/* Invalidate this entry in the TLB */
	return err ? err : kvm_vz_host_tlb_inv(vcpu, badvaddr);
}
#endif
1001
1002/* XXXKYMA: Must be called with interrupts disabled */
1003int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
1004 struct kvm_vcpu *vcpu,
1005 bool write_fault)
1006{
1007 unsigned long gpa;
1008 pte_t pte_gpa[2], *ptep_gva;
1009 int idx;
1010
1011 if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
1012 kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
1013 kvm_mips_dump_host_tlbs();
1014 return -1;
1015 }
1016
1017 /* Get the GPA page table entry */
1018 gpa = KVM_GUEST_CPHYSADDR(badvaddr);
1019 idx = (badvaddr >> PAGE_SHIFT) & 1;
1020 if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx],
1021 &pte_gpa[!idx]) < 0)
1022 return -1;
1023
1024 /* Get the GVA page table entry */
1025 ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE);
1026 if (!ptep_gva) {
1027 kvm_err("No ptep for gva %lx\n", badvaddr);
1028 return -1;
1029 }
1030
1031 /* Copy a pair of entries from GPA page table to GVA page table */
1032 ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]);
1033 ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]);
1034
1035 /* Invalidate this entry in the TLB, guest kernel ASID only */
1036 kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
1037 return 0;
1038}
1039
1040int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
1041 struct kvm_mips_tlb *tlb,
1042 unsigned long gva,
1043 bool write_fault)
1044{
1045 struct kvm *kvm = vcpu->kvm;
1046 long tlb_lo[2];
1047 pte_t pte_gpa[2], *ptep_buddy, *ptep_gva;
1048 unsigned int idx = TLB_LO_IDX(*tlb, gva);
1049 bool kernel = KVM_GUEST_KERNEL_MODE(vcpu);
1050
1051 tlb_lo[0] = tlb->tlb_lo[0];
1052 tlb_lo[1] = tlb->tlb_lo[1];
1053
1054 /*
1055 * The commpage address must not be mapped to anything else if the guest
1056 * TLB contains entries nearby, or commpage accesses will break.
1057 */
1058 if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1)))
1059 tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0;
1060
1061 /* Get the GPA page table entry */
1062 if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]),
1063 write_fault, &pte_gpa[idx], NULL) < 0)
1064 return -1;
1065
1066 /* And its GVA buddy's GPA page table entry if it also exists */
1067 pte_gpa[!idx] = pfn_pte(0, __pgprot(0));
1068 if (tlb_lo[!idx] & ENTRYLO_V) {
1069 spin_lock(&kvm->mmu_lock);
1070 ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL,
1071 mips3_tlbpfn_to_paddr(tlb_lo[!idx]));
1072 if (ptep_buddy)
1073 pte_gpa[!idx] = *ptep_buddy;
1074 spin_unlock(&kvm->mmu_lock);
1075 }
1076
1077 /* Get the GVA page table entry pair */
1078 ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE);
1079 if (!ptep_gva) {
1080 kvm_err("No ptep for gva %lx\n", gva);
1081 return -1;
1082 }
1083
1084 /* Copy a pair of entries from GPA page table to GVA page table */
1085 ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]);
1086 ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]);
1087
1088 /* Invalidate this entry in the TLB, current guest mode ASID only */
1089 kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel);
1090
1091 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
1092 tlb->tlb_lo[0], tlb->tlb_lo[1]);
1093
1094 return 0;
1095}
1096
1097int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
1098 struct kvm_vcpu *vcpu)
1099{
1100 kvm_pfn_t pfn;
1101 pte_t *ptep;
1102
1103 ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr);
1104 if (!ptep) {
1105 kvm_err("No ptep for commpage %lx\n", badvaddr);
1106 return -1;
1107 }
1108
1109 pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
1110 /* Also set valid and dirty, so refill handler doesn't have to */
1111 *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED)));
1112
1113 /* Invalidate this entry in the TLB, guest kernel ASID only */
1114 kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
1115 return 0;
1116}
1117
1118/**
1119 * kvm_mips_migrate_count() - Migrate timer.
1120 * @vcpu: Virtual CPU.
1121 *
1122 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
1123 * if it was running prior to being cancelled.
1124 *
1125 * Must be called when the VCPU is migrated to a different CPU to ensure that
1126 * timer expiry during guest execution interrupts the guest and causes the
1127 * interrupt to be delivered in a timely manner.
1128 */
1129static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
1130{
1131 if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
1132 hrtimer_restart(&vcpu->arch.comparecount_timer);
1133}
1134
1135/* Restore ASID once we are scheduled back after preemption */
1136void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1137{
1138 unsigned long flags;
1139
1140 kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
1141
1142 local_irq_save(flags);
1143
1144 vcpu->cpu = cpu;
1145 if (vcpu->arch.last_sched_cpu != cpu) {
1146 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
1147 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
1148 /*
1149 * Migrate the timer interrupt to the current CPU so that it
1150 * always interrupts the guest and synchronously triggers a
1151 * guest timer interrupt.
1152 */
1153 kvm_mips_migrate_count(vcpu);
1154 }
1155
1156 /* restore guest state to registers */
1157 kvm_mips_callbacks->vcpu_load(vcpu, cpu);
1158
1159 local_irq_restore(flags);
1160}
1161
1162/* ASID can change if another task is scheduled during preemption */
1163void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1164{
1165 unsigned long flags;
1166 int cpu;
1167
1168 local_irq_save(flags);
1169
1170 cpu = smp_processor_id();
1171 vcpu->arch.last_sched_cpu = cpu;
1172 vcpu->cpu = -1;
1173
1174 /* save guest state in registers */
1175 kvm_mips_callbacks->vcpu_put(vcpu, cpu);
1176
1177 local_irq_restore(flags);
1178}
1179
1180/**
1181 * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault.
1182 * @vcpu: Virtual CPU.
1183 * @gva: Guest virtual address to be accessed.
1184 * @write: True if write attempted (must be dirtied and made writable).
1185 *
1186 * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and
1187 * dirtying the page if @write so that guest instructions can be modified.
1188 *
1189 * Returns: KVM_MIPS_MAPPED on success.
1190 * KVM_MIPS_GVA if bad guest virtual address.
1191 * KVM_MIPS_GPA if bad guest physical address.
1192 * KVM_MIPS_TLB if guest TLB not present.
1193 * KVM_MIPS_TLBINV if guest TLB present but not valid.
1194 * KVM_MIPS_TLBMOD if guest TLB read only.
1195 */
1196enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
1197 unsigned long gva,
1198 bool write)
1199{
1200 struct mips_coproc *cop0 = vcpu->arch.cop0;
1201 struct kvm_mips_tlb *tlb;
1202 int index;
1203
1204 if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) {
1205 if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0)
1206 return KVM_MIPS_GPA;
1207 } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) ||
1208 KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) {
1209 /* Address should be in the guest TLB */
1210 index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) |
1211 (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID));
1212 if (index < 0)
1213 return KVM_MIPS_TLB;
1214 tlb = &vcpu->arch.guest_tlb[index];
1215
1216 /* Entry should be valid, and dirty for writes */
1217 if (!TLB_IS_VALID(*tlb, gva))
1218 return KVM_MIPS_TLBINV;
1219 if (write && !TLB_IS_DIRTY(*tlb, gva))
1220 return KVM_MIPS_TLBMOD;
1221
1222 if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write))
1223 return KVM_MIPS_GPA;
1224 } else {
1225 return KVM_MIPS_GVA;
1226 }
1227
1228 return KVM_MIPS_MAPPED;
1229}
1230
1231int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
1232{
1233 int err;
1234
1235 if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
1236 "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
1237 return -EINVAL;
1238
1239retry:
1240 kvm_trap_emul_gva_lockless_begin(vcpu);
1241 err = get_user(*out, opc);
1242 kvm_trap_emul_gva_lockless_end(vcpu);
1243
1244 if (unlikely(err)) {
1245 /*
1246 * Try to handle the fault, maybe we just raced with a GVA
1247 * invalidation.
1248 */
1249 err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc,
1250 false);
1251 if (unlikely(err)) {
1252 kvm_err("%s: illegal address: %p\n",
1253 __func__, opc);
1254 return -EFAULT;
1255 }
1256
1257 /* Hopefully it'll work now */
1258 goto retry;
1259 }
1260 return 0;
1261}
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * KVM/MIPS MMU handling in the KVM module.
7 *
8 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
9 * Authors: Sanjay Lal <sanjayl@kymasys.com>
10 */
11
12#include <linux/highmem.h>
13#include <linux/kvm_host.h>
14#include <linux/uaccess.h>
15#include <asm/mmu_context.h>
16#include <asm/pgalloc.h>
17
18/*
19 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
20 * for which pages need to be cached.
21 */
22#if defined(__PAGETABLE_PMD_FOLDED)
23#define KVM_MMU_CACHE_MIN_PAGES 1
24#else
25#define KVM_MMU_CACHE_MIN_PAGES 2
26#endif
27
28void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
29{
30 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
31}
32
33/**
34 * kvm_pgd_init() - Initialise KVM GPA page directory.
35 * @page: Pointer to page directory (PGD) for KVM GPA.
36 *
37 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
38 * representing no mappings. This is similar to pgd_init(), however it
39 * initialises all the page directory pointers, not just the ones corresponding
40 * to the userland address space (since it is for the guest physical address
41 * space rather than a virtual address space).
42 */
43static void kvm_pgd_init(void *page)
44{
45 unsigned long *p, *end;
46 unsigned long entry;
47
48#ifdef __PAGETABLE_PMD_FOLDED
49 entry = (unsigned long)invalid_pte_table;
50#else
51 entry = (unsigned long)invalid_pmd_table;
52#endif
53
54 p = (unsigned long *)page;
55 end = p + PTRS_PER_PGD;
56
57 do {
58 p[0] = entry;
59 p[1] = entry;
60 p[2] = entry;
61 p[3] = entry;
62 p[4] = entry;
63 p += 8;
64 p[-3] = entry;
65 p[-2] = entry;
66 p[-1] = entry;
67 } while (p != end);
68}
69
70/**
71 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
72 *
73 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
74 * to host physical page mappings.
75 *
76 * Returns: Pointer to new KVM GPA page directory.
77 * NULL on allocation failure.
78 */
79pgd_t *kvm_pgd_alloc(void)
80{
81 pgd_t *ret;
82
83 ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER);
84 if (ret)
85 kvm_pgd_init(ret);
86
87 return ret;
88}
89
90/**
91 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
92 * @pgd: Page directory pointer.
93 * @addr: Address to index page table using.
94 * @cache: MMU page cache to allocate new page tables from, or NULL.
95 *
96 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
97 * address @addr. If page tables don't exist for @addr, they will be created
98 * from the MMU cache if @cache is not NULL.
99 *
100 * Returns: Pointer to pte_t corresponding to @addr.
101 * NULL if a page table doesn't exist for @addr and !@cache.
102 * NULL if a page table allocation failed.
103 */
104static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
105 unsigned long addr)
106{
107 p4d_t *p4d;
108 pud_t *pud;
109 pmd_t *pmd;
110
111 pgd += pgd_index(addr);
112 if (pgd_none(*pgd)) {
113 /* Not used on MIPS yet */
114 BUG();
115 return NULL;
116 }
117 p4d = p4d_offset(pgd, addr);
118 pud = pud_offset(p4d, addr);
119 if (pud_none(*pud)) {
120 pmd_t *new_pmd;
121
122 if (!cache)
123 return NULL;
124 new_pmd = kvm_mmu_memory_cache_alloc(cache);
125 pmd_init((unsigned long)new_pmd,
126 (unsigned long)invalid_pte_table);
127 pud_populate(NULL, pud, new_pmd);
128 }
129 pmd = pmd_offset(pud, addr);
130 if (pmd_none(*pmd)) {
131 pte_t *new_pte;
132
133 if (!cache)
134 return NULL;
135 new_pte = kvm_mmu_memory_cache_alloc(cache);
136 clear_page(new_pte);
137 pmd_populate_kernel(NULL, pmd, new_pte);
138 }
139 return pte_offset_kernel(pmd, addr);
140}
141
142/* Caller must hold kvm->mm_lock */
143static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
144 struct kvm_mmu_memory_cache *cache,
145 unsigned long addr)
146{
147 return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
148}
149
150/*
151 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
152 * Flush a range of guest physical address space from the VM's GPA page tables.
153 */
154
155static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
156 unsigned long end_gpa)
157{
158 int i_min = pte_index(start_gpa);
159 int i_max = pte_index(end_gpa);
160 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
161 int i;
162
163 for (i = i_min; i <= i_max; ++i) {
164 if (!pte_present(pte[i]))
165 continue;
166
167 set_pte(pte + i, __pte(0));
168 }
169 return safe_to_remove;
170}
171
172static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
173 unsigned long end_gpa)
174{
175 pte_t *pte;
176 unsigned long end = ~0ul;
177 int i_min = pmd_index(start_gpa);
178 int i_max = pmd_index(end_gpa);
179 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
180 int i;
181
182 for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
183 if (!pmd_present(pmd[i]))
184 continue;
185
186 pte = pte_offset_kernel(pmd + i, 0);
187 if (i == i_max)
188 end = end_gpa;
189
190 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
191 pmd_clear(pmd + i);
192 pte_free_kernel(NULL, pte);
193 } else {
194 safe_to_remove = false;
195 }
196 }
197 return safe_to_remove;
198}
199
200static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
201 unsigned long end_gpa)
202{
203 pmd_t *pmd;
204 unsigned long end = ~0ul;
205 int i_min = pud_index(start_gpa);
206 int i_max = pud_index(end_gpa);
207 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
208 int i;
209
210 for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
211 if (!pud_present(pud[i]))
212 continue;
213
214 pmd = pmd_offset(pud + i, 0);
215 if (i == i_max)
216 end = end_gpa;
217
218 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
219 pud_clear(pud + i);
220 pmd_free(NULL, pmd);
221 } else {
222 safe_to_remove = false;
223 }
224 }
225 return safe_to_remove;
226}
227
228static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
229 unsigned long end_gpa)
230{
231 p4d_t *p4d;
232 pud_t *pud;
233 unsigned long end = ~0ul;
234 int i_min = pgd_index(start_gpa);
235 int i_max = pgd_index(end_gpa);
236 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
237 int i;
238
239 for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
240 if (!pgd_present(pgd[i]))
241 continue;
242
243 p4d = p4d_offset(pgd, 0);
244 pud = pud_offset(p4d + i, 0);
245 if (i == i_max)
246 end = end_gpa;
247
248 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
249 pgd_clear(pgd + i);
250 pud_free(NULL, pud);
251 } else {
252 safe_to_remove = false;
253 }
254 }
255 return safe_to_remove;
256}
257
258/**
259 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
260 * @kvm: KVM pointer.
261 * @start_gfn: Guest frame number of first page in GPA range to flush.
262 * @end_gfn: Guest frame number of last page in GPA range to flush.
263 *
264 * Flushes a range of GPA mappings from the GPA page tables.
265 *
266 * The caller must hold the @kvm->mmu_lock spinlock.
267 *
268 * Returns: Whether its safe to remove the top level page directory because
269 * all lower levels have been removed.
270 */
271bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
272{
273 return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
274 start_gfn << PAGE_SHIFT,
275 end_gfn << PAGE_SHIFT);
276}
277
/*
 * BUILD_PTE_RANGE_OP() - Generate kvm_mips_<name>_{pte,pmd,pud,pgd}().
 *
 * The generated functions walk an inclusive address range [start, end] one
 * page table level each and apply op() to every present PTE in that range.
 * A PTE is only rewritten when op() actually changes its value. Each level
 * returns non-zero if any PTE below it was modified.
 *
 * At the PMD/PUD/PGD levels only the first entry uses @start and only the
 * last uses @end; entries in between are walked from 0 to ~0ul.
 */
#define BUILD_PTE_RANGE_OP(name, op)					\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = pte_index(start);					\
	int last = pte_index(end);					\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++) {				\
		pte_t before, after;					\
									\
		if (!pte_present(pte[idx]))				\
			continue;					\
									\
		before = pte[idx];					\
		after = op(before);					\
		if (pte_val(after) != pte_val(before)) {		\
			set_pte(&pte[idx], after);			\
			changed = 1;					\
		}							\
	}								\
	return changed;							\
}									\
									\
/* returns true if anything was done */					\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = pmd_index(start);					\
	int last = pmd_index(end);					\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++) {				\
		if (pmd_present(pmd[idx])) {				\
			pte_t *table = pte_offset_kernel(&pmd[idx], 0);	\
			unsigned long sub_end =				\
				(idx == last) ? end : ~0ul;		\
									\
			changed |= kvm_mips_##name##_pte(table, start,	\
							 sub_end);	\
		}							\
		start = 0;						\
	}								\
	return changed;							\
}									\
									\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = pud_index(start);					\
	int last = pud_index(end);					\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++) {				\
		if (pud_present(pud[idx])) {				\
			pmd_t *table = pmd_offset(&pud[idx], 0);	\
			unsigned long sub_end =				\
				(idx == last) ? end : ~0ul;		\
									\
			changed |= kvm_mips_##name##_pmd(table, start,	\
							 sub_end);	\
		}							\
		start = 0;						\
	}								\
	return changed;							\
}									\
									\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)			\
{									\
	int first = pgd_index(start);					\
	int last = pgd_index(end);					\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++) {				\
		if (pgd_present(pgd[idx])) {				\
			p4d_t *p4d_base = p4d_offset(pgd, 0);		\
			pud_t *table = pud_offset(p4d_base + idx, 0);	\
			unsigned long sub_end =				\
				(idx == last) ? end : ~0ul;		\
									\
			changed |= kvm_mips_##name##_pud(table, start,	\
							 sub_end);	\
		}							\
		start = 0;						\
	}								\
	return changed;							\
}
373
374/*
375 * kvm_mips_mkclean_gpa_pt.
376 * Mark a range of guest physical address space clean (writes fault) in the VM's
377 * GPA page table to allow dirty page tracking.
378 */
379
380BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
381
382/**
383 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
384 * @kvm: KVM pointer.
385 * @start_gfn: Guest frame number of first page in GPA range to flush.
386 * @end_gfn: Guest frame number of last page in GPA range to flush.
387 *
388 * Make a range of GPA mappings clean so that guest writes will fault and
389 * trigger dirty page logging.
390 *
391 * The caller must hold the @kvm->mmu_lock spinlock.
392 *
393 * Returns: Whether any GPA mappings were modified, which would require
394 * derived mappings (GVA page tables & TLB enties) to be
395 * invalidated.
396 */
397int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
398{
399 return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
400 start_gfn << PAGE_SHIFT,
401 end_gfn << PAGE_SHIFT);
402}
403
404/**
405 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
406 * @kvm: The KVM pointer
407 * @slot: The memory slot associated with mask
408 * @gfn_offset: The gfn offset in memory slot
409 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
410 * slot to be write protected
411 *
412 * Walks bits set in mask write protects the associated pte's. Caller must
413 * acquire @kvm->mmu_lock.
414 */
415void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
416 struct kvm_memory_slot *slot,
417 gfn_t gfn_offset, unsigned long mask)
418{
419 gfn_t base_gfn = slot->base_gfn + gfn_offset;
420 gfn_t start = base_gfn + __ffs(mask);
421 gfn_t end = base_gfn + __fls(mask);
422
423 kvm_mips_mkclean_gpa_pt(kvm, start, end);
424}
425
426/*
427 * kvm_mips_mkold_gpa_pt.
428 * Mark a range of guest physical address space old (all accesses fault) in the
429 * VM's GPA page table to allow detection of commonly used pages.
430 */
431
432BUILD_PTE_RANGE_OP(mkold, pte_mkold)
433
434static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
435 gfn_t end_gfn)
436{
437 return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
438 start_gfn << PAGE_SHIFT,
439 end_gfn << PAGE_SHIFT);
440}
441
442static int handle_hva_to_gpa(struct kvm *kvm,
443 unsigned long start,
444 unsigned long end,
445 int (*handler)(struct kvm *kvm, gfn_t gfn,
446 gpa_t gfn_end,
447 struct kvm_memory_slot *memslot,
448 void *data),
449 void *data)
450{
451 struct kvm_memslots *slots;
452 struct kvm_memory_slot *memslot;
453 int ret = 0;
454
455 slots = kvm_memslots(kvm);
456
457 /* we only care about the pages that the guest sees */
458 kvm_for_each_memslot(memslot, slots) {
459 unsigned long hva_start, hva_end;
460 gfn_t gfn, gfn_end;
461
462 hva_start = max(start, memslot->userspace_addr);
463 hva_end = min(end, memslot->userspace_addr +
464 (memslot->npages << PAGE_SHIFT));
465 if (hva_start >= hva_end)
466 continue;
467
468 /*
469 * {gfn(page) | page intersects with [hva_start, hva_end)} =
470 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
471 */
472 gfn = hva_to_gfn_memslot(hva_start, memslot);
473 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
474
475 ret |= handler(kvm, gfn, gfn_end, memslot, data);
476 }
477
478 return ret;
479}
480
481
482static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
483 struct kvm_memory_slot *memslot, void *data)
484{
485 kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end);
486 return 1;
487}
488
489int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
490 unsigned flags)
491{
492 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
493
494 kvm_mips_callbacks->flush_shadow_all(kvm);
495 return 0;
496}
497
498static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
499 struct kvm_memory_slot *memslot, void *data)
500{
501 gpa_t gpa = gfn << PAGE_SHIFT;
502 pte_t hva_pte = *(pte_t *)data;
503 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
504 pte_t old_pte;
505
506 if (!gpa_pte)
507 return 0;
508
509 /* Mapping may need adjusting depending on memslot flags */
510 old_pte = *gpa_pte;
511 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
512 hva_pte = pte_mkclean(hva_pte);
513 else if (memslot->flags & KVM_MEM_READONLY)
514 hva_pte = pte_wrprotect(hva_pte);
515
516 set_pte(gpa_pte, hva_pte);
517
518 /* Replacing an absent or old page doesn't need flushes */
519 if (!pte_present(old_pte) || !pte_young(old_pte))
520 return 0;
521
522 /* Pages swapped, aged, moved, or cleaned require flushes */
523 return !pte_present(hva_pte) ||
524 !pte_young(hva_pte) ||
525 pte_pfn(old_pte) != pte_pfn(hva_pte) ||
526 (pte_dirty(old_pte) && !pte_dirty(hva_pte));
527}
528
529int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
530{
531 unsigned long end = hva + PAGE_SIZE;
532 int ret;
533
534 ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
535 if (ret)
536 kvm_mips_callbacks->flush_shadow_all(kvm);
537 return 0;
538}
539
540static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
541 struct kvm_memory_slot *memslot, void *data)
542{
543 return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end);
544}
545
546static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
547 struct kvm_memory_slot *memslot, void *data)
548{
549 gpa_t gpa = gfn << PAGE_SHIFT;
550 pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
551
552 if (!gpa_pte)
553 return 0;
554 return pte_young(*gpa_pte);
555}
556
557int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
558{
559 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
560}
561
562int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
563{
564 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
565}
566
567/**
568 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
569 * @vcpu: VCPU pointer.
570 * @gpa: Guest physical address of fault.
571 * @write_fault: Whether the fault was due to a write.
572 * @out_entry: New PTE for @gpa (written on success unless NULL).
573 * @out_buddy: New PTE for @gpa's buddy (written on success unless
574 * NULL).
575 *
576 * Perform fast path GPA fault handling, doing all that can be done without
577 * calling into KVM. This handles marking old pages young (for idle page
578 * tracking), and dirtying of clean pages (for dirty page logging).
579 *
580 * Returns: 0 on success, in which case we can update derived mappings and
581 * resume guest execution.
582 * -EFAULT on failure due to absent GPA mapping or write to
583 * read-only page, in which case KVM must be consulted.
584 */
585static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
586 bool write_fault,
587 pte_t *out_entry, pte_t *out_buddy)
588{
589 struct kvm *kvm = vcpu->kvm;
590 gfn_t gfn = gpa >> PAGE_SHIFT;
591 pte_t *ptep;
592 kvm_pfn_t pfn = 0; /* silence bogus GCC warning */
593 bool pfn_valid = false;
594 int ret = 0;
595
596 spin_lock(&kvm->mmu_lock);
597
598 /* Fast path - just check GPA page table for an existing entry */
599 ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
600 if (!ptep || !pte_present(*ptep)) {
601 ret = -EFAULT;
602 goto out;
603 }
604
605 /* Track access to pages marked old */
606 if (!pte_young(*ptep)) {
607 set_pte(ptep, pte_mkyoung(*ptep));
608 pfn = pte_pfn(*ptep);
609 pfn_valid = true;
610 /* call kvm_set_pfn_accessed() after unlock */
611 }
612 if (write_fault && !pte_dirty(*ptep)) {
613 if (!pte_write(*ptep)) {
614 ret = -EFAULT;
615 goto out;
616 }
617
618 /* Track dirtying of writeable pages */
619 set_pte(ptep, pte_mkdirty(*ptep));
620 pfn = pte_pfn(*ptep);
621 mark_page_dirty(kvm, gfn);
622 kvm_set_pfn_dirty(pfn);
623 }
624
625 if (out_entry)
626 *out_entry = *ptep;
627 if (out_buddy)
628 *out_buddy = *ptep_buddy(ptep);
629
630out:
631 spin_unlock(&kvm->mmu_lock);
632 if (pfn_valid)
633 kvm_set_pfn_accessed(pfn);
634 return ret;
635}
636
637/**
638 * kvm_mips_map_page() - Map a guest physical page.
639 * @vcpu: VCPU pointer.
640 * @gpa: Guest physical address of fault.
641 * @write_fault: Whether the fault was due to a write.
642 * @out_entry: New PTE for @gpa (written on success unless NULL).
643 * @out_buddy: New PTE for @gpa's buddy (written on success unless
644 * NULL).
645 *
646 * Handle GPA faults by creating a new GPA mapping (or updating an existing
647 * one).
648 *
649 * This takes care of marking pages young or dirty (idle/dirty page tracking),
650 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
651 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
652 * caller.
653 *
654 * Returns: 0 on success, in which case the caller may use the @out_entry
655 * and @out_buddy PTEs to update derived mappings and resume guest
656 * execution.
657 * -EFAULT if there is no memory region at @gpa or a write was
658 * attempted to a read-only memory region. This is usually handled
659 * as an MMIO access.
660 */
661static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
662 bool write_fault,
663 pte_t *out_entry, pte_t *out_buddy)
664{
665 struct kvm *kvm = vcpu->kvm;
666 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
667 gfn_t gfn = gpa >> PAGE_SHIFT;
668 int srcu_idx, err;
669 kvm_pfn_t pfn;
670 pte_t *ptep, entry, old_pte;
671 bool writeable;
672 unsigned long prot_bits;
673 unsigned long mmu_seq;
674
675 /* Try the fast path to handle old / clean pages */
676 srcu_idx = srcu_read_lock(&kvm->srcu);
677 err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
678 out_buddy);
679 if (!err)
680 goto out;
681
682 /* We need a minimum of cached pages ready for page table creation */
683 err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
684 if (err)
685 goto out;
686
687retry:
688 /*
689 * Used to check for invalidations in progress, of the pfn that is
690 * returned by pfn_to_pfn_prot below.
691 */
692 mmu_seq = kvm->mmu_notifier_seq;
693 /*
694 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
695 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
696 * risk the page we get a reference to getting unmapped before we have a
697 * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
698 *
699 * This smp_rmb() pairs with the effective smp_wmb() of the combination
700 * of the pte_unmap_unlock() after the PTE is zapped, and the
701 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
702 * mmu_notifier_seq is incremented.
703 */
704 smp_rmb();
705
706 /* Slow path - ask KVM core whether we can access this GPA */
707 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
708 if (is_error_noslot_pfn(pfn)) {
709 err = -EFAULT;
710 goto out;
711 }
712
713 spin_lock(&kvm->mmu_lock);
714 /* Check if an invalidation has taken place since we got pfn */
715 if (mmu_notifier_retry(kvm, mmu_seq)) {
716 /*
717 * This can happen when mappings are changed asynchronously, but
718 * also synchronously if a COW is triggered by
719 * gfn_to_pfn_prot().
720 */
721 spin_unlock(&kvm->mmu_lock);
722 kvm_release_pfn_clean(pfn);
723 goto retry;
724 }
725
726 /* Ensure page tables are allocated */
727 ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);
728
729 /* Set up the PTE */
730 prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
731 if (writeable) {
732 prot_bits |= _PAGE_WRITE;
733 if (write_fault) {
734 prot_bits |= __WRITEABLE;
735 mark_page_dirty(kvm, gfn);
736 kvm_set_pfn_dirty(pfn);
737 }
738 }
739 entry = pfn_pte(pfn, __pgprot(prot_bits));
740
741 /* Write the PTE */
742 old_pte = *ptep;
743 set_pte(ptep, entry);
744
745 err = 0;
746 if (out_entry)
747 *out_entry = *ptep;
748 if (out_buddy)
749 *out_buddy = *ptep_buddy(ptep);
750
751 spin_unlock(&kvm->mmu_lock);
752 kvm_release_pfn_clean(pfn);
753 kvm_set_pfn_accessed(pfn);
754out:
755 srcu_read_unlock(&kvm->srcu, srcu_idx);
756 return err;
757}
758
759static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
760 unsigned long addr)
761{
762 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
763 pgd_t *pgdp;
764 int ret;
765
766 /* We need a minimum of cached pages ready for page table creation */
767 ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
768 if (ret)
769 return NULL;
770
771 if (KVM_GUEST_KERNEL_MODE(vcpu))
772 pgdp = vcpu->arch.guest_kernel_mm.pgd;
773 else
774 pgdp = vcpu->arch.guest_user_mm.pgd;
775
776 return kvm_mips_walk_pgd(pgdp, memcache, addr);
777}
778
779void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
780 bool user)
781{
782 pgd_t *pgdp;
783 pte_t *ptep;
784
785 addr &= PAGE_MASK << 1;
786
787 pgdp = vcpu->arch.guest_kernel_mm.pgd;
788 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
789 if (ptep) {
790 ptep[0] = pfn_pte(0, __pgprot(0));
791 ptep[1] = pfn_pte(0, __pgprot(0));
792 }
793
794 if (user) {
795 pgdp = vcpu->arch.guest_user_mm.pgd;
796 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
797 if (ptep) {
798 ptep[0] = pfn_pte(0, __pgprot(0));
799 ptep[1] = pfn_pte(0, __pgprot(0));
800 }
801 }
802}
803
/*
 * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest virtual address space from a GVA page table.
 */
808
809static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva,
810 unsigned long end_gva)
811{
812 int i_min = pte_index(start_gva);
813 int i_max = pte_index(end_gva);
814 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
815 int i;
816
817 /*
818 * There's no freeing to do, so there's no point clearing individual
819 * entries unless only part of the last level page table needs flushing.
820 */
821 if (safe_to_remove)
822 return true;
823
824 for (i = i_min; i <= i_max; ++i) {
825 if (!pte_present(pte[i]))
826 continue;
827
828 set_pte(pte + i, __pte(0));
829 }
830 return false;
831}
832
833static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
834 unsigned long end_gva)
835{
836 pte_t *pte;
837 unsigned long end = ~0ul;
838 int i_min = pmd_index(start_gva);
839 int i_max = pmd_index(end_gva);
840 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
841 int i;
842
843 for (i = i_min; i <= i_max; ++i, start_gva = 0) {
844 if (!pmd_present(pmd[i]))
845 continue;
846
847 pte = pte_offset_kernel(pmd + i, 0);
848 if (i == i_max)
849 end = end_gva;
850
851 if (kvm_mips_flush_gva_pte(pte, start_gva, end)) {
852 pmd_clear(pmd + i);
853 pte_free_kernel(NULL, pte);
854 } else {
855 safe_to_remove = false;
856 }
857 }
858 return safe_to_remove;
859}
860
861static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
862 unsigned long end_gva)
863{
864 pmd_t *pmd;
865 unsigned long end = ~0ul;
866 int i_min = pud_index(start_gva);
867 int i_max = pud_index(end_gva);
868 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
869 int i;
870
871 for (i = i_min; i <= i_max; ++i, start_gva = 0) {
872 if (!pud_present(pud[i]))
873 continue;
874
875 pmd = pmd_offset(pud + i, 0);
876 if (i == i_max)
877 end = end_gva;
878
879 if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) {
880 pud_clear(pud + i);
881 pmd_free(NULL, pmd);
882 } else {
883 safe_to_remove = false;
884 }
885 }
886 return safe_to_remove;
887}
888
889static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
890 unsigned long end_gva)
891{
892 p4d_t *p4d;
893 pud_t *pud;
894 unsigned long end = ~0ul;
895 int i_min = pgd_index(start_gva);
896 int i_max = pgd_index(end_gva);
897 bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
898 int i;
899
900 for (i = i_min; i <= i_max; ++i, start_gva = 0) {
901 if (!pgd_present(pgd[i]))
902 continue;
903
904 p4d = p4d_offset(pgd, 0);
905 pud = pud_offset(p4d + i, 0);
906 if (i == i_max)
907 end = end_gva;
908
909 if (kvm_mips_flush_gva_pud(pud, start_gva, end)) {
910 pgd_clear(pgd + i);
911 pud_free(NULL, pud);
912 } else {
913 safe_to_remove = false;
914 }
915 }
916 return safe_to_remove;
917}
918
919void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags)
920{
921 if (flags & KMF_GPA) {
922 /* all of guest virtual address space could be affected */
923 if (flags & KMF_KERN)
924 /* useg, kseg0, seg2/3 */
925 kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff);
926 else
927 /* useg */
928 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
929 } else {
930 /* useg */
931 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
932
933 /* kseg2/3 */
934 if (flags & KMF_KERN)
935 kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff);
936 }
937}
938
939static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte)
940{
941 /*
942 * Don't leak writeable but clean entries from GPA page tables. We don't
943 * want the normal Linux tlbmod handler to handle dirtying when KVM
944 * accesses guest memory.
945 */
946 if (!pte_dirty(pte))
947 pte = pte_wrprotect(pte);
948
949 return pte;
950}
951
952static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
953{
954 /* Guest EntryLo overrides host EntryLo */
955 if (!(entrylo & ENTRYLO_D))
956 pte = pte_mkclean(pte);
957
958 return kvm_mips_gpa_pte_to_gva_unmapped(pte);
959}
960
#ifdef CONFIG_KVM_MIPS_VZ
/*
 * Handle a VZ root TLB fault at @badvaddr: map the page in the GPA page
 * tables, then invalidate the corresponding host TLB entry.
 *
 * Returns the error from kvm_mips_map_page() if mapping fails, otherwise the
 * result of kvm_vz_host_tlb_inv().
 */
int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
				      struct kvm_vcpu *vcpu,
				      bool write_fault)
{
	int err = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);

	if (err != 0)
		return err;

	/* Invalidate this entry in the TLB */
	return kvm_vz_host_tlb_inv(vcpu, badvaddr);
}
#endif
976
977/* XXXKYMA: Must be called with interrupts disabled */
978int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
979 struct kvm_vcpu *vcpu,
980 bool write_fault)
981{
982 unsigned long gpa;
983 pte_t pte_gpa[2], *ptep_gva;
984 int idx;
985
986 if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
987 kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
988 kvm_mips_dump_host_tlbs();
989 return -1;
990 }
991
992 /* Get the GPA page table entry */
993 gpa = KVM_GUEST_CPHYSADDR(badvaddr);
994 idx = (badvaddr >> PAGE_SHIFT) & 1;
995 if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx],
996 &pte_gpa[!idx]) < 0)
997 return -1;
998
999 /* Get the GVA page table entry */
1000 ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE);
1001 if (!ptep_gva) {
1002 kvm_err("No ptep for gva %lx\n", badvaddr);
1003 return -1;
1004 }
1005
1006 /* Copy a pair of entries from GPA page table to GVA page table */
1007 ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]);
1008 ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]);
1009
1010 /* Invalidate this entry in the TLB, guest kernel ASID only */
1011 kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
1012 return 0;
1013}
1014
1015int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
1016 struct kvm_mips_tlb *tlb,
1017 unsigned long gva,
1018 bool write_fault)
1019{
1020 struct kvm *kvm = vcpu->kvm;
1021 long tlb_lo[2];
1022 pte_t pte_gpa[2], *ptep_buddy, *ptep_gva;
1023 unsigned int idx = TLB_LO_IDX(*tlb, gva);
1024 bool kernel = KVM_GUEST_KERNEL_MODE(vcpu);
1025
1026 tlb_lo[0] = tlb->tlb_lo[0];
1027 tlb_lo[1] = tlb->tlb_lo[1];
1028
1029 /*
1030 * The commpage address must not be mapped to anything else if the guest
1031 * TLB contains entries nearby, or commpage accesses will break.
1032 */
1033 if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1)))
1034 tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0;
1035
1036 /* Get the GPA page table entry */
1037 if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]),
1038 write_fault, &pte_gpa[idx], NULL) < 0)
1039 return -1;
1040
1041 /* And its GVA buddy's GPA page table entry if it also exists */
1042 pte_gpa[!idx] = pfn_pte(0, __pgprot(0));
1043 if (tlb_lo[!idx] & ENTRYLO_V) {
1044 spin_lock(&kvm->mmu_lock);
1045 ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL,
1046 mips3_tlbpfn_to_paddr(tlb_lo[!idx]));
1047 if (ptep_buddy)
1048 pte_gpa[!idx] = *ptep_buddy;
1049 spin_unlock(&kvm->mmu_lock);
1050 }
1051
1052 /* Get the GVA page table entry pair */
1053 ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE);
1054 if (!ptep_gva) {
1055 kvm_err("No ptep for gva %lx\n", gva);
1056 return -1;
1057 }
1058
1059 /* Copy a pair of entries from GPA page table to GVA page table */
1060 ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]);
1061 ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]);
1062
1063 /* Invalidate this entry in the TLB, current guest mode ASID only */
1064 kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel);
1065
1066 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
1067 tlb->tlb_lo[0], tlb->tlb_lo[1]);
1068
1069 return 0;
1070}
1071
1072int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
1073 struct kvm_vcpu *vcpu)
1074{
1075 kvm_pfn_t pfn;
1076 pte_t *ptep;
1077
1078 ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr);
1079 if (!ptep) {
1080 kvm_err("No ptep for commpage %lx\n", badvaddr);
1081 return -1;
1082 }
1083
1084 pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
1085 /* Also set valid and dirty, so refill handler doesn't have to */
1086 *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED)));
1087
1088 /* Invalidate this entry in the TLB, guest kernel ASID only */
1089 kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
1090 return 0;
1091}
1092
1093/**
1094 * kvm_mips_migrate_count() - Migrate timer.
1095 * @vcpu: Virtual CPU.
1096 *
1097 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
1098 * if it was running prior to being cancelled.
1099 *
1100 * Must be called when the VCPU is migrated to a different CPU to ensure that
1101 * timer expiry during guest execution interrupts the guest and causes the
1102 * interrupt to be delivered in a timely manner.
1103 */
1104static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
1105{
1106 if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
1107 hrtimer_restart(&vcpu->arch.comparecount_timer);
1108}
1109
1110/* Restore ASID once we are scheduled back after preemption */
1111void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1112{
1113 unsigned long flags;
1114
1115 kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
1116
1117 local_irq_save(flags);
1118
1119 vcpu->cpu = cpu;
1120 if (vcpu->arch.last_sched_cpu != cpu) {
1121 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
1122 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
1123 /*
1124 * Migrate the timer interrupt to the current CPU so that it
1125 * always interrupts the guest and synchronously triggers a
1126 * guest timer interrupt.
1127 */
1128 kvm_mips_migrate_count(vcpu);
1129 }
1130
1131 /* restore guest state to registers */
1132 kvm_mips_callbacks->vcpu_load(vcpu, cpu);
1133
1134 local_irq_restore(flags);
1135}
1136
1137/* ASID can change if another task is scheduled during preemption */
1138void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1139{
1140 unsigned long flags;
1141 int cpu;
1142
1143 local_irq_save(flags);
1144
1145 cpu = smp_processor_id();
1146 vcpu->arch.last_sched_cpu = cpu;
1147 vcpu->cpu = -1;
1148
1149 /* save guest state in registers */
1150 kvm_mips_callbacks->vcpu_put(vcpu, cpu);
1151
1152 local_irq_restore(flags);
1153}
1154
1155/**
1156 * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault.
1157 * @vcpu: Virtual CPU.
1158 * @gva: Guest virtual address to be accessed.
1159 * @write: True if write attempted (must be dirtied and made writable).
1160 *
1161 * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and
1162 * dirtying the page if @write so that guest instructions can be modified.
1163 *
1164 * Returns: KVM_MIPS_MAPPED on success.
1165 * KVM_MIPS_GVA if bad guest virtual address.
1166 * KVM_MIPS_GPA if bad guest physical address.
1167 * KVM_MIPS_TLB if guest TLB not present.
1168 * KVM_MIPS_TLBINV if guest TLB present but not valid.
1169 * KVM_MIPS_TLBMOD if guest TLB read only.
1170 */
1171enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
1172 unsigned long gva,
1173 bool write)
1174{
1175 struct mips_coproc *cop0 = vcpu->arch.cop0;
1176 struct kvm_mips_tlb *tlb;
1177 int index;
1178
1179 if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) {
1180 if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0)
1181 return KVM_MIPS_GPA;
1182 } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) ||
1183 KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) {
1184 /* Address should be in the guest TLB */
1185 index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) |
1186 (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID));
1187 if (index < 0)
1188 return KVM_MIPS_TLB;
1189 tlb = &vcpu->arch.guest_tlb[index];
1190
1191 /* Entry should be valid, and dirty for writes */
1192 if (!TLB_IS_VALID(*tlb, gva))
1193 return KVM_MIPS_TLBINV;
1194 if (write && !TLB_IS_DIRTY(*tlb, gva))
1195 return KVM_MIPS_TLBMOD;
1196
1197 if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write))
1198 return KVM_MIPS_GPA;
1199 } else {
1200 return KVM_MIPS_GVA;
1201 }
1202
1203 return KVM_MIPS_MAPPED;
1204}
1205
1206int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
1207{
1208 int err;
1209
1210 if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
1211 "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
1212 return -EINVAL;
1213
1214retry:
1215 kvm_trap_emul_gva_lockless_begin(vcpu);
1216 err = get_user(*out, opc);
1217 kvm_trap_emul_gva_lockless_end(vcpu);
1218
1219 if (unlikely(err)) {
1220 /*
1221 * Try to handle the fault, maybe we just raced with a GVA
1222 * invalidation.
1223 */
1224 err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc,
1225 false);
1226 if (unlikely(err)) {
1227 kvm_err("%s: illegal address: %p\n",
1228 __func__, opc);
1229 return -EFAULT;
1230 }
1231
1232 /* Hopefully it'll work now */
1233 goto retry;
1234 }
1235 return 0;
1236}