Loading...
1// SPDX-License-Identifier: GPL-2.0
2
3#ifndef __KVM_X86_MMU_TDP_ITER_H
4#define __KVM_X86_MMU_TDP_ITER_H
5
6#include <linux/kvm_host.h>
7
8#include "mmu.h"
9#include "spte.h"
10
11/*
12 * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
13 * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
14 * batched without having to collect the list of zapped SPs. Flows that can
15 * remove SPs must service pending TLB flushes prior to dropping RCU protection.
16 */
17static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
18{
19 return READ_ONCE(*rcu_dereference(sptep));
20}
21
22static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
23{
24 KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
25 return xchg(rcu_dereference(sptep), new_spte);
26}
27
28static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
29{
30 KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
31 WRITE_ONCE(*rcu_dereference(sptep), new_spte);
32}
33
34/*
35 * SPTEs must be modified atomically if they are shadow-present, leaf
36 * SPTEs, and have volatile bits, i.e. has bits that can be set outside
37 * of mmu_lock. The Writable bit can be set by KVM's fast page fault
38 * handler, and Accessed and Dirty bits can be set by the CPU.
39 *
40 * Note, non-leaf SPTEs do have Accessed bits and those bits are
41 * technically volatile, but KVM doesn't consume the Accessed bit of
42 * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
43 * logic needs to be reassessed if KVM were to use non-leaf Accessed
44 * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
45 */
46static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
47{
48 return is_shadow_present_pte(old_spte) &&
49 is_last_spte(old_spte, level) &&
50 spte_has_volatile_bits(old_spte);
51}
52
53static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
54 u64 new_spte, int level)
55{
56 if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level))
57 return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
58
59 __kvm_tdp_mmu_write_spte(sptep, new_spte);
60 return old_spte;
61}
62
63static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
64 u64 mask, int level)
65{
66 atomic64_t *sptep_atomic;
67
68 if (kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
69 sptep_atomic = (atomic64_t *)rcu_dereference(sptep);
70 return (u64)atomic64_fetch_and(~mask, sptep_atomic);
71 }
72
73 __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
74 return old_spte;
75}
76
77/*
78 * A TDP iterator performs a pre-order walk over a TDP paging structure.
79 */
80struct tdp_iter {
81 /*
82 * The iterator will traverse the paging structure towards the mapping
83 * for this GFN.
84 */
85 gfn_t next_last_level_gfn;
86 /*
87 * The next_last_level_gfn at the time when the thread last
88 * yielded. Only yielding when the next_last_level_gfn !=
89 * yielded_gfn helps ensure forward progress.
90 */
91 gfn_t yielded_gfn;
92 /* Pointers to the page tables traversed to reach the current SPTE */
93 tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
94 /* A pointer to the current SPTE */
95 tdp_ptep_t sptep;
96 /* The lowest GFN mapped by the current SPTE */
97 gfn_t gfn;
98 /* The level of the root page given to the iterator */
99 int root_level;
100 /* The lowest level the iterator should traverse to */
101 int min_level;
102 /* The iterator's current level within the paging structure */
103 int level;
104 /* The address space ID, i.e. SMM vs. regular. */
105 int as_id;
106 /* A snapshot of the value at sptep */
107 u64 old_spte;
108 /*
109 * Whether the iterator has a valid state. This will be false if the
110 * iterator walks off the end of the paging structure.
111 */
112 bool valid;
113 /*
114 * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
115 * which case tdp_iter_next() needs to restart the walk at the root
116 * level instead of advancing to the next entry.
117 */
118 bool yielded;
119};
120
/*
 * Iterates over every SPTE mapping the GFN range [start, end) in a
 * preorder traversal.
 *
 * @iter is a struct tdp_iter lvalue, not a pointer.  @iter and @end are
 * parenthesized in the expansion so that expressions such as "*ptr" or
 * "cond ? a : b" keep their intended meaning.  Note that @end is evaluated
 * on every iteration.
 */
#define for_each_tdp_pte_min_level(iter, root, min_level, start, end)	\
	for (tdp_iter_start(&(iter), root, min_level, start);		\
	     (iter).valid && (iter).gfn < (end);			\
	     tdp_iter_next(&(iter)))

/* Same walk as for_each_tdp_pte_min_level(), with PG_LEVEL_4K as min level. */
#define for_each_tdp_pte(iter, root, start, end)			\
	for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end)
132
133tdp_ptep_t spte_to_child_pt(u64 pte, int level);
134
135void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
136 int min_level, gfn_t next_last_level_gfn);
137void tdp_iter_next(struct tdp_iter *iter);
138void tdp_iter_restart(struct tdp_iter *iter);
139
140#endif /* __KVM_X86_MMU_TDP_ITER_H */
1// SPDX-License-Identifier: GPL-2.0
2
3#ifndef __KVM_X86_MMU_TDP_ITER_H
4#define __KVM_X86_MMU_TDP_ITER_H
5
6#include <linux/kvm_host.h>
7
8#include "mmu.h"
9#include "spte.h"
10
11/*
12 * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
13 * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
14 * batched without having to collect the list of zapped SPs. Flows that can
15 * remove SPs must service pending TLB flushes prior to dropping RCU protection.
16 */
17static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
18{
19 return READ_ONCE(*rcu_dereference(sptep));
20}
21
22static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
23{
24 return xchg(rcu_dereference(sptep), new_spte);
25}
26
27static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
28{
29 WRITE_ONCE(*rcu_dereference(sptep), new_spte);
30}
31
32static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
33 u64 new_spte, int level)
34{
35 /*
36 * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
37 * volatile bits, i.e. has bits that can be set outside of mmu_lock.
38 * The Writable bit can be set by KVM's fast page fault handler, and
39 * Accessed and Dirty bits can be set by the CPU.
40 *
41 * Note, non-leaf SPTEs do have Accessed bits and those bits are
42 * technically volatile, but KVM doesn't consume the Accessed bit of
43 * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
44 * logic needs to be reassessed if KVM were to use non-leaf Accessed
45 * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
46 */
47 if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
48 spte_has_volatile_bits(old_spte))
49 return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
50
51 __kvm_tdp_mmu_write_spte(sptep, new_spte);
52 return old_spte;
53}
54
55/*
56 * A TDP iterator performs a pre-order walk over a TDP paging structure.
57 */
58struct tdp_iter {
59 /*
60 * The iterator will traverse the paging structure towards the mapping
61 * for this GFN.
62 */
63 gfn_t next_last_level_gfn;
64 /*
65 * The next_last_level_gfn at the time when the thread last
66 * yielded. Only yielding when the next_last_level_gfn !=
67 * yielded_gfn helps ensure forward progress.
68 */
69 gfn_t yielded_gfn;
70 /* Pointers to the page tables traversed to reach the current SPTE */
71 tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
72 /* A pointer to the current SPTE */
73 tdp_ptep_t sptep;
74 /* The lowest GFN mapped by the current SPTE */
75 gfn_t gfn;
76 /* The level of the root page given to the iterator */
77 int root_level;
78 /* The lowest level the iterator should traverse to */
79 int min_level;
80 /* The iterator's current level within the paging structure */
81 int level;
82 /* The address space ID, i.e. SMM vs. regular. */
83 int as_id;
84 /* A snapshot of the value at sptep */
85 u64 old_spte;
86 /*
87 * Whether the iterator has a valid state. This will be false if the
88 * iterator walks off the end of the paging structure.
89 */
90 bool valid;
91 /*
92 * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
93 * which case tdp_iter_next() needs to restart the walk at the root
94 * level instead of advancing to the next entry.
95 */
96 bool yielded;
97};
98
/*
 * Iterates over every SPTE mapping the GFN range [start, end) in a
 * preorder traversal.
 *
 * @iter is a struct tdp_iter lvalue, not a pointer.  @iter and @end are
 * parenthesized in the expansion so that expressions such as "*ptr" or
 * "cond ? a : b" keep their intended meaning.  Note that @end is evaluated
 * on every iteration.
 */
#define for_each_tdp_pte_min_level(iter, root, min_level, start, end)	\
	for (tdp_iter_start(&(iter), root, min_level, start);		\
	     (iter).valid && (iter).gfn < (end);			\
	     tdp_iter_next(&(iter)))

/* Same walk as for_each_tdp_pte_min_level(), with PG_LEVEL_4K as min level. */
#define for_each_tdp_pte(iter, root, start, end)			\
	for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end)
110
111tdp_ptep_t spte_to_child_pt(u64 pte, int level);
112
113void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
114 int min_level, gfn_t next_last_level_gfn);
115void tdp_iter_next(struct tdp_iter *iter);
116void tdp_iter_restart(struct tdp_iter *iter);
117
118#endif /* __KVM_X86_MMU_TDP_ITER_H */