Loading...
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support KVM guest page tracking
 *
 * This feature allows us to track page access in the guest. Currently, only
 * write access is tracked.
 *
 * Copyright(C) 2015 Intel Corporation.
 *
 * Author:
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 */
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15#include <linux/lockdep.h>
16#include <linux/kvm_host.h>
17#include <linux/rculist.h>
18
19#include "mmu.h"
20#include "mmu_internal.h"
21#include "page_track.h"
22
23static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
24{
25#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
26 /*
27 * Read external_write_tracking_enabled before related pointers. Pairs
28 * with the smp_store_release in kvm_page_track_write_tracking_enable().
29 */
30 return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
31#else
32 return false;
33#endif
34}
35
36bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
37{
38 return kvm_external_write_tracking_enabled(kvm) ||
39 kvm_shadow_root_allocated(kvm) || !tdp_enabled;
40}
41
42void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
43{
44 vfree(slot->arch.gfn_write_track);
45 slot->arch.gfn_write_track = NULL;
46}
47
48static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
49 unsigned long npages)
50{
51 const size_t size = sizeof(*slot->arch.gfn_write_track);
52
53 if (!slot->arch.gfn_write_track)
54 slot->arch.gfn_write_track = __vcalloc(npages, size,
55 GFP_KERNEL_ACCOUNT);
56
57 return slot->arch.gfn_write_track ? 0 : -ENOMEM;
58}
59
/*
 * Set up write-tracking metadata for a new memslot.  The counter array is only
 * allocated if write tracking is currently enabled for @kvm; otherwise this is
 * a no-op that returns 0.
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
69
70int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
71{
72 return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
73}
74
75static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
76 short count)
77{
78 int index, val;
79
80 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
81
82 val = slot->arch.gfn_write_track[index];
83
84 if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
85 return;
86
87 slot->arch.gfn_write_track[index] += count;
88}
89
90void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
91 gfn_t gfn)
92{
93 lockdep_assert_held_write(&kvm->mmu_lock);
94
95 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
96 srcu_read_lock_held(&kvm->srcu));
97
98 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
99 return;
100
101 update_gfn_write_track(slot, gfn, 1);
102
103 /*
104 * new track stops large page mapping for the
105 * tracked page.
106 */
107 kvm_mmu_gfn_disallow_lpage(slot, gfn);
108
109 if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
110 kvm_flush_remote_tlbs(kvm);
111}
112
113void __kvm_write_track_remove_gfn(struct kvm *kvm,
114 struct kvm_memory_slot *slot, gfn_t gfn)
115{
116 lockdep_assert_held_write(&kvm->mmu_lock);
117
118 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
119 srcu_read_lock_held(&kvm->srcu));
120
121 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
122 return;
123
124 update_gfn_write_track(slot, gfn, -1);
125
126 /*
127 * allow large page mapping for the tracked page
128 * after the tracker is gone.
129 */
130 kvm_mmu_gfn_allow_lpage(slot, gfn);
131}
132
133/*
134 * check if the corresponding access on the specified guest page is tracked.
135 */
136bool kvm_gfn_is_write_tracked(struct kvm *kvm,
137 const struct kvm_memory_slot *slot, gfn_t gfn)
138{
139 int index;
140
141 if (!slot)
142 return false;
143
144 if (!kvm_page_track_write_tracking_enabled(kvm))
145 return false;
146
147 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
148 return !!READ_ONCE(slot->arch.gfn_write_track[index]);
149}
150
151#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
152void kvm_page_track_cleanup(struct kvm *kvm)
153{
154 struct kvm_page_track_notifier_head *head;
155
156 head = &kvm->arch.track_notifier_head;
157 cleanup_srcu_struct(&head->track_srcu);
158}
159
160int kvm_page_track_init(struct kvm *kvm)
161{
162 struct kvm_page_track_notifier_head *head;
163
164 head = &kvm->arch.track_notifier_head;
165 INIT_HLIST_HEAD(&head->track_notifier_list);
166 return init_srcu_struct(&head->track_srcu);
167}
168
169static int kvm_enable_external_write_tracking(struct kvm *kvm)
170{
171 struct kvm_memslots *slots;
172 struct kvm_memory_slot *slot;
173 int r = 0, i, bkt;
174
175 mutex_lock(&kvm->slots_arch_lock);
176
177 /*
178 * Check for *any* write tracking user (not just external users) under
179 * lock. This avoids unnecessary work, e.g. if KVM itself is using
180 * write tracking, or if two external users raced when registering.
181 */
182 if (kvm_page_track_write_tracking_enabled(kvm))
183 goto out_success;
184
185 for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
186 slots = __kvm_memslots(kvm, i);
187 kvm_for_each_memslot(slot, bkt, slots) {
188 /*
189 * Intentionally do NOT free allocations on failure to
190 * avoid having to track which allocations were made
191 * now versus when the memslot was created. The
192 * metadata is guaranteed to be freed when the slot is
193 * freed, and will be kept/used if userspace retries
194 * the failed ioctl() instead of killing the VM.
195 */
196 r = kvm_page_track_write_tracking_alloc(slot);
197 if (r)
198 goto out_unlock;
199 }
200 }
201
202out_success:
203 /*
204 * Ensure that external_write_tracking_enabled becomes true strictly
205 * after all the related pointers are set.
206 */
207 smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
208out_unlock:
209 mutex_unlock(&kvm->slots_arch_lock);
210 return r;
211}
212
213/*
214 * register the notifier so that event interception for the tracked guest
215 * pages can be received.
216 */
217int kvm_page_track_register_notifier(struct kvm *kvm,
218 struct kvm_page_track_notifier_node *n)
219{
220 struct kvm_page_track_notifier_head *head;
221 int r;
222
223 if (!kvm || kvm->mm != current->mm)
224 return -ESRCH;
225
226 if (!kvm_external_write_tracking_enabled(kvm)) {
227 r = kvm_enable_external_write_tracking(kvm);
228 if (r)
229 return r;
230 }
231
232 kvm_get_kvm(kvm);
233
234 head = &kvm->arch.track_notifier_head;
235
236 write_lock(&kvm->mmu_lock);
237 hlist_add_head_rcu(&n->node, &head->track_notifier_list);
238 write_unlock(&kvm->mmu_lock);
239 return 0;
240}
241EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
242
243/*
244 * stop receiving the event interception. It is the opposed operation of
245 * kvm_page_track_register_notifier().
246 */
247void kvm_page_track_unregister_notifier(struct kvm *kvm,
248 struct kvm_page_track_notifier_node *n)
249{
250 struct kvm_page_track_notifier_head *head;
251
252 head = &kvm->arch.track_notifier_head;
253
254 write_lock(&kvm->mmu_lock);
255 hlist_del_rcu(&n->node);
256 write_unlock(&kvm->mmu_lock);
257 synchronize_srcu(&head->track_srcu);
258
259 kvm_put_kvm(kvm);
260}
261EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
262
263/*
264 * Notify the node that write access is intercepted and write emulation is
265 * finished at this time.
266 *
267 * The node should figure out if the written page is the one that node is
268 * interested in by itself.
269 */
270void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
271{
272 struct kvm_page_track_notifier_head *head;
273 struct kvm_page_track_notifier_node *n;
274 int idx;
275
276 head = &kvm->arch.track_notifier_head;
277
278 if (hlist_empty(&head->track_notifier_list))
279 return;
280
281 idx = srcu_read_lock(&head->track_srcu);
282 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
283 srcu_read_lock_held(&head->track_srcu))
284 if (n->track_write)
285 n->track_write(gpa, new, bytes, n);
286 srcu_read_unlock(&head->track_srcu, idx);
287}
288
289/*
290 * Notify external page track nodes that a memory region is being removed from
291 * the VM, e.g. so that users can free any associated metadata.
292 */
293void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
294{
295 struct kvm_page_track_notifier_head *head;
296 struct kvm_page_track_notifier_node *n;
297 int idx;
298
299 head = &kvm->arch.track_notifier_head;
300
301 if (hlist_empty(&head->track_notifier_list))
302 return;
303
304 idx = srcu_read_lock(&head->track_srcu);
305 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
306 srcu_read_lock_held(&head->track_srcu))
307 if (n->track_remove_region)
308 n->track_remove_region(slot->base_gfn, slot->npages, n);
309 srcu_read_unlock(&head->track_srcu, idx);
310}
311
312/*
313 * add guest page to the tracking pool so that corresponding access on that
314 * page will be intercepted.
315 *
316 * @kvm: the guest instance we are interested in.
317 * @gfn: the guest page.
318 */
319int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
320{
321 struct kvm_memory_slot *slot;
322 int idx;
323
324 idx = srcu_read_lock(&kvm->srcu);
325
326 slot = gfn_to_memslot(kvm, gfn);
327 if (!slot) {
328 srcu_read_unlock(&kvm->srcu, idx);
329 return -EINVAL;
330 }
331
332 write_lock(&kvm->mmu_lock);
333 __kvm_write_track_add_gfn(kvm, slot, gfn);
334 write_unlock(&kvm->mmu_lock);
335
336 srcu_read_unlock(&kvm->srcu, idx);
337
338 return 0;
339}
340EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
341
342/*
343 * remove the guest page from the tracking pool which stops the interception
344 * of corresponding access on that page.
345 *
346 * @kvm: the guest instance we are interested in.
347 * @gfn: the guest page.
348 */
349int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
350{
351 struct kvm_memory_slot *slot;
352 int idx;
353
354 idx = srcu_read_lock(&kvm->srcu);
355
356 slot = gfn_to_memslot(kvm, gfn);
357 if (!slot) {
358 srcu_read_unlock(&kvm->srcu, idx);
359 return -EINVAL;
360 }
361
362 write_lock(&kvm->mmu_lock);
363 __kvm_write_track_remove_gfn(kvm, slot, gfn);
364 write_unlock(&kvm->mmu_lock);
365
366 srcu_read_unlock(&kvm->srcu, idx);
367
368 return 0;
369}
370EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
371#endif
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support KVM guest page tracking
 *
 * This feature allows us to track page access in the guest. Currently, only
 * write access is tracked.
 *
 * Copyright(C) 2015 Intel Corporation.
 *
 * Author:
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 */
13
14#include <linux/kvm_host.h>
15#include <linux/rculist.h>
16
17#include <asm/kvm_page_track.h>
18
19#include "mmu_internal.h"
20
21void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
22{
23 int i;
24
25 for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
26 kvfree(slot->arch.gfn_track[i]);
27 slot->arch.gfn_track[i] = NULL;
28 }
29}
30
31int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
32 unsigned long npages)
33{
34 int i;
35
36 for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
37 slot->arch.gfn_track[i] =
38 kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
39 GFP_KERNEL_ACCOUNT);
40 if (!slot->arch.gfn_track[i])
41 goto track_free;
42 }
43
44 return 0;
45
46track_free:
47 kvm_page_track_free_memslot(slot);
48 return -ENOMEM;
49}
50
51static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
52{
53 if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
54 return false;
55
56 return true;
57}
58
59static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
60 enum kvm_page_track_mode mode, short count)
61{
62 int index, val;
63
64 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
65
66 val = slot->arch.gfn_track[mode][index];
67
68 if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
69 return;
70
71 slot->arch.gfn_track[mode][index] += count;
72}
73
74/*
75 * add guest page to the tracking pool so that corresponding access on that
76 * page will be intercepted.
77 *
78 * It should be called under the protection both of mmu-lock and kvm->srcu
79 * or kvm->slots_lock.
80 *
81 * @kvm: the guest instance we are interested in.
82 * @slot: the @gfn belongs to.
83 * @gfn: the guest page.
84 * @mode: tracking mode, currently only write track is supported.
85 */
86void kvm_slot_page_track_add_page(struct kvm *kvm,
87 struct kvm_memory_slot *slot, gfn_t gfn,
88 enum kvm_page_track_mode mode)
89{
90
91 if (WARN_ON(!page_track_mode_is_valid(mode)))
92 return;
93
94 update_gfn_track(slot, gfn, mode, 1);
95
96 /*
97 * new track stops large page mapping for the
98 * tracked page.
99 */
100 kvm_mmu_gfn_disallow_lpage(slot, gfn);
101
102 if (mode == KVM_PAGE_TRACK_WRITE)
103 if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
104 kvm_flush_remote_tlbs(kvm);
105}
106EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
107
108/*
109 * remove the guest page from the tracking pool which stops the interception
110 * of corresponding access on that page. It is the opposed operation of
111 * kvm_slot_page_track_add_page().
112 *
113 * It should be called under the protection both of mmu-lock and kvm->srcu
114 * or kvm->slots_lock.
115 *
116 * @kvm: the guest instance we are interested in.
117 * @slot: the @gfn belongs to.
118 * @gfn: the guest page.
119 * @mode: tracking mode, currently only write track is supported.
120 */
121void kvm_slot_page_track_remove_page(struct kvm *kvm,
122 struct kvm_memory_slot *slot, gfn_t gfn,
123 enum kvm_page_track_mode mode)
124{
125 if (WARN_ON(!page_track_mode_is_valid(mode)))
126 return;
127
128 update_gfn_track(slot, gfn, mode, -1);
129
130 /*
131 * allow large page mapping for the tracked page
132 * after the tracker is gone.
133 */
134 kvm_mmu_gfn_allow_lpage(slot, gfn);
135}
136EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
137
138/*
139 * check if the corresponding access on the specified guest page is tracked.
140 */
141bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
142 enum kvm_page_track_mode mode)
143{
144 struct kvm_memory_slot *slot;
145 int index;
146
147 if (WARN_ON(!page_track_mode_is_valid(mode)))
148 return false;
149
150 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
151 if (!slot)
152 return false;
153
154 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
155 return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
156}
157
158void kvm_page_track_cleanup(struct kvm *kvm)
159{
160 struct kvm_page_track_notifier_head *head;
161
162 head = &kvm->arch.track_notifier_head;
163 cleanup_srcu_struct(&head->track_srcu);
164}
165
166void kvm_page_track_init(struct kvm *kvm)
167{
168 struct kvm_page_track_notifier_head *head;
169
170 head = &kvm->arch.track_notifier_head;
171 init_srcu_struct(&head->track_srcu);
172 INIT_HLIST_HEAD(&head->track_notifier_list);
173}
174
175/*
176 * register the notifier so that event interception for the tracked guest
177 * pages can be received.
178 */
179void
180kvm_page_track_register_notifier(struct kvm *kvm,
181 struct kvm_page_track_notifier_node *n)
182{
183 struct kvm_page_track_notifier_head *head;
184
185 head = &kvm->arch.track_notifier_head;
186
187 spin_lock(&kvm->mmu_lock);
188 hlist_add_head_rcu(&n->node, &head->track_notifier_list);
189 spin_unlock(&kvm->mmu_lock);
190}
191EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
192
193/*
194 * stop receiving the event interception. It is the opposed operation of
195 * kvm_page_track_register_notifier().
196 */
197void
198kvm_page_track_unregister_notifier(struct kvm *kvm,
199 struct kvm_page_track_notifier_node *n)
200{
201 struct kvm_page_track_notifier_head *head;
202
203 head = &kvm->arch.track_notifier_head;
204
205 spin_lock(&kvm->mmu_lock);
206 hlist_del_rcu(&n->node);
207 spin_unlock(&kvm->mmu_lock);
208 synchronize_srcu(&head->track_srcu);
209}
210EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
211
212/*
213 * Notify the node that write access is intercepted and write emulation is
214 * finished at this time.
215 *
216 * The node should figure out if the written page is the one that node is
217 * interested in by itself.
218 */
219void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
220 int bytes)
221{
222 struct kvm_page_track_notifier_head *head;
223 struct kvm_page_track_notifier_node *n;
224 int idx;
225
226 head = &vcpu->kvm->arch.track_notifier_head;
227
228 if (hlist_empty(&head->track_notifier_list))
229 return;
230
231 idx = srcu_read_lock(&head->track_srcu);
232 hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
233 if (n->track_write)
234 n->track_write(vcpu, gpa, new, bytes, n);
235 srcu_read_unlock(&head->track_srcu, idx);
236}
237
238/*
239 * Notify the node that memory slot is being removed or moved so that it can
240 * drop write-protection for the pages in the memory slot.
241 *
242 * The node should figure out it has any write-protected pages in this slot
243 * by itself.
244 */
245void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
246{
247 struct kvm_page_track_notifier_head *head;
248 struct kvm_page_track_notifier_node *n;
249 int idx;
250
251 head = &kvm->arch.track_notifier_head;
252
253 if (hlist_empty(&head->track_notifier_list))
254 return;
255
256 idx = srcu_read_lock(&head->track_srcu);
257 hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
258 if (n->track_flush_slot)
259 n->track_flush_slot(kvm, slot, n);
260 srcu_read_unlock(&head->track_srcu, idx);
261}