1 // SPDX-License-Identifier: GPL-2.0-only
3 * Support KVM guest page tracking
5 * This feature allows us to track page access in guest. Currently, only
6 * write access is tracked.
8 * Copyright(C) 2015 Intel Corporation.
/* Expands a format string to KBUILD_MODNAME, then ": ", then the format itself. */
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/lockdep.h>
16 #include <linux/kvm_host.h>
17 #include <linux/rculist.h>
20 #include "mmu_internal.h"
21 #include "page_track.h"
/*
 * Report whether external write tracking has been switched on for @kvm.
 *
 * With CONFIG_KVM_EXTERNAL_WRITE_TRACKING defined, the result is
 * kvm->arch.external_write_tracking_enabled, read with smp_load_acquire().
 *
 * NOTE(review): the #else/#endif side of the conditional is not visible in
 * this view; presumably it reports "disabled" -- confirm.
 *
 * NOTE(review): the existing comment below names
 * kvm_page_track_write_tracking_enable(), but the smp_store_release() on
 * this flag that is visible in this file lives in
 * kvm_enable_external_write_tracking(). The name looks stale.
 */
23 static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
25 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
27 * Read external_write_tracking_enabled before related pointers. Pairs
28 * with the smp_store_release in kvm_page_track_write_tracking_enable().
30 return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
/*
 * Write tracking counts as enabled for @kvm when any one of these is true:
 *  - kvm_external_write_tracking_enabled(kvm),
 *  - kvm_shadow_root_allocated(kvm),
 *  - tdp_enabled is false.
 *
 * The checks are short-circuited in that order.
 */
36 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
38 return kvm_external_write_tracking_enabled(kvm) ||
39 kvm_shadow_root_allocated(kvm) || !tdp_enabled;
/*
 * Release @slot's write-tracking array.
 *
 * slot->arch.gfn_write_track is handed to vfree() and then reset to NULL so
 * the slot does not keep a pointer to freed memory.
 */
42 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
44 vfree(slot->arch.gfn_write_track);
45 slot->arch.gfn_write_track = NULL;
/*
 * Make sure @slot has a write-tracking array.
 *
 * An allocation is attempted only when slot->arch.gfn_write_track is
 * currently NULL, so an array that already exists is kept as-is. The element
 * size is taken from the pointed-to type of gfn_write_track.
 *
 * Returns 0 when the slot has an array on exit, -ENOMEM otherwise.
 *
 * NOTE(review): the remainder of the parameter list and the last
 * __vcalloc() argument are not visible in this view. Both callers in this
 * file pass (slot, npages), so npages is presumably the second parameter --
 * confirm.
 */
48 static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
51 const size_t size = sizeof(*slot->arch.gfn_write_track);
53 if (!slot->arch.gfn_write_track)
54 slot->arch.gfn_write_track = __vcalloc(npages, size,
57 return slot->arch.gfn_write_track ? 0 : -ENOMEM;
/*
 * Set up write-tracking metadata for @slot.
 *
 * The allocation helper is reached only after the check that write tracking
 * is enabled for @kvm; its result (0 or -ENOMEM) is returned unchanged.
 *
 * NOTE(review): the statement executed when tracking is NOT enabled is not
 * visible in this view; presumably an early "nothing to do" return --
 * confirm. The trailing parameter (npages, by its use below) is likewise not
 * visible.
 */
60 int kvm_page_track_create_memslot(struct kvm *kvm,
61 struct kvm_memory_slot *slot,
64 if (!kvm_page_track_write_tracking_enabled(kvm))
67 return __kvm_page_track_write_tracking_alloc(slot, npages);
/*
 * Ensure @slot has a write-tracking array sized from slot->npages.
 *
 * Unlike kvm_page_track_create_memslot(), no "is write tracking enabled"
 * check is made here. Returns whatever
 * __kvm_page_track_write_tracking_alloc() returns (0 or -ENOMEM).
 */
70 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
72 return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
/*
 * Adjust the write-track counter of @gfn in @slot by count.
 *
 * The counter lives in slot->arch.gfn_write_track[], indexed by the gfn's
 * offset from slot->base_gfn at PG_LEVEL_4K granularity.
 *
 * NOTE(review): the third parameter and the local declarations are not
 * visible in this view; callers in this file pass 1 and -1 as the count.
 */
75 static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
80 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
82 val = slot->arch.gfn_write_track[index];
/*
 * Warn (once) if the new value would fall outside [0, USHRT_MAX].
 * NOTE(review): the guarded statement is not visible here; presumably the
 * update is skipped in that case -- confirm.
 */
84 if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
87 slot->arch.gfn_write_track[index] += count;
/*
 * Add one write-tracking reference to @gfn in @slot.
 *
 * Locking, as asserted below: kvm->mmu_lock must be held for write, and the
 * caller must additionally hold kvm->slots_lock or be inside a kvm->srcu
 * read-side section.
 *
 * Steps: bump the gfn's counter by 1, call kvm_mmu_gfn_disallow_lpage() for
 * it, then write-protect it at PG_LEVEL_4K.
 *
 * NOTE(review): the trailing parameter (gfn, by its use below) and the
 * statement guarded by KVM_BUG_ON() are not visible in this view;
 * presumably the function bails out when tracking is not enabled -- confirm.
 */
90 void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
93 lockdep_assert_held_write(&kvm->mmu_lock);
95 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
96 srcu_read_lock_held(&kvm->srcu));
98 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
101 update_gfn_write_track(slot, gfn, 1);
104 * new track stops large page mapping for the
107 kvm_mmu_gfn_disallow_lpage(slot, gfn);
/* Remote TLBs are flushed only when the write-protect call returns true. */
109 if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
110 kvm_flush_remote_tlbs(kvm);
/*
 * Drop one write-tracking reference from @gfn in @slot; the counterpart of
 * __kvm_write_track_add_gfn().
 *
 * Locking, as asserted below: kvm->mmu_lock must be held for write, and the
 * caller must additionally hold kvm->slots_lock or be inside a kvm->srcu
 * read-side section.
 *
 * Steps: lower the gfn's counter by 1, then call kvm_mmu_gfn_allow_lpage()
 * for it. No write-protection change or TLB flush is done on this path.
 *
 * NOTE(review): the statement guarded by KVM_BUG_ON() is not visible in this
 * view; presumably the function bails out when tracking is not enabled --
 * confirm.
 */
113 void __kvm_write_track_remove_gfn(struct kvm *kvm,
114 struct kvm_memory_slot *slot, gfn_t gfn)
116 lockdep_assert_held_write(&kvm->mmu_lock);
118 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
119 srcu_read_lock_held(&kvm->srcu));
121 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
124 update_gfn_write_track(slot, gfn, -1);
127 * allow large page mapping for the tracked page
128 * after the tracker is gone.
130 kvm_mmu_gfn_allow_lpage(slot, gfn);
134 * Check whether write access to the specified guest page is tracked.
/*
 * @kvm:  VM whose write-tracking state is consulted.
 * @slot: memslot that contains @gfn; only read here (const).
 * @gfn:  guest frame number to query.
 *
 * When the lookup is reached, the result is true iff the gfn's entry in
 * slot->arch.gfn_write_track[] is non-zero. The entry is read with
 * READ_ONCE() and indexed at PG_LEVEL_4K granularity relative to
 * slot->base_gfn.
 *
 * NOTE(review): several lines between the signature and the enabled check,
 * and the statement taken when tracking is not enabled, are not visible in
 * this view; presumably those paths report "not tracked" -- confirm.
 */
136 bool kvm_gfn_is_write_tracked(struct kvm *kvm,
137 const struct kvm_memory_slot *slot, gfn_t gfn)
144 if (!kvm_page_track_write_tracking_enabled(kvm))
147 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
148 return !!READ_ONCE(slot->arch.gfn_write_track[index]);
151 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
/*
 * Tear down the page-track notifier state of @kvm: calls
 * cleanup_srcu_struct() on the track_srcu embedded in
 * kvm->arch.track_notifier_head. Counterpart of kvm_page_track_init().
 */
152 void kvm_page_track_cleanup(struct kvm *kvm)
154 struct kvm_page_track_notifier_head *head;
156 head = &kvm->arch.track_notifier_head;
157 cleanup_srcu_struct(&head->track_srcu);
/*
 * Initialise the page-track notifier state of @kvm.
 *
 * The notifier list in kvm->arch.track_notifier_head starts out empty, and
 * its track_srcu is set up with init_srcu_struct().
 *
 * Returns the value of init_srcu_struct().
 */
160 int kvm_page_track_init(struct kvm *kvm)
162 struct kvm_page_track_notifier_head *head;
164 head = &kvm->arch.track_notifier_head;
165 INIT_HLIST_HEAD(&head->track_notifier_list);
166 return init_srcu_struct(&head->track_srcu);
/*
 * Turn on external write tracking for @kvm.
 *
 * With kvm->slots_arch_lock held, every memslot of every address space
 * (kvm_arch_nr_memslot_as_ids() of them) is passed to
 * kvm_page_track_write_tracking_alloc(). Afterwards
 * kvm->arch.external_write_tracking_enabled is set with smp_store_release(),
 * which is the store that the smp_load_acquire() in
 * kvm_external_write_tracking_enabled() reads.
 *
 * NOTE(review): the declarations of i, bkt and r, the early exit taken when
 * tracking is already enabled, the handling of a non-zero r, and the return
 * statement are not visible in this view -- confirm against the full file.
 */
169 static int kvm_enable_external_write_tracking(struct kvm *kvm)
171 struct kvm_memslots *slots;
172 struct kvm_memory_slot *slot;
175 mutex_lock(&kvm->slots_arch_lock);
178 * Check for *any* write tracking user (not just external users) under
179 * lock. This avoids unnecessary work, e.g. if KVM itself is using
180 * write tracking, or if two external users raced when registering.
182 if (kvm_page_track_write_tracking_enabled(kvm))
185 for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
186 slots = __kvm_memslots(kvm, i);
187 kvm_for_each_memslot(slot, bkt, slots) {
189 * Intentionally do NOT free allocations on failure to
190 * avoid having to track which allocations were made
191 * now versus when the memslot was created. The
192 * metadata is guaranteed to be freed when the slot is
193 * freed, and will be kept/used if userspace retries
194 * the failed ioctl() instead of killing the VM.
196 r = kvm_page_track_write_tracking_alloc(slot);
204 * Ensure that external_write_tracking_enabled becomes true strictly
205 * after all the related pointers are set.
207 smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
209 mutex_unlock(&kvm->slots_arch_lock);
214 * register the notifier so that event interception for the tracked guest
215 * pages can be received.
/*
 * @kvm: VM to attach the notifier to. A NULL @kvm, or one whose kvm->mm is
 *       not current->mm, is caught by the first check.
 * @n:   notifier node; linked at the head of the VM's track_notifier_list.
 *
 * External write tracking is enabled on demand: if it is not yet on,
 * kvm_enable_external_write_tracking() is called before the node is linked.
 * The list insertion uses hlist_add_head_rcu() under kvm->mmu_lock held for
 * write.
 *
 * NOTE(review): the declaration of r, the statement taken when the @kvm
 * check fails, the handling of a failed enable, and the final return are
 * not visible in this view -- confirm the error codes against the full file.
 */
217 int kvm_page_track_register_notifier(struct kvm *kvm,
218 struct kvm_page_track_notifier_node *n)
220 struct kvm_page_track_notifier_head *head;
223 if (!kvm || kvm->mm != current->mm)
226 if (!kvm_external_write_tracking_enabled(kvm)) {
227 r = kvm_enable_external_write_tracking(kvm);
234 head = &kvm->arch.track_notifier_head;
236 write_lock(&kvm->mmu_lock);
237 hlist_add_head_rcu(&n->node, &head->track_notifier_list);
238 write_unlock(&kvm->mmu_lock);
241 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
244 * stop receiving the event interception. It is the opposite operation of
245 * kvm_page_track_register_notifier().
/*
 * @kvm: VM the notifier was registered on.
 * @n:   notifier node to unlink.
 *
 * The node is unlinked with hlist_del_rcu() under kvm->mmu_lock held for
 * write. synchronize_srcu() is then called on the same track_srcu that the
 * notifier-list walkers in this file take with srcu_read_lock().
 *
 * NOTE(review): at least one line between synchronize_srcu() and the export
 * is not visible in this view.
 */
247 void kvm_page_track_unregister_notifier(struct kvm *kvm,
248 struct kvm_page_track_notifier_node *n)
250 struct kvm_page_track_notifier_head *head;
252 head = &kvm->arch.track_notifier_head;
254 write_lock(&kvm->mmu_lock);
255 hlist_del_rcu(&n->node);
256 write_unlock(&kvm->mmu_lock);
257 synchronize_srcu(&head->track_srcu);
261 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
264 * Notify the node that write access is intercepted and write emulation is
265 * finished at this time.
267 * The node should figure out if the written page is the one that node is
268 * interested in by itself.
/*
 * @kvm:   VM whose notifier list is walked.
 * @gpa:   guest physical address, forwarded to each notifier.
 * @new:   buffer forwarded to each notifier (const; not modified here).
 * @bytes: byte count, forwarded to each notifier.
 *
 * The list is walked inside an srcu_read_lock()/srcu_read_unlock() section
 * on head->track_srcu, and each node's track_write callback receives
 * (gpa, new, bytes, node).
 *
 * NOTE(review): the statement taken when the list is empty (presumably an
 * early return that skips the SRCU section) and a line between the iterator
 * and the track_write call are not visible in this view. Whether
 * track_write is NULL-checked, as track_remove_region is in
 * kvm_page_track_delete_slot(), cannot be confirmed from here.
 */
270 void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
272 struct kvm_page_track_notifier_head *head;
273 struct kvm_page_track_notifier_node *n;
276 head = &kvm->arch.track_notifier_head;
278 if (hlist_empty(&head->track_notifier_list))
281 idx = srcu_read_lock(&head->track_srcu);
282 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
283 srcu_read_lock_held(&head->track_srcu))
285 n->track_write(gpa, new, bytes, n);
286 srcu_read_unlock(&head->track_srcu, idx);
290 * Notify external page track nodes that a memory region is being removed from
291 * the VM, e.g. so that users can free any associated metadata.
/*
 * @kvm:  VM whose notifier list is walked.
 * @slot: memslot being removed; its base_gfn and npages are passed to each
 *        notifier.
 *
 * The list is walked inside an srcu_read_lock()/srcu_read_unlock() section
 * on head->track_srcu. The track_remove_region callback is optional: nodes
 * that leave it NULL are skipped.
 *
 * NOTE(review): the statement taken when the list is empty is not visible
 * in this view; presumably an early return that skips the SRCU section --
 * confirm.
 */
293 void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
295 struct kvm_page_track_notifier_head *head;
296 struct kvm_page_track_notifier_node *n;
299 head = &kvm->arch.track_notifier_head;
301 if (hlist_empty(&head->track_notifier_list))
304 idx = srcu_read_lock(&head->track_srcu);
305 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
306 srcu_read_lock_held(&head->track_srcu))
307 if (n->track_remove_region)
308 n->track_remove_region(slot->base_gfn, slot->npages, n);
309 srcu_read_unlock(&head->track_srcu, idx);
313 * add guest page to the tracking pool so that corresponding access on that
314 * page will be intercepted.
316 * @kvm: the guest instance we are interested in.
317 * @gfn: the guest page.
/*
 * Exported wrapper around __kvm_write_track_add_gfn() that takes the locks
 * the inner helper asserts.
 *
 * The memslot for @gfn is looked up with gfn_to_memslot() inside a kvm->srcu
 * read-side section, and the SRCU lock stays held across the update. The
 * update itself runs with kvm->mmu_lock held for write.
 *
 * NOTE(review): two srcu_read_unlock() calls are visible. The first, right
 * after the lookup, presumably belongs to a "no usable memslot" error path
 * whose condition and return value are not visible in this view. The
 * success return is not visible either -- confirm against the full file.
 */
319 int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
321 struct kvm_memory_slot *slot;
324 idx = srcu_read_lock(&kvm->srcu);
326 slot = gfn_to_memslot(kvm, gfn);
328 srcu_read_unlock(&kvm->srcu, idx);
332 write_lock(&kvm->mmu_lock);
333 __kvm_write_track_add_gfn(kvm, slot, gfn);
334 write_unlock(&kvm->mmu_lock);
336 srcu_read_unlock(&kvm->srcu, idx);
340 EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
343 * remove the guest page from the tracking pool which stops the interception
344 * of corresponding access on that page.
346 * @kvm: the guest instance we are interested in.
347 * @gfn: the guest page.
/*
 * Exported wrapper around __kvm_write_track_remove_gfn() that takes the
 * locks the inner helper asserts; mirrors kvm_write_track_add_gfn().
 *
 * The memslot for @gfn is looked up with gfn_to_memslot() inside a kvm->srcu
 * read-side section, and the SRCU lock stays held across the update. The
 * update itself runs with kvm->mmu_lock held for write.
 *
 * NOTE(review): two srcu_read_unlock() calls are visible. The first, right
 * after the lookup, presumably belongs to a "no usable memslot" error path
 * whose condition and return value are not visible in this view. The
 * success return is not visible either -- confirm against the full file.
 */
349 int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
351 struct kvm_memory_slot *slot;
354 idx = srcu_read_lock(&kvm->srcu);
356 slot = gfn_to_memslot(kvm, gfn);
358 srcu_read_unlock(&kvm->srcu, idx);
362 write_lock(&kvm->mmu_lock);
363 __kvm_write_track_remove_gfn(kvm, slot, gfn);
364 write_unlock(&kvm->mmu_lock);
366 srcu_read_unlock(&kvm->srcu, idx);
370 EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);