Some libraries want to ensure they are single-threaded before forking,
so making the kernel's KVM huge page recovery thread a vhost task of
the user process breaks them. The minijail library used by crosvm is
one such affected application.

Defer the task to after the first KVM_RUN call, which occurs after the
parent process has forked all its jailed processes. This needs to happen
only once per KVM instance, so introduce some general-purpose
infrastructure for that, too. It's similar in concept to pthread_once,
except that it is actually usable because the callback takes a parameter.
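
As an illustrative sketch (struct foo and foo_late_init_cb below are
made-up names for this example, not part of the patch), the intended
usage pattern is:

    struct foo {
            struct once once;
            /* state that the callback sets up exactly once */
    };

    static void foo_late_init_cb(struct once *once)
    {
            /*
             * The callback receives the struct once, so container_of()
             * recovers the enclosing object without any globals.
             */
            struct foo *foo = container_of(once, struct foo, once);
            /* one-time initialization of *foo goes here */
    }

    once_init(&foo->once);                   /* at object creation */
    call_once(&foo->once, foo_late_init_cb); /* on the hot path; cb runs once */
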
Cc: Sean Christopherson <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Tested-by: Alyssa Ross <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
Message-ID: <20250123153543.2769928[email protected]>
[Move call_once API to include/linux. - Paolo]
Cc: [email protected]
Fixes: d96c77bd4eeb ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Paolo Bonzini <[email protected]>
#include <linux/hyperv.h>
#include <linux/kfifo.h>
#include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
struct vhost_task *nx_huge_page_recovery_thread;
u64 nx_huge_page_last;
+ struct once nx_once;
#ifdef CONFIG_X86_64
/* The number of TDP MMU pages across all roots. */
return true;
}
-int kvm_mmu_post_init_vm(struct kvm *kvm)
+static void kvm_mmu_start_lpage_recovery(struct once *once)
{
- if (nx_hugepage_mitigation_hard_disabled)
- return 0;
+ struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
+ struct kvm *kvm = container_of(ka, struct kvm, arch);
kvm->arch.nx_huge_page_last = get_jiffies_64();
kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
kvm, "kvm-nx-lpage-recovery");
+ if (kvm->arch.nx_huge_page_recovery_thread)
+ vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+ if (nx_hugepage_mitigation_hard_disabled)
+ return 0;
+
+ call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
if (!kvm->arch.nx_huge_page_recovery_thread)
return -ENOMEM;
-
- vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
return 0;
}
struct kvm_run *kvm_run = vcpu->run;
int r;
+ r = kvm_mmu_post_init_vm(vcpu->kvm);
+ if (r)
+ return r;
+
vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
kvm_run->flags = 0;
int kvm_arch_post_init_vm(struct kvm *kvm)
{
- return kvm_mmu_post_init_vm(kvm);
+ once_init(&kvm->arch.nx_once);
+ return 0;
}
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
--- /dev/null
+++ b/include/linux/call_once.h
+#ifndef _LINUX_CALL_ONCE_H
+#define _LINUX_CALL_ONCE_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#define ONCE_NOT_STARTED 0
+#define ONCE_RUNNING 1
+#define ONCE_COMPLETED 2
+
+struct once {
+ atomic_t state;
+ struct mutex lock;
+};
+
+static inline void __once_init(struct once *once, const char *name,
+ struct lock_class_key *key)
+{
+ atomic_set(&once->state, ONCE_NOT_STARTED);
+ __mutex_init(&once->lock, name, key);
+}
+
+#define once_init(once) \
+do { \
+ static struct lock_class_key __key; \
+ __once_init((once), #once, &__key); \
+} while (0)
+
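+/**
+ * call_once - invoke @cb exactly once for this @once instance
+ * @once: tracks whether the callback has already run to completion
+ * @cb: callback to invoke; it receives @once, so the caller can use
+ *      container_of() to reach the structure embedding it
+ *
+ * Returns immediately if the callback has already completed; otherwise
+ * the mutex serializes concurrent callers and only the first runs @cb.
+ */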
+static inline void call_once(struct once *once, void (*cb)(struct once *))
+{
+ /* Pairs with atomic_set_release() below. */
+ if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+ return;
+
+ guard(mutex)(&once->lock);
+ WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
+ if (atomic_read(&once->state) != ONCE_NOT_STARTED)
+ return;
+
+ atomic_set(&once->state, ONCE_RUNNING);
+ cb(once);
+ atomic_set_release(&once->state, ONCE_COMPLETED);
+}
+
+#endif /* _LINUX_CALL_ONCE_H */