Git Repo - linux.git/commitdiff
kvm: defer huge page recovery vhost task to later
author Keith Busch <[email protected]>
Thu, 23 Jan 2025 15:35:43 +0000 (07:35 -0800)
committer Paolo Bonzini <[email protected]>
Fri, 24 Jan 2025 15:53:56 +0000 (10:53 -0500)
Some libraries want to ensure they are single threaded before forking,
so making the kernel's kvm huge page recovery process a vhost task of
the user process breaks those. The minijail library used by crosvm is
one such affected application.

Defer the task to after the first VM_RUN call, which occurs after the
parent process has forked all its jailed processes. This needs to happen
only once for the kvm instance, so introduce some general-purpose
infrastructure for that, too.  It's similar in concept to pthread_once,
except that it is actually usable because the callback takes a parameter.
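
For illustration, here is a minimal, hypothetical sketch of how the new
call_once API (added in include/linux/call_once.h below) is meant to be
used; the my_ctx names are made up for this example, while the real
caller in this patch is kvm_mmu_post_init_vm() with
kvm_mmu_start_lpage_recovery() as the callback:

#include <linux/call_once.h>
#include <linux/container_of.h>

struct my_ctx {
	struct once once;	/* embedded one-shot state */
	int value;
};

/* Unlike pthread_once(), the callback receives the struct once back,
 * so container_of() can recover the object that embeds it. */
static void my_ctx_first_run(struct once *once)
{
	struct my_ctx *ctx = container_of(once, struct my_ctx, once);

	ctx->value = 42;	/* one-time initialization */
}

static void my_ctx_setup(struct my_ctx *ctx)
{
	once_init(&ctx->once);	/* at creation time */
}

static void my_ctx_use(struct my_ctx *ctx)
{
	/* Only the first caller runs my_ctx_first_run(); later callers
	 * observe ONCE_COMPLETED with acquire semantics and return. */
	call_once(&ctx->once, my_ctx_first_run);
}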

Cc: Sean Christopherson <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Tested-by: Alyssa Ross <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
Message-ID: <20250123153543.2769928[email protected]>
[Move call_once API to include/linux. - Paolo]
Cc: [email protected]
Fixes: d96c77bd4eeb ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Paolo Bonzini <[email protected]>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/x86.c
include/linux/call_once.h [new file with mode: 0644]

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2f442701dc755fbbbf18950fee57d44edf06af74..f378cd43241c6570411e2452ddbdbc060d3a41ba 100644
@@ -27,6 +27,7 @@
 #include <linux/hyperv.h>
 #include <linux/kfifo.h>
 #include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -1466,6 +1467,7 @@ struct kvm_arch {
        struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
        struct vhost_task *nx_huge_page_recovery_thread;
        u64 nx_huge_page_last;
+       struct once nx_once;
 
 #ifdef CONFIG_X86_64
        /* The number of TDP MMU pages across all roots. */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 26b4ba7e7cb5eeec3bbbc8a306cdef2abfd258f1..a45ae60e84ab460619c9c73753a524748a285cac 100644
@@ -7447,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
        return true;
 }
 
-int kvm_mmu_post_init_vm(struct kvm *kvm)
+static void kvm_mmu_start_lpage_recovery(struct once *once)
 {
-       if (nx_hugepage_mitigation_hard_disabled)
-               return 0;
+       struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
+       struct kvm *kvm = container_of(ka, struct kvm, arch);
 
        kvm->arch.nx_huge_page_last = get_jiffies_64();
        kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
                kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
                kvm, "kvm-nx-lpage-recovery");
 
+       if (kvm->arch.nx_huge_page_recovery_thread)
+               vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+       if (nx_hugepage_mitigation_hard_disabled)
+               return 0;
+
+       call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
        if (!kvm->arch.nx_huge_page_recovery_thread)
                return -ENOMEM;
-
-       vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
        return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6e248152fa134e9d8f117bdf8da5976cce3a5226..6d4a6734b2d69b1574dda172267c55b9a9d3f9d4 100644
@@ -11471,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
        struct kvm_run *kvm_run = vcpu->run;
        int r;
 
+       r = kvm_mmu_post_init_vm(vcpu->kvm);
+       if (r)
+               return r;
+
        vcpu_load(vcpu);
        kvm_sigset_activate(vcpu);
        kvm_run->flags = 0;
@@ -12748,7 +12752,8 @@ out:
 
 int kvm_arch_post_init_vm(struct kvm *kvm)
 {
-       return kvm_mmu_post_init_vm(kvm);
+       once_init(&kvm->arch.nx_once);
+       return 0;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
diff --git a/include/linux/call_once.h b/include/linux/call_once.h
new file mode 100644
index 0000000..6261aa0
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef _LINUX_CALL_ONCE_H
+#define _LINUX_CALL_ONCE_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#define ONCE_NOT_STARTED 0
+#define ONCE_RUNNING     1
+#define ONCE_COMPLETED   2
+
+struct once {
+        atomic_t state;
+        struct mutex lock;
+};
+
+static inline void __once_init(struct once *once, const char *name,
+                              struct lock_class_key *key)
+{
+        atomic_set(&once->state, ONCE_NOT_STARTED);
+        __mutex_init(&once->lock, name, key);
+}
+
+#define once_init(once)                                                        \
+do {                                                                   \
+       static struct lock_class_key __key;                             \
+       __once_init((once), #once, &__key);                             \
+} while (0)
+
+static inline void call_once(struct once *once, void (*cb)(struct once *))
+{
+        /* Pairs with atomic_set_release() below.  */
+        if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+                return;
+
+        guard(mutex)(&once->lock);
+        WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
+        if (atomic_read(&once->state) != ONCE_NOT_STARTED)
+                return;
+
+        atomic_set(&once->state, ONCE_RUNNING);
+        cb(once);
+        atomic_set_release(&once->state, ONCE_COMPLETED);
+}
+
+#endif /* _LINUX_CALL_ONCE_H */