sched/fair: Prepare exit/cleanup paths for delayed_dequeue
author    Peter Zijlstra <[email protected]>
          Thu, 23 May 2024 09:03:42 +0000 (11:03 +0200)
committer Peter Zijlstra <[email protected]>
          Sat, 17 Aug 2024 09:06:43 +0000 (11:06 +0200)
When dequeue_task() is delayed it becomes possible to exit a task (or
cgroup) that is still enqueued. Ensure things are dequeued before
freeing.

Thanks to Valentin for asking the obvious questions and making
switched_from_fair() less weird.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Tested-by: Valentin Schneider <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
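
Note: the two exit-path hunks below share a check/lock/re-check idiom:
sched_delayed is tested locklessly first, then re-tested under the rq
lock, because the scheduler can concurrently complete the delayed
dequeue in between. A condensed sketch, not itself part of the patch
(identifiers follow the diff):

	if (se->sched_delayed) {		/* cheap lockless test */
		rq = task_rq_lock(p, &rf);	/* serialize vs. the scheduler */
		if (se->sched_delayed) {	/* re-check under the lock */
			update_rq_clock(rq);
			dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
		}
		task_rq_unlock(rq, p, &rf);
	}

Only once the entity is fully off the runqueue is it safe to call
remove_entity_load_avg() and free the backing memory.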
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 37acd530f413f5cfc3f0397553ee222ff9af9e88..9a849030f19c1be90e1e1990eaf000db7b4bccb4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8342,7 +8342,21 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 
 static void task_dead_fair(struct task_struct *p)
 {
-       remove_entity_load_avg(&p->se);
+       struct sched_entity *se = &p->se;
+
+       if (se->sched_delayed) {
+               struct rq_flags rf;
+               struct rq *rq;
+
+               rq = task_rq_lock(p, &rf);
+               if (se->sched_delayed) {
+                       update_rq_clock(rq);
+                       dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+               }
+               task_rq_unlock(rq, p, &rf);
+       }
+
+       remove_entity_load_avg(se);
 }
 
 /*
@@ -12854,10 +12868,22 @@ static void attach_task_cfs_rq(struct task_struct *p)
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
 {
        detach_task_cfs_rq(p);
+       /*
+        * Since this is called after changing class, this is a little weird
+        * and we cannot use DEQUEUE_DELAYED.
+        */
+       if (p->se.sched_delayed) {
+               dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
+               p->se.sched_delayed = 0;
+               if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
+                       p->se.vlag = 0;
+       }
 }
 
 static void switched_to_fair(struct rq *rq, struct task_struct *p)
 {
+       SCHED_WARN_ON(p->se.sched_delayed);
+
        attach_task_cfs_rq(p);
 
        set_task_max_allowed_capacity(p);
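
Note: switched_from_fair() runs after the task has already changed
class, so dequeue_task() cannot take the DEQUEUE_DELAYED path and the
pending delayed dequeue is finished by hand instead. A hypothetical
helper (not part of the patch) naming those steps:

	/* Hypothetical: finish a pending delayed dequeue once p has left
	 * the fair class; mirrors the switched_from_fair() hunk above. */
	static void finish_delayed_dequeue(struct rq *rq, struct task_struct *p)
	{
		if (!p->se.sched_delayed)
			return;
		dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
		p->se.sched_delayed = 0;
		/* DELAY_ZERO: drop any positive vruntime lag on the way out */
		if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
			p->se.vlag = 0;
	}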
@@ -13008,28 +13034,35 @@ void online_fair_sched_group(struct task_group *tg)
 
 void unregister_fair_sched_group(struct task_group *tg)
 {
-       unsigned long flags;
-       struct rq *rq;
        int cpu;
 
        destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
        for_each_possible_cpu(cpu) {
-               if (tg->se[cpu])
-                       remove_entity_load_avg(tg->se[cpu]);
+               struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
+               struct sched_entity *se = tg->se[cpu];
+               struct rq *rq = cpu_rq(cpu);
+
+               if (se) {
+                       if (se->sched_delayed) {
+                               guard(rq_lock_irqsave)(rq);
+                               if (se->sched_delayed) {
+                                       update_rq_clock(rq);
+                                       dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+                               }
+                               list_del_leaf_cfs_rq(cfs_rq);
+                       }
+                       remove_entity_load_avg(se);
+               }
 
                /*
                 * Only empty task groups can be destroyed; so we can speculatively
                 * check on_list without danger of it being re-added.
                 */
-               if (!tg->cfs_rq[cpu]->on_list)
-                       continue;
-
-               rq = cpu_rq(cpu);
-
-               raw_spin_rq_lock_irqsave(rq, flags);
-               list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-               raw_spin_rq_unlock_irqrestore(rq, flags);
+               if (cfs_rq->on_list) {
+                       guard(rq_lock_irqsave)(rq);
+                       list_del_leaf_cfs_rq(cfs_rq);
+               }
        }
 }
 
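Note: the guard(rq_lock_irqsave)(rq) statements above are scope-based
locks from the kernel's cleanup.h infrastructure; the rq lock is
released automatically when the enclosing scope exits. A rough
open-coded equivalent of the first guarded block (sketch only; the
real guard releases the lock via the compiler's cleanup attribute):

	struct rq_flags rf;

	rq_lock_irqsave(rq, &rf);
	if (se->sched_delayed) {
		update_rq_clock(rq);
		dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
	}
	list_del_leaf_cfs_rq(cfs_rq);
	rq_unlock_irqrestore(rq, &rf);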