Merge tag 'sched_urgent_for_v6.13_rc3-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author    Linus Torvalds <[email protected]>
Sun, 15 Dec 2024 17:38:03 +0000 (09:38 -0800)
committer Linus Torvalds <[email protected]>
Sun, 15 Dec 2024 17:38:03 +0000 (09:38 -0800)
Pull scheduler fixes from Borislav Petkov:

 - Prevent incorrect dequeueing of the deadline dlserver helper task and
   fix its time accounting

 - Properly track the CFS runqueue runnable stats

 - Check the total number of all queued tasks in a sched fair's runqueue
   hierarchy before deciding to stop the tick (see the sketch after the
   shortlog below)

 - Fix the scheduling of the task that got woken last (NEXT_BUDDY) by
   preventing it from being delayed

* tag 'sched_urgent_for_v6.13_rc3-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/dlserver: Fix dlserver time accounting
  sched/dlserver: Fix dlserver double enqueue
  sched/eevdf: More PELT vs DELAYED_DEQUEUE
  sched/fair: Fix sched_can_stop_tick() for fair tasks
  sched/fair: Fix NEXT_BUDDY

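A note on the tick-stop item in the list above: a cfs_rq's per-level count
(nr_running) only covers the entities queued directly on that runqueue, while
the hierarchical count (h_nr_running) covers every task queued anywhere in the
hierarchy below it, and it is the hierarchical number that must be at most one
before the tick may be stopped. The standalone model below illustrates only
that per-level vs hierarchical distinction; it is not the actual
sched_can_stop_tick() change, and the field names are illustrative stand-ins.

#include <stdio.h>
#include <stdbool.h>

/* Toy model: a runqueue level tracks what is queued directly on it
 * (nr_queued) and how many tasks sit anywhere in the hierarchy below
 * it (h_nr_tasks), mirroring nr_running vs h_nr_running. */
struct rq_level {
	int nr_queued;   /* tasks + child group entities on this level */
	int h_nr_tasks;  /* all tasks in this level's hierarchy        */
};

/* Stopping the tick is only safe when at most one task could run. */
static bool can_stop_tick(const struct rq_level *root)
{
	return root->h_nr_tasks <= 1;
}

int main(void)
{
	/* One group entity on the root, two tasks inside that group:
	 * the per-level count says 1, the hierarchical count says 2. */
	struct rq_level root = { .nr_queued = 1, .h_nr_tasks = 2 };

	printf("per-level count: %d, hierarchical count: %d, stop tick: %s\n",
	       root.nr_queued, root.h_nr_tasks,
	       can_stop_tick(&root) ? "yes" : "no");
	return 0;
}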
kernel/sched/fair.c

diff --combined kernel/sched/fair.c
index aa0238ee485724af807797be2647eb6270161116,53a4f78b834078e7e06f254a90e711c49ca990dd..3e9ca38512dee6172ffea85d36ed017e4b77e5d4
@@@ -1159,8 -1159,6 +1159,6 @@@ static inline void update_curr_task(str
        trace_sched_stat_runtime(p, delta_exec);
        account_group_exec_runtime(p, delta_exec);
        cgroup_account_cputime(p, delta_exec);
-       if (p->dl_server)
-               dl_server_update(p->dl_server, delta_exec);
  }
  
  static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@@ -1237,11 -1235,16 +1235,16 @@@ static void update_curr(struct cfs_rq *
                update_curr_task(p, delta_exec);
  
                /*
-                * Any fair task that runs outside of fair_server should
-                * account against fair_server such that it can account for
-                * this time and possibly avoid running this period.
+                * If the fair_server is active, we need to account for the
+                * fair_server time whether or not the task is running on
+                * behalf of fair_server:
+                *  - If the task is running on behalf of fair_server, we need
+                *    to limit its time based on the assigned runtime.
+                *  - A fair task that runs outside of fair_server should account
+                *    against fair_server such that it can account for this time
+                *    and possibly avoid running this period.
                 */
-               if (p->dl_server != &rq->fair_server)
+               if (dl_server_active(&rq->fair_server))
                        dl_server_update(&rq->fair_server, delta_exec);
        }
  
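For context on the hunk above: the removed guard, p->dl_server != &rq->fair_server,
charged the fair server only for runtime consumed outside of it, while the new
dl_server_active() guard charges every fair task's runtime against the fair
server whenever the server is active, so time spent running on its behalf is
bounded by the server's runtime as well. The standalone C model below
illustrates only that accounting decision; the struct and helper names are
stand-ins, not the kernel's definitions.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the fair deadline-server bandwidth state. */
struct fair_server_model {
	bool active;            /* models dl_server_active()          */
	long long consumed_ns;  /* runtime charged against the server */
};

/* Old rule: charge only time consumed outside of the fair server. */
static void account_old(struct fair_server_model *fs,
			bool running_for_server, long long delta_ns)
{
	if (!running_for_server)
		fs->consumed_ns += delta_ns;
}

/* New rule: while the server is active, charge all fair runtime, so
 * time spent on behalf of the server is limited by its runtime too. */
static void account_new(struct fair_server_model *fs,
			bool running_for_server, long long delta_ns)
{
	(void)running_for_server;
	if (fs->active)
		fs->consumed_ns += delta_ns;
}

int main(void)
{
	struct fair_server_model o = { .active = true };
	struct fair_server_model n = { .active = true };

	/* 3us consumed on behalf of the server, 2us as a normal fair task. */
	account_old(&o, true, 3000); account_old(&o, false, 2000);
	account_new(&n, true, 3000); account_new(&n, false, 2000);

	printf("old rule charged %lld ns, new rule charged %lld ns\n",
	       o.consumed_ns, n.consumed_ns);
	return 0;
}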
@@@ -3399,16 -3402,10 +3402,16 @@@ retry_pids
  
                /* Initialise new per-VMA NUMAB state. */
                if (!vma->numab_state) {
 -                      vma->numab_state = kzalloc(sizeof(struct vma_numab_state),
 -                              GFP_KERNEL);
 -                      if (!vma->numab_state)
 +                      struct vma_numab_state *ptr;
 +
 +                      ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
 +                      if (!ptr)
 +                              continue;
 +
 +                      if (cmpxchg(&vma->numab_state, NULL, ptr)) {
 +                              kfree(ptr);
                                continue;
 +                      }
  
                        vma->numab_state->start_scan_seq = mm->numa_scan_seq;
  
@@@ -5471,9 -5468,33 +5474,33 @@@ static void clear_buddies(struct cfs_r
  
  static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
  
- static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
+ static void set_delayed(struct sched_entity *se)
+ {
+       se->sched_delayed = 1;
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+               cfs_rq->h_nr_delayed++;
+               if (cfs_rq_throttled(cfs_rq))
+                       break;
+       }
+ }
+ static void clear_delayed(struct sched_entity *se)
  {
        se->sched_delayed = 0;
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+               cfs_rq->h_nr_delayed--;
+               if (cfs_rq_throttled(cfs_rq))
+                       break;
+       }
+ }
+ static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
+ {
+       clear_delayed(se);
        if (sched_feat(DELAY_ZERO) && se->vlag > 0)
                se->vlag = 0;
  }
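The set_delayed()/clear_delayed() helpers above maintain a per-cfs_rq count of
delayed-dequeue entities and propagate it up the group hierarchy, stopping at a
throttled level because runqueues above a throttled cfs_rq no longer include
its counts (see the throttle/unthrottle hunks further down, which adjust
h_nr_delayed alongside the other hierarchical counters). The toy model below
walks a two-level hierarchy the same way, just to show what h_nr_delayed ends
up holding at each level; it is a sketch, not kernel code.

#include <stdio.h>

/* Toy two-level hierarchy: child group cfs_rq -> root cfs_rq. */
struct cfs_rq_model {
	struct cfs_rq_model *parent;
	int throttled;
	int h_nr_delayed;
};

/* Mirrors the loop in set_delayed(): bump each level walking upward,
 * but stop after the first throttled level, since runqueues above a
 * throttled cfs_rq do not include its counts. */
static void set_delayed_model(struct cfs_rq_model *cfs_rq)
{
	for (; cfs_rq; cfs_rq = cfs_rq->parent) {
		cfs_rq->h_nr_delayed++;
		if (cfs_rq->throttled)
			break;
	}
}

int main(void)
{
	struct cfs_rq_model root = { 0 };
	struct cfs_rq_model group = { .parent = &root };

	set_delayed_model(&group);  /* not throttled: both levels counted */
	printf("group=%d root=%d\n", group.h_nr_delayed, root.h_nr_delayed);

	group.throttled = 1;
	set_delayed_model(&group);  /* throttled: walk stops at the group */
	printf("group=%d root=%d\n", group.h_nr_delayed, root.h_nr_delayed);
	return 0;
}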
@@@ -5484,6 -5505,7 +5511,7 @@@ dequeue_entity(struct cfs_rq *cfs_rq, s
        bool sleep = flags & DEQUEUE_SLEEP;
  
        update_curr(cfs_rq);
+       clear_buddies(cfs_rq, se);
  
        if (flags & DEQUEUE_DELAYED) {
                SCHED_WARN_ON(!se->sched_delayed);
  
                if (sched_feat(DELAY_DEQUEUE) && delay &&
                    !entity_eligible(cfs_rq, se)) {
-                       if (cfs_rq->next == se)
-                               cfs_rq->next = NULL;
                        update_load_avg(cfs_rq, se, 0);
-                       se->sched_delayed = 1;
+                       set_delayed(se);
                        return false;
                }
        }
  
        update_stats_dequeue_fair(cfs_rq, se, flags);
  
-       clear_buddies(cfs_rq, se);
        update_entity_lag(cfs_rq, se);
        if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
                se->deadline -= se->vruntime;
@@@ -5917,7 -5935,7 +5941,7 @@@ static bool throttle_cfs_rq(struct cfs_
        struct rq *rq = rq_of(cfs_rq);
        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
        struct sched_entity *se;
-       long task_delta, idle_task_delta, dequeue = 1;
+       long task_delta, idle_task_delta, delayed_delta, dequeue = 1;
        long rq_h_nr_running = rq->cfs.h_nr_running;
  
        raw_spin_lock(&cfs_b->lock);
  
        task_delta = cfs_rq->h_nr_running;
        idle_task_delta = cfs_rq->idle_h_nr_running;
+       delayed_delta = cfs_rq->h_nr_delayed;
        for_each_sched_entity(se) {
                struct cfs_rq *qcfs_rq = cfs_rq_of(se);
                int flags;
  
                qcfs_rq->h_nr_running -= task_delta;
                qcfs_rq->idle_h_nr_running -= idle_task_delta;
+               qcfs_rq->h_nr_delayed -= delayed_delta;
  
                if (qcfs_rq->load.weight) {
                        /* Avoid re-evaluating load for this entity: */
  
                qcfs_rq->h_nr_running -= task_delta;
                qcfs_rq->idle_h_nr_running -= idle_task_delta;
+               qcfs_rq->h_nr_delayed -= delayed_delta;
        }
  
        /* At this point se is NULL and we are at root level*/
@@@ -6020,7 -6041,7 +6047,7 @@@ void unthrottle_cfs_rq(struct cfs_rq *c
        struct rq *rq = rq_of(cfs_rq);
        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
        struct sched_entity *se;
-       long task_delta, idle_task_delta;
+       long task_delta, idle_task_delta, delayed_delta;
        long rq_h_nr_running = rq->cfs.h_nr_running;
  
        se = cfs_rq->tg->se[cpu_of(rq)];
  
        task_delta = cfs_rq->h_nr_running;
        idle_task_delta = cfs_rq->idle_h_nr_running;
+       delayed_delta = cfs_rq->h_nr_delayed;
        for_each_sched_entity(se) {
                struct cfs_rq *qcfs_rq = cfs_rq_of(se);
  
  
                qcfs_rq->h_nr_running += task_delta;
                qcfs_rq->idle_h_nr_running += idle_task_delta;
+               qcfs_rq->h_nr_delayed += delayed_delta;
  
                /* end evaluation on encountering a throttled cfs_rq */
                if (cfs_rq_throttled(qcfs_rq))
  
                qcfs_rq->h_nr_running += task_delta;
                qcfs_rq->idle_h_nr_running += idle_task_delta;
+               qcfs_rq->h_nr_delayed += delayed_delta;
  
                /* end evaluation on encountering a throttled cfs_rq */
                if (cfs_rq_throttled(qcfs_rq))
@@@ -6943,7 -6967,7 +6973,7 @@@ requeue_delayed_entity(struct sched_ent
        }
  
        update_load_avg(cfs_rq, se, 0);
-       se->sched_delayed = 0;
+       clear_delayed(se);
  }
  
  /*
@@@ -6957,6 -6981,7 +6987,7 @@@ enqueue_task_fair(struct rq *rq, struc
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
        int idle_h_nr_running = task_has_idle_policy(p);
+       int h_nr_delayed = 0;
        int task_new = !(flags & ENQUEUE_WAKEUP);
        int rq_h_nr_running = rq->cfs.h_nr_running;
        u64 slice = 0;
        if (p->in_iowait)
                cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
  
+       if (task_new)
+               h_nr_delayed = !!se->sched_delayed;
        for_each_sched_entity(se) {
                if (se->on_rq) {
                        if (se->sched_delayed)
  
                cfs_rq->h_nr_running++;
                cfs_rq->idle_h_nr_running += idle_h_nr_running;
+               cfs_rq->h_nr_delayed += h_nr_delayed;
  
                if (cfs_rq_is_idle(cfs_rq))
                        idle_h_nr_running = 1;
  
                cfs_rq->h_nr_running++;
                cfs_rq->idle_h_nr_running += idle_h_nr_running;
+               cfs_rq->h_nr_delayed += h_nr_delayed;
  
                if (cfs_rq_is_idle(cfs_rq))
                        idle_h_nr_running = 1;
@@@ -7090,6 -7120,7 +7126,7 @@@ static int dequeue_entities(struct rq *
        struct task_struct *p = NULL;
        int idle_h_nr_running = 0;
        int h_nr_running = 0;
+       int h_nr_delayed = 0;
        struct cfs_rq *cfs_rq;
        u64 slice = 0;
  
                p = task_of(se);
                h_nr_running = 1;
                idle_h_nr_running = task_has_idle_policy(p);
+               if (!task_sleep && !task_delayed)
+                       h_nr_delayed = !!se->sched_delayed;
        } else {
                cfs_rq = group_cfs_rq(se);
                slice = cfs_rq_min_slice(cfs_rq);
  
                cfs_rq->h_nr_running -= h_nr_running;
                cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+               cfs_rq->h_nr_delayed -= h_nr_delayed;
  
                if (cfs_rq_is_idle(cfs_rq))
                        idle_h_nr_running = h_nr_running;
  
                cfs_rq->h_nr_running -= h_nr_running;
                cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+               cfs_rq->h_nr_delayed -= h_nr_delayed;
  
                if (cfs_rq_is_idle(cfs_rq))
                        idle_h_nr_running = h_nr_running;
@@@ -8780,7 -8815,7 +8821,7 @@@ static void check_preempt_wakeup_fair(s
        if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
                return;
  
-       if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
+       if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
                set_next_buddy(pse);
        }
  