]> Git Repo - linux.git/commitdiff
sched/fair: Fix fairness issue on migration
authorPeter Zijlstra <[email protected]>
Wed, 9 Mar 2016 12:04:03 +0000 (13:04 +0100)
committerIngo Molnar <[email protected]>
Mon, 21 Mar 2016 09:49:23 +0000 (10:49 +0100)
Pavan reported that in the presence of very light tasks (or cgroups)
the placement of migrated tasks can cause severe fairness issues.

The problem is that enqueue_entity() places the task before it updates
time, thereby it can place the task far in the past (remember that
light tasks will shoot virtual time forward at a high speed, so in
relation to the pre-existing light task, we can land far in the past).

This is done because update_curr() needs the current task, and we
might be placing the current task.

The obvious solution is to differentiate between the current and any
other task; placing the current before we update time, and placing any
other task after, such that !curr tasks end up at the current moment
in time, and not in the past.

Reported-by: Pavan Kondeti <[email protected]>
Tested-by: Pavan Kondeti <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Ben Segall <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Matt Fleming <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Morten Rasmussen <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
kernel/sched/fair.c

index 33130529e9b5628b53cd7031c4914da0de0e7148..3c114d971d84fbe641c8d7bc6a20be6e91454752 100644 (file)
@@ -3157,17 +3157,25 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+       bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
+       bool curr = cfs_rq->curr == se;
+
        /*
-        * Update the normalized vruntime before updating min_vruntime
-        * through calling update_curr().
+        * If we're the current task, we must renormalise before calling
+        * update_curr().
         */
-       if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
+       if (renorm && curr)
                se->vruntime += cfs_rq->min_vruntime;
 
+       update_curr(cfs_rq);
+
        /*
-        * Update run-time statistics of the 'current'.
+        * Otherwise, renormalise after, such that we're placed at the current
+        * moment in time, instead of some random moment in the past.
         */
-       update_curr(cfs_rq);
+       if (renorm && !curr)
+               se->vruntime += cfs_rq->min_vruntime;
+
        enqueue_entity_load_avg(cfs_rq, se);
        account_entity_enqueue(cfs_rq, se);
        update_cfs_shares(cfs_rq);
@@ -3183,7 +3191,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
                update_stats_enqueue(cfs_rq, se);
                check_spread(cfs_rq, se);
        }
-       if (se != cfs_rq->curr)
+       if (!curr)
                __enqueue_entity(cfs_rq, se);
        se->on_rq = 1;
 
This page took 0.059353 seconds and 4 git commands to generate.