Merge tag 'sched_ext-for-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj...

author Linus Torvalds <[email protected]>

Wed, 20 Nov 2024 18:08:00 +0000 (10:08 -0800)

committer Linus Torvalds <[email protected]>

Wed, 20 Nov 2024 18:08:00 +0000 (10:08 -0800)
author Linus Torvalds <[email protected]>
Wed, 20 Nov 2024 18:08:00 +0000 (10:08 -0800)
committer Linus Torvalds <[email protected]>
Wed, 20 Nov 2024 18:08:00 +0000 (10:08 -0800)
diff --cc include/linux/sched/ext.h
Simple merge
diff --cc kernel/sched/ext.c

index ecb88c52854475c44713a6185fb631364912ab6e,3c4a94e4258f077d3853ed2587197be6c78ce8c7..7fff1d0454770f5fab3e13708816d56653637ee9
--- 1/kernel/sched/ext.c
--- 2/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@@ -2642,10 -2759,10 +2759,10 @@@ static int balance_one(struct rq *rq, s
                  * If the previous sched_class for the current CPU was not SCX,
                  * notify the BPF scheduler that it again has control of the
                  * core. This callback complements ->cpu_release(), which is
-                * emitted in scx_next_task_picked().
+                * emitted in switch_class().
                  */
                 if (SCX_HAS_OP(cpu_acquire))
- -                      SCX_CALL_OP(0, cpu_acquire, cpu_of(rq), NULL);
+ +                      SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL);
                 rq->scx.cpu_released = false;
         }
   
@@@ -4277,9 -4623,52 +4636,52 @@@ bool task_should_scx(int policy
                 return false;
         if (READ_ONCE(scx_switching_all))
                 return true;
- -      return p->policy == SCHED_EXT;
+ +      return policy == SCHED_EXT;
   }
   
+ /**
+  * scx_softlockup - sched_ext softlockup handler
+  *
+  * On some multi-socket setups (e.g. 2x Intel 8480c), the BPF scheduler can
+  * live-lock the system by making many CPUs target the same DSQ to the point
+  * where soft-lockup detection triggers. This function is called from the
+  * soft-lockup watchdog when the triggering point is close and tries to unjam
+  * the system by enabling the breather and aborting the BPF scheduler.
+  */
+ void scx_softlockup(u32 dur_s)
+ {
+       switch (scx_ops_enable_state()) {
+       case SCX_OPS_ENABLING:
+       case SCX_OPS_ENABLED:
+               break;
+       default:
+               return;
+       }
+ 
+       /* allow only one instance, cleared at the end of scx_ops_bypass() */
+       if (test_and_set_bit(0, &scx_in_softlockup))
+               return;
+ 
+       printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
+                       smp_processor_id(), dur_s, scx_ops.name);
+ 
+       /*
+        * Some CPUs may be trapped in the dispatch paths. Enable breather
+        * immediately; otherwise, we might not even be able to get to
+        * scx_ops_bypass().
+        */
+       atomic_inc(&scx_ops_breather_depth);
+ 
+       scx_ops_error("soft lockup - CPU#%d stuck for %us",
+                     smp_processor_id(), dur_s);
+ }
+ 
+ static void scx_clear_softlockup(void)
+ {
+       if (test_and_clear_bit(0, &scx_in_softlockup))
+               atomic_dec(&scx_ops_breather_depth);
+ }
+ 
   /**
    * scx_ops_bypass - [Un]bypass scx_ops and guarantee forward progress
    *
author	Linus Torvalds <[email protected]>
	Wed, 20 Nov 2024 18:08:00 +0000 (10:08 -0800)
committer	Linus Torvalds <[email protected]>
	Wed, 20 Nov 2024 18:08:00 +0000 (10:08 -0800)
		1	2
include/linux/sched/ext.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/ext.c	patch \|	diff1 \|	diff2 \|	blob \| history