From: Paul E. McKenney <paulmck@kernel.org>
Date: Wed, 11 May 2022 18:49:35 +0000 (-0700)
Subject: Merge branch 'exp.2022.05.11a' into HEAD
X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/ce13389053a347aa9f8ffbfda2238352536e15c9?hp=-c

Merge branch 'exp.2022.05.11a' into HEAD

exp.2022.05.11a: Expedited-grace-period latency-reduction updates.
---

ce13389053a347aa9f8ffbfda2238352536e15c9
diff --combined Documentation/admin-guide/kernel-parameters.txt
index 82dd253e5dbd,5e21a3fb57c4..f8d9af5d51e5
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@@ -4893,6 -4893,18 +4893,18 @@@
 	rcupdate.rcu_cpu_stall_timeout= [KNL]
 			Set timeout for RCU CPU stall warning messages.
+			The value is in seconds and the maximum allowed
+			value is 300 seconds.
+
+	rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+			Set timeout for expedited RCU CPU stall warning
+			messages. The value is in milliseconds
+			and the maximum allowed value is 21000
+			milliseconds. Please note that this value is
+			adjusted to an arch timer tick resolution.
+			Setting this to zero causes the value from
+			rcupdate.rcu_cpu_stall_timeout to be used (after
+			conversion from seconds to milliseconds).
 
 	rcupdate.rcu_expedited= [KNL]
 			Use expedited grace-period primitives, for
@@@ -4955,34 -4967,10 +4967,34 @@@
 			number avoids disturbing real-time workloads,
 			but lengthens grace periods.
 
+	rcupdate.rcu_task_stall_info= [KNL]
+			Set initial timeout in jiffies for RCU task stall
+			informational messages, which give some indication
+			of the problem for those not patient enough to
+			wait for ten minutes. Informational messages are
+			only printed prior to the stall-warning message
+			for a given grace period. Disable with a value
+			less than or equal to zero. Defaults to ten
+			seconds. A change in value does not take effect
+			until the beginning of the next grace period.
+
+	rcupdate.rcu_task_stall_info_mult= [KNL]
+			Multiplier for time interval between successive
+			RCU task stall informational messages for a given
+			RCU tasks grace period. This value is clamped
+			to one through ten, inclusive. It defaults to
+			the value three, so that the first informational
+			message is printed 10 seconds into the grace
+			period, the second at 40 seconds, the third at
+			160 seconds, and then the stall warning at 600
+			seconds would prevent a fourth at 640 seconds.
+
 	rcupdate.rcu_task_stall_timeout= [KNL]
-			Set timeout in jiffies for RCU task stall warning
-			messages. Disable with a value less than or equal
-			to zero.
+			Set timeout in jiffies for RCU task stall
+			warning messages. Disable with a value less
+			than or equal to zero. Defaults to ten minutes.
+			A change in value does not take effect until
+			the beginning of the next grace period.
 
 	rcupdate.rcu_self_test= [KNL]
 			Run the RCU early boot self tests
@@@ -5401,17 -5389,6 +5413,17 @@@
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]
 
+	smp.csd_lock_timeout= [KNL]
+			Specify the period of time in milliseconds
+			that smp_call_function() and friends will wait
+			for a CPU to release the CSD lock. This is
+			useful when diagnosing bugs involving CPUs
+			disabling interrupts for extended periods
+			of time. Defaults to 5,000 milliseconds, and
+			setting a value of zero disables this feature.
+			This feature may be more efficiently disabled
+			using the csdlock_debug- kernel parameter.
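
[Aside: a minimal userspace sketch, not kernel code, of the message schedule
implied by the rcu_task_stall_info and rcu_task_stall_info_mult text above.
With a 10-second initial timeout and the default multiplier of 3, each message
lands at four times the previous offset (10 s, 40 s, 160 s), and the 600-second
stall warning preempts the message that would have appeared at 640 s.]

	#include <stdio.h>

	int main(void)
	{
		int when = 10;			/* rcu_task_stall_info, in seconds */
		const int mult = 3;		/* rcu_task_stall_info_mult */
		const int timeout = 600;	/* rcu_task_stall_timeout, in seconds */

		while (when < timeout) {
			printf("informational message at %d seconds\n", when);
			when += when * mult;	/* each interval grows by the multiplier */
		}
		printf("stall warning at %d seconds\n", timeout);
		return 0;
	}
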
+
 	smsc-ircc2.nopnp	[HW] Don't use PNP to discover SMC devices
 	smsc-ircc2.ircc_cfg=	[HW] Device configuration I/O port
 	smsc-ircc2.ircc_sir=	[HW] SIR base I/O port
@@@ -5643,30 -5620,6 +5655,30 @@@
 			off:    Disable mitigation and remove
 				performance impact to RDRAND and RDSEED
 
+	srcutree.big_cpu_lim	[KNL]
+			Specifies the number of CPUs constituting a
+			large system, such that srcu_struct structures
+			should immediately allocate an srcu_node array.
+			This kernel-boot parameter defaults to 128,
+			but takes effect only when the low-order four
+			bits of srcutree.convert_to_big is equal to 3
+			(decide at boot).
+
+	srcutree.convert_to_big	[KNL]
+			Specifies under what conditions an SRCU tree
+			srcu_struct structure will be converted to big
+			form, that is, with an rcu_node tree:
+
+				0:    Never.
+				1:    At init_srcu_struct() time.
+				2:    When rcutorture decides to.
+				3:    Decide at boot time (default).
+				0x1X: Above plus if high contention.
+
+			Either way, the srcu_node tree will be sized based
+			on the actual runtime number of CPUs (nr_cpu_ids)
+			instead of the compile-time CONFIG_NR_CPUS.
+
 	srcutree.counter_wrap_check [KNL]
 			Specifies how frequently to check for
 			grace-period sequence counter wrap for the
@@@ -5684,14 -5637,6 +5696,14 @@@
 			expediting. Set to zero to disable automatic
 			expediting.
 
+	srcutree.small_contention_lim [KNL]
+			Specifies the number of update-side contention
+			events per jiffy that will be tolerated before
+			initiating a conversion of an srcu_struct
+			structure to big form. Note that the value of
+			srcutree.convert_to_big must have the 0x10 bit
+			set for contention-based conversions to occur.
+
 	ssbd=		[ARM64,HW]	Speculative Store Bypass Disable control
 
diff --combined kernel/rcu/Kconfig
index 65d45c00fd1b,fd64a75823cb..1c630e573548
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@@ -77,56 -77,31 +77,56 @@@ config TASKS_RCU_GENERI
 	  This option enables generic infrastructure code supporting
 	  task-based RCU implementations. Not for manual selection.
 
+config FORCE_TASKS_RCU
+	bool "Force selection of TASKS_RCU"
+	depends on RCU_EXPERT
+	select TASKS_RCU
+	default n
+	help
+	  This option force-enables a task-based RCU implementation
+	  that uses only voluntary context switch (not preemption!),
+	  idle, and user-mode execution as quiescent states. Not for
+	  manual selection in most cases.
+
 config TASKS_RCU
-	def_bool PREEMPTION
+	bool
+	default n
+	select IRQ_WORK
+
+config FORCE_TASKS_RUDE_RCU
+	bool "Force selection of Tasks Rude RCU"
+	depends on RCU_EXPERT
+	select TASKS_RUDE_RCU
+	default n
 	help
-	  This option enables a task-based RCU implementation that uses
-	  only voluntary context switch (not preemption!), idle, and
-	  user-mode execution as quiescent states. Not for manual selection.
+	  This option force-enables a task-based RCU implementation
+	  that uses only context switch (including preemption) and
+	  user-mode execution as quiescent states. It forces IPIs and
+	  context switches on all online CPUs, including idle ones,
+	  so use with caution. Not for manual selection in most cases.
 
 config TASKS_RUDE_RCU
-	def_bool 0
+	bool
+	default n
+	select IRQ_WORK
+
+config FORCE_TASKS_TRACE_RCU
+	bool "Force selection of Tasks Trace RCU"
+	depends on RCU_EXPERT
+	select TASKS_TRACE_RCU
+	default n
 	help
 	  This option enables a task-based RCU implementation that uses
-	  only context switch (including preemption) and user-mode
-	  execution as quiescent states. It forces IPIs and context
-	  switches on all online CPUs, including idle ones, so use
-	  with caution.
+	  explicit rcu_read_lock_trace() read-side markers, and allows
+	  these readers to appear in the idle loop as well as on the
+	  CPU hotplug code paths. It can force IPIs on online CPUs,
+	  including idle ones, so use with caution. Not for manual
+	  selection in most cases.
 
 config TASKS_TRACE_RCU
-	def_bool 0
+	bool
+	default n
 	select IRQ_WORK
-	help
-	  This option enables a task-based RCU implementation that uses
-	  explicit rcu_read_lock_trace() read-side markers, and allows
-	  these readers to appear in the idle loop as well as on the CPU
-	  hotplug code paths. It can force IPIs on online CPUs, including
-	  idle ones, so use with caution.
 
 config RCU_STALL_COMMON
 	def_bool TREE_RCU
@@@ -219,6 -194,20 +219,20 @@@ config RCU_BOOST_DELA
 	  blocking an expedited RCU grace period is boosted immediately.
 
 	  Accept the default if unsure.
+
+config RCU_EXP_KTHREAD
+	bool "Perform RCU expedited work in a real-time kthread"
+	depends on RCU_BOOST && RCU_EXPERT
+	default !PREEMPT_RT && NR_CPUS <= 32
+	help
+	  Use this option to further reduce the latencies of expedited
+	  grace periods at the expense of being more disruptive.
+
+	  This option is disabled by default on PREEMPT_RT=y kernels which
+	  disable expedited grace periods after boot by unconditionally
+	  setting rcupdate.rcu_normal_after_boot=1.
+
+	  Accept the default if unsure.
 
 config RCU_NOCB_CPU
 	bool "Offload RCU callback processing from boot-selected CPUs"
@@@ -250,7 -239,7 +264,7 @@@ config TASKS_TRACE_RCU_READ_MB
 	bool "Tasks Trace RCU readers use memory barriers in user and idle"
-	depends on RCU_EXPERT
+	depends on RCU_EXPERT && TASKS_TRACE_RCU
 	default PREEMPT_RT || NR_CPUS < 8
 	help
 	  Use this option to further reduce the number of IPIs sent
 
diff --combined kernel/rcu/Kconfig.debug
index 68092e1db64b,0b397b5bf846..9b64e55d4f61
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@@ -28,6 -28,9 +28,6 @@@ config RCU_SCALE_TES
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RCU
-	select TASKS_RUDE_RCU
-	select TASKS_TRACE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs performance
@@@ -44,6 -47,9 +44,6 @@@ config RCU_TORTURE_TES
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RCU
-	select TASKS_RUDE_RCU
-	select TASKS_TRACE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs torture tests
@@@ -60,6 -66,9 +60,6 @@@ config RCU_REF_SCALE_TES
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RCU
-	select TASKS_RUDE_RCU
-	select TASKS_TRACE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs performance tests
@@@ -82,6 -91,20 +82,20 @@@ config RCU_CPU_STALL_TIMEOU
 	  RCU grace period persists, additional CPU stall warnings are
 	  printed at more widely spaced intervals.
+
+config RCU_EXP_CPU_STALL_TIMEOUT
+	int "Expedited RCU CPU stall timeout in milliseconds"
+	depends on RCU_STALL_COMMON
+	range 0 21000
+	default 20 if ANDROID
+	default 0 if !ANDROID
+	help
+	  If a given expedited RCU grace period extends more than the
+	  specified number of milliseconds, a CPU stall warning is printed.
+	  If the RCU grace period persists, additional CPU stall warnings
+	  are printed at more widely spaced intervals. A value of zero
+	  says to use the RCU_CPU_STALL_TIMEOUT value converted from
+	  seconds to milliseconds.
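
[Aside: a sketch only, in userspace form, of the zero-fallback rule described in
the help text above; it is not the kernel's implementation, which is
rcu_exp_jiffies_till_stall_check() in kernel/rcu/tree_stall.h later in this
merge. A zero expedited timeout falls back to the normal stall timeout,
converted from seconds to milliseconds and capped by the Kconfig
"range 0 21000".]

	int effective_exp_stall_ms(int exp_ms, int stall_secs)
	{
		if (exp_ms == 0)
			exp_ms = stall_secs * 1000;	/* seconds -> milliseconds */
		if (exp_ms > 21000)
			exp_ms = 21000;			/* Kconfig maximum */
		return exp_ms;
	}
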
+
 config RCU_TRACE
 	bool "Enable tracing for RCU"
 	depends on DEBUG_KERNEL
 
diff --combined kernel/rcu/rcu.h
index a985dfc2ce26,e27bf7d1e3a4..152492d52715
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@@ -210,7 -210,9 +210,9 @@@ static inline bool rcu_stall_is_suppres
 extern int rcu_cpu_stall_ftrace_dump;
 extern int rcu_cpu_stall_suppress;
 extern int rcu_cpu_stall_timeout;
+extern int rcu_exp_cpu_stall_timeout;
 int rcu_jiffies_till_stall_check(void);
+int rcu_exp_jiffies_till_stall_check(void);
 
 static inline bool rcu_stall_is_suppressed(void)
 {
@@@ -523,8 -525,6 +525,8 @@@ static inline bool rcu_check_boost_fail
 static inline void show_rcu_gp_kthreads(void) { }
 static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
 static inline void rcu_fwd_progress_check(unsigned long j) { }
+static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
+static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
 #else /* #ifdef CONFIG_TINY_RCU */
 bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
 unsigned long rcu_get_gp_seq(void);
@@@ -536,14 -536,19 +538,19 @@@ int rcu_get_gp_kthreads_prio(void)
 void rcu_fwd_progress_check(unsigned long j);
 void rcu_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+extern struct kthread_worker *rcu_exp_gp_kworker;
+extern struct kthread_worker *rcu_exp_par_gp_kworker;
+#else /* !CONFIG_RCU_EXP_KTHREAD */
 extern struct workqueue_struct *rcu_par_gp_wq;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+void rcu_gp_slow_register(atomic_t *rgssp);
+void rcu_gp_slow_unregister(atomic_t *rgssp);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
-bool rcu_is_nocb_cpu(int cpu);
 void rcu_bind_current_to_nocb(void);
 #else
-static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
 static inline void rcu_bind_current_to_nocb(void) { }
 #endif
 
diff --combined kernel/rcu/tree.c
index 75a35b7adbfa,763e45fdf49b..c25ba442044a
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -1679,8 -1679,6 +1679,8 @@@ static bool __note_gp_changes(struct rc
 	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
 	if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
 		WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
+	if (IS_ENABLED(CONFIG_PROVE_RCU) && READ_ONCE(rdp->gpwrap))
+		WRITE_ONCE(rdp->last_sched_clock, jiffies);
 	WRITE_ONCE(rdp->gpwrap, false);
 	rcu_gpnum_ovf(rnp, rdp);
 	return ret;
@@@ -1707,37 -1705,11 +1707,37 @@@ static void note_gp_changes(struct rcu_
 		rcu_gp_kthread_wake();
 }
 
+static atomic_t *rcu_gp_slow_suppress;
+
+/* Register a counter to suppress debugging grace-period delays. */
+void rcu_gp_slow_register(atomic_t *rgssp)
+{
+	WARN_ON_ONCE(rcu_gp_slow_suppress);
+
+	WRITE_ONCE(rcu_gp_slow_suppress, rgssp);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
+
+/* Unregister a counter, with NULL for not caring which. */
+void rcu_gp_slow_unregister(atomic_t *rgssp)
+{
+	WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+
+	WRITE_ONCE(rcu_gp_slow_suppress, NULL);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_slow_unregister);
+
+static bool rcu_gp_slow_is_suppressed(void)
+{
+	atomic_t *rgssp = READ_ONCE(rcu_gp_slow_suppress);
+
+	return rgssp && atomic_read(rgssp);
+}
+
 static void rcu_gp_slow(int delay)
 {
-	if (delay > 0 &&
-	    !(rcu_seq_ctr(rcu_state.gp_seq) %
-	      (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
+	if (!rcu_gp_slow_is_suppressed() && delay > 0 &&
+	    !(rcu_seq_ctr(rcu_state.gp_seq) % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
 		schedule_timeout_idle(delay);
 }
@@@ -2124,29 -2096,14 +2124,29 @@@ static noinline void rcu_gp_cleanup(voi
 	/* Advance CBs to reduce false positives below. */
 	offloaded = rcu_rdp_is_offloaded(rdp);
 	if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
+
+		// We get here if a grace period was needed (“needgp”)
+		// and the above call to rcu_accelerate_cbs() did not set
+		// the RCU_GP_FLAG_INIT bit in ->gp_flags (which records
+		// the need for another grace period). The purpose
+		// of the “offloaded” check is to avoid invoking
+		// rcu_accelerate_cbs() on an offloaded CPU because we do not
+		// hold the ->nocb_lock needed to safely access an offloaded
+		// ->cblist. We do not want to acquire that lock because
+		// it can be heavily contended during callback floods.
+
 		WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
 		WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
-		trace_rcu_grace_period(rcu_state.name,
-				       rcu_state.gp_seq,
-				       TPS("newreq"));
+		trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
 	} else {
+
+		// We get here either if there is no need for an
+		// additional grace period or if rcu_accelerate_cbs() has
+		// already set the RCU_GP_FLAG_INIT bit in ->gp_flags.
+		// So all we need to do is to clear all of the other
+		// ->gp_flags bits.
+
 		WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags & RCU_GP_FLAG_INIT);
 	}
 	raw_spin_unlock_irq_rcu_node(rnp);
@@@ -2652,13 -2609,6 +2652,13 @@@ static void rcu_do_batch(struct rcu_dat
  */
 void rcu_sched_clock_irq(int user)
 {
+	unsigned long j;
+
+	if (IS_ENABLED(CONFIG_PROVE_RCU)) {
+		j = jiffies;
+		WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+		__this_cpu_write(rcu_data.last_sched_clock, j);
+	}
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	lockdep_assert_irqs_disabled();
 	raw_cpu_inc(rcu_data.ticks_this_gp);
@@@ -2674,8 -2624,6 +2674,8 @@@
 	rcu_flavor_sched_clock_irq(user);
 	if (rcu_pending(user))
 		invoke_rcu_core();
+	if (user)
+		rcu_tasks_classic_qs(current, false);
 	lockdep_assert_irqs_disabled();
 	trace_rcu_utilization(TPS("End scheduler-tick"));
@@@ -3769,9 -3717,7 +3769,9 @@@ static int rcu_blocking_is_gp(void
 {
 	int ret;
 
-	if (IS_ENABLED(CONFIG_PREEMPTION))
+	// Invoking preempt_model_*() too early gets a splat.
+	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE ||
+	    preempt_model_full() || preempt_model_rt())
 		return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
 	might_sleep();  /* Check for RCU read-side critical section. */
 	preempt_disable();
@@@ -4233,7 -4179,6 +4233,7 @@@ rcu_boot_init_percpu_data(int cpu
 	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
 	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
 	rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
+	rdp->last_sched_clock = jiffies;
 	rdp->cpu = cpu;
 	rcu_boot_init_nocb_percpu_data(rdp);
 }
@@@ -4526,6 -4471,51 +4526,51 @@@ static int rcu_pm_notify(struct notifie
 	return NOTIFY_OK;
 }
 
+#ifdef CONFIG_RCU_EXP_KTHREAD
+struct kthread_worker *rcu_exp_gp_kworker;
+struct kthread_worker *rcu_exp_par_gp_kworker;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+	const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker";
+	const char *gp_kworker_name = "rcu_exp_gp_kthread_worker";
+	struct sched_param param = { .sched_priority = kthread_prio };
+
+	rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name);
+	if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) {
+		pr_err("Failed to create %s!\n", gp_kworker_name);
+		return;
+	}
+
+	rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name);
+	if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) {
+		pr_err("Failed to create %s!\n", par_gp_kworker_name);
+		kthread_destroy_worker(rcu_exp_gp_kworker);
+		return;
+	}
+
+	sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param);
+	sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO,
+				   &param);
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+}
+#else /* !CONFIG_RCU_EXP_KTHREAD */
+struct workqueue_struct *rcu_par_gp_wq;
+
+static void __init rcu_start_exp_gp_kworkers(void)
+{
+}
+
+static inline void rcu_alloc_par_gp_wq(void)
+{
+	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+	WARN_ON(!rcu_par_gp_wq);
+}
+#endif /* CONFIG_RCU_EXP_KTHREAD */
+
 /*
  * Spawn the kthreads that handle RCU's grace periods.
  */
@@@ -4535,7 -4525,6 +4580,7 @@@ static int __init rcu_spawn_gp_kthread(
 	struct rcu_node *rnp;
 	struct sched_param sp;
 	struct task_struct *t;
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
 	rcu_scheduler_fully_active = 1;
 	t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
@@@ -4553,15 -4542,11 +4598,17 @@@
 	smp_store_release(&rcu_state.gp_kthread, t);  /* ^^^ */
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	wake_up_process(t);
-	rcu_spawn_nocb_kthreads();
-	rcu_spawn_boost_kthreads();
+	/* This is a pre-SMP initcall, we expect a single CPU */
+	WARN_ON(num_online_cpus() > 1);
+	/*
+	 * Those kthreads couldn't be created on rcu_init() -> rcutree_prepare_cpu()
+	 * due to rcu_scheduler_fully_active.
+	 */
+	rcu_spawn_cpu_nocb_kthread(smp_processor_id());
+	rcu_spawn_one_boost_kthread(rdp->mynode);
 	rcu_spawn_core_kthreads();
+	/* Create kthread worker for expedited GPs */
+	rcu_start_exp_gp_kworkers();
 	return 0;
 }
 early_initcall(rcu_spawn_gp_kthread);
@@@ -4807,7 -4792,6 +4854,6 @@@ static void __init rcu_dump_rcu_node_tre
 }
 
 struct workqueue_struct *rcu_gp_wq;
-struct workqueue_struct *rcu_par_gp_wq;
 
 static void __init kfree_rcu_batch_init(void)
 {
@@@ -4844,7 -4828,7 +4890,7 @@@
 void __init rcu_init(void)
 {
-	int cpu;
+	int cpu = smp_processor_id();
 
 	rcu_early_boot_tests();
@@@ -4864,16 -4848,16 +4910,15 @@@
 	 * or the scheduler are operational.
 	 */
 	pm_notifier(rcu_pm_notify, 0);
-	for_each_online_cpu(cpu) {
-		rcutree_prepare_cpu(cpu);
-		rcu_cpu_starting(cpu);
-		rcutree_online_cpu(cpu);
-	}
+	WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
+	rcutree_prepare_cpu(cpu);
+	rcu_cpu_starting(cpu);
+	rcutree_online_cpu(cpu);
 
 	/* Create workqueue for Tree SRCU and for expedited GPs. */
 	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_gp_wq);
-	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
-	WARN_ON(!rcu_par_gp_wq);
+	rcu_alloc_par_gp_wq();
 
 	/* Fill in default value for rcutree.qovld boot parameter. */
 	/* -After- the rcu_node ->lock fields are initialized! */
 
diff --combined kernel/rcu/tree.h
index 8aa5bf74e796,b577cdfdc851..2ccf5845957d
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@@ -10,6 -10,7 +10,7 @@@
  */
 
 #include <linux/cache.h>
+#include <linux/kthread.h>
 #include <linux/rtmutex.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
@@@ -23,7 -24,11 +24,11 @@@
 /* Communicate arguments to a workqueue handler. */
 struct rcu_exp_work {
 	unsigned long rew_s;
+#ifdef CONFIG_RCU_EXP_KTHREAD
+	struct kthread_work rew_work;
+#else
 	struct work_struct rew_work;
+#endif /* CONFIG_RCU_EXP_KTHREAD */
 };
 
 /* RCU's kthread states for tracing. */
@@@ -254,7 -259,6 +259,7 @@@ struct rcu_data
 	unsigned long rcu_onl_gp_seq;		/* ->gp_seq at last online. */
 	short rcu_onl_gp_flags;			/* ->gp_flags at last online. */
 	unsigned long last_fqs_resched;		/* Time of last rcu_resched(). */
+	unsigned long last_sched_clock;		/* Jiffies of last rcu_sched_clock_irq(). */
 
 	int cpu;
 };
@@@ -365,7 -369,6 +370,7 @@@ struct rcu_state
 	arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
 						/* Synchronize offline with */
 						/*  GP pre-initialization. */
+	int nocb_is_setup;			/* nocb is setup from boot */
 };
 
 /* Values for rcu_state structure's gp_flags field. */
@@@ -423,6 -426,7 +428,6 @@@ static void rcu_preempt_boost_start_gp(
 static bool rcu_is_callbacks_kthread(void);
 static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
-static void __init rcu_spawn_boost_kthreads(void);
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
 static void rcu_preempt_deferred_qs(struct task_struct *t);
@@@ -440,6 -444,7 +445,6 @@@ static int rcu_nocb_need_deferred_wakeu
 static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_cpu_nocb_kthread(int cpu);
-static void __init rcu_spawn_nocb_kthreads(void);
 static void show_rcu_nocb_state(struct rcu_data *rdp);
 static void rcu_nocb_lock(struct rcu_data *rdp);
 static void rcu_nocb_unlock(struct rcu_data *rdp);
 
diff --combined kernel/rcu/tree_stall.h
index 268dd79c58e7,009d3f9305cf..a001e1e7a992
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@@ -25,6 -25,34 +25,34 @@@ int sysctl_max_rcu_stall_to_panic __rea
 #define RCU_STALL_MIGHT_DIV		8
 #define RCU_STALL_MIGHT_MIN		(2 * HZ)
 
+int rcu_exp_jiffies_till_stall_check(void)
+{
+	int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
+	int exp_stall_delay_delta = 0;
+	int till_stall_check;
+
+	// Zero says to use rcu_cpu_stall_timeout, but in milliseconds.
+	if (!cpu_stall_timeout)
+		cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
+
+	// Limit check must be consistent with the Kconfig limits for
+	// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
+	// The minimum clamped value is "2UL", because at least one full
+	// tick has to be guaranteed.
+	till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
+
+	if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
+		WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
+
+#ifdef CONFIG_PROVE_RCU
+	/* Add extra ~25% out of till_stall_check. */
+	exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
+#endif
+
+	return till_stall_check + exp_stall_delay_delta;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
+
 /* Limit-check stall timeouts specified at boottime and runtime. */
 int rcu_jiffies_till_stall_check(void)
 {
@@@ -565,9 -593,9 +593,9 @@@ static void print_other_cpu_stall(unsig
 	for_each_possible_cpu(cpu)
 		totqlen += rcu_get_n_cbs_cpu(cpu);
-	pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
+	pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
 	       smp_processor_id(), (long)(jiffies - gps),
-	       (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+	       (long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
 	if (ndetected) {
 		rcu_dump_cpu_stacks();
@@@ -626,9 -654,9 +654,9 @@@ static void print_cpu_stall(unsigned lo
 	raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
 	for_each_possible_cpu(cpu)
 		totqlen += rcu_get_n_cbs_cpu(cpu);
-	pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
+	pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
 		jiffies - gps,
-		(long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+		(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
 	rcu_check_gp_kthread_expired_fqs_timer();
 	rcu_check_gp_kthread_starvation();
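
[Aside: a hypothetical usage sketch for the rcu_gp_slow_register()/
rcu_gp_slow_unregister() pair added to kernel/rcu/tree.c above. A caller, for
example a torture test, registers an atomic counter and holds it nonzero while
debugging grace-period delays should be suppressed; rcu_gp_slow() skips its
delay whenever the registered counter reads nonzero. The my_* names below are
illustrative only and presume a kernel build context.]

	static atomic_t my_gp_slow_suppress = ATOMIC_INIT(0);

	static void my_test_start(void)
	{
		/* Only one counter may be registered at a time. */
		rcu_gp_slow_register(&my_gp_slow_suppress);
		/* Nonzero value: rcu_gp_slow() skips its delay. */
		atomic_inc(&my_gp_slow_suppress);
	}

	static void my_test_end(void)
	{
		atomic_dec(&my_gp_slow_suppress);
		/* Passing NULL would also be accepted ("not caring which"). */
		rcu_gp_slow_unregister(&my_gp_slow_suppress);
	}
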