linux.git/commitdiff
Merge branches 'rcu/fixes', 'rcu/nocb', 'rcu/torture', 'rcu/stall' and 'rcu/srcu...
author    Frederic Weisbecker <[email protected]>
Fri, 15 Nov 2024 21:38:53 +0000 (22:38 +0100)
committer Frederic Weisbecker <[email protected]>
Fri, 15 Nov 2024 21:38:53 +0000 (22:38 +0100)
18 files changed:
Documentation/RCU/stallwarn.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/kernel-per-CPU-kthreads.rst
include/linux/rcutiny.h
include/linux/rcutree.h
kernel/rcu/Kconfig
kernel/rcu/rcu_segcblist.h
kernel/rcu/rcuscale.c
kernel/rcu/rcutorture.c
kernel/rcu/refscale.c
kernel/rcu/srcutiny.c
kernel/rcu/tasks.h
kernel/rcu/tree.c
kernel/rcu/tree_nocb.h
kernel/rcu/tree_plugin.h
kernel/rcu/tree_stall.h
tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
tools/testing/selftests/rcutorture/bin/kvm.sh

Documentation/RCU/stallwarn.rst
index ca7b7cd806a16c9c98fdf04eadbe4f4997249c82..30080ff6f4062d9c11affa57cb56cc1e9afd0285 100644 (file)
@@ -249,7 +249,7 @@ ticks this GP)" indicates that this CPU has not taken any scheduling-clock
 interrupts during the current stalled grace period.
 
 The "idle=" portion of the message prints the dyntick-idle state.
-The hex number before the first "/" is the low-order 12 bits of the
+The hex number before the first "/" is the low-order 16 bits of the
 dynticks counter, which will have an even-numbered value if the CPU
 is in dyntick-idle mode and an odd-numbered value otherwise.  The hex
 number between the two "/"s is the value of the nesting, which will be
Documentation/admin-guide/kernel-parameters.txt
index 203ec51e41d48eab09a9b7e3f1544336f869a414..686ea876a89c7d08836490fe15bb81c8bb96cf39 100644 (file)
                        Set time (jiffies) between CPU-hotplug operations,
                        or zero to disable CPU-hotplug testing.
 
-       rcutorture.read_exit= [KNL]
-                       Set the number of read-then-exit kthreads used
-                       to test the interaction of RCU updaters and
-                       task-exit processing.
-
        rcutorture.read_exit_burst= [KNL]
                        The number of times in a given read-then-exit
                        episode that a set of read-then-exit kthreads
Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index b6aeae3327ceb537b78fdbd86961ae670614395b..ea7fa2a8bbf0b95a7116e19a0cb6db3d3348c71a 100644 (file)
@@ -315,7 +315,7 @@ To reduce its OS jitter, do at least one of the following:
        to do.
 
 Name:
-  rcuop/%d and rcuos/%d
+  rcuop/%d, rcuos/%d, and rcuog/%d
 
 Purpose:
   Offload RCU callbacks from the corresponding CPU.
include/linux/rcutiny.h
index 0ee270b3f5ed2fb8cb8aa5dc4d11e7a24c9675f3..fe42315f667fc5be7f2ed8eae6ea0c7193030846 100644 (file)
@@ -165,7 +165,6 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
 static inline bool rcu_is_watching(void) { return true; }
 static inline void rcu_momentary_eqs(void) { }
 static inline void kfree_rcu_scheduler_running(void) { }
-static inline bool rcu_gp_might_be_stalled(void) { return false; }
 
 /* Avoid RCU read-side critical sections leaking across. */
 static inline void rcu_all_qs(void) { barrier(); }
include/linux/rcutree.h
index 90a684f94776ed8cccf53e0e35239c50729943d6..27d86d9127817e50f8d4dd79e1990d70a02435bb 100644 (file)
@@ -40,7 +40,6 @@ void kvfree_rcu_barrier(void);
 void rcu_barrier(void);
 void rcu_momentary_eqs(void);
 void kfree_rcu_scheduler_running(void);
-bool rcu_gp_might_be_stalled(void);
 
 struct rcu_gp_oldstate {
        unsigned long rgos_norm;
kernel/rcu/Kconfig
index 3e079de0f5b434bd394033f08a38a00228c07432..b9b6bc55185dba365236b4f01d8aaa3596f70ecd 100644 (file)
@@ -249,16 +249,24 @@ config RCU_NOCB_CPU
          workloads will incur significant increases in context-switch
          rates.
 
-         This option offloads callback invocation from the set of CPUs
-         specified at boot time by the rcu_nocbs parameter.  For each
-         such CPU, a kthread ("rcuox/N") will be created to invoke
-         callbacks, where the "N" is the CPU being offloaded, and where
-         the "x" is "p" for RCU-preempt (PREEMPTION kernels) and "s" for
-         RCU-sched (!PREEMPTION kernels).  Nothing prevents this kthread
-         from running on the specified CPUs, but (1) the kthreads may be
-         preempted between each callback, and (2) affinity or cgroups can
-         be used to force the kthreads to run on whatever set of CPUs is
-         desired.
+         This option offloads callback invocation from the set of
+         CPUs specified at boot time by the rcu_nocbs parameter.
+         For each such CPU, a kthread ("rcuox/N") will be created to
+         invoke callbacks, where the "N" is the CPU being offloaded,
+         and where the "x" is "p" for RCU-preempt (PREEMPTION kernels)
+         and "s" for RCU-sched (!PREEMPTION kernels).  This option
+         also creates another kthread for each sqrt(nr_cpu_ids) CPUs
+         ("rcuog/N", where N is the first CPU in that group to come
+         online), which handles grace periods for its group.  Nothing
+         prevents these kthreads from running on the specified CPUs,
+         but (1) the kthreads may be preempted between each callback,
+         and (2) affinity or cgroups can be used to force the kthreads
+         to run on whatever set of CPUs is desired.
+
+         The sqrt(nr_cpu_ids) grouping may be overridden using the
+         rcutree.rcu_nocb_gp_stride kernel boot parameter.  This can
+         be especially helpful for smaller numbers of CPUs, where
+         sqrt(nr_cpu_ids) can be a bit of a blunt instrument.
 
          Say Y here if you need reduced OS jitter, despite added overhead.
          Say N here if you are unsure.
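
For reference, the offloading and grouping described in this help text are selected on the kernel boot command line; the CPU range and stride in the sketch below are illustrative values and are not taken from this commit:

	# Illustrative only: offload callbacks from CPUs 8-15 and group the
	# rcuog/N grace-period kthreads by 4 CPUs rather than the default
	# sqrt(nr_cpu_ids) stride.  These strings go on the kernel boot command line.
	rcu_nocbs=8-15 rcutree.rcu_nocb_gp_stride=4
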
kernel/rcu/rcu_segcblist.h
index 2599040756369a0cb4bbd7dd66d2a37d47d7223d..fadc08ad4b7b603de81a752e9c06b5e702a74f7c 100644 (file)
@@ -120,7 +120,6 @@ void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp);
 void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v);
 void rcu_segcblist_init(struct rcu_segcblist *rsclp);
 void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
-void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload);
 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
kernel/rcu/rcuscale.c
index 6d37596deb1f123ce855b430559faeee9fa50f33..0f3059b1b80d03ddb8884fa4352e9497f476eecf 100644 (file)
@@ -889,14 +889,14 @@ kfree_scale_init(void)
 
                if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
                        pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
-                       WARN_ON_ONCE(1);
-                       return -1;
+                       firsterr = -1;
+                       goto unwind;
                }
 
                if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) {
                        pr_alert("ERROR: call_rcu() CBs are being too lazy!\n");
-                       WARN_ON_ONCE(1);
-                       return -1;
+                       firsterr = -1;
+                       goto unwind;
                }
        }
 
kernel/rcu/rcutorture.c
index a313cdcb0960f3502bc5a3bb29a251d0f8a23068..2ae8a5e5e99aa37ab54110a33c12c29746bfa510 100644 (file)
@@ -397,6 +397,7 @@ struct rcu_torture_ops {
        int slow_gps;
        int no_pi_lock;
        int debug_objects;
+       int start_poll_irqsoff;
        const char *name;
 };
 
@@ -585,6 +586,7 @@ static struct rcu_torture_ops rcu_ops = {
        .can_boost              = IS_ENABLED(CONFIG_RCU_BOOST),
        .extendables            = RCUTORTURE_MAX_EXTEND,
        .debug_objects          = 1,
+       .start_poll_irqsoff     = 1,
        .name                   = "rcu"
 };
 
@@ -1081,8 +1083,13 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *star
                        // At most one persisted message per boost test.
                        j = jiffies;
                        lp = READ_ONCE(last_persist);
-                       if (time_after(j, lp + mininterval) && cmpxchg(&last_persist, lp, j) == lp)
-                               pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu);
+                       if (time_after(j, lp + mininterval) &&
+                           cmpxchg(&last_persist, lp, j) == lp) {
+                               if (cpu < 0)
+                                       pr_info("Boost inversion persisted: QS from all CPUs\n");
+                               else
+                                       pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu);
+                       }
                        return false; // passed on a technicality
                }
                VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
@@ -1717,14 +1724,22 @@ rcu_torture_fakewriter(void *arg)
                                cur_ops->cond_sync_exp_full(&gp_snap_full);
                                break;
                        case RTWS_POLL_GET:
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_disable();
                                gp_snap = cur_ops->start_gp_poll();
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_enable();
                                while (!cur_ops->poll_gp_state(gp_snap)) {
                                        torture_hrtimeout_jiffies(torture_random(&rand) % 16,
                                                                  &rand);
                                }
                                break;
                        case RTWS_POLL_GET_FULL:
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_disable();
                                cur_ops->start_gp_poll_full(&gp_snap_full);
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_enable();
                                while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
                                        torture_hrtimeout_jiffies(torture_random(&rand) % 16,
                                                                  &rand);
kernel/rcu/refscale.c
index 338e7c5ac44a14ae107ad5bc1d0867f5b3a271d9..aacfcc9838b374e750f81c5744f893c38655a0ac 100644 (file)
@@ -75,6 +75,9 @@ MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock.");
 torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
 torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
 
+// Number of seconds to extend warm-up and cool-down for multiple guest OSes
+torture_param(long, guest_os_delay, 0,
+             "Number of seconds to extend warm-up/cool-down for multiple guest OSes.");
 // Wait until there are multiple CPUs before starting test.
 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
              "Holdoff time before test start (s)");
@@ -831,6 +834,18 @@ static void rcu_scale_one_reader(void)
                cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
 }
 
+// Warm up cache, or, if needed run a series of rcu_scale_one_reader()
+// to allow multiple rcuscale guest OSes to collect mutually valid data.
+static void rcu_scale_warm_cool(void)
+{
+       unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1);
+
+       do {
+               rcu_scale_one_reader();
+               cond_resched();
+       } while (time_before(jiffies, jdone));
+}
+
 // Reader kthread.  Repeatedly does empty RCU read-side
 // critical section, minimizing update-side interference.
 static int
@@ -859,7 +874,7 @@ repeat:
                goto end;
 
        // Make sure that the CPU is affinitized appropriately during testing.
-       WARN_ON_ONCE(raw_smp_processor_id() != me);
+       WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);
 
        WRITE_ONCE(rt->start_reader, 0);
        if (!atomic_dec_return(&n_started))
@@ -987,6 +1002,7 @@ static int main_func(void *arg)
                schedule_timeout_uninterruptible(1);
 
        // Start exp readers up per experiment
+       rcu_scale_warm_cool();
        for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
                if (torture_must_stop())
                        goto end;
@@ -1017,6 +1033,7 @@ static int main_func(void *arg)
 
                result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
        }
+       rcu_scale_warm_cool();
 
        // Print the average of all experiments
        SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
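
For reference, guest_os_delay added above is an ordinary torture_param() module parameter, so when the test is built as the refscale module it can be set at load time; the value below is illustrative rather than taken from this commit:

	# Illustrative only: extend refscale warm-up/cool-down so that several
	# guest OSes running the test concurrently collect mutually valid data.
	modprobe refscale guest_os_delay=5
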
kernel/rcu/srcutiny.c
index 549c03336ee97150598a37e4c39f981ad21c98d5..4dcbf8aa80ff73e9844482356c28967d13a87e24 100644 (file)
@@ -122,8 +122,8 @@ void srcu_drive_gp(struct work_struct *wp)
        ssp = container_of(wp, struct srcu_struct, srcu_work);
        preempt_disable();  // Needed for PREEMPT_AUTO
        if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) {
-               return; /* Already running or nothing to do. */
                preempt_enable();
+               return; /* Already running or nothing to do. */
        }
 
        /* Remove recently arrived callbacks and wait for readers. */
kernel/rcu/tasks.h
index 6333f4ccf024be2cb5f8dfffc82f3ca679161415..c789d994e7ebc9eaef18a4fc201b43b9887b91ec 100644 (file)
@@ -1398,7 +1398,8 @@ static void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
  */
 void synchronize_rcu_tasks_rude(void)
 {
-       synchronize_rcu_tasks_generic(&rcu_tasks_rude);
+       if (!IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR) || IS_ENABLED(CONFIG_FORCE_TASKS_RUDE_RCU))
+               synchronize_rcu_tasks_generic(&rcu_tasks_rude);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
 
@@ -1540,22 +1541,7 @@ static void rcu_st_need_qs(struct task_struct *t, u8 v)
  */
 u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new)
 {
-       union rcu_special ret;
-       union rcu_special trs_old = READ_ONCE(t->trc_reader_special);
-       union rcu_special trs_new = trs_old;
-
-       if (trs_old.b.need_qs != old)
-               return trs_old.b.need_qs;
-       trs_new.b.need_qs = new;
-
-       // Although cmpxchg() appears to KCSAN to update all four bytes,
-       // only the .b.need_qs byte actually changes.
-       instrument_atomic_read_write(&t->trc_reader_special.b.need_qs,
-                                    sizeof(t->trc_reader_special.b.need_qs));
-       // Avoid false-positive KCSAN failures.
-       ret.s = data_race(cmpxchg(&t->trc_reader_special.s, trs_old.s, trs_new.s));
-
-       return ret.b.need_qs;
+       return cmpxchg(&t->trc_reader_special.b.need_qs, old, new);
 }
 EXPORT_SYMBOL_GPL(rcu_trc_cmpxchg_need_qs);
 
kernel/rcu/tree.c
index b1f883fcd9185a5e22c10102d1024c40688f57fb..ff98233d4aa59f6ad4488ee96c1ea07ce56e9935 100644 (file)
@@ -3511,7 +3511,7 @@ static int krc_count(struct kfree_rcu_cpu *krcp)
 }
 
 static void
-schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+__schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
 {
        long delay, delay_left;
 
@@ -3525,6 +3525,16 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
        queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
 }
 
+static void
+schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&krcp->lock, flags);
+       __schedule_delayed_monitor_work(krcp);
+       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+}
+
 static void
 kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
 {
@@ -3836,7 +3846,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 
        // Set timer to drain after KFREE_DRAIN_JIFFIES.
        if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
-               schedule_delayed_monitor_work(krcp);
+               __schedule_delayed_monitor_work(krcp);
 
 unlock_return:
        krc_this_cpu_unlock(krcp, flags);
@@ -4194,7 +4204,6 @@ static void start_poll_synchronize_rcu_common(void)
        struct rcu_data *rdp;
        struct rcu_node *rnp;
 
-       lockdep_assert_irqs_enabled();
        local_irq_save(flags);
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
@@ -4219,9 +4228,6 @@ static void start_poll_synchronize_rcu_common(void)
  * grace period has elapsed in the meantime.  If the needed grace period
  * is not already slated to start, notifies RCU core of the need for that
  * grace period.
- *
- * Interrupts must be enabled for the case where it is necessary to awaken
- * the grace-period kthread.
  */
 unsigned long start_poll_synchronize_rcu(void)
 {
@@ -4242,9 +4248,6 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
  * grace period (whether normal or expedited) has elapsed in the meantime.
  * If the needed grace period is not already slated to start, notifies
  * RCU core of the need for that grace period.
- *
- * Interrupts must be enabled for the case where it is necessary to awaken
- * the grace-period kthread.
  */
 void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
 {
@@ -5580,8 +5583,7 @@ void rcu_init_geometry(void)
         * Complain and fall back to the compile-time values if this
         * limit is exceeded.
         */
-       if (rcu_fanout_leaf < 2 ||
-           rcu_fanout_leaf > sizeof(unsigned long) * 8) {
+       if (rcu_fanout_leaf < 2 || rcu_fanout_leaf > BITS_PER_LONG) {
                rcu_fanout_leaf = RCU_FANOUT_LEAF;
                WARN_ON(1);
                return;
kernel/rcu/tree_nocb.h
index 16865475120ba38c741aae897a3dc8d99f95d0ee..2605dd234a13c8aec5b74ebf5e006005ce98ea0f 100644 (file)
@@ -891,7 +891,18 @@ static void nocb_cb_wait(struct rcu_data *rdp)
        swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
                                            nocb_cb_wait_cond(rdp));
        if (kthread_should_park()) {
-               kthread_parkme();
+               /*
+                * kthread_park() must be preceded by an rcu_barrier().
+                * But yet another rcu_barrier() might have sneaked in between
+                * the barrier callback execution and the callbacks counter
+                * decrement.
+                */
+               if (rdp->nocb_cb_sleep) {
+                       rcu_nocb_lock_irqsave(rdp, flags);
+                       WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+                       rcu_nocb_unlock_irqrestore(rdp, flags);
+                       kthread_parkme();
+               }
        } else if (READ_ONCE(rdp->nocb_cb_sleep)) {
                WARN_ON(signal_pending(current));
                trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
kernel/rcu/tree_plugin.h
index 1c7cbd145d5e3766c1cfd67e99ffb8a69ce8adc5..3927ea5f7955c0838359c9cc2fde7515b4d7113c 100644 (file)
@@ -183,9 +183,9 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
        switch (blkd_state) {
        case 0:
        case                RCU_EXP_TASKS:
-       case                RCU_EXP_TASKS + RCU_GP_BLKD:
+       case                RCU_EXP_TASKS | RCU_GP_BLKD:
        case RCU_GP_TASKS:
-       case RCU_GP_TASKS + RCU_EXP_TASKS:
+       case RCU_GP_TASKS | RCU_EXP_TASKS:
 
                /*
                 * Blocking neither GP, or first task blocking the normal
@@ -198,10 +198,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 
        case                                              RCU_EXP_BLKD:
        case                                RCU_GP_BLKD:
-       case                                RCU_GP_BLKD + RCU_EXP_BLKD:
-       case RCU_GP_TASKS +                               RCU_EXP_BLKD:
-       case RCU_GP_TASKS +                 RCU_GP_BLKD + RCU_EXP_BLKD:
-       case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
+       case                                RCU_GP_BLKD | RCU_EXP_BLKD:
+       case RCU_GP_TASKS |                               RCU_EXP_BLKD:
+       case RCU_GP_TASKS |                 RCU_GP_BLKD | RCU_EXP_BLKD:
+       case RCU_GP_TASKS | RCU_EXP_TASKS | RCU_GP_BLKD | RCU_EXP_BLKD:
 
                /*
                 * First task arriving that blocks either GP, or first task
@@ -214,9 +214,9 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
                break;
 
-       case                RCU_EXP_TASKS +               RCU_EXP_BLKD:
-       case                RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
-       case RCU_GP_TASKS + RCU_EXP_TASKS +               RCU_EXP_BLKD:
+       case                RCU_EXP_TASKS |               RCU_EXP_BLKD:
+       case                RCU_EXP_TASKS | RCU_GP_BLKD | RCU_EXP_BLKD:
+       case RCU_GP_TASKS | RCU_EXP_TASKS |               RCU_EXP_BLKD:
 
                /*
                 * Second or subsequent task blocking the expedited GP.
@@ -227,8 +227,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                list_add(&t->rcu_node_entry, rnp->exp_tasks);
                break;
 
-       case RCU_GP_TASKS +                 RCU_GP_BLKD:
-       case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
+       case RCU_GP_TASKS |                 RCU_GP_BLKD:
+       case RCU_GP_TASKS | RCU_EXP_TASKS | RCU_GP_BLKD:
 
                /*
                 * Second or subsequent task blocking the normal GP.
kernel/rcu/tree_stall.h
index 4432db6d0b99b3bfd79194abfe3be478e8929f59..925fcdad5dea22cfc8b0648546b78870cee485a6 100644 (file)
@@ -76,36 +76,6 @@ int rcu_jiffies_till_stall_check(void)
 }
 EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
 
-/**
- * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
- *
- * Returns @true if the current grace period is sufficiently old that
- * it is reasonable to assume that it might be stalled.  This can be
- * useful when deciding whether to allocate memory to enable RCU-mediated
- * freeing on the one hand or just invoking synchronize_rcu() on the other.
- * The latter is preferable when the grace period is stalled.
- *
- * Note that sampling of the .gp_start and .gp_seq fields must be done
- * carefully to avoid false positives at the beginnings and ends of
- * grace periods.
- */
-bool rcu_gp_might_be_stalled(void)
-{
-       unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
-       unsigned long j = jiffies;
-
-       if (d < RCU_STALL_MIGHT_MIN)
-               d = RCU_STALL_MIGHT_MIN;
-       smp_mb(); // jiffies before .gp_seq to avoid false positives.
-       if (!rcu_gp_in_progress())
-               return false;
-       // Long delays at this point avoids false positive, but a delay
-       // of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
-       smp_mb(); // .gp_seq before second .gp_start
-       // And ditto here.
-       return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
-}
-
 /* Don't do RCU CPU stall warnings during long sysrq printouts. */
 void rcu_sysrq_start(void)
 {
@@ -365,7 +335,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
  * that don't support NMI-based stack dumps.  The NMI-triggered stack
  * traces are more accurate because they are printed by the target CPU.
  */
-static void rcu_dump_cpu_stacks(void)
+static void rcu_dump_cpu_stacks(unsigned long gp_seq)
 {
        int cpu;
        unsigned long flags;
@@ -373,15 +343,23 @@ static void rcu_dump_cpu_stacks(void)
 
        rcu_for_each_leaf_node(rnp) {
                printk_deferred_enter();
-               raw_spin_lock_irqsave_rcu_node(rnp, flags);
-               for_each_leaf_node_possible_cpu(rnp, cpu)
+               for_each_leaf_node_possible_cpu(rnp, cpu) {
+                       if (gp_seq != data_race(rcu_state.gp_seq)) {
+                               printk_deferred_exit();
+                               pr_err("INFO: Stall ended during stack backtracing.\n");
+                               return;
+                       }
+                       if (!(data_race(rnp->qsmask) & leaf_node_cpu_bit(rnp, cpu)))
+                               continue;
+                       raw_spin_lock_irqsave_rcu_node(rnp, flags);
                        if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
                                if (cpu_is_offline(cpu))
                                        pr_err("Offline CPU %d blocking current GP.\n", cpu);
                                else
                                        dump_cpu_task(cpu);
                        }
-               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+                       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+               }
                printk_deferred_exit();
        }
 }
@@ -638,7 +616,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen,
               data_race(rcu_state.n_online_cpus)); // Diagnostic read
        if (ndetected) {
-               rcu_dump_cpu_stacks();
+               rcu_dump_cpu_stacks(gp_seq);
 
                /* Complain about tasks blocking the grace period. */
                rcu_for_each_leaf_node(rnp)
@@ -670,7 +648,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
        rcu_force_quiescent_state();  /* Kick them all. */
 }
 
-static void print_cpu_stall(unsigned long gps)
+static void print_cpu_stall(unsigned long gp_seq, unsigned long gps)
 {
        int cpu;
        unsigned long flags;
@@ -707,7 +685,7 @@ static void print_cpu_stall(unsigned long gps)
        rcu_check_gp_kthread_expired_fqs_timer();
        rcu_check_gp_kthread_starvation();
 
-       rcu_dump_cpu_stacks();
+       rcu_dump_cpu_stacks(gp_seq);
 
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        /* Rewrite if needed in case of slow consoles. */
@@ -789,7 +767,8 @@ static void check_cpu_stall(struct rcu_data *rdp)
        gs2 = READ_ONCE(rcu_state.gp_seq);
        if (gs1 != gs2 ||
            ULONG_CMP_LT(j, js) ||
-           ULONG_CMP_GE(gps, js))
+           ULONG_CMP_GE(gps, js) ||
+           !rcu_seq_state(gs2))
                return; /* No stall or GP completed since entering function. */
        rnp = rdp->mynode;
        jn = jiffies + ULONG_MAX / 2;
@@ -810,7 +789,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
                        pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
                } else if (self_detected) {
                        /* We haven't checked in, so go dump stack. */
-                       print_cpu_stall(gps);
+                       print_cpu_stall(gs2, gps);
                } else {
                        /* They had a few time units to dump stack, so complain. */
                        print_other_cpu_stall(gs2, gps);
tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index c3808c490d92dd0b044c2fab06f61492424c2826..f87046b702d88e6819bf6b2368b7d5b2a09b1d66 100755 (executable)
@@ -56,27 +56,30 @@ do
        echo > $i/kvm-test-1-run-qemu.sh.out
        export TORTURE_AFFINITY=
        kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
-       cat << '        ___EOF___' >> $T/cpubatches.awk
-       END {
-               affinitylist = "";
-               if (!gotcpus()) {
-                       print "echo No CPU-affinity information, so no taskset command.";
-               } else if (cpu_count !~ /^[0-9][0-9]*$/) {
-                       print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
-               } else {
-                       affinitylist = nextcpus(cpu_count);
-                       if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
-                               print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
-                       else if (!dumpcpustate())
-                               print "echo " scenario ": Could not dump state, so no taskset command.";
-                       else
-                               print "export TORTURE_AFFINITY=" affinitylist;
+       if test -z "${TORTURE_NO_AFFINITY}"
+       then
+               cat << '                ___EOF___' >> $T/cpubatches.awk
+               END {
+                       affinitylist = "";
+                       if (!gotcpus()) {
+                               print "echo No CPU-affinity information, so no taskset command.";
+                       } else if (cpu_count !~ /^[0-9][0-9]*$/) {
+                               print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
+                       } else {
+                               affinitylist = nextcpus(cpu_count);
+                               if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
+                                       print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
+                               else if (!dumpcpustate())
+                                       print "echo " scenario ": Could not dump state, so no taskset command.";
+                               else
+                                       print "export TORTURE_AFFINITY=" affinitylist;
+                       }
                }
-       }
-       ___EOF___
-       cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
-       affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
-       $affinity_export
+               ___EOF___
+               cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
+               affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
+               $affinity_export
+       fi
        kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
 done
 for i in $runfiles
tools/testing/selftests/rcutorture/bin/kvm.sh
index 7af73ddc148d1dfa95ac8e01d2f4ea087dfe7160..42e5e8597a1a6e1aca0a7c3b9c94a34155daf435 100755 (executable)
@@ -42,6 +42,7 @@ TORTURE_JITTER_STOP=""
 TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
+TORTURE_NO_AFFINITY=""
 TORTURE_QEMU_MEM=512
 torture_qemu_mem_default=1
 TORTURE_REMOTE=
@@ -82,6 +83,7 @@ usage () {
        echo "       --kmake-arg kernel-make-arguments"
        echo "       --mac nn:nn:nn:nn:nn:nn"
        echo "       --memory megabytes|nnnG"
+       echo "       --no-affinity"
        echo "       --no-initrd"
        echo "       --qemu-args qemu-arguments"
        echo "       --qemu-cmd qemu-system-..."
@@ -220,6 +222,9 @@ do
                torture_qemu_mem_default=
                shift
                ;;
+       --no-affinity)
+               TORTURE_NO_AFFINITY="no-affinity"
+               ;;
        --no-initrd)
                TORTURE_INITRD=""; export TORTURE_INITRD
                ;;
@@ -417,6 +422,7 @@ TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_K
 TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
 TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
 TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD
+TORTURE_NO_AFFINITY="$TORTURE_NO_AFFINITY"; export TORTURE_NO_AFFINITY
 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
 TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
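
For reference, the new --no-affinity flag is passed to kvm.sh like any other option; the scenario name and duration below are illustrative and are not part of this commit:

	# Illustrative only: run one rcutorture scenario without the taskset-based
	# CPU affinity, leaving guest-vCPU placement to the host scheduler.
	tools/testing/selftests/rcutorture/bin/kvm.sh --no-affinity --configs TREE01 --duration 10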