linux.git/commitdiff
Merge branches 'rcu/fixes', 'rcu/nocb', 'rcu/torture', 'rcu/stall' and 'rcu/srcu...
author    Frederic Weisbecker <[email protected]>
Fri, 15 Nov 2024 21:38:53 +0000 (22:38 +0100)
committer Frederic Weisbecker <[email protected]>
Fri, 15 Nov 2024 21:38:53 +0000 (22:38 +0100)
18 files changed:
Documentation/RCU/stallwarn.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/kernel-per-CPU-kthreads.rst
include/linux/rcutiny.h
include/linux/rcutree.h
kernel/rcu/Kconfig
kernel/rcu/rcu_segcblist.h
kernel/rcu/rcuscale.c
kernel/rcu/rcutorture.c
kernel/rcu/refscale.c
kernel/rcu/srcutiny.c
kernel/rcu/tasks.h
kernel/rcu/tree.c
kernel/rcu/tree_nocb.h
kernel/rcu/tree_plugin.h
kernel/rcu/tree_stall.h
tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
tools/testing/selftests/rcutorture/bin/kvm.sh

Documentation/RCU/stallwarn.rst
index ca7b7cd806a16c9c98fdf04eadbe4f4997249c82..30080ff6f4062d9c11affa57cb56cc1e9afd0285 100644 (file)
@@ -249,7 +249,7 @@ ticks this GP)" indicates that this CPU has not taken any scheduling-clock
 interrupts during the current stalled grace period.
 
 The "idle=" portion of the message prints the dyntick-idle state.
-The hex number before the first "/" is the low-order 12 bits of the
+The hex number before the first "/" is the low-order 16 bits of the
 dynticks counter, which will have an even-numbered value if the CPU
 is in dyntick-idle mode and an odd-numbered value otherwise.  The hex
 number between the two "/"s is the value of the nesting, which will be
Documentation/admin-guide/kernel-parameters.txt
index 203ec51e41d48eab09a9b7e3f1544336f869a414..686ea876a89c7d08836490fe15bb81c8bb96cf39 100644 (file)
                        Set time (jiffies) between CPU-hotplug operations,
                        or zero to disable CPU-hotplug testing.
 
-       rcutorture.read_exit= [KNL]
-                       Set the number of read-then-exit kthreads used
-                       to test the interaction of RCU updaters and
-                       task-exit processing.
-
        rcutorture.read_exit_burst= [KNL]
                        The number of times in a given read-then-exit
                        episode that a set of read-then-exit kthreads
Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index b6aeae3327ceb537b78fdbd86961ae670614395b..ea7fa2a8bbf0b95a7116e19a0cb6db3d3348c71a 100644 (file)
@@ -315,7 +315,7 @@ To reduce its OS jitter, do at least one of the following:
        to do.
 
 Name:
-  rcuop/%d and rcuos/%d
+  rcuop/%d, rcuos/%d, and rcuog/%d
 
 Purpose:
   Offload RCU callbacks from the corresponding CPU.
include/linux/rcutiny.h
index 0ee270b3f5ed2fb8cb8aa5dc4d11e7a24c9675f3..fe42315f667fc5be7f2ed8eae6ea0c7193030846 100644 (file)
@@ -165,7 +165,6 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
 static inline bool rcu_is_watching(void) { return true; }
 static inline void rcu_momentary_eqs(void) { }
 static inline void kfree_rcu_scheduler_running(void) { }
-static inline bool rcu_gp_might_be_stalled(void) { return false; }
 
 /* Avoid RCU read-side critical sections leaking across. */
 static inline void rcu_all_qs(void) { barrier(); }
include/linux/rcutree.h
index 90a684f94776ed8cccf53e0e35239c50729943d6..27d86d9127817e50f8d4dd79e1990d70a02435bb 100644 (file)
@@ -40,7 +40,6 @@ void kvfree_rcu_barrier(void);
 void rcu_barrier(void);
 void rcu_momentary_eqs(void);
 void kfree_rcu_scheduler_running(void);
-bool rcu_gp_might_be_stalled(void);
 
 struct rcu_gp_oldstate {
        unsigned long rgos_norm;
kernel/rcu/Kconfig
index 3e079de0f5b434bd394033f08a38a00228c07432..b9b6bc55185dba365236b4f01d8aaa3596f70ecd 100644 (file)
@@ -249,16 +249,24 @@ config RCU_NOCB_CPU
          workloads will incur significant increases in context-switch
          rates.
 
-         This option offloads callback invocation from the set of CPUs
-         specified at boot time by the rcu_nocbs parameter.  For each
-         such CPU, a kthread ("rcuox/N") will be created to invoke
-         callbacks, where the "N" is the CPU being offloaded, and where
-         the "x" is "p" for RCU-preempt (PREEMPTION kernels) and "s" for
-         RCU-sched (!PREEMPTION kernels).  Nothing prevents this kthread
-         from running on the specified CPUs, but (1) the kthreads may be
-         preempted between each callback, and (2) affinity or cgroups can
-         be used to force the kthreads to run on whatever set of CPUs is
-         desired.
+         This option offloads callback invocation from the set of
+         CPUs specified at boot time by the rcu_nocbs parameter.
+         For each such CPU, a kthread ("rcuox/N") will be created to
+         invoke callbacks, where the "N" is the CPU being offloaded,
+         and where the "x" is "p" for RCU-preempt (PREEMPTION kernels)
+         and "s" for RCU-sched (!PREEMPTION kernels).  This option
+         also creates another kthread for each sqrt(nr_cpu_ids) CPUs
+         ("rcuog/N", where N is the first CPU in that group to come
+         online), which handles grace periods for its group.  Nothing
+         prevents these kthreads from running on the specified CPUs,
+         but (1) the kthreads may be preempted between each callback,
+         and (2) affinity or cgroups can be used to force the kthreads
+         to run on whatever set of CPUs is desired.
+
+         The sqrt(nr_cpu_ids) grouping may be overridden using the
+         rcutree.rcu_nocb_gp_stride kernel boot parameter.  This can
+         be especially helpful for smaller numbers of CPUs, where
+         sqrt(nr_cpu_ids) can be a bit of a blunt instrument.
 
          Say Y here if you need reduced OS jitter, despite added overhead.
          Say N here if you are unsure.
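
For reference, the offloading and grouping described in this help text are selected on the kernel boot command line; the CPU range and stride in the sketch below are illustrative values and are not taken from this commit:

	# Illustrative only: offload callbacks from CPUs 8-15 and group the
	# rcuog/N grace-period kthreads by 4 CPUs rather than the default
	# sqrt(nr_cpu_ids) stride.  These strings go on the kernel boot command line.
	rcu_nocbs=8-15 rcutree.rcu_nocb_gp_stride=4
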
kernel/rcu/rcu_segcblist.h
index 2599040756369a0cb4bbd7dd66d2a37d47d7223d..fadc08ad4b7b603de81a752e9c06b5e702a74f7c 100644 (file)
@@ -120,7 +120,6 @@ void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp);
 void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v);
 void rcu_segcblist_init(struct rcu_segcblist *rsclp);
 void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
-void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload);
 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
kernel/rcu/rcuscale.c
index 6d37596deb1f123ce855b430559faeee9fa50f33..0f3059b1b80d03ddb8884fa4352e9497f476eecf 100644 (file)
@@ -889,14 +889,14 @@ kfree_scale_init(void)
 
                if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
                        pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
-                       WARN_ON_ONCE(1);
-                       return -1;
+                       firsterr = -1;
+                       goto unwind;
                }
 
                if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) {
                        pr_alert("ERROR: call_rcu() CBs are being too lazy!\n");
-                       WARN_ON_ONCE(1);
-                       return -1;
+                       firsterr = -1;
+                       goto unwind;
                }
        }
 
kernel/rcu/rcutorture.c
index a313cdcb0960f3502bc5a3bb29a251d0f8a23068..2ae8a5e5e99aa37ab54110a33c12c29746bfa510 100644 (file)
@@ -397,6 +397,7 @@ struct rcu_torture_ops {
        int slow_gps;
        int no_pi_lock;
        int debug_objects;
+       int start_poll_irqsoff;
        const char *name;
 };
 
@@ -585,6 +586,7 @@ static struct rcu_torture_ops rcu_ops = {
        .can_boost              = IS_ENABLED(CONFIG_RCU_BOOST),
        .extendables            = RCUTORTURE_MAX_EXTEND,
        .debug_objects          = 1,
+       .start_poll_irqsoff     = 1,
        .name                   = "rcu"
 };
 
@@ -1081,8 +1083,13 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *star
                        // At most one persisted message per boost test.
                        j = jiffies;
                        lp = READ_ONCE(last_persist);
-                       if (time_after(j, lp + mininterval) && cmpxchg(&last_persist, lp, j) == lp)
-                               pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu);
+                       if (time_after(j, lp + mininterval) &&
+                           cmpxchg(&last_persist, lp, j) == lp) {
+                               if (cpu < 0)
+                                       pr_info("Boost inversion persisted: QS from all CPUs\n");
+                               else
+                                       pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu);
+                       }
                        return false; // passed on a technicality
                }
                VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
@@ -1717,14 +1724,22 @@ rcu_torture_fakewriter(void *arg)
                                cur_ops->cond_sync_exp_full(&gp_snap_full);
                                break;
                        case RTWS_POLL_GET:
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_disable();
                                gp_snap = cur_ops->start_gp_poll();
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_enable();
                                while (!cur_ops->poll_gp_state(gp_snap)) {
                                        torture_hrtimeout_jiffies(torture_random(&rand) % 16,
                                                                  &rand);
                                }
                                break;
                        case RTWS_POLL_GET_FULL:
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_disable();
                                cur_ops->start_gp_poll_full(&gp_snap_full);
+                               if (cur_ops->start_poll_irqsoff)
+                                       local_irq_enable();
                                while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
                                        torture_hrtimeout_jiffies(torture_random(&rand) % 16,
                                                                  &rand);
kernel/rcu/refscale.c
index 338e7c5ac44a14ae107ad5bc1d0867f5b3a271d9..aacfcc9838b374e750f81c5744f893c38655a0ac 100644 (file)
@@ -75,6 +75,9 @@ MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock.");
 torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
 torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
 
+// Number of seconds to extend warm-up and cool-down for multiple guest OSes
+torture_param(long, guest_os_delay, 0,
+             "Number of seconds to extend warm-up/cool-down for multiple guest OSes.");
 // Wait until there are multiple CPUs before starting test.
 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
              "Holdoff time before test start (s)");
@@ -831,6 +834,18 @@ static void rcu_scale_one_reader(void)
                cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
 }
 
+// Warm up cache, or, if needed run a series of rcu_scale_one_reader()
+// to allow multiple rcuscale guest OSes to collect mutually valid data.
+static void rcu_scale_warm_cool(void)
+{
+       unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1);
+
+       do {
+               rcu_scale_one_reader();
+               cond_resched();
+       } while (time_before(jiffies, jdone));
+}
+
 // Reader kthread.  Repeatedly does empty RCU read-side
 // critical section, minimizing update-side interference.
 static int
@@ -859,7 +874,7 @@ repeat:
                goto end;
 
        // Make sure that the CPU is affinitized appropriately during testing.
-       WARN_ON_ONCE(raw_smp_processor_id() != me);
+       WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);
 
        WRITE_ONCE(rt->start_reader, 0);
        if (!atomic_dec_return(&n_started))
@@ -987,6 +1002,7 @@ static int main_func(void *arg)
                schedule_timeout_uninterruptible(1);
 
        // Start exp readers up per experiment
+       rcu_scale_warm_cool();
        for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
                if (torture_must_stop())
                        goto end;
@@ -1017,6 +1033,7 @@ static int main_func(void *arg)
 
                result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
        }
+       rcu_scale_warm_cool();
 
        // Print the average of all experiments
        SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");
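
For reference, guest_os_delay added above is an ordinary torture_param() module parameter, so when the test is built as the refscale module it can be set at load time; the value below is illustrative rather than taken from this commit:

	# Illustrative only: extend refscale warm-up/cool-down so that several
	# guest OSes running the test concurrently collect mutually valid data.
	modprobe refscale guest_os_delay=5
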
kernel/rcu/srcutiny.c
index 549c03336ee97150598a37e4c39f981ad21c98d5..4dcbf8aa80ff73e9844482356c28967d13a87e24 100644 (file)
@@ -122,8 +122,8 @@ void srcu_drive_gp(struct work_struct *wp)
        ssp = container_of(wp, struct srcu_struct, srcu_work);
        preempt_disable();  // Needed for PREEMPT_AUTO
        if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) {
-               return; /* Already running or nothing to do. */
                preempt_enable();
+               return; /* Already running or nothing to do. */
        }
 
        /* Remove recently arrived callbacks and wait for readers. */
kernel/rcu/tasks.h
index 6333f4ccf024be2cb5f8dfffc82f3ca679161415..c789d994e7ebc9eaef18a4fc201b43b9887b91ec 100644 (file)
@@ -1398,7 +1398,8 @@ static void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
  */
 void synchronize_rcu_tasks_rude(void)
 {
-       synchronize_rcu_tasks_generic(&rcu_tasks_rude);
+       if (!IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR) || IS_ENABLED(CONFIG_FORCE_TASKS_RUDE_RCU))
+               synchronize_rcu_tasks_generic(&rcu_tasks_rude);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
 
@@ -1540,22 +1541,7 @@ static void rcu_st_need_qs(struct task_struct *t, u8 v)
  */
 u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new)
 {
-       union rcu_special ret;
-       union rcu_special trs_old = READ_ONCE(t->trc_reader_special);
-       union rcu_special trs_new = trs_old;
-
-       if (trs_old.b.need_qs != old)
-               return trs_old.b.need_qs;
-       trs_new.b.need_qs = new;
-
-       // Although cmpxchg() appears to KCSAN to update all four bytes,
-       // only the .b.need_qs byte actually changes.
-       instrument_atomic_read_write(&t->trc_reader_special.b.need_qs,
-                                    sizeof(t->trc_reader_special.b.need_qs));
-       // Avoid false-positive KCSAN failures.
-       ret.s = data_race(cmpxchg(&t->trc_reader_special.s, trs_old.s, trs_new.s));
-
-       return ret.b.need_qs;
+       return cmpxchg(&t->trc_reader_special.b.need_qs, old, new);
 }
 EXPORT_SYMBOL_GPL(rcu_trc_cmpxchg_need_qs);
 
kernel/rcu/tree.c
index b1f883fcd9185a5e22c10102d1024c40688f57fb..ff98233d4aa59f6ad4488ee96c1ea07ce56e9935 100644 (file)
@@ -3511,7 +3511,7 @@ static int krc_count(struct kfree_rcu_cpu *krcp)
 }
 
 static void
-schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+__schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
 {
        long delay, delay_left;
 
@@ -3525,6 +3525,16 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
        queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
 }
 
+static void
+schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&krcp->lock, flags);
+       __schedule_delayed_monitor_work(krcp);
+       raw_spin_unlock_irqrestore(&krcp->lock, flags);
+}
+
 static void
 kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
 {
@@ -3836,7 +3846,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 
        // Set timer to drain after KFREE_DRAIN_JIFFIES.
        if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
-               schedule_delayed_monitor_work(krcp);
+               __schedule_delayed_monitor_work(krcp);
 
 unlock_return:
        krc_this_cpu_unlock(krcp, flags);
@@ -4194,7 +4204,6 @@ static void start_poll_synchronize_rcu_common(void)
        struct rcu_data *rdp;
        struct rcu_node *rnp;
 
-       lockdep_assert_irqs_enabled();
        local_irq_save(flags);
        rdp = this_cpu_ptr(&rcu_data);
        rnp = rdp->mynode;
@@ -4219,9 +4228,6 @@ static void start_poll_synchronize_rcu_common(void)
  * grace period has elapsed in the meantime.  If the needed grace period
  * is not already slated to start, notifies RCU core of the need for that
  * grace period.
- *
- * Interrupts must be enabled for the case where it is necessary to awaken
- * the grace-period kthread.
  */
 unsigned long start_poll_synchronize_rcu(void)
 {
@@ -4242,9 +4248,6 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
  * grace period (whether normal or expedited) has elapsed in the meantime.
  * If the needed grace period is not already slated to start, notifies
  * RCU core of the need for that grace period.
- *
- * Interrupts must be enabled for the case where it is necessary to awaken
- * the grace-period kthread.
  */
 void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
 {
@@ -5580,8 +5583,7 @@ void rcu_init_geometry(void)
         * Complain and fall back to the compile-time values if this
         * limit is exceeded.
         */
-       if (rcu_fanout_leaf < 2 ||
-           rcu_fanout_leaf > sizeof(unsigned long) * 8) {
+       if (rcu_fanout_leaf < 2 || rcu_fanout_leaf > BITS_PER_LONG) {
                rcu_fanout_leaf = RCU_FANOUT_LEAF;
                WARN_ON(1);
                return;
kernel/rcu/tree_nocb.h
index 16865475120ba38c741aae897a3dc8d99f95d0ee..2605dd234a13c8aec5b74ebf5e006005ce98ea0f 100644 (file)
@@ -891,7 +891,18 @@ static void nocb_cb_wait(struct rcu_data *rdp)
        swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
                                            nocb_cb_wait_cond(rdp));
        if (kthread_should_park()) {
-               kthread_parkme();
+               /*
+                * kthread_park() must be preceded by an rcu_barrier().
+                * But yet another rcu_barrier() might have sneaked in between
+                * the barrier callback execution and the callbacks counter
+                * decrement.
+                */
+               if (rdp->nocb_cb_sleep) {
+                       rcu_nocb_lock_irqsave(rdp, flags);
+                       WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+                       rcu_nocb_unlock_irqrestore(rdp, flags);
+                       kthread_parkme();
+               }
        } else if (READ_ONCE(rdp->nocb_cb_sleep)) {
                WARN_ON(signal_pending(current));
                trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
kernel/rcu/tree_plugin.h
index 1c7cbd145d5e3766c1cfd67e99ffb8a69ce8adc5..3927ea5f7955c0838359c9cc2fde7515b4d7113c 100644 (file)
@@ -183,9 +183,9 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
        switch (blkd_state) {
        case 0:
        case                RCU_EXP_TASKS:
-       case                RCU_EXP_TASKS + RCU_GP_BLKD:
+       case                RCU_EXP_TASKS | RCU_GP_BLKD:
        case RCU_GP_TASKS:
-       case RCU_GP_TASKS + RCU_EXP_TASKS:
+       case RCU_GP_TASKS | RCU_EXP_TASKS:
 
                /*
                 * Blocking neither GP, or first task blocking the normal
@@ -198,10 +198,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 
        case                                              RCU_EXP_BLKD:
        case                                RCU_GP_BLKD:
-       case                                RCU_GP_BLKD + RCU_EXP_BLKD:
-       case RCU_GP_TASKS +                               RCU_EXP_BLKD:
-       case RCU_GP_TASKS +                 RCU_GP_BLKD + RCU_EXP_BLKD:
-       case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
+       case                                RCU_GP_BLKD | RCU_EXP_BLKD:
+       case RCU_GP_TASKS |                               RCU_EXP_BLKD:
+       case RCU_GP_TASKS |                 RCU_GP_BLKD | RCU_EXP_BLKD:
+       case RCU_GP_TASKS | RCU_EXP_TASKS | RCU_GP_BLKD | RCU_EXP_BLKD:
 
                /*
                 * First task arriving that blocks either GP, or first task
@@ -214,9 +214,9 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
                break;
 
-       case                RCU_EXP_TASKS +               RCU_EXP_BLKD:
-       case                RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
-       case RCU_GP_TASKS + RCU_EXP_TASKS +               RCU_EXP_BLKD:
+       case                RCU_EXP_TASKS |               RCU_EXP_BLKD:
+       case                RCU_EXP_TASKS | RCU_GP_BLKD | RCU_EXP_BLKD:
+       case RCU_GP_TASKS | RCU_EXP_TASKS |               RCU_EXP_BLKD:
 
                /*
                 * Second or subsequent task blocking the expedited GP.
@@ -227,8 +227,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                list_add(&t->rcu_node_entry, rnp->exp_tasks);
                break;
 
-       case RCU_GP_TASKS +                 RCU_GP_BLKD:
-       case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
+       case RCU_GP_TASKS |                 RCU_GP_BLKD:
+       case RCU_GP_TASKS | RCU_EXP_TASKS | RCU_GP_BLKD:
 
                /*
                 * Second or subsequent task blocking the normal GP.
kernel/rcu/tree_stall.h
index 4432db6d0b99b3bfd79194abfe3be478e8929f59..925fcdad5dea22cfc8b0648546b78870cee485a6 100644 (file)
@@ -76,36 +76,6 @@ int rcu_jiffies_till_stall_check(void)
 }
 EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
 
-/**
- * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
- *
- * Returns @true if the current grace period is sufficiently old that
- * it is reasonable to assume that it might be stalled.  This can be
- * useful when deciding whether to allocate memory to enable RCU-mediated
- * freeing on the one hand or just invoking synchronize_rcu() on the other.
- * The latter is preferable when the grace period is stalled.
- *
- * Note that sampling of the .gp_start and .gp_seq fields must be done
- * carefully to avoid false positives at the beginnings and ends of
- * grace periods.
- */
-bool rcu_gp_might_be_stalled(void)
-{
-       unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
-       unsigned long j = jiffies;
-
-       if (d < RCU_STALL_MIGHT_MIN)
-               d = RCU_STALL_MIGHT_MIN;
-       smp_mb(); // jiffies before .gp_seq to avoid false positives.
-       if (!rcu_gp_in_progress())
-               return false;
-       // Long delays at this point avoids false positive, but a delay
-       // of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
-       smp_mb(); // .gp_seq before second .gp_start
-       // And ditto here.
-       return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
-}
-
 /* Don't do RCU CPU stall warnings during long sysrq printouts. */
 void rcu_sysrq_start(void)
 {
@@ -365,7 +335,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
  * that don't support NMI-based stack dumps.  The NMI-triggered stack
  * traces are more accurate because they are printed by the target CPU.
  */
-static void rcu_dump_cpu_stacks(void)
+static void rcu_dump_cpu_stacks(unsigned long gp_seq)
 {
        int cpu;
        unsigned long flags;
@@ -373,15 +343,23 @@ static void rcu_dump_cpu_stacks(void)
 
        rcu_for_each_leaf_node(rnp) {
                printk_deferred_enter();
-               raw_spin_lock_irqsave_rcu_node(rnp, flags);
-               for_each_leaf_node_possible_cpu(rnp, cpu)
+               for_each_leaf_node_possible_cpu(rnp, cpu) {
+                       if (gp_seq != data_race(rcu_state.gp_seq)) {
+                               printk_deferred_exit();
+                               pr_err("INFO: Stall ended during stack backtracing.\n");
+                               return;
+                       }
+                       if (!(data_race(rnp->qsmask) & leaf_node_cpu_bit(rnp, cpu)))
+                               continue;
+                       raw_spin_lock_irqsave_rcu_node(rnp, flags);
                        if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
                                if (cpu_is_offline(cpu))
                                        pr_err("Offline CPU %d blocking current GP.\n", cpu);
                                else
                                        dump_cpu_task(cpu);
                        }
-               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+                       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+               }
                printk_deferred_exit();
        }
 }
@@ -638,7 +616,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
               (long)rcu_seq_current(&rcu_state.gp_seq), totqlen,
               data_race(rcu_state.n_online_cpus)); // Diagnostic read
        if (ndetected) {
-               rcu_dump_cpu_stacks();
+               rcu_dump_cpu_stacks(gp_seq);
 
                /* Complain about tasks blocking the grace period. */
                rcu_for_each_leaf_node(rnp)
@@ -670,7 +648,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
        rcu_force_quiescent_state();  /* Kick them all. */
 }
 
-static void print_cpu_stall(unsigned long gps)
+static void print_cpu_stall(unsigned long gp_seq, unsigned long gps)
 {
        int cpu;
        unsigned long flags;
@@ -707,7 +685,7 @@ static void print_cpu_stall(unsigned long gps)
        rcu_check_gp_kthread_expired_fqs_timer();
        rcu_check_gp_kthread_starvation();
 
-       rcu_dump_cpu_stacks();
+       rcu_dump_cpu_stacks(gp_seq);
 
        raw_spin_lock_irqsave_rcu_node(rnp, flags);
        /* Rewrite if needed in case of slow consoles. */
@@ -789,7 +767,8 @@ static void check_cpu_stall(struct rcu_data *rdp)
        gs2 = READ_ONCE(rcu_state.gp_seq);
        if (gs1 != gs2 ||
            ULONG_CMP_LT(j, js) ||
-           ULONG_CMP_GE(gps, js))
+           ULONG_CMP_GE(gps, js) ||
+           !rcu_seq_state(gs2))
                return; /* No stall or GP completed since entering function. */
        rnp = rdp->mynode;
        jn = jiffies + ULONG_MAX / 2;
@@ -810,7 +789,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
                        pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
                } else if (self_detected) {
                        /* We haven't checked in, so go dump stack. */
-                       print_cpu_stall(gps);
+                       print_cpu_stall(gs2, gps);
                } else {
                        /* They had a few time units to dump stack, so complain. */
                        print_other_cpu_stall(gs2, gps);
tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index c3808c490d92dd0b044c2fab06f61492424c2826..f87046b702d88e6819bf6b2368b7d5b2a09b1d66 100755 (executable)
@@ -56,27 +56,30 @@ do
        echo > $i/kvm-test-1-run-qemu.sh.out
        export TORTURE_AFFINITY=
        kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
-       cat << '        ___EOF___' >> $T/cpubatches.awk
-       END {
-               affinitylist = "";
-               if (!gotcpus()) {
-                       print "echo No CPU-affinity information, so no taskset command.";
-               } else if (cpu_count !~ /^[0-9][0-9]*$/) {
-                       print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
-               } else {
-                       affinitylist = nextcpus(cpu_count);
-                       if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
-                               print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
-                       else if (!dumpcpustate())
-                               print "echo " scenario ": Could not dump state, so no taskset command.";
-                       else
-                               print "export TORTURE_AFFINITY=" affinitylist;
+       if test -z "${TORTURE_NO_AFFINITY}"
+       then
+               cat << '                ___EOF___' >> $T/cpubatches.awk
+               END {
+                       affinitylist = "";
+                       if (!gotcpus()) {
+                               print "echo No CPU-affinity information, so no taskset command.";
+                       } else if (cpu_count !~ /^[0-9][0-9]*$/) {
+                               print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
+                       } else {
+                               affinitylist = nextcpus(cpu_count);
+                               if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
+                                       print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
+                               else if (!dumpcpustate())
+                                       print "echo " scenario ": Could not dump state, so no taskset command.";
+                               else
+                                       print "export TORTURE_AFFINITY=" affinitylist;
+                       }
                }
-       }
-       ___EOF___
-       cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
-       affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
-       $affinity_export
+               ___EOF___
+               cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
+               affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
+               $affinity_export
+       fi
        kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
 done
 for i in $runfiles
tools/testing/selftests/rcutorture/bin/kvm.sh
index 7af73ddc148d1dfa95ac8e01d2f4ea087dfe7160..42e5e8597a1a6e1aca0a7c3b9c94a34155daf435 100755 (executable)
@@ -42,6 +42,7 @@ TORTURE_JITTER_STOP=""
 TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
+TORTURE_NO_AFFINITY=""
 TORTURE_QEMU_MEM=512
 torture_qemu_mem_default=1
 TORTURE_REMOTE=
@@ -82,6 +83,7 @@ usage () {
        echo "       --kmake-arg kernel-make-arguments"
        echo "       --mac nn:nn:nn:nn:nn:nn"
        echo "       --memory megabytes|nnnG"
+       echo "       --no-affinity"
        echo "       --no-initrd"
        echo "       --qemu-args qemu-arguments"
        echo "       --qemu-cmd qemu-system-..."
@@ -220,6 +222,9 @@ do
                torture_qemu_mem_default=
                shift
                ;;
+       --no-affinity)
+               TORTURE_NO_AFFINITY="no-affinity"
+               ;;
        --no-initrd)
                TORTURE_INITRD=""; export TORTURE_INITRD
                ;;
@@ -417,6 +422,7 @@ TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_K
 TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
 TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
 TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD
+TORTURE_NO_AFFINITY="$TORTURE_NO_AFFINITY"; export TORTURE_NO_AFFINITY
 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
 TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
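
For reference, the new --no-affinity flag is passed to kvm.sh like any other option; the scenario name and duration below are illustrative and are not part of this commit:

	# Illustrative only: run one rcutorture scenario without the taskset-based
	# CPU affinity, leaving guest-vCPU placement to the host scheduler.
	tools/testing/selftests/rcutorture/bin/kvm.sh --no-affinity --configs TREE01 --duration 10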