1 /* SPDX-License-Identifier: GPL-2.0+ */
3 * Read-Copy Update definitions shared among RCU implementations.
5 * Copyright IBM Corporation, 2011
13 #include <trace/events/rcu.h>
/*
 * Grace-period counter management.
 *
 * A sequence number is split in two: the low-order RCU_SEQ_CTR_SHIFT bits
 * hold state and the remaining upper bits hold the counter.
 */
#define RCU_SEQ_CTR_SHIFT	2
#define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)

/* Low-order bit definition for polled grace-period APIs. */
#define RCU_GET_STATE_COMPLETED	0x1

/* Only declared here; the definition is outside this header. */
extern int sysctl_sched_rt_runtime;
28 * Return the counter portion of a sequence number previously returned
29 * by rcu_seq_snap() or rcu_seq_current().
31 static inline unsigned long rcu_seq_ctr(unsigned long s)
33 return s >> RCU_SEQ_CTR_SHIFT;
37 * Return the state portion of a sequence number previously returned
38 * by rcu_seq_snap() or rcu_seq_current().
40 static inline int rcu_seq_state(unsigned long s)
42 return s & RCU_SEQ_STATE_MASK;
46 * Set the state portion of the pointed-to sequence number.
47 * The caller is responsible for preventing conflicting updates.
49 static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
51 WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
52 WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
/*
 * Mark the start of an update-side operation by incrementing *sp.
 * Warns once if the resulting state field is not 1.
 */
static inline void rcu_seq_start(unsigned long *sp)
{
	unsigned long started = *sp + 1;

	WRITE_ONCE(*sp, started);
	smp_mb(); /* Ensure update-side operation after counter increment. */
	WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
}
63 /* Compute the end-of-grace-period value for the specified sequence number. */
64 static inline unsigned long rcu_seq_endval(unsigned long *sp)
66 return (*sp | RCU_SEQ_STATE_MASK) + 1;
/*
 * Mark the end of an update-side operation by storing rcu_seq_endval() into
 * *sp.  Warns once if the state field was zero on entry.
 */
static inline void rcu_seq_end(unsigned long *sp)
{
	unsigned long endval;

	smp_mb(); /* Ensure update-side operation before counter increment. */
	WARN_ON_ONCE(!rcu_seq_state(*sp));
	endval = rcu_seq_endval(sp);
	WRITE_ONCE(*sp, endval);
}
78 * rcu_seq_snap - Take a snapshot of the update side's sequence number.
80 * This function returns the earliest value of the grace-period sequence number
81 * that will indicate that a full grace period has elapsed since the current
82 * time. Once the grace-period sequence number has reached this value, it will
83 * be safe to invoke all callbacks that have been registered prior to the
84 * current time. This value is the current grace-period number plus two to the
85 * power of the number of low-order bits reserved for state, then rounded up to
86 * the next value in which the state bits are all zero.
88 static inline unsigned long rcu_seq_snap(unsigned long *sp)
92 s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
93 smp_mb(); /* Above access must not bleed into critical section. */
/*
 * Read the update side's sequence number with READ_ONCE(); no memory
 * ordering is provided.
 */
static inline unsigned long rcu_seq_current(unsigned long *sp)
{
	unsigned long cur = READ_ONCE(*sp);

	return cur;
}
104 * Given a snapshot from rcu_seq_snap(), determine whether or not the
105 * corresponding update-side operation has started.
107 static inline bool rcu_seq_started(unsigned long *sp, unsigned long s)
109 return ULONG_CMP_LT((s - 1) & ~RCU_SEQ_STATE_MASK, READ_ONCE(*sp));
/*
 * Given a snapshot @s from rcu_seq_snap(), report whether a full
 * update-side operation has occurred, i.e. the current value has reached @s.
 */
static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
	unsigned long cur = READ_ONCE(*sp);

	return ULONG_CMP_GE(cur, s);
}
122 * Given a snapshot from rcu_seq_snap(), determine whether or not a
123 * full update-side operation has occurred, but do not allow the
124 * (ULONG_MAX / 2) safety-factor/guard-band.
126 static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
128 unsigned long cur_s = READ_ONCE(*sp);
130 return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_STATE_MASK + 1));
134 * Has a grace period completed since the time the old gp_seq was collected?
136 static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new)
138 return ULONG_CMP_LT(old, new & ~RCU_SEQ_STATE_MASK);
142 * Has a grace period started since the time the old gp_seq was collected?
144 static inline bool rcu_seq_new_gp(unsigned long old, unsigned long new)
146 return ULONG_CMP_LT((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK,
151 * Roughly how many full grace periods have elapsed between the collection
152 * of the two specified grace periods?
154 static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
156 unsigned long rnd_diff;
161 * Compute the number of grace periods (still shifted up), plus
162 * one if either of new and old is not an exact grace period.
164 rnd_diff = (new & ~RCU_SEQ_STATE_MASK) -
165 ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) +
166 ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK));
167 if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff))
168 return 1; /* Definitely no grace period has elapsed. */
169 return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2;
173 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
174 * by call_rcu() and rcu callback execution, and are therefore not part
175 * of the RCU API. These are in rcupdate.h because they are used by all
176 * RCU implementations.
179 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
/* Values passed to debug_object_active_state() for an rcu_head. */
# define STATE_RCU_HEAD_READY	0
# define STATE_RCU_HEAD_QUEUED	1

/* debug-objects descriptor for rcu_head; defined outside this header. */
extern const struct debug_obj_descr rcuhead_debug_descr;
185 static inline int debug_rcu_head_queue(struct rcu_head *head)
189 r1 = debug_object_activate(head, &rcuhead_debug_descr);
190 debug_object_active_state(head, &rcuhead_debug_descr,
191 STATE_RCU_HEAD_READY,
192 STATE_RCU_HEAD_QUEUED);
196 static inline void debug_rcu_head_unqueue(struct rcu_head *head)
198 debug_object_active_state(head, &rcuhead_debug_descr,
199 STATE_RCU_HEAD_QUEUED,
200 STATE_RCU_HEAD_READY);
201 debug_object_deactivate(head, &rcuhead_debug_descr);
203 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
204 static inline int debug_rcu_head_queue(struct rcu_head *head)
209 static inline void debug_rcu_head_unqueue(struct rcu_head *head)
212 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
/* Defined outside this header. */
extern int rcu_cpu_stall_suppress_at_boot;

/*
 * True only while rcu_cpu_stall_suppress_at_boot is nonzero and
 * rcu_inkernel_boot_has_ended() still reports false.
 */
static inline bool rcu_stall_is_suppressed_at_boot(void)
{
	if (!rcu_cpu_stall_suppress_at_boot)
		return false;
	return !rcu_inkernel_boot_has_ended();
}
221 #ifdef CONFIG_RCU_STALL_COMMON
/* Stall-warning variables and helpers; all are defined outside this header. */
extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;

int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);
230 static inline bool rcu_stall_is_suppressed(void)
232 return rcu_stall_is_suppressed_at_boot() || rcu_cpu_stall_suppress;
/*
 * If rcu_cpu_stall_suppress is currently zero, set it to 3.  A value that
 * is already nonzero is left untouched.
 */
#define rcu_ftrace_dump_stall_suppress()				\
do {									\
	if (rcu_cpu_stall_suppress == 0)				\
		rcu_cpu_stall_suppress = 3;				\
} while (0)
/*
 * Reset rcu_cpu_stall_suppress to zero, but only if it holds the value 3;
 * any other value is left untouched.
 */
#define rcu_ftrace_dump_stall_unsuppress()				\
do {									\
	if (3 == rcu_cpu_stall_suppress)				\
		rcu_cpu_stall_suppress = 0;				\
} while (0)
247 #else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
/* Fallback variant: only the boot-time suppression check applies. */
static inline bool rcu_stall_is_suppressed(void)
{
	bool boot_suppressed = rcu_stall_is_suppressed_at_boot();

	return boot_suppressed;
}
/*
 * No-op variants.  Each expands to a single empty statement rather than to
 * nothing, so "macro();" is a well-formed statement in every position
 * (for example as the sole body of an if/else) and has the same shape as
 * the non-empty variants.
 */
#define rcu_ftrace_dump_stall_suppress()	do { } while (0)
#define rcu_ftrace_dump_stall_unsuppress()	do { } while (0)
255 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
/*
 * Strings used in tracepoints must be exported through the tracing system
 * so that tools such as perf and trace-cmd can translate the string address
 * pointers back into text.  TPS() is shorthand for tracepoint_string().
 */
#define TPS(x)	tracepoint_string(x)
265 * Dump the ftrace buffer, but only one time per callsite per boot.
267 #define rcu_ftrace_dump(oops_dump_mode) \
269 static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
271 if (!atomic_read(&___rfd_beenhere) && \
272 !atomic_xchg(&___rfd_beenhere, 1)) { \
274 rcu_ftrace_dump_stall_suppress(); \
275 ftrace_dump(oops_dump_mode); \
276 rcu_ftrace_dump_stall_unsuppress(); \
/* Prototypes only; the definitions are outside this header. */
void rcu_early_boot_tests(void);
void rcu_test_sync_prims(void);

/*
 * This function really isn't for public consumption, but RCU is special in
 * that context switches can allow the state machine to make progress.
 */
extern void resched_cpu(int cpu);
289 #if !defined(CONFIG_TINY_RCU)
291 #include <linux/rcu_node_tree.h>
/* rcu_node tree geometry; these three are defined outside this header. */
extern int rcu_num_lvls;
extern int num_rcu_lvl[];
extern int rcu_num_nodes;

/*
 * NOTE(review): these two are file-scope statics declared in a header, so
 * every translation unit that includes it gets its own private copy --
 * confirm that is intended before relying on a shared value.
 */
static bool rcu_fanout_exact;
static int rcu_fanout_leaf;
300 * Compute the per-level fanout, either using the exact fanout specified
301 * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
303 static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
307 for (i = 0; i < RCU_NUM_LVLS; i++)
308 levelspread[i] = INT_MIN;
309 if (rcu_fanout_exact) {
310 levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
311 for (i = rcu_num_lvls - 2; i >= 0; i--)
312 levelspread[i] = RCU_FANOUT;
318 for (i = rcu_num_lvls - 1; i >= 0; i--) {
320 levelspread[i] = (cprv + ccur - 1) / ccur;
/* Prototype only; the definition is outside this header. */
extern void rcu_init_geometry(void);

/* Pointer to the first leaf rcu_node: the start of the last tree level. */
#define rcu_first_leaf_node()	(rcu_state.level[rcu_num_lvls - 1])

/* Is this rcu_node a leaf, i.e. on the last tree level? */
#define rcu_is_leaf_node(rnp)	(rcu_num_lvls - 1 == (rnp)->level)

/* Is this rcu_node the last leaf, i.e. the final element of ->node[]? */
#define rcu_is_last_leaf_node(rnp) \
	(&rcu_state.node[rcu_num_nodes - 1] == (rnp))
/*
 * Do a full breadth-first scan of the {s,}rcu_node structures: walk
 * ->node[] of the specified state structure (the SRCU form) or of the only
 * rcu_state structure (the RCU form) from element 0 up to, but excluding,
 * element rcu_num_nodes.
 */
#define srcu_for_each_node_breadth_first(sp, rnp) \
	for ((rnp) = (sp)->node; \
	     (rnp) < (sp)->node + rcu_num_nodes; \
	     (rnp)++)

#define rcu_for_each_node_breadth_first(rnp) \
	srcu_for_each_node_breadth_first(&rcu_state, rnp)
/*
 * Scan the leaves of the rcu_node hierarchy for the rcu_state structure,
 * from rcu_first_leaf_node() to the end of ->node[].  Note that for a
 * singleton tree with but one rcu_node structure, this loop -will- visit
 * that structure: it is still a leaf node, even though it is also the root.
 */
#define rcu_for_each_leaf_node(rnp) \
	for ((rnp) = rcu_first_leaf_node(); \
	     (rnp) < rcu_state.node + rcu_num_nodes; \
	     (rnp)++)
/*
 * Iterate over all possible CPUs in a leaf RCU node: start at the first CPU
 * in cpu_possible_mask at or after ->grplo and stop once past ->grphi.
 * Warns once if @rnp is not a leaf.
 *
 * Every use of @rnp is parenthesized so that an expression argument (for
 * example "base + i") expands with the intended precedence.
 */
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
	for (WARN_ON_ONCE(!rcu_is_leaf_node(rnp)), \
	     (cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
	     (cpu) <= (rnp)->grphi; \
	     (cpu) = cpumask_next((cpu), cpu_possible_mask))
/*
 * Iterate over all CPUs in a leaf RCU node's specified mask.
 *
 * rcu_find_next_bit() searches @mask from bit offset @cpu (relative to the
 * node) and converts the hit into an absolute CPU number by adding ->grplo.
 * The loop ends once that number exceeds ->grphi.  Warns once if @rnp is
 * not a leaf.
 *
 * Every use of @rnp is parenthesized so that an expression argument (for
 * example "base + i") expands with the intended precedence.
 */
#define rcu_find_next_bit(rnp, cpu, mask) \
	((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
	for (WARN_ON_ONCE(!rcu_is_leaf_node(rnp)), \
	     (cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
	     (cpu) <= (rnp)->grphi; \
	     (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - ((rnp)->grplo), (mask)))
378 #endif /* !defined(CONFIG_TINY_RCU) */
380 #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_TASKS_RCU_GENERIC)
/*
 * Wrappers for the rcu_node::lock acquire and release.
 *
 * The rcu_node structures form a tree, so a traversal observes a different
 * lock at each level.  An UNLOCK of one level followed by a LOCK of another
 * level therefore does not imply a full memory barrier and, most
 * importantly, transitivity is lost.
 *
 * To restore full ordering between tree levels, the acquire wrappers follow
 * the regular lock primitive with smp_mb__after_unlock_lock().
 *
 * The ->lock field of struct rcu_node is __private, so use these wrappers
 * rather than calling raw_spin_{lock,unlock}* on ->lock directly.
 */
#define raw_spin_lock_rcu_node(p)					\
do {									\
	raw_spin_lock(&ACCESS_PRIVATE(p, lock));			\
	smp_mb__after_unlock_lock();					\
} while (0)
/* Release ->lock of @p after asserting that interrupts are disabled. */
#define raw_spin_unlock_rcu_node(p)					\
do {									\
	lockdep_assert_irqs_disabled();					\
	raw_spin_unlock(&ACCESS_PRIVATE(p, lock));			\
} while (0)
/*
 * Acquire ->lock of @p with raw_spin_lock_irq(), then issue
 * smp_mb__after_unlock_lock().
 */
#define raw_spin_lock_irq_rcu_node(p)					\
do {									\
	raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock));			\
	smp_mb__after_unlock_lock();					\
} while (0)
/*
 * Assert that interrupts are disabled, then release ->lock of @p with
 * raw_spin_unlock_irq().
 */
#define raw_spin_unlock_irq_rcu_node(p)					\
do {									\
	lockdep_assert_irqs_disabled();					\
	raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock));			\
} while (0)
/*
 * Acquire ->lock of @p with raw_spin_lock_irqsave(), passing @flags through,
 * then issue smp_mb__after_unlock_lock().
 */
#define raw_spin_lock_irqsave_rcu_node(p, flags)			\
do {									\
	raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);	\
	smp_mb__after_unlock_lock();					\
} while (0)
/*
 * Assert that interrupts are disabled, then release ->lock of @p with
 * raw_spin_unlock_irqrestore(), passing @flags through.
 */
#define raw_spin_unlock_irqrestore_rcu_node(p, flags)			\
do {									\
	lockdep_assert_irqs_disabled();					\
	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags);	\
} while (0)
/*
 * Try to acquire ->lock of @p.  Evaluates to the raw_spin_trylock() result;
 * smp_mb__after_unlock_lock() is issued only when the lock was obtained.
 */
#define raw_spin_trylock_rcu_node(p)					\
({									\
	bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock));	\
									\
	if (___locked)							\
		smp_mb__after_unlock_lock();				\
	___locked;							\
})
/* Forward ->lock of @p to lockdep_assert_held(). */
#define raw_lockdep_assert_held_rcu_node(p) \
	lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
444 #endif // #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_TASKS_RCU_GENERIC)
446 #ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void)
{
	return true;
}

static inline bool rcu_gp_is_expedited(void)
{
	return false;
}

static inline void rcu_expedite_gp(void)
{
}

static inline void rcu_unexpedite_gp(void)
{
}

static inline void rcu_request_urgent_qs_task(struct task_struct *t)
{
}
453 #else /* #ifdef CONFIG_TINY_RCU */
/* Prototypes only; the definitions are outside this header. */
bool rcu_gp_is_normal(void);		/* Internal RCU use. */
bool rcu_gp_is_expedited(void);	/* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcupdate_announce_bootup_oddness(void);
/*
 * show_rcu_tasks_gp_kthreads() is an empty stub unless
 * CONFIG_TASKS_RCU_GENERIC is defined, in which case only its prototype is
 * provided here.
 */
#ifndef CONFIG_TASKS_RCU_GENERIC
static inline void show_rcu_tasks_gp_kthreads(void)
{
}
#else /* #ifndef CONFIG_TASKS_RCU_GENERIC */
void show_rcu_tasks_gp_kthreads(void);
#endif /* #else #ifndef CONFIG_TASKS_RCU_GENERIC */
void rcu_request_urgent_qs_task(struct task_struct *t);
465 #endif /* #else #ifdef CONFIG_TINY_RCU */
/* Scheduler-state constants, in ascending numeric order. */
#define RCU_SCHEDULER_INACTIVE	0
#define RCU_SCHEDULER_INIT	1
#define RCU_SCHEDULER_RUNNING	2
471 enum rcutorture_type {
474 RCU_TASKS_RUDE_FLAVOR,
475 RCU_TASKS_TRACING_FLAVOR,
481 #if defined(CONFIG_RCU_LAZY)
/* Prototypes only; the definitions are outside this header. */
unsigned long rcu_lazy_get_jiffies_till_flush(void);
void rcu_lazy_set_jiffies_till_flush(unsigned long j);
/* Stub variants: the getter always reports 0 and the setter ignores @j. */
static inline unsigned long rcu_lazy_get_jiffies_till_flush(void)
{
	return 0;
}

static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j)
{
}
489 #if defined(CONFIG_TREE_RCU)
/* Prototype only; the definition is outside this header. */
void rcutorture_get_gp_data(enum rcutorture_type test_type,
			    int *flags,
			    unsigned long *gp_seq);
492 void do_trace_rcu_torture_read(const char *rcutorturename,
493 struct rcu_head *rhp,
/* Prototype only; the definition is outside this header. */
void rcu_gp_set_torture_wait(int duration);
499 static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
500 int *flags, unsigned long *gp_seq)
505 #ifdef CONFIG_RCU_TRACE
506 void do_trace_rcu_torture_read(const char *rcutorturename,
507 struct rcu_head *rhp,
512 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
/* Stub variant: @duration is ignored. */
static inline void rcu_gp_set_torture_wait(int duration)
{
}
518 #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
519 long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
522 #ifdef CONFIG_TINY_SRCU
524 static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
525 struct srcu_struct *sp, int *flags,
526 unsigned long *gp_seq)
528 if (test_type != SRCU_FLAVOR)
531 *gp_seq = sp->srcu_idx;
534 #elif defined(CONFIG_TREE_SRCU)
/* Prototype only; the definition is outside this header. */
void srcutorture_get_gp_data(enum rcutorture_type test_type,
			     struct srcu_struct *sp,
			     int *flags,
			     unsigned long *gp_seq);
542 #ifdef CONFIG_TINY_RCU
/*
 * Stub variants: each returns a fixed value or does nothing, and every
 * argument is ignored.
 */
static inline bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
{
	return false;
}

static inline unsigned long rcu_get_gp_seq(void)
{
	return 0;
}

static inline unsigned long rcu_exp_batches_completed(void)
{
	return 0;
}

static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
{
	return 0;
}

static inline void rcu_force_quiescent_state(void)
{
}

static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
{
	return true;
}

static inline void show_rcu_gp_kthreads(void)
{
}

static inline int rcu_get_gp_kthreads_prio(void)
{
	return 0;
}

static inline void rcu_fwd_progress_check(unsigned long j)
{
}
553 static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
554 static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
555 #else /* #ifdef CONFIG_TINY_RCU */
/* Prototypes only; the definitions are outside this header. */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup);

unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long srcu_batches_completed(struct srcu_struct *sp);

int rcu_get_gp_kthreads_prio(void);

void show_rcu_gp_kthreads(void);
void rcu_fwd_progress_check(unsigned long j);
void rcu_force_quiescent_state(void);
/* Defined outside this header. */
extern struct workqueue_struct *rcu_gp_wq;

/*
 * With CONFIG_RCU_EXP_KTHREAD two kthread_worker pointers are declared;
 * without it a workqueue pointer is declared instead.
 */
#ifndef CONFIG_RCU_EXP_KTHREAD
extern struct workqueue_struct *rcu_par_gp_wq;
#else /* CONFIG_RCU_EXP_KTHREAD */
extern struct kthread_worker *rcu_exp_gp_kworker;
extern struct kthread_worker *rcu_exp_par_gp_kworker;
#endif /* CONFIG_RCU_EXP_KTHREAD */
572 void rcu_gp_slow_register(atomic_t *rgssp);
573 void rcu_gp_slow_unregister(atomic_t *rgssp);
574 #endif /* #else #ifdef CONFIG_TINY_RCU */
576 #ifdef CONFIG_RCU_NOCB_CPU
/* Prototype only; the definition is outside this header. */
void rcu_bind_current_to_nocb(void);
/* Stub variant: does nothing. */
static inline void rcu_bind_current_to_nocb(void)
{
}
582 #if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_RCU)
/* Prototype only; the definition is outside this header. */
void show_rcu_tasks_classic_gp_kthread(void);
/* Stub variant: does nothing. */
static inline void show_rcu_tasks_classic_gp_kthread(void)
{
}
587 #if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_RUDE_RCU)
/* Prototype only; the definition is outside this header. */
void show_rcu_tasks_rude_gp_kthread(void);
/* Stub variant: does nothing. */
static inline void show_rcu_tasks_rude_gp_kthread(void)
{
}
592 #if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_TRACE_RCU)
/* Prototype only; the definition is outside this header. */
void show_rcu_tasks_trace_gp_kthread(void);
/* Stub variant: does nothing. */
static inline void show_rcu_tasks_trace_gp_kthread(void)
{
}
598 #endif /* __LINUX_RCU_H */