kernel/rcu/rcuscale.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * Read-Copy Update module-based scalability-test facility
   4  *
   5  * Copyright (C) IBM Corporation, 2015
   6  *
   7  * Authors: Paul E. McKenney <[email protected]>
   8  */
   9
  10 #define pr_fmt(fmt) fmt
  11
  12 #include <linux/types.h>
  13 #include <linux/kernel.h>
  14 #include <linux/init.h>
  15 #include <linux/mm.h>
  16 #include <linux/module.h>
  17 #include <linux/kthread.h>
  18 #include <linux/err.h>
  19 #include <linux/spinlock.h>
  20 #include <linux/smp.h>
  21 #include <linux/rcupdate.h>
  22 #include <linux/interrupt.h>
  23 #include <linux/sched.h>
  24 #include <uapi/linux/sched/types.h>
  25 #include <linux/atomic.h>
  26 #include <linux/bitops.h>
  27 #include <linux/completion.h>
  28 #include <linux/moduleparam.h>
  29 #include <linux/percpu.h>
  30 #include <linux/notifier.h>
  31 #include <linux/reboot.h>
  32 #include <linux/freezer.h>
  33 #include <linux/cpu.h>
  34 #include <linux/delay.h>
  35 #include <linux/stat.h>
  36 #include <linux/srcu.h>
  37 #include <linux/slab.h>
  38 #include <asm/byteorder.h>
  39 #include <linux/torture.h>
  40 #include <linux/vmalloc.h>
  41 #include <linux/rcupdate_trace.h>
  42
  43 #include "rcu.h"
  44
  45 MODULE_LICENSE("GPL");
  46 MODULE_AUTHOR("Paul E. McKenney <[email protected]>");
  47
  48 #define SCALE_FLAG "-scale:"
  49 #define SCALEOUT_STRING(s) \
  50         pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s)
  51 #define VERBOSE_SCALEOUT_STRING(s) \
  52         do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0)
  53 #define SCALEOUT_ERRSTRING(s) \
  54         pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s)
  55
  56 /*
  57  * The intended use cases for the nreaders and nwriters module parameters
  58  * are as follows:
  59  *
  60  * 1.   Specify only the nr_cpus kernel boot parameter.  This will
  61  *      set both nreaders and nwriters to the value specified by
  62  *      nr_cpus for a mixed reader/writer test.
  63  *
  64  * 2.   Specify the nr_cpus kernel boot parameter, but set
  65  *      rcuscale.nreaders to zero.  This will set nwriters to the
  66  *      value specified by nr_cpus for an update-only test.
  67  *
  68  * 3.   Specify the nr_cpus kernel boot parameter, but set
  69  *      rcuscale.nwriters to zero.  This will set nreaders to the
  70  *      value specified by nr_cpus for a read-only test.
  71  *
  72  * Various other use cases may of course be specified.
  73  *
  74  * Note that this test's readers are intended only as a test load for
  75  * the writers.  The reader scalability statistics will be overly
  76  * pessimistic due to the per-critical-section interrupt disabling,
  77  * test-end checks, and the pair of calls through pointers.
  78  */
  79
  80 #ifdef MODULE
  81 # define RCUSCALE_SHUTDOWN 0
  82 #else
  83 # define RCUSCALE_SHUTDOWN 1
  84 #endif
  85
  86 torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
  87 torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
  88 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
  89 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
  90 torture_param(int, nreaders, -1, "Number of RCU reader threads");
  91 torture_param(int, nwriters, -1, "Number of RCU updater threads");
  92 torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
  93               "Shutdown at end of scalability tests.");
  94 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
  95 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
  96 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
  97 torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
  98 torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
  99
 100 static char *scale_type = "rcu";
 101 module_param(scale_type, charp, 0444);
 102 MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)");
 103
 104 static int nrealreaders;
 105 static int nrealwriters;
 106 static struct task_struct **writer_tasks;
 107 static struct task_struct **reader_tasks;
 108 static struct task_struct *shutdown_task;
 109
 110 static u64 **writer_durations;
 111 static int *writer_n_durations;
 112 static atomic_t n_rcu_scale_reader_started;
 113 static atomic_t n_rcu_scale_writer_started;
 114 static atomic_t n_rcu_scale_writer_finished;
 115 static wait_queue_head_t shutdown_wq;
 116 static u64 t_rcu_scale_writer_started;
 117 static u64 t_rcu_scale_writer_finished;
 118 static unsigned long b_rcu_gp_test_started;
 119 static unsigned long b_rcu_gp_test_finished;
 120 static DEFINE_PER_CPU(atomic_t, n_async_inflight);
 121
 122 #define MAX_MEAS 10000
 123 #define MIN_MEAS 100
 124
 125 /*
 126  * Operations vector for selecting different types of tests.
 127  */
 128
 129 struct rcu_scale_ops {
 130         int ptype;
 131         void (*init)(void);
 132         void (*cleanup)(void);
 133         int (*readlock)(void);
 134         void (*readunlock)(int idx);
 135         unsigned long (*get_gp_seq)(void);
 136         unsigned long (*gp_diff)(unsigned long new, unsigned long old);
 137         unsigned long (*exp_completed)(void);
 138         void (*async)(struct rcu_head *head, rcu_callback_t func);
 139         void (*gp_barrier)(void);
 140         void (*sync)(void);
 141         void (*exp_sync)(void);
 142         const char *name;
 143 };
 144
 145 static struct rcu_scale_ops *cur_ops;
 146
 147 /*
 148  * Definitions for rcu scalability testing.
 149  */
 150
 151 static int rcu_scale_read_lock(void) __acquires(RCU)
 152 {
 153         rcu_read_lock();
 154         return 0;
 155 }
 156
 157 static void rcu_scale_read_unlock(int idx) __releases(RCU)
 158 {
 159         rcu_read_unlock();
 160 }
 161
 162 static unsigned long __maybe_unused rcu_no_completed(void)
 163 {
 164         return 0;
 165 }
 166
 167 static void rcu_sync_scale_init(void)
 168 {
 169 }
 170
 171 static struct rcu_scale_ops rcu_ops = {
 172         .ptype          = RCU_FLAVOR,
 173         .init           = rcu_sync_scale_init,
 174         .readlock       = rcu_scale_read_lock,
 175         .readunlock     = rcu_scale_read_unlock,
 176         .get_gp_seq     = rcu_get_gp_seq,
 177         .gp_diff        = rcu_seq_diff,
 178         .exp_completed  = rcu_exp_batches_completed,
 179         .async          = call_rcu_hurry,
 180         .gp_barrier     = rcu_barrier,
 181         .sync           = synchronize_rcu,
 182         .exp_sync       = synchronize_rcu_expedited,
 183         .name           = "rcu"
 184 };
 185
 186 /*
 187  * Definitions for srcu scalability testing.
 188  */
 189
 190 DEFINE_STATIC_SRCU(srcu_ctl_scale);
 191 static struct srcu_struct *srcu_ctlp = &srcu_ctl_scale;
 192
 193 static int srcu_scale_read_lock(void) __acquires(srcu_ctlp)
 194 {
 195         return srcu_read_lock(srcu_ctlp);
 196 }
 197
 198 static void srcu_scale_read_unlock(int idx) __releases(srcu_ctlp)
 199 {
 200         srcu_read_unlock(srcu_ctlp, idx);
 201 }
 202
 203 static unsigned long srcu_scale_completed(void)
 204 {
 205         return srcu_batches_completed(srcu_ctlp);
 206 }
 207
 208 static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
 209 {
 210         call_srcu(srcu_ctlp, head, func);
 211 }
 212
 213 static void srcu_rcu_barrier(void)
 214 {
 215         srcu_barrier(srcu_ctlp);
 216 }
 217
 218 static void srcu_scale_synchronize(void)
 219 {
 220         synchronize_srcu(srcu_ctlp);
 221 }
 222
 223 static void srcu_scale_synchronize_expedited(void)
 224 {
 225         synchronize_srcu_expedited(srcu_ctlp);
 226 }
 227
 228 static struct rcu_scale_ops srcu_ops = {
 229         .ptype          = SRCU_FLAVOR,
 230         .init           = rcu_sync_scale_init,
 231         .readlock       = srcu_scale_read_lock,
 232         .readunlock     = srcu_scale_read_unlock,
 233         .get_gp_seq     = srcu_scale_completed,
 234         .gp_diff        = rcu_seq_diff,
 235         .exp_completed  = srcu_scale_completed,
 236         .async          = srcu_call_rcu,
 237         .gp_barrier     = srcu_rcu_barrier,
 238         .sync           = srcu_scale_synchronize,
 239         .exp_sync       = srcu_scale_synchronize_expedited,
 240         .name           = "srcu"
 241 };
 242
 243 static struct srcu_struct srcud;
 244
 245 static void srcu_sync_scale_init(void)
 246 {
 247         srcu_ctlp = &srcud;
 248         init_srcu_struct(srcu_ctlp);
 249 }
 250
 251 static void srcu_sync_scale_cleanup(void)
 252 {
 253         cleanup_srcu_struct(srcu_ctlp);
 254 }
 255
 256 static struct rcu_scale_ops srcud_ops = {
 257         .ptype          = SRCU_FLAVOR,
 258         .init           = srcu_sync_scale_init,
 259         .cleanup        = srcu_sync_scale_cleanup,
 260         .readlock       = srcu_scale_read_lock,
 261         .readunlock     = srcu_scale_read_unlock,
 262         .get_gp_seq     = srcu_scale_completed,
 263         .gp_diff        = rcu_seq_diff,
 264         .exp_completed  = srcu_scale_completed,
 265         .async          = srcu_call_rcu,
 266         .gp_barrier     = srcu_rcu_barrier,
 267         .sync           = srcu_scale_synchronize,
 268         .exp_sync       = srcu_scale_synchronize_expedited,
 269         .name           = "srcud"
 270 };
 271
 272 #ifdef CONFIG_TASKS_RCU
 273
 274 /*
 275  * Definitions for RCU-tasks scalability testing.
 276  */
 277
 278 static int tasks_scale_read_lock(void)
 279 {
 280         return 0;
 281 }
 282
 283 static void tasks_scale_read_unlock(int idx)
 284 {
 285 }
 286
 287 static struct rcu_scale_ops tasks_ops = {
 288         .ptype          = RCU_TASKS_FLAVOR,
 289         .init           = rcu_sync_scale_init,
 290         .readlock       = tasks_scale_read_lock,
 291         .readunlock     = tasks_scale_read_unlock,
 292         .get_gp_seq     = rcu_no_completed,
 293         .gp_diff        = rcu_seq_diff,
 294         .async          = call_rcu_tasks,
 295         .gp_barrier     = rcu_barrier_tasks,
 296         .sync           = synchronize_rcu_tasks,
 297         .exp_sync       = synchronize_rcu_tasks,
 298         .name           = "tasks"
 299 };
 300
 301 #define TASKS_OPS &tasks_ops,
 302
 303 #else // #ifdef CONFIG_TASKS_RCU
 304
 305 #define TASKS_OPS
 306
 307 #endif // #else // #ifdef CONFIG_TASKS_RCU
 308
 309 #ifdef CONFIG_TASKS_TRACE_RCU
 310
 311 /*
 312  * Definitions for RCU-tasks-trace scalability testing.
 313  */
 314
 315 static int tasks_trace_scale_read_lock(void)
 316 {
 317         rcu_read_lock_trace();
 318         return 0;
 319 }
 320
 321 static void tasks_trace_scale_read_unlock(int idx)
 322 {
 323         rcu_read_unlock_trace();
 324 }
 325
 326 static struct rcu_scale_ops tasks_tracing_ops = {
 327         .ptype          = RCU_TASKS_FLAVOR,
 328         .init           = rcu_sync_scale_init,
 329         .readlock       = tasks_trace_scale_read_lock,
 330         .readunlock     = tasks_trace_scale_read_unlock,
 331         .get_gp_seq     = rcu_no_completed,
 332         .gp_diff        = rcu_seq_diff,
 333         .async          = call_rcu_tasks_trace,
 334         .gp_barrier     = rcu_barrier_tasks_trace,
 335         .sync           = synchronize_rcu_tasks_trace,
 336         .exp_sync       = synchronize_rcu_tasks_trace,
 337         .name           = "tasks-tracing"
 338 };
 339
 340 #define TASKS_TRACING_OPS &tasks_tracing_ops,
 341
 342 #else // #ifdef CONFIG_TASKS_TRACE_RCU
 343
 344 #define TASKS_TRACING_OPS
 345
 346 #endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
 347
 348 static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
 349 {
 350         if (!cur_ops->gp_diff)
 351                 return new - old;
 352         return cur_ops->gp_diff(new, old);
 353 }
 354
 355 /*
 356  * If scalability tests complete, wait for shutdown to commence.
 357  */
 358 static void rcu_scale_wait_shutdown(void)
 359 {
 360         cond_resched_tasks_rcu_qs();
 361         if (atomic_read(&n_rcu_scale_writer_finished) < nrealwriters)
 362                 return;
 363         while (!torture_must_stop())
 364                 schedule_timeout_uninterruptible(1);
 365 }
 366
 367 /*
 368  * RCU scalability reader kthread.  Repeatedly does empty RCU read-side
 369  * critical section, minimizing update-side interference.  However, the
 370  * point of this test is not to evaluate reader scalability, but instead
 371  * to serve as a test load for update-side scalability testing.
 372  */
 373 static int
 374 rcu_scale_reader(void *arg)
 375 {
 376         unsigned long flags;
 377         int idx;
 378         long me = (long)arg;
 379
 380         VERBOSE_SCALEOUT_STRING("rcu_scale_reader task started");
 381         set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
 382         set_user_nice(current, MAX_NICE);
 383         atomic_inc(&n_rcu_scale_reader_started);
 384
 385         do {
 386                 local_irq_save(flags);
 387                 idx = cur_ops->readlock();
 388                 cur_ops->readunlock(idx);
 389                 local_irq_restore(flags);
 390                 rcu_scale_wait_shutdown();
 391         } while (!torture_must_stop());
 392         torture_kthread_stopping("rcu_scale_reader");
 393         return 0;
 394 }
 395
 396 /*
 397  * Callback function for asynchronous grace periods from rcu_scale_writer().
 398  */
 399 static void rcu_scale_async_cb(struct rcu_head *rhp)
 400 {
 401         atomic_dec(this_cpu_ptr(&n_async_inflight));
 402         kfree(rhp);
 403 }
 404
 405 /*
 406  * RCU scale writer kthread.  Repeatedly does a grace period.
 407  */
 408 static int
 409 rcu_scale_writer(void *arg)
 410 {
 411         int i = 0;
 412         int i_max;
 413         long me = (long)arg;
 414         struct rcu_head *rhp = NULL;
 415         bool started = false, done = false, alldone = false;
 416         u64 t;
 417         u64 *wdp;
 418         u64 *wdpp = writer_durations[me];
 419
 420         VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
 421         WARN_ON(!wdpp);
 422         set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
 423         current->flags |= PF_NO_SETAFFINITY;
 424         sched_set_fifo_low(current);
 425
 426         if (holdoff)
 427                 schedule_timeout_uninterruptible(holdoff * HZ);
 428
 429         /*
 430          * Wait until rcu_end_inkernel_boot() is called for normal GP tests
 431          * so that RCU is not always expedited for normal GP tests.
 432          * The system_state test is approximate, but works well in practice.
 433          */
 434         while (!gp_exp && system_state != SYSTEM_RUNNING)
 435                 schedule_timeout_uninterruptible(1);
 436
 437         t = ktime_get_mono_fast_ns();
 438         if (atomic_inc_return(&n_rcu_scale_writer_started) >= nrealwriters) {
 439                 t_rcu_scale_writer_started = t;
 440                 if (gp_exp) {
 441                         b_rcu_gp_test_started =
 442                                 cur_ops->exp_completed() / 2;
 443                 } else {
 444                         b_rcu_gp_test_started = cur_ops->get_gp_seq();
 445                 }
 446         }
 447
 448         do {
 449                 if (writer_holdoff)
 450                         udelay(writer_holdoff);
 451                 wdp = &wdpp[i];
 452                 *wdp = ktime_get_mono_fast_ns();
 453                 if (gp_async) {
 454 retry:
 455                         if (!rhp)
 456                                 rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
 457                         if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
 458                                 atomic_inc(this_cpu_ptr(&n_async_inflight));
 459                                 cur_ops->async(rhp, rcu_scale_async_cb);
 460                                 rhp = NULL;
 461                         } else if (!kthread_should_stop()) {
 462                                 cur_ops->gp_barrier();
 463                                 goto retry;
 464                         } else {
 465                                 kfree(rhp); /* Because we are stopping. */
 466                         }
 467                 } else if (gp_exp) {
 468                         cur_ops->exp_sync();
 469                 } else {
 470                         cur_ops->sync();
 471                 }
 472                 t = ktime_get_mono_fast_ns();
 473                 *wdp = t - *wdp;
 474                 i_max = i;
 475                 if (!started &&
 476                     atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
 477                         started = true;
 478                 if (!done && i >= MIN_MEAS) {
 479                         done = true;
 480                         sched_set_normal(current, 0);
 481                         pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
 482                                  scale_type, SCALE_FLAG, me, MIN_MEAS);
 483                         if (atomic_inc_return(&n_rcu_scale_writer_finished) >=
 484                             nrealwriters) {
 485                                 schedule_timeout_interruptible(10);
 486                                 rcu_ftrace_dump(DUMP_ALL);
 487                                 SCALEOUT_STRING("Test complete");
 488                                 t_rcu_scale_writer_finished = t;
 489                                 if (gp_exp) {
 490                                         b_rcu_gp_test_finished =
 491                                                 cur_ops->exp_completed() / 2;
 492                                 } else {
 493                                         b_rcu_gp_test_finished =
 494                                                 cur_ops->get_gp_seq();
 495                                 }
 496                                 if (shutdown) {
 497                                         smp_mb(); /* Assign before wake. */
 498                                         wake_up(&shutdown_wq);
 499                                 }
 500                         }
 501                 }
 502                 if (done && !alldone &&
 503                     atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
 504                         alldone = true;
 505                 if (started && !alldone && i < MAX_MEAS - 1)
 506                         i++;
 507                 rcu_scale_wait_shutdown();
 508         } while (!torture_must_stop());
 509         if (gp_async) {
 510                 cur_ops->gp_barrier();
 511         }
 512         writer_n_durations[me] = i_max + 1;
 513         torture_kthread_stopping("rcu_scale_writer");
 514         return 0;
 515 }
 516
 517 static void
 518 rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
 519 {
 520         pr_alert("%s" SCALE_FLAG
 521                  "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
 522                  scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
 523 }
 524
 525 /*
 526  * Return the number if non-negative.  If -1, the number of CPUs.
 527  * If less than -1, that much less than the number of CPUs, but
 528  * at least one.
 529  */
 530 static int compute_real(int n)
 531 {
 532         int nr;
 533
 534         if (n >= 0) {
 535                 nr = n;
 536         } else {
 537                 nr = num_online_cpus() + 1 + n;
 538                 if (nr <= 0)
 539                         nr = 1;
 540         }
 541         return nr;
 542 }
 543
/*
 * kfree_rcu() scalability tests: Run a kfree_rcu() loop in each test
 * thread for the configured number of iterations, then report the total
 * time taken and the number of grace periods that elapsed.
 */
 548
 549 torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
 550 torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
 551 torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
 552 torture_param(bool, kfree_rcu_test_double, false, "Do we run a kfree_rcu() double-argument scale test?");
 553 torture_param(bool, kfree_rcu_test_single, false, "Do we run a kfree_rcu() single-argument scale test?");
 554
 555 static struct task_struct **kfree_reader_tasks;
 556 static int kfree_nrealthreads;
 557 static atomic_t n_kfree_scale_thread_started;
 558 static atomic_t n_kfree_scale_thread_ended;
 559
 560 struct kfree_obj {
 561         char kfree_obj[8];
 562         struct rcu_head rh;
 563 };
 564
 565 /* Used if doing RCU-kfree'ing via call_rcu(). */
 566 static void kfree_call_rcu(struct rcu_head *rh)
 567 {
 568         struct kfree_obj *obj = container_of(rh, struct kfree_obj, rh);
 569
 570         kfree(obj);
 571 }
 572
 573 static int
 574 kfree_scale_thread(void *arg)
 575 {
 576         int i, loop = 0;
 577         long me = (long)arg;
 578         struct kfree_obj *alloc_ptr;
 579         u64 start_time, end_time;
 580         long long mem_begin, mem_during = 0;
 581         bool kfree_rcu_test_both;
 582         DEFINE_TORTURE_RANDOM(tr);
 583
 584         VERBOSE_SCALEOUT_STRING("kfree_scale_thread task started");
 585         set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
 586         set_user_nice(current, MAX_NICE);
 587         kfree_rcu_test_both = (kfree_rcu_test_single == kfree_rcu_test_double);
 588
 589         start_time = ktime_get_mono_fast_ns();
 590
 591         if (atomic_inc_return(&n_kfree_scale_thread_started) >= kfree_nrealthreads) {
 592                 if (gp_exp)
 593                         b_rcu_gp_test_started = cur_ops->exp_completed() / 2;
 594                 else
 595                         b_rcu_gp_test_started = cur_ops->get_gp_seq();
 596         }
 597
 598         do {
 599                 if (!mem_during) {
 600                         mem_during = mem_begin = si_mem_available();
 601                 } else if (loop % (kfree_loops / 4) == 0) {
 602                         mem_during = (mem_during + si_mem_available()) / 2;
 603                 }
 604
 605                 for (i = 0; i < kfree_alloc_num; i++) {
 606                         alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
 607                         if (!alloc_ptr)
 608                                 return -ENOMEM;
 609
 610                         if (kfree_by_call_rcu) {
 611                                 call_rcu(&(alloc_ptr->rh), kfree_call_rcu);
 612                                 continue;
 613                         }
 614
 615                         // By default kfree_rcu_test_single and kfree_rcu_test_double are
 616                         // initialized to false. If both have the same value (false or true)
 617                         // both are randomly tested, otherwise only the one with value true
 618                         // is tested.
 619                         if ((kfree_rcu_test_single && !kfree_rcu_test_double) ||
 620                                         (kfree_rcu_test_both && torture_random(&tr) & 0x800))
 621                                 kfree_rcu_mightsleep(alloc_ptr);
 622                         else
 623                                 kfree_rcu(alloc_ptr, rh);
 624                 }
 625
 626                 cond_resched();
 627         } while (!torture_must_stop() && ++loop < kfree_loops);
 628
 629         if (atomic_inc_return(&n_kfree_scale_thread_ended) >= kfree_nrealthreads) {
 630                 end_time = ktime_get_mono_fast_ns();
 631
 632                 if (gp_exp)
 633                         b_rcu_gp_test_finished = cur_ops->exp_completed() / 2;
 634                 else
 635                         b_rcu_gp_test_finished = cur_ops->get_gp_seq();
 636
 637                 pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n",
 638                        (unsigned long long)(end_time - start_time), kfree_loops,
 639                        rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
 640                        (mem_begin - mem_during) >> (20 - PAGE_SHIFT));
 641
 642                 if (shutdown) {
 643                         smp_mb(); /* Assign before wake. */
 644                         wake_up(&shutdown_wq);
 645                 }
 646         }
 647
 648         torture_kthread_stopping("kfree_scale_thread");
 649         return 0;
 650 }
 651
 652 static void
 653 kfree_scale_cleanup(void)
 654 {
 655         int i;
 656
 657         if (torture_cleanup_begin())
 658                 return;
 659
 660         if (kfree_reader_tasks) {
 661                 for (i = 0; i < kfree_nrealthreads; i++)
 662                         torture_stop_kthread(kfree_scale_thread,
 663                                              kfree_reader_tasks[i]);
 664                 kfree(kfree_reader_tasks);
 665         }
 666
 667         torture_cleanup_end();
 668 }
 669
 670 /*
 671  * shutdown kthread.  Just waits to be awakened, then shuts down system.
 672  */
 673 static int
 674 kfree_scale_shutdown(void *arg)
 675 {
 676         wait_event_idle(shutdown_wq,
 677                         atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads);
 678
 679         smp_mb(); /* Wake before output. */
 680
 681         kfree_scale_cleanup();
 682         kernel_power_off();
 683         return -EINVAL;
 684 }
 685
 686 // Used if doing RCU-kfree'ing via call_rcu().
 687 static unsigned long jiffies_at_lazy_cb;
 688 static struct rcu_head lazy_test1_rh;
 689 static int rcu_lazy_test1_cb_called;
 690 static void call_rcu_lazy_test1(struct rcu_head *rh)
 691 {
 692         jiffies_at_lazy_cb = jiffies;
 693         WRITE_ONCE(rcu_lazy_test1_cb_called, 1);
 694 }
 695
 696 static int __init
 697 kfree_scale_init(void)
 698 {
 699         int firsterr = 0;
 700         long i;
 701         unsigned long jif_start;
 702         unsigned long orig_jif;
 703
 704         // Also, do a quick self-test to ensure laziness is as much as
 705         // expected.
 706         if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {
 707                 pr_alert("CONFIG_RCU_LAZY is disabled, falling back to kfree_rcu() for delayed RCU kfree'ing\n");
 708                 kfree_by_call_rcu = 0;
 709         }
 710
 711         if (kfree_by_call_rcu) {
 712                 /* do a test to check the timeout. */
 713                 orig_jif = rcu_lazy_get_jiffies_till_flush();
 714
 715                 rcu_lazy_set_jiffies_till_flush(2 * HZ);
 716                 rcu_barrier();
 717
 718                 jif_start = jiffies;
 719                 jiffies_at_lazy_cb = 0;
 720                 call_rcu(&lazy_test1_rh, call_rcu_lazy_test1);
 721
 722                 smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);
 723
 724                 rcu_lazy_set_jiffies_till_flush(orig_jif);
 725
 726                 if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
 727                         pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
 728                         WARN_ON_ONCE(1);
 729                         return -1;
 730                 }
 731
 732                 if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) {
 733                         pr_alert("ERROR: call_rcu() CBs are being too lazy!\n");
 734                         WARN_ON_ONCE(1);
 735                         return -1;
 736                 }
 737         }
 738
 739         kfree_nrealthreads = compute_real(kfree_nthreads);
 740         /* Start up the kthreads. */
 741         if (shutdown) {
 742                 init_waitqueue_head(&shutdown_wq);
 743                 firsterr = torture_create_kthread(kfree_scale_shutdown, NULL,
 744                                                   shutdown_task);
 745                 if (torture_init_error(firsterr))
 746                         goto unwind;
 747                 schedule_timeout_uninterruptible(1);
 748         }
 749
 750         pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n",
 751                         kfree_mult * sizeof(struct kfree_obj),
 752                         kfree_by_call_rcu);
 753
 754         kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
 755                                GFP_KERNEL);
 756         if (kfree_reader_tasks == NULL) {
 757                 firsterr = -ENOMEM;
 758                 goto unwind;
 759         }
 760
 761         for (i = 0; i < kfree_nrealthreads; i++) {
 762                 firsterr = torture_create_kthread(kfree_scale_thread, (void *)i,
 763                                                   kfree_reader_tasks[i]);
 764                 if (torture_init_error(firsterr))
 765                         goto unwind;
 766         }
 767
 768         while (atomic_read(&n_kfree_scale_thread_started) < kfree_nrealthreads)
 769                 schedule_timeout_uninterruptible(1);
 770
 771         torture_init_end();
 772         return 0;
 773
 774 unwind:
 775         torture_init_end();
 776         kfree_scale_cleanup();
 777         return firsterr;
 778 }
 779
 780 static void
 781 rcu_scale_cleanup(void)
 782 {
 783         int i;
 784         int j;
 785         int ngps = 0;
 786         u64 *wdp;
 787         u64 *wdpp;
 788
 789         /*
 790          * Would like warning at start, but everything is expedited
 791          * during the mid-boot phase, so have to wait till the end.
 792          */
 793         if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
 794                 SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
 795         if (rcu_gp_is_normal() && gp_exp)
 796                 SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
 797         if (gp_exp && gp_async)
 798                 SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
 799
 800         if (kfree_rcu_test) {
 801                 kfree_scale_cleanup();
 802                 return;
 803         }
 804
 805         if (torture_cleanup_begin())
 806                 return;
 807         if (!cur_ops) {
 808                 torture_cleanup_end();
 809                 return;
 810         }
 811
 812         if (reader_tasks) {
 813                 for (i = 0; i < nrealreaders; i++)
 814                         torture_stop_kthread(rcu_scale_reader,
 815                                              reader_tasks[i]);
 816                 kfree(reader_tasks);
 817         }
 818
 819         if (writer_tasks) {
 820                 for (i = 0; i < nrealwriters; i++) {
 821                         torture_stop_kthread(rcu_scale_writer,
 822                                              writer_tasks[i]);
 823                         if (!writer_n_durations)
 824                                 continue;
 825                         j = writer_n_durations[i];
 826                         pr_alert("%s%s writer %d gps: %d\n",
 827                                  scale_type, SCALE_FLAG, i, j);
 828                         ngps += j;
 829                 }
 830                 pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
 831                          scale_type, SCALE_FLAG,
 832                          t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
 833                          t_rcu_scale_writer_finished -
 834                          t_rcu_scale_writer_started,
 835                          ngps,
 836                          rcuscale_seq_diff(b_rcu_gp_test_finished,
 837                                            b_rcu_gp_test_started));
 838                 for (i = 0; i < nrealwriters; i++) {
 839                         if (!writer_durations)
 840                                 break;
 841                         if (!writer_n_durations)
 842                                 continue;
 843                         wdpp = writer_durations[i];
 844                         if (!wdpp)
 845                                 continue;
 846                         for (j = 0; j < writer_n_durations[i]; j++) {
 847                                 wdp = &wdpp[j];
 848                                 pr_alert("%s%s %4d writer-duration: %5d %llu\n",
 849                                         scale_type, SCALE_FLAG,
 850                                         i, j, *wdp);
 851                                 if (j % 100 == 0)
 852                                         schedule_timeout_uninterruptible(1);
 853                         }
 854                         kfree(writer_durations[i]);
 855                 }
 856                 kfree(writer_tasks);
 857                 kfree(writer_durations);
 858                 kfree(writer_n_durations);
 859         }
 860
 861         /* Do torture-type-specific cleanup operations.  */
 862         if (cur_ops->cleanup != NULL)
 863                 cur_ops->cleanup();
 864
 865         torture_cleanup_end();
 866 }
 867
 868 /*
 869  * RCU scalability shutdown kthread.  Just waits to be awakened, then shuts
 870  * down system.
 871  */
 872 static int
 873 rcu_scale_shutdown(void *arg)
 874 {
 875         wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters);
 876         smp_mb(); /* Wake before output. */
 877         rcu_scale_cleanup();
 878         kernel_power_off();
 879         return -EINVAL;
 880 }
 881
 882 static int __init
 883 rcu_scale_init(void)
 884 {
 885         long i;
 886         int firsterr = 0;
 887         static struct rcu_scale_ops *scale_ops[] = {
 888                 &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
 889         };
 890
 891         if (!torture_init_begin(scale_type, verbose))
 892                 return -EBUSY;
 893
 894         /* Process args and announce that the scalability'er is on the job. */
 895         for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
 896                 cur_ops = scale_ops[i];
 897                 if (strcmp(scale_type, cur_ops->name) == 0)
 898                         break;
 899         }
 900         if (i == ARRAY_SIZE(scale_ops)) {
 901                 pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
 902                 pr_alert("rcu-scale types:");
 903                 for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
 904                         pr_cont(" %s", scale_ops[i]->name);
 905                 pr_cont("\n");
 906                 firsterr = -EINVAL;
 907                 cur_ops = NULL;
 908                 goto unwind;
 909         }
 910         if (cur_ops->init)
 911                 cur_ops->init();
 912
 913         if (kfree_rcu_test)
 914                 return kfree_scale_init();
 915
 916         nrealwriters = compute_real(nwriters);
 917         nrealreaders = compute_real(nreaders);
 918         atomic_set(&n_rcu_scale_reader_started, 0);
 919         atomic_set(&n_rcu_scale_writer_started, 0);
 920         atomic_set(&n_rcu_scale_writer_finished, 0);
 921         rcu_scale_print_module_parms(cur_ops, "Start of test");
 922
 923         /* Start up the kthreads. */
 924
 925         if (shutdown) {
 926                 init_waitqueue_head(&shutdown_wq);
 927                 firsterr = torture_create_kthread(rcu_scale_shutdown, NULL,
 928                                                   shutdown_task);
 929                 if (torture_init_error(firsterr))
 930                         goto unwind;
 931                 schedule_timeout_uninterruptible(1);
 932         }
 933         reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
 934                                GFP_KERNEL);
 935         if (reader_tasks == NULL) {
 936                 SCALEOUT_ERRSTRING("out of memory");
 937                 firsterr = -ENOMEM;
 938                 goto unwind;
 939         }
 940         for (i = 0; i < nrealreaders; i++) {
 941                 firsterr = torture_create_kthread(rcu_scale_reader, (void *)i,
 942                                                   reader_tasks[i]);
 943                 if (torture_init_error(firsterr))
 944                         goto unwind;
 945         }
 946         while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders)
 947                 schedule_timeout_uninterruptible(1);
 948         writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]),
 949                                GFP_KERNEL);
 950         writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
 951                                    GFP_KERNEL);
 952         writer_n_durations =
 953                 kcalloc(nrealwriters, sizeof(*writer_n_durations),
 954                         GFP_KERNEL);
 955         if (!writer_tasks || !writer_durations || !writer_n_durations) {
 956                 SCALEOUT_ERRSTRING("out of memory");
 957                 firsterr = -ENOMEM;
 958                 goto unwind;
 959         }
 960         for (i = 0; i < nrealwriters; i++) {
 961                 writer_durations[i] =
 962                         kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
 963                                 GFP_KERNEL);
 964                 if (!writer_durations[i]) {
 965                         firsterr = -ENOMEM;
 966                         goto unwind;
 967                 }
 968                 firsterr = torture_create_kthread(rcu_scale_writer, (void *)i,
 969                                                   writer_tasks[i]);
 970                 if (torture_init_error(firsterr))
 971                         goto unwind;
 972         }
 973         torture_init_end();
 974         return 0;
 975
 976 unwind:
 977         torture_init_end();
 978         rcu_scale_cleanup();
 979         if (shutdown) {
 980                 WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
 981                 kernel_power_off();
 982         }
 983         return firsterr;
 984 }
 985
/*
 * Register rcu_scale_init() as the module's initialization function and
 * rcu_scale_cleanup() as its exit function.
 */
module_init(rcu_scale_init);
module_exit(rcu_scale_cleanup);