1 // SPDX-License-Identifier: GPL-2.0
4 #include <linux/limits.h>
5 #include <sys/sysinfo.h>
13 #include "../kselftest.h"
14 #include "cgroup_util.h"
17 // Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
18 CPU_HOG_CLOCK_PROCESS,
19 // Count elapsed time using system wallclock time.
29 struct cpu_hog_func_param {
32 enum hog_clock_type clock_type;
36 * This test creates two nested cgroups with and without enabling
39 static int test_cpucg_subtree_control(const char *root)
41 char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
44 // Create two nested cgroups with the cpu controller enabled.
45 parent = cg_name(root, "cpucg_test_0");
49 if (cg_create(parent))
52 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
55 child = cg_name(parent, "cpucg_test_child");
62 if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
65 // Create two nested cgroups without enabling the cpu controller.
66 parent2 = cg_name(root, "cpucg_test_1");
70 if (cg_create(parent2))
73 child2 = cg_name(parent2, "cpucg_test_child");
77 if (cg_create(child2))
80 if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
98 static void *hog_cpu_thread_func(void *arg)
106 static struct timespec
107 timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
109 struct timespec zero = {
115 if (lhs->tv_sec < rhs->tv_sec)
118 ret.tv_sec = lhs->tv_sec - rhs->tv_sec;
120 if (lhs->tv_nsec < rhs->tv_nsec) {
125 ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
127 ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
132 static int hog_cpus_timed(const char *cgroup, void *arg)
134 const struct cpu_hog_func_param *param =
135 (struct cpu_hog_func_param *)arg;
136 struct timespec ts_run = param->ts;
137 struct timespec ts_remaining = ts_run;
138 struct timespec ts_start;
141 ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
145 for (i = 0; i < param->nprocs; i++) {
148 ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
153 while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
154 struct timespec ts_total;
156 ret = nanosleep(&ts_remaining, NULL);
157 if (ret && errno != EINTR)
160 if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
161 ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
165 struct timespec ts_current;
167 ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
171 ts_total = timespec_sub(&ts_current, &ts_start);
174 ts_remaining = timespec_sub(&ts_run, &ts_total);
181 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
182 * cpu.stat shows the expected output.
184 static int test_cpucg_stats(const char *root)
187 long usage_usec, user_usec, system_usec;
188 long usage_seconds = 2;
189 long expected_usage_usec = usage_seconds * USEC_PER_SEC;
192 cpucg = cg_name(root, "cpucg_test");
196 if (cg_create(cpucg))
199 usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
200 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
201 system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
202 if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
205 struct cpu_hog_func_param param = {
208 .tv_sec = usage_seconds,
211 .clock_type = CPU_HOG_CLOCK_PROCESS,
213 if (cg_run(cpucg, hog_cpus_timed, (void *)¶m))
216 usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
217 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
221 if (!values_close(usage_usec, expected_usage_usec, 1))
234 * Creates a nice process that consumes CPU and checks that the elapsed
235 * usertime in the cgroup is close to the expected time.
237 static int test_cpucg_nice(const char *root)
241 long user_usec, nice_usec;
242 long usage_seconds = 2;
243 long expected_nice_usec = usage_seconds * USEC_PER_SEC;
247 cpucg = cg_name(root, "cpucg_test");
251 if (cg_create(cpucg))
254 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
255 nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
258 if (user_usec != 0 || nice_usec != 0)
262 * We fork here to create a new process that can be niced without
263 * polluting the nice value of other selftests
268 } else if (pid == 0) {
269 struct cpu_hog_func_param param = {
272 .tv_sec = usage_seconds,
275 .clock_type = CPU_HOG_CLOCK_PROCESS,
278 snprintf(buf, sizeof(buf), "%d", getpid());
279 if (cg_write(cpucg, "cgroup.procs", buf))
282 /* Try to keep niced CPU usage as constrained to hog_cpu as possible */
284 hog_cpus_timed(cpucg, ¶m);
287 waitpid(pid, &status, 0);
288 if (!WIFEXITED(status))
291 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
292 nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
293 if (!values_close(nice_usec, expected_nice_usec, 1))
307 run_cpucg_weight_test(
309 pid_t (*spawn_child)(const struct cpu_hogger *child),
310 int (*validate)(const struct cpu_hogger *children, int num_children))
312 int ret = KSFT_FAIL, i;
314 struct cpu_hogger children[3] = {};
316 parent = cg_name(root, "cpucg_test_0");
320 if (cg_create(parent))
323 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
326 for (i = 0; i < ARRAY_SIZE(children); i++) {
327 children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
328 if (!children[i].cgroup)
331 if (cg_create(children[i].cgroup))
334 if (cg_write_numeric(children[i].cgroup, "cpu.weight",
339 for (i = 0; i < ARRAY_SIZE(children); i++) {
340 pid_t pid = spawn_child(&children[i]);
343 children[i].pid = pid;
346 for (i = 0; i < ARRAY_SIZE(children); i++) {
349 waitpid(children[i].pid, &retcode, 0);
350 if (!WIFEXITED(retcode))
352 if (WEXITSTATUS(retcode))
356 for (i = 0; i < ARRAY_SIZE(children); i++)
357 children[i].usage = cg_read_key_long(children[i].cgroup,
358 "cpu.stat", "usage_usec");
360 if (validate(children, ARRAY_SIZE(children)))
365 for (i = 0; i < ARRAY_SIZE(children); i++) {
366 cg_destroy(children[i].cgroup);
367 free(children[i].cgroup);
375 static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
377 long usage_seconds = 10;
378 struct cpu_hog_func_param param = {
381 .tv_sec = usage_seconds,
384 .clock_type = CPU_HOG_CLOCK_WALL,
386 return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)¶m);
389 static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
391 return weight_hog_ncpus(child, get_nprocs());
395 overprovision_validate(const struct cpu_hogger *children, int num_children)
397 int ret = KSFT_FAIL, i;
399 for (i = 0; i < num_children - 1; i++) {
402 if (children[i + 1].usage <= children[i].usage)
405 delta = children[i + 1].usage - children[i].usage;
406 if (!values_close(delta, children[0].usage, 35))
416 * First, this test creates the following hierarchy:
418 * A/B cpu.weight = 50
419 * A/C cpu.weight = 100
420 * A/D cpu.weight = 150
422 * A separate process is then created for each child cgroup which spawns as
423 * many threads as there are cores, and hogs each CPU as much as possible
424 * for some time interval.
426 * Once all of the children have exited, we verify that each child cgroup
427 * was given proportional runtime as informed by their cpu.weight.
429 static int test_cpucg_weight_overprovisioned(const char *root)
431 return run_cpucg_weight_test(root, weight_hog_all_cpus,
432 overprovision_validate);
435 static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
437 return weight_hog_ncpus(child, 1);
441 underprovision_validate(const struct cpu_hogger *children, int num_children)
443 int ret = KSFT_FAIL, i;
445 for (i = 0; i < num_children - 1; i++) {
446 if (!values_close(children[i + 1].usage, children[0].usage, 15))
456 * First, this test creates the following hierarchy:
458 * A/B cpu.weight = 50
459 * A/C cpu.weight = 100
460 * A/D cpu.weight = 150
462 * A separate process is then created for each child cgroup which spawns a
463 * single thread that hogs a CPU. The testcase is only run on systems that
464 * have at least one core per-thread in the child processes.
466 * Once all of the children have exited, we verify that each child cgroup
467 * had roughly the same runtime despite having different cpu.weight.
469 static int test_cpucg_weight_underprovisioned(const char *root)
471 // Only run the test if there are enough cores to avoid overprovisioning
473 if (get_nprocs() < 4)
476 return run_cpucg_weight_test(root, weight_hog_one_cpu,
477 underprovision_validate);
481 run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
483 int ret = KSFT_FAIL, i;
484 char *parent = NULL, *child = NULL;
485 struct cpu_hogger leaf[3] = {};
486 long nested_leaf_usage, child_usage;
487 int nprocs = get_nprocs();
489 if (!overprovisioned) {
492 * Only run the test if there are enough cores to avoid overprovisioning
499 parent = cg_name(root, "cpucg_test");
500 child = cg_name(parent, "cpucg_child");
501 if (!parent || !child)
504 if (cg_create(parent))
506 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
509 if (cg_create(child))
511 if (cg_write(child, "cgroup.subtree_control", "+cpu"))
513 if (cg_write(child, "cpu.weight", "1000"))
516 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
517 const char *ancestor;
527 leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
531 if (cg_create(leaf[i].cgroup))
534 if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
538 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
540 struct cpu_hog_func_param param = {
546 .clock_type = CPU_HOG_CLOCK_WALL,
549 pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
556 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
559 waitpid(leaf[i].pid, &retcode, 0);
560 if (!WIFEXITED(retcode))
562 if (WEXITSTATUS(retcode))
566 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
567 leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
568 "cpu.stat", "usage_usec");
569 if (leaf[i].usage <= 0)
573 nested_leaf_usage = leaf[1].usage + leaf[2].usage;
574 if (overprovisioned) {
575 if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
577 } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
581 child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
582 if (child_usage <= 0)
584 if (!values_close(child_usage, nested_leaf_usage, 1))
589 for (i = 0; i < ARRAY_SIZE(leaf); i++) {
590 cg_destroy(leaf[i].cgroup);
591 free(leaf[i].cgroup);
602 * First, this test creates the following hierarchy:
604 * A/B cpu.weight = 1000
605 * A/C cpu.weight = 1000
606 * A/C/D cpu.weight = 5000
607 * A/C/E cpu.weight = 5000
609 * A separate process is then created for each leaf, which spawns nproc threads
610 * that burn a CPU for a few seconds.
612 * Once all of those processes have exited, we verify that the usage of leaf B
613 * from cpu.stat is roughly equal to the combined usage of leaves D and E.
616 test_cpucg_nested_weight_overprovisioned(const char *root)
618 return run_cpucg_nested_weight_test(root, true);
622 * First, this test creates the following hierarchy:
624 * A/B cpu.weight = 1000
625 * A/C cpu.weight = 1000
626 * A/C/D cpu.weight = 5000
627 * A/C/E cpu.weight = 5000
629 * A separate process is then created for each leaf, each of which spawns
630 * nproc / 4 threads that burn a CPU for a few seconds.
632 * Once all of those processes have exited, we verify that each of the leaf
633 * cgroups has roughly the same usage from cpu.stat.
636 test_cpucg_nested_weight_underprovisioned(const char *root)
638 return run_cpucg_nested_weight_test(root, false);
642 * This test creates a cgroup with some maximum value within a period, and
643 * verifies that a process in the cgroup is not overscheduled.
645 static int test_cpucg_max(const char *root)
648 long usage_usec, user_usec;
649 long usage_seconds = 1;
650 long expected_usage_usec = usage_seconds * USEC_PER_SEC;
653 cpucg = cg_name(root, "cpucg_test");
657 if (cg_create(cpucg))
660 if (cg_write(cpucg, "cpu.max", "1000"))
663 struct cpu_hog_func_param param = {
666 .tv_sec = usage_seconds,
669 .clock_type = CPU_HOG_CLOCK_WALL,
671 if (cg_run(cpucg, hog_cpus_timed, (void *)¶m))
674 usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
675 user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
679 if (user_usec >= expected_usage_usec)
682 if (values_close(usage_usec, expected_usage_usec, 95))
695 * This test verifies that a process inside of a nested cgroup whose parent
696 * group has a cpu.max value set is properly throttled.
698 static int test_cpucg_max_nested(const char *root)
701 long usage_usec, user_usec;
702 long usage_seconds = 1;
703 long expected_usage_usec = usage_seconds * USEC_PER_SEC;
704 char *parent, *child;
706 parent = cg_name(root, "cpucg_parent");
707 child = cg_name(parent, "cpucg_child");
708 if (!parent || !child)
711 if (cg_create(parent))
714 if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
717 if (cg_create(child))
720 if (cg_write(parent, "cpu.max", "1000"))
723 struct cpu_hog_func_param param = {
726 .tv_sec = usage_seconds,
729 .clock_type = CPU_HOG_CLOCK_WALL,
731 if (cg_run(child, hog_cpus_timed, (void *)¶m))
734 usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
735 user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
739 if (user_usec >= expected_usage_usec)
742 if (values_close(usage_usec, expected_usage_usec, 95))
756 #define T(x) { x, #x }
758 int (*fn)(const char *root);
761 T(test_cpucg_subtree_control),
764 T(test_cpucg_weight_overprovisioned),
765 T(test_cpucg_weight_underprovisioned),
766 T(test_cpucg_nested_weight_overprovisioned),
767 T(test_cpucg_nested_weight_underprovisioned),
769 T(test_cpucg_max_nested),
773 int main(int argc, char *argv[])
776 int i, ret = EXIT_SUCCESS;
778 if (cg_find_unified_root(root, sizeof(root), NULL))
779 ksft_exit_skip("cgroup v2 isn't mounted\n");
781 if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
782 if (cg_write(root, "cgroup.subtree_control", "+cpu"))
783 ksft_exit_skip("Failed to set cpu controller\n");
785 for (i = 0; i < ARRAY_SIZE(tests); i++) {
786 switch (tests[i].fn(root)) {
788 ksft_test_result_pass("%s\n", tests[i].name);
791 ksft_test_result_skip("%s\n", tests[i].name);
795 ksft_test_result_fail("%s\n", tests[i].name);