1 /* SPDX-License-Identifier: GPL-2.0 */
4 #include <linux/limits.h>
11 #include <sys/types.h>
13 #include <sys/socket.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
21 #include "../kselftest.h"
22 #include "cgroup_util.h"
/*
 * Set in main() from proc_mount_contains("memory_localevents"); consulted by
 * test_memcg_oom_group_leaf_events() when checking the parent's oom_kill count.
 */
24 static bool has_localevents;
/* Set in main() from proc_mount_contains("memory_recursiveprot"). */
25 static bool has_recursiveprot;
28 * This test creates two nested cgroups with and without enabling
29 * the memory controller.
31 static int test_memcg_subtree_control(const char *root)
33 char *parent, *child, *parent2 = NULL, *child2 = NULL;
37 /* Create two nested cgroups with the memory controller enabled */
38 parent = cg_name(root, "memcg_test_0");
39 child = cg_name(root, "memcg_test_0/memcg_test_1");
40 if (!parent || !child)
43 if (cg_create(parent))
46 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
52 if (cg_read_strstr(child, "cgroup.controllers", "memory"))
55 /* Create two nested cgroups without enabling memory controller */
56 parent2 = cg_name(root, "memcg_test_1");
57 child2 = cg_name(root, "memcg_test_1/memcg_test_1");
58 if (!parent2 || !child2)
61 if (cg_create(parent2))
64 if (cg_create(child2))
67 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
70 if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
93 static int alloc_anon_50M_check(const char *cgroup, void *arg)
102 fprintf(stderr, "malloc() failed\n");
106 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
109 current = cg_read_long(cgroup, "memory.current");
113 if (!values_close(size, current, 3))
116 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
120 if (!values_close(anon, current, 3))
129 static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
131 size_t size = MB(50);
140 if (alloc_pagecache(fd, size))
143 current = cg_read_long(cgroup, "memory.current");
147 file = cg_read_key_long(cgroup, "memory.stat", "file ");
151 if (!values_close(file, current, 10))
162 * This test creates a memory cgroup, allocates
163 * some anonymous memory and some pagecache
164 * and checks memory.current, memory.peak, and some memory.stat values.
166 static int test_memcg_current_peak(const char *root)
169 long current, peak, peak_reset;
171 bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
172 int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
175 memcg = cg_name(root, "memcg_test");
179 if (cg_create(memcg))
182 current = cg_read_long(memcg, "memory.current");
186 peak = cg_read_long(memcg, "memory.peak");
190 if (cg_run(memcg, alloc_anon_50M_check, NULL))
193 peak = cg_read_long(memcg, "memory.peak");
198 * We'll open a few FDs for the same memory.peak file to exercise the free-path.
199 * We need at least three, closed in a different order than the writes occurred, to test
200 * the linked-list handling.
202 peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
211 * Before we try to use memory.peak's fd, try to figure out whether
212 * this kernel supports writing to that file in the first place. (by
213 * checking the writable bit on the file's st_mode)
215 if (fstat(peak_fd, &ss))
218 if ((ss.st_mode & S_IWUSR) == 0) {
223 peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
228 peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
233 /* any non-empty string resets, but make it clear */
234 static const char reset_string[] = "reset\n";
236 peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
237 if (peak_reset != sizeof(reset_string))
240 peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
241 if (peak_reset != sizeof(reset_string))
244 peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
245 if (peak_reset != sizeof(reset_string))
248 /* Make sure a completely independent read isn't affected by our FD-local reset above */
249 peak = cg_read_long(memcg, "memory.peak");
257 peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
262 peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
263 if (peak_reset != sizeof(reset_string))
266 peak = cg_read_long_fd(peak_fd);
267 if (peak > MB(30) || peak < 0)
270 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
273 peak = cg_read_long(memcg, "memory.peak");
277 /* Make sure everything is back to normal */
278 peak = cg_read_long_fd(peak_fd);
282 peak = cg_read_long_fd(peak_fd4);
310 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
313 int ppid = getppid();
315 if (alloc_pagecache(fd, MB(50)))
318 while (getppid() == ppid)
324 static int alloc_anon_noexit(const char *cgroup, void *arg)
326 int ppid = getppid();
327 size_t size = (unsigned long)arg;
332 fprintf(stderr, "malloc() failed\n");
336 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
339 while (getppid() == ppid)
347 * Wait until processes are killed asynchronously by the OOM killer.
348 * If we exceed a timeout, fail.
350 static int cg_test_proc_killed(const char *cgroup)
354 for (limit = 10; limit > 0; limit--) {
355 if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
363 static bool reclaim_until(const char *memcg, long goal);
366 * First, this test creates the following hierarchy:
367 * A memory.min = 0, memory.max = 200M
368 * A/B memory.min = 50M
369 * A/B/C memory.min = 75M, memory.current = 50M
370 * A/B/D memory.min = 25M, memory.current = 50M
371 * A/B/E memory.min = 0, memory.current = 50M
372 * A/B/F memory.min = 500M, memory.current = 0
374 * (or memory.low if we test soft protection)
376 * Usages are pagecache and the test keeps a running
377 * process in every leaf cgroup.
378 * Then it creates A/G and creates significant
379 * memory pressure in A.
381 * Then it checks actual memory usages and expects that:
382 * A/B memory.current ~= 50M
383 * A/B/C memory.current ~= 29M
384 * A/B/D memory.current ~= 21M
385 * A/B/E memory.current ~= 0
386 * A/B/F memory.current = 0
387 * (for origin of the numbers, see model in memcg_protection.m.)
389 * After that it tries to allocate more than the amount of
390 * unprotected memory available in A, and checks that:
391 * a) memory.min protects pagecache even in this case,
392 * b) memory.low allows reclaiming page cache with low events.
394 * Then we try to reclaim from A/B/C using memory.reclaim until its
396 * This makes sure that:
397 * (a) We ignore the protection of the reclaim target memcg.
398 * (b) The previously calculated emin value (~29M) should be dismissed.
400 static int test_memcg_protection(const char *root, bool min)
402 int ret = KSFT_FAIL, rc;
403 char *parent[3] = {NULL};
404 char *children[4] = {NULL};
405 const char *attribute = min ? "memory.min" : "memory.low";
415 parent[0] = cg_name(root, "memcg_test_0");
419 parent[1] = cg_name(parent[0], "memcg_test_1");
423 parent[2] = cg_name(parent[0], "memcg_test_2");
427 if (cg_create(parent[0]))
430 if (cg_read_long(parent[0], attribute)) {
431 /* No memory.min on older kernels is fine */
437 if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
440 if (cg_write(parent[0], "memory.max", "200M"))
443 if (cg_write(parent[0], "memory.swap.max", "0"))
446 if (cg_create(parent[1]))
449 if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
452 if (cg_create(parent[2]))
455 for (i = 0; i < ARRAY_SIZE(children); i++) {
456 children[i] = cg_name_indexed(parent[1], "child_memcg", i);
460 if (cg_create(children[i]))
466 cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
470 if (cg_write(parent[1], attribute, "50M"))
472 if (cg_write(children[0], attribute, "75M"))
474 if (cg_write(children[1], attribute, "25M"))
476 if (cg_write(children[2], attribute, "0"))
478 if (cg_write(children[3], attribute, "500M"))
482 while (!values_close(cg_read_long(parent[1], "memory.current"),
489 if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
492 if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
495 for (i = 0; i < ARRAY_SIZE(children); i++)
496 c[i] = cg_read_long(children[i], "memory.current");
498 if (!values_close(c[0], MB(29), 10))
501 if (!values_close(c[1], MB(21), 10))
507 rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
510 else if (!min && rc) {
512 "memory.low prevents from allocating anon memory\n");
516 current = min ? MB(50) : MB(30);
517 if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
520 if (!reclaim_until(children[0], MB(10)))
528 for (i = 0; i < ARRAY_SIZE(children); i++) {
529 int no_low_events_index = 1;
532 oom = cg_read_key_long(children[i], "memory.events", "oom ");
533 low = cg_read_key_long(children[i], "memory.events", "low ");
537 if (i <= no_low_events_index && low <= 0)
539 if (i > no_low_events_index && low)
547 for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
551 cg_destroy(children[i]);
555 for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
559 cg_destroy(parent[i]);
566 static int test_memcg_min(const char *root)
568 return test_memcg_protection(root, true);
571 static int test_memcg_low(const char *root)
573 return test_memcg_protection(root, false);
576 static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
578 size_t size = MB(50);
580 long current, high, max;
583 high = cg_read_long(cgroup, "memory.high");
584 max = cg_read_long(cgroup, "memory.max");
585 if (high != MB(30) && max != MB(30))
592 if (alloc_pagecache(fd, size))
595 current = cg_read_long(cgroup, "memory.current");
596 if (!values_close(current, MB(30), 5))
608 * This test checks that memory.high limits the amount of
609 * memory which can be consumed by either anonymous memory
612 static int test_memcg_high(const char *root)
618 memcg = cg_name(root, "memcg_test");
622 if (cg_create(memcg))
625 if (cg_read_strcmp(memcg, "memory.high", "max\n"))
628 if (cg_write(memcg, "memory.swap.max", "0"))
631 if (cg_write(memcg, "memory.high", "30M"))
634 if (cg_run(memcg, alloc_anon, (void *)MB(31)))
637 if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
640 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
643 high = cg_read_key_long(memcg, "memory.events", "high ");
656 static int alloc_anon_mlock(const char *cgroup, void *arg)
658 size_t size = (size_t)arg;
661 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
663 if (buf == MAP_FAILED)
672 * This test checks that memory.high is able to throttle a big single-shot
673 * allocation, i.e. a large allocation within one kernel entry.
675 static int test_memcg_high_sync(const char *root)
677 int ret = KSFT_FAIL, pid, fd = -1;
679 long pre_high, pre_max;
680 long post_high, post_max;
682 memcg = cg_name(root, "memcg_test");
686 if (cg_create(memcg))
689 pre_high = cg_read_key_long(memcg, "memory.events", "high ");
690 pre_max = cg_read_key_long(memcg, "memory.events", "max ");
691 if (pre_high < 0 || pre_max < 0)
694 if (cg_write(memcg, "memory.swap.max", "0"))
697 if (cg_write(memcg, "memory.high", "30M"))
700 if (cg_write(memcg, "memory.max", "140M"))
703 fd = memcg_prepare_for_wait(memcg);
707 pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
713 post_high = cg_read_key_long(memcg, "memory.events", "high ");
714 post_max = cg_read_key_long(memcg, "memory.events", "max ");
715 if (post_high < 0 || post_max < 0)
718 if (pre_high == post_high || pre_max != post_max)
733 * This test checks that memory.max limits the amount of
734 * memory which can be consumed by either anonymous memory
737 static int test_memcg_max(const char *root)
743 memcg = cg_name(root, "memcg_test");
747 if (cg_create(memcg))
750 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
753 if (cg_write(memcg, "memory.swap.max", "0"))
756 if (cg_write(memcg, "memory.max", "30M"))
759 /* Should be killed by OOM killer */
760 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
763 if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
766 current = cg_read_long(memcg, "memory.current");
767 if (current > MB(30) || !current)
770 max = cg_read_key_long(memcg, "memory.events", "max ");
784 * Reclaim from @memcg until usage reaches @goal by writing to
787 * This function will return false if the usage is already below the
790 * This function assumes that writing to memory.reclaim is the only
791 * source of change in memory.current (no concurrent allocations or
794 * This function makes sure memory.reclaim is sane. It will return
795 * false if memory.reclaim's error codes do not make sense, even if
796 * the usage goal was satisfied.
798 static bool reclaim_until(const char *memcg, long goal)
802 long current, to_reclaim;
803 bool reclaimed = false;
805 for (retries = 5; retries > 0; retries--) {
806 current = cg_read_long(memcg, "memory.current");
808 if (current < goal || values_close(current, goal, 3))
810 /* Did memory.reclaim return 0 incorrectly? */
814 to_reclaim = current - goal;
815 snprintf(buf, sizeof(buf), "%ld", to_reclaim);
816 err = cg_write(memcg, "memory.reclaim", buf);
819 else if (err != -EAGAIN)
826 * This test checks that memory.reclaim reclaims the given
827 * amount of memory (from both anon and file, if possible).
829 static int test_memcg_reclaim(const char *root)
835 long current, expected_usage;
837 memcg = cg_name(root, "memcg_test");
841 if (cg_create(memcg))
844 current = cg_read_long(memcg, "memory.current");
852 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
855 * If swap is enabled, try to reclaim from both anon and file, else try
856 * to reclaim from file only.
858 if (is_swap_enabled()) {
859 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
860 expected_usage = MB(100);
862 expected_usage = MB(50);
865 * Wait until current usage reaches the expected usage (or we run out of
869 while (!values_close(cg_read_long(memcg, "memory.current"),
870 expected_usage, 10)) {
876 "failed to allocate %ld for memcg reclaim test\n",
883 * Reclaim until current reaches 30M; this makes sure we hit both anon
884 * and file if swap is enabled.
886 if (!reclaim_until(memcg, MB(30)))
898 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
900 long mem_max = (long)arg;
901 size_t size = MB(50);
903 long mem_current, swap_current;
908 fprintf(stderr, "malloc() failed\n");
912 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
915 mem_current = cg_read_long(cgroup, "memory.current");
916 if (!mem_current || !values_close(mem_current, mem_max, 3))
919 swap_current = cg_read_long(cgroup, "memory.swap.current");
921 !values_close(mem_current + swap_current, size, 3))
931 * This test checks that memory.swap.max limits the amount of
932 * anonymous memory which can be swapped out. Additionally, it verifies that
933 * memory.swap.peak reflects the high watermark and can be reset.
935 static int test_memcg_swap_max_peak(const char *root)
941 int swap_peak_fd = -1, mem_peak_fd = -1;
943 /* any non-empty string resets */
944 static const char reset_string[] = "foobarbaz";
946 if (!is_swap_enabled())
949 memcg = cg_name(root, "memcg_test");
953 if (cg_create(memcg))
956 if (cg_read_long(memcg, "memory.swap.current")) {
961 swap_peak_fd = cg_open(memcg, "memory.swap.peak",
962 O_RDWR | O_APPEND | O_CLOEXEC);
964 if (swap_peak_fd == -1) {
971 * Before we try to use memory.swap.peak's fd, try to figure out
972 * whether this kernel supports writing to that file in the first
973 * place. (by checking the writable bit on the file's st_mode)
975 if (fstat(swap_peak_fd, &ss))
978 if ((ss.st_mode & S_IWUSR) == 0) {
983 mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
985 if (mem_peak_fd == -1)
988 if (cg_read_long(memcg, "memory.swap.peak"))
991 if (cg_read_long_fd(swap_peak_fd))
994 /* switch the swap and mem fds into local-peak tracking mode */
995 int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
997 if (peak_reset != sizeof(reset_string))
1000 if (cg_read_long_fd(swap_peak_fd))
1003 if (cg_read_long(memcg, "memory.peak"))
1006 if (cg_read_long_fd(mem_peak_fd))
1009 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
1010 if (peak_reset != sizeof(reset_string))
1013 if (cg_read_long_fd(mem_peak_fd))
1016 if (cg_read_strcmp(memcg, "memory.max", "max\n"))
1019 if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
1022 if (cg_write(memcg, "memory.swap.max", "30M"))
1025 if (cg_write(memcg, "memory.max", "30M"))
1028 /* Should be killed by OOM killer */
1029 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1032 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
1035 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
1038 peak = cg_read_long(memcg, "memory.peak");
1042 peak = cg_read_long(memcg, "memory.swap.peak");
1046 peak = cg_read_long_fd(mem_peak_fd);
1050 peak = cg_read_long_fd(swap_peak_fd);
1055 * open, reset and close the peak swap on another FD to make sure
1056 * multiple extant fds don't corrupt the linked-list
1058 peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
1062 peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
1066 /* actually reset on the fds */
1067 peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
1068 if (peak_reset != sizeof(reset_string))
1071 peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
1072 if (peak_reset != sizeof(reset_string))
1075 peak = cg_read_long_fd(swap_peak_fd);
1080 * The cgroup is now empty, but there may be a page or two associated
1081 * with the open FD accounted to it.
1083 peak = cg_read_long_fd(mem_peak_fd);
1087 if (cg_read_long(memcg, "memory.peak") < MB(29))
1090 if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
1093 if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
1096 max = cg_read_key_long(memcg, "memory.events", "max ");
1100 peak = cg_read_long(memcg, "memory.peak");
1104 peak = cg_read_long(memcg, "memory.swap.peak");
1108 peak = cg_read_long_fd(mem_peak_fd);
1112 peak = cg_read_long_fd(swap_peak_fd);
1119 if (mem_peak_fd != -1 && close(mem_peak_fd))
1121 if (swap_peak_fd != -1 && close(swap_peak_fd))
1130 * This test disables swapping and tries to allocate anonymous memory
1131 * up to OOM. Then it checks for oom and oom_kill events in
1134 static int test_memcg_oom_events(const char *root)
1136 int ret = KSFT_FAIL;
1139 memcg = cg_name(root, "memcg_test");
1143 if (cg_create(memcg))
1146 if (cg_write(memcg, "memory.max", "30M"))
1149 if (cg_write(memcg, "memory.swap.max", "0"))
1152 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1155 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
1158 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
1161 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
1173 struct tcp_server_args {
1174 unsigned short port;
1178 static int tcp_server(const char *cgroup, void *arg)
1180 struct tcp_server_args *srv_args = arg;
1181 struct sockaddr_in6 saddr = { 0 };
1182 socklen_t slen = sizeof(saddr);
1183 int sk, client_sk, ctl_fd, yes = 1, ret = -1;
1185 close(srv_args->ctl[0]);
1186 ctl_fd = srv_args->ctl[1];
1188 saddr.sin6_family = AF_INET6;
1189 saddr.sin6_addr = in6addr_any;
1190 saddr.sin6_port = htons(srv_args->port);
1192 sk = socket(AF_INET6, SOCK_STREAM, 0);
1196 if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
1199 if (bind(sk, (struct sockaddr *)&saddr, slen)) {
1200 write(ctl_fd, &errno, sizeof(errno));
1208 if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
1213 client_sk = accept(sk, NULL, NULL);
1219 uint8_t buf[0x100000];
1221 if (write(client_sk, buf, sizeof(buf)) <= 0) {
1222 if (errno == ECONNRESET)
/*
 * Client side of the socket-accounting test: resolves "localhost" at the
 * given port, connects, reads from the socket, and compares the growth of
 * memory.current against the "sock " entry of memory.stat for @cgroup.
 */
1235 static int tcp_client(const char *cgroup, unsigned short port)
1237 const char server[] = "localhost";
1238 struct addrinfo *ai;
1240 int retries = 0x10; /* nice round number */
/* Baseline usage, subtracted below so only growth since this point is compared. */
1244 allocated = cg_read_long(cgroup, "memory.current");
/*
 * NOTE(review): port is an unsigned short, but "%hd" formats it as a signed
 * short, so with a 16-bit short any port above 32767 is rendered as a
 * negative number. "%hu" matches the argument type. The caller picks ports
 * as 1000 + rand() % 60000, so such values do occur.
 */
1245 snprintf(servport, sizeof(servport), "%hd", port);
1246 ret = getaddrinfo(server, servport, NULL, &ai);
1250 sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
1254 ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
1260 uint8_t buf[0x100000];
1263 if (read(sk, buf, sizeof(buf)) <= 0)
1266 current = cg_read_long(cgroup, "memory.current");
1267 sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
/* Both reads must have succeeded before the values are compared. */
1269 if (current < 0 || sock < 0)
1272 /* exclude the memory not related to socket connection */
1273 if (values_close(current - allocated, sock, 10)) {
1287 * This test checks socket memory accounting.
1288 * The test forks a TCP server that listens on a random port between 1000
1289 * and 61000. Once it gets a client connection, it starts writing to
1291 * The TCP client interleaves reads from the socket with checks of whether
1292 * memory.current and memory.stat.sock are similar.
1294 static int test_memcg_sock(const char *root)
1296 int bind_retries = 5, ret = KSFT_FAIL, pid, err;
1297 unsigned short port;
1300 memcg = cg_name(root, "memcg_test");
1304 if (cg_create(memcg))
1307 while (bind_retries--) {
1308 struct tcp_server_args args;
1313 port = args.port = 1000 + rand() % 60000;
1315 pid = cg_run_nowait(memcg, tcp_server, &args);
1320 if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
1326 if (err != EADDRINUSE)
1329 waitpid(pid, NULL, 0);
1332 if (err == EADDRINUSE) {
1337 if (tcp_client(memcg, port) != KSFT_PASS)
1340 waitpid(pid, &err, 0);
1341 if (WEXITSTATUS(err))
1344 if (cg_read_long(memcg, "memory.current") < 0)
1347 if (cg_read_key_long(memcg, "memory.stat", "sock "))
1360 * This test disables swapping and tries to allocate anonymous memory
1361 * up to OOM with memory.oom.group set. Then it checks that all
1362 * processes in the leaf were killed. It also checks that oom_events
1363 * were propagated to the parent level.
1365 static int test_memcg_oom_group_leaf_events(const char *root)
1367 int ret = KSFT_FAIL;
1368 char *parent, *child;
1369 long parent_oom_events;
1371 parent = cg_name(root, "memcg_test_0");
1372 child = cg_name(root, "memcg_test_0/memcg_test_1");
1374 if (!parent || !child)
1377 if (cg_create(parent))
1380 if (cg_create(child))
1383 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1386 if (cg_write(child, "memory.max", "50M"))
1389 if (cg_write(child, "memory.swap.max", "0"))
1392 if (cg_write(child, "memory.oom.group", "1"))
1395 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1396 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1397 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1398 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1401 if (cg_test_proc_killed(child))
1404 if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1407 parent_oom_events = cg_read_key_long(
1408 parent, "memory.events", "oom_kill ");
1410 * If memory_localevents is not enabled (the default), the parent should
1411 * count OOM events in its child groups. Otherwise, it should not
1412 * have observed any events.
1414 if (has_localevents && parent_oom_events != 0)
1416 else if (!has_localevents && parent_oom_events <= 0)
1433 * This test disables swapping and tries to allocate anonymous memory
1434 * up to OOM with memory.oom.group set. Then it checks that all
1435 * processes in the parent and leaf were killed.
1437 static int test_memcg_oom_group_parent_events(const char *root)
1439 int ret = KSFT_FAIL;
1440 char *parent, *child;
1442 parent = cg_name(root, "memcg_test_0");
1443 child = cg_name(root, "memcg_test_0/memcg_test_1");
1445 if (!parent || !child)
1448 if (cg_create(parent))
1451 if (cg_create(child))
1454 if (cg_write(parent, "memory.max", "80M"))
1457 if (cg_write(parent, "memory.swap.max", "0"))
1460 if (cg_write(parent, "memory.oom.group", "1"))
1463 cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1464 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1465 cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1467 if (!cg_run(child, alloc_anon, (void *)MB(100)))
1470 if (cg_test_proc_killed(child))
1472 if (cg_test_proc_killed(parent))
1489 * This test disables swapping and tries to allocate anonymous memory
1490 * up to OOM with memory.oom.group set. Then it checks that all
1491 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1493 static int test_memcg_oom_group_score_events(const char *root)
1495 int ret = KSFT_FAIL;
1499 memcg = cg_name(root, "memcg_test_0");
1504 if (cg_create(memcg))
1507 if (cg_write(memcg, "memory.max", "50M"))
1510 if (cg_write(memcg, "memory.swap.max", "0"))
1513 if (cg_write(memcg, "memory.oom.group", "1"))
1516 safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1517 if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1520 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1521 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1524 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1527 if (kill(safe_pid, SIGKILL))
1540 #define T(x) { x, #x }
1542 int (*fn)(const char *root);
1545 T(test_memcg_subtree_control),
1546 T(test_memcg_current_peak),
1550 T(test_memcg_high_sync),
1552 T(test_memcg_reclaim),
1553 T(test_memcg_oom_events),
1554 T(test_memcg_swap_max_peak),
1556 T(test_memcg_oom_group_leaf_events),
1557 T(test_memcg_oom_group_parent_events),
1558 T(test_memcg_oom_group_score_events),
1562 int main(int argc, char **argv)
1564 char root[PATH_MAX];
1565 int i, proc_status, ret = EXIT_SUCCESS;
1567 if (cg_find_unified_root(root, sizeof(root), NULL))
1568 ksft_exit_skip("cgroup v2 isn't mounted\n");
1571 * Check that memory controller is available:
1572 * memory is listed in cgroup.controllers
1574 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1575 ksft_exit_skip("memory controller isn't available\n");
1577 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
1578 if (cg_write(root, "cgroup.subtree_control", "+memory"))
1579 ksft_exit_skip("Failed to set memory controller\n");
1581 proc_status = proc_mount_contains("memory_recursiveprot");
1582 if (proc_status < 0)
1583 ksft_exit_skip("Failed to query cgroup mount option\n");
1584 has_recursiveprot = proc_status;
1586 proc_status = proc_mount_contains("memory_localevents");
1587 if (proc_status < 0)
1588 ksft_exit_skip("Failed to query cgroup mount option\n");
1589 has_localevents = proc_status;
1591 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1592 switch (tests[i].fn(root)) {
1594 ksft_test_result_pass("%s\n", tests[i].name);
1597 ksft_test_result_skip("%s\n", tests[i].name);
1601 ksft_test_result_fail("%s\n", tests[i].name);