1 /* SPDX-License-Identifier: GPL-2.0 */
7 #include <linux/limits.h>
13 #include <sys/inotify.h>
15 #include <sys/types.h>
19 #include "cgroup_util.h"
20 #include "../clone3/clone3_selftests.h"
/*
 * Read up to max_len - 1 bytes from the file at @path into @buf and
 * NUL-terminate what was read.
 *
 * Returns the number of bytes read on success, or -errno on failure
 * (-EINVAL when @max_len is 0, as there is no room for the terminator).
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	/* max_len - 1 would wrap to SIZE_MAX and let read() overrun @buf. */
	if (max_len == 0)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* capture before close() can overwrite it */
	else
		buf[len] = '\0';

	close(fd);
	return len < 0 ? -err : len;
}
/*
 * Append @len bytes from @buf to the file at @path.
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	if (written < 0)
		err = errno;	/* capture before close() can overwrite it */

	close(fd);
	return written < 0 ? -err : written;
}
/*
 * Build the path "<root>/<name>" in a newly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* One byte for the '/' separator and one for the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);
	return ret;
}
/*
 * Build the path "<root>/<name>_<index>" in a newly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	/*
	 * Ask snprintf() for the exact length: a fixed amount of slack
	 * truncates the path once the index needs more digits than allowed
	 * for (an int can take up to 11 characters including the sign).
	 */
	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);
	return ret;
}
/*
 * Build the path "<cgroup>/<control>" in a newly allocated string.
 *
 * Returns the string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* One byte for the '/' separator and one for the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);
	return ret;
}
85 /* Returns 0 on success, or -errno on failure. */
86 int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
91 snprintf(path, sizeof(path), "%s/%s", cgroup, control);
93 ret = read_text(path, buf, len);
94 return ret >= 0 ? 0 : ret;
97 int cg_read_strcmp(const char *cgroup, const char *control,
104 /* Handle the case of comparing against empty string */
108 size = strlen(expected) + 1;
114 if (cg_read(cgroup, control, buf, size)) {
119 ret = strcmp(expected, buf);
124 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
128 if (cg_read(cgroup, control, buf, sizeof(buf)))
131 return strstr(buf, needle) ? 0 : -1;
134 long cg_read_long(const char *cgroup, const char *control)
138 if (cg_read(cgroup, control, buf, sizeof(buf)))
144 long cg_read_long_fd(int fd)
148 if (pread(fd, buf, sizeof(buf), 0) <= 0)
154 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
159 if (cg_read(cgroup, control, buf, sizeof(buf)))
162 ptr = strstr(buf, key);
166 return atol(ptr + strlen(key));
169 long cg_read_lc(const char *cgroup, const char *control)
172 const char delim[] = "\n";
176 if (cg_read(cgroup, control, buf, sizeof(buf)))
179 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
185 /* Returns 0 on success, or -errno on failure. */
186 int cg_write(const char *cgroup, const char *control, char *buf)
189 ssize_t len = strlen(buf), ret;
191 snprintf(path, sizeof(path), "%s/%s", cgroup, control);
192 ret = write_text(path, buf, len);
193 return ret == len ? 0 : ret;
197 * Returns fd on success, or -1 on failure.
198 * (fd should be closed with close() as usual)
200 int cg_open(const char *cgroup, const char *control, int flags)
204 snprintf(path, sizeof(path), "%s/%s", cgroup, control);
205 return open(path, flags);
/*
 * Format @value as a signed decimal number and write it to the file
 * <cgroup>/<control>.
 *
 * Returns a negative value if formatting fails, otherwise the result of
 * cg_write().
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* @value is a signed long, so it is formatted with %ld, not %lu. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
220 int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
222 char buf[10 * PAGE_SIZE];
223 char *fs, *mount, *type, *options;
224 const char delim[] = "\n\t ";
226 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
231 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
233 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
234 mount = strtok(NULL, delim);
235 type = strtok(NULL, delim);
236 options = strtok(NULL, delim);
240 if (strcmp(type, "cgroup2") == 0) {
241 strncpy(root, mount, len);
243 *nsdelegate = !!strstr(options, "nsdelegate");
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the result of mkdir().
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
256 int cg_wait_for_proc_count(const char *cgroup, int count)
258 char buf[10 * PAGE_SIZE] = {0};
262 for (attempts = 10; attempts >= 0; attempts--) {
265 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
268 for (ptr = buf; *ptr; ptr++)
281 int cg_killall(const char *cgroup)
286 /* If cgroup.kill exists use it. */
287 if (!cg_write(cgroup, "cgroup.kill", "1"))
290 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
293 while (ptr < buf + sizeof(buf)) {
294 int pid = strtol(ptr, &ptr, 10);
302 if (kill(pid, SIGKILL))
309 int cg_destroy(const char *cgroup)
317 if (ret && errno == EBUSY) {
323 if (ret && errno == ENOENT)
/*
 * Write @pid, formatted as a decimal number, to the "cgroup.procs" file
 * of @cgroup.
 *
 * Returns the result of cg_write().
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_str[64];

	snprintf(pid_str, sizeof(pid_str), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", pid_str);
}
/*
 * Write "0" to the "cgroup.procs" file of @cgroup.
 *
 * Returns the result of cg_write().
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
/*
 * Write "0" to the "cgroup.threads" file of @cgroup.
 *
 * Returns the result of cg_write().
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
347 int cg_run(const char *cgroup,
348 int (*fn)(const char *cgroup, void *arg),
356 } else if (pid == 0) {
359 snprintf(buf, sizeof(buf), "%d", getpid());
360 if (cg_write(cgroup, "cgroup.procs", buf))
362 exit(fn(cgroup, arg));
364 waitpid(pid, &retcode, 0);
365 if (WIFEXITED(retcode))
366 return WEXITSTATUS(retcode);
372 pid_t clone_into_cgroup(int cgroup_fd)
374 #ifdef CLONE_ARGS_SIZE_VER2
377 struct __clone_args args = {
378 .flags = CLONE_INTO_CGROUP,
379 .exit_signal = SIGCHLD,
383 pid = sys_clone3(&args, sizeof(struct __clone_args));
385 * Verify that this is a genuine test failure:
386 * ENOSYS -> clone3() not available
387 * E2BIG -> CLONE_INTO_CGROUP not available
389 if (pid < 0 && (errno == ENOSYS || errno == E2BIG))
400 int clone_reap(pid_t pid, int options)
408 ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
415 if (options & WEXITED) {
416 if (WIFEXITED(info.si_status))
417 return WEXITSTATUS(info.si_status);
420 if (options & WSTOPPED) {
421 if (WIFSTOPPED(info.si_status))
422 return WSTOPSIG(info.si_status);
425 if (options & WCONTINUED) {
426 if (WIFCONTINUED(info.si_status))
433 int dirfd_open_opath(const char *dir)
435 return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH);
438 #define close_prot_errno(fd) \
445 static int clone_into_cgroup_run_nowait(const char *cgroup,
446 int (*fn)(const char *cgroup, void *arg),
452 cgroup_fd = dirfd_open_opath(cgroup);
456 pid = clone_into_cgroup(cgroup_fd);
457 close_prot_errno(cgroup_fd);
459 exit(fn(cgroup, arg));
464 int cg_run_nowait(const char *cgroup,
465 int (*fn)(const char *cgroup, void *arg),
470 pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
474 /* Genuine test failure. */
475 if (pid < 0 && errno != ENOSYS)
482 snprintf(buf, sizeof(buf), "%d", getpid());
483 if (cg_write(cgroup, "cgroup.procs", buf))
485 exit(fn(cgroup, arg));
491 int get_temp_fd(void)
493 return open(".", O_TMPFILE | O_RDWR | O_EXCL);
496 int alloc_pagecache(int fd, size_t size)
507 if (ftruncate(fd, size))
510 for (i = 0; i < size; i += sizeof(buf))
511 read(fd, buf, sizeof(buf));
519 int alloc_anon(const char *cgroup, void *arg)
521 size_t size = (unsigned long)arg;
525 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
532 int is_swap_enabled(void)
535 const char delim[] = "\n";
539 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
542 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
548 int set_oom_adj_score(int pid, int score)
553 sprintf(path, "/proc/%d/oom_score_adj", pid);
555 fd = open(path, O_WRONLY | O_APPEND);
559 len = dprintf(fd, "%d", score);
569 int proc_mount_contains(const char *option)
571 char buf[4 * PAGE_SIZE];
574 read = read_text("/proc/mounts", buf, sizeof(buf));
578 return strstr(buf, option) != NULL;
581 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
587 snprintf(path, sizeof(path), "/proc/%s/%s",
588 thread ? "thread-self" : "self", item);
590 snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
592 ret = read_text(path, buf, size);
593 return ret < 0 ? -1 : ret;
596 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
600 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
603 return strstr(buf, needle) ? 0 : -1;
606 int clone_into_cgroup_run_wait(const char *cgroup)
611 cgroup_fd = dirfd_open_opath(cgroup);
615 pid = clone_into_cgroup(cgroup_fd);
616 close_prot_errno(cgroup_fd);
624 * We don't care whether this fails. We only care whether the initial
627 (void)clone_reap(pid, WEXITED);
/*
 * Create an inotify instance and add an IN_MODIFY watch on the file
 * <cgroup>/<filename>.
 *
 * Returns the inotify file descriptor on success, or -1 on failure.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, ret;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	path = cg_control(cgroup, filename);
	if (!path) {
		close(fd);
		return -1;
	}

	ret = inotify_add_watch(fd, path, IN_MODIFY);
	/* cg_control() allocates the path; release it once the watch is set. */
	free(path);
	if (ret == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
/*
 * Call __prepare_for_wait() on the "cgroup.events" file of @cgroup and
 * return its result.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
/*
 * Call __prepare_for_wait() on the "memory.events" file of @cgroup and
 * return its result.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
658 int cg_wait_for(int fd)
661 struct pollfd fds = {
667 ret = poll(&fds, 1, 10000);
676 if (ret > 0 && fds.revents & POLLIN) {