1 // SPDX-License-Identifier: GPL-2.0
6 #include <linux/kernel.h>
14 #include <sys/resource.h>
15 #include <linux/close_range.h>
17 #include "../kselftest_harness.h"
18 #include "../clone3/clone3_selftests.h"
/* Linux-specific fcntl() command constants; define fallbacks in case the
 * libc headers used to build this test are too old to provide them. */
21 #ifndef F_LINUX_SPECIFIC_BASE
22 #define F_LINUX_SPECIFIC_BASE 1024
/* F_DUPFD_QUERY: tells whether two fds refer to the same struct file. */
26 #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
29 #ifndef F_CREATED_QUERY
/* F_CREATED_QUERY: asks whether a preceding open(O_CREAT) created the file. */
30 #define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
/*
 * Raw close_range(2) wrapper: invoke the syscall directly since libc may
 * not provide a binding.  (The tail of the parameter list and the braces
 * fall in lines elided from this view.)
 */
33 static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
36 return syscall(__NR_close_range, fd, max_fd, flags);
/*
 * Basic close_range() coverage: open a run of fds on /dev/null, close
 * sub-ranges, and probe each fd with fcntl(F_GETFL) to verify exactly
 * which fds were closed and which survived.
 * NOTE(review): several original lines are elided from this view;
 * comments describe only what is visible.
 */
39 TEST(core_close_range)
44 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
47 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
50 SKIP(return, "Skipping test since /dev/null does not exist");
/* Bogus flags (-1) must be rejected; SKIP entirely when the syscall
 * itself is unavailable (error-path details elided). */
56 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
58 SKIP(return, "close_range() syscall not supported");
/* Distinct fds opened separately never alias the same struct file; on
 * kernels without F_DUPFD_QUERY the call fails with EINVAL instead
 * (the branch selecting between the two outcomes is elided). */
61 for (i = 0; i < 100; i++) {
62 ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
64 EXPECT_EQ(errno, EINVAL);
/* Close [0..50]; those must be gone while [51..100] must survive. */
70 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
72 for (i = 0; i <= 50; i++)
73 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
75 for (i = 51; i <= 100; i++)
76 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
78 /* create a couple of gaps */
86 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
88 for (i = 51; i <= 92; i++)
89 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
91 for (i = 93; i <= 100; i++)
92 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
94 /* test that the kernel caps and still closes all fds */
95 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
97 for (i = 93; i <= 99; i++)
98 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
/* i == 100 after the loop: the last fd must still be open here... */
100 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
/* ...until it is closed via a single-fd range. */
102 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
104 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
/*
 * CLOSE_RANGE_UNSHARE: the child shares the parent's fd table via
 * CLONE_FILES, so close_range() must first unshare the table and close
 * fds only in the child's private copy.  The child reports failures via
 * bare if-checks (exit paths elided from this view); the parent verifies
 * the child's exit status at the end.
 */
107 TEST(close_range_unshare)
112 struct __clone_args args = {
113 .flags = CLONE_FILES,
114 .exit_signal = SIGCHLD,
117 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
120 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
123 SKIP(return, "Skipping test since /dev/null does not exist");
129 pid = sys_clone3(&args, sizeof(args));
/* Child: close [0..50] in an unshared copy of the fd table. */
133 ret = sys_close_range(open_fds[0], open_fds[50],
134 CLOSE_RANGE_UNSHARE);
138 for (i = 0; i <= 50; i++)
139 if (fcntl(open_fds[i], F_GETFL) != -1)
142 for (i = 51; i <= 100; i++)
143 if (fcntl(open_fds[i], F_GETFL) == -1)
146 /* create a couple of gaps */
154 ret = sys_close_range(open_fds[51], open_fds[92],
155 CLOSE_RANGE_UNSHARE);
159 for (i = 51; i <= 92; i++)
160 if (fcntl(open_fds[i], F_GETFL) != -1)
163 for (i = 93; i <= 100; i++)
164 if (fcntl(open_fds[i], F_GETFL) == -1)
167 /* test that the kernel caps and still closes all fds */
168 ret = sys_close_range(open_fds[93], open_fds[99],
169 CLOSE_RANGE_UNSHARE);
173 for (i = 93; i <= 99; i++)
174 if (fcntl(open_fds[i], F_GETFL) != -1)
177 if (fcntl(open_fds[100], F_GETFL) == -1)
180 ret = sys_close_range(open_fds[100], open_fds[100],
181 CLOSE_RANGE_UNSHARE);
185 if (fcntl(open_fds[100], F_GETFL) != -1)
/* Parent: the child must have exited cleanly (status 0). */
191 EXPECT_EQ(waitpid(pid, &status, 0), pid);
192 EXPECT_EQ(true, WIFEXITED(status));
193 EXPECT_EQ(0, WEXITSTATUS(status));
/*
 * CLOSE_RANGE_UNSHARE with max_fd = UINT_MAX: the kernel must cap the
 * range at the highest open fd and close every fd in the child's
 * unshared table.
 */
196 TEST(close_range_unshare_capped)
201 struct __clone_args args = {
202 .flags = CLONE_FILES,
203 .exit_signal = SIGCHLD,
206 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
209 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
212 SKIP(return, "Skipping test since /dev/null does not exist");
218 pid = sys_clone3(&args, sizeof(args));
222 ret = sys_close_range(open_fds[0], UINT_MAX,
223 CLOSE_RANGE_UNSHARE);
/* Child: every opened fd must now be closed. */
227 for (i = 0; i <= 100; i++)
228 if (fcntl(open_fds[i], F_GETFL) != -1)
/* Parent: the child must have exited cleanly (status 0). */
234 EXPECT_EQ(waitpid(pid, &status, 0), pid);
235 EXPECT_EQ(true, WIFEXITED(status));
236 EXPECT_EQ(0, WEXITSTATUS(status));
/*
 * CLOSE_RANGE_CLOEXEC: instead of closing the fds, mark the whole range
 * close-on-exec.  The fds are deliberately opened WITHOUT O_CLOEXEC so
 * that the flag flips are observable via fcntl(F_GETFD).
 */
239 TEST(close_range_cloexec)
243 struct rlimit rlimit;
245 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
248 fd = open("/dev/null", O_RDONLY);
251 SKIP(return, "Skipping test since /dev/null does not exist");
/* Probe kernel support first on a harmless single-fd range. */
257 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
260 SKIP(return, "close_range() syscall not supported");
262 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
265 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
266 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
267 rlimit.rlim_cur = 25;
268 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
270 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
271 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
273 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
276 for (i = 0; i <= 50; i++) {
277 int flags = fcntl(open_fds[i], F_GETFD);
279 EXPECT_GT(flags, -1);
280 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
/* The gap [51-74] must be left untouched. */
283 for (i = 51; i <= 74; i++) {
284 int flags = fcntl(open_fds[i], F_GETFD);
286 EXPECT_GT(flags, -1);
287 EXPECT_EQ(flags & FD_CLOEXEC, 0);
290 for (i = 75; i <= 100; i++) {
291 int flags = fcntl(open_fds[i], F_GETFD);
293 EXPECT_GT(flags, -1);
294 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
297 /* Test a common pattern. */
298 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
299 for (i = 0; i <= 100; i++) {
300 int flags = fcntl(open_fds[i], F_GETFD);
302 EXPECT_GT(flags, -1);
303 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
/*
 * Same as close_range_cloexec, but with CLOSE_RANGE_UNSHARE OR'ed in:
 * the caller gets a private fd table before the close-on-exec bits are
 * applied.  The observable results in this single-threaded caller are
 * identical.
 */
307 TEST(close_range_cloexec_unshare)
311 struct rlimit rlimit;
313 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
316 fd = open("/dev/null", O_RDONLY);
319 SKIP(return, "Skipping test since /dev/null does not exist");
/* Probe kernel support first on a harmless single-fd range. */
325 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
328 SKIP(return, "close_range() syscall not supported");
330 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
333 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
334 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
335 rlimit.rlim_cur = 25;
336 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
338 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
339 ret = sys_close_range(open_fds[0], open_fds[50],
340 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
342 ret = sys_close_range(open_fds[75], open_fds[100],
343 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
346 for (i = 0; i <= 50; i++) {
347 int flags = fcntl(open_fds[i], F_GETFD);
349 EXPECT_GT(flags, -1);
350 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
/* The gap [51-74] must be left untouched. */
353 for (i = 51; i <= 74; i++) {
354 int flags = fcntl(open_fds[i], F_GETFD);
356 EXPECT_GT(flags, -1);
357 EXPECT_EQ(flags & FD_CLOEXEC, 0);
360 for (i = 75; i <= 100; i++) {
361 int flags = fcntl(open_fds[i], F_GETFD);
363 EXPECT_GT(flags, -1);
364 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
367 /* Test a common pattern. */
368 ret = sys_close_range(3, UINT_MAX,
369 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
370 for (i = 0; i <= 100; i++) {
371 int flags = fcntl(open_fds[i], F_GETFD);
373 EXPECT_GT(flags, -1);
374 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
/*
 * Regression test (syzbot): CLOSE_RANGE_CLOEXEC over a fd table with a
 * huge gap (fd duplicated to 1000) on a table SHARED between parent and
 * child (CLONE_FILES, no CLOSE_RANGE_UNSHARE).  Because the table is
 * shared, flag changes made by the child must be visible to the parent.
 */
381 TEST(close_range_cloexec_syzbot)
383 int fd1, fd2, fd3, fd4, flags, ret, status;
385 struct __clone_args args = {
386 .flags = CLONE_FILES,
387 .exit_signal = SIGCHLD,
390 /* Create a huge gap in the fd table. */
391 fd1 = open("/dev/null", O_RDWR);
394 fd2 = dup2(fd1, 1000);
/* fd1 and fd2 refer to the same struct file; on kernels lacking
 * F_DUPFD_QUERY the call instead fails with EINVAL (branch elided). */
397 flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
399 EXPECT_EQ(errno, EINVAL);
404 pid = sys_clone3(&args, sizeof(args));
/* Child: mark everything >= 3 close-on-exec in the shared table. */
408 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
/* NOTE(review): original comment claimed a "private" table here, but no
 * CLOSE_RANGE_UNSHARE was requested, so the table is still shared. */
413 * The fd table is still shared (no unshare requested) and all
414 * our open fds should still be open but made
417 flags = fcntl(fd1, F_GETFD);
418 EXPECT_GT(flags, -1);
419 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
421 flags = fcntl(fd2, F_GETFD);
422 EXPECT_GT(flags, -1);
423 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
/* fd3 is created in lines elided from this view (presumably a dup of
 * fd1 — TODO confirm against the full source). */
428 flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
430 EXPECT_EQ(errno, EINVAL);
438 * Duplicating the file descriptor must remove the
441 flags = fcntl(fd3, F_GETFD);
442 EXPECT_GT(flags, -1);
443 EXPECT_EQ(flags & FD_CLOEXEC, 0);
/* Parent: reap the child, then re-check the shared table. */
448 EXPECT_EQ(waitpid(pid, &status, 0), pid);
449 EXPECT_EQ(true, WIFEXITED(status));
450 EXPECT_EQ(0, WEXITSTATUS(status));
/* NOTE(review): original comment said "must not be close-on-exec",
 * contradicting the assertions below — a shared table means the child's
 * CLOSE_RANGE_CLOEXEC also flagged the parent's fds. */
453 * We had a shared file descriptor table before along with requesting
454 * close-on-exec so the original fds must be close-on-exec.
456 flags = fcntl(fd1, F_GETFD);
457 EXPECT_GT(flags, -1);
458 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
460 flags = fcntl(fd2, F_GETFD);
461 EXPECT_GT(flags, -1);
462 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
467 flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
469 EXPECT_EQ(errno, EINVAL);
474 fd4 = open("/dev/null", O_RDWR);
477 /* Same inode, different file pointers. */
478 flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
480 EXPECT_EQ(errno, EINVAL);
/* A freshly duplicated fd must not inherit the close-on-exec flag. */
485 flags = fcntl(fd3, F_GETFD);
486 EXPECT_GT(flags, -1);
487 EXPECT_EQ(flags & FD_CLOEXEC, 0);
489 EXPECT_EQ(close(fd1), 0);
490 EXPECT_EQ(close(fd2), 0);
491 EXPECT_EQ(close(fd3), 0);
492 EXPECT_EQ(close(fd4), 0);
/*
 * Regression test (syzbot): CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC on
 * a fd table with a huge gap must copy the whole table when unsharing;
 * a buggy kernel under-copies and oopses.
 */
498 TEST(close_range_cloexec_unshare_syzbot)
500 int i, fd1, fd2, fd3, flags, ret, status;
502 struct __clone_args args = {
503 .flags = CLONE_FILES,
504 .exit_signal = SIGCHLD,
508 * Create a huge gap in the fd table. When we now call
509 * CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper
510 * bound the kernel will only copy up to fd1 file descriptors into the
511 * new fd table. If the kernel is buggy and doesn't handle
512 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
513 * descriptors and we will oops!
515 * On a buggy kernel this should immediately oops. But let's loop just
518 fd1 = open("/dev/null", O_RDWR);
521 fd2 = dup2(fd1, 1000);
524 for (i = 0; i < 100; i++) {
526 pid = sys_clone3(&args, sizeof(args));
/* Child: unshare the table and mark everything >= 3 close-on-exec. */
530 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
531 CLOSE_RANGE_CLOEXEC);
536 * We now have a private file descriptor table and all
537 * our open fds should still be open but made
540 flags = fcntl(fd1, F_GETFD);
541 EXPECT_GT(flags, -1);
542 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
544 flags = fcntl(fd2, F_GETFD);
545 EXPECT_GT(flags, -1);
546 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
/* fd3 is created in lines elided from this view. */
552 * Duplicating the file descriptor must remove the
555 flags = fcntl(fd3, F_GETFD);
556 EXPECT_GT(flags, -1);
557 EXPECT_EQ(flags & FD_CLOEXEC, 0);
559 EXPECT_EQ(close(fd1), 0);
560 EXPECT_EQ(close(fd2), 0);
561 EXPECT_EQ(close(fd3), 0);
/* Parent: reap the child, then verify the parent's fds are untouched —
 * the child worked on its own private (unshared) table. */
566 EXPECT_EQ(waitpid(pid, &status, 0), pid);
567 EXPECT_EQ(true, WIFEXITED(status));
568 EXPECT_EQ(0, WEXITSTATUS(status));
572 * We created a private file descriptor table before along with
573 * requesting close-on-exec so the original fds must not be
576 flags = fcntl(fd1, F_GETFD);
577 EXPECT_GT(flags, -1);
578 EXPECT_EQ(flags & FD_CLOEXEC, 0);
580 flags = fcntl(fd2, F_GETFD);
581 EXPECT_GT(flags, -1);
582 EXPECT_EQ(flags & FD_CLOEXEC, 0);
587 flags = fcntl(fd3, F_GETFD);
588 EXPECT_GT(flags, -1);
589 EXPECT_EQ(flags & FD_CLOEXEC, 0);
591 EXPECT_EQ(close(fd1), 0);
592 EXPECT_EQ(close(fd2), 0);
593 EXPECT_EQ(close(fd3), 0);
/*
 * Regression test: truncating the fd table via CLOSE_RANGE_UNSHARE must
 * also trim the secondary "fully used" bitmap; otherwise the closed
 * range 64..127 stays marked as occupied and dup() skips over it.
 */
596 TEST(close_range_bitmap_corruption)
600 struct __clone_args args = {
601 .flags = CLONE_FILES,
602 .exit_signal = SIGCHLD,
605 /* get the first 128 descriptors open */
606 for (int i = 2; i < 128; i++)
607 EXPECT_GE(dup2(0, i), 0);
609 /* get descriptor table shared */
610 pid = sys_clone3(&args, sizeof(args));
614 /* unshare and truncate descriptor table down to 64 */
615 if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
/* fd 64 must now be closed in the child's private table... */
618 ASSERT_EQ(fcntl(64, F_GETFD), -1);
619 /* ... and verify that the range 64..127 is not
620 stuck "fully used" according to secondary bitmap */
/* dup(0) must pick the lowest free fd, i.e. 64. */
621 EXPECT_EQ(dup(0), 64)
/* Parent: the child must have exited cleanly (status 0). */
626 EXPECT_EQ(waitpid(pid, &status, 0), pid);
627 EXPECT_EQ(true, WIFEXITED(status));
628 EXPECT_EQ(0, WEXITSTATUS(status));
/*
 * Fragment of an F_CREATED_QUERY test (the TEST() header falls in lines
 * elided from this view): fcntl(fd, F_CREATED_QUERY, 0) must return 1
 * only when the preceding open(O_CREAT) actually created the file.
 */
633 for (int i = 0; i < 101; i++) {
637 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
641 "Skipping test since /dev/null does not exist");
644 /* We didn't create "/dev/null". */
645 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
/* Create a fresh per-iteration file: creation must be reported. */
648 sprintf(path, "aaaa_%d", i);
649 fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
652 /* We created "aaaa_%d". */
653 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
656 fd = open(path, O_RDONLY | O_CLOEXEC);
659 /* We're opening it again, so no positive creation check. */
660 EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);