1 /* SPDX-License-Identifier: GPL-2.0 */
3 * syscall_numbering.c - test calling the x86-64 kernel with various
4 * valid and invalid system call numbers.
6 * Copyright (c) 2018 Andrew Lutomirski
22 #include <sys/ptrace.h>
27 #include <linux/ptrace.h>
29 /* Common system call numbers */
33 /* x64-only system call numbers */
37 /* x32-only system call numbers (without X32_BIT) */
40 #define X32_WRITEV 516
42 #define X32_BIT 0x40000000
44 static int nullfd = -1; /* File descriptor for /dev/null */
45 static bool with_x32; /* x32 supported on this kernel? */
57 static const char * const ptrace_pass_name[] =
59 [PTP_NOTHING] = "just stop, no data read",
60 [PTP_GETREGS] = "only getregs",
61 [PTP_WRITEBACK] = "getregs, unmodified setregs",
62 [PTP_FUZZRET] = "modifying the default return",
63 [PTP_FUZZHIGH] = "clobbering the top 32 bits",
64 [PTP_INTNUM] = "sign-extending the syscall number",
68 * Shared memory block between tracer and test
71 unsigned int nerr; /* Total error count */
72 unsigned int indent; /* Message indentation level */
73 enum ptrace_pass ptrace_pass;
74 bool probing_syscall; /* In probe_syscall() */
76 static volatile struct shared *sh;
78 static inline unsigned int offset(void)
80 unsigned int level = sh ? sh->indent : 0;
85 #define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
88 #define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__)
89 #define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
90 #define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__)
92 #define fail(fmt, ...) \
94 msg(FAIL, fmt, ## __VA_ARGS__); \
98 #define crit(fmt, ...) \
101 msg(FAIL, fmt, ## __VA_ARGS__); \
102 msg(SKIP, "Unable to run test\n"); \
106 /* Sentinel for ptrace-modified return value */
107 #define MODIFIED_BY_PTRACE -9999
110 * Directly invokes the given syscall with nullfd as the first argument
111 * and the rest zero. Avoids involving glibc wrappers in case they ever
112 * end up intercepting some system calls for some reason, or modify
113 * the system call number itself.
115 static long long probe_syscall(int msb, int lsb)
117 register long long arg1 asm("rdi") = nullfd;
118 register long long arg2 asm("rsi") = 0;
119 register long long arg3 asm("rdx") = 0;
120 register long long arg4 asm("r10") = 0;
121 register long long arg5 asm("r8") = 0;
122 register long long arg6 asm("r9") = 0;
123 long long nr = ((long long)msb << 32) | (unsigned int)lsb;
127 * We pass in an extra copy of the extended system call number
128 * in %rbx, so we can examine it from the ptrace handler without
129 * worrying about it being possibly modified. This is to test
130 * the validity of struct user regs.orig_rax a.k.a.
131 * struct pt_regs.orig_ax.
133 sh->probing_syscall = true;
134 asm volatile("syscall"
136 : "a" (nr), "b" (nr),
137 "r" (arg1), "r" (arg2), "r" (arg3),
138 "r" (arg4), "r" (arg5), "r" (arg6)
139 : "rcx", "r11", "memory", "cc");
140 sh->probing_syscall = false;
145 static const char *syscall_str(int msb, int start, int end)
148 const char * const type = (start & X32_BIT) ? "x32" : "x64";
152 * Improve readability by stripping the x32 bit, but round
153 * toward zero so we don't display -1 as -1073741825.
161 snprintf(buf, sizeof buf, "%s syscall %d:%d",
164 snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
165 type, msb, lsb, lsb + (end-start));
170 static unsigned int _check_for(int msb, int start, int end, long long expect,
171 const char *expect_str)
173 unsigned int err = 0;
179 for (int nr = start; nr <= end; nr++) {
180 long long ret = probe_syscall(msb, nr);
183 fail("%s returned %lld, but it should have returned %s\n",
184 syscall_str(msb, nr, nr),
195 fail("%s had %u failure%s\n",
196 syscall_str(msb, start, end),
197 err, err != 1 ? "s" : ""); /* plural suffix for every count except exactly one */
199 ok("%s returned %s as expected\n",
200 syscall_str(msb, start, end), expect_str);
208 #define check_for(msb,start,end,expect) \
209 _check_for(msb,start,end,expect,#expect)
211 static bool check_zero(int msb, int nr)
213 return check_for(msb, nr, nr, 0);
216 static bool check_enosys(int msb, int nr)
218 return check_for(msb, nr, nr, -ENOSYS);
222 * Anyone diagnosing a failure will want to know whether the kernel
223 * supports x32. Tell them. This can also be used to conditionalize
224 * tests based on existence or nonexistence of x32.
226 static bool test_x32(void)
229 pid_t mypid = getpid();
231 run("Checking for x32 by calling x32 getpid()\n");
232 ret = probe_syscall(0, SYS_GETPID | X32_BIT);
236 info("x32 is supported\n");
238 } else if (ret == -ENOSYS) {
239 info("x32 is not supported\n");
242 fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
249 static void test_syscalls_common(int msb)
251 enum ptrace_pass pass = sh->ptrace_pass;
253 run("Checking some common syscalls as 64 bit\n");
254 check_zero(msb, SYS_READ);
255 check_zero(msb, SYS_WRITE);
257 run("Checking some 64-bit only syscalls as 64 bit\n");
258 check_zero(msb, X64_READV);
259 check_zero(msb, X64_WRITEV);
261 run("Checking out of range system calls\n");
262 check_for(msb, -64, -2, -ENOSYS);
263 if (pass >= PTP_FUZZRET)
264 check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
266 check_for(msb, -1, -1, -ENOSYS);
267 check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
268 check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
269 check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
272 static void test_syscalls_with_x32(int msb)
275 * Syscalls 512-547 are "x32" syscalls. They are
276 * intended to be called with the x32 (0x40000000) bit
277 * set. Calling them without the x32 bit set is
278 * nonsense and should not work.
280 run("Checking x32 syscalls as 64 bit\n");
281 check_for(msb, 512, 547, -ENOSYS);
283 run("Checking some common syscalls as x32\n");
284 check_zero(msb, SYS_READ | X32_BIT);
285 check_zero(msb, SYS_WRITE | X32_BIT);
287 run("Checking some x32 syscalls as x32\n");
288 check_zero(msb, X32_READV | X32_BIT);
289 check_zero(msb, X32_WRITEV | X32_BIT);
291 run("Checking some 64-bit syscalls as x32\n");
292 check_enosys(msb, X64_IOCTL | X32_BIT);
293 check_enosys(msb, X64_READV | X32_BIT);
294 check_enosys(msb, X64_WRITEV | X32_BIT);
297 static void test_syscalls_without_x32(int msb)
299 run("Checking for absence of x32 system calls\n");
300 check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
303 static void test_syscall_numbering(void)
305 static const int msbs[] = {
306 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
313 * The MSB is supposed to be ignored, so we loop over a few
316 for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
318 run("Checking system calls with msb = %d (0x%x)\n",
323 test_syscalls_common(msb);
325 test_syscalls_with_x32(msb);
327 test_syscalls_without_x32(msb);
335 static void syscall_numbering_tracee(void)
337 enum ptrace_pass pass;
339 if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
340 crit("Failed to request tracing\n");
345 for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
346 sh->ptrace_pass = ++pass) {
347 run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
348 test_syscall_numbering();
352 static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
354 struct user_regs_struct regs;
356 sh->probing_syscall = false; /* Do this on entry only */
358 /* For these, don't even getregs */
359 if (pass == PTP_NOTHING || pass == PTP_DONE)
362 ptrace(PTRACE_GETREGS, testpid, NULL, &regs); /* read the tracee's registers */
364 if (regs.orig_rax != regs.rbx) { /* probe_syscall() passes a copy of the syscall number in %rbx */
365 fail("orig_rax %#llx doesn't match syscall number %#llx\n",
366 (unsigned long long)regs.orig_rax,
367 (unsigned long long)regs.rbx);
372 /* Just read, no writeback */
375 /* Write back the same register state verbatim */
378 regs.rax = MODIFIED_BY_PTRACE;
381 regs.rax = MODIFIED_BY_PTRACE;
382 regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; /* set all of the upper 32 bits */
385 regs.rax = MODIFIED_BY_PTRACE;
386 regs.orig_rax = (int)regs.orig_rax; /* truncate to 32 bits, then sign-extend back */
389 crit("invalid ptrace_pass\n");
393 ptrace(PTRACE_SETREGS, testpid, NULL, &regs); /* write the (possibly modified) registers back */
396 static void syscall_numbering_tracer(pid_t testpid)
401 pid_t wpid = waitpid(testpid, &wstatus, 0);
402 if (wpid < 0 && errno != EINTR)
406 if (!WIFSTOPPED(wstatus))
407 break; /* Thread exited? */
409 if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
410 mess_with_syscall(testpid, sh->ptrace_pass);
411 } while (sh->ptrace_pass != PTP_DONE &&
412 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
414 ptrace(PTRACE_DETACH, testpid, NULL, NULL);
416 /* Wait for the child process to terminate */
417 while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
418 /* wait some more */;
421 static void test_traced_syscall_numbering(void)
425 /* Launch the test thread; this thread continues as the tracer thread */
429 crit("Unable to launch tracer process\n");
430 } else if (testpid == 0) {
431 syscall_numbering_tracee();
434 syscall_numbering_tracer(testpid);
443 * It is quite likely to get a segfault on a failure, so make
444 * sure the message gets out by setting stdout to nonbuffered.
446 setvbuf(stdout, NULL, _IONBF, 0);
449 * Harmless file descriptor to work on...
451 nullfd = open("/dev/null", O_RDWR);
453 crit("Unable to open /dev/null: %s\n", strerror(errno));
457 * Set up a block of shared memory...
459 sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
460 MAP_ANONYMOUS|MAP_SHARED, -1, 0); /* anonymous mapping has no backing file: pass fd -1 for portability */
461 if (sh == MAP_FAILED) {
462 crit("Unable to allocate shared memory block: %s\n",
466 with_x32 = test_x32();
468 run("Running tests without ptrace...\n");
469 test_syscall_numbering();
471 test_traced_syscall_numbering();
475 ok("All system calls succeeded or failed as expected\n");
478 fail("A total of %u system call%s had incorrect behavior\n",
479 nerr, nerr != 1 ? "s" : "");