X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/5c697ae74170d43928cb185f5ac1a9058adcae0b..5ce9bb5937aa549efb0f93ee78a06ce8bded0d50:/linux-user/syscall.c diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 5720195654..933c2cd9a9 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -17,24 +17,15 @@ * along with this program; if not, see . */ #define _ATFILE_SOURCE -#include -#include -#include -#include +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/path.h" #include #include -#include -#include -#include -#include -#include #include -#include #include #include #include -#include -#include #include #include #include @@ -44,7 +35,6 @@ #include #include #include -#include #include #ifdef __ia64__ int __clone2(int (*fn)(void *), void *child_stack_base, @@ -60,6 +50,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include #include #include +#include //#include #include #include @@ -110,6 +101,9 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include #include #include +#include +#include +#include #include "linux_loop.h" #include "uname.h" @@ -119,6 +113,10 @@ int __clone2(int (*fn)(void *), void *child_stack_base, CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) //#define DEBUG +/* Define DEBUG_ERESTARTSYS to force every syscall to be restarted + * once. This exercises the codepaths for restart. 
+ */ +//#define DEBUG_ERESTARTSYS //#include #define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct linux_dirent [2]) @@ -213,7 +211,7 @@ static int gettid(void) { return -ENOSYS; } #endif -#ifdef __NR_getdents +#if defined(TARGET_NR_getdents) && defined(__NR_getdents) _syscall3(int, sys_getdents, uint, fd, struct linux_dirent *, dirp, uint, count); #endif #if !defined(__NR_getdents) || \ @@ -260,6 +258,9 @@ _syscall2(int, ioprio_get, int, which, int, who) #if defined(TARGET_NR_ioprio_set) && defined(__NR_ioprio_set) _syscall3(int, ioprio_set, int, which, int, who, int, ioprio) #endif +#if defined(TARGET_NR_getrandom) && defined(__NR_getrandom) +_syscall3(int, getrandom, void *, buf, size_t, buflen, unsigned int, flags) +#endif static bitmask_transtbl fcntl_flags_tbl[] = { { TARGET_O_ACCMODE, TARGET_O_WRONLY, O_ACCMODE, O_WRONLY, }, @@ -294,6 +295,72 @@ static bitmask_transtbl fcntl_flags_tbl[] = { { 0, 0, 0, 0 } }; +typedef abi_long (*TargetFdDataFunc)(void *, size_t); +typedef abi_long (*TargetFdAddrFunc)(void *, abi_ulong, socklen_t); +typedef struct TargetFdTrans { + TargetFdDataFunc host_to_target_data; + TargetFdDataFunc target_to_host_data; + TargetFdAddrFunc target_to_host_addr; +} TargetFdTrans; + +static TargetFdTrans **target_fd_trans; + +static unsigned int target_fd_max; + +static TargetFdDataFunc fd_trans_target_to_host_data(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->target_to_host_data; + } + return NULL; +} + +static TargetFdDataFunc fd_trans_host_to_target_data(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->host_to_target_data; + } + return NULL; +} + +static TargetFdAddrFunc fd_trans_target_to_host_addr(int fd) +{ + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { + return target_fd_trans[fd]->target_to_host_addr; + } + return NULL; +} + +static void fd_trans_register(int fd, TargetFdTrans *trans) +{ + unsigned int oldmax; + 
+ if (fd >= target_fd_max) { + oldmax = target_fd_max; + target_fd_max = ((fd >> 6) + 1) << 6; /* by slice of 64 entries */ + target_fd_trans = g_renew(TargetFdTrans *, + target_fd_trans, target_fd_max); + memset((void *)(target_fd_trans + oldmax), 0, + (target_fd_max - oldmax) * sizeof(TargetFdTrans *)); + } + target_fd_trans[fd] = trans; +} + +static void fd_trans_unregister(int fd) +{ + if (fd >= 0 && fd < target_fd_max) { + target_fd_trans[fd] = NULL; + } +} + +static void fd_trans_dup(int oldfd, int newfd) +{ + fd_trans_unregister(newfd); + if (oldfd < target_fd_max && target_fd_trans[oldfd]) { + fd_trans_register(newfd, target_fd_trans[oldfd]); + } +} + static int sys_getcwd1(char *buf, size_t size) { if (getcwd(buf, size) == NULL) { @@ -303,18 +370,6 @@ static int sys_getcwd1(char *buf, size_t size) return strlen(buf)+1; } -static int sys_openat(int dirfd, const char *pathname, int flags, mode_t mode) -{ - /* - * open(2) has extra parameter 'mode' when called with - * flag O_CREAT. - */ - if ((flags & O_CREAT) != 0) { - return (openat(dirfd, pathname, flags, mode)); - } - return (openat(dirfd, pathname, flags)); -} - #ifdef TARGET_NR_utimensat #ifdef CONFIG_UTIMENSAT static int sys_utimensat(int dirfd, const char *pathname, @@ -386,15 +441,6 @@ _syscall5(int, sys_ppoll, struct pollfd *, fds, nfds_t, nfds, size_t, sigsetsize) #endif -#if defined(TARGET_NR_pselect6) -#ifndef __NR_pselect6 -# define __NR_pselect6 -1 -#endif -#define __NR_sys_pselect6 __NR_pselect6 -_syscall6(int, sys_pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, - fd_set *, exceptfds, struct timespec *, timeout, void *, sig); -#endif - #if defined(TARGET_NR_prlimit64) #ifndef __NR_prlimit64 # define __NR_prlimit64 -1 @@ -457,6 +503,7 @@ static uint16_t target_to_host_errno_table[ERRNO_TABLE_SIZE] = { * minus the errnos that are not actually generic to all archs. 
*/ static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = { + [EAGAIN] = TARGET_EAGAIN, [EIDRM] = TARGET_EIDRM, [ECHRNG] = TARGET_ECHRNG, [EL2NSYNC] = TARGET_EL2NSYNC, @@ -566,15 +613,19 @@ static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = { static inline int host_to_target_errno(int err) { - if(host_to_target_errno_table[err]) + if (err >= 0 && err < ERRNO_TABLE_SIZE && + host_to_target_errno_table[err]) { return host_to_target_errno_table[err]; + } return err; } static inline int target_to_host_errno(int err) { - if (target_to_host_errno_table[err]) + if (err >= 0 && err < ERRNO_TABLE_SIZE && + target_to_host_errno_table[err]) { return target_to_host_errno_table[err]; + } return err; } @@ -599,6 +650,67 @@ char *target_strerror(int err) return strerror(target_to_host_errno(err)); } +#define safe_syscall0(type, name) \ +static type safe_##name(void) \ +{ \ + return safe_syscall(__NR_##name); \ +} + +#define safe_syscall1(type, name, type1, arg1) \ +static type safe_##name(type1 arg1) \ +{ \ + return safe_syscall(__NR_##name, arg1); \ +} + +#define safe_syscall2(type, name, type1, arg1, type2, arg2) \ +static type safe_##name(type1 arg1, type2 arg2) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2); \ +} + +#define safe_syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3); \ +} + +#define safe_syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4); \ +} + +#define safe_syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5); \ +} + +#define safe_syscall6(type, 
name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5, arg6); \ +} + +safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) +safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) +safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ + int, flags, mode_t, mode) +safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ + struct rusage *, rusage) +safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ + int, options, struct rusage *, rusage) +safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp) +safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ + fd_set *, exceptfds, struct timespec *, timeout, void *, sig) +safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ + const struct timespec *,timeout,int *,uaddr2,int,val3) + static inline int host_to_target_sock_type(int host_type) { int target_type; @@ -1009,7 +1121,8 @@ static abi_long do_select(int n, { fd_set rfds, wfds, efds; fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; - struct timeval tv, *tv_ptr; + struct timeval tv; + struct timespec ts, *ts_ptr; abi_long ret; ret = copy_from_user_fdset_ptr(&rfds, &rfds_ptr, rfd_addr, n); @@ -1028,12 +1141,15 @@ static abi_long do_select(int n, if (target_tv_addr) { if (copy_from_user_timeval(&tv, target_tv_addr)) return -TARGET_EFAULT; - tv_ptr = &tv; + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * 1000; + ts_ptr = &ts; } else { - tv_ptr = NULL; + ts_ptr = NULL; } - ret = get_errno(select(n, rfds_ptr, wfds_ptr, efds_ptr, tv_ptr)); + ret = get_errno(safe_pselect6(n, rfds_ptr, wfds_ptr, efds_ptr, + ts_ptr, NULL)); if (!is_error(ret)) { if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) @@ -1043,8 +1159,13 @@ static abi_long 
do_select(int n, if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) return -TARGET_EFAULT; - if (target_tv_addr && copy_to_user_timeval(target_tv_addr, &tv)) - return -TARGET_EFAULT; + if (target_tv_addr) { + tv.tv_sec = ts.tv_sec; + tv.tv_usec = ts.tv_nsec / 1000; + if (copy_to_user_timeval(target_tv_addr, &tv)) { + return -TARGET_EFAULT; + } + } } return ret; @@ -1112,7 +1233,7 @@ static inline abi_long target_to_host_ip_mreq(struct ip_mreqn *mreqn, return 0; } -static inline abi_long target_to_host_sockaddr(struct sockaddr *addr, +static inline abi_long target_to_host_sockaddr(int fd, struct sockaddr *addr, abi_ulong target_addr, socklen_t len) { @@ -1120,6 +1241,10 @@ static inline abi_long target_to_host_sockaddr(struct sockaddr *addr, sa_family_t sa_family; struct target_sockaddr *target_saddr; + if (fd_trans_target_to_host_addr(fd)) { + return fd_trans_target_to_host_addr(fd)(addr, target_addr, len); + } + target_saddr = lock_user(VERIFY_READ, target_addr, len, 1); if (!target_saddr) return -TARGET_EFAULT; @@ -1147,7 +1272,13 @@ static inline abi_long target_to_host_sockaddr(struct sockaddr *addr, memcpy(addr, target_saddr, len); addr->sa_family = sa_family; - if (sa_family == AF_PACKET) { + if (sa_family == AF_NETLINK) { + struct sockaddr_nl *nladdr; + + nladdr = (struct sockaddr_nl *)addr; + nladdr->nl_pid = tswap32(nladdr->nl_pid); + nladdr->nl_groups = tswap32(nladdr->nl_groups); + } else if (sa_family == AF_PACKET) { struct target_sockaddr_ll *lladdr; lladdr = (struct target_sockaddr_ll *)addr; @@ -1170,6 +1301,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr, return -TARGET_EFAULT; memcpy(target_saddr, addr, len); target_saddr->sa_family = tswap16(addr->sa_family); + if (addr->sa_family == AF_NETLINK) { + struct sockaddr_nl *target_nl = (struct sockaddr_nl *)target_saddr; + target_nl->nl_pid = tswap32(target_nl->nl_pid); + target_nl->nl_groups = tswap32(target_nl->nl_groups); + } unlock_user(target_saddr, target_addr, 
len); return 0; @@ -1181,7 +1317,7 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, struct cmsghdr *cmsg = CMSG_FIRSTHDR(msgh); abi_long msg_controllen; abi_ulong target_cmsg_addr; - struct target_cmsghdr *target_cmsg; + struct target_cmsghdr *target_cmsg, *target_cmsg_start; socklen_t space = 0; msg_controllen = tswapal(target_msgh->msg_controllen); @@ -1189,6 +1325,7 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, goto the_end; target_cmsg_addr = tswapal(target_msgh->msg_control); target_cmsg = lock_user(VERIFY_READ, target_cmsg_addr, msg_controllen, 1); + target_cmsg_start = target_cmsg; if (!target_cmsg) return -TARGET_EFAULT; @@ -1202,6 +1339,15 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, space += CMSG_SPACE(len); if (space > msgh->msg_controllen) { space -= CMSG_SPACE(len); + /* This is a QEMU bug, since we allocated the payload + * area ourselves (unlike overflow in host-to-target + * conversion, which is just the guest giving us a buffer + * that's too small). It can't happen for the payload types + * we currently support; if it becomes an issue in future + * we would need to improve our allocation strategy to + * something more intelligent than "twice the size of the + * target buffer we're reading from". 
+ */ gemu_log("Host cmsg overflow\n"); break; } @@ -1219,17 +1365,18 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, int *target_fd = (int *)target_data; int i, numfds = len / sizeof(int); - for (i = 0; i < numfds; i++) - fd[i] = tswap32(target_fd[i]); + for (i = 0; i < numfds; i++) { + __get_user(fd[i], target_fd + i); + } } else if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_CREDENTIALS) { struct ucred *cred = (struct ucred *)data; struct target_ucred *target_cred = (struct target_ucred *)target_data; - __put_user(target_cred->pid, &cred->pid); - __put_user(target_cred->uid, &cred->uid); - __put_user(target_cred->gid, &cred->gid); + __get_user(cred->pid, &target_cred->pid); + __get_user(cred->uid, &target_cred->uid); + __get_user(cred->gid, &target_cred->gid); } else { gemu_log("Unsupported ancillary data: %d/%d\n", cmsg->cmsg_level, cmsg->cmsg_type); @@ -1237,7 +1384,8 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, } cmsg = CMSG_NXTHDR(msgh, cmsg); - target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg); + target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg, + target_cmsg_start); } unlock_user(target_cmsg, target_cmsg_addr, 0); the_end: @@ -1251,7 +1399,7 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, struct cmsghdr *cmsg = CMSG_FIRSTHDR(msgh); abi_long msg_controllen; abi_ulong target_cmsg_addr; - struct target_cmsghdr *target_cmsg; + struct target_cmsghdr *target_cmsg, *target_cmsg_start; socklen_t space = 0; msg_controllen = tswapal(target_msgh->msg_controllen); @@ -1259,6 +1407,7 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, goto the_end; target_cmsg_addr = tswapal(target_msgh->msg_control); target_cmsg = lock_user(VERIFY_WRITE, target_cmsg_addr, msg_controllen, 0); + target_cmsg_start = target_cmsg; if (!target_cmsg) return -TARGET_EFAULT; @@ -1267,11 +1416,16 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr 
*target_msgh, void *target_data = TARGET_CMSG_DATA(target_cmsg); int len = cmsg->cmsg_len - CMSG_ALIGN(sizeof (struct cmsghdr)); + int tgt_len, tgt_space; - space += TARGET_CMSG_SPACE(len); - if (space > msg_controllen) { - space -= TARGET_CMSG_SPACE(len); - gemu_log("Target cmsg overflow\n"); + /* We never copy a half-header but may copy half-data; + * this is Linux's behaviour in put_cmsg(). Note that + * truncation here is a guest problem (which we report + * to the guest via the CTRUNC bit), unlike truncation + * in target_to_host_cmsg, which is a QEMU bug. + */ + if (msg_controllen < sizeof(struct cmsghdr)) { + target_msgh->msg_flags |= tswap32(MSG_CTRUNC); break; } @@ -1281,8 +1435,35 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, target_cmsg->cmsg_level = tswap32(cmsg->cmsg_level); } target_cmsg->cmsg_type = tswap32(cmsg->cmsg_type); - target_cmsg->cmsg_len = tswapal(TARGET_CMSG_LEN(len)); + tgt_len = TARGET_CMSG_LEN(len); + + /* Payload types which need a different size of payload on + * the target must adjust tgt_len here. + */ + switch (cmsg->cmsg_level) { + case SOL_SOCKET: + switch (cmsg->cmsg_type) { + case SO_TIMESTAMP: + tgt_len = sizeof(struct target_timeval); + break; + default: + break; + } + default: + break; + } + + if (msg_controllen < tgt_len) { + target_msgh->msg_flags |= tswap32(MSG_CTRUNC); + tgt_len = msg_controllen; + } + + /* We must now copy-and-convert len bytes of payload + * into tgt_len bytes of destination space. Bear in mind + * that in both source and destination we may be dealing + * with a truncated value! 
+ */ switch (cmsg->cmsg_level) { case SOL_SOCKET: switch (cmsg->cmsg_type) { @@ -1290,10 +1471,11 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, { int *fd = (int *)data; int *target_fd = (int *)target_data; - int i, numfds = len / sizeof(int); + int i, numfds = tgt_len / sizeof(int); - for (i = 0; i < numfds; i++) - target_fd[i] = tswap32(fd[i]); + for (i = 0; i < numfds; i++) { + __put_user(fd[i], target_fd + i); + } break; } case SO_TIMESTAMP: @@ -1302,12 +1484,14 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, struct target_timeval *target_tv = (struct target_timeval *)target_data; - if (len != sizeof(struct timeval)) + if (len != sizeof(struct timeval) || + tgt_len != sizeof(struct target_timeval)) { goto unimplemented; + } /* copy struct timeval to target */ - target_tv->tv_sec = tswapal(tv->tv_sec); - target_tv->tv_usec = tswapal(tv->tv_usec); + __put_user(tv->tv_sec, &target_tv->tv_sec); + __put_user(tv->tv_usec, &target_tv->tv_usec); break; } case SCM_CREDENTIALS: @@ -1330,11 +1514,22 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, unimplemented: gemu_log("Unsupported ancillary data: %d/%d\n", cmsg->cmsg_level, cmsg->cmsg_type); - memcpy(target_data, data, len); + memcpy(target_data, data, MIN(len, tgt_len)); + if (tgt_len > len) { + memset(target_data + len, 0, tgt_len - len); + } } + target_cmsg->cmsg_len = tswapal(tgt_len); + tgt_space = TARGET_CMSG_SPACE(len); + if (msg_controllen < tgt_space) { + tgt_space = msg_controllen; + } + msg_controllen -= tgt_space; + space += tgt_space; cmsg = CMSG_NXTHDR(msgh, cmsg); - target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg); + target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg, + target_cmsg_start); } unlock_user(target_cmsg, target_cmsg_addr, space); the_end: @@ -1342,6 +1537,549 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, return 0; } +static void 
tswap_nlmsghdr(struct nlmsghdr *nlh) +{ + nlh->nlmsg_len = tswap32(nlh->nlmsg_len); + nlh->nlmsg_type = tswap16(nlh->nlmsg_type); + nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags); + nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq); + nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid); +} + +static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*host_to_target_nlmsg) + (struct nlmsghdr *)) +{ + uint32_t nlmsg_len; + abi_long ret; + + while (len > sizeof(struct nlmsghdr)) { + + nlmsg_len = nlh->nlmsg_len; + if (nlmsg_len < sizeof(struct nlmsghdr) || + nlmsg_len > len) { + break; + } + + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + tswap_nlmsghdr(nlh); + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + tswap_nlmsghdr(nlh); + return 0; + } + default: + ret = host_to_target_nlmsg(nlh); + if (ret < 0) { + tswap_nlmsghdr(nlh); + return ret; + } + break; + } + tswap_nlmsghdr(nlh); + len -= NLMSG_ALIGN(nlmsg_len); + nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len)); + } + return 0; +} + +static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, + size_t len, + abi_long (*target_to_host_nlmsg) + (struct nlmsghdr *)) +{ + int ret; + + while (len > sizeof(struct nlmsghdr)) { + if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) || + tswap32(nlh->nlmsg_len) > len) { + break; + } + tswap_nlmsghdr(nlh); + switch (nlh->nlmsg_type) { + case NLMSG_DONE: + return 0; + case NLMSG_NOOP: + break; + case NLMSG_ERROR: + { + struct nlmsgerr *e = NLMSG_DATA(nlh); + e->error = tswap32(e->error); + tswap_nlmsghdr(&e->msg); + } + default: + ret = target_to_host_nlmsg(nlh); + if (ret < 0) { + return ret; + } + } + len -= NLMSG_ALIGN(nlh->nlmsg_len); + nlh = (struct nlmsghdr *)(((char *)nlh) + NLMSG_ALIGN(nlh->nlmsg_len)); + } + return 0; +} + +static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, + size_t len, + 
abi_long (*host_to_target_rtattr) + (struct rtattr *)) +{ + unsigned short rta_len; + abi_long ret; + + while (len > sizeof(struct rtattr)) { + rta_len = rtattr->rta_len; + if (rta_len < sizeof(struct rtattr) || + rta_len > len) { + break; + } + ret = host_to_target_rtattr(rtattr); + rtattr->rta_len = tswap16(rtattr->rta_len); + rtattr->rta_type = tswap16(rtattr->rta_type); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len)); + } + return 0; +} + +static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct rtnl_link_stats *st; + struct rtnl_link_stats64 *st64; + struct rtnl_link_ifmap *map; + + switch (rtattr->rta_type) { + /* binary stream */ + case IFLA_ADDRESS: + case IFLA_BROADCAST: + /* string */ + case IFLA_IFNAME: + case IFLA_QDISC: + break; + /* uin8_t */ + case IFLA_OPERSTATE: + case IFLA_LINKMODE: + case IFLA_CARRIER: + case IFLA_PROTO_DOWN: + break; + /* uint32_t */ + case IFLA_MTU: + case IFLA_LINK: + case IFLA_WEIGHT: + case IFLA_TXQLEN: + case IFLA_CARRIER_CHANGES: + case IFLA_NUM_RX_QUEUES: + case IFLA_NUM_TX_QUEUES: + case IFLA_PROMISCUITY: + case IFLA_EXT_MASK: + case IFLA_LINK_NETNSID: + case IFLA_GROUP: + case IFLA_MASTER: + case IFLA_NUM_VF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct rtnl_link_stats */ + case IFLA_STATS: + st = RTA_DATA(rtattr); + st->rx_packets = tswap32(st->rx_packets); + st->tx_packets = tswap32(st->tx_packets); + st->rx_bytes = tswap32(st->rx_bytes); + st->tx_bytes = tswap32(st->tx_bytes); + st->rx_errors = tswap32(st->rx_errors); + st->tx_errors = tswap32(st->tx_errors); + st->rx_dropped = tswap32(st->rx_dropped); + st->tx_dropped = tswap32(st->tx_dropped); + st->multicast = tswap32(st->multicast); + st->collisions = tswap32(st->collisions); + + /* detailed rx_errors: */ + st->rx_length_errors = tswap32(st->rx_length_errors); + st->rx_over_errors = tswap32(st->rx_over_errors); + 
st->rx_crc_errors = tswap32(st->rx_crc_errors); + st->rx_frame_errors = tswap32(st->rx_frame_errors); + st->rx_fifo_errors = tswap32(st->rx_fifo_errors); + st->rx_missed_errors = tswap32(st->rx_missed_errors); + + /* detailed tx_errors */ + st->tx_aborted_errors = tswap32(st->tx_aborted_errors); + st->tx_carrier_errors = tswap32(st->tx_carrier_errors); + st->tx_fifo_errors = tswap32(st->tx_fifo_errors); + st->tx_heartbeat_errors = tswap32(st->tx_heartbeat_errors); + st->tx_window_errors = tswap32(st->tx_window_errors); + + /* for cslip etc */ + st->rx_compressed = tswap32(st->rx_compressed); + st->tx_compressed = tswap32(st->tx_compressed); + break; + /* struct rtnl_link_stats64 */ + case IFLA_STATS64: + st64 = RTA_DATA(rtattr); + st64->rx_packets = tswap64(st64->rx_packets); + st64->tx_packets = tswap64(st64->tx_packets); + st64->rx_bytes = tswap64(st64->rx_bytes); + st64->tx_bytes = tswap64(st64->tx_bytes); + st64->rx_errors = tswap64(st64->rx_errors); + st64->tx_errors = tswap64(st64->tx_errors); + st64->rx_dropped = tswap64(st64->rx_dropped); + st64->tx_dropped = tswap64(st64->tx_dropped); + st64->multicast = tswap64(st64->multicast); + st64->collisions = tswap64(st64->collisions); + + /* detailed rx_errors: */ + st64->rx_length_errors = tswap64(st64->rx_length_errors); + st64->rx_over_errors = tswap64(st64->rx_over_errors); + st64->rx_crc_errors = tswap64(st64->rx_crc_errors); + st64->rx_frame_errors = tswap64(st64->rx_frame_errors); + st64->rx_fifo_errors = tswap64(st64->rx_fifo_errors); + st64->rx_missed_errors = tswap64(st64->rx_missed_errors); + + /* detailed tx_errors */ + st64->tx_aborted_errors = tswap64(st64->tx_aborted_errors); + st64->tx_carrier_errors = tswap64(st64->tx_carrier_errors); + st64->tx_fifo_errors = tswap64(st64->tx_fifo_errors); + st64->tx_heartbeat_errors = tswap64(st64->tx_heartbeat_errors); + st64->tx_window_errors = tswap64(st64->tx_window_errors); + + /* for cslip etc */ + st64->rx_compressed = tswap64(st64->rx_compressed); + 
st64->tx_compressed = tswap64(st64->tx_compressed); + break; + /* struct rtnl_link_ifmap */ + case IFLA_MAP: + map = RTA_DATA(rtattr); + map->mem_start = tswap64(map->mem_start); + map->mem_end = tswap64(map->mem_end); + map->base_addr = tswap64(map->base_addr); + map->irq = tswap16(map->irq); + break; + /* nested */ + case IFLA_AF_SPEC: + case IFLA_LINKINFO: + /* FIXME: implement nested type */ + gemu_log("Unimplemented nested type %d\n", rtattr->rta_type); + break; + default: + gemu_log("Unknown host IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + struct ifa_cacheinfo *ci; + + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_ADDRESS: + case IFA_LOCAL: + break; + /* string */ + case IFA_LABEL: + break; + /* u32 */ + case IFA_FLAGS: + case IFA_BROADCAST: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + /* struct ifa_cacheinfo */ + case IFA_CACHEINFO: + ci = RTA_DATA(rtattr); + ci->ifa_prefered = tswap32(ci->ifa_prefered); + ci->ifa_valid = tswap32(ci->ifa_valid); + ci->cstamp = tswap32(ci->cstamp); + ci->tstamp = tswap32(ci->tstamp); + break; + default: + gemu_log("Unknown host IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case RTA_GATEWAY: + case RTA_DST: + case RTA_PREFSRC: + break; + /* u32 */ + case RTA_PRIORITY: + case RTA_TABLE: + case RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long host_to_target_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_link_rtattr); +} + +static abi_long 
host_to_target_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_addr_rtattr); +} + +static abi_long host_to_target_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + return host_to_target_for_each_rtattr(rtattr, rtattr_len, + host_to_target_data_route_rtattr); +} + +static abi_long host_to_target_data_route(struct nlmsghdr *nlh) +{ + uint32_t nlmsg_len; + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + nlmsg_len = nlh->nlmsg_len; + switch (nlh->nlmsg_type) { + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_GETLINK: + ifi = NLMSG_DATA(nlh); + ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + host_to_target_link_rtattr(IFLA_RTA(ifi), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_GETADDR: + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + host_to_target_addr_rtattr(IFA_RTA(ifa), + nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_GETROUTE: + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + host_to_target_route_rtattr(RTM_RTA(rtm), + nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); + break; + default: + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_route); +} + +static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr, + size_t len, + abi_long (*target_to_host_rtattr) + (struct rtattr *)) +{ + abi_long ret; + + while (len >= sizeof(struct rtattr)) { + if (tswap16(rtattr->rta_len) < sizeof(struct rtattr) || + tswap16(rtattr->rta_len) > len) { + break; + } + rtattr->rta_len = tswap16(rtattr->rta_len); + 
rtattr->rta_type = tswap16(rtattr->rta_type); + ret = target_to_host_rtattr(rtattr); + if (ret < 0) { + return ret; + } + len -= RTA_ALIGN(rtattr->rta_len); + rtattr = (struct rtattr *)(((char *)rtattr) + + RTA_ALIGN(rtattr->rta_len)); + } + return 0; +} + +static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + default: + gemu_log("Unknown target IFLA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr) +{ + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case IFA_LOCAL: + case IFA_ADDRESS: + break; + default: + gemu_log("Unknown target IFA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr) +{ + uint32_t *u32; + switch (rtattr->rta_type) { + /* binary: depends on family type */ + case RTA_DST: + case RTA_SRC: + case RTA_GATEWAY: + break; + /* u32 */ + case RTA_OIF: + u32 = RTA_DATA(rtattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown target RTA type: %d\n", rtattr->rta_type); + break; + } + return 0; +} + +static void target_to_host_link_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_link_rtattr); +} + +static void target_to_host_addr_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_addr_rtattr); +} + +static void target_to_host_route_rtattr(struct rtattr *rtattr, + uint32_t rtattr_len) +{ + target_to_host_for_each_rtattr(rtattr, rtattr_len, + target_to_host_data_route_rtattr); +} + +static abi_long target_to_host_data_route(struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifi; + struct ifaddrmsg *ifa; + struct rtmsg *rtm; + + switch (nlh->nlmsg_type) { + case RTM_GETLINK: + break; + case RTM_NEWLINK: + case RTM_DELLINK: + ifi = NLMSG_DATA(nlh); + 
ifi->ifi_type = tswap16(ifi->ifi_type); + ifi->ifi_index = tswap32(ifi->ifi_index); + ifi->ifi_flags = tswap32(ifi->ifi_flags); + ifi->ifi_change = tswap32(ifi->ifi_change); + target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifi))); + break; + case RTM_GETADDR: + case RTM_NEWADDR: + case RTM_DELADDR: + ifa = NLMSG_DATA(nlh); + ifa->ifa_index = tswap32(ifa->ifa_index); + target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*ifa))); + break; + case RTM_GETROUTE: + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + rtm = NLMSG_DATA(nlh); + rtm->rtm_flags = tswap32(rtm->rtm_flags); + target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len - + NLMSG_LENGTH(sizeof(*rtm))); + break; + default: + return -TARGET_EOPNOTSUPP; + } + return 0; +} + +static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_route); +} + +static abi_long host_to_target_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + default: + gemu_log("Unknown host audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + return 0; +} + +static inline abi_long host_to_target_nlmsg_audit(struct nlmsghdr *nlh, + size_t len) +{ + return host_to_target_for_each_nlmsg(nlh, len, host_to_target_data_audit); +} + +static abi_long target_to_host_data_audit(struct nlmsghdr *nlh) +{ + switch (nlh->nlmsg_type) { + case AUDIT_USER: + case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: + case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: + break; + default: + gemu_log("Unknown target audit message type %d\n", + nlh->nlmsg_type); + return -TARGET_EINVAL; + } + + return 0; +} + +static abi_long target_to_host_nlmsg_audit(struct nlmsghdr *nlh, size_t len) +{ + return target_to_host_for_each_nlmsg(nlh, len, target_to_host_data_audit); +} + /* do_setsockopt() Must return target values and target errnos. 
*/ static abi_long do_setsockopt(int sockfd, int level, int optname, abi_ulong optval_addr, socklen_t optlen) @@ -1499,7 +2237,7 @@ set_timeout: } fprog.len = tswap16(tfprog->len); - filter = malloc(fprog.len * sizeof(*filter)); + filter = g_try_new(struct sock_filter, fprog.len); if (filter == NULL) { unlock_user_struct(tfilter, tfprog->filter, 1); unlock_user_struct(tfprog, optval_addr, 1); @@ -1515,7 +2253,7 @@ set_timeout: ret = get_errno(setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog))); - free(filter); + g_free(filter); unlock_user_struct(tfilter, tfprog->filter, 1); unlock_user_struct(tfprog, optval_addr, 1); @@ -1536,7 +2274,8 @@ set_timeout: addr_ifname = alloca(IFNAMSIZ); memcpy(addr_ifname, dev_ifname, optlen); addr_ifname[optlen] = 0; - ret = get_errno(setsockopt(sockfd, level, optname, addr_ifname, optlen)); + ret = get_errno(setsockopt(sockfd, SOL_SOCKET, optname, + addr_ifname, optlen)); unlock_user (dev_ifname, optval_addr, 0); return ret; } @@ -1826,7 +2565,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr, return NULL; } - vec = calloc(count, sizeof(struct iovec)); + vec = g_try_new0(struct iovec, count); if (vec == NULL) { errno = ENOMEM; return NULL; @@ -1890,7 +2629,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr, } unlock_user(target_vec, target_addr, 0); fail2: - free(vec); + g_free(vec); errno = err; return NULL; } @@ -1915,7 +2654,7 @@ static void unlock_iovec(struct iovec *vec, abi_ulong target_addr, unlock_user(target_vec, target_addr, 0); } - free(vec); + g_free(vec); } static inline int target_to_host_sock_type(int *type) @@ -1967,6 +2706,60 @@ static int sock_flags_fixup(int fd, int target_type) return fd; } +static abi_long packet_target_to_host_sockaddr(void *host_addr, + abi_ulong target_addr, + socklen_t len) +{ + struct sockaddr *addr = host_addr; + struct target_sockaddr *target_saddr; + + target_saddr = lock_user(VERIFY_READ, target_addr, len, 1); + if 
(!target_saddr) { + return -TARGET_EFAULT; + } + + memcpy(addr, target_saddr, len); + addr->sa_family = tswap16(target_saddr->sa_family); + /* spkt_protocol is big-endian */ + + unlock_user(target_saddr, target_addr, 0); + return 0; +} + +static TargetFdTrans target_packet_trans = { + .target_to_host_addr = packet_target_to_host_sockaddr, +}; + +static abi_long netlink_route_target_to_host(void *buf, size_t len) +{ + return target_to_host_nlmsg_route(buf, len); +} + +static abi_long netlink_route_host_to_target(void *buf, size_t len) +{ + return host_to_target_nlmsg_route(buf, len); +} + +static TargetFdTrans target_netlink_route_trans = { + .target_to_host_data = netlink_route_target_to_host, + .host_to_target_data = netlink_route_host_to_target, +}; + +static abi_long netlink_audit_target_to_host(void *buf, size_t len) +{ + return target_to_host_nlmsg_audit(buf, len); +} + +static abi_long netlink_audit_host_to_target(void *buf, size_t len) +{ + return host_to_target_nlmsg_audit(buf, len); +} + +static TargetFdTrans target_netlink_audit_trans = { + .target_to_host_data = netlink_audit_target_to_host, + .host_to_target_data = netlink_audit_host_to_target, +}; + /* do_socket() Must return target values and target errnos. 
*/ static abi_long do_socket(int domain, int type, int protocol) { @@ -1978,11 +2771,41 @@ static abi_long do_socket(int domain, int type, int protocol) return ret; } - if (domain == PF_NETLINK) - return -TARGET_EAFNOSUPPORT; + if (domain == PF_NETLINK && + !(protocol == NETLINK_ROUTE || + protocol == NETLINK_KOBJECT_UEVENT || + protocol == NETLINK_AUDIT)) { + return -TARGET_EPFNOSUPPORT; /* target errno, not the host value */ + } + + if (domain == AF_PACKET || + (domain == AF_INET && type == SOCK_PACKET)) { + protocol = tswap16(protocol); + } + ret = get_errno(socket(domain, type, protocol)); if (ret >= 0) { ret = sock_flags_fixup(ret, target_type); + if (type == SOCK_PACKET) { + /* Manage an obsolete case : + * if socket type is SOCK_PACKET, bind by name + */ + fd_trans_register(ret, &target_packet_trans); + } else if (domain == PF_NETLINK) { + switch (protocol) { + case NETLINK_ROUTE: + fd_trans_register(ret, &target_netlink_route_trans); + break; + case NETLINK_KOBJECT_UEVENT: + /* nothing to do: messages are strings */ + break; + case NETLINK_AUDIT: + fd_trans_register(ret, &target_netlink_audit_trans); + break; + default: + g_assert_not_reached(); + } + } } return ret; } @@ -2000,7 +2823,7 @@ static abi_long do_bind(int sockfd, abi_ulong target_addr, addr = alloca(addrlen+1); - ret = target_to_host_sockaddr(addr, target_addr, addrlen); + ret = target_to_host_sockaddr(sockfd, addr, target_addr, addrlen); if (ret) return ret; @@ -2020,7 +2843,7 @@ static abi_long do_connect(int sockfd, abi_ulong target_addr, addr = alloca(addrlen+1); - ret = target_to_host_sockaddr(addr, target_addr, addrlen); + ret = target_to_host_sockaddr(sockfd, addr, target_addr, addrlen); if (ret) return ret; @@ -2040,8 +2863,9 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, if (msgp->msg_name) { msg.msg_namelen = tswap32(msgp->msg_namelen); msg.msg_name = alloca(msg.msg_namelen+1); - ret = target_to_host_sockaddr(msg.msg_name, tswapal(msgp->msg_name), - msg.msg_namelen); + ret = 
target_to_host_sockaddr(fd, msg.msg_name, + tswapal(msgp->msg_name), + msg.msg_namelen); if (ret) { goto out2; } @@ -2065,14 +2889,25 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, msg.msg_iov = vec; if (send) { - ret = target_to_host_cmsg(&msg, msgp); - if (ret == 0) + if (fd_trans_target_to_host_data(fd)) { + ret = fd_trans_target_to_host_data(fd)(msg.msg_iov->iov_base, + msg.msg_iov->iov_len); + } else { + ret = target_to_host_cmsg(&msg, msgp); + } + if (ret == 0) { ret = get_errno(sendmsg(fd, &msg, flags)); + } } else { ret = get_errno(recvmsg(fd, &msg, flags)); if (!is_error(ret)) { len = ret; - ret = host_to_target_cmsg(msgp, &msg); + if (fd_trans_host_to_target_data(fd)) { + ret = fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base, + msg.msg_iov->iov_len); + } else { + ret = host_to_target_cmsg(msgp, &msg); + } if (!is_error(ret)) { msgp->msg_namelen = tswap32(msg.msg_namelen); if (msg.msg_name != NULL) { @@ -2111,7 +2946,6 @@ static abi_long do_sendrecvmsg(int fd, abi_ulong target_msg, return ret; } -#ifdef TARGET_NR_sendmmsg /* We don't rely on the C library to have sendmmsg/recvmmsg support, * so it might not have this *mmsg-specific flag either. */ @@ -2158,7 +2992,6 @@ static abi_long do_sendrecvmmsg(int fd, abi_ulong target_msgvec, } return ret; } -#endif /* If we don't have a system accept4() then just call accept. 
* The callsites to do_accept4() will ensure that they don't @@ -2301,9 +3134,16 @@ static abi_long do_sendto(int fd, abi_ulong msg, size_t len, int flags, host_msg = lock_user(VERIFY_READ, msg, len, 1); if (!host_msg) return -TARGET_EFAULT; + if (fd_trans_target_to_host_data(fd)) { + ret = fd_trans_target_to_host_data(fd)(host_msg, len); + if (ret < 0) { + unlock_user(host_msg, msg, 0); + return ret; + } + } if (target_addr) { addr = alloca(addrlen+1); - ret = target_to_host_sockaddr(addr, target_addr, addrlen); + ret = target_to_host_sockaddr(fd, addr, target_addr, addrlen); if (ret) { unlock_user(host_msg, msg, 0); return ret; @@ -2381,6 +3221,8 @@ static abi_long do_socketcall(int num, abi_ulong vptr) [SOCKOP_shutdown] = 2, /* sockfd, how */ [SOCKOP_sendmsg] = 3, /* sockfd, msg, flags */ [SOCKOP_recvmsg] = 3, /* sockfd, msg, flags */ + [SOCKOP_sendmmsg] = 4, /* sockfd, msgvec, vlen, flags */ + [SOCKOP_recvmmsg] = 4, /* sockfd, msgvec, vlen, flags */ [SOCKOP_setsockopt] = 5, /* sockfd, level, optname, optval, optlen */ [SOCKOP_getsockopt] = 5, /* sockfd, level, optname, optval, optlen */ }; @@ -2431,6 +3273,10 @@ static abi_long do_socketcall(int num, abi_ulong vptr) return do_sendrecvmsg(a[0], a[1], a[2], 1); case SOCKOP_recvmsg: /* sockfd, msg, flags */ return do_sendrecvmsg(a[0], a[1], a[2], 0); + case SOCKOP_sendmmsg: /* sockfd, msgvec, vlen, flags */ + return do_sendrecvmmsg(a[0], a[1], a[2], a[3], 1); + case SOCKOP_recvmmsg: /* sockfd, msgvec, vlen, flags */ + return do_sendrecvmmsg(a[0], a[1], a[2], a[3], 0); case SOCKOP_setsockopt: /* sockfd, level, optname, optval, optlen */ return do_setsockopt(a[0], a[1], a[2], a[3], a[4]); case SOCKOP_getsockopt: /* sockfd, level, optname, optval, optlen */ @@ -2445,8 +3291,9 @@ static abi_long do_socketcall(int num, abi_ulong vptr) #define N_SHM_REGIONS 32 static struct shm_region { - abi_ulong start; - abi_ulong size; + abi_ulong start; + abi_ulong size; + bool in_use; } shm_regions[N_SHM_REGIONS]; struct 
target_semid_ds @@ -2617,14 +3464,14 @@ static inline abi_long target_to_host_semarray(int semid, unsigned short **host_ nsems = semid_ds.sem_nsems; - *host_array = malloc(nsems*sizeof(unsigned short)); + *host_array = g_try_new(unsigned short, nsems); if (!*host_array) { return -TARGET_ENOMEM; } array = lock_user(VERIFY_READ, target_addr, nsems*sizeof(unsigned short), 1); if (!array) { - free(*host_array); + g_free(*host_array); return -TARGET_EFAULT; } @@ -2661,15 +3508,16 @@ static inline abi_long host_to_target_semarray(int semid, abi_ulong target_addr, for(i=0; imtype = (abi_long) tswapal(target_mb->mtype); memcpy(host_mb->mtext, target_mb->mtext, msgsz); ret = get_errno(msgsnd(msqid, host_mb, msgsz, msgflg)); - free(host_mb); + g_free(host_mb); unlock_user_struct(target_mb, msgp, 0); return ret; } static inline abi_long do_msgrcv(int msqid, abi_long msgp, - unsigned int msgsz, abi_long msgtyp, + ssize_t msgsz, abi_long msgtyp, int msgflg) { struct target_msgbuf *target_mb; @@ -2943,10 +3791,18 @@ static inline abi_long do_msgrcv(int msqid, abi_long msgp, struct msgbuf *host_mb; abi_long ret = 0; + if (msgsz < 0) { + return -TARGET_EINVAL; + } + if (!lock_user_struct(VERIFY_WRITE, target_mb, msgp, 0)) return -TARGET_EFAULT; - host_mb = g_malloc(msgsz+sizeof(long)); + host_mb = g_try_malloc(msgsz + sizeof(long)); + if (!host_mb) { + ret = -TARGET_ENOMEM; + goto end; + } ret = get_errno(msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg)); if (ret > 0) { @@ -3137,7 +3993,8 @@ static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg) ((shmflg & SHM_RDONLY)? 
0 : PAGE_WRITE)); for (i = 0; i < N_SHM_REGIONS; i++) { - if (shm_regions[i].start == 0) { + if (!shm_regions[i].in_use) { + shm_regions[i].in_use = true; shm_regions[i].start = raddr; shm_regions[i].size = shm_info.shm_segsz; break; @@ -3154,8 +4011,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr) int i; for (i = 0; i < N_SHM_REGIONS; ++i) { - if (shm_regions[i].start == shmaddr) { - shm_regions[i].start = 0; + if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) { + shm_regions[i].in_use = false; page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0); break; } @@ -3191,8 +4048,7 @@ static abi_long do_ipc(unsigned int call, abi_long first, * ptr argument. */ abi_ulong atptr; get_user_ual(atptr, ptr); - ret = do_semctl(first, second, third, - (union target_semun) atptr); + ret = do_semctl(first, second, third, atptr); break; } @@ -3277,6 +4133,7 @@ static abi_long do_ipc(unsigned int call, abi_long first, #define STRUCT_SPECIAL(name) STRUCT_ ## name, enum { #include "syscall_types.h" +STRUCT_MAX }; #undef STRUCT #undef STRUCT_SPECIAL @@ -3290,7 +4147,7 @@ enum { typedef struct IOCTLEntry IOCTLEntry; typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp, - int fd, abi_long cmd, abi_long arg); + int fd, int cmd, abi_long arg); struct IOCTLEntry { int target_cmd; @@ -3316,7 +4173,7 @@ struct IOCTLEntry { / sizeof(struct fiemap_extent)) static abi_long do_ioctl_fs_ioc_fiemap(const IOCTLEntry *ie, uint8_t *buf_temp, - int fd, abi_long cmd, abi_long arg) + int fd, int cmd, abi_long arg) { /* The parameter for this ioctl is a struct fiemap followed * by an array of struct fiemap_extent whose size is set @@ -3355,7 +4212,7 @@ static abi_long do_ioctl_fs_ioc_fiemap(const IOCTLEntry *ie, uint8_t *buf_temp, /* We can't fit all the extents into the fixed size buffer. * Allocate one that is large enough and use it instead. 
*/ - fm = malloc(outbufsz); + fm = g_try_malloc(outbufsz); if (!fm) { return -TARGET_ENOMEM; } @@ -3390,14 +4247,14 @@ static abi_long do_ioctl_fs_ioc_fiemap(const IOCTLEntry *ie, uint8_t *buf_temp, } } if (free_fm) { - free(fm); + g_free(fm); } return ret; } #endif static abi_long do_ioctl_ifconf(const IOCTLEntry *ie, uint8_t *buf_temp, - int fd, abi_long cmd, abi_long arg) + int fd, int cmd, abi_long arg) { const argtype *arg_type = ie->arg_type; int target_size; @@ -3491,7 +4348,7 @@ static abi_long do_ioctl_ifconf(const IOCTLEntry *ie, uint8_t *buf_temp, } static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, - abi_long cmd, abi_long arg) + int cmd, abi_long arg) { void *argptr; struct dm_ioctl *host_dm; @@ -3716,7 +4573,7 @@ out: } static abi_long do_ioctl_blkpg(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, - abi_long cmd, abi_long arg) + int cmd, abi_long arg) { void *argptr; int target_size; @@ -3769,7 +4626,7 @@ out: } static abi_long do_ioctl_rt(const IOCTLEntry *ie, uint8_t *buf_temp, - int fd, abi_long cmd, abi_long arg) + int fd, int cmd, abi_long arg) { const argtype *arg_type = ie->arg_type; const StructEntry *se; @@ -3832,7 +4689,7 @@ static abi_long do_ioctl_rt(const IOCTLEntry *ie, uint8_t *buf_temp, } static abi_long do_ioctl_kdsigaccept(const IOCTLEntry *ie, uint8_t *buf_temp, - int fd, abi_long cmd, abi_long arg) + int fd, int cmd, abi_long arg) { int sig = target_to_host_signal(arg); return get_errno(ioctl(fd, ie->host_cmd, sig)); @@ -3849,7 +4706,7 @@ static IOCTLEntry ioctl_entries[] = { /* ??? Implement proper locking for ioctls. */ /* do_ioctl() Must return target values and target errnos. 
*/ -static abi_long do_ioctl(int fd, abi_long cmd, abi_long arg) +static abi_long do_ioctl(int fd, int cmd, abi_long arg) { const IOCTLEntry *ie; const argtype *arg_type; @@ -3883,7 +4740,6 @@ static abi_long do_ioctl(int fd, abi_long cmd, abi_long arg) break; case TYPE_PTRVOID: case TYPE_INT: - /* int argment */ ret = get_errno(ioctl(fd, ie->host_cmd, arg)); break; case TYPE_PTR: @@ -4457,6 +5313,7 @@ static void *clone_func(void *arg) CPUState *cpu; TaskState *ts; + rcu_register_thread(); env = info->env; cpu = ENV_GET_CPU(env); thread_cpu = cpu; @@ -4505,7 +5362,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, new_thread_info info; pthread_attr_t attr; - ts = g_malloc0(sizeof(TaskState)); + ts = g_new0(TaskState, 1); init_task_state(ts); /* we create a new CPU instance. */ new_env = cpu_copy(env); @@ -4566,12 +5423,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, pthread_mutex_unlock(&clone_lock); } else { /* if no CLONE_VM, we consider it is a fork */ - if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) - return -EINVAL; + if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) { + return -TARGET_EINVAL; + } fork_start(); ret = fork(); if (ret == 0) { /* Child Process. */ + rcu_after_fork(); cpu_clone_regs(env, newsp); fork_end(1); /* There is a race condition here. The parent process could @@ -4871,6 +5730,40 @@ static inline int tswapid(int id) #endif /* USE_UID16 */ +/* We must do direct syscalls for setting UID/GID, because we want to + * implement the Linux system call semantics of "change only for this thread", + * not the libc/POSIX semantics of "change for all threads in process". + * (See http://ewontfix.com/17/ for more details.) + * We use the 32-bit version of the syscalls if present; if it is not + * then either the host architecture supports 32-bit UIDs natively with + * the standard syscall, or the 16-bit UID is the best we can do. 
+ */ +#ifdef __NR_setuid32 +#define __NR_sys_setuid __NR_setuid32 +#else +#define __NR_sys_setuid __NR_setuid +#endif +#ifdef __NR_setgid32 +#define __NR_sys_setgid __NR_setgid32 +#else +#define __NR_sys_setgid __NR_setgid +#endif +#ifdef __NR_setresuid32 +#define __NR_sys_setresuid __NR_setresuid32 +#else +#define __NR_sys_setresuid __NR_setresuid +#endif +#ifdef __NR_setresgid32 +#define __NR_sys_setresgid __NR_setresgid32 +#else +#define __NR_sys_setresgid __NR_setresgid +#endif + +_syscall1(int, sys_setuid, uid_t, uid) +_syscall1(int, sys_setgid, gid_t, gid) +_syscall3(int, sys_setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) +_syscall3(int, sys_setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) + void syscall_init(void) { IOCTLEntry *ie; @@ -4878,6 +5771,8 @@ void syscall_init(void) int size; int i; + thunk_init(STRUCT_MAX); + #define STRUCT(name, ...) thunk_register_struct(STRUCT_ ## name, #name, struct_ ## name ## _def); #define STRUCT_SPECIAL(name) thunk_register_struct_direct(STRUCT_ ## name, #name, &struct_ ## name ## _def); #include "syscall_types.h" @@ -4972,8 +5867,8 @@ static inline abi_long target_to_host_timespec(struct timespec *host_ts, if (!lock_user_struct(VERIFY_READ, target_ts, target_addr, 1)) return -TARGET_EFAULT; - host_ts->tv_sec = tswapal(target_ts->tv_sec); - host_ts->tv_nsec = tswapal(target_ts->tv_nsec); + __get_user(host_ts->tv_sec, &target_ts->tv_sec); + __get_user(host_ts->tv_nsec, &target_ts->tv_nsec); unlock_user_struct(target_ts, target_addr, 0); return 0; } @@ -4985,8 +5880,8 @@ static inline abi_long host_to_target_timespec(abi_ulong target_addr, if (!lock_user_struct(VERIFY_WRITE, target_ts, target_addr, 0)) return -TARGET_EFAULT; - target_ts->tv_sec = tswapal(host_ts->tv_sec); - target_ts->tv_nsec = tswapal(host_ts->tv_nsec); + __put_user(host_ts->tv_sec, &target_ts->tv_sec); + __put_user(host_ts->tv_nsec, &target_ts->tv_nsec); unlock_user_struct(target_ts, target_addr, 1); return 0; } @@ -5071,7 +5966,6 @@ static inline int 
target_to_host_mlockall_arg(int arg) } #endif -#if defined(TARGET_NR_stat64) || defined(TARGET_NR_newfstatat) static inline abi_long host_to_target_stat64(void *cpu_env, abi_ulong target_addr, struct stat *host_st) @@ -5134,7 +6028,6 @@ static inline abi_long host_to_target_stat64(void *cpu_env, return 0; } -#endif /* ??? Using host futex calls even when target atomic operations are not really atomic probably breaks things. However implementing @@ -5163,12 +6056,12 @@ static int do_futex(target_ulong uaddr, int op, int val, target_ulong timeout, } else { pts = NULL; } - return get_errno(sys_futex(g2h(uaddr), op, tswap32(val), + return get_errno(safe_futex(g2h(uaddr), op, tswap32(val), pts, NULL, val3)); case FUTEX_WAKE: - return get_errno(sys_futex(g2h(uaddr), op, val, NULL, NULL, 0)); + return get_errno(safe_futex(g2h(uaddr), op, val, NULL, NULL, 0)); case FUTEX_FD: - return get_errno(sys_futex(g2h(uaddr), op, val, NULL, NULL, 0)); + return get_errno(safe_futex(g2h(uaddr), op, val, NULL, NULL, 0)); case FUTEX_REQUEUE: case FUTEX_CMP_REQUEUE: case FUTEX_WAKE_OP: @@ -5178,15 +6071,188 @@ static int do_futex(target_ulong uaddr, int op, int val, target_ulong timeout, to satisfy the compiler. We do not need to tswap TIMEOUT since it's not compared to guest memory. */ pts = (struct timespec *)(uintptr_t) timeout; - return get_errno(sys_futex(g2h(uaddr), op, val, pts, - g2h(uaddr2), - (base_op == FUTEX_CMP_REQUEUE - ? tswap32(val3) - : val3))); + return get_errno(safe_futex(g2h(uaddr), op, val, pts, + g2h(uaddr2), + (base_op == FUTEX_CMP_REQUEUE + ? 
tswap32(val3) + : val3))); default: return -TARGET_ENOSYS; } } +#if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) +static abi_long do_name_to_handle_at(abi_long dirfd, abi_long pathname, + abi_long handle, abi_long mount_id, + abi_long flags) +{ + struct file_handle *target_fh; + struct file_handle *fh; + int mid = 0; + abi_long ret; + char *name; + unsigned int size, total_size; + + if (get_user_s32(size, handle)) { + return -TARGET_EFAULT; + } + + name = lock_user_string(pathname); + if (!name) { + return -TARGET_EFAULT; + } + + total_size = sizeof(struct file_handle) + size; + target_fh = lock_user(VERIFY_WRITE, handle, total_size, 0); + if (!target_fh) { + unlock_user(name, pathname, 0); + return -TARGET_EFAULT; + } + + fh = g_malloc0(total_size); + fh->handle_bytes = size; + + ret = get_errno(name_to_handle_at(dirfd, path(name), fh, &mid, flags)); + unlock_user(name, pathname, 0); + + /* man name_to_handle_at(2): + * Other than the use of the handle_bytes field, the caller should treat + * the file_handle structure as an opaque data type + */ + + memcpy(target_fh, fh, total_size); + target_fh->handle_bytes = tswap32(fh->handle_bytes); + target_fh->handle_type = tswap32(fh->handle_type); + g_free(fh); + unlock_user(target_fh, handle, total_size); + + if (put_user_s32(mid, mount_id)) { + return -TARGET_EFAULT; + } + + return ret; + +} +#endif + +#if defined(TARGET_NR_open_by_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) +static abi_long do_open_by_handle_at(abi_long mount_fd, abi_long handle, + abi_long flags) +{ + struct file_handle *target_fh; + struct file_handle *fh; + unsigned int size, total_size; + abi_long ret; + + if (get_user_s32(size, handle)) { + return -TARGET_EFAULT; + } + + total_size = sizeof(struct file_handle) + size; + target_fh = lock_user(VERIFY_READ, handle, total_size, 1); + if (!target_fh) { + return -TARGET_EFAULT; + } + + fh = g_memdup(target_fh, total_size); + fh->handle_bytes = size; + fh->handle_type = 
tswap32(target_fh->handle_type); + + ret = get_errno(open_by_handle_at(mount_fd, fh, + target_to_host_bitmask(flags, fcntl_flags_tbl))); + + g_free(fh); + + unlock_user(target_fh, handle, total_size); + + return ret; +} +#endif + +#if defined(TARGET_NR_signalfd) || defined(TARGET_NR_signalfd4) + +/* signalfd siginfo conversion: every field is read from the host-side 'info' and written byte-swapped to 'tinfo'; the two may point to the same buffer */ + +static void +host_to_target_signalfd_siginfo(struct signalfd_siginfo *tinfo, + const struct signalfd_siginfo *info) +{ + int sig = host_to_target_signal(info->ssi_signo); + + /* linux/signalfd.h defines a ssi_addr_lsb + * not defined in sys/signalfd.h but used by some kernels + */ + +#ifdef BUS_MCEERR_AO + if (info->ssi_signo == SIGBUS && + (info->ssi_code == BUS_MCEERR_AR || + info->ssi_code == BUS_MCEERR_AO)) { + const uint16_t *ssi_addr_lsb = (const uint16_t *)(&info->ssi_addr + 1); + uint16_t *tssi_addr_lsb = (uint16_t *)(&tinfo->ssi_addr + 1); + *tssi_addr_lsb = tswap16(*ssi_addr_lsb); + } +#endif + + tinfo->ssi_signo = tswap32(sig); + tinfo->ssi_errno = tswap32(info->ssi_errno); + tinfo->ssi_code = tswap32(info->ssi_code); + tinfo->ssi_pid = tswap32(info->ssi_pid); + tinfo->ssi_uid = tswap32(info->ssi_uid); + tinfo->ssi_fd = tswap32(info->ssi_fd); + tinfo->ssi_tid = tswap32(info->ssi_tid); + tinfo->ssi_band = tswap32(info->ssi_band); + tinfo->ssi_overrun = tswap32(info->ssi_overrun); + tinfo->ssi_trapno = tswap32(info->ssi_trapno); + tinfo->ssi_status = tswap32(info->ssi_status); + tinfo->ssi_int = tswap32(info->ssi_int); + tinfo->ssi_ptr = tswap64(info->ssi_ptr); + tinfo->ssi_utime = tswap64(info->ssi_utime); + tinfo->ssi_stime = tswap64(info->ssi_stime); + tinfo->ssi_addr = tswap64(info->ssi_addr); +} + +static abi_long host_to_target_data_signalfd(void *buf, size_t len) +{ + int i; + + for (i = 0; i < len; i += sizeof(struct signalfd_siginfo)) { + host_to_target_signalfd_siginfo(buf + i, buf + i); + } + + return len; +} + +static TargetFdTrans target_signalfd_trans = { + .host_to_target_data = host_to_target_data_signalfd, +}; + 
+static abi_long do_signalfd4(int fd, abi_long mask, int flags) +{ + int host_flags; + target_sigset_t *target_mask; + sigset_t host_mask; + abi_long ret; + + if (flags & ~(TARGET_O_NONBLOCK | TARGET_O_CLOEXEC)) { + return -TARGET_EINVAL; + } + if (!lock_user_struct(VERIFY_READ, target_mask, mask, 1)) { + return -TARGET_EFAULT; + } + + target_to_host_sigset(&host_mask, target_mask); + + host_flags = target_to_host_bitmask(flags, fcntl_flags_tbl); + + ret = get_errno(signalfd(fd, &host_mask, host_flags)); + if (ret >= 0) { + fd_trans_register(ret, &target_signalfd_trans); + } + + unlock_user_struct(target_mask, mask, 0); + + return ret; +} +#endif /* Map host to target signal numbers for the wait family of syscalls. Assume all other status bits are the same. */ @@ -5219,7 +6285,9 @@ static int open_self_cmdline(void *cpu_env, int fd) nb_read = read(fd_orig, buf, sizeof(buf)); if (nb_read < 0) { + int e = errno; fd_orig = close(fd_orig); + errno = e; return -1; } else if (nb_read == 0) { break; @@ -5239,7 +6307,9 @@ static int open_self_cmdline(void *cpu_env, int fd) if (word_skipped) { if (write(fd, cp_buf, nb_read) != nb_read) { + int e = errno; close(fd_orig); + errno = e; return -1; } } @@ -5259,7 +6329,7 @@ static int open_self_maps(void *cpu_env, int fd) fp = fopen("/proc/self/maps", "r"); if (fp == NULL) { - return -EACCES; + return -1; } while ((read = getline(&line, &len, fp)) != -1) { @@ -5403,7 +6473,7 @@ static int open_net_route(void *cpu_env, int fd) fp = fopen("/proc/net/route", "r"); if (fp == NULL) { - return -EACCES; + return -1; } /* read header */ @@ -5453,7 +6523,7 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, if (is_proc_myself(pathname, "exe")) { int execfd = qemu_getauxval(AT_EXECFD); - return execfd ? execfd : get_errno(sys_openat(dirfd, exec_path, flags, mode)); + return execfd ? 
execfd : safe_openat(dirfd, exec_path, flags, mode); } for (fake_open = fakes; fake_open->filename; fake_open++) { @@ -5479,7 +6549,9 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, unlink(filename); if ((r = fake_open->fill(cpu_env, fd))) { + int e = errno; close(fd); + errno = e; return r; } lseek(fd, 0, SEEK_SET); @@ -5487,7 +6559,7 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, return fd; } - return get_errno(sys_openat(dirfd, path(pathname), flags, mode)); + return safe_openat(dirfd, path(pathname), flags, mode); } #define TIMER_MAGIC 0x0caf0000 @@ -5525,6 +6597,21 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, struct statfs stfs; void *p; +#if defined(DEBUG_ERESTARTSYS) + /* Debug-only code for exercising the syscall-restart code paths + * in the per-architecture cpu main loops: restart every syscall + * the guest makes once before letting it through. + */ + { + static int flag; + + flag = !flag; + if (flag) { + return -TARGET_ERESTARTSYS; + } + } +#endif + #ifdef DEBUG gemu_log("syscall %d", num); #endif @@ -5555,6 +6642,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, thread_cpu = NULL; object_unref(OBJECT(cpu)); g_free(ts); + rcu_unregister_thread(); pthread_exit(NULL); } #ifdef TARGET_GPROF @@ -5570,46 +6658,68 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, else { if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0))) goto efault; - ret = get_errno(read(arg1, p, arg3)); + ret = get_errno(safe_read(arg1, p, arg3)); + if (ret >= 0 && + fd_trans_host_to_target_data(arg1)) { + ret = fd_trans_host_to_target_data(arg1)(p, ret); + } unlock_user(p, arg2, ret); } break; case TARGET_NR_write: if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1))) goto efault; - ret = get_errno(write(arg1, p, arg3)); + ret = get_errno(safe_write(arg1, p, arg3)); unlock_user(p, arg2, 0); break; +#ifdef TARGET_NR_open case TARGET_NR_open: if (!(p = lock_user_string(arg1))) goto 
efault; ret = get_errno(do_openat(cpu_env, AT_FDCWD, p, target_to_host_bitmask(arg2, fcntl_flags_tbl), arg3)); + fd_trans_unregister(ret); unlock_user(p, arg1, 0); break; +#endif case TARGET_NR_openat: if (!(p = lock_user_string(arg2))) goto efault; ret = get_errno(do_openat(cpu_env, arg1, p, target_to_host_bitmask(arg3, fcntl_flags_tbl), arg4)); + fd_trans_unregister(ret); unlock_user(p, arg2, 0); break; +#if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) + case TARGET_NR_name_to_handle_at: + ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); + break; +#endif +#if defined(TARGET_NR_open_by_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) + case TARGET_NR_open_by_handle_at: + ret = do_open_by_handle_at(arg1, arg2, arg3); + fd_trans_unregister(ret); + break; +#endif case TARGET_NR_close: + fd_trans_unregister(arg1); ret = get_errno(close(arg1)); break; case TARGET_NR_brk: ret = do_brk(arg1); break; +#ifdef TARGET_NR_fork case TARGET_NR_fork: ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, 0, 0, 0)); break; +#endif #ifdef TARGET_NR_waitpid case TARGET_NR_waitpid: { int status; - ret = get_errno(waitpid(arg1, &status, arg3)); + ret = get_errno(safe_wait4(arg1, &status, arg3, 0)); if (!is_error(ret) && arg2 && ret && put_user_s32(host_to_target_waitstatus(status), arg2)) goto efault; @@ -5621,7 +6731,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { siginfo_t info; info.si_pid = 0; - ret = get_errno(waitid(arg1, arg2, &info, arg4)); + ret = get_errno(safe_waitid(arg1, arg2, &info, arg4, NULL)); if (!is_error(ret) && arg3 && info.si_pid != 0) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_siginfo_t), 0))) goto efault; @@ -5636,9 +6746,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(creat(p, arg2)); + fd_trans_unregister(ret); unlock_user(p, arg1, 0); break; #endif +#ifdef TARGET_NR_link case TARGET_NR_link: { void * p2; @@ -5652,6 +6764,7 @@ 
abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg1, 0); } break; +#endif #if defined(TARGET_NR_linkat) case TARGET_NR_linkat: { @@ -5669,12 +6782,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_unlink case TARGET_NR_unlink: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(unlink(p)); unlock_user(p, arg1, 0); break; +#endif #if defined(TARGET_NR_unlinkat) case TARGET_NR_unlinkat: if (!(p = lock_user_string(arg2))) @@ -5740,15 +6855,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } *q = NULL; - /* This case will not be caught by the host's execve() if its - page size is bigger than the target's. */ - if (total_size > MAX_ARG_PAGES * TARGET_PAGE_SIZE) { - ret = -TARGET_E2BIG; - goto execve_end; - } if (!(p = lock_user_string(arg1))) goto execve_efault; - ret = get_errno(execve(p, argp, envp)); + /* Although execve() is not an interruptible syscall it is + * a special case where we must use the safe_syscall wrapper: + * if we allow a signal to happen before we make the host + * syscall then we will 'lose' it, because at the point of + * execve the process leaves QEMU's control. So we use the + * safe syscall wrapper to ensure that we either take the + * signal as a guest signal, or else it does not happen + * before the execve completes and makes it the other + * program's problem. 
+ */ + ret = get_errno(safe_execve(p, argp, envp)); unlock_user(p, arg1, 0); goto execve_end; @@ -5791,12 +6910,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_mknod case TARGET_NR_mknod: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(mknod(p, arg2, arg3)); unlock_user(p, arg1, 0); break; +#endif #if defined(TARGET_NR_mknodat) case TARGET_NR_mknodat: if (!(p = lock_user_string(arg2))) @@ -5805,12 +6926,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg2, 0); break; #endif +#ifdef TARGET_NR_chmod case TARGET_NR_chmod: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(chmod(p, arg2)); unlock_user(p, arg1, 0); break; +#endif #ifdef TARGET_NR_break case TARGET_NR_break: goto unimplemented; @@ -5945,6 +7068,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_utimes case TARGET_NR_utimes: { struct timeval *tvp, tv[2]; @@ -5963,6 +7087,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg1, 0); } break; +#endif #if defined(TARGET_NR_futimesat) case TARGET_NR_futimesat: { @@ -5991,12 +7116,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_gtty: goto unimplemented; #endif +#ifdef TARGET_NR_access case TARGET_NR_access: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(access(path(p), arg2)); unlock_user(p, arg1, 0); break; +#endif #if defined(TARGET_NR_faccessat) && defined(__NR_faccessat) case TARGET_NR_faccessat: if (!(p = lock_user_string(arg2))) @@ -6021,6 +7148,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_kill: ret = get_errno(kill(arg1, target_to_host_signal(arg2))); break; +#ifdef TARGET_NR_rename case TARGET_NR_rename: { void *p2; @@ -6034,6 +7162,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg1, 0); } break; +#endif #if defined(TARGET_NR_renameat) case 
TARGET_NR_renameat: { @@ -6049,12 +7178,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_mkdir case TARGET_NR_mkdir: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(mkdir(p, arg2)); unlock_user(p, arg1, 0); break; +#endif #if defined(TARGET_NR_mkdirat) case TARGET_NR_mkdirat: if (!(p = lock_user_string(arg2))) @@ -6063,18 +7194,25 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg2, 0); break; #endif +#ifdef TARGET_NR_rmdir case TARGET_NR_rmdir: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(rmdir(p)); unlock_user(p, arg1, 0); break; +#endif case TARGET_NR_dup: ret = get_errno(dup(arg1)); + if (ret >= 0) { + fd_trans_dup(arg1, ret); + } break; +#ifdef TARGET_NR_pipe case TARGET_NR_pipe: ret = do_pipe(cpu_env, arg1, 0, 0); break; +#endif #ifdef TARGET_NR_pipe2 case TARGET_NR_pipe2: ret = do_pipe(cpu_env, arg1, @@ -6159,14 +7297,24 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(chroot(p)); unlock_user(p, arg1, 0); break; +#ifdef TARGET_NR_ustat case TARGET_NR_ustat: goto unimplemented; +#endif +#ifdef TARGET_NR_dup2 case TARGET_NR_dup2: ret = get_errno(dup2(arg1, arg2)); + if (ret >= 0) { + fd_trans_dup(arg1, arg2); + } break; +#endif #if defined(CONFIG_DUP3) && defined(TARGET_NR_dup3) case TARGET_NR_dup3: ret = get_errno(dup3(arg1, arg2, arg3)); + if (ret >= 0) { + fd_trans_dup(arg1, arg2); + } break; #endif #ifdef TARGET_NR_getppid /* not on alpha */ @@ -6174,9 +7322,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(getppid()); break; #endif +#ifdef TARGET_NR_getpgrp case TARGET_NR_getpgrp: ret = get_errno(getpgrp()); break; +#endif case TARGET_NR_setsid: ret = get_errno(setsid()); break; @@ -6541,12 +7691,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_sigreturn case TARGET_NR_sigreturn: - /* NOTE: ret is eax, so not transcoding must be done */ ret = 
do_sigreturn(cpu_env); break; #endif case TARGET_NR_rt_sigreturn: - /* NOTE: ret is eax, so not transcoding must be done */ ret = do_rt_sigreturn(cpu_env); break; case TARGET_NR_sethostname: @@ -6735,8 +7883,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, sig_ptr = NULL; } - ret = get_errno(sys_pselect6(n, rfds_ptr, wfds_ptr, efds_ptr, - ts_ptr, sig_ptr)); + ret = get_errno(safe_pselect6(n, rfds_ptr, wfds_ptr, efds_ptr, + ts_ptr, sig_ptr)); if (!is_error(ret)) { if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) @@ -6752,6 +7900,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_symlink case TARGET_NR_symlink: { void *p2; @@ -6765,6 +7914,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg1, 0); } break; +#endif #if defined(TARGET_NR_symlinkat) case TARGET_NR_symlinkat: { @@ -6784,6 +7934,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_oldlstat: goto unimplemented; #endif +#ifdef TARGET_NR_readlink case TARGET_NR_readlink: { void *p2; @@ -6814,6 +7965,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, unlock_user(p, arg1, 0); } break; +#endif #if defined(TARGET_NR_readlinkat) case TARGET_NR_readlinkat: { @@ -7153,9 +8305,20 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(shutdown(arg1, arg2)); break; #endif +#if defined(TARGET_NR_getrandom) && defined(__NR_getrandom) + case TARGET_NR_getrandom: + p = lock_user(VERIFY_WRITE, arg1, arg2, 0); + if (!p) { + goto efault; + } + ret = get_errno(getrandom(p, arg2, arg3)); + unlock_user(p, arg1, ret); + break; +#endif #ifdef TARGET_NR_socket case TARGET_NR_socket: ret = do_socket(arg1, arg2, arg3); + fd_trans_unregister(ret); break; #endif #ifdef TARGET_NR_socketpair @@ -7213,22 +8376,28 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } } break; +#ifdef TARGET_NR_stat case TARGET_NR_stat: if (!(p = lock_user_string(arg1))) goto efault; ret 
= get_errno(stat(path(p), &st)); unlock_user(p, arg1, 0); goto do_stat; +#endif +#ifdef TARGET_NR_lstat case TARGET_NR_lstat: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(lstat(path(p), &st)); unlock_user(p, arg1, 0); goto do_stat; +#endif case TARGET_NR_fstat: { ret = get_errno(fstat(arg1, &st)); +#if defined(TARGET_NR_stat) || defined(TARGET_NR_lstat) do_stat: +#endif if (!is_error(ret)) { struct target_stat *target_st; @@ -7284,7 +8453,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, rusage_ptr = &rusage; else rusage_ptr = NULL; - ret = get_errno(wait4(arg1, &status, arg3, rusage_ptr)); + ret = get_errno(safe_wait4(arg1, &status, arg3, rusage_ptr)); if (!is_error(ret)) { if (status_ptr && ret) { status = host_to_target_waitstatus(status); @@ -7352,7 +8521,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_semctl case TARGET_NR_semctl: - ret = do_semctl(arg1, arg2, arg3, (union target_semun)(abi_ulong)arg4); + ret = do_semctl(arg1, arg2, arg3, arg4); break; #endif #ifdef TARGET_NR_msgctl @@ -7516,6 +8685,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_getdents case TARGET_NR_getdents: #ifdef __NR_getdents #if TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64 @@ -7524,8 +8694,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, struct linux_dirent *dirp; abi_long count = arg3; - dirp = malloc(count); - if (!dirp) { + dirp = g_try_malloc(count); + if (!dirp) { ret = -TARGET_ENOMEM; goto fail; } @@ -7561,7 +8731,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = count1; unlock_user(target_dirp, arg2, ret); } - free(dirp); + g_free(dirp); } #else { @@ -7646,6 +8816,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } #endif break; +#endif /* TARGET_NR_getdents */ #if defined(TARGET_NR_getdents64) && defined(__NR_getdents64) case TARGET_NR_getdents64: { @@ -7693,14 +8864,20 @@ abi_long do_syscall(void *cpu_env, 
int num, abi_long arg1, struct pollfd *pfd; unsigned int i; - target_pfd = lock_user(VERIFY_WRITE, arg1, sizeof(struct target_pollfd) * nfds, 1); - if (!target_pfd) - goto efault; + pfd = NULL; + target_pfd = NULL; + if (nfds) { + target_pfd = lock_user(VERIFY_WRITE, arg1, + sizeof(struct target_pollfd) * nfds, 1); + if (!target_pfd) { + goto efault; + } - pfd = alloca(sizeof(struct pollfd) * nfds); - for(i = 0; i < nfds; i++) { - pfd[i].fd = tswap32(target_pfd[i].fd); - pfd[i].events = tswap16(target_pfd[i].events); + pfd = alloca(sizeof(struct pollfd) * nfds); + for (i = 0; i < nfds; i++) { + pfd[i].fd = tswap32(target_pfd[i].fd); + pfd[i].events = tswap16(target_pfd[i].events); + } } # ifdef TARGET_NR_ppoll @@ -7785,11 +8962,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(fdatasync(arg1)); break; #endif +#ifdef TARGET_NR__sysctl case TARGET_NR__sysctl: /* We don't implement this, but ENOTDIR is always a safe return value. */ ret = -TARGET_ENOTDIR; break; +#endif case TARGET_NR_sched_getaffinity: { unsigned int mask_size; @@ -8091,14 +9270,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; } case TARGET_NR_sigaltstack: -#if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_MIPS) || \ - defined(TARGET_SPARC) || defined(TARGET_PPC) || defined(TARGET_ALPHA) || \ - defined(TARGET_M68K) || defined(TARGET_S390X) || defined(TARGET_OPENRISC) ret = do_sigaltstack(arg1, arg2, get_sp_from_cpustate((CPUArchState *)cpu_env)); break; -#else - goto unimplemented; -#endif #ifdef CONFIG_SENDFILE case TARGET_NR_sendfile: @@ -8236,12 +9409,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = host_to_target_stat64(cpu_env, arg3, &st); break; #endif +#ifdef TARGET_NR_lchown case TARGET_NR_lchown: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(lchown(p, low2highuid(arg2), low2highgid(arg3))); unlock_user(p, arg1, 0); break; +#endif #ifdef TARGET_NR_getuid case TARGET_NR_getuid: ret = 
get_errno(high2lowuid(getuid())); @@ -8324,9 +9499,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setresuid case TARGET_NR_setresuid: - ret = get_errno(setresuid(low2highuid(arg1), - low2highuid(arg2), - low2highuid(arg3))); + ret = get_errno(sys_setresuid(low2highuid(arg1), + low2highuid(arg2), + low2highuid(arg3))); break; #endif #ifdef TARGET_NR_getresuid @@ -8345,9 +9520,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_getresgid case TARGET_NR_setresgid: - ret = get_errno(setresgid(low2highgid(arg1), - low2highgid(arg2), - low2highgid(arg3))); + ret = get_errno(sys_setresgid(low2highgid(arg1), + low2highgid(arg2), + low2highgid(arg3))); break; #endif #ifdef TARGET_NR_getresgid @@ -8364,17 +9539,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#ifdef TARGET_NR_chown case TARGET_NR_chown: if (!(p = lock_user_string(arg1))) goto efault; ret = get_errno(chown(p, low2highuid(arg2), low2highgid(arg3))); unlock_user(p, arg1, 0); break; +#endif case TARGET_NR_setuid: - ret = get_errno(setuid(low2highuid(arg1))); + ret = get_errno(sys_setuid(low2highuid(arg1))); break; case TARGET_NR_setgid: - ret = get_errno(setgid(low2highgid(arg1))); + ret = get_errno(sys_setgid(low2highgid(arg1))); break; case TARGET_NR_setfsuid: ret = get_errno(setfsuid(arg1)); @@ -8656,7 +9833,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setresuid32 case TARGET_NR_setresuid32: - ret = get_errno(setresuid(arg1, arg2, arg3)); + ret = get_errno(sys_setresuid(arg1, arg2, arg3)); break; #endif #ifdef TARGET_NR_getresuid32 @@ -8675,7 +9852,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setresgid32 case TARGET_NR_setresgid32: - ret = get_errno(setresgid(arg1, arg2, arg3)); + ret = get_errno(sys_setresgid(arg1, arg2, arg3)); break; #endif #ifdef TARGET_NR_getresgid32 @@ -8702,12 +9879,12 @@ abi_long 
do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setuid32 case TARGET_NR_setuid32: - ret = get_errno(setuid(arg1)); + ret = get_errno(sys_setuid(arg1)); break; #endif #ifdef TARGET_NR_setgid32 case TARGET_NR_setgid32: - ret = get_errno(setgid(arg1)); + ret = get_errno(sys_setgid(arg1)); break; #endif #ifdef TARGET_NR_setfsuid32 @@ -9351,15 +10528,29 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { loff_t loff_in, loff_out; loff_t *ploff_in = NULL, *ploff_out = NULL; - if(arg2) { - get_user_u64(loff_in, arg2); + if (arg2) { + if (get_user_u64(loff_in, arg2)) { + goto efault; + } ploff_in = &loff_in; } - if(arg4) { - get_user_u64(loff_out, arg2); + if (arg4) { + if (get_user_u64(loff_out, arg4)) { + goto efault; + } ploff_out = &loff_out; } ret = get_errno(splice(arg1, ploff_in, arg3, ploff_out, arg5, arg6)); + if (arg2) { + if (put_user_u64(loff_in, arg2)) { + goto efault; + } + } + if (arg4) { + if (put_user_u64(loff_out, arg4)) { + goto efault; + } + } } break; #endif @@ -9381,6 +10572,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #if defined(TARGET_NR_eventfd) case TARGET_NR_eventfd: ret = get_errno(eventfd(arg1, 0)); + fd_trans_unregister(ret); break; #endif #if defined(TARGET_NR_eventfd2) @@ -9394,6 +10586,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, host_flags |= O_CLOEXEC; } ret = get_errno(eventfd(arg1, host_flags)); + fd_trans_unregister(ret); break; } #endif @@ -9436,6 +10629,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #endif #endif +#if defined(TARGET_NR_signalfd4) + case TARGET_NR_signalfd4: + ret = do_signalfd4(arg1, arg2, arg4); + break; +#endif +#if defined(TARGET_NR_signalfd) + case TARGET_NR_signalfd: + ret = do_signalfd4(arg1, arg2, 0); + break; +#endif #if defined(CONFIG_EPOLL) #if defined(TARGET_NR_epoll_create) case TARGET_NR_epoll_create: @@ -9707,6 +10910,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, timer_t htimer = 
g_posix_timers[timerid]; ret = get_errno(timer_getoverrun(htimer)); } + fd_trans_unregister(ret); break; } #endif