4 Configuring procfs
4.1 Mount options
+ 5 Filesystem behavior
+
Preface
=======
hg huge page advise flag
nh no huge page advise flag
mg mergable advise flag
+ bt - arm64 BTI guarded page
== =======================================
Note that there is no guarantee that every flag and associated mnemonic will
amount of memory dedicated to the lowest level of page
tables.
NFS_Unstable
- NFS pages sent to the server, but not yet committed to stable
- storage
+ Always zero. Previous counted pages which had been written to
+ the server, but has not been committed to stable storage.
Bounce
Memory used for block device "bounce buffers"
WritebackTmp
For more information on mount propagation see:
- Documentation/filesystems/sharedsubtree.txt
+ Documentation/filesystems/sharedsubtree.rst
3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
========= ========================================================
hidepid= Set /proc/<pid>/ access mode.
gid= Set the group authorized to learn processes information.
+ subset= Show only the specified subset of procfs.
========= ========================================================
- hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
- (default).
-
- hidepid=1 means users may not access any /proc/<pid>/ directories but their
- own. Sensitive files like cmdline, sched*, status are now protected against
- other users. This makes it impossible to learn whether any user runs
- specific program (given the program doesn't reveal itself by its behaviour).
- As an additional bonus, as /proc/<pid>/cmdline is unaccessible for other users,
- poorly written programs passing sensitive information via program arguments are
- now protected against local eavesdroppers.
-
- hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other
- users. It doesn't mean that it hides a fact whether a process with a specific
- pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"),
- but it hides process' uid and gid, which may be learned by stat()'ing
- /proc/<pid>/ otherwise. It greatly complicates an intruder's task of gathering
- information about running processes, whether some daemon runs with elevated
- privileges, whether other user runs some sensitive program, whether other users
- run any program at all, etc.
+ hidepid=off or hidepid=0 means classic mode - everybody may access all
+ /proc/<pid>/ directories (default).
+
+ hidepid=noaccess or hidepid=1 means users may not access any /proc/<pid>/
+ directories but their own. Sensitive files like cmdline, sched*, status are now
+ protected against other users. This makes it impossible to learn whether any
+ user runs specific program (given the program doesn't reveal itself by its
+ behaviour). As an additional bonus, as /proc/<pid>/cmdline is unaccessible for
+ other users, poorly written programs passing sensitive information via program
+ arguments are now protected against local eavesdroppers.
+
+ hidepid=invisible or hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be
+ fully invisible to other users. It doesn't mean that it hides a fact whether a
+ process with a specific pid value exists (it can be learned by other means, e.g.
+ by "kill -0 $PID"), but it hides process' uid and gid, which may be learned by
+ stat()'ing /proc/<pid>/ otherwise. It greatly complicates an intruder's task of
+ gathering information about running processes, whether some daemon runs with
+ elevated privileges, whether other user runs some sensitive program, whether
+ other users run any program at all, etc.
+
+ hidepid=ptraceable or hidepid=4 means that procfs should only contain
+ /proc/<pid>/ directories that the caller can ptrace.
gid= defines a group authorized to learn processes information otherwise
prohibited by hidepid=. If you use some daemon like identd which needs to learn
information about processes information, just add identd to this group.
+
+ subset=pid hides all top level files and directories in the procfs that
+ are not related to tasks.
+
+ 5 Filesystem behavior
+ ----------------------------
+
+ Originally, before the advent of pid namepsace, procfs was a global file
+ system. It means that there was only one procfs instance in the system.
+
+ When pid namespace was added, a separate procfs instance was mounted in
+ each pid namespace. So, procfs mount options are global among all
+ mountpoints within the same namespace.
+
+ ::
+
+ # grep ^proc /proc/mounts
+ proc /proc proc rw,relatime,hidepid=2 0 0
+
+ # strace -e mount mount -o hidepid=1 -t proc proc /tmp/proc
+ mount("proc", "/tmp/proc", "proc", 0, "hidepid=1") = 0
+ +++ exited with 0 +++
+
+ # grep ^proc /proc/mounts
+ proc /proc proc rw,relatime,hidepid=2 0 0
+ proc /tmp/proc proc rw,relatime,hidepid=2 0 0
+
+ and only after remounting procfs mount options will change at all
+ mountpoints.
+
+ # mount -o remount,hidepid=1 -t proc proc /tmp/proc
+
+ # grep ^proc /proc/mounts
+ proc /proc proc rw,relatime,hidepid=1 0 0
+ proc /tmp/proc proc rw,relatime,hidepid=1 0 0
+
+ This behavior is different from the behavior of other filesystems.
+
+ The new procfs behavior is more like other filesystems. Each procfs mount
+ creates a new procfs instance. Mount options affect own procfs instance.
+ It means that it became possible to have several procfs instances
+ displaying tasks with different filtering options in one pid namespace.
+
+ # mount -o hidepid=invisible -t proc proc /proc
+ # mount -o hidepid=noaccess -t proc proc /tmp/proc
+ # grep ^proc /proc/mounts
+ proc /proc proc rw,relatime,hidepid=invisible 0 0
+ proc /tmp/proc proc rw,relatime,hidepid=noaccess 0 0
tsk->start_boottime = leader->start_boottime;
BUG_ON(!same_thread_group(leader, tsk));
- BUG_ON(has_group_leader_pid(tsk));
/*
* An exec() starts a new thread group with the
* TGID of the previous thread group. Rehash the
/* Become a process group leader with the old leader's pid.
* The old leader becomes a thread of the this thread group.
- * Note: The old leader also uses this pid until release_task
- * is called. Odd but simple and correct.
*/
- tsk->pid = leader->pid;
- change_pid(tsk, PIDTYPE_PID, task_pid(leader));
+ exchange_tids(tsk, leader);
transfer_pid(leader, tsk, PIDTYPE_TGID);
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
*/
set_mm_exe_file(bprm->mm, bprm->file);
+ would_dump(bprm, bprm->file);
+
/*
* Release all of the old mmap stuff
*/
if (retval < 0)
goto out;
- would_dump(bprm, bprm->file);
-
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
*
* Initial implementation of mandatory locks. SunOS turned out to be
* a rotten model, so I implemented the "obvious" semantics.
- * See 'Documentation/filesystems/mandatory-locking.txt' for details.
+ * See 'Documentation/filesystems/mandatory-locking.rst' for details.
*
* Don't allow mandatory locks on mmap()'ed files. Added simple functions to
{
struct inode *inode = NULL;
unsigned int fl_pid;
- struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+ struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
fl_pid = locks_translate_pid(fl, proc_pidns);
/*
{
struct locks_iterator *iter = f->private;
struct file_lock *fl, *bfl;
- struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+ struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
fl = hlist_entry(v, struct file_lock, fl_link);
* May current process learn task's sched/cmdline info (for hide_pid_min=1)
* or euid/egid (for hide_pid_min=2)?
*/
- static bool has_pid_permissions(struct pid_namespace *pid,
+ static bool has_pid_permissions(struct proc_fs_info *fs_info,
struct task_struct *task,
- int hide_pid_min)
+ enum proc_hidepid hide_pid_min)
{
- if (pid->hide_pid < hide_pid_min)
+ /*
+ * If 'hidpid' mount option is set force a ptrace check,
+ * we indicate that we are using a filesystem syscall
+ * by passing PTRACE_MODE_READ_FSCREDS
+ */
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+
+ if (fs_info->hide_pid < hide_pid_min)
return true;
- if (in_group_p(pid->pid_gid))
+ if (in_group_p(fs_info->pid_gid))
return true;
return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = proc_pid_ns(inode);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
bool has_perms;
task = get_proc_task(inode);
if (!task)
return -ESRCH;
- has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
+ has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
put_task_struct(task);
if (!has_perms) {
- if (pid->hide_pid == HIDEPID_INVISIBLE) {
+ if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
/*
* Let's make getdents(), stat(), and open()
* consistent with each other. If a process
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
struct task_struct *p;
p = get_proc_task(inode);
noffsets = 0;
for (pos = kbuf; pos; pos = next_line) {
struct proc_timens_offset *off = &offsets[noffsets];
+ char clock[10];
int err;
/* Find the end of line and ensure we don't look past it */
next_line = NULL;
}
- err = sscanf(pos, "%u %lld %lu", &off->clockid,
+ err = sscanf(pos, "%9s %lld %lu", clock,
&off->val.tv_sec, &off->val.tv_nsec);
if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
goto out;
+
+ clock[sizeof(clock) - 1] = 0;
+ if (strcmp(clock, "monotonic") == 0 ||
+ strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
+ off->clockid = CLOCK_MONOTONIC;
+ else if (strcmp(clock, "boottime") == 0 ||
+ strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
+ off->clockid = CLOCK_BOOTTIME;
+ else
+ goto out;
+
noffsets++;
if (noffsets == ARRAY_SIZE(offsets)) {
if (next_line)
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- struct pid_namespace *pid = proc_pid_ns(inode);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
generic_fillattr(inode, stat);
rcu_read_lock();
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
+ if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
rcu_read_unlock();
/*
* This doesn't prevent learning whether PID exists,
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = proc_pid_ns(inode);
+ tp->ns = proc_pid_ns(inode->i_sb);
return 0;
}
{
struct task_struct *task;
unsigned tgid;
+ struct proc_fs_info *fs_info;
struct pid_namespace *ns;
struct dentry *result = ERR_PTR(-ENOENT);
if (tgid == ~0U)
goto out;
- ns = dentry->d_sb->s_fs_info;
+ fs_info = proc_sb_info(dentry->d_sb);
+ ns = fs_info->pid_ns;
rcu_read_lock();
task = find_task_by_pid_ns(tgid, ns);
if (task)
if (!task)
goto out;
+ /* Limit procfs to only ptraceable tasks */
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
+ if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
+ goto out_put_task;
+ }
+
result = proc_pid_instantiate(dentry, task, NULL);
+ out_put_task:
put_task_struct(task);
out:
return result;
pid = find_ge_pid(iter.tgid, ns);
if (pid) {
iter.tgid = pid_nr_ns(pid, ns);
- iter.task = pid_task(pid, PIDTYPE_PID);
- /* What we to know is if the pid we have find is the
- * pid of a thread_group_leader. Testing for task
- * being a thread_group_leader is the obvious thing
- * todo but there is a window when it fails, due to
- * the pid transfer logic in de_thread.
- *
- * So we perform the straight forward test of seeing
- * if the pid we have found is the pid of a thread
- * group leader, and don't worry if the task we have
- * found doesn't happen to be a thread group leader.
- * As we don't care in the case of readdir.
- */
- if (!iter.task || !has_group_leader_pid(iter.task)) {
+ iter.task = pid_task(pid, PIDTYPE_TGID);
+ if (!iter.task) {
iter.tgid += 1;
goto retry;
}
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = proc_pid_ns(file_inode(file));
+ struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
return 0;
if (pos == TGID_OFFSET - 2) {
- struct inode *inode = d_inode(ns->proc_self);
+ struct inode *inode = d_inode(fs_info->proc_self);
if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
}
if (pos == TGID_OFFSET - 1) {
- struct inode *inode = d_inode(ns->proc_thread_self);
+ struct inode *inode = d_inode(fs_info->proc_thread_self);
if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
unsigned int len;
cond_resched();
- if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
+ if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
continue;
len = snprintf(name, sizeof(name), "%u", iter.tgid);
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
+ struct proc_fs_info *fs_info;
struct pid_namespace *ns;
struct dentry *result = ERR_PTR(-ENOENT);
if (tid == ~0U)
goto out;
- ns = dentry->d_sb->s_fs_info;
+ fs_info = proc_sb_info(dentry->d_sb);
+ ns = fs_info->pid_ns;
rcu_read_lock();
task = find_task_by_pid_ns(tid, ns);
if (task)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = proc_pid_ns(inode);
+ ns = proc_pid_ns(inode->i_sb);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
return res;
}
- struct proc_fs_info {
+ struct proc_fs_opts {
int flag;
const char *str;
};
static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{
- static const struct proc_fs_info fs_info[] = {
+ static const struct proc_fs_opts fs_opts[] = {
{ SB_SYNCHRONOUS, ",sync" },
{ SB_DIRSYNC, ",dirsync" },
{ SB_MANDLOCK, ",mand" },
{ SB_LAZYTIME, ",lazytime" },
{ 0, NULL }
};
- const struct proc_fs_info *fs_infop;
+ const struct proc_fs_opts *fs_infop;
- for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+ for (fs_infop = fs_opts; fs_infop->flag; fs_infop++) {
if (sb->s_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
}
static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{
- static const struct proc_fs_info mnt_info[] = {
+ static const struct proc_fs_opts mnt_opts[] = {
{ MNT_NOSUID, ",nosuid" },
{ MNT_NODEV, ",nodev" },
{ MNT_NOEXEC, ",noexec" },
{ MNT_RELATIME, ",relatime" },
{ 0, NULL }
};
- const struct proc_fs_info *fs_infop;
+ const struct proc_fs_opts *fs_infop;
- for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+ for (fs_infop = mnt_opts; fs_infop->flag; fs_infop++) {
if (mnt->mnt_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
}
p->ns = ns;
p->root = root;
p->show = show;
- p->cached_event = ~0ULL;
+ INIT_LIST_HEAD(&p->cursor.mnt_list);
+ p->cursor.mnt.mnt_flags = MNT_CURSOR;
return 0;
struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private;
path_put(&p->root);
+ mnt_cursor_del(p->ns, &p->cursor);
put_mnt_ns(p->ns);
return seq_release_private(inode, file);
}
extern void detach_pid(struct task_struct *task, enum pid_type);
extern void change_pid(struct task_struct *task, enum pid_type,
struct pid *pid);
+ extern void exchange_tids(struct task_struct *task, struct task_struct *old);
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
enum pid_type);
struct pid_namespace;
extern struct pid_namespace init_pid_ns;
+extern int pid_max;
+extern int pid_max_min, pid_max_max;
+
/*
* look up a PID in the hash table. Must be called with the tasklist_lock
* or rcu_read_lock() held.
unsigned long (*proc_get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
} __randomize_layout;
+ /* definitions for hide_pid field */
+ enum proc_hidepid {
+ HIDEPID_OFF = 0,
+ HIDEPID_NO_ACCESS = 1,
+ HIDEPID_INVISIBLE = 2,
+ HIDEPID_NOT_PTRACEABLE = 4, /* Limit pids to only ptraceable pids */
+ };
+
+ /* definitions for proc mount option pidonly */
+ enum proc_pidonly {
+ PROC_PIDONLY_OFF = 0,
+ PROC_PIDONLY_ON = 1,
+ };
+
+ struct proc_fs_info {
+ struct pid_namespace *pid_ns;
+ struct dentry *proc_self; /* For /proc/self */
+ struct dentry *proc_thread_self; /* For /proc/thread-self */
+ kgid_t pid_gid;
+ enum proc_hidepid hide_pid;
+ enum proc_pidonly pidonly;
+ };
+
+ static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
+ {
+ return sb->s_fs_info;
+ }
+
#ifdef CONFIG_PROC_FS
typedef int (*proc_write_t)(struct file *, char *, size_t);
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
+extern int bpf_iter_init_seq_net(void *priv_data);
+extern void bpf_iter_fini_seq_net(void *priv_data);
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
struct ns_common *(*get_ns)(struct ns_common *ns));
/* get the associated pid namespace for a file in procfs */
- static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
+ static inline struct pid_namespace *proc_pid_ns(struct super_block *sb)
{
- return inode->i_sb->s_fs_info;
+ return proc_sb_info(sb)->pid_ns;
}
+bool proc_ns_file(const struct file *file);
+
#endif /* _LINUX_PROC_FS_H */
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the list_head within the struct.
- * @cond...: optional lockdep expression if called from non-RCU protection.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as list_add_rcu()
WRITE_ONCE(old->pprev, LIST_POISON2);
}
+ /**
+ * hlists_swap_heads_rcu - swap the lists the hlist heads point to
+ * @left: The hlist head on the left
+ * @right: The hlist head on the right
+ *
+ * The lists start out as [@left ][node1 ... ] and
+ [@right ][node2 ... ]
+ * The lists end up as [@left ][node2 ... ]
+ * [@right ][node1 ... ]
+ */
+ static inline void hlists_swap_heads_rcu(struct hlist_head *left, struct hlist_head *right)
+ {
+ struct hlist_node *node1 = left->first;
+ struct hlist_node *node2 = right->first;
+
+ rcu_assign_pointer(left->first, node2);
+ rcu_assign_pointer(right->first, node1);
+ WRITE_ONCE(node2->pprev, &left->first);
+ WRITE_ONCE(node1->pprev, &right->first);
+ }
+
/*
* return the first or the next element in an RCU protected hlist
*/
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
- * @cond...: optional lockdep expression if called from non-RCU protection.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
NULL, free_vm_stack_cache);
#endif
+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
if (err)
goto free_stack;
+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
p->rcu_tasks_idle_cpu = -1;
#endif /* #ifdef CONFIG_TASKS_RCU */
+#ifdef CONFIG_TASKS_TRACE_RCU
+ p->trc_reader_nesting = 0;
+ p->trc_reader_special.s = 0;
+ INIT_LIST_HEAD(&p->trc_holdout_list);
+#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
struct pid *pidfd_pid(const struct file *file)
pid_t nr = -1;
if (likely(pid_has_task(pid, PIDTYPE_PID))) {
- ns = proc_pid_ns(file_inode(m->file));
+ ns = proc_pid_ns(file_inode(m->file)->i_sb);
nr = pid_nr_ns(pid, ns);
}
int __user *child_tidptr)
{
struct kernel_clone_args args = {
- .flags = (clone_flags & ~CSIGNAL),
+ .flags = (lower_32_bits(clone_flags) & ~CSIGNAL),
.pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
- .exit_signal = (clone_flags & CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
.stack = stack_start,
.stack_size = stack_size,
};
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct kernel_clone_args args = {
- .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
- .exit_signal = (flags & CSIGNAL),
+ .flags = ((lower_32_bits(flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(flags) & CSIGNAL),
.stack = (unsigned long)fn,
.stack_size = (unsigned long)arg,
};
#endif
{
struct kernel_clone_args args = {
- .flags = (clone_flags & ~CSIGNAL),
+ .flags = (lower_32_bits(clone_flags) & ~CSIGNAL),
.pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
- .exit_signal = (clone_flags & CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
.stack = newsp,
.tls = tls,
};
struct clone_args args;
pid_t *kset_tid = kargs->set_tid;
+ BUILD_BUG_ON(offsetofend(struct clone_args, tls) !=
+ CLONE_ARGS_SIZE_VER0);
+ BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) !=
+ CLONE_ARGS_SIZE_VER1);
+ BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) !=
+ CLONE_ARGS_SIZE_VER2);
+ BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
+
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
!valid_signal(args.exit_signal)))
return -EINVAL;
- if ((args.flags & CLONE_INTO_CGROUP) && args.cgroup < 0)
+ if ((args.flags & CLONE_INTO_CGROUP) &&
+ (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2))
return -EINVAL;
*kargs = (struct kernel_clone_args){