/* DYNPTR points to xdp_buff */
DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+ /* Memory must be aligned on some architectures, used in combination with
+ * MEM_FIXED_SIZE.
+ */
+ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
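/* Illustrative sketch (not part of the patch): MEM_ALIGNED is meant to be
 * combined with a fixed-size memory argument in a helper prototype, replacing
 * the removed ARG_PTR_TO_INT/ARG_PTR_TO_LONG types. The helper name below is
 * hypothetical; the patch applies the same pattern to bpf_kallsyms_lookup_name
 * further down.
 */
static const struct bpf_func_proto example_fill_u64_proto = {
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	/* uninitialized, 8-byte, naturally aligned out-argument */
	.arg1_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
	.arg1_size	= sizeof(u64),
};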
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
- ARG_PTR_TO_INT, /* pointer to int */
- ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */
ARG_PTR_TO_STACK, /* pointer to stack */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
- ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
+ ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */
ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
__BPF_ARG_TYPE_MAX,
bool gpl_only;
bool pkt_access;
bool might_sleep;
- /* set to true if helper follows contract for gcc/llvm
- * attribute no_caller_saved_registers:
+ /* set to true if the helper follows the contract for the llvm
+ * attribute bpf_fastcall:
* - void functions do not scratch r0
* - functions taking N arguments scratch only registers r1-rN
*/
- bool allow_nocsr;
+ bool allow_fastcall;
enum bpf_return_type ret_type;
union {
struct {
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
+ int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog,
+ s16 ctx_stack_off);
int (*gen_ld_abs)(const struct bpf_insn *orig,
struct bpf_insn *insn_buf);
u32 (*convert_ctx_access)(enum bpf_access_type type,
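/* Illustrative sketch (not part of the patch): a gen_epilogue() callback is
 * expected to emit the instructions that run in place of the program's exit,
 * using the ctx pointer the verifier spilled at ctx_stack_off. The callback
 * and the patched ctx field below are hypothetical.
 */
static int example_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
				s16 ctx_stack_off)
{
	struct bpf_insn *insn = insn_buf;

	/* r1 = *(u64 *)(r10 + ctx_stack_off)  ; reload the saved ctx pointer */
	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
	/* *(u32 *)(r1 + 0) = r0               ; hypothetical: publish retval in ctx */
	*insn++ = BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0);
	*insn++ = BPF_EXIT_INSN();

	return insn - insn_buf;
}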
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
- struct bpf_map *__bpf_map_get(struct fd f);
+
+ static inline struct bpf_map *__bpf_map_get(struct fd f)
+ {
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+ if (unlikely(fd_file(f)->f_op != &bpf_map_fops))
+ return ERR_PTR(-EINVAL);
+ return fd_file(f)->private_data;
+ }
+
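/* Illustrative sketch (not part of the patch): with __bpf_map_get() as a
 * static inline and the scope-based fd class, callers no longer need an
 * explicit fdput(); the reference is dropped when 'f' leaves scope. This is
 * the shape bpf_map_get() takes later in the patch.
 */
static struct bpf_map *example_map_from_fd(u32 ufd)
{
	CLASS(fd, f)(ufd);			/* auto-released at end of scope */
	struct bpf_map *map = __bpf_map_get(f);

	if (!IS_ERR(map))
		bpf_map_inc(map);		/* take a long-lived reference */
	return map;
}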
void bpf_map_inc(struct bpf_map *map);
void bpf_map_inc_with_uref(struct bpf_map *map);
struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
+extern const struct bpf_func_proto bpf_get_stack_sleepable_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
+extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
extern const struct bpf_func_proto bpf_get_stack_proto_pe;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto;
+extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
LSM_HOOK(int, 0, syslog, int type)
LSM_HOOK(int, 0, settime, const struct timespec64 *ts,
const struct timezone *tz)
-LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages)
+LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages)
LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm)
LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file)
LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
unsigned int obj_type)
LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
+LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
int *xattr_count)
LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new)
LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src,
const char *name)
+LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode,
+ enum lsm_integrity_type type, const void *value, size_t size)
LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir,
struct kernfs_node *kn)
LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void)
LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req,
struct flowi_common *flic)
-LSM_HOOK(int, 0, tun_dev_alloc_security, void **security)
-LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security)
+LSM_HOOK(int, 0, tun_dev_alloc_security, void *security)
LSM_HOOK(int, 0, tun_dev_create, void)
LSM_HOOK(int, 0, tun_dev_attach_queue, void *security)
LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security)
LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey)
LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name,
u8 port_num)
-LSM_HOOK(int, 0, ib_alloc_security, void **sec)
-LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec)
+LSM_HOOK(int, 0, ib_alloc_security, void *sec)
#endif /* CONFIG_SECURITY_INFINIBAND */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
#ifdef CONFIG_KEYS
LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred,
unsigned long flags)
-LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key)
LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred,
enum key_need_perm need_perm)
LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer)
struct bpf_token *token)
LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
- struct path *path)
+ const struct path *path)
LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token)
LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd)
LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
#ifdef CONFIG_PERF_EVENTS
LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type)
LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event)
-LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event)
LSM_HOOK(int, 0, perf_event_read, struct perf_event *event)
LSM_HOOK(int, 0, perf_event_write, struct perf_event *event)
#endif /* CONFIG_PERF_EVENTS */
LSM_HOOK(int, 0, uring_sqpoll, void)
LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd)
#endif /* CONFIG_IO_URING */
+
+LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void)
+
+LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev)
+LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev)
+LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev,
+ enum lsm_integrity_type type, const void *value, size_t size)
LSM_POLICY_CHANGE,
};
+struct dm_verity_digest {
+ const char *alg;
+ const u8 *digest;
+ size_t digest_len;
+};
+
+enum lsm_integrity_type {
+ LSM_INT_DMVERITY_SIG_VALID,
+ LSM_INT_DMVERITY_ROOTHASH,
+ LSM_INT_FSVERITY_BUILTINSIG_VALID,
+};
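/* Illustrative sketch (not part of the patch): a caller such as dm-verity
 * could hand a verified root hash to the LSM layer through the new bdev
 * integrity hook. Function and variable names here are hypothetical.
 */
static int example_report_root_digest(struct block_device *bdev,
				      const u8 *root_hash, size_t hash_len)
{
	struct dm_verity_digest digest = {
		.alg		= "sha256",	/* assumed hash algorithm name */
		.digest		= root_hash,
		.digest_len	= hash_len,
	};

	return security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH,
					  &digest, sizeof(digest));
}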
+
/*
* These are reasons that can be passed to the security_locked_down()
* LSM hook. Lockdown reasons that protect kernel integrity (ie, the
void security_inode_getsecid(struct inode *inode, u32 *secid);
int security_inode_copy_up(struct dentry *src, struct cred **new);
int security_inode_copy_up_xattr(struct dentry *src, const char *name);
+int security_inode_setintegrity(const struct inode *inode,
+ enum lsm_integrity_type type, const void *value,
+ size_t size);
int security_kernfs_init_security(struct kernfs_node *kn_dir,
struct kernfs_node *kn);
int security_file_permission(struct file *file, int mask);
int security_locked_down(enum lockdown_reason what);
int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len,
void *val, size_t val_len, u64 id, u64 flags);
+int security_bdev_alloc(struct block_device *bdev);
+void security_bdev_free(struct block_device *bdev);
+int security_bdev_setintegrity(struct block_device *bdev,
+ enum lsm_integrity_type type, const void *value,
+ size_t size);
#else /* CONFIG_SECURITY */
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
- return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages));
+ return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages));
}
static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm)
return 0;
}
+static inline int security_inode_setintegrity(const struct inode *inode,
+ enum lsm_integrity_type type,
+ const void *value, size_t size)
+{
+ return 0;
+}
+
static inline int security_kernfs_init_security(struct kernfs_node *kn_dir,
struct kernfs_node *kn)
{
{
return -EOPNOTSUPP;
}
+
+static inline int security_bdev_alloc(struct block_device *bdev)
+{
+ return 0;
+}
+
+static inline void security_bdev_free(struct block_device *bdev)
+{
+}
+
+static inline int security_bdev_setintegrity(struct block_device *bdev,
+ enum lsm_integrity_type type,
+ const void *value, size_t size)
+{
+ return 0;
+}
+
#endif /* CONFIG_SECURITY */
#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
const char *target,
const struct inode_operations *iops);
extern void securityfs_remove(struct dentry *dentry);
+extern void securityfs_recursive_remove(struct dentry *dentry);
#else /* CONFIG_SECURITYFS */
struct bpf_token *token);
extern void security_bpf_prog_free(struct bpf_prog *prog);
extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
- struct path *path);
+ const struct path *path);
extern void security_bpf_token_free(struct bpf_token *token);
extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
{ }
static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
- struct path *path)
+ const struct path *path)
{
return 0;
}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_IO_URING */
+#ifdef CONFIG_SECURITY
+extern void security_initramfs_populated(void);
+#else
+static inline void security_initramfs_populated(void)
+{
+}
+#endif /* CONFIG_SECURITY */
+
#endif /* ! __LINUX_SECURITY_H */
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
- BTF_KFUNC_HOOK_CGROUP_SKB,
+ BTF_KFUNC_HOOK_CGROUP,
BTF_KFUNC_HOOK_SCHED_ACT,
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
return NULL;
}
-static bool __btf_name_valid(const struct btf *btf, u32 offset)
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
/* offset must be valid */
const char *src = btf_str_by_offset(btf, offset);
return !*src;
}
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
-{
- return __btf_name_valid(btf, offset);
-}
-
/* Allow any printable character in DATASEC names */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
const char *src = btf_str_by_offset(btf, offset);
const char *src_limit;
+ if (!*src)
+ return false;
+
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
- src++;
while (*src && src < src_limit) {
if (!isprint(*src))
return false;
return -EINVAL;
}
+/* Callers must ensure the btf stays alive if it is program BTF */
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
{
field->kptr.dtor = NULL;
id = info->kptr.type_id;
kptr_btf = (struct btf *)btf;
- btf_get(kptr_btf);
goto found_dtor;
}
if (id < 0)
}
if (!t->name_off ||
- !__btf_name_valid(env->btf, t->name_off)) {
+ !btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
static struct btf_struct_metas *
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
{
- union {
- struct btf_id_set set;
- struct {
- u32 _cnt;
- u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
- } _arr;
- } aof;
struct btf_struct_metas *tab = NULL;
+ struct btf_id_set *aof;
int i, n, id, ret;
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
- memset(&aof, 0, sizeof(aof));
+ aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
+ if (!aof)
+ return ERR_PTR(-ENOMEM);
+ aof->cnt = 0;
+
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
/* Try to find whether this special type exists in user BTF, and
* if so remember its ID so we can easily find it among members
* of structs that we iterate in the next loop.
*/
+ struct btf_id_set *new_aof;
+
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
if (id < 0)
continue;
- aof.set.ids[aof.set.cnt++] = id;
+
+ new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_aof) {
+ ret = -ENOMEM;
+ goto free_aof;
+ }
+ aof = new_aof;
+ aof->ids[aof->cnt++] = id;
+ }
+
+ n = btf_nr_types(btf);
+ for (i = 1; i < n; i++) {
+ /* Try to find if there are kptrs in user BTF and remember their ID */
+ struct btf_id_set *new_aof;
+ struct btf_field_info tmp;
+ const struct btf_type *t;
+
+ t = btf_type_by_id(btf, i);
+ if (!t) {
+ ret = -EINVAL;
+ goto free_aof;
+ }
+
+ ret = btf_find_kptr(btf, t, 0, 0, &tmp);
+ if (ret != BTF_FIELD_FOUND)
+ continue;
+
+ new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_aof) {
+ ret = -ENOMEM;
+ goto free_aof;
+ }
+ aof = new_aof;
+ aof->ids[aof->cnt++] = i;
}
- if (!aof.set.cnt)
+ if (!aof->cnt) {
+ kfree(aof);
return NULL;
- sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
+ }
+ sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
- n = btf_nr_types(btf);
for (i = 1; i < n; i++) {
struct btf_struct_metas *new_tab;
const struct btf_member *member;
int j, tab_cnt;
t = btf_type_by_id(btf, i);
- if (!t) {
- ret = -EINVAL;
- goto free;
- }
if (!__btf_type_is_struct(t))
continue;
cond_resched();
for_each_member(j, t, member) {
- if (btf_id_set_contains(&aof.set, member->type))
+ if (btf_id_set_contains(aof, member->type))
goto parse;
}
continue;
type = &tab->types[tab->cnt];
type->btf_id = i;
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
- BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
+ BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
+ BPF_KPTR, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
type->record = record;
tab->cnt++;
}
+ kfree(aof);
return tab;
free:
btf_struct_metas_free(tab);
+free_aof:
+ kfree(aof);
return ERR_PTR(ret);
}
btf->kernel_btf = true;
snprintf(btf->name, sizeof(btf->name), "%s", module_name);
- btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
+ btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
if (!btf->data) {
err = -ENOMEM;
goto errout;
}
- memcpy(btf->data, data, data_size);
btf->data_size = data_size;
err = btf_parse_hdr(env);
errout:
btf_verifier_env_free(env);
- if (base_btf != vmlinux_btf)
+ if (!IS_ERR(base_btf) && base_btf != vmlinux_btf)
btf_free(base_btf);
if (btf) {
kvfree(btf->data);
if (prog_args_trusted(prog))
info->reg_type |= PTR_TRUSTED;
+ if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
+ info->reg_type |= PTR_MAYBE_NULL;
+
if (tgt_prog) {
enum bpf_prog_type tgt_type;
struct btf *btf_get_by_fd(int fd)
{
struct btf *btf;
- struct fd f;
+ CLASS(fd, f)(fd);
- f = fdget(fd);
-
- if (!fd_file(f))
+ if (fd_empty(f))
return ERR_PTR(-EBADF);
- if (fd_file(f)->f_op != &btf_fops) {
- fdput(f);
+ if (fd_file(f)->f_op != &btf_fops)
return ERR_PTR(-EINVAL);
- }
btf = fd_file(f)->private_data;
refcount_inc(&btf->refcnt);
- fdput(f);
return btf;
}
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
+/* Validate well-formedness of iter argument type.
+ * On success, return positive BTF ID of iter state's STRUCT type.
+ * On error, negative error is returned.
+ */
+int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
+{
+ const struct btf_param *arg;
+ const struct btf_type *t;
+ const char *name;
+ int btf_id;
+
+ if (btf_type_vlen(func) <= arg_idx)
+ return -EINVAL;
+
+ arg = &btf_params(func)[arg_idx];
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ t = btf_type_skip_modifiers(btf, t->type, &btf_id);
+ if (!t || !__btf_type_is_struct(t))
+ return -EINVAL;
+
+ name = btf_name_by_offset(btf, t->name_off);
+ if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
+ return -EINVAL;
+
+ return btf_id;
+}
+
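/* Illustrative sketch (not part of the patch): btf_check_iter_arg() also lets
 * a generic kfunc accept an already-initialized iterator when the parameter
 * name carries the "__iter" suffix (see is_kfunc_arg_iter() in the verifier
 * changes below). The kfunc below is hypothetical.
 */
__bpf_kfunc int bpf_iter_num_example_peek(struct bpf_iter_num *it__iter)
{
	/* The verifier validates 'it__iter' as initialized iterator state;
	 * the body only demonstrates the calling convention.
	 */
	return it__iter ? 0 : -EINVAL;
}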
static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
const struct btf_type *func, u32 func_flags)
{
u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
- const char *name, *sfx, *iter_name;
- const struct btf_param *arg;
+ const char *sfx, *iter_name;
const struct btf_type *t;
char exp_name[128];
u32 nr_args;
+ int btf_id;
/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
if (!flags || (flags & (flags - 1)))
if (nr_args < 1)
return -EINVAL;
- arg = &btf_params(func)[0];
- t = btf_type_skip_modifiers(btf, arg->type, NULL);
- if (!t || !btf_type_is_ptr(t))
- return -EINVAL;
- t = btf_type_skip_modifiers(btf, t->type, NULL);
- if (!t || !__btf_type_is_struct(t))
- return -EINVAL;
-
- name = btf_name_by_offset(btf, t->name_off);
- if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
- return -EINVAL;
+ btf_id = btf_check_iter_arg(btf, func, 0);
+ if (btf_id < 0)
+ return btf_id;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
* fit nicely in stack slots
*/
+ t = btf_type_by_id(btf, btf_id);
if (t->size == 0 || (t->size % 8))
return -EINVAL;
/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
* naming pattern
*/
- iter_name = name + sizeof(ITER_PREFIX) - 1;
+ iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1;
if (flags & KF_ITER_NEW)
sfx = "new";
else if (flags & KF_ITER_NEXT)
case BPF_PROG_TYPE_STRUCT_OPS:
return BTF_KFUNC_HOOK_STRUCT_OPS;
case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_LSM:
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- return BTF_KFUNC_HOOK_CGROUP_SKB;
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ return BTF_KFUNC_HOOK_CGROUP;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
struct bpf_core_cand_list cands = {};
struct bpf_core_relo_res targ_res;
struct bpf_core_spec *specs;
+ const struct btf_type *type;
int err;
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
if (!specs)
return -ENOMEM;
+ type = btf_type_by_id(ctx->btf, relo->type_id);
+ if (!type) {
+ bpf_log(ctx->log, "relo #%u: bad type id %u\n",
+ relo_idx, relo->type_id);
+ return -EINVAL;
+ }
+
if (need_cands) {
struct bpf_cand_cache *cc;
int i;
case BPF_KPTR_PERCPU:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
- btf_put(rec->fields[i].kptr.btf);
+ if (btf_is_kernel(rec->fields[i].kptr.btf))
+ btf_put(rec->fields[i].kptr.btf);
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
- btf_get(fields[i].kptr.btf);
+ if (btf_is_kernel(fields[i].kptr.btf))
+ btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
goto free;
}
}
-/* called from workqueue */
-static void bpf_map_free_deferred(struct work_struct *work)
+static void bpf_map_free(struct bpf_map *map)
{
- struct bpf_map *map = container_of(work, struct bpf_map, work);
struct btf_record *rec = map->record;
struct btf *btf = map->btf;
- security_bpf_map_free(map);
- bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
/* Delay freeing of btf_record for maps, as map_free
btf_put(btf);
}
+/* called from workqueue */
+static void bpf_map_free_deferred(struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+
+ security_bpf_map_free(map);
+ bpf_map_release_memcg(map);
+ bpf_map_free(map);
+}
+
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic64_dec_and_test(&map->usercnt)) {
free_map_sec:
security_bpf_map_free(map);
free_map:
- btf_put(map->btf);
- map->ops->map_free(map);
+ bpf_map_free(map);
put_token:
bpf_token_put(token);
return err;
}
- /* if error is returned, fd is released.
- * On success caller should complete fd access with matching fdput()
- */
- struct bpf_map *__bpf_map_get(struct fd f)
- {
- if (!fd_file(f))
- return ERR_PTR(-EBADF);
- if (fd_file(f)->f_op != &bpf_map_fops) {
- fdput(f);
- return ERR_PTR(-EINVAL);
- }
-
- return fd_file(f)->private_data;
- }
-
void bpf_map_inc(struct bpf_map *map)
{
atomic64_inc(&map->refcnt);
struct bpf_map *bpf_map_get(u32 ufd)
{
- struct fd f = fdget(ufd);
- struct bpf_map *map;
+ CLASS(fd, f)(ufd);
+ struct bpf_map *map = __bpf_map_get(f);
- map = __bpf_map_get(f);
- if (IS_ERR(map))
- return map;
-
- bpf_map_inc(map);
- fdput(f);
+ if (!IS_ERR(map))
+ bpf_map_inc(map);
return map;
}
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
- struct fd f = fdget(ufd);
- struct bpf_map *map;
+ CLASS(fd, f)(ufd);
+ struct bpf_map *map = __bpf_map_get(f);
- map = __bpf_map_get(f);
- if (IS_ERR(map))
- return map;
-
- bpf_map_inc_with_uref(map);
- fdput(f);
+ if (!IS_ERR(map))
+ bpf_map_inc_with_uref(map);
return map;
}
{
void __user *ukey = u64_to_user_ptr(attr->key);
void __user *uvalue = u64_to_user_ptr(attr->value);
- int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value;
u32 value_size;
- struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
if (attr->flags & ~BPF_F_LOCK)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->map_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
- err = -EPERM;
- goto err_put;
- }
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
+ return -EPERM;
if ((attr->flags & BPF_F_LOCK) &&
- !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
- err = -EINVAL;
- goto err_put;
- }
+ !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+ return -EINVAL;
key = __bpf_copy_key(ukey, map->key_size);
- if (IS_ERR(key)) {
- err = PTR_ERR(key);
- goto err_put;
- }
+ if (IS_ERR(key))
+ return PTR_ERR(key);
value_size = bpf_map_value_size(map);
kvfree(value);
free_key:
kvfree(key);
- err_put:
- fdput(f);
return err;
}
{
bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
- int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value;
u32 value_size;
- struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->map_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
kvfree(key);
err_put:
bpf_map_write_active_dec(map);
- fdput(f);
return err;
}
static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
{
bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
- int ufd = attr->map_fd;
struct bpf_map *map;
- struct fd f;
void *key;
int err;
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->map_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
kvfree(key);
err_put:
bpf_map_write_active_dec(map);
- fdput(f);
return err;
}
{
void __user *ukey = u64_to_user_ptr(attr->key);
void __user *unext_key = u64_to_user_ptr(attr->next_key);
- int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *next_key;
- struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->map_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
- err = -EPERM;
- goto err_put;
- }
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
+ return -EPERM;
if (ukey) {
key = __bpf_copy_key(ukey, map->key_size);
- if (IS_ERR(key)) {
- err = PTR_ERR(key);
- goto err_put;
- }
+ if (IS_ERR(key))
+ return PTR_ERR(key);
} else {
key = NULL;
}
kvfree(next_key);
free_key:
kvfree(key);
- err_put:
- fdput(f);
return err;
}
{
void __user *ukey = u64_to_user_ptr(attr->key);
void __user *uvalue = u64_to_user_ptr(attr->value);
- int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value;
u32 value_size;
- struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
if (attr->flags & ~BPF_F_LOCK)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->map_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
kvfree(key);
err_put:
bpf_map_write_active_dec(map);
- fdput(f);
return err;
}
static int map_freeze(const union bpf_attr *attr)
{
- int err = 0, ufd = attr->map_fd;
+ int err = 0;
struct bpf_map *map;
- struct fd f;
if (CHECK_ATTR(BPF_MAP_FREEZE))
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->map_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) {
- fdput(f);
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record))
return -ENOTSUPP;
- }
- if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
- fdput(f);
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE))
return -EPERM;
- }
mutex_lock(&map->freeze_mutex);
if (bpf_map_write_active(map)) {
WRITE_ONCE(map->frozen, true);
err_put:
mutex_unlock(&map->freeze_mutex);
- fdput(f);
return err;
}
O_RDWR | O_CLOEXEC);
}
- static struct bpf_prog *____bpf_prog_get(struct fd f)
- {
- if (!fd_file(f))
- return ERR_PTR(-EBADF);
- if (fd_file(f)->f_op != &bpf_prog_fops) {
- fdput(f);
- return ERR_PTR(-EINVAL);
- }
-
- return fd_file(f)->private_data;
- }
-
void bpf_prog_add(struct bpf_prog *prog, int i)
{
atomic64_add(i, &prog->aux->refcnt);
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
bool attach_drv)
{
- struct fd f = fdget(ufd);
+ CLASS(fd, f)(ufd);
struct bpf_prog *prog;
- prog = ____bpf_prog_get(f);
- if (IS_ERR(prog))
- return prog;
- if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
- prog = ERR_PTR(-EINVAL);
- goto out;
- }
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+ if (fd_file(f)->f_op != &bpf_prog_fops)
+ return ERR_PTR(-EINVAL);
+
+ prog = fd_file(f)->private_data;
+ if (!bpf_prog_get_ok(prog, attach_type, attach_drv))
+ return ERR_PTR(-EINVAL);
bpf_prog_inc(prog);
- out:
- fdput(f);
return prog;
}
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
- struct fd f = fdget(ufd);
+ CLASS(fd, f)(ufd);
struct bpf_link *link;
- if (!fd_file(f))
+ if (fd_empty(f))
return ERR_PTR(-EBADF);
- if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) {
- fdput(f);
+ if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll)
return ERR_PTR(-EINVAL);
- }
link = fd_file(f)->private_data;
bpf_link_inc(link);
- fdput(f);
-
return link;
}
EXPORT_SYMBOL(bpf_link_get_from_fd);
static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
- int ufd = attr->info.bpf_fd;
- struct fd f;
- int err;
-
if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
return -EINVAL;
- f = fdget(ufd);
- if (!fd_file(f))
+ CLASS(fd, f)(attr->info.bpf_fd);
+ if (fd_empty(f))
return -EBADFD;
if (fd_file(f)->f_op == &bpf_prog_fops)
- err = bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
+ return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
uattr);
else if (fd_file(f)->f_op == &bpf_map_fops)
- err = bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
+ return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
uattr);
else if (fd_file(f)->f_op == &btf_fops)
- err = bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr);
+ return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr);
else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll)
- err = bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data,
+ return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data,
attr, uattr);
- else
- err = -EINVAL;
-
- fdput(f);
- return err;
+ return -EINVAL;
}
#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
struct bpf_map *map;
- int err, ufd;
- struct fd f;
+ int err;
if (CHECK_ATTR(BPF_MAP_BATCH))
return -EINVAL;
- ufd = attr->batch.map_fd;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->batch.map_fd);
+
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
maybe_wait_bpf_programs(map);
bpf_map_write_active_dec(map);
}
- fdput(f);
return err;
}
return bpf_token_create(attr);
}
-static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
int err;
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
+ *res = 0;
if (flags)
return -EINVAL;
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_LONG,
+ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg4_size = sizeof(u64),
};
static const struct bpf_func_proto *
#include <linux/cpumask.h>
#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>
+#include <linux/trace_events.h>
+#include <linux/kallsyms.h>
#include "disasm.h"
verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
}
-static bool type_may_be_null(u32 type)
-{
- return type & PTR_MAYBE_NULL;
-}
-
static bool reg_not_null(const struct bpf_reg_state *reg)
{
enum bpf_reg_type type;
return fls64(reg->umax_value);
}
-/* See comment for mark_nocsr_pattern_for_call() */
-static void check_nocsr_stack_contract(struct bpf_verifier_env *env, struct bpf_func_state *state,
- int insn_idx, int off)
+/* See comment for mark_fastcall_pattern_for_call() */
+static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
+ struct bpf_func_state *state, int insn_idx, int off)
{
struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
struct bpf_insn_aux_data *aux = env->insn_aux_data;
int i;
- if (subprog->nocsr_stack_off <= off || aux[insn_idx].nocsr_pattern)
+ if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
return;
- /* access to the region [max_stack_depth .. nocsr_stack_off)
- * from something that is not a part of the nocsr pattern,
- * disable nocsr rewrites for current subprogram by setting
- * nocsr_stack_off to a value smaller than any possible offset.
+ /* The region [max_stack_depth .. fastcall_stack_off) was accessed by
+ * something that is not part of the fastcall pattern; disable fastcall
+ * rewrites for the current subprogram by setting fastcall_stack_off to
+ * a value smaller than any possible offset.
*/
- subprog->nocsr_stack_off = S16_MIN;
- /* reset nocsr aux flags within subprogram,
+ subprog->fastcall_stack_off = S16_MIN;
+ /* reset fastcall aux flags within subprogram,
* happens at most once per subprogram
*/
for (i = subprog->start; i < (subprog + 1)->start; ++i) {
- aux[i].nocsr_spills_num = 0;
- aux[i].nocsr_pattern = 0;
+ aux[i].fastcall_spills_num = 0;
+ aux[i].fastcall_pattern = 0;
}
}
if (err)
return err;
- check_nocsr_stack_contract(env, state, insn_idx, off);
+ check_fastcall_stack_contract(env, state, insn_idx, off);
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
bool reg_value_fits;
return err;
}
- check_nocsr_stack_contract(env, state, insn_idx, min_off);
+ check_fastcall_stack_contract(env, state, insn_idx, min_off);
/* Variable offset writes destroy any spilled pointers in range. */
for (i = min_off; i < max_off; i++) {
u8 new_type, *stype;
reg = &reg_state->stack[spi].spilled_ptr;
mark_stack_slot_scratched(env, spi);
- check_nocsr_stack_contract(env, state, env->insn_idx, off);
+ check_fastcall_stack_contract(env, state, env->insn_idx, off);
if (is_spilled_reg(®_state->stack[spi])) {
u8 spill_size = 1;
min_off = reg->smin_value + off;
max_off = reg->smax_value + off;
mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
- check_nocsr_stack_contract(env, ptr_state, env->insn_idx, min_off);
+ check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
return 0;
}
struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx];
int min_valid_off, max_bpf_stack;
- /* If accessing instruction is a spill/fill from nocsr pattern,
+ /* If the accessing instruction is a spill/fill from a bpf_fastcall pattern,
* add room for all caller saved registers below MAX_BPF_STACK.
- * In case if nocsr rewrite won't happen maximal stack depth
+ * If the bpf_fastcall rewrite does not happen, the maximal stack depth
* would be checked by check_max_stack_depth_subprog().
*/
max_bpf_stack = MAX_BPF_STACK;
- if (aux->nocsr_pattern)
+ if (aux->fastcall_pattern)
max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
if (t == BPF_WRITE || env->allow_uninit_stack)
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- struct bpf_map *map_ptr = reg->map_ptr;
struct btf_field *kptr_field;
+ struct bpf_map *map_ptr;
+ struct btf_record *rec;
u32 kptr_off;
+ if (type_is_ptr_alloc_obj(reg->type)) {
+ rec = reg_btf_record(reg);
+ } else { /* PTR_TO_MAP_VALUE */
+ map_ptr = reg->map_ptr;
+ if (!map_ptr->btf) {
+ verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
+ map_ptr->name);
+ return -EINVAL;
+ }
+ rec = map_ptr->record;
+ meta->map_ptr = map_ptr;
+ }
+
if (!tnum_is_const(reg->var_off)) {
verbose(env,
"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
regno);
return -EINVAL;
}
- if (!map_ptr->btf) {
- verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
- map_ptr->name);
- return -EINVAL;
- }
- if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
- verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
+
+ if (!btf_record_has_field(rec, BPF_KPTR)) {
+ verbose(env, "R%d has no valid kptr\n", regno);
return -EINVAL;
}
- meta->map_ptr = map_ptr;
kptr_off = reg->off + reg->var_off.value;
- kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
+ kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
if (!kptr_field) {
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
return -EACCES;
return meta->kfunc_flags & KF_ITER_DESTROY;
}
-static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
+static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
+ const struct btf_param *arg)
{
/* btf_check_iter_kfuncs() guarantees that first argument of any iter
* kfunc is iter state pointer
*/
- return arg == 0 && is_iter_kfunc(meta);
+ if (is_iter_kfunc(meta))
+ return arg_idx == 0;
+
+ /* iter passed as an argument to a generic kfunc */
+ return btf_param_match_suffix(meta->btf, arg, "__iter");
}
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
const struct btf_type *t;
- const struct btf_param *arg;
- int spi, err, i, nr_slots;
- u32 btf_id;
+ int spi, err, i, nr_slots, btf_id;
- /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
- arg = &btf_params(meta->func_proto)[0];
- t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */
- t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */
+ /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
+ * ensures struct convention, so we wouldn't need to do any BTF
+ * validation here. But given iter state can be passed as a parameter
+ * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
+ * conservative here.
+ */
+ btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
+ if (btf_id < 0) {
+ verbose(env, "expected valid iter pointer as arg #%d\n", regno);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(meta->btf, btf_id);
nr_slots = t->size / BPF_REG_SIZE;
if (is_iter_new_kfunc(meta)) {
if (err)
return err;
} else {
- /* iter_next() or iter_destroy() expect initialized iter state*/
+ /* iter_next() or iter_destroy(), as well as any kfunc
+ * accepting iter argument, expect initialized iter state
+ */
err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
switch (err) {
case 0:
return 0;
}
+static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ int iter_frameno = meta->iter.frameno;
+ int iter_spi = meta->iter.spi;
+
+ return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+}
+
/* process_iter_next_call() is called when verifier gets to iterator's next
* "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
* to it as just "iter_next()" in comments below.
struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
struct bpf_reg_state *cur_iter, *queued_iter;
- int iter_frameno = meta->iter.frameno;
- int iter_spi = meta->iter.spi;
BTF_TYPE_EMIT(struct bpf_iter);
- cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+ cur_iter = get_iter_from_state(cur_st, meta);
if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
if (!queued_st)
return -ENOMEM;
- queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+ queued_iter = get_iter_from_state(queued_st, meta);
queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
queued_iter->iter.depth++;
if (prev_st)
type == ARG_CONST_SIZE_OR_ZERO;
}
+static bool arg_type_is_raw_mem(enum bpf_arg_type type)
+{
+ return base_type(type) == ARG_PTR_TO_MEM &&
+ type & MEM_UNINIT;
+}
+
static bool arg_type_is_release(enum bpf_arg_type type)
{
return type & OBJ_RELEASE;
return base_type(type) == ARG_PTR_TO_DYNPTR;
}
-static int int_ptr_type_to_size(enum bpf_arg_type type)
-{
- if (type == ARG_PTR_TO_INT)
- return sizeof(u32);
- else if (type == ARG_PTR_TO_LONG)
- return sizeof(u64);
-
- return -EINVAL;
-}
-
static int resolve_map_arg_type(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_arg_type *arg_type)
},
};
-static const struct bpf_reg_types int_ptr_types = {
- .types = {
- PTR_TO_STACK,
- PTR_TO_PACKET,
- PTR_TO_PACKET_META,
- PTR_TO_MAP_KEY,
- PTR_TO_MAP_VALUE,
- },
-};
-
static const struct bpf_reg_types spin_lock_types = {
.types = {
PTR_TO_MAP_VALUE,
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
-static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types kptr_xchg_dest_types = {
+ .types = {
+ PTR_TO_MAP_VALUE,
+ PTR_TO_BTF_ID | MEM_ALLOC
+ }
+};
static const struct bpf_reg_types dynptr_types = {
.types = {
PTR_TO_STACK,
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
[ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
- [ARG_PTR_TO_INT] = &int_ptr_types,
- [ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK] = &stack_ptr_types,
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types,
- [ARG_PTR_TO_KPTR] = &kptr_types,
+ [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types,
[ARG_PTR_TO_DYNPTR] = &dynptr_types,
};
if (base_type(arg_type) == ARG_PTR_TO_MEM)
type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
+ /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
+ if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
type &= ~MEM_ALLOC;
type &= ~MEM_PERCPU;
}
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
return -EFAULT;
}
- if (meta->func_id == BPF_FUNC_kptr_xchg) {
+ /* Check if local kptr in src arg matches kptr in dst arg */
+ if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
return -EACCES;
}
meta->release_regno = regno;
}
- if (reg->ref_obj_id) {
+ if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
*/
meta->raw_mode = arg_type & MEM_UNINIT;
if (arg_type & MEM_FIXED_SIZE) {
- err = check_helper_mem_access(env, regno,
- fn->arg_size[arg], false,
- meta);
+ err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
+ if (err)
+ return err;
+ if (arg_type & MEM_ALIGNED)
+ err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
}
break;
case ARG_CONST_SIZE:
if (err)
return err;
break;
- case ARG_PTR_TO_INT:
- case ARG_PTR_TO_LONG:
- {
- int size = int_ptr_type_to_size(arg_type);
-
- err = check_helper_mem_access(env, regno, size, false, meta);
- if (err)
- return err;
- err = check_ptr_alignment(env, reg, 0, size, true);
- break;
- }
case ARG_PTR_TO_CONST_STR:
{
err = check_reg_const_str(env, reg, regno);
return err;
break;
}
- case ARG_PTR_TO_KPTR:
+ case ARG_KPTR_XCHG_DEST:
err = process_kptr_func(env, regno, meta);
if (err)
return err;
{
int count = 0;
- if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg1_type))
count++;
- if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg2_type))
count++;
- if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg3_type))
count++;
- if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg4_type))
count++;
- if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg5_type))
count++;
/* We only support one arg being in raw mode at the moment,
if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_DYNPTR;
- if (is_kfunc_arg_iter(meta, argno))
+ if (is_kfunc_arg_iter(meta, argno, &args[argno]))
return KF_ARG_PTR_TO_ITER;
if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
* btf_struct_ids_match() to walk the struct at the 0th offset, and
* resolve types.
*/
- if (is_kfunc_acquire(meta) ||
- (is_kfunc_release(meta) && reg->ref_obj_id) ||
+ if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
strict_type_match = true;
switch (kf_arg_type) {
case KF_ARG_PTR_TO_CTX:
if (reg->type != PTR_TO_CTX) {
- verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
+ verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
+ i, reg_type_str(env, reg->type));
return -EINVAL;
}
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
+
+ if (is_iter_next_kfunc(&meta)) {
+ struct bpf_reg_state *cur_iter;
+
+ cur_iter = get_iter_from_state(env->cur_state, &meta);
+
+ if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
+ regs[BPF_REG_0].type |= MEM_RCU;
+ else
+ regs[BPF_REG_0].type |= PTR_TRUSTED;
+ }
}
if (is_kfunc_ret_null(&meta)) {
/* Return a bitmask specifying which caller saved registers are
* clobbered by a call to a helper *as if* this helper follows
- * no_caller_saved_registers contract:
+ * bpf_fastcall contract:
* - includes R0 if function is non-void;
* - includes R1-R5 if the corresponding parameter is described
* in the function prototype.
*/
-static u32 helper_nocsr_clobber_mask(const struct bpf_func_proto *fn)
+static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn)
{
- u8 mask;
+ u32 mask;
int i;
mask = 0;
}
/* True if do_misc_fixups() replaces calls to helper number 'imm',
- * replacement patch is presumed to follow no_caller_saved_registers contract
- * (see mark_nocsr_pattern_for_call() below).
+ * replacement patch is presumed to follow bpf_fastcall contract
+ * (see mark_fastcall_pattern_for_call() below).
*/
static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
{
}
}
-/* GCC and LLVM define a no_caller_saved_registers function attribute.
+/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */
+static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta)
+{
+ u32 vlen, i, mask;
+
+ vlen = btf_type_vlen(meta->func_proto);
+ mask = 0;
+ if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type)))
+ mask |= BIT(BPF_REG_0);
+ for (i = 0; i < vlen; ++i)
+ mask |= BIT(BPF_REG_1 + i);
+ return mask;
+}
+
+/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */
+static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta)
+{
+ if (meta->btf == btf_vmlinux)
+ return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+ meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast];
+ return false;
+}
+
+/* LLVM defines a bpf_fastcall function attribute.
* This attribute means that the function scratches only some of
* the caller-saved registers defined by the ABI.
* For BPF the set of such registers could be defined as follows:
*
* The contract between the kernel and clang makes it possible to use
* such functions while maintaining backwards compatibility with old
- * kernels that don't understand no_caller_saved_registers calls
- * (nocsr for short):
+ * kernels that don't understand bpf_fastcall calls:
*
- * - for nocsr calls clang allocates registers as-if relevant r0-r5
+ * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
* registers are not scratched by the call;
*
- * - as a post-processing step, clang visits each nocsr call and adds
+ * - as a post-processing step, clang visits each bpf_fastcall call and adds
* spill/fill for every live r0-r5;
*
* - stack offsets used for the spill/fill are allocated as lowest
* purposes;
*
* - when kernel loads a program, it looks for such patterns
- * (nocsr function surrounded by spills/fills) and checks if
- * spill/fill stack offsets are used exclusively in nocsr patterns;
+ * (bpf_fastcall function surrounded by spills/fills) and checks if
+ * spill/fill stack offsets are used exclusively in fastcall patterns;
*
* - if so, and if verifier or current JIT inlines the call to the
- * nocsr function (e.g. a helper call), kernel removes unnecessary
+ * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
* spill/fill pairs;
*
* - when old kernel loads a program, presence of spill/fill pairs
* r0 += r2;
* exit;
*
- * The purpose of mark_nocsr_pattern_for_call is to:
+ * The purpose of mark_fastcall_pattern_for_call is to:
* - look for such patterns;
- * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern;
- * - mark set env->insn_aux_data[*].nocsr_spills_num for call instruction;
- * - update env->subprog_info[*]->nocsr_stack_off to find an offset
- * at which nocsr spill/fill stack slots start;
- * - update env->subprog_info[*]->keep_nocsr_stack.
+ * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
+ * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction;
+ * - update env->subprog_info[*]->fastcall_stack_off to find an offset
+ * at which bpf_fastcall spill/fill stack slots start;
+ * - update env->subprog_info[*]->keep_fastcall_stack.
*
- * The .nocsr_pattern and .nocsr_stack_off are used by
- * check_nocsr_stack_contract() to check if every stack access to
- * nocsr spill/fill stack slot originates from spill/fill
- * instructions, members of nocsr patterns.
+ * The .fastcall_pattern and .fastcall_stack_off are used by
+ * check_fastcall_stack_contract() to check if every stack access to
+ * fastcall spill/fill stack slot originates from spill/fill
+ * instructions, members of fastcall patterns.
*
- * If such condition holds true for a subprogram, nocsr patterns could
- * be rewritten by remove_nocsr_spills_fills().
- * Otherwise nocsr patterns are not changed in the subprogram
+ * If such condition holds true for a subprogram, fastcall patterns could
+ * be rewritten by remove_fastcall_spills_fills().
+ * Otherwise bpf_fastcall patterns are not changed in the subprogram
* (code, presumably, generated by an older clang version).
*
* For example, it is *not* safe to remove spill/fill below:
* r0 += r1; exit;
* exit;
*/
-static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env,
- struct bpf_subprog_info *subprog,
- int insn_idx, s16 lowest_off)
+static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
+ struct bpf_subprog_info *subprog,
+ int insn_idx, s16 lowest_off)
{
struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
struct bpf_insn *call = &env->prog->insnsi[insn_idx];
if (get_helper_proto(env, call->imm, &fn) < 0)
/* error would be reported later */
return;
- clobbered_regs_mask = helper_nocsr_clobber_mask(fn);
- can_be_inlined = fn->allow_nocsr &&
+ clobbered_regs_mask = helper_fastcall_clobber_mask(fn);
+ can_be_inlined = fn->allow_fastcall &&
(verifier_inlines_helper_call(env, call->imm) ||
bpf_jit_inlines_helper_call(call->imm));
}
+ if (bpf_pseudo_kfunc_call(call)) {
+ struct bpf_kfunc_call_arg_meta meta;
+ int err;
+
+ err = fetch_kfunc_meta(env, call, &meta, NULL);
+ if (err < 0)
+ /* error would be reported later */
+ return;
+
+ clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta);
+ can_be_inlined = is_fastcall_kfunc_call(&meta);
+ }
+
if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS)
return;
if (stx->off != off || ldx->off != off)
break;
expected_regs_mask &= ~BIT(stx->src_reg);
- env->insn_aux_data[insn_idx - i].nocsr_pattern = 1;
- env->insn_aux_data[insn_idx + i].nocsr_pattern = 1;
+ env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
+ env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
}
if (i == 1)
return;
- /* Conditionally set 'nocsr_spills_num' to allow forward
+ /* Conditionally set 'fastcall_spills_num' to allow forward
* compatibility when more helper functions are marked as
- * nocsr at compile time than current kernel supports, e.g:
+ * bpf_fastcall at compile time than the current kernel supports, e.g.:
*
* 1: *(u64 *)(r10 - 8) = r1
- * 2: call A ;; assume A is nocsr for current kernel
+ * 2: call A ;; assume A is bpf_fastcall for current kernel
* 3: r1 = *(u64 *)(r10 - 8)
* 4: *(u64 *)(r10 - 8) = r1
- * 5: call B ;; assume B is not nocsr for current kernel
+ * 5: call B ;; assume B is not bpf_fastcall for current kernel
* 6: r1 = *(u64 *)(r10 - 8)
*
- * There is no need to block nocsr rewrite for such program.
- * Set 'nocsr_pattern' for both calls to keep check_nocsr_stack_contract() happy,
- * don't set 'nocsr_spills_num' for call B so that remove_nocsr_spills_fills()
+ * There is no need to block the bpf_fastcall rewrite for such a program.
+ * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
+ * but don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
* does not remove spill/fill pair {4,6}.
*/
if (can_be_inlined)
- env->insn_aux_data[insn_idx].nocsr_spills_num = i - 1;
+ env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
else
- subprog->keep_nocsr_stack = 1;
- subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off);
+ subprog->keep_fastcall_stack = 1;
+ subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
}
-static int mark_nocsr_patterns(struct bpf_verifier_env *env)
+static int mark_fastcall_patterns(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn;
continue;
lowest_off = min(lowest_off, insn->off);
}
- /* use this offset to find nocsr patterns */
+ /* use this offset to find fastcall patterns */
for (i = subprog->start; i < (subprog + 1)->start; ++i) {
insn = env->prog->insnsi + i;
if (insn->code != (BPF_JMP | BPF_CALL))
continue;
- mark_nocsr_pattern_for_call(env, subprog, i, lowest_off);
+ mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
}
}
return 0;
spi = i / BPF_REG_SIZE;
if (exact != NOT_EXACT &&
- old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
- cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+ (i >= cur->allocated_stack ||
+ old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+ cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
return false;
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
}
+ /* Add map behind fd to used maps list, if it's not already there, and return
+ * its index. Also set *reused to true if this map was already in the list of
+ * used maps.
+ * Returns <0 on error, or >= 0 index, on success.
+ */
+ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd, bool *reused)
+ {
+ CLASS(fd, f)(fd);
+ struct bpf_map *map;
+ int i;
+
+ map = __bpf_map_get(f);
+ if (IS_ERR(map)) {
+ verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
+ return PTR_ERR(map);
+ }
+
+ /* check whether we recorded this map already */
+ for (i = 0; i < env->used_map_cnt; i++) {
+ if (env->used_maps[i] == map) {
+ *reused = true;
+ return i;
+ }
+ }
+
+ if (env->used_map_cnt >= MAX_USED_MAPS) {
+ verbose(env, "The total number of maps per program has reached the limit of %u\n",
+ MAX_USED_MAPS);
+ return -E2BIG;
+ }
+
+ if (env->prog->sleepable)
+ atomic64_inc(&map->sleepable_refcnt);
+
+ /* hold the map. If the program is rejected by verifier,
+ * the map will be released by release_maps() or it
+ * will be used by the valid program until it's unloaded
+ * and all maps are released in bpf_free_used_maps()
+ */
+ bpf_map_inc(map);
+
+ *reused = false;
+ env->used_maps[env->used_map_cnt++] = map;
+
+ return env->used_map_cnt - 1;
+ }
+
/* find and rewrite pseudo imm in ld_imm64 instructions:
*
* 1. if it accesses map FD, replace it with actual map pointer.
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
- int i, j, err;
+ int i, err;
err = bpf_prog_calc_tag(env->prog);
if (err)
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
struct bpf_insn_aux_data *aux;
struct bpf_map *map;
- struct fd f;
+ int map_idx;
u64 addr;
u32 fd;
+ bool reused;
if (i == insn_cnt - 1 || insn[1].code != 0 ||
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
break;
}
- f = fdget(fd);
- map = __bpf_map_get(f);
- if (IS_ERR(map)) {
- verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
- return PTR_ERR(map);
- }
+ map_idx = add_used_map_from_fd(env, fd, &reused);
+ if (map_idx < 0)
+ return map_idx;
+ map = env->used_maps[map_idx];
+
+ aux = &env->insn_aux_data[i];
+ aux->map_index = map_idx;
err = check_map_prog_compatibility(env, map, env->prog);
- if (err) {
- fdput(f);
+ if (err)
return err;
- }
- aux = &env->insn_aux_data[i];
if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
addr = (unsigned long)map;
if (off >= BPF_MAX_VAR_OFF) {
verbose(env, "direct value offset of %u is not allowed\n", off);
- fdput(f);
return -EINVAL;
}
if (!map->ops->map_direct_value_addr) {
verbose(env, "no direct value access support for this map type\n");
- fdput(f);
return -EINVAL;
}
if (err) {
verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
map->value_size, off);
- fdput(f);
return err;
}
insn[0].imm = (u32)addr;
insn[1].imm = addr >> 32;
- /* check whether we recorded this map already */
- for (j = 0; j < env->used_map_cnt; j++) {
- if (env->used_maps[j] == map) {
- aux->map_index = j;
- fdput(f);
- goto next_insn;
- }
- }
-
- if (env->used_map_cnt >= MAX_USED_MAPS) {
- verbose(env, "The total number of maps per program has reached the limit of %u\n",
- MAX_USED_MAPS);
- fdput(f);
- return -E2BIG;
- }
-
- if (env->prog->sleepable)
- atomic64_inc(&map->sleepable_refcnt);
- /* hold the map. If the program is rejected by verifier,
- * the map will be released by release_maps() or it
- * will be used by the valid program until it's unloaded
- * and all maps are released in bpf_free_used_maps()
- */
- bpf_map_inc(map);
-
- aux->map_index = env->used_map_cnt;
- env->used_maps[env->used_map_cnt++] = map;
+ /* proceed with extra checks only if it is a newly added used map */
+ if (reused)
+ goto next_insn;
if (bpf_map_is_cgroup_storage(map) &&
bpf_cgroup_storage_assign(env->prog->aux, map)) {
verbose(env, "only one cgroup storage of each type is allowed\n");
- fdput(f);
return -EBUSY;
}
if (map->map_type == BPF_MAP_TYPE_ARENA) {
if (env->prog->aux->arena) {
verbose(env, "Only one arena per program\n");
- fdput(f);
return -EBUSY;
}
if (!env->allow_ptr_leaks || !env->bpf_capable) {
verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
- fdput(f);
return -EPERM;
}
if (!env->prog->jit_requested) {
verbose(env, "JIT is required to use arena\n");
- fdput(f);
return -EOPNOTSUPP;
}
if (!bpf_jit_supports_arena()) {
verbose(env, "JIT doesn't support arena\n");
- fdput(f);
return -EOPNOTSUPP;
}
env->prog->aux->arena = (void *)map;
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
- fdput(f);
return -EINVAL;
}
}
- fdput(f);
next_insn:
insn++;
i++;
for (i = 0; i < insn_cnt; i++, insn++) {
u8 code = insn->code;
+ if (tgt_idx <= i && i < tgt_idx + delta)
+ continue;
+
if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
continue;
*/
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
const struct bpf_verifier_ops *ops = env->ops;
- int i, cnt, size, ctx_field_size, delta = 0;
+ int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
const int insn_cnt = env->prog->len;
- struct bpf_insn insn_buf[16], *insn;
+ struct bpf_insn *epilogue_buf = env->epilogue_buf;
+ struct bpf_insn *insn_buf = env->insn_buf;
+ struct bpf_insn *insn;
u32 target_size, size_default, off;
struct bpf_prog *new_prog;
enum bpf_access_type type;
bool is_narrower_load;
+ int epilogue_idx = 0;
+
+ if (ops->gen_epilogue) {
+ epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
+ -(subprogs[0].stack_depth + 8));
+ if (epilogue_cnt >= INSN_BUF_SIZE) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ } else if (epilogue_cnt) {
+ /* Save the ARG_PTR_TO_CTX for the epilogue to use */
+ cnt = 0;
+ subprogs[0].stack_depth += 8;
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
+ -subprogs[0].stack_depth);
+ insn_buf[cnt++] = env->prog->insnsi[0];
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ delta += cnt - 1;
+ }
+ }
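For context, the ctx pointer spilled at -(stack_depth + 8) above is what a gen_epilogue implementation reads back via ctx_stack_off. A hypothetical implementation might look like the sketch below; the ctx field offset is a placeholder and this is illustrative only, not code from this series:

static int example_gen_epilogue(struct bpf_insn *insn_buf,
				const struct bpf_prog *prog,
				s16 ctx_stack_off)
{
	struct bpf_insn *insn = insn_buf;

	/* r1 = *(u64 *)(r10 + ctx_stack_off): reload the saved ctx pointer */
	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
	/* ctx->some_field = r0 (offset 0 is made up for the example) */
	*insn++ = BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0);
	*insn++ = BPF_EXIT_INSN();

	return insn - insn_buf;
}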
if (ops->gen_prologue || env->seen_direct_write) {
if (!ops->gen_prologue) {
}
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
- if (cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
}
}
+ if (delta)
+ WARN_ON(adjust_jmp_off(env->prog, 0, delta));
+
if (bpf_prog_is_offloaded(env->prog->aux))
return 0;
insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
env->prog->aux->num_exentries++;
continue;
+ } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
+ epilogue_cnt &&
+ i + delta < subprogs[1].start) {
+ /* Generate epilogue for the main prog */
+ if (epilogue_idx) {
+ /* jump back to the earlier generated epilogue */
+ insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
+ cnt = 1;
+ } else {
+ memcpy(insn_buf, epilogue_buf,
+ epilogue_cnt * sizeof(*epilogue_buf));
+ cnt = epilogue_cnt;
+ /* epilogue_idx cannot be 0: there is always at
+ * least one ctx ptr saving insn before the
+ * epilogue.
+ */
+ epilogue_idx = i + delta;
+ }
+ goto patch_insn_buf;
} else {
continue;
}
target_size = 0;
cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
&target_size);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
(ctx_field_size && !target_size)) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
if (is_narrower_load && size < target_size) {
u8 shift = bpf_ctx_narrow_access_offset(
off, size, size_default) * 8;
- if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
+ if (shift && cnt + 1 >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier narrow ctx load misconfigured\n");
return -EINVAL;
}
insn->dst_reg, insn->dst_reg,
size * 8, 0);
+patch_insn_buf:
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
const int insn_cnt = prog->len;
const struct bpf_map_ops *ops;
struct bpf_insn_aux_data *aux;
- struct bpf_insn insn_buf[16];
+ struct bpf_insn *insn_buf = env->insn_buf;
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
int i, ret, cnt, delta = 0, cur_subprog = 0;
/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
- /* Make divide-by-zero exceptions impossible. */
+ /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
+ if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
+ insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
+ insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
+ insn->off == 1 && insn->imm == -1) {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ struct bpf_insn *patchlet;
+ struct bpf_insn chk_and_sdiv[] = {
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0),
+ };
+ struct bpf_insn chk_and_smod[] = {
+ BPF_MOV32_IMM(insn->dst_reg, 0),
+ };
+
+ patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
+
+ new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
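As a reference for the constant-immediate rewrite above and the register-operand patchlets just below, here is a minimal userspace C sketch (not kernel code; names are illustrative) of the signed division/modulo semantics BPF defines and these patchlets enforce:

static long long bpf_sdiv_semantics(long long dst, long long src)
{
	if (src == 0)
		return 0;
	if (src == -1)		/* BPF_NEG: -LLONG_MIN wraps back to LLONG_MIN */
		return (long long)(0 - (unsigned long long)dst);
	return dst / src;
}

static long long bpf_smod_semantics(long long dst, long long src)
{
	if (src == 0)
		return dst;
	if (src == -1)
		return 0;
	return dst % src;
}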
+ /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ bool is_sdiv = isdiv && insn->off == 1;
+ bool is_smod = !isdiv && insn->off == 1;
struct bpf_insn *patchlet;
struct bpf_insn chk_and_div[] = {
/* [R,W]x div 0 -> 0 */
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
};
+ struct bpf_insn chk_and_sdiv[] = {
+ /* [R,W]x sdiv 0 -> 0
+ * LLONG_MIN sdiv -1 -> LLONG_MIN
+ * INT_MIN sdiv -1 -> INT_MIN
+ */
+ BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 4, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 1, 0),
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_MOV | BPF_K, insn->dst_reg,
+ 0, 0, 0),
+ /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ *insn,
+ };
+ struct bpf_insn chk_and_smod[] = {
+ /* [R,W]x mod 0 -> [R,W]x */
+ /* [R,W]x mod -1 -> 0 */
+ BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 3, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 3 + (is64 ? 0 : 1), 1),
+ BPF_MOV32_IMM(insn->dst_reg, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ *insn,
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
+ };
- patchlet = isdiv ? chk_and_div : chk_and_mod;
- cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
- ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+ if (is_sdiv) {
+ patchlet = chk_and_sdiv;
+ cnt = ARRAY_SIZE(chk_and_sdiv);
+ } else if (is_smod) {
+ patchlet = chk_and_smod;
+ cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
+ } else {
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+ ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+ }
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
(BPF_MODE(insn->code) == BPF_ABS ||
BPF_MODE(insn->code) == BPF_IND)) {
cnt = env->ops->gen_ld_abs(insn, insn_buf);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
cnt = ops->map_gen_lookup(map_ptr, insn_buf);
if (cnt == -EOPNOTSUPP)
goto patch_map_ops_generic;
- if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
int position,
s32 stack_base,
u32 callback_subprogno,
- u32 *cnt)
+ u32 *total_cnt)
{
s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
int reg_loop_cnt = BPF_REG_7;
int reg_loop_ctx = BPF_REG_8;
+ struct bpf_insn *insn_buf = env->insn_buf;
struct bpf_prog *new_prog;
u32 callback_start;
u32 call_insn_offset;
s32 callback_offset;
+ u32 cnt = 0;
/* This represents an inlined version of bpf_iter.c:bpf_loop,
* be careful to modify this code in sync.
*/
- struct bpf_insn insn_buf[] = {
- /* Return error and jump to the end of the patch if
- * expected number of iterations is too big.
- */
- BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
- BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
- BPF_JMP_IMM(BPF_JA, 0, 0, 16),
- /* spill R6, R7, R8 to use these as loop vars */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
- /* initialize loop vars */
- BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
- BPF_MOV32_IMM(reg_loop_cnt, 0),
- BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
- /* loop header,
- * if reg_loop_cnt >= reg_loop_max skip the loop body
- */
- BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
- /* callback call,
- * correct callback offset would be set after patching
- */
- BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
- BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
- BPF_CALL_REL(0),
- /* increment loop counter */
- BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
- /* jump to loop header if callback returned 0 */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
- /* return value of bpf_loop,
- * set R0 to the number of iterations
- */
- BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
- /* restore original values of R6, R7, R8 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
- BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
- };
- *cnt = ARRAY_SIZE(insn_buf);
- new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+ /* Return error and jump to the end of the patch if
+ * the expected number of iterations is too big.
+ */
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
+ insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
+ /* spill R6, R7, R8 to use these as loop vars */
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
+ /* initialize loop vars */
+ insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
+ insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
+ insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
+ /* loop header,
+ * if reg_loop_cnt >= reg_loop_max skip the loop body
+ */
+ insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
+ /* callback call,
+ * the correct callback offset will be set after patching
+ */
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
+ insn_buf[cnt++] = BPF_CALL_REL(0);
+ /* increment loop counter */
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
+ /* jump to loop header if callback returned 0 */
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
+ /* return value of bpf_loop,
+ * set R0 to the number of iterations
+ */
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
+ /* restore original values of R6, R7, R8 */
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
+
+ *total_cnt = cnt;
+ new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
if (!new_prog)
return new_prog;
return 0;
}
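The instruction sequence built above implements, roughly, the following C logic (a sketch with invented names; the real callback is a BPF subprog invoked via BPF_CALL_REL, and BPF_MAX_LOOPS/E2BIG come from the kernel headers):

static long inlined_bpf_loop(unsigned long nr_loops,
			     long (*callback)(unsigned long idx, void *ctx),
			     void *ctx)
{
	unsigned long i = 0;

	if (nr_loops > BPF_MAX_LOOPS)
		return -E2BIG;

	while (i < nr_loops) {
		long ret = callback(i, ctx);

		i++;			/* counter advances before the check */
		if (ret != 0)
			break;
	}
	return i;			/* R0 = number of iterations performed */
}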
-/* Remove unnecessary spill/fill pairs, members of nocsr pattern,
+/* Remove unnecessary spill/fill pairs, members of fastcall pattern,
* adjust subprograms stack depth when possible.
*/
-static int remove_nocsr_spills_fills(struct bpf_verifier_env *env)
+static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn_aux_data *aux = env->insn_aux_data;
int i, j;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (aux[i].nocsr_spills_num > 0) {
- spills_num = aux[i].nocsr_spills_num;
+ if (aux[i].fastcall_spills_num > 0) {
+ spills_num = aux[i].fastcall_spills_num;
/* NOPs would be removed by opt_remove_nops() */
for (j = 1; j <= spills_num; ++j) {
*(insn - j) = NOP;
modified = true;
}
if ((subprog + 1)->start == i + 1) {
- if (modified && !subprog->keep_nocsr_stack)
- subprog->stack_depth = -subprog->nocsr_stack_off;
+ if (modified && !subprog->keep_fastcall_stack)
+ subprog->stack_depth = -subprog->fastcall_stack_off;
subprog++;
modified = false;
}
{
bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
+ char trace_symbol[KSYM_SYMBOL_LEN];
const char prefix[] = "btf_trace_";
+ struct bpf_raw_event_map *btp;
int ret = 0, subprog = -1, i;
const struct btf_type *t;
bool conservative = true;
- const char *tname;
+ const char *tname, *fname;
struct btf *btf;
long addr = 0;
struct module *mod = NULL;
return -EINVAL;
}
tname += sizeof(prefix) - 1;
- t = btf_type_by_id(btf, t->type);
- if (!btf_type_is_ptr(t))
- /* should never happen in valid vmlinux build */
+
+ /* The func_proto of "btf_trace_##tname" is generated from a typedef without
+ * argument names, so use bpf_raw_event_map to look up the argument names.
+ */
+ btp = bpf_get_raw_tracepoint(tname);
+ if (!btp)
return -EINVAL;
+ fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
+ trace_symbol);
+ bpf_put_raw_tracepoint(btp);
+
+ if (fname)
+ ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
+
+ if (!fname || ret < 0) {
+ bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
+ prefix, tname);
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_ptr(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+ } else {
+ t = btf_type_by_id(btf, ret);
+ if (!btf_type_is_func(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+ }
+
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
/* should never happen in valid vmlinux build */
if (ret < 0)
goto skip_full_check;
- ret = mark_nocsr_patterns(env);
+ ret = mark_fastcall_patterns(env);
if (ret < 0)
goto skip_full_check;
* allocate additional slots.
*/
if (ret == 0)
- ret = remove_nocsr_spills_fills(env);
+ ret = remove_fastcall_spills_fills(env);
if (ret == 0)
ret = check_max_stack_depth(env);
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
{
- u32 ufd = attr->target_fd;
struct bpf_map *map;
- struct fd f;
int ret;
if (attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->target_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
mutex_lock(&sockmap_mutex);
ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
mutex_unlock(&sockmap_mutex);
- fdput(f);
return ret;
}
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
- u32 ufd = attr->target_fd;
struct bpf_prog *prog;
struct bpf_map *map;
- struct fd f;
int ret;
if (attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->target_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
prog = bpf_prog_get(attr->attach_bpf_fd);
- if (IS_ERR(prog)) {
- ret = PTR_ERR(prog);
- goto put_map;
- }
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
if (prog->type != ptype) {
ret = -EINVAL;
mutex_unlock(&sockmap_mutex);
put_prog:
bpf_prog_put(prog);
- put_map:
- fdput(f);
return ret;
}
sock_put(elem->sk);
sock_hash_free_elem(htab, elem);
}
+ cond_resched();
}
/* wait for psock readers accessing its map link */
union bpf_attr __user *uattr)
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
- u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ u32 prog_cnt = 0, flags = 0;
struct bpf_prog **pprog;
struct bpf_prog *prog;
struct bpf_map *map;
- struct fd f;
u32 id = 0;
int ret;
if (attr->query.query_flags)
return -EINVAL;
- f = fdget(ufd);
+ CLASS(fd, f)(attr->target_fd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
ret = -EFAULT;
- fdput(f);
return ret;
}
#include <linux/xattr.h>
#include <linux/msg.h>
#include <linux/overflow.h>
+#include <linux/perf_event.h>
+#include <linux/fs.h>
#include <net/flow.h>
+#include <net/sock.h>
-/* How many LSMs were built into the kernel? */
-#define LSM_COUNT (__end_lsm_info - __start_lsm_info)
+#define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
/*
- * How many LSMs are built into the kernel as determined at
- * build time. Used to determine fixed array sizes.
- * The capability module is accounted for by CONFIG_SECURITY
- */
-#define LSM_CONFIG_COUNT ( \
- (IS_ENABLED(CONFIG_SECURITY) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_SELINUX) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_SMACK) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_TOMOYO) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_APPARMOR) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_YAMA) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_LOADPIN) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_SAFESETID) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_BPF_LSM) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_IMA) ? 1 : 0) + \
- (IS_ENABLED(CONFIG_EVM) ? 1 : 0))
+ * Identifier for the LSM static calls.
+ * HOOK is an LSM hook as defined in linux/lsm_hook_defs.h
+ * IDX is the index of the static call. 0 <= IDX < MAX_LSM_COUNT
+ */
+#define LSM_STATIC_CALL(HOOK, IDX) lsm_static_call_##HOOK##_##IDX
+
+/*
+ * Invoke the macro M MAX_LSM_COUNT times, once per possible LSM slot.
+ */
+#define LSM_LOOP_UNROLL(M, ...) \
+do { \
+ UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__) \
+} while (0)
+
+#define LSM_DEFINE_UNROLL(M, ...) UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__)
/*
* These are descriptions of the reasons that can be passed to the
[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
};
-struct security_hook_heads security_hook_heads __ro_after_init;
static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
static struct kmem_cache *lsm_file_cache;
static __initconst const char *const builtin_lsm_order = CONFIG_LSM;
/* Ordered list of LSMs to initialize. */
-static __initdata struct lsm_info **ordered_lsms;
+static __initdata struct lsm_info *ordered_lsms[MAX_LSM_COUNT + 1];
static __initdata struct lsm_info *exclusive;
+#ifdef CONFIG_HAVE_STATIC_CALL
+#define LSM_HOOK_TRAMP(NAME, NUM) \
+ &STATIC_CALL_TRAMP(LSM_STATIC_CALL(NAME, NUM))
+#else
+#define LSM_HOOK_TRAMP(NAME, NUM) NULL
+#endif
+
+/*
+ * Define static calls and static keys for each LSM hook.
+ */
+#define DEFINE_LSM_STATIC_CALL(NUM, NAME, RET, ...) \
+ DEFINE_STATIC_CALL_NULL(LSM_STATIC_CALL(NAME, NUM), \
+ *((RET(*)(__VA_ARGS__))NULL)); \
+ DEFINE_STATIC_KEY_FALSE(SECURITY_HOOK_ACTIVE_KEY(NAME, NUM));
+
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
+ LSM_DEFINE_UNROLL(DEFINE_LSM_STATIC_CALL, NAME, RET, __VA_ARGS__)
+#include <linux/lsm_hook_defs.h>
+#undef LSM_HOOK
+#undef DEFINE_LSM_STATIC_CALL
+
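For illustration, with a hypothetical MAX_LSM_COUNT of 2 the block above generates, for each hook such as file_open, roughly the definitions sketched in the comment below (prototypes elided; the exact expansion is produced by the preprocessor):

/*
 *	DEFINE_STATIC_CALL_NULL(lsm_static_call_file_open_0, ...);
 *	DEFINE_STATIC_KEY_FALSE(security_hook_active_file_open_0);
 *	DEFINE_STATIC_CALL_NULL(lsm_static_call_file_open_1, ...);
 *	DEFINE_STATIC_KEY_FALSE(security_hook_active_file_open_1);
 */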
+/*
+ * Initialise a table of static calls for each LSM hook.
+ * DEFINE_STATIC_CALL_NULL invocation above generates a key (STATIC_CALL_KEY)
+ * and a trampoline (STATIC_CALL_TRAMP) which are used to call
+ * __static_call_update when updating the static call.
+ *
+ * The static calls table is used by early LSMs; some architectures can fault
+ * on unaligned accesses, and the fault-handling code may not be ready that
+ * early. Thus, the static calls table is aligned to avoid any unhandled
+ * faults during early init.
+ */
+struct lsm_static_calls_table
+ static_calls_table __ro_after_init __aligned(sizeof(u64)) = {
+#define INIT_LSM_STATIC_CALL(NUM, NAME) \
+ (struct lsm_static_call) { \
+ .key = &STATIC_CALL_KEY(LSM_STATIC_CALL(NAME, NUM)), \
+ .trampoline = LSM_HOOK_TRAMP(NAME, NUM), \
+ .active = &SECURITY_HOOK_ACTIVE_KEY(NAME, NUM), \
+ },
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
+ .NAME = { \
+ LSM_DEFINE_UNROLL(INIT_LSM_STATIC_CALL, NAME) \
+ },
+#include <linux/lsm_hook_defs.h>
+#undef LSM_HOOK
+#undef INIT_LSM_STATIC_CALL
+ };
+
static __initdata bool debug;
#define init_debug(...) \
do { \
if (exists_ordered_lsm(lsm))
return;
- if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from))
+ if (WARN(last_lsm == MAX_LSM_COUNT, "%s: out of LSM static calls!?\n", from))
return;
/* Enable this LSM, if it is not already set. */
lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
+ lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib);
/*
* The inode blob gets an rcu_head in addition to
* what the modules might need.
blob_sizes.lbs_inode = sizeof(struct rcu_head);
lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc);
+ lsm_set_blob_size(&needed->lbs_key, &blob_sizes.lbs_key);
lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg);
+ lsm_set_blob_size(&needed->lbs_perf_event, &blob_sizes.lbs_perf_event);
+ lsm_set_blob_size(&needed->lbs_sock, &blob_sizes.lbs_sock);
lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock);
lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task);
+ lsm_set_blob_size(&needed->lbs_tun_dev, &blob_sizes.lbs_tun_dev);
lsm_set_blob_size(&needed->lbs_xattr_count,
&blob_sizes.lbs_xattr_count);
+ lsm_set_blob_size(&needed->lbs_bdev, &blob_sizes.lbs_bdev);
}
/* Prepare LSM for initialization. */
* Current index to use while initializing the lsm id list.
*/
u32 lsm_active_cnt __ro_after_init;
-const struct lsm_id *lsm_idlist[LSM_CONFIG_COUNT];
+const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
/* Populate ordered LSMs list from comma-separated LSM name list. */
static void __init ordered_lsm_parse(const char *order, const char *origin)
kfree(sep);
}
+static void __init lsm_static_call_init(struct security_hook_list *hl)
+{
+ struct lsm_static_call *scall = hl->scalls;
+ int i;
+
+ for (i = 0; i < MAX_LSM_COUNT; i++) {
+ /* Update the first static call that is not used yet */
+ if (!scall->hl) {
+ __static_call_update(scall->key, scall->trampoline,
+ hl->hook.lsm_func_addr);
+ scall->hl = hl;
+ static_branch_enable(scall->active);
+ return;
+ }
+ scall++;
+ }
+ panic("%s - Ran out of static slots.\n", __func__);
+}
+
static void __init lsm_early_cred(struct cred *cred);
static void __init lsm_early_task(struct task_struct *task);
{
struct lsm_info **lsm;
- ordered_lsms = kcalloc(LSM_COUNT + 1, sizeof(*ordered_lsms),
- GFP_KERNEL);
-
if (chosen_lsm_order) {
if (chosen_major_lsm) {
pr_warn("security=%s is ignored because it is superseded by lsm=%s\n",
init_debug("cred blob size = %d\n", blob_sizes.lbs_cred);
init_debug("file blob size = %d\n", blob_sizes.lbs_file);
+ init_debug("ib blob size = %d\n", blob_sizes.lbs_ib);
init_debug("inode blob size = %d\n", blob_sizes.lbs_inode);
init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc);
+#ifdef CONFIG_KEYS
+ init_debug("key blob size = %d\n", blob_sizes.lbs_key);
+#endif /* CONFIG_KEYS */
init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg);
+ init_debug("sock blob size = %d\n", blob_sizes.lbs_sock);
init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock);
+ init_debug("perf event blob size = %d\n", blob_sizes.lbs_perf_event);
init_debug("task blob size = %d\n", blob_sizes.lbs_task);
+ init_debug("tun device blob size = %d\n", blob_sizes.lbs_tun_dev);
init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count);
+ init_debug("bdev blob size = %d\n", blob_sizes.lbs_bdev);
/*
* Create any kmem_caches needed for blobs
lsm_early_task(current);
for (lsm = ordered_lsms; *lsm; lsm++)
initialize_lsm(*lsm);
-
- kfree(ordered_lsms);
}
int __init early_security_init(void)
{
struct lsm_info *lsm;
-#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
- INIT_HLIST_HEAD(&security_hook_heads.NAME);
-#include "linux/lsm_hook_defs.h"
-#undef LSM_HOOK
-
for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
if (!lsm->enabled)
lsm->enabled = &lsm_enabled_true;
* Look at the previous entry, if there is one, for duplication.
*/
if (lsm_active_cnt == 0 || lsm_idlist[lsm_active_cnt - 1] != lsmid) {
- if (lsm_active_cnt >= LSM_CONFIG_COUNT)
+ if (lsm_active_cnt >= MAX_LSM_COUNT)
panic("%s Too many LSMs registered.\n", __func__);
lsm_idlist[lsm_active_cnt++] = lsmid;
}
for (i = 0; i < count; i++) {
hooks[i].lsmid = lsmid;
- hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
+ lsm_static_call_init(&hooks[i]);
}
/*
EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
/**
- * lsm_cred_alloc - allocate a composite cred blob
- * @cred: the cred that needs a blob
+ * lsm_blob_alloc - allocate a composite blob
+ * @dest: the destination for the blob
+ * @size: the size of the blob
* @gfp: allocation type
*
- * Allocate the cred blob for all the modules
+ * Allocate a blob for all the modules
*
* Returns 0, or -ENOMEM if memory can't be allocated.
*/
-static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
+static int lsm_blob_alloc(void **dest, size_t size, gfp_t gfp)
{
- if (blob_sizes.lbs_cred == 0) {
- cred->security = NULL;
+ if (size == 0) {
+ *dest = NULL;
return 0;
}
- cred->security = kzalloc(blob_sizes.lbs_cred, gfp);
- if (cred->security == NULL)
+ *dest = kzalloc(size, gfp);
+ if (*dest == NULL)
return -ENOMEM;
return 0;
}
+/**
+ * lsm_cred_alloc - allocate a composite cred blob
+ * @cred: the cred that needs a blob
+ * @gfp: allocation type
+ *
+ * Allocate the cred blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
+{
+ return lsm_blob_alloc(&cred->security, blob_sizes.lbs_cred, gfp);
+}
+
/**
* lsm_early_cred - during initialization allocate a composite cred blob
* @cred: the cred that needs a blob
*
* Returns 0, or -ENOMEM if memory can't be allocated.
*/
-int lsm_inode_alloc(struct inode *inode)
+static int lsm_inode_alloc(struct inode *inode)
{
if (!lsm_inode_cache) {
inode->i_security = NULL;
*/
static int lsm_task_alloc(struct task_struct *task)
{
- if (blob_sizes.lbs_task == 0) {
- task->security = NULL;
- return 0;
- }
-
- task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL);
- if (task->security == NULL)
- return -ENOMEM;
- return 0;
+ return lsm_blob_alloc(&task->security, blob_sizes.lbs_task, GFP_KERNEL);
}
/**
*/
static int lsm_ipc_alloc(struct kern_ipc_perm *kip)
{
- if (blob_sizes.lbs_ipc == 0) {
- kip->security = NULL;
- return 0;
- }
+ return lsm_blob_alloc(&kip->security, blob_sizes.lbs_ipc, GFP_KERNEL);
+}
- kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL);
- if (kip->security == NULL)
- return -ENOMEM;
- return 0;
+#ifdef CONFIG_KEYS
+/**
+ * lsm_key_alloc - allocate a composite key blob
+ * @key: the key that needs a blob
+ *
+ * Allocate the key blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_key_alloc(struct key *key)
+{
+ return lsm_blob_alloc(&key->security, blob_sizes.lbs_key, GFP_KERNEL);
}
+#endif /* CONFIG_KEYS */
/**
* lsm_msg_msg_alloc - allocate a composite msg_msg blob
*/
static int lsm_msg_msg_alloc(struct msg_msg *mp)
{
- if (blob_sizes.lbs_msg_msg == 0) {
- mp->security = NULL;
+ return lsm_blob_alloc(&mp->security, blob_sizes.lbs_msg_msg,
+ GFP_KERNEL);
+}
+
+/**
+ * lsm_bdev_alloc - allocate a composite block_device blob
+ * @bdev: the block_device that needs a blob
+ *
+ * Allocate the block_device blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_bdev_alloc(struct block_device *bdev)
+{
+ if (blob_sizes.lbs_bdev == 0) {
+ bdev->bd_security = NULL;
return 0;
}
- mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL);
- if (mp->security == NULL)
+ bdev->bd_security = kzalloc(blob_sizes.lbs_bdev, GFP_KERNEL);
+ if (!bdev->bd_security)
return -ENOMEM;
+
return 0;
}
*/
static int lsm_superblock_alloc(struct super_block *sb)
{
- if (blob_sizes.lbs_superblock == 0) {
- sb->s_security = NULL;
- return 0;
- }
-
- sb->s_security = kzalloc(blob_sizes.lbs_superblock, GFP_KERNEL);
- if (sb->s_security == NULL)
- return -ENOMEM;
- return 0;
+ return lsm_blob_alloc(&sb->s_security, blob_sizes.lbs_superblock,
+ GFP_KERNEL);
}
/**
* call_int_hook:
* This is a hook that returns a value.
*/
+#define __CALL_STATIC_VOID(NUM, HOOK, ...) \
+do { \
+ if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) { \
+ static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__); \
+ } \
+} while (0);
-#define call_void_hook(FUNC, ...) \
- do { \
- struct security_hook_list *P; \
- \
- hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \
- P->hook.FUNC(__VA_ARGS__); \
+#define call_void_hook(HOOK, ...) \
+ do { \
+ LSM_LOOP_UNROLL(__CALL_STATIC_VOID, HOOK, __VA_ARGS__); \
} while (0)
-#define call_int_hook(FUNC, ...) ({ \
- int RC = LSM_RET_DEFAULT(FUNC); \
- do { \
- struct security_hook_list *P; \
- \
- hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \
- RC = P->hook.FUNC(__VA_ARGS__); \
- if (RC != LSM_RET_DEFAULT(FUNC)) \
- break; \
- } \
- } while (0); \
- RC; \
+
+#define __CALL_STATIC_INT(NUM, R, HOOK, LABEL, ...) \
+do { \
+ if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) { \
+ R = static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__); \
+ if (R != LSM_RET_DEFAULT(HOOK)) \
+ goto LABEL; \
+ } \
+} while (0);
+
+#define call_int_hook(HOOK, ...) \
+({ \
+ __label__ OUT; \
+ int RC = LSM_RET_DEFAULT(HOOK); \
+ \
+ LSM_LOOP_UNROLL(__CALL_STATIC_INT, RC, HOOK, OUT, __VA_ARGS__); \
+OUT: \
+ RC; \
})
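Written out by hand for a hypothetical MAX_LSM_COUNT of 2, a call such as call_int_hook(inode_permission, inode, mask) expands to roughly the following (shown as a comment, since the real expansion is generated by the preprocessor):

/*
 *	({
 *		__label__ OUT;
 *		int RC = LSM_RET_DEFAULT(inode_permission);
 *
 *		if (static_branch_unlikely(&security_hook_active_inode_permission_0)) {
 *			RC = static_call(lsm_static_call_inode_permission_0)(inode, mask);
 *			if (RC != LSM_RET_DEFAULT(inode_permission))
 *				goto OUT;
 *		}
 *		if (static_branch_unlikely(&security_hook_active_inode_permission_1)) {
 *			RC = static_call(lsm_static_call_inode_permission_1)(inode, mask);
 *			if (RC != LSM_RET_DEFAULT(inode_permission))
 *				goto OUT;
 *		}
 *	OUT:
 *		RC;
 *	})
 */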
+#define lsm_for_each_hook(scall, NAME) \
+ for (scall = static_calls_table.NAME; \
+ scall - static_calls_table.NAME < MAX_LSM_COUNT; scall++) \
+ if (static_key_enabled(&scall->active->key))
+
/* Security operations */
/**
*/
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
int cap_sys_admin = 1;
int rc;
/*
- * The module will respond with a positive value if
- * it thinks the __vm_enough_memory() call should be
- * made with the cap_sys_admin set. If all of the modules
- * agree that it should be set it will. If any module
- * thinks it should not be set it won't.
+ * The module will respond with 0 if it thinks the __vm_enough_memory()
+ * call should be made with the cap_sys_admin set. If all of the modules
+ * agree that it should be set it will. If any module thinks it should
+ * not be set it won't.
*/
- hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) {
- rc = hp->hook.vm_enough_memory(mm, pages);
- if (rc <= 0) {
+ lsm_for_each_hook(scall, vm_enough_memory) {
+ rc = scall->hl->hook.vm_enough_memory(mm, pages);
+ if (rc < 0) {
cap_sys_admin = 0;
break;
}
int security_fs_context_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
int trc;
int rc = -ENOPARAM;
- hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param,
- list) {
- trc = hp->hook.fs_context_parse_param(fc, param);
+ lsm_for_each_hook(scall, fs_context_parse_param) {
+ trc = scall->hl->hook.fs_context_parse_param(fc, param);
if (trc == 0)
rc = 0;
else if (trc != -ENOPARAM)
unsigned long kern_flags,
unsigned long *set_kern_flags)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
int rc = mnt_opts ? -EOPNOTSUPP : LSM_RET_DEFAULT(sb_set_mnt_opts);
- hlist_for_each_entry(hp, &security_hook_heads.sb_set_mnt_opts,
- list) {
- rc = hp->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags,
+ lsm_for_each_hook(scall, sb_set_mnt_opts) {
+ rc = scall->hl->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags,
set_kern_flags);
if (rc != LSM_RET_DEFAULT(sb_set_mnt_opts))
break;
static void inode_free_by_rcu(struct rcu_head *head)
{
- /*
- * The rcu head is at the start of the inode blob
- */
+ /* The rcu head is at the start of the inode blob */
+ call_void_hook(inode_free_security_rcu, head);
kmem_cache_free(lsm_inode_cache, head);
}
* security_inode_free() - Free an inode's LSM blob
* @inode: the inode
*
- * Deallocate the inode security structure and set @inode->i_security to NULL.
+ * Release any LSM resources associated with @inode, although due to the
+ * inode's RCU protections it is possible that the resources will not be
+ * fully released until after the current RCU grace period has elapsed.
+ *
+ * It is important for LSMs to note that despite being present in a call to
+ * security_inode_free(), @inode may still be referenced in a VFS path walk
+ * and calls to security_inode_permission() may be made during, or after,
+ * a call to security_inode_free(). For this reason the inode->i_security
+ * field is released via a call_rcu() callback and any LSMs which need to
+ * retain inode state for use in security_inode_permission() should only
+ * release that state in the inode_free_security_rcu() LSM hook callback.
*/
void security_inode_free(struct inode *inode)
{
call_void_hook(inode_free_security, inode);
- /*
- * The inode may still be referenced in a path walk and
- * a call to security_inode_permission() can be made
- * after inode_free_security() is called. Ideally, the VFS
- * wouldn't do this, but fixing that is a much harder
- * job. For now, simply free the i_security via RCU, and
- * leave the current inode->i_security pointer intact.
- * The inode will be freed after the RCU grace period too.
- */
- if (inode->i_security)
- call_rcu((struct rcu_head *)inode->i_security,
- inode_free_by_rcu);
+ if (!inode->i_security)
+ return;
+ call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
}
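A minimal sketch of the split an individual LSM would implement under this model (the foo_ implementations, and the assumption that the RCU-stage hook receives a pointer to the start of the shared i_security blob, are illustrative only):

static void foo_inode_free_security(struct inode *inode)
{
	/* Immediate, non-deferred teardown only; state that
	 * security_inode_permission() may still consult must stay valid here.
	 */
}

static void foo_inode_free_security_rcu(void *inode_security)
{
	/* Runs from inode_free_by_rcu() after the grace period;
	 * inode_security is assumed to point at the start of the shared
	 * i_security blob, so the LSM offsets into it to reach its own slice.
	 */
}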
/**
const struct qstr *qstr,
const initxattrs initxattrs, void *fs_data)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
struct xattr *new_xattrs = NULL;
int ret = -EOPNOTSUPP, xattr_count = 0;
return -ENOMEM;
}
- hlist_for_each_entry(hp, &security_hook_heads.inode_init_security,
- list) {
- ret = hp->hook.inode_init_security(inode, dir, qstr, new_xattrs,
+ lsm_for_each_hook(scall, inode_init_security) {
+ ret = scall->hl->hook.inode_init_security(inode, dir, qstr, new_xattrs,
&xattr_count);
if (ret && ret != -EOPNOTSUPP)
goto out;
* lower layer to the union/overlay layer. The caller is responsible for
* reading and writing the xattrs, this hook is merely a filter.
*
- * Return: Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP
- * if the security module does not know about attribute, or a negative
- * error code to abort the copy up.
+ * Return: Returns 0 to accept the xattr, -ECANCELED to discard the xattr,
+ * -EOPNOTSUPP if the security module does not know about the attribute,
+ * or a negative error code to abort the copy up.
*/
int security_inode_copy_up_xattr(struct dentry *src, const char *name)
{
int rc;
- /*
- * The implementation can return 0 (accept the xattr), 1 (discard the
- * xattr), -EOPNOTSUPP if it does not know anything about the xattr or
- * any other error code in case of an error.
- */
rc = call_int_hook(inode_copy_up_xattr, src, name);
if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr))
return rc;
}
EXPORT_SYMBOL(security_inode_copy_up_xattr);
+/**
+ * security_inode_setintegrity() - Set the inode's integrity data
+ * @inode: inode
+ * @type: type of integrity, e.g. hash digest, signature, etc
+ * @value: the integrity value
+ * @size: size of the integrity value
+ *
+ * Register a verified integrity measurement of an inode with LSMs.
+ * LSMs should free the previously saved data if @value is NULL.
+ *
+ * Return: Returns 0 on success, negative values on failure.
+ */
+int security_inode_setintegrity(const struct inode *inode,
+ enum lsm_integrity_type type, const void *value,
+ size_t size)
+{
+ return call_int_hook(inode_setintegrity, inode, type, value, size);
+}
+EXPORT_SYMBOL(security_inode_setintegrity);
+
/**
* security_kernfs_init_security() - Init LSM context for a kernfs node
* @kn_dir: parent kernfs node
* Save owner security information (typically from current->security) in
* file->f_security for later use by the send_sigiotask hook.
*
+ * This hook is called with file->f_owner.lock held.
+ *
* Return: Returns 0 on success.
*/
void security_file_set_fowner(struct file *file)
{
int thisrc;
int rc = LSM_RET_DEFAULT(task_prctl);
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) {
- thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5);
+ lsm_for_each_hook(scall, task_prctl) {
+ thisrc = scall->hl->hook.task_prctl(option, arg2, arg3, arg4, arg5);
if (thisrc != LSM_RET_DEFAULT(task_prctl)) {
rc = thisrc;
if (thisrc != 0)
int security_getselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
u32 __user *size, u32 flags)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
struct lsm_ctx lctx = { .id = LSM_ID_UNDEF, };
u8 __user *base = (u8 __user *)uctx;
u32 entrysize;
* In the usual case gather all the data from the LSMs.
* In the single case only get the data from the LSM specified.
*/
- hlist_for_each_entry(hp, &security_hook_heads.getselfattr, list) {
- if (single && lctx.id != hp->lsmid->id)
+ lsm_for_each_hook(scall, getselfattr) {
+ if (single && lctx.id != scall->hl->lsmid->id)
continue;
entrysize = left;
if (base)
uctx = (struct lsm_ctx __user *)(base + total);
- rc = hp->hook.getselfattr(attr, uctx, &entrysize, flags);
+ rc = scall->hl->hook.getselfattr(attr, uctx, &entrysize, flags);
if (rc == -EOPNOTSUPP) {
rc = 0;
continue;
int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
u32 size, u32 flags)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
struct lsm_ctx *lctx;
int rc = LSM_RET_DEFAULT(setselfattr);
u64 required_len;
goto free_out;
}
- hlist_for_each_entry(hp, &security_hook_heads.setselfattr, list)
- if ((hp->lsmid->id) == lctx->id) {
- rc = hp->hook.setselfattr(attr, lctx, size, flags);
+ lsm_for_each_hook(scall, setselfattr)
+ if ((scall->hl->lsmid->id) == lctx->id) {
+ rc = scall->hl->hook.setselfattr(attr, lctx, size, flags);
break;
}
int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
char **value)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) {
- if (lsmid != 0 && lsmid != hp->lsmid->id)
+ lsm_for_each_hook(scall, getprocattr) {
+ if (lsmid != 0 && lsmid != scall->hl->lsmid->id)
continue;
- return hp->hook.getprocattr(p, name, value);
+ return scall->hl->hook.getprocattr(p, name, value);
}
return LSM_RET_DEFAULT(getprocattr);
}
*/
int security_setprocattr(int lsmid, const char *name, void *value, size_t size)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
- hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) {
- if (lsmid != 0 && lsmid != hp->lsmid->id)
+ lsm_for_each_hook(scall, setprocattr) {
+ if (lsmid != 0 && lsmid != scall->hl->lsmid->id)
continue;
- return hp->hook.setprocattr(name, value, size);
+ return scall->hl->hook.setprocattr(name, value, size);
}
return LSM_RET_DEFAULT(setprocattr);
}
}
EXPORT_SYMBOL(security_socket_getpeersec_dgram);
+/**
+ * lsm_sock_alloc - allocate a composite sock blob
+ * @sock: the sock that needs a blob
+ * @gfp: allocation mode
+ *
+ * Allocate the sock blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_sock_alloc(struct sock *sock, gfp_t gfp)
+{
+ return lsm_blob_alloc(&sock->sk_security, blob_sizes.lbs_sock, gfp);
+}
+
/**
* security_sk_alloc() - Allocate and initialize a sock's LSM blob
* @sk: sock
*/
int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
{
- return call_int_hook(sk_alloc_security, sk, family, priority);
+ int rc = lsm_sock_alloc(sk, priority);
+
+ if (unlikely(rc))
+ return rc;
+ rc = call_int_hook(sk_alloc_security, sk, family, priority);
+ if (unlikely(rc))
+ security_sk_free(sk);
+ return rc;
}
/**
void security_sk_free(struct sock *sk)
{
call_void_hook(sk_free_security, sk);
+ kfree(sk->sk_security);
+ sk->sk_security = NULL;
}
/**
*/
int security_tun_dev_alloc_security(void **security)
{
- return call_int_hook(tun_dev_alloc_security, security);
+ int rc;
+
+ rc = lsm_blob_alloc(security, blob_sizes.lbs_tun_dev, GFP_KERNEL);
+ if (rc)
+ return rc;
+
+ rc = call_int_hook(tun_dev_alloc_security, *security);
+ if (rc) {
+ kfree(*security);
+ *security = NULL;
+ }
+ return rc;
}
EXPORT_SYMBOL(security_tun_dev_alloc_security);
*/
void security_tun_dev_free_security(void *security)
{
- call_void_hook(tun_dev_free_security, security);
+ kfree(security);
}
EXPORT_SYMBOL(security_tun_dev_free_security);
*/
int security_ib_alloc_security(void **sec)
{
- return call_int_hook(ib_alloc_security, sec);
+ int rc;
+
+ rc = lsm_blob_alloc(sec, blob_sizes.lbs_ib, GFP_KERNEL);
+ if (rc)
+ return rc;
+
+ rc = call_int_hook(ib_alloc_security, *sec);
+ if (rc) {
+ kfree(*sec);
+ *sec = NULL;
+ }
+ return rc;
}
EXPORT_SYMBOL(security_ib_alloc_security);
*/
void security_ib_free_security(void *sec)
{
- call_void_hook(ib_free_security, sec);
+ kfree(sec);
}
EXPORT_SYMBOL(security_ib_free_security);
#endif /* CONFIG_SECURITY_INFINIBAND */
struct xfrm_policy *xp,
const struct flowi_common *flic)
{
- struct security_hook_list *hp;
+ struct lsm_static_call *scall;
int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match);
/*
* For speed optimization, we explicitly break the loop rather than
* using the macro
*/
- hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match,
- list) {
- rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic);
+ lsm_for_each_hook(scall, xfrm_state_pol_flow_match) {
+ rc = scall->hl->hook.xfrm_state_pol_flow_match(x, xp, flic);
break;
}
return rc;
int security_key_alloc(struct key *key, const struct cred *cred,
unsigned long flags)
{
- return call_int_hook(key_alloc, key, cred, flags);
+ int rc = lsm_key_alloc(key);
+
+ if (unlikely(rc))
+ return rc;
+ rc = call_int_hook(key_alloc, key, cred, flags);
+ if (unlikely(rc))
+ security_key_free(key);
+ return rc;
}
/**
*/
void security_key_free(struct key *key)
{
- call_void_hook(key_free, key);
+ kfree(key->security);
+ key->security = NULL;
}
/**
* Return: Returns 0 on success, error on failure.
*/
int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
- struct path *path)
+ const struct path *path)
{
return call_int_hook(bpf_token_create, token, attr, path);
}
}
EXPORT_SYMBOL(security_locked_down);
+/**
+ * security_bdev_alloc() - Allocate a block device LSM blob
+ * @bdev: block device
+ *
+ * Allocate and attach a security structure to @bdev->bd_security. The
+ * security field is initialized to NULL when the bdev structure is
+ * allocated.
+ *
+ * Return: Return 0 if operation was successful.
+ */
+int security_bdev_alloc(struct block_device *bdev)
+{
+ int rc = 0;
+
+ rc = lsm_bdev_alloc(bdev);
+ if (unlikely(rc))
+ return rc;
+
+ rc = call_int_hook(bdev_alloc_security, bdev);
+ if (unlikely(rc))
+ security_bdev_free(bdev);
+
+ return rc;
+}
+EXPORT_SYMBOL(security_bdev_alloc);
+
+/**
+ * security_bdev_free() - Free a block device's LSM blob
+ * @bdev: block device
+ *
+ * Deallocate the bdev security structure and set @bdev->bd_security to NULL.
+ */
+void security_bdev_free(struct block_device *bdev)
+{
+ if (!bdev->bd_security)
+ return;
+
+ call_void_hook(bdev_free_security, bdev);
+
+ kfree(bdev->bd_security);
+ bdev->bd_security = NULL;
+}
+EXPORT_SYMBOL(security_bdev_free);
+
+/**
+ * security_bdev_setintegrity() - Set the device's integrity data
+ * @bdev: block device
+ * @type: type of integrity, e.g. hash digest, signature, etc
+ * @value: the integrity value
+ * @size: size of the integrity value
+ *
+ * Register a verified integrity measurement of a bdev with LSMs.
+ * LSMs should free the previously saved data if @value is NULL.
+ * Note that the hook must be invoked every time the security information is
+ * updated so that the stored data stays current. For example, in dm-verity,
+ * if the mapping table is reloaded and configured to use a different dm-verity
+ * target with a new roothash and signing information, the data previously
+ * stored in the LSM blob becomes obsolete and the hook must be re-invoked to
+ * refresh it. This follows from the design of device-mapper: a device-mapper
+ * device is created first, and targets are loaded into it afterwards and may
+ * be modified multiple times during the device's lifetime. So while the LSM
+ * blob is allocated when the block device is created, its contents are not
+ * initialized at that point and can change substantially over time, including
+ * changing from data the LSMs trust to data they do not. Failing to handle
+ * these changes correctly could allow LSM checks to be bypassed.
+ *
+ * Return: Returns 0 on success, negative values on failure.
+ */
+int security_bdev_setintegrity(struct block_device *bdev,
+ enum lsm_integrity_type type, const void *value,
+ size_t size)
+{
+ return call_int_hook(bdev_setintegrity, bdev, type, value, size);
+}
+EXPORT_SYMBOL(security_bdev_setintegrity);
+
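A hypothetical caller sketch (the LSM_INT_DMVERITY_ROOTHASH enumerator name is assumed here purely for illustration): a dm-verity-style target would re-register its root digest with the LSMs each time its table is loaded or reloaded, as the comment above requires.

static int example_verity_table_load(struct block_device *bdev,
				     const u8 *root_digest, size_t digest_len)
{
	/* must be called on every (re)load, not just the first one */
	return security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH,
					  root_digest, digest_len);
}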
#ifdef CONFIG_PERF_EVENTS
/**
* security_perf_event_open() - Check if a perf event open is allowed
*/
int security_perf_event_alloc(struct perf_event *event)
{
- return call_int_hook(perf_event_alloc, event);
+ int rc;
+
+ rc = lsm_blob_alloc(&event->security, blob_sizes.lbs_perf_event,
+ GFP_KERNEL);
+ if (rc)
+ return rc;
+
+ rc = call_int_hook(perf_event_alloc, event);
+ if (rc) {
+ kfree(event->security);
+ event->security = NULL;
+ }
+ return rc;
}
/**
*/
void security_perf_event_free(struct perf_event *event)
{
- call_void_hook(perf_event_free, event);
+ kfree(event->security);
+ event->security = NULL;
}
/**
return call_int_hook(uring_cmd, ioucmd);
}
#endif /* CONFIG_IO_URING */
+
+/**
+ * security_initramfs_populated() - Notify LSMs that initramfs has been loaded
+ *
+ * Tells the LSMs the initramfs has been unpacked into the rootfs.
+ */
+void security_initramfs_populated(void)
+{
+ call_void_hook(initramfs_populated);
+}
might_sleep_if(may_sleep);
+ /*
+ * The check of isec->initialized below is racy but
+ * inode_doinit_with_dentry() will recheck with
+ * isec->lock held.
+ */
if (selinux_initialized() &&
- isec->initialized != LABEL_INITIALIZED) {
+ data_race(isec->initialized != LABEL_INITIALIZED)) {
if (!may_sleep)
return -ECHILD;
}
/*
- * Check that a process has enough memory to allocate a new virtual
- * mapping. 0 means there is enough memory for the allocation to
- * succeed and -ENOMEM implies there is not.
+ * Check permission for allocating a new virtual mapping. Returns
+ * 0 if permission is granted, negative error code if not.
*
* Do not audit the selinux permission check, as this is applied to all
* processes that allocate mappings.
*/
static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
{
- int rc, cap_sys_admin = 0;
-
- rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN,
- CAP_OPT_NOAUDIT, true);
- if (rc == 0)
- cap_sys_admin = 1;
-
- return cap_sys_admin;
+ return cred_has_capability(current_cred(), CAP_SYS_ADMIN,
+ CAP_OPT_NOAUDIT, true);
}
/* binprm security operations */
* xattrs up. Instead, filter out SELinux-related xattrs following
* policy load.
*/
- if (selinux_initialized() && strcmp(name, XATTR_NAME_SELINUX) == 0)
- return 1; /* Discard */
+ if (selinux_initialized() && !strcmp(name, XATTR_NAME_SELINUX))
+ return -ECANCELED; /* Discard */
/*
* Any other attribute apart from SELINUX is not claimed, supported
* by selinux.
if (default_noexec &&
(prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
int rc = 0;
- if (vma_is_initial_heap(vma)) {
+ /*
+ * We don't use the vma_is_initial_heap() helper as it has
+ * a history of problems and is currently broken on systems
+ * where there is no heap, e.g. brk == start_brk. Before
+ * replacing the conditional below with vma_is_initial_heap(),
+ * or something similar, please ensure that the logic is the
+ * same as what we have below, or that you have tested every
+ * corner case you can think of.
+ */
+ if (vma->vm_start >= vma->vm_mm->start_brk &&
+ vma->vm_end <= vma->vm_mm->brk) {
rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
PROCESS__EXECHEAP, NULL);
} else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
struct file_security_struct *fsec;
/* struct fown_struct is never outside the context of a struct file */
- file = container_of(fown, struct file, f_owner);
+ file = fown->file;
fsec = selinux_file(file);
static int sock_has_perm(struct sock *sk, u32 perms)
{
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
struct common_audit_data ad;
struct lsm_network_audit net;
isec->initialized = LABEL_INITIALIZED;
if (sock->sk) {
- sksec = sock->sk->sk_security;
+ sksec = selinux_sock(sock->sk);
sksec->sclass = sclass;
sksec->sid = sid;
/* Allows detection of the first association on this socket */
static int selinux_socket_socketpair(struct socket *socka,
struct socket *sockb)
{
- struct sk_security_struct *sksec_a = socka->sk->sk_security;
- struct sk_security_struct *sksec_b = sockb->sk->sk_security;
+ struct sk_security_struct *sksec_a = selinux_sock(socka->sk);
+ struct sk_security_struct *sksec_b = selinux_sock(sockb->sk);
sksec_a->peer_sid = sksec_b->sid;
sksec_b->peer_sid = sksec_a->sid;
static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
{
struct sock *sk = sock->sk;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
u16 family;
int err;
struct sockaddr *address, int addrlen)
{
struct sock *sk = sock->sk;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
int err;
err = sock_has_perm(sk, SOCKET__CONNECT);
struct sock *other,
struct sock *newsk)
{
- struct sk_security_struct *sksec_sock = sock->sk_security;
- struct sk_security_struct *sksec_other = other->sk_security;
- struct sk_security_struct *sksec_new = newsk->sk_security;
+ struct sk_security_struct *sksec_sock = selinux_sock(sock);
+ struct sk_security_struct *sksec_other = selinux_sock(other);
+ struct sk_security_struct *sksec_new = selinux_sock(newsk);
struct common_audit_data ad;
struct lsm_network_audit net;
int err;
static int selinux_socket_unix_may_send(struct socket *sock,
struct socket *other)
{
- struct sk_security_struct *ssec = sock->sk->sk_security;
- struct sk_security_struct *osec = other->sk->sk_security;
+ struct sk_security_struct *ssec = selinux_sock(sock->sk);
+ struct sk_security_struct *osec = selinux_sock(other->sk);
struct common_audit_data ad;
struct lsm_network_audit net;
u16 family)
{
int err = 0;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
u32 sk_sid = sksec->sid;
struct common_audit_data ad;
struct lsm_network_audit net;
static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int err, peerlbl_active, secmark_active;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
u16 family = sk->sk_family;
u32 sk_sid = sksec->sid;
struct common_audit_data ad;
int err = 0;
char *scontext = NULL;
u32 scontext_len;
- struct sk_security_struct *sksec = sock->sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sock->sk);
u32 peer_sid = SECSID_NULL;
if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET ||
static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
{
- struct sk_security_struct *sksec;
-
- sksec = kzalloc(sizeof(*sksec), priority);
- if (!sksec)
- return -ENOMEM;
+ struct sk_security_struct *sksec = selinux_sock(sk);
sksec->peer_sid = SECINITSID_UNLABELED;
sksec->sid = SECINITSID_UNLABELED;
sksec->sclass = SECCLASS_SOCKET;
selinux_netlbl_sk_security_reset(sksec);
- sk->sk_security = sksec;
return 0;
}
static void selinux_sk_free_security(struct sock *sk)
{
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
- sk->sk_security = NULL;
selinux_netlbl_sk_security_free(sksec);
- kfree(sksec);
}
static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
{
- struct sk_security_struct *sksec = sk->sk_security;
- struct sk_security_struct *newsksec = newsk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
+ struct sk_security_struct *newsksec = selinux_sock(newsk);
newsksec->sid = sksec->sid;
newsksec->peer_sid = sksec->peer_sid;
if (!sk)
*secid = SECINITSID_ANY_SOCKET;
else {
- const struct sk_security_struct *sksec = sk->sk_security;
+ const struct sk_security_struct *sksec = selinux_sock(sk);
*secid = sksec->sid;
}
{
struct inode_security_struct *isec =
inode_security_novalidate(SOCK_INODE(parent));
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 ||
sk->sk_family == PF_UNIX)
{
struct sock *sk = asoc->base.sk;
u16 family = sk->sk_family;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
struct common_audit_data ad;
struct lsm_network_audit net;
int err;
static int selinux_sctp_assoc_request(struct sctp_association *asoc,
struct sk_buff *skb)
{
- struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
u32 conn_sid;
int err;
static int selinux_sctp_assoc_established(struct sctp_association *asoc,
struct sk_buff *skb)
{
- struct sk_security_struct *sksec = asoc->base.sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
if (!selinux_policycap_extsockclass())
return 0;
static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
struct sock *newsk)
{
- struct sk_security_struct *sksec = sk->sk_security;
- struct sk_security_struct *newsksec = newsk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
+ struct sk_security_struct *newsksec = selinux_sock(newsk);
/* If policy does not support SECCLASS_SCTP_SOCKET then call
* the non-sctp clone version.
static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
{
- struct sk_security_struct *ssksec = ssk->sk_security;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *ssksec = selinux_sock(ssk);
+ struct sk_security_struct *sksec = selinux_sock(sk);
ssksec->sclass = sksec->sclass;
ssksec->sid = sksec->sid;
static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
int err;
u16 family = req->rsk_ops->family;
u32 connsid;
static void selinux_inet_csk_clone(struct sock *newsk,
const struct request_sock *req)
{
- struct sk_security_struct *newsksec = newsk->sk_security;
+ struct sk_security_struct *newsksec = selinux_sock(newsk);
newsksec->sid = req->secid;
newsksec->peer_sid = req->peer_secid;
static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
{
u16 family = sk->sk_family;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
/* handle mapped IPv4 packets arriving via IPv6 sockets */
if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
flic->flowic_secid = req->secid;
}
-static int selinux_tun_dev_alloc_security(void **security)
+static int selinux_tun_dev_alloc_security(void *security)
{
- struct tun_security_struct *tunsec;
+ struct tun_security_struct *tunsec = selinux_tun_dev(security);
- tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
- if (!tunsec)
- return -ENOMEM;
tunsec->sid = current_sid();
-
- *security = tunsec;
return 0;
}
-static void selinux_tun_dev_free_security(void *security)
-{
- kfree(security);
-}
-
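With a .lbs_tun_dev size registered in lsm_blob_sizes (added later in this patch), the LSM core now allocates and frees the TUN device blob, so the alloc hook only initializes the SID and the dedicated free hook goes away. The opaque void *security cookie points at that shared blob; a plausible sketch of the selinux_tun_dev() accessor, assuming it follows the same offset pattern as the other blob helpers:

/* Assumed helper over the opaque tun_dev security blob. */
static inline struct tun_security_struct *selinux_tun_dev(void *security)
{
	return security + selinux_blob_sizes.lbs_tun_dev;
}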
static int selinux_tun_dev_create(void)
{
u32 sid = current_sid();
static int selinux_tun_dev_attach_queue(void *security)
{
- struct tun_security_struct *tunsec = security;
+ struct tun_security_struct *tunsec = selinux_tun_dev(security);
return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
TUN_SOCKET__ATTACH_QUEUE, NULL);
static int selinux_tun_dev_attach(struct sock *sk, void *security)
{
- struct tun_security_struct *tunsec = security;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct tun_security_struct *tunsec = selinux_tun_dev(security);
+ struct sk_security_struct *sksec = selinux_sock(sk);
/* we don't currently perform any NetLabel based labeling here and it
* isn't clear that we would want to do so anyway; while we could apply
static int selinux_tun_dev_open(void *security)
{
- struct tun_security_struct *tunsec = security;
+ struct tun_security_struct *tunsec = selinux_tun_dev(security);
u32 sid = current_sid();
int err;
return NF_ACCEPT;
/* standard practice, label using the parent socket */
- sksec = sk->sk_security;
+ sksec = selinux_sock(sk);
sid = sksec->sid;
} else
sid = SECINITSID_KERNEL;
sk = skb_to_full_sk(skb);
if (sk == NULL)
return NF_ACCEPT;
- sksec = sk->sk_security;
+ sksec = selinux_sock(sk);
ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf);
if (selinux_parse_skb(skb, &ad, NULL, 0, &proto))
u32 skb_sid;
struct sk_security_struct *sksec;
- sksec = sk->sk_security;
+ sksec = selinux_sock(sk);
if (selinux_skb_peerlbl_sid(skb, family, &skb_sid))
return NF_DROP;
/* At this point, if the returned skb peerlbl is SECSID_NULL
} else {
/* Locally generated packet, fetch the security label from the
* associated socket. */
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
peer_sid = sksec->sid;
secmark_perm = PACKET__SEND;
}
unsigned int data_len = skb->len;
unsigned char *data = skb->data;
struct nlmsghdr *nlh;
- struct sk_security_struct *sksec = sk->sk_security;
+ struct sk_security_struct *sksec = selinux_sock(sk);
u16 sclass = sksec->sclass;
u32 perm;
*/
static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
{
- return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX,
- ctx, ctxlen, 0);
+ return __vfs_setxattr_locked(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX,
+ ctx, ctxlen, 0, NULL);
}
static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
unsigned long flags)
{
const struct task_security_struct *tsec;
- struct key_security_struct *ksec;
-
- ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL);
- if (!ksec)
- return -ENOMEM;
+ struct key_security_struct *ksec = selinux_key(k);
tsec = selinux_cred(cred);
if (tsec->keycreate_sid)
else
ksec->sid = tsec->sid;
- k->security = ksec;
return 0;
}
-static void selinux_key_free(struct key *k)
-{
- struct key_security_struct *ksec = k->security;
-
- k->security = NULL;
- kfree(ksec);
-}
-
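Likewise for keys: key->security is now a framework-managed blob, which is why selinux_key_alloc() stops allocating and selinux_key_free() is deleted here and dropped from the hook list below. The selinux_key() helper is assumed to be the usual pointer-plus-offset accessor:

/* Assumed helper into the key's LSM security blob. */
static inline struct key_security_struct *selinux_key(const struct key *key)
{
	return key->security + selinux_blob_sizes.lbs_key;
}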
static int selinux_key_permission(key_ref_t key_ref,
const struct cred *cred,
enum key_need_perm need_perm)
sid = cred_sid(cred);
key = key_ref_to_ptr(key_ref);
- ksec = key->security;
+ ksec = selinux_key(key);
return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL);
}
static int selinux_key_getsecurity(struct key *key, char **_buffer)
{
- struct key_security_struct *ksec = key->security;
+ struct key_security_struct *ksec = selinux_key(key);
char *context = NULL;
unsigned len;
int rc;
#ifdef CONFIG_KEY_NOTIFICATIONS
static int selinux_watch_key(struct key *key)
{
- struct key_security_struct *ksec = key->security;
+ struct key_security_struct *ksec = selinux_key(key);
u32 sid = current_sid();
return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, KEY__VIEW, NULL);
INFINIBAND_ENDPORT__MANAGE_SUBNET, &ad);
}
-static int selinux_ib_alloc_security(void **ib_sec)
+static int selinux_ib_alloc_security(void *ib_sec)
{
- struct ib_security_struct *sec;
+ struct ib_security_struct *sec = selinux_ib(ib_sec);
- sec = kzalloc(sizeof(*sec), GFP_KERNEL);
- if (!sec)
- return -ENOMEM;
sec->sid = current_sid();
-
- *ib_sec = sec;
return 0;
}
-
-static void selinux_ib_free_security(void *ib_sec)
-{
- kfree(ib_sec);
-}
#endif
#ifdef CONFIG_BPF_SYSCALL
}
static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
- struct path *path)
+ const struct path *path)
{
struct bpf_security_struct *bpfsec;
.lbs_file = sizeof(struct file_security_struct),
.lbs_inode = sizeof(struct inode_security_struct),
.lbs_ipc = sizeof(struct ipc_security_struct),
+ .lbs_key = sizeof(struct key_security_struct),
.lbs_msg_msg = sizeof(struct msg_security_struct),
+#ifdef CONFIG_PERF_EVENTS
+ .lbs_perf_event = sizeof(struct perf_event_security_struct),
+#endif
+ .lbs_sock = sizeof(struct sk_security_struct),
.lbs_superblock = sizeof(struct superblock_security_struct),
.lbs_xattr_count = SELINUX_INODE_INIT_XATTRS,
+ .lbs_tun_dev = sizeof(struct tun_security_struct),
+ .lbs_ib = sizeof(struct ib_security_struct),
};
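These new .lbs_* entries declare how many bytes SELinux needs in each shared security blob. At initialization the LSM core sums the requests from every stacked module, hands each module back its offset, and then allocates a single blob per object; the accessors used throughout this patch (selinux_sock(), selinux_key(), selinux_tun_dev(), selinux_ib(), selinux_perf_event()) are just that offset applied to the object's blob pointer. A rough sketch of the size-to-offset step, with hypothetical names:

/* Rough sketch (hypothetical helper): replace a module's requested size
 * with its offset into the shared blob and grow the running total that
 * the core later allocates per object.
 */
static void lsm_set_blob_size(int *need, int *total)
{
	int offset;

	if (*need <= 0)
		return;

	offset = *total;
	*total += *need;
	*need = offset;
}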
#ifdef CONFIG_PERF_EVENTS
{
struct perf_event_security_struct *perfsec;
- perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL);
- if (!perfsec)
- return -ENOMEM;
-
+ perfsec = selinux_perf_event(event->security);
perfsec->sid = current_sid();
- event->security = perfsec;
return 0;
}
-static void selinux_perf_event_free(struct perf_event *event)
-{
- struct perf_event_security_struct *perfsec = event->security;
-
- event->security = NULL;
- kfree(perfsec);
-}
-
static int selinux_perf_event_read(struct perf_event *event)
{
- struct perf_event_security_struct *perfsec = event->security;
+ struct perf_event_security_struct *perfsec = selinux_perf_event(event->security);
LSM_HOOK_INIT(secmark_refcount_inc, selinux_secmark_refcount_inc),
LSM_HOOK_INIT(secmark_refcount_dec, selinux_secmark_refcount_dec),
LSM_HOOK_INIT(req_classify_flow, selinux_req_classify_flow),
- LSM_HOOK_INIT(tun_dev_free_security, selinux_tun_dev_free_security),
LSM_HOOK_INIT(tun_dev_create, selinux_tun_dev_create),
LSM_HOOK_INIT(tun_dev_attach_queue, selinux_tun_dev_attach_queue),
LSM_HOOK_INIT(tun_dev_attach, selinux_tun_dev_attach),
LSM_HOOK_INIT(ib_pkey_access, selinux_ib_pkey_access),
LSM_HOOK_INIT(ib_endport_manage_subnet,
selinux_ib_endport_manage_subnet),
- LSM_HOOK_INIT(ib_free_security, selinux_ib_free_security),
#endif
#ifdef CONFIG_SECURITY_NETWORK_XFRM
LSM_HOOK_INIT(xfrm_policy_free_security, selinux_xfrm_policy_free),
#endif
#ifdef CONFIG_KEYS
- LSM_HOOK_INIT(key_free, selinux_key_free),
LSM_HOOK_INIT(key_permission, selinux_key_permission),
LSM_HOOK_INIT(key_getsecurity, selinux_key_getsecurity),
#ifdef CONFIG_KEY_NOTIFICATIONS
#ifdef CONFIG_PERF_EVENTS
LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open),
- LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free),
LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read),
LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write),
#endif