Git Repo - linux.git/commitdiff
Merge tag 'bpf-next-6.12-struct-fd' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <[email protected]>
Tue, 24 Sep 2024 21:54:26 +0000 (14:54 -0700)
committer Linus Torvalds <[email protected]>
Tue, 24 Sep 2024 21:54:26 +0000 (14:54 -0700)
Pull bpf 'struct fd' updates from Alexei Starovoitov:
 "This includes struct_fd BPF changes from Al and Andrii"

* tag 'bpf-next-6.12-struct-fd' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf: convert bpf_token_create() to CLASS(fd, ...)
  security,bpf: constify struct path in bpf_token_create() LSM hook
  bpf: more trivial fdget() conversions
  bpf: trivial conversions for fdget()
  bpf: switch maps to CLASS(fd, ...)
  bpf: factor out fetching bpf_map from FD and adding it to used_maps list
  bpf: switch fdget_raw() uses to CLASS(fd_raw, ...)
  bpf: convert __bpf_prog_get() to CLASS(fd, ...)
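
The common thread of these commits is replacing manual fdget()/fdput() pairing with the scope-based CLASS(fd, ...) guard from the kernel's cleanup helpers (<linux/cleanup.h> / <linux/file.h>). A minimal before/after sketch of that pattern, mirroring the bpf_map_get()/__bpf_map_get() hunks below; get_map_by_fd_*() is a hypothetical wrapper used only for illustration, while fdget(), fdput(), CLASS(fd, ...), fd_empty() and fd_file() are the real helpers this series converts between:

/* Before: every exit path after fdget() must remember to call fdput(). */
struct bpf_map *get_map_by_fd_old(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	if (!fd_file(f))
		return ERR_PTR(-EBADF);
	if (fd_file(f)->f_op != &bpf_map_fops) {
		fdput(f);			/* easy to miss on error paths */
		return ERR_PTR(-EINVAL);
	}
	map = fd_file(f)->private_data;
	bpf_map_inc(map);			/* take our own reference */
	fdput(f);
	return map;
}

/* After: CLASS(fd, f)(ufd) declares a guard object; the fd reference is
 * dropped automatically when f goes out of scope, so early returns need
 * no explicit fdput() at all.
 */
struct bpf_map *get_map_by_fd_new(u32 ufd)
{
	CLASS(fd, f)(ufd);
	struct bpf_map *map;

	if (fd_empty(f))
		return ERR_PTR(-EBADF);
	if (fd_file(f)->f_op != &bpf_map_fops)
		return ERR_PTR(-EINVAL);

	map = fd_file(f)->private_data;
	bpf_map_inc(map);			/* keep map alive past the guard */
	return map;
}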

include/linux/bpf.h
include/linux/lsm_hook_defs.h
include/linux/security.h
kernel/bpf/btf.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
net/core/sock_map.c
security/security.c
security/selinux/hooks.c

diff --combined include/linux/bpf.h
index 0c3893c4717110a4b9cd6ae3df39678b2f0e58c9,9f35df07e86d744d94d5ba8d0249001042706646..19d8ca8ac960f75ae8ca37c302d9e1055ef92af4
@@@ -695,11 -695,6 +695,11 @@@ enum bpf_type_flag 
        /* DYNPTR points to xdp_buff */
        DYNPTR_TYPE_XDP         = BIT(16 + BPF_BASE_TYPE_BITS),
  
 +      /* Memory must be aligned on some architectures, used in combination with
 +       * MEM_FIXED_SIZE.
 +       */
 +      MEM_ALIGNED             = BIT(17 + BPF_BASE_TYPE_BITS),
 +
        __BPF_TYPE_FLAG_MAX,
        __BPF_TYPE_LAST_FLAG    = __BPF_TYPE_FLAG_MAX - 1,
  };
@@@ -737,6 -732,8 +737,6 @@@ enum bpf_arg_type 
        ARG_ANYTHING,           /* any (initialized) argument is ok */
        ARG_PTR_TO_SPIN_LOCK,   /* pointer to bpf_spin_lock */
        ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
 -      ARG_PTR_TO_INT,         /* pointer to int */
 -      ARG_PTR_TO_LONG,        /* pointer to long */
        ARG_PTR_TO_SOCKET,      /* pointer to bpf_sock (fullsock) */
        ARG_PTR_TO_BTF_ID,      /* pointer to in-kernel struct */
        ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */
        ARG_PTR_TO_STACK,       /* pointer to stack */
        ARG_PTR_TO_CONST_STR,   /* pointer to a null terminated read-only string */
        ARG_PTR_TO_TIMER,       /* pointer to bpf_timer */
 -      ARG_PTR_TO_KPTR,        /* pointer to referenced kptr */
 +      ARG_KPTR_XCHG_DEST,     /* pointer to destination that kptrs are bpf_kptr_xchg'd into */
        ARG_PTR_TO_DYNPTR,      /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
        __BPF_ARG_TYPE_MAX,
  
@@@ -811,12 -808,12 +811,12 @@@ struct bpf_func_proto 
        bool gpl_only;
        bool pkt_access;
        bool might_sleep;
 -      /* set to true if helper follows contract for gcc/llvm
 -       * attribute no_caller_saved_registers:
 +      /* set to true if helper follows contract for llvm
 +       * attribute bpf_fastcall:
         * - void functions do not scratch r0
         * - functions taking N arguments scratch only registers r1-rN
         */
 -      bool allow_nocsr;
 +      bool allow_fastcall;
        enum bpf_return_type ret_type;
        union {
                struct {
@@@ -977,8 -974,6 +977,8 @@@ struct bpf_verifier_ops 
                                struct bpf_insn_access_aux *info);
        int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
                            const struct bpf_prog *prog);
 +      int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog,
 +                          s16 ctx_stack_off);
        int (*gen_ld_abs)(const struct bpf_insn *orig,
                          struct bpf_insn *insn_buf);
        u32 (*convert_ctx_access)(enum bpf_access_type type,
@@@ -2246,7 -2241,16 +2246,16 @@@ void __bpf_obj_drop_impl(void *p, cons
  
  struct bpf_map *bpf_map_get(u32 ufd);
  struct bpf_map *bpf_map_get_with_uref(u32 ufd);
- struct bpf_map *__bpf_map_get(struct fd f);
+ static inline struct bpf_map *__bpf_map_get(struct fd f)
+ {
+       if (fd_empty(f))
+               return ERR_PTR(-EBADF);
+       if (unlikely(fd_file(f)->f_op != &bpf_map_fops))
+               return ERR_PTR(-EINVAL);
+       return fd_file(f)->private_data;
+ }
  void bpf_map_inc(struct bpf_map *map);
  void bpf_map_inc_with_uref(struct bpf_map *map);
  struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
@@@ -3203,9 -3207,7 +3212,9 @@@ extern const struct bpf_func_proto bpf_
  extern const struct bpf_func_proto bpf_get_current_comm_proto;
  extern const struct bpf_func_proto bpf_get_stackid_proto;
  extern const struct bpf_func_proto bpf_get_stack_proto;
 +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto;
  extern const struct bpf_func_proto bpf_get_task_stack_proto;
 +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto;
  extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
  extern const struct bpf_func_proto bpf_get_stack_proto_pe;
  extern const struct bpf_func_proto bpf_sock_map_update_proto;
@@@ -3213,7 -3215,6 +3222,7 @@@ extern const struct bpf_func_proto bpf_
  extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
  extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
  extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto;
 +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto;
  extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
  extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
  extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
diff --combined include/linux/lsm_hook_defs.h
index 1d59513bf2301c442f3cce37ee90a83c8fd48746,462b553782410c1e715cda5aec4f91744fce07b7..9eca013aa5e1f654050d8981d71543943c347a47
@@@ -48,7 -48,7 +48,7 @@@ LSM_HOOK(int, 0, quota_on, struct dentr
  LSM_HOOK(int, 0, syslog, int type)
  LSM_HOOK(int, 0, settime, const struct timespec64 *ts,
         const struct timezone *tz)
 -LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages)
 +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages)
  LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm)
  LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file)
  LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
@@@ -114,7 -114,6 +114,7 @@@ LSM_HOOK(int, 0, path_notify, const str
         unsigned int obj_type)
  LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode)
  LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode)
 +LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security)
  LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode,
         struct inode *dir, const struct qstr *qstr, struct xattr *xattrs,
         int *xattr_count)
@@@ -180,8 -179,6 +180,8 @@@ LSM_HOOK(void, LSM_RET_VOID, inode_gets
  LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new)
  LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src,
         const char *name)
 +LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode,
 +       enum lsm_integrity_type type, const void *value, size_t size)
  LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir,
         struct kernfs_node *kn)
  LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
@@@ -356,7 -353,8 +356,7 @@@ LSM_HOOK(void, LSM_RET_VOID, secmark_re
  LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void)
  LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req,
         struct flowi_common *flic)
 -LSM_HOOK(int, 0, tun_dev_alloc_security, void **security)
 -LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security)
 +LSM_HOOK(int, 0, tun_dev_alloc_security, void *security)
  LSM_HOOK(int, 0, tun_dev_create, void)
  LSM_HOOK(int, 0, tun_dev_attach_queue, void *security)
  LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security)
@@@ -376,7 -374,8 +376,7 @@@ LSM_HOOK(int, 0, mptcp_add_subflow, str
  LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey)
  LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name,
         u8 port_num)
 -LSM_HOOK(int, 0, ib_alloc_security, void **sec)
 -LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec)
 +LSM_HOOK(int, 0, ib_alloc_security, void *sec)
  #endif /* CONFIG_SECURITY_INFINIBAND */
  
  #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@@ -404,6 -403,7 +404,6 @@@ LSM_HOOK(int, 0, xfrm_decode_session, s
  #ifdef CONFIG_KEYS
  LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred,
         unsigned long flags)
 -LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key)
  LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred,
         enum key_need_perm need_perm)
  LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer)
@@@ -431,7 -431,7 +431,7 @@@ LSM_HOOK(int, 0, bpf_prog_load, struct 
         struct bpf_token *token)
  LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
  LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
-        struct path *path)
+        const struct path *path)
  LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token)
  LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd)
  LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
@@@ -442,6 -442,7 +442,6 @@@ LSM_HOOK(int, 0, locked_down, enum lock
  #ifdef CONFIG_PERF_EVENTS
  LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type)
  LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event)
 -LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event)
  LSM_HOOK(int, 0, perf_event_read, struct perf_event *event)
  LSM_HOOK(int, 0, perf_event_write, struct perf_event *event)
  #endif /* CONFIG_PERF_EVENTS */
@@@ -451,10 -452,3 +451,10 @@@ LSM_HOOK(int, 0, uring_override_creds, 
  LSM_HOOK(int, 0, uring_sqpoll, void)
  LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd)
  #endif /* CONFIG_IO_URING */
 +
 +LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void)
 +
 +LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev)
 +LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev)
 +LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev,
 +       enum lsm_integrity_type type, const void *value, size_t size)
diff --combined include/linux/security.h
index c37c32ebbdcd8736be687414f03ea6934db5cd4d,31523a2c71c4d3fe2b21cc0668304a6f4f1a4d68..b86ec2afc69101e62ded7480a3feb6798d971c62
@@@ -83,18 -83,6 +83,18 @@@ enum lsm_event 
        LSM_POLICY_CHANGE,
  };
  
 +struct dm_verity_digest {
 +      const char *alg;
 +      const u8 *digest;
 +      size_t digest_len;
 +};
 +
 +enum lsm_integrity_type {
 +      LSM_INT_DMVERITY_SIG_VALID,
 +      LSM_INT_DMVERITY_ROOTHASH,
 +      LSM_INT_FSVERITY_BUILTINSIG_VALID,
 +};
 +
  /*
   * These are reasons that can be passed to the security_locked_down()
   * LSM hook. Lockdown reasons that protect kernel integrity (ie, the
@@@ -411,9 -399,6 +411,9 @@@ int security_inode_listsecurity(struct 
  void security_inode_getsecid(struct inode *inode, u32 *secid);
  int security_inode_copy_up(struct dentry *src, struct cred **new);
  int security_inode_copy_up_xattr(struct dentry *src, const char *name);
 +int security_inode_setintegrity(const struct inode *inode,
 +                              enum lsm_integrity_type type, const void *value,
 +                              size_t size);
  int security_kernfs_init_security(struct kernfs_node *kn_dir,
                                  struct kernfs_node *kn);
  int security_file_permission(struct file *file, int mask);
@@@ -524,11 -509,6 +524,11 @@@ int security_inode_getsecctx(struct ino
  int security_locked_down(enum lockdown_reason what);
  int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len,
                      void *val, size_t val_len, u64 id, u64 flags);
 +int security_bdev_alloc(struct block_device *bdev);
 +void security_bdev_free(struct block_device *bdev);
 +int security_bdev_setintegrity(struct block_device *bdev,
 +                             enum lsm_integrity_type type, const void *value,
 +                             size_t size);
  #else /* CONFIG_SECURITY */
  
  static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
@@@ -654,7 -634,7 +654,7 @@@ static inline int security_settime64(co
  
  static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
  {
 -      return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages));
 +      return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages));
  }
  
  static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm)
@@@ -1030,13 -1010,6 +1030,13 @@@ static inline int security_inode_copy_u
        return 0;
  }
  
 +static inline int security_inode_setintegrity(const struct inode *inode,
 +                                            enum lsm_integrity_type type,
 +                                            const void *value, size_t size)
 +{
 +      return 0;
 +}
 +
  static inline int security_kernfs_init_security(struct kernfs_node *kn_dir,
                                                struct kernfs_node *kn)
  {
@@@ -1510,23 -1483,6 +1510,23 @@@ static inline int lsm_fill_user_ctx(str
  {
        return -EOPNOTSUPP;
  }
 +
 +static inline int security_bdev_alloc(struct block_device *bdev)
 +{
 +      return 0;
 +}
 +
 +static inline void security_bdev_free(struct block_device *bdev)
 +{
 +}
 +
 +static inline int security_bdev_setintegrity(struct block_device *bdev,
 +                                           enum lsm_integrity_type type,
 +                                           const void *value, size_t size)
 +{
 +      return 0;
 +}
 +
  #endif        /* CONFIG_SECURITY */
  
  #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE)
@@@ -2134,7 -2090,6 +2134,7 @@@ struct dentry *securityfs_create_symlin
                                         const char *target,
                                         const struct inode_operations *iops);
  extern void securityfs_remove(struct dentry *dentry);
 +extern void securityfs_recursive_remove(struct dentry *dentry);
  
  #else /* CONFIG_SECURITYFS */
  
@@@ -2182,7 -2137,7 +2182,7 @@@ extern int security_bpf_prog_load(struc
                                  struct bpf_token *token);
  extern void security_bpf_prog_free(struct bpf_prog *prog);
  extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
-                                    struct path *path);
+                                    const struct path *path);
  extern void security_bpf_token_free(struct bpf_token *token);
  extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
  extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
@@@ -2222,7 -2177,7 +2222,7 @@@ static inline void security_bpf_prog_fr
  { }
  
  static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
-                                    struct path *path)
+                                           const struct path *path)
  {
        return 0;
  }
@@@ -2301,12 -2256,4 +2301,12 @@@ static inline int security_uring_cmd(st
  #endif /* CONFIG_SECURITY */
  #endif /* CONFIG_IO_URING */
  
 +#ifdef CONFIG_SECURITY
 +extern void security_initramfs_populated(void);
 +#else
 +static inline void security_initramfs_populated(void)
 +{
 +}
 +#endif /* CONFIG_SECURITY */
 +
  #endif /* ! __LINUX_SECURITY_H */
diff --combined kernel/bpf/btf.c
index 83bbf935c56289b89757a1eab6da915b83126d4c,c4506d788c858060e60e31a40d9d88211abb1514..75e4fe83c509107374db4651b26f9512b3b45b93
@@@ -212,7 -212,7 +212,7 @@@ enum btf_kfunc_hook 
        BTF_KFUNC_HOOK_TRACING,
        BTF_KFUNC_HOOK_SYSCALL,
        BTF_KFUNC_HOOK_FMODRET,
 -      BTF_KFUNC_HOOK_CGROUP_SKB,
 +      BTF_KFUNC_HOOK_CGROUP,
        BTF_KFUNC_HOOK_SCHED_ACT,
        BTF_KFUNC_HOOK_SK_SKB,
        BTF_KFUNC_HOOK_SOCKET_FILTER,
@@@ -790,7 -790,7 +790,7 @@@ const char *btf_str_by_offset(const str
        return NULL;
  }
  
 -static bool __btf_name_valid(const struct btf *btf, u32 offset)
 +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
  {
        /* offset must be valid */
        const char *src = btf_str_by_offset(btf, offset);
        return !*src;
  }
  
 -static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
 -{
 -      return __btf_name_valid(btf, offset);
 -}
 -
  /* Allow any printable character in DATASEC names */
  static bool btf_name_valid_section(const struct btf *btf, u32 offset)
  {
        const char *src = btf_str_by_offset(btf, offset);
        const char *src_limit;
  
 +      if (!*src)
 +              return false;
 +
        /* set a limit on identifier length */
        src_limit = src + KSYM_NAME_LEN;
 -      src++;
        while (*src && src < src_limit) {
                if (!isprint(*src))
                        return false;
@@@ -3756,7 -3759,6 +3756,7 @@@ static int btf_find_field(const struct 
        return -EINVAL;
  }
  
 +/* Callers have to ensure the life cycle of btf if it is program BTF */
  static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
                          struct btf_field_info *info)
  {
                field->kptr.dtor = NULL;
                id = info->kptr.type_id;
                kptr_btf = (struct btf *)btf;
 -              btf_get(kptr_btf);
                goto found_dtor;
        }
        if (id < 0)
@@@ -4626,7 -4629,7 +4626,7 @@@ static s32 btf_var_check_meta(struct bt
        }
  
        if (!t->name_off ||
 -          !__btf_name_valid(env->btf, t->name_off)) {
 +          !btf_name_valid_identifier(env->btf, t->name_off)) {
                btf_verifier_log_type(env, t, "Invalid name");
                return -EINVAL;
        }
@@@ -5514,72 -5517,36 +5514,72 @@@ static const char *alloc_obj_fields[] 
  static struct btf_struct_metas *
  btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
  {
 -      union {
 -              struct btf_id_set set;
 -              struct {
 -                      u32 _cnt;
 -                      u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
 -              } _arr;
 -      } aof;
        struct btf_struct_metas *tab = NULL;
 +      struct btf_id_set *aof;
        int i, n, id, ret;
  
        BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
        BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
  
 -      memset(&aof, 0, sizeof(aof));
 +      aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
 +      if (!aof)
 +              return ERR_PTR(-ENOMEM);
 +      aof->cnt = 0;
 +
        for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
                /* Try to find whether this special type exists in user BTF, and
                 * if so remember its ID so we can easily find it among members
                 * of structs that we iterate in the next loop.
                 */
 +              struct btf_id_set *new_aof;
 +
                id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
                if (id < 0)
                        continue;
 -              aof.set.ids[aof.set.cnt++] = id;
 +
 +              new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
 +                                 GFP_KERNEL | __GFP_NOWARN);
 +              if (!new_aof) {
 +                      ret = -ENOMEM;
 +                      goto free_aof;
 +              }
 +              aof = new_aof;
 +              aof->ids[aof->cnt++] = id;
 +      }
 +
 +      n = btf_nr_types(btf);
 +      for (i = 1; i < n; i++) {
 +              /* Try to find if there are kptrs in user BTF and remember their ID */
 +              struct btf_id_set *new_aof;
 +              struct btf_field_info tmp;
 +              const struct btf_type *t;
 +
 +              t = btf_type_by_id(btf, i);
 +              if (!t) {
 +                      ret = -EINVAL;
 +                      goto free_aof;
 +              }
 +
 +              ret = btf_find_kptr(btf, t, 0, 0, &tmp);
 +              if (ret != BTF_FIELD_FOUND)
 +                      continue;
 +
 +              new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
 +                                 GFP_KERNEL | __GFP_NOWARN);
 +              if (!new_aof) {
 +                      ret = -ENOMEM;
 +                      goto free_aof;
 +              }
 +              aof = new_aof;
 +              aof->ids[aof->cnt++] = i;
        }
  
 -      if (!aof.set.cnt)
 +      if (!aof->cnt) {
 +              kfree(aof);
                return NULL;
 -      sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
 +      }
 +      sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
  
 -      n = btf_nr_types(btf);
        for (i = 1; i < n; i++) {
                struct btf_struct_metas *new_tab;
                const struct btf_member *member;
                int j, tab_cnt;
  
                t = btf_type_by_id(btf, i);
 -              if (!t) {
 -                      ret = -EINVAL;
 -                      goto free;
 -              }
                if (!__btf_type_is_struct(t))
                        continue;
  
                cond_resched();
  
                for_each_member(j, t, member) {
 -                      if (btf_id_set_contains(&aof.set, member->type))
 +                      if (btf_id_set_contains(aof, member->type))
                                goto parse;
                }
                continue;
                type = &tab->types[tab->cnt];
                type->btf_id = i;
                record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
 -                                                BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
 +                                                BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
 +                                                BPF_KPTR, t->size);
                /* The record cannot be unset, treat it as an error if so */
                if (IS_ERR_OR_NULL(record)) {
                        ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
                type->record = record;
                tab->cnt++;
        }
 +      kfree(aof);
        return tab;
  free:
        btf_struct_metas_free(tab);
 +free_aof:
 +      kfree(aof);
        return ERR_PTR(ret);
  }
  
@@@ -6276,11 -6243,12 +6276,11 @@@ static struct btf *btf_parse_module(con
        btf->kernel_btf = true;
        snprintf(btf->name, sizeof(btf->name), "%s", module_name);
  
 -      btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
 +      btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
        if (!btf->data) {
                err = -ENOMEM;
                goto errout;
        }
 -      memcpy(btf->data, data, data_size);
        btf->data_size = data_size;
  
        err = btf_parse_hdr(env);
  
  errout:
        btf_verifier_env_free(env);
 -      if (base_btf != vmlinux_btf)
 +      if (!IS_ERR(base_btf) && base_btf != vmlinux_btf)
                btf_free(base_btf);
        if (btf) {
                kvfree(btf->data);
@@@ -6558,9 -6526,6 +6558,9 @@@ bool btf_ctx_access(int off, int size, 
        if (prog_args_trusted(prog))
                info->reg_type |= PTR_TRUSTED;
  
 +      if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
 +              info->reg_type |= PTR_MAYBE_NULL;
 +
        if (tgt_prog) {
                enum bpf_prog_type tgt_type;
  
@@@ -7711,21 -7676,16 +7711,16 @@@ int btf_new_fd(const union bpf_attr *at
  struct btf *btf_get_by_fd(int fd)
  {
        struct btf *btf;
-       struct fd f;
+       CLASS(fd, f)(fd);
  
-       f = fdget(fd);
-       if (!fd_file(f))
+       if (fd_empty(f))
                return ERR_PTR(-EBADF);
  
-       if (fd_file(f)->f_op != &btf_fops) {
-               fdput(f);
+       if (fd_file(f)->f_op != &btf_fops)
                return ERR_PTR(-EINVAL);
-       }
  
        btf = fd_file(f)->private_data;
        refcount_inc(&btf->refcnt);
-       fdput(f);
  
        return btf;
  }
@@@ -8087,44 -8047,15 +8082,44 @@@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX
  BTF_TRACING_TYPE_xxx
  #undef BTF_TRACING_TYPE
  
 +/* Validate well-formedness of iter argument type.
 + * On success, return positive BTF ID of iter state's STRUCT type.
 + * On error, negative error is returned.
 + */
 +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
 +{
 +      const struct btf_param *arg;
 +      const struct btf_type *t;
 +      const char *name;
 +      int btf_id;
 +
 +      if (btf_type_vlen(func) <= arg_idx)
 +              return -EINVAL;
 +
 +      arg = &btf_params(func)[arg_idx];
 +      t = btf_type_skip_modifiers(btf, arg->type, NULL);
 +      if (!t || !btf_type_is_ptr(t))
 +              return -EINVAL;
 +      t = btf_type_skip_modifiers(btf, t->type, &btf_id);
 +      if (!t || !__btf_type_is_struct(t))
 +              return -EINVAL;
 +
 +      name = btf_name_by_offset(btf, t->name_off);
 +      if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
 +              return -EINVAL;
 +
 +      return btf_id;
 +}
 +
  static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
                                 const struct btf_type *func, u32 func_flags)
  {
        u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
 -      const char *name, *sfx, *iter_name;
 -      const struct btf_param *arg;
 +      const char *sfx, *iter_name;
        const struct btf_type *t;
        char exp_name[128];
        u32 nr_args;
 +      int btf_id;
  
        /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
        if (!flags || (flags & (flags - 1)))
        if (nr_args < 1)
                return -EINVAL;
  
 -      arg = &btf_params(func)[0];
 -      t = btf_type_skip_modifiers(btf, arg->type, NULL);
 -      if (!t || !btf_type_is_ptr(t))
 -              return -EINVAL;
 -      t = btf_type_skip_modifiers(btf, t->type, NULL);
 -      if (!t || !__btf_type_is_struct(t))
 -              return -EINVAL;
 -
 -      name = btf_name_by_offset(btf, t->name_off);
 -      if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
 -              return -EINVAL;
 +      btf_id = btf_check_iter_arg(btf, func, 0);
 +      if (btf_id < 0)
 +              return btf_id;
  
        /* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
         * fit nicely in stack slots
         */
 +      t = btf_type_by_id(btf, btf_id);
        if (t->size == 0 || (t->size % 8))
                return -EINVAL;
  
        /* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
         * naming pattern
         */
 -      iter_name = name + sizeof(ITER_PREFIX) - 1;
 +      iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1;
        if (flags & KF_ITER_NEW)
                sfx = "new";
        else if (flags & KF_ITER_NEXT)
@@@ -8364,19 -8302,13 +8359,19 @@@ static int bpf_prog_type_to_kfunc_hook(
        case BPF_PROG_TYPE_STRUCT_OPS:
                return BTF_KFUNC_HOOK_STRUCT_OPS;
        case BPF_PROG_TYPE_TRACING:
 +      case BPF_PROG_TYPE_TRACEPOINT:
 +      case BPF_PROG_TYPE_PERF_EVENT:
        case BPF_PROG_TYPE_LSM:
                return BTF_KFUNC_HOOK_TRACING;
        case BPF_PROG_TYPE_SYSCALL:
                return BTF_KFUNC_HOOK_SYSCALL;
        case BPF_PROG_TYPE_CGROUP_SKB:
 +      case BPF_PROG_TYPE_CGROUP_SOCK:
 +      case BPF_PROG_TYPE_CGROUP_DEVICE:
        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 -              return BTF_KFUNC_HOOK_CGROUP_SKB;
 +      case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 +      case BPF_PROG_TYPE_CGROUP_SYSCTL:
 +              return BTF_KFUNC_HOOK_CGROUP;
        case BPF_PROG_TYPE_SCHED_ACT:
                return BTF_KFUNC_HOOK_SCHED_ACT;
        case BPF_PROG_TYPE_SK_SKB:
@@@ -8952,7 -8884,6 +8947,7 @@@ int bpf_core_apply(struct bpf_core_ctx 
        struct bpf_core_cand_list cands = {};
        struct bpf_core_relo_res targ_res;
        struct bpf_core_spec *specs;
 +      const struct btf_type *type;
        int err;
  
        /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
        if (!specs)
                return -ENOMEM;
  
 +      type = btf_type_by_id(ctx->btf, relo->type_id);
 +      if (!type) {
 +              bpf_log(ctx->log, "relo #%u: bad type id %u\n",
 +                      relo_idx, relo->type_id);
 +              return -EINVAL;
 +      }
 +
        if (need_cands) {
                struct bpf_cand_cache *cc;
                int i;
diff --combined kernel/bpf/syscall.c
index 8386f25bc532cbb9ff60a6afcd75e7e0a78fefc8,65dcd92d0b2c5d80cb42029fbc883c9a1543d4e6..a8f1808a1ca54371d5b1fbc85c0290055e62bd4a
@@@ -550,8 -550,7 +550,8 @@@ void btf_record_free(struct btf_record 
                case BPF_KPTR_PERCPU:
                        if (rec->fields[i].kptr.module)
                                module_put(rec->fields[i].kptr.module);
 -                      btf_put(rec->fields[i].kptr.btf);
 +                      if (btf_is_kernel(rec->fields[i].kptr.btf))
 +                              btf_put(rec->fields[i].kptr.btf);
                        break;
                case BPF_LIST_HEAD:
                case BPF_LIST_NODE:
@@@ -597,8 -596,7 +597,8 @@@ struct btf_record *btf_record_dup(cons
                case BPF_KPTR_UNREF:
                case BPF_KPTR_REF:
                case BPF_KPTR_PERCPU:
 -                      btf_get(fields[i].kptr.btf);
 +                      if (btf_is_kernel(fields[i].kptr.btf))
 +                              btf_get(fields[i].kptr.btf);
                        if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
                                ret = -ENXIO;
                                goto free;
@@@ -735,11 -733,15 +735,11 @@@ void bpf_obj_free_fields(const struct b
        }
  }
  
 -/* called from workqueue */
 -static void bpf_map_free_deferred(struct work_struct *work)
 +static void bpf_map_free(struct bpf_map *map)
  {
 -      struct bpf_map *map = container_of(work, struct bpf_map, work);
        struct btf_record *rec = map->record;
        struct btf *btf = map->btf;
  
 -      security_bpf_map_free(map);
 -      bpf_map_release_memcg(map);
        /* implementation dependent freeing */
        map->ops->map_free(map);
        /* Delay freeing of btf_record for maps, as map_free
        btf_put(btf);
  }
  
 +/* called from workqueue */
 +static void bpf_map_free_deferred(struct work_struct *work)
 +{
 +      struct bpf_map *map = container_of(work, struct bpf_map, work);
 +
 +      security_bpf_map_free(map);
 +      bpf_map_release_memcg(map);
 +      bpf_map_free(map);
 +}
 +
  static void bpf_map_put_uref(struct bpf_map *map)
  {
        if (atomic64_dec_and_test(&map->usercnt)) {
@@@ -1419,27 -1411,13 +1419,12 @@@ static int map_create(union bpf_attr *a
  free_map_sec:
        security_bpf_map_free(map);
  free_map:
 -      btf_put(map->btf);
 -      map->ops->map_free(map);
 +      bpf_map_free(map);
  put_token:
        bpf_token_put(token);
        return err;
  }
  
- /* if error is returned, fd is released.
-  * On success caller should complete fd access with matching fdput()
-  */
- struct bpf_map *__bpf_map_get(struct fd f)
- {
-       if (!fd_file(f))
-               return ERR_PTR(-EBADF);
-       if (fd_file(f)->f_op != &bpf_map_fops) {
-               fdput(f);
-               return ERR_PTR(-EINVAL);
-       }
-       return fd_file(f)->private_data;
- }
  void bpf_map_inc(struct bpf_map *map)
  {
        atomic64_inc(&map->refcnt);
@@@ -1455,15 -1433,11 +1440,11 @@@ EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref
  
  struct bpf_map *bpf_map_get(u32 ufd)
  {
-       struct fd f = fdget(ufd);
-       struct bpf_map *map;
+       CLASS(fd, f)(ufd);
+       struct bpf_map *map = __bpf_map_get(f);
  
-       map = __bpf_map_get(f);
-       if (IS_ERR(map))
-               return map;
-       bpf_map_inc(map);
-       fdput(f);
+       if (!IS_ERR(map))
+               bpf_map_inc(map);
  
        return map;
  }
@@@ -1471,15 -1445,11 +1452,11 @@@ EXPORT_SYMBOL(bpf_map_get)
  
  struct bpf_map *bpf_map_get_with_uref(u32 ufd)
  {
-       struct fd f = fdget(ufd);
-       struct bpf_map *map;
+       CLASS(fd, f)(ufd);
+       struct bpf_map *map = __bpf_map_get(f);
  
-       map = __bpf_map_get(f);
-       if (IS_ERR(map))
-               return map;
-       bpf_map_inc_with_uref(map);
-       fdput(f);
+       if (!IS_ERR(map))
+               bpf_map_inc_with_uref(map);
  
        return map;
  }
@@@ -1544,11 -1514,9 +1521,9 @@@ static int map_lookup_elem(union bpf_at
  {
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *uvalue = u64_to_user_ptr(attr->value);
-       int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
        u32 value_size;
-       struct fd f;
        int err;
  
        if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
        if (attr->flags & ~BPF_F_LOCK)
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
-       if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
-               err = -EPERM;
-               goto err_put;
-       }
+       if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
+               return -EPERM;
  
        if ((attr->flags & BPF_F_LOCK) &&
-           !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
-               err = -EINVAL;
-               goto err_put;
-       }
+           !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+               return -EINVAL;
  
        key = __bpf_copy_key(ukey, map->key_size);
-       if (IS_ERR(key)) {
-               err = PTR_ERR(key);
-               goto err_put;
-       }
+       if (IS_ERR(key))
+               return PTR_ERR(key);
  
        value_size = bpf_map_value_size(map);
  
@@@ -1607,8 -1569,6 +1576,6 @@@ free_value
        kvfree(value);
  free_key:
        kvfree(key);
- err_put:
-       fdput(f);
        return err;
  }
  
@@@ -1619,17 -1579,15 +1586,15 @@@ static int map_update_elem(union bpf_at
  {
        bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
        bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
-       int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
        u32 value_size;
-       struct fd f;
        int err;
  
        if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
@@@ -1667,7 -1625,6 +1632,6 @@@ free_key
        kvfree(key);
  err_put:
        bpf_map_write_active_dec(map);
-       fdput(f);
        return err;
  }
  
  static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
  {
        bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
-       int ufd = attr->map_fd;
        struct bpf_map *map;
-       struct fd f;
        void *key;
        int err;
  
        if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
@@@ -1722,7 -1677,6 +1684,6 @@@ out
        kvfree(key);
  err_put:
        bpf_map_write_active_dec(map);
-       fdput(f);
        return err;
  }
  
@@@ -1733,30 -1687,24 +1694,24 @@@ static int map_get_next_key(union bpf_a
  {
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *unext_key = u64_to_user_ptr(attr->next_key);
-       int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *next_key;
-       struct fd f;
        int err;
  
        if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
-       if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
-               err = -EPERM;
-               goto err_put;
-       }
+       if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
+               return -EPERM;
  
        if (ukey) {
                key = __bpf_copy_key(ukey, map->key_size);
-               if (IS_ERR(key)) {
-                       err = PTR_ERR(key);
-                       goto err_put;
-               }
+               if (IS_ERR(key))
+                       return PTR_ERR(key);
        } else {
                key = NULL;
        }
@@@ -1788,8 -1736,6 +1743,6 @@@ free_next_key
        kvfree(next_key);
  free_key:
        kvfree(key);
- err_put:
-       fdput(f);
        return err;
  }
  
@@@ -2018,11 -1964,9 +1971,9 @@@ static int map_lookup_and_delete_elem(u
  {
        void __user *ukey = u64_to_user_ptr(attr->key);
        void __user *uvalue = u64_to_user_ptr(attr->value);
-       int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
        u32 value_size;
-       struct fd f;
        int err;
  
        if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
        if (attr->flags & ~BPF_F_LOCK)
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
@@@ -2101,7 -2045,6 +2052,6 @@@ free_key
        kvfree(key);
  err_put:
        bpf_map_write_active_dec(map);
-       fdput(f);
        return err;
  }
  
  
  static int map_freeze(const union bpf_attr *attr)
  {
-       int err = 0, ufd = attr->map_fd;
+       int err = 0;
        struct bpf_map *map;
-       struct fd f;
  
        if (CHECK_ATTR(BPF_MAP_FREEZE))
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
  
-       if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) {
-               fdput(f);
+       if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record))
                return -ENOTSUPP;
-       }
  
-       if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
-               fdput(f);
+       if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE))
                return -EPERM;
-       }
  
        mutex_lock(&map->freeze_mutex);
        if (bpf_map_write_active(map)) {
        WRITE_ONCE(map->frozen, true);
  err_put:
        mutex_unlock(&map->freeze_mutex);
-       fdput(f);
        return err;
  }
  
@@@ -2414,18 -2351,6 +2358,6 @@@ int bpf_prog_new_fd(struct bpf_prog *pr
                                O_RDWR | O_CLOEXEC);
  }
  
- static struct bpf_prog *____bpf_prog_get(struct fd f)
- {
-       if (!fd_file(f))
-               return ERR_PTR(-EBADF);
-       if (fd_file(f)->f_op != &bpf_prog_fops) {
-               fdput(f);
-               return ERR_PTR(-EINVAL);
-       }
-       return fd_file(f)->private_data;
- }
  void bpf_prog_add(struct bpf_prog *prog, int i)
  {
        atomic64_add(i, &prog->aux->refcnt);
@@@ -2481,20 -2406,19 +2413,19 @@@ bool bpf_prog_get_ok(struct bpf_prog *p
  static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
                                       bool attach_drv)
  {
-       struct fd f = fdget(ufd);
+       CLASS(fd, f)(ufd);
        struct bpf_prog *prog;
  
-       prog = ____bpf_prog_get(f);
-       if (IS_ERR(prog))
-               return prog;
-       if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
-               prog = ERR_PTR(-EINVAL);
-               goto out;
-       }
+       if (fd_empty(f))
+               return ERR_PTR(-EBADF);
+       if (fd_file(f)->f_op != &bpf_prog_fops)
+               return ERR_PTR(-EINVAL);
+       prog = fd_file(f)->private_data;
+       if (!bpf_prog_get_ok(prog, attach_type, attach_drv))
+               return ERR_PTR(-EINVAL);
  
        bpf_prog_inc(prog);
- out:
-       fdput(f);
        return prog;
  }
  
@@@ -3263,20 -3187,16 +3194,16 @@@ int bpf_link_new_fd(struct bpf_link *li
  
  struct bpf_link *bpf_link_get_from_fd(u32 ufd)
  {
-       struct fd f = fdget(ufd);
+       CLASS(fd, f)(ufd);
        struct bpf_link *link;
  
-       if (!fd_file(f))
+       if (fd_empty(f))
                return ERR_PTR(-EBADF);
-       if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll) {
-               fdput(f);
+       if (fd_file(f)->f_op != &bpf_link_fops && fd_file(f)->f_op != &bpf_link_fops_poll)
                return ERR_PTR(-EINVAL);
-       }
  
        link = fd_file(f)->private_data;
        bpf_link_inc(link);
-       fdput(f);
        return link;
  }
  EXPORT_SYMBOL(bpf_link_get_from_fd);
@@@ -4981,33 -4901,25 +4908,25 @@@ static int bpf_link_get_info_by_fd(stru
  static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
                                  union bpf_attr __user *uattr)
  {
-       int ufd = attr->info.bpf_fd;
-       struct fd f;
-       int err;
        if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
                return -EINVAL;
  
-       f = fdget(ufd);
-       if (!fd_file(f))
+       CLASS(fd, f)(attr->info.bpf_fd);
+       if (fd_empty(f))
                return -EBADFD;
  
        if (fd_file(f)->f_op == &bpf_prog_fops)
-               err = bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
+               return bpf_prog_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
                                              uattr);
        else if (fd_file(f)->f_op == &bpf_map_fops)
-               err = bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
+               return bpf_map_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr,
                                             uattr);
        else if (fd_file(f)->f_op == &btf_fops)
-               err = bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr);
+               return bpf_btf_get_info_by_fd(fd_file(f), fd_file(f)->private_data, attr, uattr);
        else if (fd_file(f)->f_op == &bpf_link_fops || fd_file(f)->f_op == &bpf_link_fops_poll)
-               err = bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data,
+               return bpf_link_get_info_by_fd(fd_file(f), fd_file(f)->private_data,
                                              attr, uattr);
-       else
-               err = -EINVAL;
-       fdput(f);
-       return err;
+       return -EINVAL;
  }
  
  #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
@@@ -5195,14 -5107,13 +5114,13 @@@ static int bpf_map_do_batch(const unio
                         cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
        bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
        struct bpf_map *map;
-       int err, ufd;
-       struct fd f;
+       int err;
  
        if (CHECK_ATTR(BPF_MAP_BATCH))
                return -EINVAL;
  
-       ufd = attr->batch.map_fd;
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->batch.map_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
@@@ -5230,7 -5141,6 +5148,6 @@@ err_put
                maybe_wait_bpf_programs(map);
                bpf_map_write_active_dec(map);
        }
-       fdput(f);
        return err;
  }
  
@@@ -5675,7 -5585,7 +5592,7 @@@ static int token_create(union bpf_attr 
        return bpf_token_create(attr);
  }
  
 -static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
  {
        union bpf_attr attr;
        int err;
@@@ -5939,7 -5849,6 +5856,7 @@@ static const struct bpf_func_proto bpf_
  
  BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
  {
 +      *res = 0;
        if (flags)
                return -EINVAL;
  
@@@ -5960,8 -5869,7 +5877,8 @@@ static const struct bpf_func_proto bpf_
        .arg1_type      = ARG_PTR_TO_MEM,
        .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg3_type      = ARG_ANYTHING,
 -      .arg4_type      = ARG_PTR_TO_LONG,
 +      .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
 +      .arg4_size      = sizeof(u64),
  };
  
  static const struct bpf_func_proto *
diff --combined kernel/bpf/verifier.c
index dd86282ccaa4a0e9ab3e4e67dad01d738528b225,e3932f8ce10a39fe7d5a02a0bf178e13bd032318..9a7ed527e47e343070cf9ccb899ee1fc6fcfdd49
@@@ -28,8 -28,6 +28,8 @@@
  #include <linux/cpumask.h>
  #include <linux/bpf_mem_alloc.h>
  #include <net/xdp.h>
 +#include <linux/trace_events.h>
 +#include <linux/kallsyms.h>
  
  #include "disasm.h"
  
@@@ -385,6 -383,11 +385,6 @@@ static void verbose_invalid_scalar(stru
        verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
  }
  
 -static bool type_may_be_null(u32 type)
 -{
 -      return type & PTR_MAYBE_NULL;
 -}
 -
  static bool reg_not_null(const struct bpf_reg_state *reg)
  {
        enum bpf_reg_type type;
@@@ -4576,28 -4579,28 +4576,28 @@@ static int get_reg_width(struct bpf_reg
        return fls64(reg->umax_value);
  }
  
 -/* See comment for mark_nocsr_pattern_for_call() */
 -static void check_nocsr_stack_contract(struct bpf_verifier_env *env, struct bpf_func_state *state,
 -                                     int insn_idx, int off)
 +/* See comment for mark_fastcall_pattern_for_call() */
 +static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
 +                                        struct bpf_func_state *state, int insn_idx, int off)
  {
        struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
        struct bpf_insn_aux_data *aux = env->insn_aux_data;
        int i;
  
 -      if (subprog->nocsr_stack_off <= off || aux[insn_idx].nocsr_pattern)
 +      if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
                return;
 -      /* access to the region [max_stack_depth .. nocsr_stack_off)
 -       * from something that is not a part of the nocsr pattern,
 -       * disable nocsr rewrites for current subprogram by setting
 -       * nocsr_stack_off to a value smaller than any possible offset.
 +      /* access to the region [max_stack_depth .. fastcall_stack_off)
 +       * from something that is not a part of the fastcall pattern,
 +       * disable fastcall rewrites for current subprogram by setting
 +       * fastcall_stack_off to a value smaller than any possible offset.
         */
 -      subprog->nocsr_stack_off = S16_MIN;
 -      /* reset nocsr aux flags within subprogram,
 +      subprog->fastcall_stack_off = S16_MIN;
 +      /* reset fastcall aux flags within subprogram,
         * happens at most once per subprogram
         */
        for (i = subprog->start; i < (subprog + 1)->start; ++i) {
 -              aux[i].nocsr_spills_num = 0;
 -              aux[i].nocsr_pattern = 0;
 +              aux[i].fastcall_spills_num = 0;
 +              aux[i].fastcall_pattern = 0;
        }
  }
  
@@@ -4649,7 -4652,7 +4649,7 @@@ static int check_stack_write_fixed_off(
        if (err)
                return err;
  
 -      check_nocsr_stack_contract(env, state, insn_idx, off);
 +      check_fastcall_stack_contract(env, state, insn_idx, off);
        mark_stack_slot_scratched(env, spi);
        if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
                bool reg_value_fits;
@@@ -4784,7 -4787,7 +4784,7 @@@ static int check_stack_write_var_off(st
                        return err;
        }
  
 -      check_nocsr_stack_contract(env, state, insn_idx, min_off);
 +      check_fastcall_stack_contract(env, state, insn_idx, min_off);
        /* Variable offset writes destroy any spilled pointers in range. */
        for (i = min_off; i < max_off; i++) {
                u8 new_type, *stype;
@@@ -4923,7 -4926,7 +4923,7 @@@ static int check_stack_read_fixed_off(s
        reg = &reg_state->stack[spi].spilled_ptr;
  
        mark_stack_slot_scratched(env, spi);
 -      check_nocsr_stack_contract(env, state, env->insn_idx, off);
 +      check_fastcall_stack_contract(env, state, env->insn_idx, off);
  
        if (is_spilled_reg(&reg_state->stack[spi])) {
                u8 spill_size = 1;
@@@ -5084,7 -5087,7 +5084,7 @@@ static int check_stack_read_var_off(str
        min_off = reg->smin_value + off;
        max_off = reg->smax_value + off;
        mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
 -      check_nocsr_stack_contract(env, ptr_state, env->insn_idx, min_off);
 +      check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
        return 0;
  }
  
@@@ -6801,13 -6804,13 +6801,13 @@@ static int check_stack_slot_within_boun
        struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx];
        int min_valid_off, max_bpf_stack;
  
 -      /* If accessing instruction is a spill/fill from nocsr pattern,
 +      /* If accessing instruction is a spill/fill from bpf_fastcall pattern,
         * add room for all caller saved registers below MAX_BPF_STACK.
 -       * In case if nocsr rewrite won't happen maximal stack depth
 +       * In case if bpf_fastcall rewrite won't happen maximal stack depth
         * would be checked by check_max_stack_depth_subprog().
         */
        max_bpf_stack = MAX_BPF_STACK;
 -      if (aux->nocsr_pattern)
 +      if (aux->fastcall_pattern)
                max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
  
        if (t == BPF_WRITE || env->allow_uninit_stack)
@@@ -7800,38 -7803,29 +7800,38 @@@ static int process_kptr_func(struct bpf
                             struct bpf_call_arg_meta *meta)
  {
        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 -      struct bpf_map *map_ptr = reg->map_ptr;
        struct btf_field *kptr_field;
 +      struct bpf_map *map_ptr;
 +      struct btf_record *rec;
        u32 kptr_off;
  
 +      if (type_is_ptr_alloc_obj(reg->type)) {
 +              rec = reg_btf_record(reg);
 +      } else { /* PTR_TO_MAP_VALUE */
 +              map_ptr = reg->map_ptr;
 +              if (!map_ptr->btf) {
 +                      verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
 +                              map_ptr->name);
 +                      return -EINVAL;
 +              }
 +              rec = map_ptr->record;
 +              meta->map_ptr = map_ptr;
 +      }
 +
        if (!tnum_is_const(reg->var_off)) {
                verbose(env,
                        "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
                        regno);
                return -EINVAL;
        }
 -      if (!map_ptr->btf) {
 -              verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
 -                      map_ptr->name);
 -              return -EINVAL;
 -      }
 -      if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
 -              verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
 +
 +      if (!btf_record_has_field(rec, BPF_KPTR)) {
 +              verbose(env, "R%d has no valid kptr\n", regno);
                return -EINVAL;
        }
  
 -      meta->map_ptr = map_ptr;
        kptr_off = reg->off + reg->var_off.value;
 -      kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
 +      kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
        if (!kptr_field) {
                verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
                return -EACCES;
@@@ -7976,17 -7970,12 +7976,17 @@@ static bool is_iter_destroy_kfunc(struc
        return meta->kfunc_flags & KF_ITER_DESTROY;
  }
  
 -static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
 +static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
 +                            const struct btf_param *arg)
  {
        /* btf_check_iter_kfuncs() guarantees that first argument of any iter
         * kfunc is iter state pointer
         */
 -      return arg == 0 && is_iter_kfunc(meta);
 +      if (is_iter_kfunc(meta))
 +              return arg_idx == 0;
 +
 +      /* iter passed as an argument to a generic kfunc */
 +      return btf_param_match_suffix(meta->btf, arg, "__iter");
  }
  
  static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
  {
        struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
        const struct btf_type *t;
 -      const struct btf_param *arg;
 -      int spi, err, i, nr_slots;
 -      u32 btf_id;
 +      int spi, err, i, nr_slots, btf_id;
  
 -      /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
 -      arg = &btf_params(meta->func_proto)[0];
 -      t = btf_type_skip_modifiers(meta->btf, arg->type, NULL);        /* PTR */
 -      t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id);       /* STRUCT */
 +      /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
 +       * ensures struct convention, so we wouldn't need to do any BTF
 +       * validation here. But given iter state can be passed as a parameter
 +       * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
 +       * conservative here.
 +       */
 +      btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
 +      if (btf_id < 0) {
 +              verbose(env, "expected valid iter pointer as arg #%d\n", regno);
 +              return -EINVAL;
 +      }
 +      t = btf_type_by_id(meta->btf, btf_id);
        nr_slots = t->size / BPF_REG_SIZE;
  
        if (is_iter_new_kfunc(meta)) {
                if (err)
                        return err;
        } else {
 -              /* iter_next() or iter_destroy() expect initialized iter state*/
 +              /* iter_next() or iter_destroy(), as well as any kfunc
 +               * accepting iter argument, expect initialized iter state
 +               */
                err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
                switch (err) {
                case 0:
@@@ -8145,15 -8126,6 +8145,15 @@@ static int widen_imprecise_scalars(stru
        return 0;
  }
  
 +static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
 +                                               struct bpf_kfunc_call_arg_meta *meta)
 +{
 +      int iter_frameno = meta->iter.frameno;
 +      int iter_spi = meta->iter.spi;
 +
 +      return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
 +}
 +
  /* process_iter_next_call() is called when verifier gets to iterator's next
   * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
   * to it as just "iter_next()" in comments below.
@@@ -8238,10 -8210,12 +8238,10 @@@ static int process_iter_next_call(struc
        struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
        struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
        struct bpf_reg_state *cur_iter, *queued_iter;
 -      int iter_frameno = meta->iter.frameno;
 -      int iter_spi = meta->iter.spi;
  
        BTF_TYPE_EMIT(struct bpf_iter);
  
 -      cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
 +      cur_iter = get_iter_from_state(cur_st, meta);
  
        if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
            cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
                if (!queued_st)
                        return -ENOMEM;
  
 -              queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
 +              queued_iter = get_iter_from_state(queued_st, meta);
                queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
                queued_iter->iter.depth++;
                if (prev_st)
@@@ -8293,12 -8267,6 +8293,12 @@@ static bool arg_type_is_mem_size(enum b
               type == ARG_CONST_SIZE_OR_ZERO;
  }
  
 +static bool arg_type_is_raw_mem(enum bpf_arg_type type)
 +{
 +      return base_type(type) == ARG_PTR_TO_MEM &&
 +             type & MEM_UNINIT;
 +}
 +
  static bool arg_type_is_release(enum bpf_arg_type type)
  {
        return type & OBJ_RELEASE;
@@@ -8309,6 -8277,16 +8309,6 @@@ static bool arg_type_is_dynptr(enum bpf
        return base_type(type) == ARG_PTR_TO_DYNPTR;
  }
  
 -static int int_ptr_type_to_size(enum bpf_arg_type type)
 -{
 -      if (type == ARG_PTR_TO_INT)
 -              return sizeof(u32);
 -      else if (type == ARG_PTR_TO_LONG)
 -              return sizeof(u64);
 -
 -      return -EINVAL;
 -}
 -
  static int resolve_map_arg_type(struct bpf_verifier_env *env,
                                 const struct bpf_call_arg_meta *meta,
                                 enum bpf_arg_type *arg_type)
@@@ -8381,6 -8359,16 +8381,6 @@@ static const struct bpf_reg_types mem_t
        },
  };
  
 -static const struct bpf_reg_types int_ptr_types = {
 -      .types = {
 -              PTR_TO_STACK,
 -              PTR_TO_PACKET,
 -              PTR_TO_PACKET_META,
 -              PTR_TO_MAP_KEY,
 -              PTR_TO_MAP_VALUE,
 -      },
 -};
 -
  static const struct bpf_reg_types spin_lock_types = {
        .types = {
                PTR_TO_MAP_VALUE,
@@@ -8411,12 -8399,7 +8411,12 @@@ static const struct bpf_reg_types func_
  static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
  static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
  static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
 -static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
 +static const struct bpf_reg_types kptr_xchg_dest_types = {
 +      .types = {
 +              PTR_TO_MAP_VALUE,
 +              PTR_TO_BTF_ID | MEM_ALLOC
 +      }
 +};
  static const struct bpf_reg_types dynptr_types = {
        .types = {
                PTR_TO_STACK,
@@@ -8441,12 -8424,14 +8441,12 @@@ static const struct bpf_reg_types *comp
        [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
        [ARG_PTR_TO_MEM]                = &mem_types,
        [ARG_PTR_TO_RINGBUF_MEM]        = &ringbuf_mem_types,
 -      [ARG_PTR_TO_INT]                = &int_ptr_types,
 -      [ARG_PTR_TO_LONG]               = &int_ptr_types,
        [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
        [ARG_PTR_TO_FUNC]               = &func_ptr_types,
        [ARG_PTR_TO_STACK]              = &stack_ptr_types,
        [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
        [ARG_PTR_TO_TIMER]              = &timer_types,
 -      [ARG_PTR_TO_KPTR]               = &kptr_types,
 +      [ARG_KPTR_XCHG_DEST]            = &kptr_xchg_dest_types,
        [ARG_PTR_TO_DYNPTR]             = &dynptr_types,
  };
  
@@@ -8485,8 -8470,7 +8485,8 @@@ static int check_reg_type(struct bpf_ve
        if (base_type(arg_type) == ARG_PTR_TO_MEM)
                type &= ~DYNPTR_TYPE_FLAG_MASK;
  
 -      if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
 +      /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
 +      if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
                type &= ~MEM_ALLOC;
                type &= ~MEM_PERCPU;
        }
@@@ -8579,8 -8563,7 +8579,8 @@@ found
                        verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
                        return -EFAULT;
                }
 -              if (meta->func_id == BPF_FUNC_kptr_xchg) {
 +              /* Check if local kptr in src arg matches kptr in dst arg */
 +              if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
                        if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
                                return -EACCES;
                }
@@@ -8891,7 -8874,7 +8891,7 @@@ skip_type_check
                meta->release_regno = regno;
        }
  
 -      if (reg->ref_obj_id) {
 +      if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
                if (meta->ref_obj_id) {
                        verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
                                regno, reg->ref_obj_id,
                 */
                meta->raw_mode = arg_type & MEM_UNINIT;
                if (arg_type & MEM_FIXED_SIZE) {
 -                      err = check_helper_mem_access(env, regno,
 -                                                    fn->arg_size[arg], false,
 -                                                    meta);
 +                      err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
 +                      if (err)
 +                              return err;
 +                      if (arg_type & MEM_ALIGNED)
 +                              err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
                }
                break;
        case ARG_CONST_SIZE:
                if (err)
                        return err;
                break;
 -      case ARG_PTR_TO_INT:
 -      case ARG_PTR_TO_LONG:
 -      {
 -              int size = int_ptr_type_to_size(arg_type);
 -
 -              err = check_helper_mem_access(env, regno, size, false, meta);
 -              if (err)
 -                      return err;
 -              err = check_ptr_alignment(env, reg, 0, size, true);
 -              break;
 -      }
        case ARG_PTR_TO_CONST_STR:
        {
                err = check_reg_const_str(env, reg, regno);
                        return err;
                break;
        }
 -      case ARG_PTR_TO_KPTR:
 +      case ARG_KPTR_XCHG_DEST:
                err = process_kptr_func(env, regno, meta);
                if (err)
                        return err;
@@@ -9348,15 -9340,15 +9348,15 @@@ static bool check_raw_mode_ok(const str
  {
        int count = 0;
  
 -      if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
 +      if (arg_type_is_raw_mem(fn->arg1_type))
                count++;
 -      if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
 +      if (arg_type_is_raw_mem(fn->arg2_type))
                count++;
 -      if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
 +      if (arg_type_is_raw_mem(fn->arg3_type))
                count++;
 -      if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
 +      if (arg_type_is_raw_mem(fn->arg4_type))
                count++;
 -      if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
 +      if (arg_type_is_raw_mem(fn->arg5_type))
                count++;
  
        /* We only support one arg being in raw mode at the moment,
@@@ -11390,7 -11382,7 +11390,7 @@@ get_kfunc_ptr_arg_type(struct bpf_verif
        if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
                return KF_ARG_PTR_TO_DYNPTR;
  
 -      if (is_kfunc_arg_iter(meta, argno))
 +      if (is_kfunc_arg_iter(meta, argno, &args[argno]))
                return KF_ARG_PTR_TO_ITER;
  
        if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
@@@ -11492,7 -11484,8 +11492,7 @@@ static int process_kf_arg_ptr_to_btf_id
         * btf_struct_ids_match() to walk the struct at the 0th offset, and
         * resolve types.
         */
 -      if (is_kfunc_acquire(meta) ||
 -          (is_kfunc_release(meta) && reg->ref_obj_id) ||
 +      if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
            btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
                strict_type_match = true;
  
@@@ -12109,8 -12102,7 +12109,8 @@@ static int check_kfunc_args(struct bpf_
                switch (kf_arg_type) {
                case KF_ARG_PTR_TO_CTX:
                        if (reg->type != PTR_TO_CTX) {
 -                              verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
 +                              verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
 +                                      i, reg_type_str(env, reg->type));
                                return -EINVAL;
                        }
  
@@@ -12833,17 -12825,6 +12833,17 @@@ static int check_kfunc_call(struct bpf_
                        regs[BPF_REG_0].btf = desc_btf;
                        regs[BPF_REG_0].type = PTR_TO_BTF_ID;
                        regs[BPF_REG_0].btf_id = ptr_type_id;
 +
 +                      if (is_iter_next_kfunc(&meta)) {
 +                              struct bpf_reg_state *cur_iter;
 +
 +                              cur_iter = get_iter_from_state(env->cur_state, &meta);
 +
 +                              if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
 +                                      regs[BPF_REG_0].type |= MEM_RCU;
 +                              else
 +                                      regs[BPF_REG_0].type |= PTR_TRUSTED;
 +                      }
                }
  
                if (is_kfunc_ret_null(&meta)) {
@@@ -16124,14 -16105,14 +16124,14 @@@ static int visit_func_call_insn(int t, 
  
  /* Return a bitmask specifying which caller saved registers are
   * clobbered by a call to a helper *as if* this helper follows
 - * no_caller_saved_registers contract:
 + * bpf_fastcall contract:
   * - includes R0 if function is non-void;
   * - includes R1-R5 if the corresponding parameter is described
   *   in the function prototype.
   */
 -static u32 helper_nocsr_clobber_mask(const struct bpf_func_proto *fn)
 +static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn)
  {
 -      u8 mask;
 +      u32 mask;
        int i;
  
        mask = 0;
  }
  
  /* True if do_misc_fixups() replaces calls to helper number 'imm',
 - * replacement patch is presumed to follow no_caller_saved_registers contract
 - * (see mark_nocsr_pattern_for_call() below).
 + * replacement patch is presumed to follow bpf_fastcall contract
 + * (see mark_fastcall_pattern_for_call() below).
   */
  static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
  {
        }
  }
  
 -/* GCC and LLVM define a no_caller_saved_registers function attribute.
 +/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */
 +static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta)
 +{
 +      u32 vlen, i, mask;
 +
 +      vlen = btf_type_vlen(meta->func_proto);
 +      mask = 0;
 +      if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type)))
 +              mask |= BIT(BPF_REG_0);
 +      for (i = 0; i < vlen; ++i)
 +              mask |= BIT(BPF_REG_1 + i);
 +      return mask;
 +}
 +
 +/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */
 +static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta)
 +{
 +      if (meta->btf == btf_vmlinux)
 +              return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
 +                     meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast];
 +      return false;
 +}
 +
 +/* LLVM defines a bpf_fastcall function attribute.
   * This attribute means that function scratches only some of
   * the caller saved registers defined by ABI.
   * For BPF the set of such registers could be defined as follows:
   *
   * The contract between kernel and clang allows simultaneous use of
   * such functions while maintaining backwards compatibility with old
 - * kernels that don't understand no_caller_saved_registers calls
 - * (nocsr for short):
 + * kernels that don't understand bpf_fastcall calls:
   *
 - * - for nocsr calls clang allocates registers as-if relevant r0-r5
 + * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
   *   registers are not scratched by the call;
   *
 - * - as a post-processing step, clang visits each nocsr call and adds
 + * - as a post-processing step, clang visits each bpf_fastcall call and adds
   *   spill/fill for every live r0-r5;
   *
   * - stack offsets used for the spill/fill are allocated as lowest
   *   purposes;
   *
   * - when kernel loads a program, it looks for such patterns
 - *   (nocsr function surrounded by spills/fills) and checks if
 - *   spill/fill stack offsets are used exclusively in nocsr patterns;
 + *   (bpf_fastcall function surrounded by spills/fills) and checks if
 + *   spill/fill stack offsets are used exclusively in fastcall patterns;
   *
   * - if so, and if verifier or current JIT inlines the call to the
 - *   nocsr function (e.g. a helper call), kernel removes unnecessary
 + *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
   *   spill/fill pairs;
   *
   * - when old kernel loads a program, presence of spill/fill pairs
   *   r0 += r2;
   *   exit;
   *
 - * The purpose of mark_nocsr_pattern_for_call is to:
 + * The purpose of mark_fastcall_pattern_for_call is to:
   * - look for such patterns;
 - * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern;
 - * - mark set env->insn_aux_data[*].nocsr_spills_num for call instruction;
 - * - update env->subprog_info[*]->nocsr_stack_off to find an offset
 - *   at which nocsr spill/fill stack slots start;
 - * - update env->subprog_info[*]->keep_nocsr_stack.
 + * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
 + * - set env->insn_aux_data[*].fastcall_spills_num for the call instruction;
 + * - update env->subprog_info[*]->fastcall_stack_off to find an offset
 + *   at which bpf_fastcall spill/fill stack slots start;
 + * - update env->subprog_info[*]->keep_fastcall_stack.
   *
 - * The .nocsr_pattern and .nocsr_stack_off are used by
 - * check_nocsr_stack_contract() to check if every stack access to
 - * nocsr spill/fill stack slot originates from spill/fill
 - * instructions, members of nocsr patterns.
 + * The .fastcall_pattern and .fastcall_stack_off are used by
 + * check_fastcall_stack_contract() to check if every stack access to
 + * fastcall spill/fill stack slot originates from spill/fill
 + * instructions, members of fastcall patterns.
   *
 - * If such condition holds true for a subprogram, nocsr patterns could
 - * be rewritten by remove_nocsr_spills_fills().
 - * Otherwise nocsr patterns are not changed in the subprogram
 + * If such condition holds true for a subprogram, fastcall patterns could
 + * be rewritten by remove_fastcall_spills_fills().
 + * Otherwise bpf_fastcall patterns are not changed in the subprogram
   * (code, presumably, generated by an older clang version).
   *
   * For example, it is *not* safe to remove spill/fill below:
   *   r0 += r1;                           exit;
   *   exit;
   */
 -static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env,
 -                                      struct bpf_subprog_info *subprog,
 -                                      int insn_idx, s16 lowest_off)
 +static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
 +                                         struct bpf_subprog_info *subprog,
 +                                         int insn_idx, s16 lowest_off)
  {
        struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
        struct bpf_insn *call = &env->prog->insnsi[insn_idx];
                if (get_helper_proto(env, call->imm, &fn) < 0)
                        /* error would be reported later */
                        return;
 -              clobbered_regs_mask = helper_nocsr_clobber_mask(fn);
 -              can_be_inlined = fn->allow_nocsr &&
 +              clobbered_regs_mask = helper_fastcall_clobber_mask(fn);
 +              can_be_inlined = fn->allow_fastcall &&
                                 (verifier_inlines_helper_call(env, call->imm) ||
                                  bpf_jit_inlines_helper_call(call->imm));
        }
  
 +      if (bpf_pseudo_kfunc_call(call)) {
 +              struct bpf_kfunc_call_arg_meta meta;
 +              int err;
 +
 +              err = fetch_kfunc_meta(env, call, &meta, NULL);
 +              if (err < 0)
 +                      /* error would be reported later */
 +                      return;
 +
 +              clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta);
 +              can_be_inlined = is_fastcall_kfunc_call(&meta);
 +      }
 +
        if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS)
                return;
  
                if (stx->off != off || ldx->off != off)
                        break;
                expected_regs_mask &= ~BIT(stx->src_reg);
 -              env->insn_aux_data[insn_idx - i].nocsr_pattern = 1;
 -              env->insn_aux_data[insn_idx + i].nocsr_pattern = 1;
 +              env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
 +              env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
        }
        if (i == 1)
                return;
  
 -      /* Conditionally set 'nocsr_spills_num' to allow forward
 +      /* Conditionally set 'fastcall_spills_num' to allow forward
         * compatibility when more helper functions are marked as
 -       * nocsr at compile time than current kernel supports, e.g:
 +       * bpf_fastcall at compile time than current kernel supports, e.g:
         *
         *   1: *(u64 *)(r10 - 8) = r1
 -       *   2: call A                  ;; assume A is nocsr for current kernel
 +       *   2: call A                  ;; assume A is bpf_fastcall for current kernel
         *   3: r1 = *(u64 *)(r10 - 8)
         *   4: *(u64 *)(r10 - 8) = r1
 -       *   5: call B                  ;; assume B is not nocsr for current kernel
 +       *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
         *   6: r1 = *(u64 *)(r10 - 8)
         *
 -       * There is no need to block nocsr rewrite for such program.
 -       * Set 'nocsr_pattern' for both calls to keep check_nocsr_stack_contract() happy,
 -       * don't set 'nocsr_spills_num' for call B so that remove_nocsr_spills_fills()
 +       * There is no need to block bpf_fastcall rewrite for such a program.
 +       * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
 +       * but don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
         * does not remove spill/fill pair {4,6}.
         */
        if (can_be_inlined)
 -              env->insn_aux_data[insn_idx].nocsr_spills_num = i - 1;
 +              env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
        else
 -              subprog->keep_nocsr_stack = 1;
 -      subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off);
 +              subprog->keep_fastcall_stack = 1;
 +      subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
  }
  
 -static int mark_nocsr_patterns(struct bpf_verifier_env *env)
 +static int mark_fastcall_patterns(struct bpf_verifier_env *env)
  {
        struct bpf_subprog_info *subprog = env->subprog_info;
        struct bpf_insn *insn;
                                continue;
                        lowest_off = min(lowest_off, insn->off);
                }
 -              /* use this offset to find nocsr patterns */
 +              /* use this offset to find fastcall patterns */
                for (i = subprog->start; i < (subprog + 1)->start; ++i) {
                        insn = env->prog->insnsi + i;
                        if (insn->code != (BPF_JMP | BPF_CALL))
                                continue;
 -                      mark_nocsr_pattern_for_call(env, subprog, i, lowest_off);
 +                      mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
                }
        }
        return 0;
@@@ -17396,9 -17342,8 +17396,9 @@@ static bool stacksafe(struct bpf_verifi
                spi = i / BPF_REG_SIZE;
  
                if (exact != NOT_EXACT &&
 -                  old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
 -                  cur->stack[spi].slot_type[i % BPF_REG_SIZE])
 +                  (i >= cur->allocated_stack ||
 +                   old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
 +                   cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
                        return false;
  
                if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
@@@ -18920,6 -18865,53 +18920,53 @@@ static bool bpf_map_is_cgroup_storage(s
                map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
  }
  
+ /* Add map behind fd to used maps list, if it's not already there, and return
+  * its index. Also set *reused to true if this map was already in the list of
+  * used maps.
+  * Returns <0 on error, or >= 0 index on success.
+  */
+ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd, bool *reused)
+ {
+       CLASS(fd, f)(fd);
+       struct bpf_map *map;
+       int i;
+       map = __bpf_map_get(f);
+       if (IS_ERR(map)) {
+               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
+               return PTR_ERR(map);
+       }
+       /* check whether we recorded this map already */
+       for (i = 0; i < env->used_map_cnt; i++) {
+               if (env->used_maps[i] == map) {
+                       *reused = true;
+                       return i;
+               }
+       }
+       if (env->used_map_cnt >= MAX_USED_MAPS) {
+               verbose(env, "The total number of maps per program has reached the limit of %u\n",
+                       MAX_USED_MAPS);
+               return -E2BIG;
+       }
+       if (env->prog->sleepable)
+               atomic64_inc(&map->sleepable_refcnt);
+       /* hold the map. If the program is rejected by verifier,
+        * the map will be released by release_maps() or it
+        * will be used by the valid program until it's unloaded
+        * and all maps are released in bpf_free_used_maps()
+        */
+       bpf_map_inc(map);
+       *reused = false;
+       env->used_maps[env->used_map_cnt++] = map;
+       return env->used_map_cnt - 1;
+ }
  /* find and rewrite pseudo imm in ld_imm64 instructions:
   *
   * 1. if it accesses map FD, replace it with actual map pointer.
@@@ -18931,7 -18923,7 +18978,7 @@@ static int resolve_pseudo_ldimm64(struc
  {
        struct bpf_insn *insn = env->prog->insnsi;
        int insn_cnt = env->prog->len;
-       int i, j, err;
+       int i, err;
  
        err = bpf_prog_calc_tag(env->prog);
        if (err)
                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
                        struct bpf_insn_aux_data *aux;
                        struct bpf_map *map;
-                       struct fd f;
+                       int map_idx;
                        u64 addr;
                        u32 fd;
+                       bool reused;
  
                        if (i == insn_cnt - 1 || insn[1].code != 0 ||
                            insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
                                break;
                        }
  
-                       f = fdget(fd);
-                       map = __bpf_map_get(f);
-                       if (IS_ERR(map)) {
-                               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
-                               return PTR_ERR(map);
-                       }
+                       map_idx = add_used_map_from_fd(env, fd, &reused);
+                       if (map_idx < 0)
+                               return map_idx;
+                       map = env->used_maps[map_idx];
+                       aux = &env->insn_aux_data[i];
+                       aux->map_index = map_idx;
  
                        err = check_map_prog_compatibility(env, map, env->prog);
-                       if (err) {
-                               fdput(f);
+                       if (err)
                                return err;
-                       }
  
-                       aux = &env->insn_aux_data[i];
                        if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
                            insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
                                addr = (unsigned long)map;
  
                                if (off >= BPF_MAX_VAR_OFF) {
                                        verbose(env, "direct value offset of %u is not allowed\n", off);
-                                       fdput(f);
                                        return -EINVAL;
                                }
  
                                if (!map->ops->map_direct_value_addr) {
                                        verbose(env, "no direct value access support for this map type\n");
-                                       fdput(f);
                                        return -EINVAL;
                                }
  
                                if (err) {
                                        verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
                                                map->value_size, off);
-                                       fdput(f);
                                        return err;
                                }
  
                        insn[0].imm = (u32)addr;
                        insn[1].imm = addr >> 32;
  
-                       /* check whether we recorded this map already */
-                       for (j = 0; j < env->used_map_cnt; j++) {
-                               if (env->used_maps[j] == map) {
-                                       aux->map_index = j;
-                                       fdput(f);
-                                       goto next_insn;
-                               }
-                       }
-                       if (env->used_map_cnt >= MAX_USED_MAPS) {
-                               verbose(env, "The total number of maps per program has reached the limit of %u\n",
-                                       MAX_USED_MAPS);
-                               fdput(f);
-                               return -E2BIG;
-                       }
-                       if (env->prog->sleepable)
-                               atomic64_inc(&map->sleepable_refcnt);
-                       /* hold the map. If the program is rejected by verifier,
-                        * the map will be released by release_maps() or it
-                        * will be used by the valid program until it's unloaded
-                        * and all maps are released in bpf_free_used_maps()
-                        */
-                       bpf_map_inc(map);
-                       aux->map_index = env->used_map_cnt;
-                       env->used_maps[env->used_map_cnt++] = map;
+                       /* proceed with extra checks only if it's a newly added used map */
+                       if (reused)
+                               goto next_insn;
  
                        if (bpf_map_is_cgroup_storage(map) &&
                            bpf_cgroup_storage_assign(env->prog->aux, map)) {
                                verbose(env, "only one cgroup storage of each type is allowed\n");
-                               fdput(f);
                                return -EBUSY;
                        }
                        if (map->map_type == BPF_MAP_TYPE_ARENA) {
                                if (env->prog->aux->arena) {
                                        verbose(env, "Only one arena per program\n");
-                                       fdput(f);
                                        return -EBUSY;
                                }
                                if (!env->allow_ptr_leaks || !env->bpf_capable) {
                                        verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
-                                       fdput(f);
                                        return -EPERM;
                                }
                                if (!env->prog->jit_requested) {
                                        verbose(env, "JIT is required to use arena\n");
-                                       fdput(f);
                                        return -EOPNOTSUPP;
                                }
                                if (!bpf_jit_supports_arena()) {
                                        verbose(env, "JIT doesn't support arena\n");
-                                       fdput(f);
                                        return -EOPNOTSUPP;
                                }
                                env->prog->aux->arena = (void *)map;
                                if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
                                        verbose(env, "arena's user address must be set via map_extra or mmap()\n");
-                                       fdput(f);
                                        return -EINVAL;
                                }
                        }
  
-                       fdput(f);
  next_insn:
                        insn++;
                        i++;
@@@ -19277,9 -19234,6 +19289,9 @@@ static int adjust_jmp_off(struct bpf_pr
        for (i = 0; i < insn_cnt; i++, insn++) {
                u8 code = insn->code;
  
 +              if (tgt_idx <= i && i < tgt_idx + delta)
 +                      continue;
 +
                if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
                    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
                        continue;
@@@ -19668,39 -19622,14 +19680,39 @@@ apply_patch_buffer
   */
  static int convert_ctx_accesses(struct bpf_verifier_env *env)
  {
 +      struct bpf_subprog_info *subprogs = env->subprog_info;
        const struct bpf_verifier_ops *ops = env->ops;
 -      int i, cnt, size, ctx_field_size, delta = 0;
 +      int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
        const int insn_cnt = env->prog->len;
 -      struct bpf_insn insn_buf[16], *insn;
 +      struct bpf_insn *epilogue_buf = env->epilogue_buf;
 +      struct bpf_insn *insn_buf = env->insn_buf;
 +      struct bpf_insn *insn;
        u32 target_size, size_default, off;
        struct bpf_prog *new_prog;
        enum bpf_access_type type;
        bool is_narrower_load;
 +      int epilogue_idx = 0;
 +
 +      if (ops->gen_epilogue) {
 +              epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
 +                                               -(subprogs[0].stack_depth + 8));
 +              if (epilogue_cnt >= INSN_BUF_SIZE) {
 +                      verbose(env, "bpf verifier is misconfigured\n");
 +                      return -EINVAL;
 +              } else if (epilogue_cnt) {
 +                      /* Save the ARG_PTR_TO_CTX for the epilogue to use */
 +                      cnt = 0;
 +                      subprogs[0].stack_depth += 8;
 +                      insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
 +                                                    -subprogs[0].stack_depth);
 +                      insn_buf[cnt++] = env->prog->insnsi[0];
 +                      new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
 +                      if (!new_prog)
 +                              return -ENOMEM;
 +                      env->prog = new_prog;
 +                      delta += cnt - 1;
 +              }
 +      }
  
        if (ops->gen_prologue || env->seen_direct_write) {
                if (!ops->gen_prologue) {
                }
                cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
                                        env->prog);
 -              if (cnt >= ARRAY_SIZE(insn_buf)) {
 +              if (cnt >= INSN_BUF_SIZE) {
                        verbose(env, "bpf verifier is misconfigured\n");
                        return -EINVAL;
                } else if (cnt) {
                }
        }
  
 +      if (delta)
 +              WARN_ON(adjust_jmp_off(env->prog, 0, delta));
 +
        if (bpf_prog_is_offloaded(env->prog->aux))
                return 0;
  
                        insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
                        env->prog->aux->num_exentries++;
                        continue;
 +              } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
 +                         epilogue_cnt &&
 +                         i + delta < subprogs[1].start) {
 +                      /* Generate epilogue for the main prog */
 +                      if (epilogue_idx) {
 +                              /* jump back to the earlier generated epilogue */
 +                              insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
 +                              cnt = 1;
 +                      } else {
 +                              memcpy(insn_buf, epilogue_buf,
 +                                     epilogue_cnt * sizeof(*epilogue_buf));
 +                              cnt = epilogue_cnt;
 +                              /* epilogue_idx cannot be 0. It must have at
 +                               * least one ctx ptr saving insn before the
 +                               * epilogue.
 +                               */
 +                              epilogue_idx = i + delta;
 +                      }
 +                      goto patch_insn_buf;
                } else {
                        continue;
                }
                target_size = 0;
                cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
                                         &target_size);
 -              if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
 +              if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
                    (ctx_field_size && !target_size)) {
                        verbose(env, "bpf verifier is misconfigured\n");
                        return -EINVAL;
                if (is_narrower_load && size < target_size) {
                        u8 shift = bpf_ctx_narrow_access_offset(
                                off, size, size_default) * 8;
 -                      if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
 +                      if (shift && cnt + 1 >= INSN_BUF_SIZE) {
                                verbose(env, "bpf verifier narrow ctx load misconfigured\n");
                                return -EINVAL;
                        }
                                                       insn->dst_reg, insn->dst_reg,
                                                       size * 8, 0);
  
 +patch_insn_buf:
                new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
                if (!new_prog)
                        return -ENOMEM;
@@@ -20433,7 -20339,7 +20445,7 @@@ static int do_misc_fixups(struct bpf_ve
        const int insn_cnt = prog->len;
        const struct bpf_map_ops *ops;
        struct bpf_insn_aux_data *aux;
 -      struct bpf_insn insn_buf[16];
 +      struct bpf_insn *insn_buf = env->insn_buf;
        struct bpf_prog *new_prog;
        struct bpf_map *map_ptr;
        int i, ret, cnt, delta = 0, cur_subprog = 0;
                        /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
                        insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
  
 -              /* Make divide-by-zero exceptions impossible. */
 +              /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
 +              if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
 +                   insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
 +                   insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
 +                   insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
 +                  insn->off == 1 && insn->imm == -1) {
 +                      bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
 +                      bool isdiv = BPF_OP(insn->code) == BPF_DIV;
 +                      struct bpf_insn *patchlet;
 +                      struct bpf_insn chk_and_sdiv[] = {
 +                              BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
 +                                           BPF_NEG | BPF_K, insn->dst_reg,
 +                                           0, 0, 0),
 +                      };
 +                      struct bpf_insn chk_and_smod[] = {
 +                              BPF_MOV32_IMM(insn->dst_reg, 0),
 +                      };
 +
 +                      patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
 +                      cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
 +
 +                      new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
 +                      if (!new_prog)
 +                              return -ENOMEM;
 +
 +                      delta    += cnt - 1;
 +                      env->prog = prog = new_prog;
 +                      insn      = new_prog->insnsi + i + delta;
 +                      goto next_insn;
 +              }
 +
 +              /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
                if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
                    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
                    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
                    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
                        bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
                        bool isdiv = BPF_OP(insn->code) == BPF_DIV;
 +                      bool is_sdiv = isdiv && insn->off == 1;
 +                      bool is_smod = !isdiv && insn->off == 1;
                        struct bpf_insn *patchlet;
                        struct bpf_insn chk_and_div[] = {
                                /* [R,W]x div 0 -> 0 */
                                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
                                BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
                        };
 +                      struct bpf_insn chk_and_sdiv[] = {
 +                              /* [R,W]x sdiv 0 -> 0
 +                               * LLONG_MIN sdiv -1 -> LLONG_MIN
 +                               * INT_MIN sdiv -1 -> INT_MIN
 +                               */
 +                              BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
 +                              BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
 +                                           BPF_ADD | BPF_K, BPF_REG_AX,
 +                                           0, 0, 1),
 +                              BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
 +                                           BPF_JGT | BPF_K, BPF_REG_AX,
 +                                           0, 4, 1),
 +                              BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
 +                                           BPF_JEQ | BPF_K, BPF_REG_AX,
 +                                           0, 1, 0),
 +                              BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
 +                                           BPF_MOV | BPF_K, insn->dst_reg,
 +                                           0, 0, 0),
 +                              /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
 +                              BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
 +                                           BPF_NEG | BPF_K, insn->dst_reg,
 +                                           0, 0, 0),
 +                              BPF_JMP_IMM(BPF_JA, 0, 0, 1),
 +                              *insn,
 +                      };
 +                      struct bpf_insn chk_and_smod[] = {
 +                              /* [R,W]x mod 0 -> [R,W]x */
 +                              /* [R,W]x mod -1 -> 0 */
 +                              BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
 +                              BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
 +                                           BPF_ADD | BPF_K, BPF_REG_AX,
 +                                           0, 0, 1),
 +                              BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
 +                                           BPF_JGT | BPF_K, BPF_REG_AX,
 +                                           0, 3, 1),
 +                              BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
 +                                           BPF_JEQ | BPF_K, BPF_REG_AX,
 +                                           0, 3 + (is64 ? 0 : 1), 1),
 +                              BPF_MOV32_IMM(insn->dst_reg, 0),
 +                              BPF_JMP_IMM(BPF_JA, 0, 0, 1),
 +                              *insn,
 +                              BPF_JMP_IMM(BPF_JA, 0, 0, 1),
 +                              BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
 +                      };
  
 -                      patchlet = isdiv ? chk_and_div : chk_and_mod;
 -                      cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
 -                                    ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
 +                      if (is_sdiv) {
 +                              patchlet = chk_and_sdiv;
 +                              cnt = ARRAY_SIZE(chk_and_sdiv);
 +                      } else if (is_smod) {
 +                              patchlet = chk_and_smod;
 +                              cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
 +                      } else {
 +                              patchlet = isdiv ? chk_and_div : chk_and_mod;
 +                              cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
 +                                            ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
 +                      }
  
                        new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
                        if (!new_prog)
                    (BPF_MODE(insn->code) == BPF_ABS ||
                     BPF_MODE(insn->code) == BPF_IND)) {
                        cnt = env->ops->gen_ld_abs(insn, insn_buf);
 -                      if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 +                      if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
                                verbose(env, "bpf verifier is misconfigured\n");
                                return -EINVAL;
                        }
                                cnt = ops->map_gen_lookup(map_ptr, insn_buf);
                                if (cnt == -EOPNOTSUPP)
                                        goto patch_map_ops_generic;
 -                              if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 +                              if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
                                        verbose(env, "bpf verifier is misconfigured\n");
                                        return -EINVAL;
                                }
@@@ -21290,7 -21111,7 +21302,7 @@@ static struct bpf_prog *inline_bpf_loop
                                        int position,
                                        s32 stack_base,
                                        u32 callback_subprogno,
 -                                      u32 *cnt)
 +                                      u32 *total_cnt)
  {
        s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
        s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
        int reg_loop_cnt = BPF_REG_7;
        int reg_loop_ctx = BPF_REG_8;
  
 +      struct bpf_insn *insn_buf = env->insn_buf;
        struct bpf_prog *new_prog;
        u32 callback_start;
        u32 call_insn_offset;
        s32 callback_offset;
 +      u32 cnt = 0;
  
        /* This represents an inlined version of bpf_iter.c:bpf_loop,
         * be careful to modify this code in sync.
         */
 -      struct bpf_insn insn_buf[] = {
 -              /* Return error and jump to the end of the patch if
 -               * expected number of iterations is too big.
 -               */
 -              BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
 -              BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
 -              BPF_JMP_IMM(BPF_JA, 0, 0, 16),
 -              /* spill R6, R7, R8 to use these as loop vars */
 -              BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
 -              BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
 -              BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
 -              /* initialize loop vars */
 -              BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
 -              BPF_MOV32_IMM(reg_loop_cnt, 0),
 -              BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
 -              /* loop header,
 -               * if reg_loop_cnt >= reg_loop_max skip the loop body
 -               */
 -              BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
 -              /* callback call,
 -               * correct callback offset would be set after patching
 -               */
 -              BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
 -              BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
 -              BPF_CALL_REL(0),
 -              /* increment loop counter */
 -              BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
 -              /* jump to loop header if callback returned 0 */
 -              BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
 -              /* return value of bpf_loop,
 -               * set R0 to the number of iterations
 -               */
 -              BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
 -              /* restore original values of R6, R7, R8 */
 -              BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
 -              BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
 -              BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
 -      };
  
 -      *cnt = ARRAY_SIZE(insn_buf);
 -      new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
 +      /* Return error and jump to the end of the patch if
 +       * expected number of iterations is too big.
 +       */
 +      insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
 +      insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
 +      insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
 +      /* spill R6, R7, R8 to use these as loop vars */
 +      insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
 +      insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
 +      insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
 +      /* initialize loop vars */
 +      insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
 +      insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
 +      insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
 +      /* loop header,
 +       * if reg_loop_cnt >= reg_loop_max skip the loop body
 +       */
 +      insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
 +      /* callback call,
 +       * correct callback offset would be set after patching
 +       */
 +      insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
 +      insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
 +      insn_buf[cnt++] = BPF_CALL_REL(0);
 +      /* increment loop counter */
 +      insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
 +      /* jump to loop header if callback returned 0 */
 +      insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
 +      /* return value of bpf_loop,
 +       * set R0 to the number of iterations
 +       */
 +      insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
 +      /* restore original values of R6, R7, R8 */
 +      insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
 +      insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
 +      insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
 +
 +      *total_cnt = cnt;
 +      new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
        if (!new_prog)
                return new_prog;
  
@@@ -21423,10 -21243,10 +21435,10 @@@ static int optimize_bpf_loop(struct bpf
        return 0;
  }
  
 -/* Remove unnecessary spill/fill pairs, members of nocsr pattern,
 +/* Remove unnecessary spill/fill pairs, members of fastcall pattern,
   * adjust subprograms stack depth when possible.
   */
 -static int remove_nocsr_spills_fills(struct bpf_verifier_env *env)
 +static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
  {
        struct bpf_subprog_info *subprog = env->subprog_info;
        struct bpf_insn_aux_data *aux = env->insn_aux_data;
        int i, j;
  
        for (i = 0; i < insn_cnt; i++, insn++) {
 -              if (aux[i].nocsr_spills_num > 0) {
 -                      spills_num = aux[i].nocsr_spills_num;
 +              if (aux[i].fastcall_spills_num > 0) {
 +                      spills_num = aux[i].fastcall_spills_num;
                        /* NOPs would be removed by opt_remove_nops() */
                        for (j = 1; j <= spills_num; ++j) {
                                *(insn - j) = NOP;
                        modified = true;
                }
                if ((subprog + 1)->start == i + 1) {
 -                      if (modified && !subprog->keep_nocsr_stack)
 -                              subprog->stack_depth = -subprog->nocsr_stack_off;
 +                      if (modified && !subprog->keep_fastcall_stack)
 +                              subprog->stack_depth = -subprog->fastcall_stack_off;
                        subprog++;
                        modified = false;
                }
@@@ -21847,13 -21667,11 +21859,13 @@@ int bpf_check_attach_target(struct bpf_
  {
        bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
        bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
 +      char trace_symbol[KSYM_SYMBOL_LEN];
        const char prefix[] = "btf_trace_";
 +      struct bpf_raw_event_map *btp;
        int ret = 0, subprog = -1, i;
        const struct btf_type *t;
        bool conservative = true;
 -      const char *tname;
 +      const char *tname, *fname;
        struct btf *btf;
        long addr = 0;
        struct module *mod = NULL;
                        return -EINVAL;
                }
                tname += sizeof(prefix) - 1;
 -              t = btf_type_by_id(btf, t->type);
 -              if (!btf_type_is_ptr(t))
 -                      /* should never happen in valid vmlinux build */
 +
 +              /* The func_proto of "btf_trace_##tname" is generated from a typedef without argument
 +               * names, so use bpf_raw_event_map to look up the argument names.
 +               */
 +              btp = bpf_get_raw_tracepoint(tname);
 +              if (!btp)
                        return -EINVAL;
 +              fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
 +                                      trace_symbol);
 +              bpf_put_raw_tracepoint(btp);
 +
 +              if (fname)
 +                      ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
 +
 +              if (!fname || ret < 0) {
 +                      bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
 +                              prefix, tname);
 +                      t = btf_type_by_id(btf, t->type);
 +                      if (!btf_type_is_ptr(t))
 +                              /* should never happen in valid vmlinux build */
 +                              return -EINVAL;
 +              } else {
 +                      t = btf_type_by_id(btf, ret);
 +                      if (!btf_type_is_func(t))
 +                              /* should never happen in valid vmlinux build */
 +                              return -EINVAL;
 +              }
 +
                t = btf_type_by_id(btf, t->type);
                if (!btf_type_is_func_proto(t))
                        /* should never happen in valid vmlinux build */
@@@ -22397,7 -22191,7 +22409,7 @@@ int bpf_check(struct bpf_prog **prog, u
        if (ret < 0)
                goto skip_full_check;
  
 -      ret = mark_nocsr_patterns(env);
 +      ret = mark_fastcall_patterns(env);
        if (ret < 0)
                goto skip_full_check;
  
@@@ -22414,7 -22208,7 +22426,7 @@@ skip_full_check
         * allocate additional slots.
         */
        if (ret == 0)
 -              ret = remove_nocsr_spills_fills(env);
 +              ret = remove_fastcall_spills_fills(env);
  
        if (ret == 0)
                ret = check_max_stack_depth(env);
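
The chk_and_sdiv/chk_and_smod patchlets added above only rewrite the instruction stream so the trapping corner cases can never be reached; the result they preserve is easiest to read as plain C. A minimal sketch of the intended signed division/modulo semantics follows (bpf_sdiv64()/bpf_smod64() are illustrative names for this sketch, not kernel functions; wrapping negation goes through unsigned arithmetic so the C stays free of signed-overflow UB):

/* Semantics enforced by the patched sequences for signed div/mod:
 *   x s/ 0  -> 0            x s% 0  -> x
 *   x s/ -1 -> -x (wraps, so LLONG_MIN s/ -1 stays LLONG_MIN)
 *   x s% -1 -> 0
 */
static long long bpf_sdiv64(long long x, long long y)
{
	if (y == 0)
		return 0;
	if (y == -1)		/* wrapping negate, avoids LLONG_MIN / -1 UB */
		return (long long)(0ULL - (unsigned long long)x);
	return x / y;		/* C truncation toward zero matches BPF sdiv */
}

static long long bpf_smod64(long long x, long long y)
{
	if (y == 0)
		return x;	/* smod by zero leaves dst unchanged */
	if (y == -1)
		return 0;
	return x % y;
}
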
diff --combined net/core/sock_map.c
index 724b6856fcc3e9fd51673d31927cfd52d5d7d0aa,0f5f80f44d520a78d665393099d2b88eb0625381..242c91a6e3d3870ec6da6fa095d180a933d1d3d4
@@@ -67,46 -67,39 +67,39 @@@ static struct bpf_map *sock_map_alloc(u
  
  int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
  {
-       u32 ufd = attr->target_fd;
        struct bpf_map *map;
-       struct fd f;
        int ret;
  
        if (attr->attach_flags || attr->replace_bpf_fd)
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->target_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
        mutex_lock(&sockmap_mutex);
        ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
        mutex_unlock(&sockmap_mutex);
-       fdput(f);
        return ret;
  }
  
  int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
  {
-       u32 ufd = attr->target_fd;
        struct bpf_prog *prog;
        struct bpf_map *map;
-       struct fd f;
        int ret;
  
        if (attr->attach_flags || attr->replace_bpf_fd)
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->target_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
  
        prog = bpf_prog_get(attr->attach_bpf_fd);
-       if (IS_ERR(prog)) {
-               ret = PTR_ERR(prog);
-               goto put_map;
-       }
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
  
        if (prog->type != ptype) {
                ret = -EINVAL;
        mutex_unlock(&sockmap_mutex);
  put_prog:
        bpf_prog_put(prog);
- put_map:
-       fdput(f);
        return ret;
  }
  
@@@ -1183,7 -1174,6 +1174,7 @@@ static void sock_hash_free(struct bpf_m
                        sock_put(elem->sk);
                        sock_hash_free_elem(htab, elem);
                }
 +              cond_resched();
        }
  
        /* wait for psock readers accessing its map link */
@@@ -1551,18 -1541,17 +1542,17 @@@ int sock_map_bpf_prog_query(const unio
                            union bpf_attr __user *uattr)
  {
        __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
-       u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+       u32 prog_cnt = 0, flags = 0;
        struct bpf_prog **pprog;
        struct bpf_prog *prog;
        struct bpf_map *map;
-       struct fd f;
        u32 id = 0;
        int ret;
  
        if (attr->query.query_flags)
                return -EINVAL;
  
-       f = fdget(ufd);
+       CLASS(fd, f)(attr->target_fd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
@@@ -1594,7 -1583,6 +1584,6 @@@ end
            copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
                ret = -EFAULT;
  
-       fdput(f);
        return ret;
  }
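
The three sock_map.c conversions above all follow the same shape the rest of the series applies to other fdget() users: the explicit fdget()/fdput() pair around __bpf_map_get() becomes a scope-bound CLASS(fd, ...) declaration, so early returns need neither fdput() calls nor goto labels. A condensed before/after sketch (attach_old(), attach_new() and do_attach() are illustrative placeholders, not kernel functions):

/* before: explicit fdget()/fdput(), the success path must remember fdput() */
static int attach_old(u32 ufd, struct bpf_prog *prog)
{
	struct bpf_map *map;
	struct fd f;
	int ret;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	ret = do_attach(map, prog);	/* placeholder for the real work */
	fdput(f);
	return ret;
}

/* after: CLASS(fd, f) ties the fd reference to the scope of 'f', so it is
 * released automatically on every return path
 */
static int attach_new(u32 ufd, struct bpf_prog *prog)
{
	CLASS(fd, f)(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	if (IS_ERR(map))
		return PTR_ERR(map);
	return do_attach(map, prog);	/* same placeholder as above */
}
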
  
diff --combined security/security.c
index 4564a0a1e4ef3e196e1377c97b4c4b0477a13f29,d8d0b67ced2503be02eb34ca92a2500356287744..6875eb4a59fcc140a9aee1c9abef5e3859171351
  #include <linux/xattr.h>
  #include <linux/msg.h>
  #include <linux/overflow.h>
 +#include <linux/perf_event.h>
 +#include <linux/fs.h>
  #include <net/flow.h>
 +#include <net/sock.h>
  
 -/* How many LSMs were built into the kernel? */
 -#define LSM_COUNT (__end_lsm_info - __start_lsm_info)
 +#define SECURITY_HOOK_ACTIVE_KEY(HOOK, IDX) security_hook_active_##HOOK##_##IDX
  
  /*
 - * How many LSMs are built into the kernel as determined at
 - * build time. Used to determine fixed array sizes.
 - * The capability module is accounted for by CONFIG_SECURITY
 - */
 -#define LSM_CONFIG_COUNT ( \
 -      (IS_ENABLED(CONFIG_SECURITY) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_SELINUX) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_SMACK) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_TOMOYO) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_APPARMOR) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_YAMA) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_LOADPIN) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_SAFESETID) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_BPF_LSM) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_SECURITY_LANDLOCK) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_IMA) ? 1 : 0) + \
 -      (IS_ENABLED(CONFIG_EVM) ? 1 : 0))
 + * Identifier for the LSM static calls.
 + * HOOK is an LSM hook as defined in linux/lsm_hook_defs.h
 + * IDX is the index of the static call. 0 <= IDX < MAX_LSM_COUNT
 + */
 +#define LSM_STATIC_CALL(HOOK, IDX) lsm_static_call_##HOOK##_##IDX
 +
 +/*
 + * Call the macro M for each LSM hook MAX_LSM_COUNT times.
 + */
 +#define LSM_LOOP_UNROLL(M, ...)               \
 +do {                                          \
 +      UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__)   \
 +} while (0)
 +
 +#define LSM_DEFINE_UNROLL(M, ...) UNROLL(MAX_LSM_COUNT, M, __VA_ARGS__)
  
  /*
   * These are descriptions of the reasons that can be passed to the
@@@ -91,6 -92,7 +91,6 @@@ const char *const lockdown_reasons[LOCK
        [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
  };
  
 -struct security_hook_heads security_hook_heads __ro_after_init;
  static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
  
  static struct kmem_cache *lsm_file_cache;
@@@ -106,58 -108,9 +106,58 @@@ static __initdata const char *chosen_ma
  static __initconst const char *const builtin_lsm_order = CONFIG_LSM;
  
  /* Ordered list of LSMs to initialize. */
 -static __initdata struct lsm_info **ordered_lsms;
 +static __initdata struct lsm_info *ordered_lsms[MAX_LSM_COUNT + 1];
  static __initdata struct lsm_info *exclusive;
  
 +#ifdef CONFIG_HAVE_STATIC_CALL
 +#define LSM_HOOK_TRAMP(NAME, NUM) \
 +      &STATIC_CALL_TRAMP(LSM_STATIC_CALL(NAME, NUM))
 +#else
 +#define LSM_HOOK_TRAMP(NAME, NUM) NULL
 +#endif
 +
 +/*
 + * Define static calls and static keys for each LSM hook.
 + */
 +#define DEFINE_LSM_STATIC_CALL(NUM, NAME, RET, ...)                   \
 +      DEFINE_STATIC_CALL_NULL(LSM_STATIC_CALL(NAME, NUM),             \
 +                              *((RET(*)(__VA_ARGS__))NULL));          \
 +      DEFINE_STATIC_KEY_FALSE(SECURITY_HOOK_ACTIVE_KEY(NAME, NUM));
 +
 +#define LSM_HOOK(RET, DEFAULT, NAME, ...)                             \
 +      LSM_DEFINE_UNROLL(DEFINE_LSM_STATIC_CALL, NAME, RET, __VA_ARGS__)
 +#include <linux/lsm_hook_defs.h>
 +#undef LSM_HOOK
 +#undef DEFINE_LSM_STATIC_CALL
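
For a hook such as file_open (assuming its usual lsm_hook_defs.h entry,
LSM_HOOK(int, 0, file_open, struct file *file)) and MAX_LSM_COUNT == 2, the
UNROLL above turns each LSM_HOOK() entry into one static call plus one static
key per possible LSM slot, roughly (a sketch of the generated form, not literal
preprocessor output):

	DEFINE_STATIC_CALL_NULL(lsm_static_call_file_open_0,
				*((int (*)(struct file *file))NULL));
	DEFINE_STATIC_KEY_FALSE(security_hook_active_file_open_0);
	DEFINE_STATIC_CALL_NULL(lsm_static_call_file_open_1,
				*((int (*)(struct file *file))NULL));
	DEFINE_STATIC_KEY_FALSE(security_hook_active_file_open_1);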
 +
 +/*
 + * Initialise a table of static calls for each LSM hook.
 + * The DEFINE_STATIC_CALL_NULL invocations above generate a key (STATIC_CALL_KEY)
 + * and a trampoline (STATIC_CALL_TRAMP) for each hook slot; these are passed to
 + * __static_call_update() when updating the static call.
 + *
 + * The static calls table is used by early LSMs; some architectures can fault on
 + * unaligned accesses, and the fault handling code may not be ready by then.
 + * Thus, the static calls table should be aligned to avoid any unhandled faults
 + * in early init.
 + */
 +struct lsm_static_calls_table
 +      static_calls_table __ro_after_init __aligned(sizeof(u64)) = {
 +#define INIT_LSM_STATIC_CALL(NUM, NAME)                                       \
 +      (struct lsm_static_call) {                                      \
 +              .key = &STATIC_CALL_KEY(LSM_STATIC_CALL(NAME, NUM)),    \
 +              .trampoline = LSM_HOOK_TRAMP(NAME, NUM),                \
 +              .active = &SECURITY_HOOK_ACTIVE_KEY(NAME, NUM),         \
 +      },
 +#define LSM_HOOK(RET, DEFAULT, NAME, ...)                             \
 +      .NAME = {                                                       \
 +              LSM_DEFINE_UNROLL(INIT_LSM_STATIC_CALL, NAME)           \
 +      },
 +#include <linux/lsm_hook_defs.h>
 +#undef LSM_HOOK
 +#undef INIT_LSM_STATIC_CALL
 +      };
 +
  static __initdata bool debug;
  #define init_debug(...)                                               \
        do {                                                    \
@@@ -218,7 -171,7 +218,7 @@@ static void __init append_ordered_lsm(s
        if (exists_ordered_lsm(lsm))
                return;
  
 -      if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from))
 +      if (WARN(last_lsm == MAX_LSM_COUNT, "%s: out of LSM static calls!?\n", from))
                return;
  
        /* Enable this LSM, if it is not already set. */
@@@ -265,7 -218,6 +265,7 @@@ static void __init lsm_set_blob_sizes(s
  
        lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
        lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
 +      lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib);
        /*
         * The inode blob gets an rcu_head in addition to
         * what the modules might need.
                blob_sizes.lbs_inode = sizeof(struct rcu_head);
        lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
        lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc);
 +      lsm_set_blob_size(&needed->lbs_key, &blob_sizes.lbs_key);
        lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg);
 +      lsm_set_blob_size(&needed->lbs_perf_event, &blob_sizes.lbs_perf_event);
 +      lsm_set_blob_size(&needed->lbs_sock, &blob_sizes.lbs_sock);
        lsm_set_blob_size(&needed->lbs_superblock, &blob_sizes.lbs_superblock);
        lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task);
 +      lsm_set_blob_size(&needed->lbs_tun_dev, &blob_sizes.lbs_tun_dev);
        lsm_set_blob_size(&needed->lbs_xattr_count,
                          &blob_sizes.lbs_xattr_count);
 +      lsm_set_blob_size(&needed->lbs_bdev, &blob_sizes.lbs_bdev);
  }
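
Each new lbs_* field above follows the existing scheme: an LSM requests a size,
and lsm_set_blob_size() rewrites that request into the LSM's offset within the
shared blob while growing the running total in blob_sizes. A sketch of that
idea (the upstream helper may differ in details such as pointer alignment):

	static void __init lsm_set_blob_size(int *need, int *lbs)
	{
		int offset;

		if (*need <= 0) {
			*need = 0;
			return;
		}

		/* hand the LSM its offset, then grow the blob by its request */
		offset = *lbs;
		*lbs += *need;
		*need = offset;
	}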
  
  /* Prepare LSM for initialization. */
@@@ -321,7 -268,7 +321,7 @@@ static void __init initialize_lsm(struc
   * Current index to use while initializing the lsm id list.
   */
  u32 lsm_active_cnt __ro_after_init;
 -const struct lsm_id *lsm_idlist[LSM_CONFIG_COUNT];
 +const struct lsm_id *lsm_idlist[MAX_LSM_COUNT];
  
  /* Populate ordered LSMs list from comma-separated LSM name list. */
  static void __init ordered_lsm_parse(const char *order, const char *origin)
        kfree(sep);
  }
  
 +static void __init lsm_static_call_init(struct security_hook_list *hl)
 +{
 +      struct lsm_static_call *scall = hl->scalls;
 +      int i;
 +
 +      for (i = 0; i < MAX_LSM_COUNT; i++) {
 +              /* Update the first static call that is not used yet */
 +              if (!scall->hl) {
 +                      __static_call_update(scall->key, scall->trampoline,
 +                                           hl->hook.lsm_func_addr);
 +                      scall->hl = hl;
 +                      static_branch_enable(scall->active);
 +                      return;
 +              }
 +              scall++;
 +      }
 +      panic("%s - Ran out of static slots.\n", __func__);
 +}
 +
  static void __init lsm_early_cred(struct cred *cred);
  static void __init lsm_early_task(struct task_struct *task);
  
@@@ -450,6 -378,9 +450,6 @@@ static void __init ordered_lsm_init(voi
  {
        struct lsm_info **lsm;
  
 -      ordered_lsms = kcalloc(LSM_COUNT + 1, sizeof(*ordered_lsms),
 -                             GFP_KERNEL);
 -
        if (chosen_lsm_order) {
                if (chosen_major_lsm) {
                        pr_warn("security=%s is ignored because it is superseded by lsm=%s\n",
  
        init_debug("cred blob size       = %d\n", blob_sizes.lbs_cred);
        init_debug("file blob size       = %d\n", blob_sizes.lbs_file);
 +      init_debug("ib blob size         = %d\n", blob_sizes.lbs_ib);
        init_debug("inode blob size      = %d\n", blob_sizes.lbs_inode);
        init_debug("ipc blob size        = %d\n", blob_sizes.lbs_ipc);
 +#ifdef CONFIG_KEYS
 +      init_debug("key blob size        = %d\n", blob_sizes.lbs_key);
 +#endif /* CONFIG_KEYS */
        init_debug("msg_msg blob size    = %d\n", blob_sizes.lbs_msg_msg);
 +      init_debug("sock blob size       = %d\n", blob_sizes.lbs_sock);
        init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock);
 +      init_debug("perf event blob size = %d\n", blob_sizes.lbs_perf_event);
        init_debug("task blob size       = %d\n", blob_sizes.lbs_task);
 +      init_debug("tun device blob size = %d\n", blob_sizes.lbs_tun_dev);
        init_debug("xattr slots          = %d\n", blob_sizes.lbs_xattr_count);
 +      init_debug("bdev blob size       = %d\n", blob_sizes.lbs_bdev);
  
        /*
         * Create any kmem_caches needed for blobs
        lsm_early_task(current);
        for (lsm = ordered_lsms; *lsm; lsm++)
                initialize_lsm(*lsm);
 -
 -      kfree(ordered_lsms);
  }
  
  int __init early_security_init(void)
  {
        struct lsm_info *lsm;
  
 -#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
 -      INIT_HLIST_HEAD(&security_hook_heads.NAME);
 -#include "linux/lsm_hook_defs.h"
 -#undef LSM_HOOK
 -
        for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
                if (!lsm->enabled)
                        lsm->enabled = &lsm_enabled_true;
@@@ -624,14 -554,14 +624,14 @@@ void __init security_add_hooks(struct s
         * Look at the previous entry, if there is one, for duplication.
         */
        if (lsm_active_cnt == 0 || lsm_idlist[lsm_active_cnt - 1] != lsmid) {
 -              if (lsm_active_cnt >= LSM_CONFIG_COUNT)
 +              if (lsm_active_cnt >= MAX_LSM_COUNT)
                        panic("%s Too many LSMs registered.\n", __func__);
                lsm_idlist[lsm_active_cnt++] = lsmid;
        }
  
        for (i = 0; i < count; i++) {
                hooks[i].lsmid = lsmid;
 -              hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
 +              lsm_static_call_init(&hooks[i]);
        }
  
        /*
@@@ -666,42 -596,27 +666,42 @@@ int unregister_blocking_lsm_notifier(st
  EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
  
  /**
 - * lsm_cred_alloc - allocate a composite cred blob
 - * @cred: the cred that needs a blob
 + * lsm_blob_alloc - allocate a composite blob
 + * @dest: the destination for the blob
 + * @size: the size of the blob
   * @gfp: allocation type
   *
 - * Allocate the cred blob for all the modules
 + * Allocate a blob for all the modules
   *
   * Returns 0, or -ENOMEM if memory can't be allocated.
   */
 -static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
 +static int lsm_blob_alloc(void **dest, size_t size, gfp_t gfp)
  {
 -      if (blob_sizes.lbs_cred == 0) {
 -              cred->security = NULL;
 +      if (size == 0) {
 +              *dest = NULL;
                return 0;
        }
  
 -      cred->security = kzalloc(blob_sizes.lbs_cred, gfp);
 -      if (cred->security == NULL)
 +      *dest = kzalloc(size, gfp);
 +      if (*dest == NULL)
                return -ENOMEM;
        return 0;
  }
  
 +/**
 + * lsm_cred_alloc - allocate a composite cred blob
 + * @cred: the cred that needs a blob
 + * @gfp: allocation type
 + *
 + * Allocate the cred blob for all the modules
 + *
 + * Returns 0, or -ENOMEM if memory can't be allocated.
 + */
 +static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
 +{
 +      return lsm_blob_alloc(&cred->security, blob_sizes.lbs_cred, gfp);
 +}
 +
  /**
   * lsm_early_cred - during initialization allocate a composite cred blob
   * @cred: the cred that needs a blob
@@@ -745,7 -660,7 +745,7 @@@ static int lsm_file_alloc(struct file *
   *
   * Returns 0, or -ENOMEM if memory can't be allocated.
   */
 -int lsm_inode_alloc(struct inode *inode)
 +static int lsm_inode_alloc(struct inode *inode)
  {
        if (!lsm_inode_cache) {
                inode->i_security = NULL;
   */
  static int lsm_task_alloc(struct task_struct *task)
  {
 -      if (blob_sizes.lbs_task == 0) {
 -              task->security = NULL;
 -              return 0;
 -      }
 -
 -      task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL);
 -      if (task->security == NULL)
 -              return -ENOMEM;
 -      return 0;
 +      return lsm_blob_alloc(&task->security, blob_sizes.lbs_task, GFP_KERNEL);
  }
  
  /**
   */
  static int lsm_ipc_alloc(struct kern_ipc_perm *kip)
  {
 -      if (blob_sizes.lbs_ipc == 0) {
 -              kip->security = NULL;
 -              return 0;
 -      }
 +      return lsm_blob_alloc(&kip->security, blob_sizes.lbs_ipc, GFP_KERNEL);
 +}
  
 -      kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL);
 -      if (kip->security == NULL)
 -              return -ENOMEM;
 -      return 0;
 +#ifdef CONFIG_KEYS
 +/**
 + * lsm_key_alloc - allocate a composite key blob
 + * @key: the key that needs a blob
 + *
 + * Allocate the key blob for all the modules
 + *
 + * Returns 0, or -ENOMEM if memory can't be allocated.
 + */
 +static int lsm_key_alloc(struct key *key)
 +{
 +      return lsm_blob_alloc(&key->security, blob_sizes.lbs_key, GFP_KERNEL);
  }
 +#endif /* CONFIG_KEYS */
  
  /**
   * lsm_msg_msg_alloc - allocate a composite msg_msg blob
   */
  static int lsm_msg_msg_alloc(struct msg_msg *mp)
  {
 -      if (blob_sizes.lbs_msg_msg == 0) {
 -              mp->security = NULL;
 +      return lsm_blob_alloc(&mp->security, blob_sizes.lbs_msg_msg,
 +                            GFP_KERNEL);
 +}
 +
 +/**
 + * lsm_bdev_alloc - allocate a composite block_device blob
 + * @bdev: the block_device that needs a blob
 + *
 + * Allocate the block_device blob for all the modules
 + *
 + * Returns 0, or -ENOMEM if memory can't be allocated.
 + */
 +static int lsm_bdev_alloc(struct block_device *bdev)
 +{
 +      if (blob_sizes.lbs_bdev == 0) {
 +              bdev->bd_security = NULL;
                return 0;
        }
  
 -      mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL);
 -      if (mp->security == NULL)
 +      bdev->bd_security = kzalloc(blob_sizes.lbs_bdev, GFP_KERNEL);
 +      if (!bdev->bd_security)
                return -ENOMEM;
 +
        return 0;
  }
  
@@@ -859,8 -760,15 +859,8 @@@ static void __init lsm_early_task(struc
   */
  static int lsm_superblock_alloc(struct super_block *sb)
  {
 -      if (blob_sizes.lbs_superblock == 0) {
 -              sb->s_security = NULL;
 -              return 0;
 -      }
 -
 -      sb->s_security = kzalloc(blob_sizes.lbs_superblock, GFP_KERNEL);
 -      if (sb->s_security == NULL)
 -              return -ENOMEM;
 -      return 0;
 +      return lsm_blob_alloc(&sb->s_security, blob_sizes.lbs_superblock,
 +                            GFP_KERNEL);
  }
  
  /**
@@@ -945,43 -853,29 +945,43 @@@ out
   * call_int_hook:
   *    This is a hook that returns a value.
   */
 +#define __CALL_STATIC_VOID(NUM, HOOK, ...)                                 \
 +do {                                                                       \
 +      if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) {    \
 +              static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__);        \
 +      }                                                                    \
 +} while (0);
  
 -#define call_void_hook(FUNC, ...)                             \
 -      do {                                                    \
 -              struct security_hook_list *P;                   \
 -                                                              \
 -              hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \
 -                      P->hook.FUNC(__VA_ARGS__);              \
 +#define call_void_hook(HOOK, ...)                                 \
 +      do {                                                      \
 +              LSM_LOOP_UNROLL(__CALL_STATIC_VOID, HOOK, __VA_ARGS__); \
        } while (0)
  
 -#define call_int_hook(FUNC, ...) ({                           \
 -      int RC = LSM_RET_DEFAULT(FUNC);                         \
 -      do {                                                    \
 -              struct security_hook_list *P;                   \
 -                                                              \
 -              hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \
 -                      RC = P->hook.FUNC(__VA_ARGS__);         \
 -                      if (RC != LSM_RET_DEFAULT(FUNC))        \
 -                              break;                          \
 -              }                                               \
 -      } while (0);                                            \
 -      RC;                                                     \
 +
 +#define __CALL_STATIC_INT(NUM, R, HOOK, LABEL, ...)                        \
 +do {                                                                       \
 +      if (static_branch_unlikely(&SECURITY_HOOK_ACTIVE_KEY(HOOK, NUM))) {  \
 +              R = static_call(LSM_STATIC_CALL(HOOK, NUM))(__VA_ARGS__);    \
 +              if (R != LSM_RET_DEFAULT(HOOK))                              \
 +                      goto LABEL;                                          \
 +      }                                                                    \
 +} while (0);
 +
 +#define call_int_hook(HOOK, ...)                                      \
 +({                                                                    \
 +      __label__ OUT;                                                  \
 +      int RC = LSM_RET_DEFAULT(HOOK);                                 \
 +                                                                      \
 +      LSM_LOOP_UNROLL(__CALL_STATIC_INT, RC, HOOK, OUT, __VA_ARGS__); \
 +OUT:                                                                  \
 +      RC;                                                             \
  })
  
 +#define lsm_for_each_hook(scall, NAME)                                        \
 +      for (scall = static_calls_table.NAME;                           \
 +           scall - static_calls_table.NAME < MAX_LSM_COUNT; scall++)  \
 +              if (static_key_enabled(&scall->active->key))
 +
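
With these macros, a call such as call_int_hook(file_open, file) unrolls at
compile time into one guarded static call per slot. For MAX_LSM_COUNT == 2 the
result is roughly the following (a sketch of the expansion, using the
identifiers generated by the definitions above):

	({
		__label__ OUT;
		int RC = LSM_RET_DEFAULT(file_open);

		if (static_branch_unlikely(&security_hook_active_file_open_0)) {
			RC = static_call(lsm_static_call_file_open_0)(file);
			if (RC != LSM_RET_DEFAULT(file_open))
				goto OUT;
		}
		if (static_branch_unlikely(&security_hook_active_file_open_1)) {
			RC = static_call(lsm_static_call_file_open_1)(file);
			if (RC != LSM_RET_DEFAULT(file_open))
				goto OUT;
		}
	OUT:
		RC;
	})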
  /* Security operations */
  
  /**
@@@ -1216,19 -1110,20 +1216,19 @@@ int security_settime64(const struct tim
   */
  int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        int cap_sys_admin = 1;
        int rc;
  
        /*
 -       * The module will respond with a positive value if
 -       * it thinks the __vm_enough_memory() call should be
 -       * made with the cap_sys_admin set. If all of the modules
 -       * agree that it should be set it will. If any module
 -       * thinks it should not be set it won't.
 +       * The module will respond with 0 if it thinks the __vm_enough_memory()
 +       * call should be made with the cap_sys_admin set. If all of the modules
 +       * agree that it should be set it will. If any module thinks it should
 +       * not be set it won't.
         */
 -      hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) {
 -              rc = hp->hook.vm_enough_memory(mm, pages);
 -              if (rc <= 0) {
 +      lsm_for_each_hook(scall, vm_enough_memory) {
 +              rc = scall->hl->hook.vm_enough_memory(mm, pages);
 +              if (rc < 0) {
                        cap_sys_admin = 0;
                        break;
                }
@@@ -1374,12 -1269,13 +1374,12 @@@ int security_fs_context_dup(struct fs_c
  int security_fs_context_parse_param(struct fs_context *fc,
                                    struct fs_parameter *param)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        int trc;
        int rc = -ENOPARAM;
  
 -      hlist_for_each_entry(hp, &security_hook_heads.fs_context_parse_param,
 -                           list) {
 -              trc = hp->hook.fs_context_parse_param(fc, param);
 +      lsm_for_each_hook(scall, fs_context_parse_param) {
 +              trc = scall->hl->hook.fs_context_parse_param(fc, param);
                if (trc == 0)
                        rc = 0;
                else if (trc != -ENOPARAM)
@@@ -1609,11 -1505,12 +1609,11 @@@ int security_sb_set_mnt_opts(struct sup
                             unsigned long kern_flags,
                             unsigned long *set_kern_flags)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        int rc = mnt_opts ? -EOPNOTSUPP : LSM_RET_DEFAULT(sb_set_mnt_opts);
  
 -      hlist_for_each_entry(hp, &security_hook_heads.sb_set_mnt_opts,
 -                           list) {
 -              rc = hp->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags,
 +      lsm_for_each_hook(scall, sb_set_mnt_opts) {
 +              rc = scall->hl->hook.sb_set_mnt_opts(sb, mnt_opts, kern_flags,
                                              set_kern_flags);
                if (rc != LSM_RET_DEFAULT(sb_set_mnt_opts))
                        break;
@@@ -1699,8 -1596,9 +1699,8 @@@ int security_inode_alloc(struct inode *
  
  static void inode_free_by_rcu(struct rcu_head *head)
  {
 -      /*
 -       * The rcu head is at the start of the inode blob
 -       */
 +      /* The rcu head is at the start of the inode blob */
 +      call_void_hook(inode_free_security_rcu, head);
        kmem_cache_free(lsm_inode_cache, head);
  }
  
   * security_inode_free() - Free an inode's LSM blob
   * @inode: the inode
   *
 - * Deallocate the inode security structure and set @inode->i_security to NULL.
 + * Release any LSM resources associated with @inode, although due to the
 + * inode's RCU protections it is possible that the resources will not be
 + * fully released until after the current RCU grace period has elapsed.
 + *
 + * It is important for LSMs to note that despite being present in a call to
 + * security_inode_free(), @inode may still be referenced in a VFS path walk
 + * and calls to security_inode_permission() may be made during, or after,
 + * a call to security_inode_free().  For this reason the inode->i_security
 + * field is released via a call_rcu() callback and any LSMs which need to
 + * retain inode state for use in security_inode_permission() should only
 + * release that state in the inode_free_security_rcu() LSM hook callback.
   */
  void security_inode_free(struct inode *inode)
  {
        call_void_hook(inode_free_security, inode);
 -      /*
 -       * The inode may still be referenced in a path walk and
 -       * a call to security_inode_permission() can be made
 -       * after inode_free_security() is called. Ideally, the VFS
 -       * wouldn't do this, but fixing that is a much harder
 -       * job. For now, simply free the i_security via RCU, and
 -       * leave the current inode->i_security pointer intact.
 -       * The inode will be freed after the RCU grace period too.
 -       */
 -      if (inode->i_security)
 -              call_rcu((struct rcu_head *)inode->i_security,
 -                       inode_free_by_rcu);
 +      if (!inode->i_security)
 +              return;
 +      call_rcu((struct rcu_head *)inode->i_security, inode_free_by_rcu);
  }
  
  /**
@@@ -1808,7 -1705,7 +1808,7 @@@ int security_inode_init_security(struc
                                 const struct qstr *qstr,
                                 const initxattrs initxattrs, void *fs_data)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        struct xattr *new_xattrs = NULL;
        int ret = -EOPNOTSUPP, xattr_count = 0;
  
                        return -ENOMEM;
        }
  
 -      hlist_for_each_entry(hp, &security_hook_heads.inode_init_security,
 -                           list) {
 -              ret = hp->hook.inode_init_security(inode, dir, qstr, new_xattrs,
 +      lsm_for_each_hook(scall, inode_init_security) {
 +              ret = scall->hl->hook.inode_init_security(inode, dir, qstr, new_xattrs,
                                                  &xattr_count);
                if (ret && ret != -EOPNOTSUPP)
                        goto out;
@@@ -2763,14 -2661,19 +2763,14 @@@ EXPORT_SYMBOL(security_inode_copy_up)
   * lower layer to the union/overlay layer.   The caller is responsible for
   * reading and writing the xattrs, this hook is merely a filter.
   *
 - * Return: Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP
 - *         if the security module does not know about attribute, or a negative
 - *         error code to abort the copy up.
 + * Return: Returns 0 to accept the xattr, -ECANCELED to discard the xattr,
 + *         -EOPNOTSUPP if the security module does not know about the attribute,
 + *         or a negative error code to abort the copy up.
   */
  int security_inode_copy_up_xattr(struct dentry *src, const char *name)
  {
        int rc;
  
 -      /*
 -       * The implementation can return 0 (accept the xattr), 1 (discard the
 -       * xattr), -EOPNOTSUPP if it does not know anything about the xattr or
 -       * any other error code in case of an error.
 -       */
        rc = call_int_hook(inode_copy_up_xattr, src, name);
        if (rc != LSM_RET_DEFAULT(inode_copy_up_xattr))
                return rc;
  }
  EXPORT_SYMBOL(security_inode_copy_up_xattr);
  
 +/**
 + * security_inode_setintegrity() - Set the inode's integrity data
 + * @inode: inode
 + * @type: type of integrity, e.g. hash digest, signature, etc
 + * @value: the integrity value
 + * @size: size of the integrity value
 + *
 + * Register a verified integrity measurement of an inode with LSMs.
 + * LSMs should free the previously saved data if @value is NULL.
 + *
 + * Return: Returns 0 on success, negative values on failure.
 + */
 +int security_inode_setintegrity(const struct inode *inode,
 +                              enum lsm_integrity_type type, const void *value,
 +                              size_t size)
 +{
 +      return call_int_hook(inode_setintegrity, inode, type, value, size);
 +}
 +EXPORT_SYMBOL(security_inode_setintegrity);
 +
  /**
   * security_kernfs_init_security() - Init LSM context for a kernfs node
   * @kn_dir: parent kernfs node
@@@ -3048,8 -2931,6 +3048,8 @@@ int security_file_fcntl(struct file *fi
   * Save owner security information (typically from current->security) in
   * file->f_security for later use by the send_sigiotask hook.
   *
 + * This hook is called with file->f_owner.lock held.
 + *
   * Return: Returns 0 on success.
   */
  void security_file_set_fowner(struct file *file)
@@@ -3676,10 -3557,10 +3676,10 @@@ int security_task_prctl(int option, uns
  {
        int thisrc;
        int rc = LSM_RET_DEFAULT(task_prctl);
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
  
 -      hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) {
 -              thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5);
 +      lsm_for_each_hook(scall, task_prctl) {
 +              thisrc = scall->hl->hook.task_prctl(option, arg2, arg3, arg4, arg5);
                if (thisrc != LSM_RET_DEFAULT(task_prctl)) {
                        rc = thisrc;
                        if (thisrc != 0)
@@@ -4085,7 -3966,7 +4085,7 @@@ EXPORT_SYMBOL(security_d_instantiate)
  int security_getselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
                         u32 __user *size, u32 flags)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        struct lsm_ctx lctx = { .id = LSM_ID_UNDEF, };
        u8 __user *base = (u8 __user *)uctx;
        u32 entrysize;
         * In the usual case gather all the data from the LSMs.
         * In the single case only get the data from the LSM specified.
         */
 -      hlist_for_each_entry(hp, &security_hook_heads.getselfattr, list) {
 -              if (single && lctx.id != hp->lsmid->id)
 +      lsm_for_each_hook(scall, getselfattr) {
 +              if (single && lctx.id != scall->hl->lsmid->id)
                        continue;
                entrysize = left;
                if (base)
                        uctx = (struct lsm_ctx __user *)(base + total);
 -              rc = hp->hook.getselfattr(attr, uctx, &entrysize, flags);
 +              rc = scall->hl->hook.getselfattr(attr, uctx, &entrysize, flags);
                if (rc == -EOPNOTSUPP) {
                        rc = 0;
                        continue;
  int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx,
                         u32 size, u32 flags)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        struct lsm_ctx *lctx;
        int rc = LSM_RET_DEFAULT(setselfattr);
        u64 required_len;
                goto free_out;
        }
  
 -      hlist_for_each_entry(hp, &security_hook_heads.setselfattr, list)
 -              if ((hp->lsmid->id) == lctx->id) {
 -                      rc = hp->hook.setselfattr(attr, lctx, size, flags);
 +      lsm_for_each_hook(scall, setselfattr)
 +              if ((scall->hl->lsmid->id) == lctx->id) {
 +                      rc = scall->hl->hook.setselfattr(attr, lctx, size, flags);
                        break;
                }
  
@@@ -4226,12 -4107,12 +4226,12 @@@ free_out
  int security_getprocattr(struct task_struct *p, int lsmid, const char *name,
                         char **value)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
  
 -      hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) {
 -              if (lsmid != 0 && lsmid != hp->lsmid->id)
 +      lsm_for_each_hook(scall, getprocattr) {
 +              if (lsmid != 0 && lsmid != scall->hl->lsmid->id)
                        continue;
 -              return hp->hook.getprocattr(p, name, value);
 +              return scall->hl->hook.getprocattr(p, name, value);
        }
        return LSM_RET_DEFAULT(getprocattr);
  }
   */
  int security_setprocattr(int lsmid, const char *name, void *value, size_t size)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
  
 -      hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) {
 -              if (lsmid != 0 && lsmid != hp->lsmid->id)
 +      lsm_for_each_hook(scall, setprocattr) {
 +              if (lsmid != 0 && lsmid != scall->hl->lsmid->id)
                        continue;
 -              return hp->hook.setprocattr(name, value, size);
 +              return scall->hl->hook.setprocattr(name, value, size);
        }
        return LSM_RET_DEFAULT(setprocattr);
  }
@@@ -4792,20 -4673,6 +4792,20 @@@ int security_socket_getpeersec_dgram(st
  }
  EXPORT_SYMBOL(security_socket_getpeersec_dgram);
  
 +/**
 + * lsm_sock_alloc - allocate a composite sock blob
 + * @sock: the sock that needs a blob
 + * @gfp: allocation mode
 + *
 + * Allocate the sock blob for all the modules
 + *
 + * Returns 0, or -ENOMEM if memory can't be allocated.
 + */
 +static int lsm_sock_alloc(struct sock *sock, gfp_t gfp)
 +{
 +      return lsm_blob_alloc(&sock->sk_security, blob_sizes.lbs_sock, gfp);
 +}
 +
  /**
   * security_sk_alloc() - Allocate and initialize a sock's LSM blob
   * @sk: sock
   */
  int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
  {
 -      return call_int_hook(sk_alloc_security, sk, family, priority);
 +      int rc = lsm_sock_alloc(sk, priority);
 +
 +      if (unlikely(rc))
 +              return rc;
 +      rc = call_int_hook(sk_alloc_security, sk, family, priority);
 +      if (unlikely(rc))
 +              security_sk_free(sk);
 +      return rc;
  }
  
  /**
  void security_sk_free(struct sock *sk)
  {
        call_void_hook(sk_free_security, sk);
 +      kfree(sk->sk_security);
 +      sk->sk_security = NULL;
  }
  
  /**
@@@ -4987,18 -4845,7 +4987,18 @@@ EXPORT_SYMBOL(security_secmark_refcount
   */
  int security_tun_dev_alloc_security(void **security)
  {
 -      return call_int_hook(tun_dev_alloc_security, security);
 +      int rc;
 +
 +      rc = lsm_blob_alloc(security, blob_sizes.lbs_tun_dev, GFP_KERNEL);
 +      if (rc)
 +              return rc;
 +
 +      rc = call_int_hook(tun_dev_alloc_security, *security);
 +      if (rc) {
 +              kfree(*security);
 +              *security = NULL;
 +      }
 +      return rc;
  }
  EXPORT_SYMBOL(security_tun_dev_alloc_security);
  
   */
  void security_tun_dev_free_security(void *security)
  {
 -      call_void_hook(tun_dev_free_security, security);
 +      kfree(security);
  }
  EXPORT_SYMBOL(security_tun_dev_free_security);
  
@@@ -5206,18 -5053,7 +5206,18 @@@ EXPORT_SYMBOL(security_ib_endport_manag
   */
  int security_ib_alloc_security(void **sec)
  {
 -      return call_int_hook(ib_alloc_security, sec);
 +      int rc;
 +
 +      rc = lsm_blob_alloc(sec, blob_sizes.lbs_ib, GFP_KERNEL);
 +      if (rc)
 +              return rc;
 +
 +      rc = call_int_hook(ib_alloc_security, *sec);
 +      if (rc) {
 +              kfree(*sec);
 +              *sec = NULL;
 +      }
 +      return rc;
  }
  EXPORT_SYMBOL(security_ib_alloc_security);
  
   */
  void security_ib_free_security(void *sec)
  {
 -      call_void_hook(ib_free_security, sec);
 +      kfree(sec);
  }
  EXPORT_SYMBOL(security_ib_free_security);
  #endif        /* CONFIG_SECURITY_INFINIBAND */
@@@ -5387,7 -5223,7 +5387,7 @@@ int security_xfrm_state_pol_flow_match(
                                       struct xfrm_policy *xp,
                                       const struct flowi_common *flic)
  {
 -      struct security_hook_list *hp;
 +      struct lsm_static_call *scall;
        int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match);
  
        /*
         * For speed optimization, we explicitly break the loop rather than
         * using the macro
         */
 -      hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match,
 -                           list) {
 -              rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic);
 +      lsm_for_each_hook(scall, xfrm_state_pol_flow_match) {
 +              rc = scall->hl->hook.xfrm_state_pol_flow_match(x, xp, flic);
                break;
        }
        return rc;
@@@ -5445,14 -5282,7 +5445,14 @@@ EXPORT_SYMBOL(security_skb_classify_flo
  int security_key_alloc(struct key *key, const struct cred *cred,
                       unsigned long flags)
  {
 -      return call_int_hook(key_alloc, key, cred, flags);
 +      int rc = lsm_key_alloc(key);
 +
 +      if (unlikely(rc))
 +              return rc;
 +      rc = call_int_hook(key_alloc, key, cred, flags);
 +      if (unlikely(rc))
 +              security_key_free(key);
 +      return rc;
  }
  
  /**
   */
  void security_key_free(struct key *key)
  {
 -      call_void_hook(key_free, key);
 +      kfree(key->security);
 +      key->security = NULL;
  }
  
  /**
@@@ -5681,7 -5510,7 +5681,7 @@@ int security_bpf_prog_load(struct bpf_p
   * Return: Returns 0 on success, error on failure.
   */
  int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
-                             struct path *path)
+                             const struct path *path)
  {
        return call_int_hook(bpf_token_create, token, attr, path);
  }
@@@ -5767,85 -5596,6 +5767,85 @@@ int security_locked_down(enum lockdown_
  }
  EXPORT_SYMBOL(security_locked_down);
  
 +/**
 + * security_bdev_alloc() - Allocate a block device LSM blob
 + * @bdev: block device
 + *
 + * Allocate and attach a security structure to @bdev->bd_security.  The
 + * security field is initialized to NULL when the bdev structure is
 + * allocated.
 + *
 + * Return: Return 0 if operation was successful.
 + */
 +int security_bdev_alloc(struct block_device *bdev)
 +{
 +      int rc = 0;
 +
 +      rc = lsm_bdev_alloc(bdev);
 +      if (unlikely(rc))
 +              return rc;
 +
 +      rc = call_int_hook(bdev_alloc_security, bdev);
 +      if (unlikely(rc))
 +              security_bdev_free(bdev);
 +
 +      return rc;
 +}
 +EXPORT_SYMBOL(security_bdev_alloc);
 +
 +/**
 + * security_bdev_free() - Free a block device's LSM blob
 + * @bdev: block device
 + *
 + * Deallocate the bdev security structure and set @bdev->bd_security to NULL.
 + */
 +void security_bdev_free(struct block_device *bdev)
 +{
 +      if (!bdev->bd_security)
 +              return;
 +
 +      call_void_hook(bdev_free_security, bdev);
 +
 +      kfree(bdev->bd_security);
 +      bdev->bd_security = NULL;
 +}
 +EXPORT_SYMBOL(security_bdev_free);
 +
 +/**
 + * security_bdev_setintegrity() - Set the device's integrity data
 + * @bdev: block device
 + * @type: type of integrity, e.g. hash digest, signature, etc
 + * @value: the integrity value
 + * @size: size of the integrity value
 + *
 + * Register a verified integrity measurement of a bdev with LSMs.
 + * LSMs should free the previously saved data if @value is NULL.
 + * Note that this hook should be invoked every time the security
 + * information is updated to keep these data current. For example, in dm-verity,
 + * if the mapping table is reloaded and configured to use a different dm-verity
 + * target with a new roothash and signing information, the previously stored
 + * data in the LSM blob will become obsolete. It is crucial to re-invoke the
 + * hook to refresh these data and ensure they are up to date. This necessity
 + * arises from the design of device-mapper, where a device-mapper device is
 + * first created, and then targets are subsequently loaded into it. These
 + * targets can be modified multiple times during the device's lifetime.
 + * Therefore, while the LSM blob is allocated during the creation of the block
 + * device, its actual contents are not initialized at this stage and can change
 + * substantially over time. This includes alterations from data that the LSMs
 + * 'trust' to data that they do not, making it essential to handle these changes
 + * correctly. Failure to address this dynamic aspect could potentially allow
 + * for bypassing LSM checks.
 + *
 + * Return: Returns 0 on success, negative values on failure.
 + */
 +int security_bdev_setintegrity(struct block_device *bdev,
 +                             enum lsm_integrity_type type, const void *value,
 +                             size_t size)
 +{
 +      return call_int_hook(bdev_setintegrity, bdev, type, value, size);
 +}
 +EXPORT_SYMBOL(security_bdev_setintegrity);
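
The dm-verity scenario described in the comment maps onto a caller pattern like
the following (a sketch only; the enum lsm_integrity_type value and the digest
variables are assumed from the dm-verity/IPE series, not shown in this diff):

	/* after the verity target's root hash has been validated */
	static int notify_lsms_of_roothash(struct block_device *bdev,
					   const u8 *root_digest, size_t digest_len)
	{
		/* LSM_INT_DMVERITY_ROOTHASH: assumed enum value */
		return security_bdev_setintegrity(bdev, LSM_INT_DMVERITY_ROOTHASH,
						  root_digest, digest_len);
	}

Per the comment above, such a call would need to be repeated whenever the
mapping table is reloaded so the LSM blob never holds stale integrity data.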
 +
  #ifdef CONFIG_PERF_EVENTS
  /**
   * security_perf_event_open() - Check if a perf event open is allowed
@@@ -5871,19 -5621,7 +5871,19 @@@ int security_perf_event_open(struct per
   */
  int security_perf_event_alloc(struct perf_event *event)
  {
 -      return call_int_hook(perf_event_alloc, event);
 +      int rc;
 +
 +      rc = lsm_blob_alloc(&event->security, blob_sizes.lbs_perf_event,
 +                          GFP_KERNEL);
 +      if (rc)
 +              return rc;
 +
 +      rc = call_int_hook(perf_event_alloc, event);
 +      if (rc) {
 +              kfree(event->security);
 +              event->security = NULL;
 +      }
 +      return rc;
  }
  
  /**
   */
  void security_perf_event_free(struct perf_event *event)
  {
 -      call_void_hook(perf_event_free, event);
 +      kfree(event->security);
 +      event->security = NULL;
  }
  
  /**
@@@ -5966,13 -5703,3 +5966,13 @@@ int security_uring_cmd(struct io_uring_
        return call_int_hook(uring_cmd, ioucmd);
  }
  #endif /* CONFIG_IO_URING */
 +
 +/**
 + * security_initramfs_populated() - Notify LSMs that initramfs has been loaded
 + *
 + * Tells the LSMs the initramfs has been unpacked into the rootfs.
 + */
 +void security_initramfs_populated(void)
 +{
 +      call_void_hook(initramfs_populated);
 +}
diff --combined security/selinux/hooks.c
index 94c52314012593a057ad659d028ac49a44ae754f,0eec141a8f37e0c04ed2cbb0b2c242c59a87040c..fc926d3cac6e25e9411f918eb341c35a52504999
@@@ -282,13 -282,8 +282,13 @@@ static int __inode_security_revalidate(
  
        might_sleep_if(may_sleep);
  
 +      /*
 +       * The check of isec->initialized below is racy but
 +       * inode_doinit_with_dentry() will recheck with
 +       * isec->lock held.
 +       */
        if (selinux_initialized() &&
 -          isec->initialized != LABEL_INITIALIZED) {
 +          data_race(isec->initialized != LABEL_INITIALIZED)) {
                if (!may_sleep)
                        return -ECHILD;
  
@@@ -2207,16 -2202,23 +2207,16 @@@ static int selinux_syslog(int type
  }
  
  /*
 - * Check that a process has enough memory to allocate a new virtual
 - * mapping. 0 means there is enough memory for the allocation to
 - * succeed and -ENOMEM implies there is not.
 + * Check permission for allocating a new virtual mapping. Returns
 + * 0 if permission is granted, negative error code if not.
   *
   * Do not audit the selinux permission check, as this is applied to all
   * processes that allocate mappings.
   */
  static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
  {
 -      int rc, cap_sys_admin = 0;
 -
 -      rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN,
 -                               CAP_OPT_NOAUDIT, true);
 -      if (rc == 0)
 -              cap_sys_admin = 1;
 -
 -      return cap_sys_admin;
 +      return cred_has_capability(current_cred(), CAP_SYS_ADMIN,
 +                                 CAP_OPT_NOAUDIT, true);
  }
  
  /* binprm security operations */
@@@ -3536,8 -3538,8 +3536,8 @@@ static int selinux_inode_copy_up_xattr(
         * xattrs up.  Instead, filter out SELinux-related xattrs following
         * policy load.
         */
 -      if (selinux_initialized() && strcmp(name, XATTR_NAME_SELINUX) == 0)
 -              return 1; /* Discard */
 +      if (selinux_initialized() && !strcmp(name, XATTR_NAME_SELINUX))
 +              return -ECANCELED; /* Discard */
        /*
         * Any other attribute apart from SELINUX is neither claimed nor
         * supported by SELinux.
@@@ -3850,17 -3852,7 +3850,17 @@@ static int selinux_file_mprotect(struc
        if (default_noexec &&
            (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
                int rc = 0;
 -              if (vma_is_initial_heap(vma)) {
 +              /*
 +               * We don't use the vma_is_initial_heap() helper as it has
 +               * a history of problems and is currently broken on systems
 +               * where there is no heap, e.g. brk == start_brk.  Before
 +               * replacing the conditional below with vma_is_initial_heap(),
 +               * or something similar, please ensure that the logic is the
 +               * same as what we have below or you have tested every possible
 +               * corner case you can think to test.
 +               */
 +              if (vma->vm_start >= vma->vm_mm->start_brk &&
 +                  vma->vm_end <= vma->vm_mm->brk) {
                        rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
                                          PROCESS__EXECHEAP, NULL);
                } else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
@@@ -3948,7 -3940,7 +3948,7 @@@ static int selinux_file_send_sigiotask(
        struct file_security_struct *fsec;
  
        /* struct fown_struct is never outside the context of a struct file */
 -      file = container_of(fown, struct file, f_owner);
 +      file = fown->file;
  
        fsec = selinux_file(file);
  
@@@ -4592,7 -4584,7 +4592,7 @@@ static int socket_sockcreate_sid(const 
  
  static int sock_has_perm(struct sock *sk, u32 perms)
  {
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        struct common_audit_data ad;
        struct lsm_network_audit net;
  
@@@ -4660,7 -4652,7 +4660,7 @@@ static int selinux_socket_post_create(s
        isec->initialized = LABEL_INITIALIZED;
  
        if (sock->sk) {
 -              sksec = sock->sk->sk_security;
 +              sksec = selinux_sock(sock->sk);
                sksec->sclass = sclass;
                sksec->sid = sid;
                /* Allows detection of the first association on this socket */
  static int selinux_socket_socketpair(struct socket *socka,
                                     struct socket *sockb)
  {
 -      struct sk_security_struct *sksec_a = socka->sk->sk_security;
 -      struct sk_security_struct *sksec_b = sockb->sk->sk_security;
 +      struct sk_security_struct *sksec_a = selinux_sock(socka->sk);
 +      struct sk_security_struct *sksec_b = selinux_sock(sockb->sk);
  
        sksec_a->peer_sid = sksec_b->sid;
        sksec_b->peer_sid = sksec_a->sid;
  static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
  {
        struct sock *sk = sock->sk;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        u16 family;
        int err;
  
@@@ -4832,7 -4824,7 +4832,7 @@@ static int selinux_socket_connect_helpe
                                         struct sockaddr *address, int addrlen)
  {
        struct sock *sk = sock->sk;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        int err;
  
        err = sock_has_perm(sk, SOCKET__CONNECT);
@@@ -5010,9 -5002,9 +5010,9 @@@ static int selinux_socket_unix_stream_c
                                              struct sock *other,
                                              struct sock *newsk)
  {
 -      struct sk_security_struct *sksec_sock = sock->sk_security;
 -      struct sk_security_struct *sksec_other = other->sk_security;
 -      struct sk_security_struct *sksec_new = newsk->sk_security;
 +      struct sk_security_struct *sksec_sock = selinux_sock(sock);
 +      struct sk_security_struct *sksec_other = selinux_sock(other);
 +      struct sk_security_struct *sksec_new = selinux_sock(newsk);
        struct common_audit_data ad;
        struct lsm_network_audit net;
        int err;
  static int selinux_socket_unix_may_send(struct socket *sock,
                                        struct socket *other)
  {
 -      struct sk_security_struct *ssec = sock->sk->sk_security;
 -      struct sk_security_struct *osec = other->sk->sk_security;
 +      struct sk_security_struct *ssec = selinux_sock(sock->sk);
 +      struct sk_security_struct *osec = selinux_sock(other->sk);
        struct common_audit_data ad;
        struct lsm_network_audit net;
  
@@@ -5079,7 -5071,7 +5079,7 @@@ static int selinux_sock_rcv_skb_compat(
                                       u16 family)
  {
        int err = 0;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        u32 sk_sid = sksec->sid;
        struct common_audit_data ad;
        struct lsm_network_audit net;
  static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
  {
        int err, peerlbl_active, secmark_active;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        u16 family = sk->sk_family;
        u32 sk_sid = sksec->sid;
        struct common_audit_data ad;
@@@ -5176,7 -5168,7 +5176,7 @@@ static int selinux_socket_getpeersec_st
        int err = 0;
        char *scontext = NULL;
        u32 scontext_len;
 -      struct sk_security_struct *sksec = sock->sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sock->sk);
        u32 peer_sid = SECSID_NULL;
  
        if (sksec->sclass == SECCLASS_UNIX_STREAM_SOCKET ||
@@@ -5236,27 -5228,34 +5236,27 @@@ static int selinux_socket_getpeersec_dg
  
  static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
  {
 -      struct sk_security_struct *sksec;
 -
 -      sksec = kzalloc(sizeof(*sksec), priority);
 -      if (!sksec)
 -              return -ENOMEM;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
  
        sksec->peer_sid = SECINITSID_UNLABELED;
        sksec->sid = SECINITSID_UNLABELED;
        sksec->sclass = SECCLASS_SOCKET;
        selinux_netlbl_sk_security_reset(sksec);
 -      sk->sk_security = sksec;
  
        return 0;
  }
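
selinux_sock() is the blob accessor used throughout these hunks. It is defined
outside this file, but given the lbs_sock entry added to selinux_blob_sizes
below, its expected shape (a sketch mirroring the existing selinux_cred() and
selinux_file() helpers in security/selinux/include/objsec.h) is:

	static inline struct sk_security_struct *selinux_sock(const struct sock *sk)
	{
		return sk->sk_security + selinux_blob_sizes.lbs_sock;
	}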
  
  static void selinux_sk_free_security(struct sock *sk)
  {
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
  
 -      sk->sk_security = NULL;
        selinux_netlbl_sk_security_free(sksec);
 -      kfree(sksec);
  }
  
  static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk)
  {
 -      struct sk_security_struct *sksec = sk->sk_security;
 -      struct sk_security_struct *newsksec = newsk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
 +      struct sk_security_struct *newsksec = selinux_sock(newsk);
  
        newsksec->sid = sksec->sid;
        newsksec->peer_sid = sksec->peer_sid;
@@@ -5270,7 -5269,7 +5270,7 @@@ static void selinux_sk_getsecid(const s
        if (!sk)
                *secid = SECINITSID_ANY_SOCKET;
        else {
 -              const struct sk_security_struct *sksec = sk->sk_security;
 +              const struct sk_security_struct *sksec = selinux_sock(sk);
  
                *secid = sksec->sid;
        }
@@@ -5280,7 -5279,7 +5280,7 @@@ static void selinux_sock_graft(struct s
  {
        struct inode_security_struct *isec =
                inode_security_novalidate(SOCK_INODE(parent));
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
  
        if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6 ||
            sk->sk_family == PF_UNIX)
@@@ -5297,7 -5296,7 +5297,7 @@@ static int selinux_sctp_process_new_ass
  {
        struct sock *sk = asoc->base.sk;
        u16 family = sk->sk_family;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        struct common_audit_data ad;
        struct lsm_network_audit net;
        int err;
  static int selinux_sctp_assoc_request(struct sctp_association *asoc,
                                      struct sk_buff *skb)
  {
 -      struct sk_security_struct *sksec = asoc->base.sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
        u32 conn_sid;
        int err;
  
  static int selinux_sctp_assoc_established(struct sctp_association *asoc,
                                          struct sk_buff *skb)
  {
 -      struct sk_security_struct *sksec = asoc->base.sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(asoc->base.sk);
  
        if (!selinux_policycap_extsockclass())
                return 0;
@@@ -5484,8 -5483,8 +5484,8 @@@ static int selinux_sctp_bind_connect(st
  static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
                                  struct sock *newsk)
  {
 -      struct sk_security_struct *sksec = sk->sk_security;
 -      struct sk_security_struct *newsksec = newsk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
 +      struct sk_security_struct *newsksec = selinux_sock(newsk);
  
        /* If policy does not support SECCLASS_SCTP_SOCKET then call
         * the non-sctp clone version.
  
  static int selinux_mptcp_add_subflow(struct sock *sk, struct sock *ssk)
  {
 -      struct sk_security_struct *ssksec = ssk->sk_security;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *ssksec = selinux_sock(ssk);
 +      struct sk_security_struct *sksec = selinux_sock(sk);
  
        ssksec->sclass = sksec->sclass;
        ssksec->sid = sksec->sid;
  static int selinux_inet_conn_request(const struct sock *sk, struct sk_buff *skb,
                                     struct request_sock *req)
  {
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        int err;
        u16 family = req->rsk_ops->family;
        u32 connsid;
  static void selinux_inet_csk_clone(struct sock *newsk,
                                   const struct request_sock *req)
  {
 -      struct sk_security_struct *newsksec = newsk->sk_security;
 +      struct sk_security_struct *newsksec = selinux_sock(newsk);
  
        newsksec->sid = req->secid;
        newsksec->peer_sid = req->peer_secid;
  static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
  {
        u16 family = sk->sk_family;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
  
        /* handle mapped IPv4 packets arriving via IPv6 sockets */
        if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
@@@ -5586,14 -5585,24 +5586,14 @@@ static void selinux_req_classify_flow(c
        flic->flowic_secid = req->secid;
  }
  
 -static int selinux_tun_dev_alloc_security(void **security)
 +static int selinux_tun_dev_alloc_security(void *security)
  {
 -      struct tun_security_struct *tunsec;
 +      struct tun_security_struct *tunsec = selinux_tun_dev(security);
  
 -      tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
 -      if (!tunsec)
 -              return -ENOMEM;
        tunsec->sid = current_sid();
 -
 -      *security = tunsec;
        return 0;
  }
  
 -static void selinux_tun_dev_free_security(void *security)
 -{
 -      kfree(security);
 -}
 -
  static int selinux_tun_dev_create(void)
  {
        u32 sid = current_sid();
  
  static int selinux_tun_dev_attach_queue(void *security)
  {
 -      struct tun_security_struct *tunsec = security;
 +      struct tun_security_struct *tunsec = selinux_tun_dev(security);
  
        return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
                            TUN_SOCKET__ATTACH_QUEUE, NULL);
  
  static int selinux_tun_dev_attach(struct sock *sk, void *security)
  {
 -      struct tun_security_struct *tunsec = security;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct tun_security_struct *tunsec = selinux_tun_dev(security);
 +      struct sk_security_struct *sksec = selinux_sock(sk);
  
        /* we don't currently perform any NetLabel based labeling here and it
         * isn't clear that we would want to do so anyway; while we could apply
  
  static int selinux_tun_dev_open(void *security)
  {
 -      struct tun_security_struct *tunsec = security;
 +      struct tun_security_struct *tunsec = selinux_tun_dev(security);
        u32 sid = current_sid();
        int err;
  
@@@ -5743,7 -5752,7 +5743,7 @@@ static unsigned int selinux_ip_output(v
                        return NF_ACCEPT;
  
                /* standard practice, label using the parent socket */
 -              sksec = sk->sk_security;
 +              sksec = selinux_sock(sk);
                sid = sksec->sid;
        } else
                sid = SECINITSID_KERNEL;
@@@ -5766,7 -5775,7 +5766,7 @@@ static unsigned int selinux_ip_postrout
        sk = skb_to_full_sk(skb);
        if (sk == NULL)
                return NF_ACCEPT;
 -      sksec = sk->sk_security;
 +      sksec = selinux_sock(sk);
  
        ad_net_init_from_iif(&ad, &net, state->out->ifindex, state->pf);
        if (selinux_parse_skb(skb, &ad, NULL, 0, &proto))
@@@ -5855,7 -5864,7 +5855,7 @@@ static unsigned int selinux_ip_postrout
                u32 skb_sid;
                struct sk_security_struct *sksec;
  
 -              sksec = sk->sk_security;
 +              sksec = selinux_sock(sk);
                if (selinux_skb_peerlbl_sid(skb, family, &skb_sid))
                        return NF_DROP;
                /* At this point, if the returned skb peerlbl is SECSID_NULL
        } else {
                /* Locally generated packet, fetch the security label from the
                 * associated socket. */
 -              struct sk_security_struct *sksec = sk->sk_security;
 +              struct sk_security_struct *sksec = selinux_sock(sk);
                peer_sid = sksec->sid;
                secmark_perm = PACKET__SEND;
        }
@@@ -5927,7 -5936,7 +5927,7 @@@ static int selinux_netlink_send(struct 
        unsigned int data_len = skb->len;
        unsigned char *data = skb->data;
        struct nlmsghdr *nlh;
 -      struct sk_security_struct *sksec = sk->sk_security;
 +      struct sk_security_struct *sksec = selinux_sock(sk);
        u16 sclass = sksec->sclass;
        u32 perm;
  
@@@ -6641,8 -6650,8 +6641,8 @@@ static int selinux_inode_notifysecctx(s
   */
  static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
  {
 -      return __vfs_setxattr_noperm(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX,
 -                                   ctx, ctxlen, 0);
 +      return __vfs_setxattr_locked(&nop_mnt_idmap, dentry, XATTR_NAME_SELINUX,
 +                                   ctx, ctxlen, 0, NULL);
  }
  
  static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
@@@ -6661,7 -6670,11 +6661,7 @@@ static int selinux_key_alloc(struct ke
                             unsigned long flags)
  {
        const struct task_security_struct *tsec;
 -      struct key_security_struct *ksec;
 -
 -      ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL);
 -      if (!ksec)
 -              return -ENOMEM;
 +      struct key_security_struct *ksec = selinux_key(k);
  
        tsec = selinux_cred(cred);
        if (tsec->keycreate_sid)
        else
                ksec->sid = tsec->sid;
  
 -      k->security = ksec;
        return 0;
  }
  
 -static void selinux_key_free(struct key *k)
 -{
 -      struct key_security_struct *ksec = k->security;
 -
 -      k->security = NULL;
 -      kfree(ksec);
 -}
 -
  static int selinux_key_permission(key_ref_t key_ref,
                                  const struct cred *cred,
                                  enum key_need_perm need_perm)
  
        sid = cred_sid(cred);
        key = key_ref_to_ptr(key_ref);
 -      ksec = key->security;
 +      ksec = selinux_key(key);
  
        return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL);
  }
  
  static int selinux_key_getsecurity(struct key *key, char **_buffer)
  {
 -      struct key_security_struct *ksec = key->security;
 +      struct key_security_struct *ksec = selinux_key(key);
        char *context = NULL;
        unsigned len;
        int rc;
  #ifdef CONFIG_KEY_NOTIFICATIONS
  static int selinux_watch_key(struct key *key)
  {
 -      struct key_security_struct *ksec = key->security;
 +      struct key_security_struct *ksec = selinux_key(key);
        u32 sid = current_sid();
  
        return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, KEY__VIEW, NULL);
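
(The key hunks above drop the kzalloc()/kfree() pair from selinux_key_alloc()/selinux_key_free() and read the blob through selinux_key() instead. A plausible shape for that helper, assuming key->security now points at an LSM-managed blob sized by the new lbs_key entry further down; illustrative sketch, not copied from the tree:)

    /* Sketch: the per-key security blob is allocated and freed by the
     * LSM core, so the SELinux hooks only need to locate their slice.
     */
    static inline struct key_security_struct *selinux_key(const struct key *key)
    {
            return key->security + selinux_blob_sizes.lbs_key;
    }
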
@@@ -6789,13 -6811,23 +6789,13 @@@ static int selinux_ib_endport_manage_su
                            INFINIBAND_ENDPORT__MANAGE_SUBNET, &ad);
  }
  
 -static int selinux_ib_alloc_security(void **ib_sec)
 +static int selinux_ib_alloc_security(void *ib_sec)
  {
 -      struct ib_security_struct *sec;
 +      struct ib_security_struct *sec = selinux_ib(ib_sec);
  
 -      sec = kzalloc(sizeof(*sec), GFP_KERNEL);
 -      if (!sec)
 -              return -ENOMEM;
        sec->sid = current_sid();
 -
 -      *ib_sec = sec;
        return 0;
  }
 -
 -static void selinux_ib_free_security(void *ib_sec)
 -{
 -      kfree(ib_sec);
 -}
  #endif
  
  #ifdef CONFIG_BPF_SYSCALL
@@@ -6933,7 -6965,7 +6933,7 @@@ static void selinux_bpf_prog_free(struc
  }
  
  static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
-                                   struct path *path)
+                                   const struct path *path)
  {
        struct bpf_security_struct *bpfsec;
  
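
(The bpf_token_create hook now takes a const struct path *, so the SELinux implementation above is constified in lockstep with the hook declaration. A hedged sketch of what the matching declaration in include/linux/lsm_hook_defs.h would look like after this change; assumed form, not quoted from this excerpt:)

    /* Sketch: the declaration and every implementation must agree on
     * const-ness, otherwise LSM_HOOK_INIT(bpf_token_create, ...) in the
     * hook table below would no longer type-check.
     */
    LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token,
             union bpf_attr *attr, const struct path *path)
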
@@@ -6961,16 -6993,9 +6961,16 @@@ struct lsm_blob_sizes selinux_blob_size
        .lbs_file = sizeof(struct file_security_struct),
        .lbs_inode = sizeof(struct inode_security_struct),
        .lbs_ipc = sizeof(struct ipc_security_struct),
 +      .lbs_key = sizeof(struct key_security_struct),
        .lbs_msg_msg = sizeof(struct msg_security_struct),
 +#ifdef CONFIG_PERF_EVENTS
 +      .lbs_perf_event = sizeof(struct perf_event_security_struct),
 +#endif
 +      .lbs_sock = sizeof(struct sk_security_struct),
        .lbs_superblock = sizeof(struct superblock_security_struct),
        .lbs_xattr_count = SELINUX_INODE_INIT_XATTRS,
 +      .lbs_tun_dev = sizeof(struct tun_security_struct),
 +      .lbs_ib = sizeof(struct ib_security_struct),
  };
  
  #ifdef CONFIG_PERF_EVENTS
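
(The new lbs_key, lbs_sock, lbs_perf_event, lbs_tun_dev and lbs_ib entries tell the LSM core how much per-object security memory SELinux needs, which is why the dedicated alloc/free hooks elsewhere in this diff can go away. A rough sketch of the framework-side pattern, assuming each module's request is folded into a running total while the module keeps its starting offset; names are illustrative:)

    /* Sketch: accumulate one module's blob request into the shared total
     * and hand the module back the offset where its slice will live.
     */
    static void lsm_set_blob_size(int *need, int *total)
    {
            int offset = *total;    /* this module's slice starts here */

            *total += *need;        /* grow the shared per-object blob */
            *need = offset;         /* the module keeps its offset */
    }
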
@@@ -6997,12 -7022,24 +6997,12 @@@ static int selinux_perf_event_alloc(str
  {
        struct perf_event_security_struct *perfsec;
  
 -      perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL);
 -      if (!perfsec)
 -              return -ENOMEM;
 -
 +      perfsec = selinux_perf_event(event->security);
        perfsec->sid = current_sid();
 -      event->security = perfsec;
  
        return 0;
  }
  
 -static void selinux_perf_event_free(struct perf_event *event)
 -{
 -      struct perf_event_security_struct *perfsec = event->security;
 -
 -      event->security = NULL;
 -      kfree(perfsec);
 -}
 -
  static int selinux_perf_event_read(struct perf_event *event)
  {
        struct perf_event_security_struct *perfsec = event->security;
@@@ -7270,6 -7307,7 +7270,6 @@@ static struct security_hook_list selinu
        LSM_HOOK_INIT(secmark_refcount_inc, selinux_secmark_refcount_inc),
        LSM_HOOK_INIT(secmark_refcount_dec, selinux_secmark_refcount_dec),
        LSM_HOOK_INIT(req_classify_flow, selinux_req_classify_flow),
 -      LSM_HOOK_INIT(tun_dev_free_security, selinux_tun_dev_free_security),
        LSM_HOOK_INIT(tun_dev_create, selinux_tun_dev_create),
        LSM_HOOK_INIT(tun_dev_attach_queue, selinux_tun_dev_attach_queue),
        LSM_HOOK_INIT(tun_dev_attach, selinux_tun_dev_attach),
        LSM_HOOK_INIT(ib_pkey_access, selinux_ib_pkey_access),
        LSM_HOOK_INIT(ib_endport_manage_subnet,
                      selinux_ib_endport_manage_subnet),
 -      LSM_HOOK_INIT(ib_free_security, selinux_ib_free_security),
  #endif
  #ifdef CONFIG_SECURITY_NETWORK_XFRM
        LSM_HOOK_INIT(xfrm_policy_free_security, selinux_xfrm_policy_free),
  #endif
  
  #ifdef CONFIG_KEYS
 -      LSM_HOOK_INIT(key_free, selinux_key_free),
        LSM_HOOK_INIT(key_permission, selinux_key_permission),
        LSM_HOOK_INIT(key_getsecurity, selinux_key_getsecurity),
  #ifdef CONFIG_KEY_NOTIFICATIONS
  
  #ifdef CONFIG_PERF_EVENTS
        LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open),
 -      LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free),
        LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read),
        LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write),
  #endif