From: Jakub Kicinski
Date: Sat, 14 Nov 2020 17:13:40 +0000 (-0800)
Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-11-14

1) Add BTF generation for kernel modules and extend BTF infra in the kernel,
   e.g. support for split BTF loading and validation, from Andrii Nakryiko.

2) Support for pointers beyond pkt_end to recognize LLVM-generated patterns
   on inlined branch conditions, from Alexei Starovoitov.

3) Implement bpf_local_storage for task_struct for BPF LSM, from KP Singh.

4) Enable FENTRY/FEXIT/RAW_TP tracing programs to use the bpf_sk_storage
   infra, from Martin KaFai Lau.

5) Add XDP bulk APIs that introduce a defer/flush mechanism to optimize the
   XDP_REDIRECT path, from Lorenzo Bianconi.

6) Fix a potential (although rather theoretical) deadlock of hashtab in NMI
   context, from Song Liu.

7) Fixes for cross and out-of-tree builds of bpftool and runqslower, allowing
   builds for different target archs from the same source tree, from
   Jean-Philippe Brucker.

8) Fix error path in htab_map_alloc() triggered from syzbot, from Eric Dumazet.

9) Move functionality from test_tcpbpf_user into the test_progs framework so
   it can run in BPF CI, from Alexander Duyck.

10) Lift the hashtab key_size limit to allow keys larger than MAX_BPF_STACK,
    from Florian Lehner.

Note that for the fix from Song we have seen a sparse report on context
imbalance which requires changes in sparse itself for proper annotation
detection; this is currently being discussed among developers on
linux-sparse [0]. Once we have more clarification/guidance after their fix,
Song will follow up.

  [0] https://lore.kernel.org/linux-sparse/CAHk-=wh4bx8A8dHnX612MsDO13st6uzAz1mJ1PaHHVevJx_ZCw@mail.gmail.com/T/
      https://lore.kernel.org/linux-sparse/20201109221345.uklbp3lzgq6g42zb@ltop.local/T/

* git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (66 commits)
  net: mlx5: Add xdp tx return bulking support
  net: mvpp2: Add xdp tx return bulking support
  net: mvneta: Add xdp tx return bulking support
  net: page_pool: Add bulk support for ptr_ring
  net: xdp: Introduce bulking for xdp tx return path
  bpf: Expose bpf_d_path helper to sleepable LSM hooks
  bpf: Augment the set of sleepable LSM hooks
  bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP
  bpf: Allow using bpf_sk_storage in FENTRY/FEXIT/RAW_TP
  bpf: Rename some functions in bpf_sk_storage
  bpf: Folding omem_charge() into sk_storage_charge()
  selftests/bpf: Add asm tests for pkt vs pkt_end comparison.
  selftests/bpf: Add skb_pkt_end test
  bpf: Support for pointers beyond pkt_end.
  tools/bpf: Always run the *-clean recipes
  tools/bpf: Add bootstrap/ to .gitignore
  bpf: Fix NULL dereference in bpf_task_storage
  tools/bpftool: Fix build slowdown
  tools/runqslower: Build bpftool using HOSTCC
  tools/runqslower: Enable out-of-tree build
  ...
==================== Link: https://lore.kernel.org/r/20201114020819.29584-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- 07cbce2e466cabb46b7c2317bd456584aa4ceacc diff --combined include/linux/module.h index 6264617bab4d,20fce258ffba..c4e7a887f469 --- a/include/linux/module.h +++ b/include/linux/module.h @@@ -278,7 -278,7 +278,7 @@@ extern typeof(name) __mod_##type##__##n .version = _version, \ }; \ static const struct module_version_attribute \ - __used __attribute__ ((__section__ ("__modver"))) \ + __used __section("__modver") \ * __moduleparam_const __modver_attr = &___modver_attr #endif @@@ -475,6 -475,10 +475,10 @@@ struct module unsigned int num_bpf_raw_events; struct bpf_raw_event_map *bpf_raw_events; #endif + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES + unsigned int btf_data_size; + void *btf_data; + #endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; unsigned int num_jump_entries; @@@ -740,7 -744,7 +744,7 @@@ static inline bool within_module(unsign } /* Get/put a kernel symbol (calls should be symmetric) */ -#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) +#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) #define symbol_put(x) do { } while (0) #define symbol_put_addr(x) do { } while (0) diff --combined kernel/bpf/Makefile index c1b9f71ee6aa,f0b93ced5a7f..d1249340fd6b --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@@ -1,15 -1,12 +1,16 @@@ # SPDX-License-Identifier: GPL-2.0 obj-y := core.o -CFLAGS_core.o += $(call cc-disable-warning, override-init) +ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) +# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details +cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse +endif +CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o + obj-${CONFIG_BPF_LSM} += bpf_task_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o obj-$(CONFIG_BPF_JIT) += trampoline.o obj-$(CONFIG_BPF_SYSCALL) += btf.o diff --combined kernel/bpf/bpf_lsm.c index 56cc5a915f67,aed74b853415..553107f4706a --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@@ -27,11 -27,7 +27,11 @@@ noinline RET bpf_lsm_##NAME(__VA_ARGS__ #include #undef LSM_HOOK -#define BPF_LSM_SYM_PREFX "bpf_lsm_" +#define LSM_HOOK(RET, DEFAULT, NAME, ...) 
BTF_ID(func, bpf_lsm_##NAME) +BTF_SET_START(bpf_lsm_hooks) +#include +#undef LSM_HOOK +BTF_SET_END(bpf_lsm_hooks) int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) @@@ -42,7 -38,8 +42,7 @@@ return -EINVAL; } - if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name, - sizeof(BPF_LSM_SYM_PREFX) - 1)) { + if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) { bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n", prog->aux->attach_btf_id, prog->aux->attach_func_name); return -EINVAL; @@@ -63,11 -60,99 +63,99 @@@ bpf_lsm_func_proto(enum bpf_func_id fun return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_task_storage_get: + return &bpf_task_storage_get_proto; + case BPF_FUNC_task_storage_delete: + return &bpf_task_storage_delete_proto; default: return tracing_prog_func_proto(func_id, prog); } } + /* The set of hooks which are called without pagefaults disabled and are allowed + * to "sleep" and thus can be used for sleeable BPF programs. + */ + BTF_SET_START(sleepable_lsm_hooks) + BTF_ID(func, bpf_lsm_bpf) + BTF_ID(func, bpf_lsm_bpf_map) + BTF_ID(func, bpf_lsm_bpf_map_alloc_security) + BTF_ID(func, bpf_lsm_bpf_map_free_security) + BTF_ID(func, bpf_lsm_bpf_prog) + BTF_ID(func, bpf_lsm_bprm_check_security) + BTF_ID(func, bpf_lsm_bprm_committed_creds) + BTF_ID(func, bpf_lsm_bprm_committing_creds) + BTF_ID(func, bpf_lsm_bprm_creds_for_exec) + BTF_ID(func, bpf_lsm_bprm_creds_from_file) + BTF_ID(func, bpf_lsm_capget) + BTF_ID(func, bpf_lsm_capset) + BTF_ID(func, bpf_lsm_cred_prepare) + BTF_ID(func, bpf_lsm_file_ioctl) + BTF_ID(func, bpf_lsm_file_lock) + BTF_ID(func, bpf_lsm_file_open) + BTF_ID(func, bpf_lsm_file_receive) + BTF_ID(func, bpf_lsm_inet_conn_established) + BTF_ID(func, bpf_lsm_inode_create) + BTF_ID(func, bpf_lsm_inode_free_security) + BTF_ID(func, bpf_lsm_inode_getattr) + BTF_ID(func, bpf_lsm_inode_getxattr) + BTF_ID(func, bpf_lsm_inode_mknod) + BTF_ID(func, bpf_lsm_inode_need_killpriv) + BTF_ID(func, bpf_lsm_inode_post_setxattr) + BTF_ID(func, bpf_lsm_inode_readlink) + BTF_ID(func, bpf_lsm_inode_rename) + BTF_ID(func, bpf_lsm_inode_rmdir) + BTF_ID(func, bpf_lsm_inode_setattr) + BTF_ID(func, bpf_lsm_inode_setxattr) + BTF_ID(func, bpf_lsm_inode_symlink) + BTF_ID(func, bpf_lsm_inode_unlink) + BTF_ID(func, bpf_lsm_kernel_module_request) + BTF_ID(func, bpf_lsm_kernfs_init_security) + BTF_ID(func, bpf_lsm_key_free) + BTF_ID(func, bpf_lsm_mmap_file) + BTF_ID(func, bpf_lsm_netlink_send) + BTF_ID(func, bpf_lsm_path_notify) + BTF_ID(func, bpf_lsm_release_secctx) + BTF_ID(func, bpf_lsm_sb_alloc_security) + BTF_ID(func, bpf_lsm_sb_eat_lsm_opts) + BTF_ID(func, bpf_lsm_sb_kern_mount) + BTF_ID(func, bpf_lsm_sb_mount) + BTF_ID(func, bpf_lsm_sb_remount) + BTF_ID(func, bpf_lsm_sb_set_mnt_opts) + BTF_ID(func, bpf_lsm_sb_show_options) + BTF_ID(func, bpf_lsm_sb_statfs) + BTF_ID(func, bpf_lsm_sb_umount) + BTF_ID(func, bpf_lsm_settime) + BTF_ID(func, bpf_lsm_socket_accept) + BTF_ID(func, bpf_lsm_socket_bind) + BTF_ID(func, bpf_lsm_socket_connect) + BTF_ID(func, bpf_lsm_socket_create) + BTF_ID(func, bpf_lsm_socket_getpeername) + BTF_ID(func, bpf_lsm_socket_getpeersec_dgram) + BTF_ID(func, bpf_lsm_socket_getsockname) + BTF_ID(func, bpf_lsm_socket_getsockopt) + BTF_ID(func, bpf_lsm_socket_listen) + BTF_ID(func, bpf_lsm_socket_post_create) + BTF_ID(func, 
bpf_lsm_socket_recvmsg) + BTF_ID(func, bpf_lsm_socket_sendmsg) + BTF_ID(func, bpf_lsm_socket_shutdown) + BTF_ID(func, bpf_lsm_socket_socketpair) + BTF_ID(func, bpf_lsm_syslog) + BTF_ID(func, bpf_lsm_task_alloc) + BTF_ID(func, bpf_lsm_task_getsecid) + BTF_ID(func, bpf_lsm_task_prctl) + BTF_ID(func, bpf_lsm_task_setscheduler) + BTF_ID(func, bpf_lsm_task_to_inode) + BTF_SET_END(sleepable_lsm_hooks) + + bool bpf_lsm_is_sleepable_hook(u32 btf_id) + { + return btf_id_set_contains(&sleepable_lsm_hooks, btf_id); + } + const struct bpf_prog_ops lsm_prog_ops = { }; diff --combined kernel/bpf/hashtab.c index 1fccba6e88c4,7bf18d92af41..ec46266aaf1c --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@@ -86,6 -86,9 +86,9 @@@ struct bucket }; }; + #define HASHTAB_MAP_LOCK_COUNT 8 + #define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1) + struct bpf_htab { struct bpf_map map; struct bucket *buckets; @@@ -99,6 -102,8 +102,8 @@@ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ u32 hashrnd; + struct lock_class_key lockdep_key; + int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT]; }; /* each htab element is struct htab_elem + key + value */ @@@ -138,33 -143,53 +143,53 @@@ static void htab_init_buckets(struct bp for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); - if (htab_use_raw_lock(htab)) + if (htab_use_raw_lock(htab)) { raw_spin_lock_init(&htab->buckets[i].raw_lock); - else + lockdep_set_class(&htab->buckets[i].raw_lock, + &htab->lockdep_key); + } else { spin_lock_init(&htab->buckets[i].lock); + lockdep_set_class(&htab->buckets[i].lock, + &htab->lockdep_key); + } } } - static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab, - struct bucket *b) + static inline int htab_lock_bucket(const struct bpf_htab *htab, + struct bucket *b, u32 hash, + unsigned long *pflags) { unsigned long flags; + hash = hash & HASHTAB_MAP_LOCK_MASK; + + migrate_disable(); + if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { + __this_cpu_dec(*(htab->map_locked[hash])); + migrate_enable(); + return -EBUSY; + } + if (htab_use_raw_lock(htab)) raw_spin_lock_irqsave(&b->raw_lock, flags); else spin_lock_irqsave(&b->lock, flags); - return flags; + *pflags = flags; + + return 0; } static inline void htab_unlock_bucket(const struct bpf_htab *htab, - struct bucket *b, + struct bucket *b, u32 hash, unsigned long flags) { + hash = hash & HASHTAB_MAP_LOCK_MASK; if (htab_use_raw_lock(htab)) raw_spin_unlock_irqrestore(&b->raw_lock, flags); else spin_unlock_irqrestore(&b->lock, flags); + __this_cpu_dec(*(htab->map_locked[hash])); + migrate_enable(); } static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); @@@ -390,17 -415,11 +415,11 @@@ static int htab_map_alloc_check(union b attr->value_size == 0) return -EINVAL; - if (attr->key_size > MAX_BPF_STACK) - /* eBPF programs initialize keys on stack, so they cannot be - * larger than max stack size - */ - return -E2BIG; - - if (attr->value_size >= KMALLOC_MAX_SIZE - - MAX_BPF_STACK - sizeof(struct htab_elem)) - /* if value_size is bigger, the user space won't be able to - * access the elements via bpf syscall. This check also makes - * sure that the elem_size doesn't overflow and it's + if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - + sizeof(struct htab_elem)) + /* if key_size + value_size is bigger, the user space won't be + * able to access the elements via bpf syscall. 
This check + * also makes sure that the elem_size doesn't overflow and it's * kmalloc-able later in htab_map_update_elem() */ return -E2BIG; @@@ -422,13 -441,15 +441,15 @@@ static struct bpf_map *htab_map_alloc(u bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); struct bpf_htab *htab; + int err, i; u64 cost; - int err; htab = kzalloc(sizeof(*htab), GFP_USER); if (!htab) return ERR_PTR(-ENOMEM); + lockdep_register_key(&htab->lockdep_key); + bpf_map_init_from_attr(&htab->map, attr); if (percpu_lru) { @@@ -480,6 -501,13 +501,13 @@@ if (!htab->buckets) goto free_charge; + for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) { + htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int), + sizeof(int), GFP_USER); + if (!htab->map_locked[i]) + goto free_map_locked; + } + if (htab->map.map_flags & BPF_F_ZERO_SEED) htab->hashrnd = 0; else @@@ -490,7 -518,7 +518,7 @@@ if (prealloc) { err = prealloc_init(htab); if (err) - goto free_buckets; + goto free_map_locked; if (!percpu && !lru) { /* lru itself can remove the least used element, so @@@ -506,11 -534,14 +534,14 @@@ free_prealloc: prealloc_destroy(htab); - free_buckets: + free_map_locked: + for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) + free_percpu(htab->map_locked[i]); bpf_map_area_free(htab->buckets); free_charge: bpf_map_charge_finish(&htab->map.memory); free_htab: + lockdep_unregister_key(&htab->lockdep_key); kfree(htab); return ERR_PTR(err); } @@@ -687,12 -718,15 +718,15 @@@ static bool htab_lru_map_delete_node(vo struct hlist_nulls_node *n; unsigned long flags; struct bucket *b; + int ret; tgt_l = container_of(node, struct htab_elem, lru_node); b = __select_bucket(htab, tgt_l->hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags); + if (ret) + return false; hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { @@@ -700,7 -734,7 +734,7 @@@ break; } - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, tgt_l->hash, flags); return l == tgt_l; } @@@ -821,32 -855,6 +855,32 @@@ static void pcpu_copy_value(struct bpf_ } } +static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, + void *value, bool onallcpus) +{ + /* When using prealloc and not setting the initial value on all cpus, + * zero-fill element values for other cpus (just as what happens when + * not using prealloc). Otherwise, bpf program has no way to ensure + * known initial values for cpus other than current one + * (onallcpus=false always when coming from bpf prog). 
+ */ + if (htab_is_prealloc(htab) && !onallcpus) { + u32 size = round_up(htab->map.value_size, 8); + int current_cpu = raw_smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == current_cpu) + bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, + size); + else + memset(per_cpu_ptr(pptr, cpu), 0, size); + } + } else { + pcpu_copy_value(htab, pptr, value, onallcpus); + } +} + static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) { return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && @@@ -917,7 -925,7 +951,7 @@@ static struct htab_elem *alloc_htab_ele } } - pcpu_copy_value(htab, pptr, value, onallcpus); + pcpu_init_value(htab, pptr, value, onallcpus); if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); @@@ -998,7 -1006,9 +1032,9 @@@ static int htab_map_update_elem(struct */ } - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@@ -1039,7 -1049,7 +1075,7 @@@ } ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); return ret; } @@@ -1077,7 -1087,9 +1113,9 @@@ static int htab_lru_map_update_elem(str return -ENOMEM; memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@@ -1096,7 -1108,7 +1134,7 @@@ ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); if (ret) bpf_lru_push_free(&htab->lru, &l_new->lru_node); @@@ -1131,7 -1143,9 +1169,9 @@@ static int __htab_percpu_map_update_ele b = __select_bucket(htab, hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@@ -1154,7 -1168,7 +1194,7 @@@ } ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); return ret; } @@@ -1194,7 -1208,9 +1234,9 @@@ static int __htab_lru_percpu_map_update return -ENOMEM; } - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l_old = lookup_elem_raw(head, hash, key, key_size); @@@ -1209,14 -1225,14 +1251,14 @@@ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), value, onallcpus); } else { - pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), + pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; } ret = 0; err: - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); if (l_new) bpf_lru_push_free(&htab->lru, &l_new->lru_node); return ret; @@@ -1244,7 -1260,7 +1286,7 @@@ static int htab_map_delete_elem(struct struct htab_elem *l; unsigned long flags; u32 hash, key_size; - int ret = -ENOENT; + int ret; WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); @@@ -1254,17 -1270,20 +1296,20 @@@ b = __select_bucket(htab, hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l = lookup_elem_raw(head, hash, key, key_size); if (l) { hlist_nulls_del_rcu(&l->hash_node); free_htab_elem(htab, l); - ret = 0; + } else { + ret = -ENOENT; } - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); return ret; } @@@ -1276,7 -1295,7 +1321,7 @@@ static int 
htab_lru_map_delete_elem(str struct htab_elem *l; unsigned long flags; u32 hash, key_size; - int ret = -ENOENT; + int ret; WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held()); @@@ -1286,16 -1305,18 +1331,18 @@@ b = __select_bucket(htab, hash); head = &b->head; - flags = htab_lock_bucket(htab, b); + ret = htab_lock_bucket(htab, b, hash, &flags); + if (ret) + return ret; l = lookup_elem_raw(head, hash, key, key_size); - if (l) { + if (l) hlist_nulls_del_rcu(&l->hash_node); - ret = 0; - } + else + ret = -ENOENT; - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, hash, flags); if (l) bpf_lru_push_free(&htab->lru, &l->lru_node); return ret; @@@ -1321,6 -1342,7 +1368,7 @@@ static void delete_all_elements(struct static void htab_map_free(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + int i; /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. * bpf_free_used_maps() is called after bpf prog is no longer executing. @@@ -1338,6 -1360,9 +1386,9 @@@ free_percpu(htab->extra_elems); bpf_map_area_free(htab->buckets); + for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) + free_percpu(htab->map_locked[i]); + lockdep_unregister_key(&htab->lockdep_key); kfree(htab); } @@@ -1441,8 -1466,11 +1492,11 @@@ again_nocopy b = &htab->buckets[batch]; head = &b->head; /* do not grab the lock unless need it (bucket_cnt > 0). */ - if (locked) - flags = htab_lock_bucket(htab, b); + if (locked) { + ret = htab_lock_bucket(htab, b, batch, &flags); + if (ret) + goto next_batch; + } bucket_cnt = 0; hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) @@@ -1459,7 -1487,7 +1513,7 @@@ /* Note that since bucket_cnt > 0 here, it is implicit * that the locked was grabbed, so release it. */ - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, batch, flags); rcu_read_unlock(); bpf_enable_instrumentation(); goto after_loop; @@@ -1470,7 -1498,7 +1524,7 @@@ /* Note that since bucket_cnt > 0 here, it is implicit * that the locked was grabbed, so release it. */ - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, batch, flags); rcu_read_unlock(); bpf_enable_instrumentation(); kvfree(keys); @@@ -1523,7 -1551,7 +1577,7 @@@ dst_val += value_size; } - htab_unlock_bucket(htab, b, flags); + htab_unlock_bucket(htab, b, batch, flags); locked = false; while (node_to_free) { diff --combined lib/Kconfig.debug index fc6e120045b0,1e78faaf20a5..826a205ffd1c --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@@ -274,6 -274,15 +274,15 @@@ config DEBUG_INFO_BT Turning this on expects presence of pahole tool, which will convert DWARF type info into equivalent deduplicated BTF type info. + config PAHOLE_HAS_SPLIT_BTF + def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") + + config DEBUG_INFO_BTF_MODULES + def_bool y + depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF + help + Generate compact split BTF type information for kernel modules. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" help @@@ -1870,7 -1879,6 +1879,7 @@@ config KCO depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC + select SKB_EXTENSIONS if NET help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). @@@ -2447,6 -2455,4 +2456,6 @@@ config HYPERV_TESTIN endmenu # "Kernel Testing and Coverage" +source "Documentation/Kconfig" + endmenu # Kernel hacking
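
The module BTF wired up above (CONFIG_DEBUG_INFO_BTF_MODULES and the btf_data/btf_data_size
fields in struct module) pairs with split-BTF support on the libbpf side. Below is a minimal
userspace sketch of consuming it, assuming a libbpf recent enough to provide btf__parse_split()
and btf__type_cnt(); the module name and sysfs paths are illustrative, not taken from this merge.

/* Sketch: parse a module's split BTF on top of the vmlinux base BTF.
 * The module name and paths are illustrative assumptions.
 */
#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct btf *base, *mod;

	base = btf__parse("/sys/kernel/btf/vmlinux", NULL);
	if (libbpf_get_error(base)) {
		fprintf(stderr, "failed to parse vmlinux BTF\n");
		return 1;
	}

	/* Module BTF only carries module-specific types and refers to
	 * vmlinux types by ID, so it must be parsed against the base.
	 */
	mod = btf__parse_split("/sys/kernel/btf/nf_nat", base);
	if (libbpf_get_error(mod)) {
		fprintf(stderr, "failed to parse module BTF\n");
		btf__free(base);
		return 1;
	}

	printf("types visible through module BTF: %u\n", btf__type_cnt(mod));

	btf__free(mod);
	btf__free(base);
	return 0;
}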
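
Point 3 of the summary (task_struct local storage for BPF LSM) can be illustrated with a small
BPF program sketch. The map, program, and field names below are made up for the example; only
BPF_MAP_TYPE_TASK_STORAGE, bpf_task_storage_get() and the lsm attach type reflect the feature
itself, and the hook/argument layout follows the existing BPF LSM selftest convention. It
assumes headers (vmlinux.h, bpf_helpers.h) generated from a kernel with this series applied.

/* Hypothetical sketch: count file_mprotect() calls per task using the new
 * task local storage map type. Names are illustrative.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct mprotect_stats {
	__u64 calls;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* required for local storage maps */
	__type(key, int);
	__type(value, struct mprotect_stats);
} mprotect_stats_map SEC(".maps");

SEC("lsm/file_mprotect")
int BPF_PROG(count_mprotect, struct vm_area_struct *vma,
	     unsigned long reqprot, unsigned long prot, int ret)
{
	struct mprotect_stats *stats;

	/* Create-on-first-use storage attached to the current task. */
	stats = bpf_task_storage_get(&mprotect_stats_map,
				     bpf_get_current_task_btf(), 0,
				     BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (stats)
		__sync_fetch_and_add(&stats->calls, 1);

	/* Preserve the verdict of any previously attached program. */
	return ret;
}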
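
Point 10 (lifting the hashtab key_size limit) is visible from the syscall side: a hash map key
is no longer capped at MAX_BPF_STACK (512 bytes), only by key_size + value_size staying below
KMALLOC_MAX_SIZE - sizeof(struct htab_elem). A hedged userspace sketch, assuming a newer libbpf
(>= 0.7) for bpf_map_create() rather than the bpf_create_map() wrappers contemporary with this
merge:

/* Sketch: create a BPF_MAP_TYPE_HASH with a 1 KiB key and update it via the
 * syscall interface. Requires CAP_BPF or root.
 */
#include <stdio.h>
#include <string.h>
#include <bpf/bpf.h>

int main(void)
{
	char key[1024] = {}, value[8] = {};
	int map_fd, err;

	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "big_key_hash",
				sizeof(key), sizeof(value), 128, NULL);
	if (map_fd < 0) {
		perror("bpf_map_create");
		return 1;
	}

	memcpy(key, "example-key", 11);
	err = bpf_map_update_elem(map_fd, key, value, BPF_ANY);
	if (err)
		perror("bpf_map_update_elem");
	else
		printf("stored element with %zu-byte key\n", sizeof(key));
	return 0;
}

BPF programs constructing such keys on the BPF stack are still bound by the 512-byte stack
limit, so in practice larger keys come from map memory or from userspace as above.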