Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf...
author Jakub Kicinski <[email protected]>
Sat, 24 Jun 2023 21:52:28 +0000 (14:52 -0700)
committer Jakub Kicinski <[email protected]>
Sat, 24 Jun 2023 21:52:28 +0000 (14:52 -0700)
Daniel Borkmann says:

====================
pull-request: bpf-next 2023-06-23

We've added 49 non-merge commits during the last 24 day(s) which contain
a total of 70 files changed, 1935 insertions(+), 442 deletions(-).

The main changes are:

1) Extend bpf_fib_lookup helper to allow passing the route table ID,
   from Louis DeLosSantos.

2) Fix regsafe() in verifier to call check_ids() for scalar registers,
   from Eduard Zingerman.

3) Extend the set of cpumask kfuncs with bpf_cpumask_first_and()
   and a rework of bpf_cpumask_any*() kfuncs. Additionally,
   add selftests, from David Vernet.

4) Fix socket lookup BPF helpers for tc/XDP to respect VRF bindings,
   from Gilad Sever.

5) Change bpf_link_put() to use workqueue unconditionally to fix it
   under PREEMPT_RT, from Sebastian Andrzej Siewior.

6) Follow-ups to address issues in the bpf_refcount shared ownership
   implementation, from Dave Marchevsky.

7) A few general refactorings to BPF map and program creation permissions
   checks which were part of the BPF token series, from Andrii Nakryiko.

8) Various fixes for benchmark framework and add a new benchmark
   for BPF memory allocator to BPF selftests, from Hou Tao.

9) Documentation improvements around iterators and trusted pointers,
   from Anton Protopopov.

10) Small cleanup in verifier to improve allocated object check,
    from Daniel T. Lee.

11) Improve performance of bpf_xdp_pointer() by avoiding access
    to shared_info when XDP packet does not have frags,
    from Jesper Dangaard Brouer.

12) Silence a harmless syzbot-reported warning in btf_type_id_size(),
    from Yonghong Song.

13) Remove duplicate bpfilter_umh_cleanup in favor of umd_cleanup_helper,
    from Jarkko Sakkinen.

14) Fix BPF selftests build for resolve_btfids under custom HOSTCFLAGS,
    from Viktor Malik.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (49 commits)
  bpf, docs: Document existing macros instead of deprecated
  bpf, docs: BPF Iterator Document
  selftests/bpf: Fix compilation failure for prog vrf_socket_lookup
  selftests/bpf: Add vrf_socket_lookup tests
  bpf: Fix bpf socket lookup from tc/xdp to respect socket VRF bindings
  bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC hookpoint
  bpf: Factor out socket lookup functions for the TC hookpoint.
  selftests/bpf: Set the default value of consumer_cnt as 0
  selftests/bpf: Ensure that next_cpu() returns a valid CPU number
  selftests/bpf: Output the correct error code for pthread APIs
  selftests/bpf: Use producer_cnt to allocate local counter array
  xsk: Remove unused inline function xsk_buff_discard()
  bpf: Keep BPF_PROG_LOAD permission checks clear of validations
  bpf: Centralize permissions checks for all BPF map types
  bpf: Inline map creation logic in map_create() function
  bpf: Move unprivileged checks into map_create() and bpf_prog_load()
  bpf: Remove in_atomic() from bpf_link_put().
  selftests/bpf: Verify that check_ids() is used for scalars in regsafe()
  bpf: Verify scalar ids mapping in regsafe() using check_ids()
  selftests/bpf: Check if mark_chain_precision() follows scalar ids
  ...
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
include/linux/netdevice.h
include/uapi/linux/bpf.h
kernel/bpf/btf.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
tools/include/uapi/linux/bpf.h

diff --combined include/linux/netdevice.h
index acf706d49c2b2eefec2007b0d4e511ce94ceda3d,8c95ebbcf2034f5d956bc0a74a1f130b15aa05c7..b828c7a75be20b76c87e8ab7152d4bd55ee83438
@@@ -620,7 -620,7 +620,7 @@@ struct netdev_queue 
        netdevice_tracker       dev_tracker;
  
        struct Qdisc __rcu      *qdisc;
 -      struct Qdisc            *qdisc_sleeping;
 +      struct Qdisc __rcu      *qdisc_sleeping;
  #ifdef CONFIG_SYSFS
        struct kobject          kobj;
  #endif
@@@ -768,11 -768,8 +768,11 @@@ static inline void rps_record_sock_flow
                /* We only give a hint, preemption can change CPU under us */
                val |= raw_smp_processor_id();
  
 -              if (table->ents[index] != val)
 -                      table->ents[index] = val;
 +              /* The following WRITE_ONCE() is paired with the READ_ONCE()
 +               * here, and another one in get_rps_cpu().
 +               */
 +              if (READ_ONCE(table->ents[index]) != val)
 +                      WRITE_ONCE(table->ents[index], val);
        }
  }
  
@@@ -3124,10 -3121,6 +3124,10 @@@ struct net_device *netdev_sk_get_lowest
                                            struct sock *sk);
  struct net_device *dev_get_by_index(struct net *net, int ifindex);
  struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 +struct net_device *netdev_get_by_index(struct net *net, int ifindex,
 +                                     netdevice_tracker *tracker, gfp_t gfp);
 +struct net_device *netdev_get_by_name(struct net *net, const char *name,
 +                                    netdevice_tracker *tracker, gfp_t gfp);
  struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
  struct net_device *dev_get_by_napi_id(unsigned int napi_id);
  int dev_restart(struct net_device *dev);
@@@ -4831,6 -4824,13 +4831,6 @@@ int skb_crc32c_csum_help(struct sk_buf
  int skb_csum_hwoffload_help(struct sk_buff *skb,
                            const netdev_features_t features);
  
 -struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 -                                netdev_features_t features, bool tx_path);
 -struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
 -                                  netdev_features_t features, __be16 type);
 -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 -                                  netdev_features_t features);
 -
  struct netdev_bonding_info {
        ifslave slave;
        ifbond  master;
@@@ -4853,6 -4853,11 +4853,6 @@@ static inline void ethtool_notify(struc
  }
  #endif
  
 -static inline
 -struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
 -{
 -      return __skb_gso_segment(skb, features, true);
 -}
  __be16 skb_network_protocol(struct sk_buff *skb, int *depth);
  
  static inline bool can_checksum_protocol(netdev_features_t features,
@@@ -4979,7 -4984,6 +4979,7 @@@ netdev_features_t passthru_features_che
                                          struct net_device *dev,
                                          netdev_features_t features);
  netdev_features_t netif_skb_features(struct sk_buff *skb);
 +void skb_warn_bad_offload(const struct sk_buff *skb);
  
  static inline bool net_gso_ok(netdev_features_t features, int gso_type)
  {
@@@ -5028,6 -5032,19 +5028,6 @@@ void netif_set_tso_max_segs(struct net_
  void netif_inherit_tso_max(struct net_device *to,
                           const struct net_device *from);
  
 -static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
 -                                      int pulled_hlen, u16 mac_offset,
 -                                      int mac_len)
 -{
 -      skb->protocol = protocol;
 -      skb->encapsulation = 1;
 -      skb_push(skb, pulled_hlen);
 -      skb_reset_transport_header(skb);
 -      skb->mac_header = mac_offset;
 -      skb->network_header = skb->mac_header + mac_len;
 -      skb->mac_len = mac_len;
 -}
 -
  static inline bool netif_is_macsec(const struct net_device *dev)
  {
        return dev->priv_flags & IFF_MACSEC;
@@@ -5073,6 -5090,15 +5073,15 @@@ static inline bool netif_is_l3_slave(co
        return dev->priv_flags & IFF_L3MDEV_SLAVE;
  }
  
+ static inline int dev_sdif(const struct net_device *dev)
+ {
+ #ifdef CONFIG_NET_L3_MASTER_DEV
+       if (netif_is_l3_slave(dev))
+               return dev->ifindex;
+ #endif
+       return 0;
+ }
  static inline bool netif_is_bridge_master(const struct net_device *dev)
  {
        return dev->priv_flags & IFF_EBRIDGE;
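
A note on the dev_sdif() helper added above: it centralizes the "source device ifindex for an L3 slave" lookup used by the tc/XDP socket-lookup VRF fix (item 4 of the pull request). As a minimal, hedged sketch of the kind of program the fix affects, the tc classifier below calls bpf_sk_lookup_tcp(); with the fix, the lookup from a tc hook is scoped to the VRF the ingress device is enslaved to rather than matching sockets bound to other VRFs. The section name, addresses and ports are illustrative assumptions, not taken from the series' selftests.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical tc classifier: looks up a TCP socket for a hard-coded
 * 4-tuple; a real program would parse the tuple from the packet.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int vrf_scoped_lookup(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;

	tuple.ipv4.saddr = bpf_htonl(0x0a000001);	/* 10.0.0.1, illustrative */
	tuple.ipv4.daddr = bpf_htonl(0x0a000002);	/* 10.0.0.2, illustrative */
	tuple.ipv4.sport = bpf_htons(40000);
	tuple.ipv4.dport = bpf_htons(80);

	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (sk)
		bpf_sk_release(sk);	/* lookups take a reference; always release */
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";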
diff --combined include/uapi/linux/bpf.h
index 6961a7b700281037cd6fbb8e05a63c21978ff5c5,a7b5e91dd768e7d5b716272488295f828aa9aa1c..60a9d59beeabba9bdbaa94946aa0c28e3f435463
@@@ -1035,7 -1035,6 +1035,7 @@@ enum bpf_attach_type 
        BPF_TRACE_KPROBE_MULTI,
        BPF_LSM_CGROUP,
        BPF_STRUCT_OPS,
 +      BPF_NETFILTER,
        __MAX_BPF_ATTACH_TYPE
  };
  
@@@ -3178,6 -3177,10 +3178,10 @@@ union bpf_attr 
   *            **BPF_FIB_LOOKUP_DIRECT**
   *                    Do a direct table lookup vs full lookup using FIB
   *                    rules.
+  *            **BPF_FIB_LOOKUP_TBID**
+  *                    Used with BPF_FIB_LOOKUP_DIRECT.
+  *                    Use the routing table ID present in *params*->tbid
+  *                    for the fib lookup.
   *            **BPF_FIB_LOOKUP_OUTPUT**
   *                    Perform lookup from an egress perspective (default is
   *                    ingress).
@@@ -6832,6 -6835,7 +6836,7 @@@ enum 
        BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
        BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
        BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+       BPF_FIB_LOOKUP_TBID    = (1U << 3),
  };
  
  enum {
@@@ -6892,9 -6896,19 +6897,19 @@@ struct bpf_fib_lookup 
                __u32           ipv6_dst[4];  /* in6_addr; network order */
        };
  
-       /* output */
-       __be16  h_vlan_proto;
-       __be16  h_vlan_TCI;
+       union {
+               struct {
+                       /* output */
+                       __be16  h_vlan_proto;
+                       __be16  h_vlan_TCI;
+               };
+               /* input: when accompanied with the
+                * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+                * specific routing table to use for the fib lookup.
+                */
+               __u32   tbid;
+       };
        __u8    smac[6];     /* ETH_ALEN */
        __u8    dmac[6];     /* ETH_ALEN */
  };
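
To show how the new BPF_FIB_LOOKUP_TBID flag and tbid field documented above are meant to be used together with BPF_FIB_LOOKUP_DIRECT, here is a minimal XDP sketch. The table number, addresses and section name are assumptions for illustration and do not come from the series' selftests.

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define AF_INET 2	/* from <sys/socket.h>; redefined for a freestanding BPF object */

SEC("xdp")
int fib_lookup_tbid(struct xdp_md *ctx)
{
	struct bpf_fib_lookup params = {};
	long rc;

	params.family   = AF_INET;
	params.ifindex  = ctx->ingress_ifindex;
	params.ipv4_src = bpf_htonl(0xc0a80101);	/* 192.168.1.1, illustrative */
	params.ipv4_dst = bpf_htonl(0xc0a80201);	/* 192.168.2.1, illustrative */
	params.tbid     = 100;				/* consult routing table 100 */

	rc = bpf_fib_lookup(ctx, &params, sizeof(params),
			    BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID);
	if (rc == BPF_FIB_LKUP_RET_SUCCESS)
		/* a real forwarder would rewrite MACs from params.smac/dmac
		 * and XDP_TX or redirect; here we only log and pass
		 */
		bpf_printk("next hop dmac starts %02x:%02x",
			   params.dmac[0], params.dmac[1]);
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";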
diff --combined kernel/bpf/btf.c
index bbcae434fda540eb99aea7ce6099c62b75cfc87e,bd2cac057928bb848856d9e4f90339983cc18995..29fe2109929853dc5d8ecd02e2153c3e2d9b823b
@@@ -492,25 -492,26 +492,26 @@@ static bool btf_type_is_fwd(const struc
        return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
  }
  
- static bool btf_type_nosize(const struct btf_type *t)
+ static bool btf_type_is_datasec(const struct btf_type *t)
  {
-       return btf_type_is_void(t) || btf_type_is_fwd(t) ||
-              btf_type_is_func(t) || btf_type_is_func_proto(t);
+       return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
  }
  
- static bool btf_type_nosize_or_null(const struct btf_type *t)
+ static bool btf_type_is_decl_tag(const struct btf_type *t)
  {
-       return !t || btf_type_nosize(t);
+       return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
  }
  
- static bool btf_type_is_datasec(const struct btf_type *t)
+ static bool btf_type_nosize(const struct btf_type *t)
  {
-       return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
+       return btf_type_is_void(t) || btf_type_is_fwd(t) ||
+              btf_type_is_func(t) || btf_type_is_func_proto(t) ||
+              btf_type_is_decl_tag(t);
  }
  
- static bool btf_type_is_decl_tag(const struct btf_type *t)
+ static bool btf_type_nosize_or_null(const struct btf_type *t)
  {
-       return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
+       return !t || btf_type_nosize(t);
  }
  
  static bool btf_type_is_decl_tag_target(const struct btf_type *t)
@@@ -751,12 -752,13 +752,12 @@@ static bool btf_name_offset_valid(cons
        return offset < btf->hdr.str_len;
  }
  
 -static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
 +static bool __btf_name_char_ok(char c, bool first)
  {
        if ((first ? !isalpha(c) :
                     !isalnum(c)) &&
            c != '_' &&
 -          ((c == '.' && !dot_ok) ||
 -            c != '.'))
 +          c != '.')
                return false;
        return true;
  }
@@@ -773,20 -775,20 +774,20 @@@ static const char *btf_str_by_offset(co
        return NULL;
  }
  
 -static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
 +static bool __btf_name_valid(const struct btf *btf, u32 offset)
  {
        /* offset must be valid */
        const char *src = btf_str_by_offset(btf, offset);
        const char *src_limit;
  
 -      if (!__btf_name_char_ok(*src, true, dot_ok))
 +      if (!__btf_name_char_ok(*src, true))
                return false;
  
        /* set a limit on identifier length */
        src_limit = src + KSYM_NAME_LEN;
        src++;
        while (*src && src < src_limit) {
 -              if (!__btf_name_char_ok(*src, false, dot_ok))
 +              if (!__btf_name_char_ok(*src, false))
                        return false;
                src++;
        }
        return !*src;
  }
  
 -/* Only C-style identifier is permitted. This can be relaxed if
 - * necessary.
 - */
  static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
  {
 -      return __btf_name_valid(btf, offset, false);
 +      return __btf_name_valid(btf, offset);
  }
  
  static bool btf_name_valid_section(const struct btf *btf, u32 offset)
  {
 -      return __btf_name_valid(btf, offset, true);
 +      return __btf_name_valid(btf, offset);
  }
  
  static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
@@@ -4425,7 -4430,7 +4426,7 @@@ static s32 btf_var_check_meta(struct bt
        }
  
        if (!t->name_off ||
 -          !__btf_name_valid(env->btf, t->name_off, true)) {
 +          !__btf_name_valid(env->btf, t->name_off)) {
                btf_verifier_log_type(env, t, "Invalid name");
                return -EINVAL;
        }
diff --combined kernel/bpf/syscall.c
index 4497b193dd200932d8f2eb2071473b05398b03d1,a75c54b6f8a33ace98b4d4079441f04e6873aa93..a2aef900519c23db385c44db0ac596fc85ac64a7
@@@ -109,37 -109,6 +109,6 @@@ const struct bpf_map_ops bpf_map_offloa
        .map_mem_usage = bpf_map_offload_map_mem_usage,
  };
  
- static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
- {
-       const struct bpf_map_ops *ops;
-       u32 type = attr->map_type;
-       struct bpf_map *map;
-       int err;
-       if (type >= ARRAY_SIZE(bpf_map_types))
-               return ERR_PTR(-EINVAL);
-       type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
-       ops = bpf_map_types[type];
-       if (!ops)
-               return ERR_PTR(-EINVAL);
-       if (ops->map_alloc_check) {
-               err = ops->map_alloc_check(attr);
-               if (err)
-                       return ERR_PTR(err);
-       }
-       if (attr->map_ifindex)
-               ops = &bpf_map_offload_ops;
-       if (!ops->map_mem_usage)
-               return ERR_PTR(-EINVAL);
-       map = ops->map_alloc(attr);
-       if (IS_ERR(map))
-               return map;
-       map->ops = ops;
-       map->map_type = type;
-       return map;
- }
  static void bpf_map_write_active_inc(struct bpf_map *map)
  {
        atomic64_inc(&map->writecnt);
@@@ -1127,7 -1096,9 +1096,9 @@@ free_map_tab
  /* called via syscall */
  static int map_create(union bpf_attr *attr)
  {
+       const struct bpf_map_ops *ops;
        int numa_node = bpf_map_attr_numa_node(attr);
+       u32 map_type = attr->map_type;
        struct bpf_map *map;
        int f_flags;
        int err;
                return -EINVAL;
  
        /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
-       map = find_and_alloc_map(attr);
+       map_type = attr->map_type;
+       if (map_type >= ARRAY_SIZE(bpf_map_types))
+               return -EINVAL;
+       map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
+       ops = bpf_map_types[map_type];
+       if (!ops)
+               return -EINVAL;
+       if (ops->map_alloc_check) {
+               err = ops->map_alloc_check(attr);
+               if (err)
+                       return err;
+       }
+       if (attr->map_ifindex)
+               ops = &bpf_map_offload_ops;
+       if (!ops->map_mem_usage)
+               return -EINVAL;
+       /* Intent here is for unprivileged_bpf_disabled to block BPF map
+        * creation for unprivileged users; other actions depend
+        * on fd availability and access to bpffs, so are dependent on
+        * object creation success. Even with unprivileged BPF disabled,
+        * capability checks are still carried out.
+        */
+       if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+               return -EPERM;
+       /* check privileged map type permissions */
+       switch (map_type) {
+       case BPF_MAP_TYPE_ARRAY:
+       case BPF_MAP_TYPE_PERCPU_ARRAY:
+       case BPF_MAP_TYPE_PROG_ARRAY:
+       case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+       case BPF_MAP_TYPE_CGROUP_ARRAY:
+       case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+       case BPF_MAP_TYPE_HASH:
+       case BPF_MAP_TYPE_PERCPU_HASH:
+       case BPF_MAP_TYPE_HASH_OF_MAPS:
+       case BPF_MAP_TYPE_RINGBUF:
+       case BPF_MAP_TYPE_USER_RINGBUF:
+       case BPF_MAP_TYPE_CGROUP_STORAGE:
+       case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+               /* unprivileged */
+               break;
+       case BPF_MAP_TYPE_SK_STORAGE:
+       case BPF_MAP_TYPE_INODE_STORAGE:
+       case BPF_MAP_TYPE_TASK_STORAGE:
+       case BPF_MAP_TYPE_CGRP_STORAGE:
+       case BPF_MAP_TYPE_BLOOM_FILTER:
+       case BPF_MAP_TYPE_LPM_TRIE:
+       case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+       case BPF_MAP_TYPE_STACK_TRACE:
+       case BPF_MAP_TYPE_QUEUE:
+       case BPF_MAP_TYPE_STACK:
+       case BPF_MAP_TYPE_LRU_HASH:
+       case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+       case BPF_MAP_TYPE_STRUCT_OPS:
+       case BPF_MAP_TYPE_CPUMAP:
+               if (!bpf_capable())
+                       return -EPERM;
+               break;
+       case BPF_MAP_TYPE_SOCKMAP:
+       case BPF_MAP_TYPE_SOCKHASH:
+       case BPF_MAP_TYPE_DEVMAP:
+       case BPF_MAP_TYPE_DEVMAP_HASH:
+       case BPF_MAP_TYPE_XSKMAP:
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               break;
+       default:
+               WARN(1, "unsupported map type %d", map_type);
+               return -EPERM;
+       }
+       map = ops->map_alloc(attr);
        if (IS_ERR(map))
                return PTR_ERR(map);
+       map->ops = ops;
+       map->map_type = map_type;
  
        err = bpf_obj_name_cpy(map->name, attr->map_name,
                               sizeof(attr->map_name));
@@@ -2434,10 -2481,6 +2481,10 @@@ bpf_prog_load_check_attach(enum bpf_pro
                default:
                        return -EINVAL;
                }
 +      case BPF_PROG_TYPE_NETFILTER:
 +              if (expected_attach_type == BPF_NETFILTER)
 +                      return 0;
 +              return -EINVAL;
        case BPF_PROG_TYPE_SYSCALL:
        case BPF_PROG_TYPE_EXT:
                if (expected_attach_type)
@@@ -2507,7 -2550,6 +2554,6 @@@ static int bpf_prog_load(union bpf_att
        struct btf *attach_btf = NULL;
        int err;
        char license[128];
-       bool is_gpl;
  
        if (CHECK_ATTR(BPF_PROG_LOAD))
                return -EINVAL;
            !bpf_capable())
                return -EPERM;
  
-       /* copy eBPF program license from user space */
-       if (strncpy_from_bpfptr(license,
-                               make_bpfptr(attr->license, uattr.is_kernel),
-                               sizeof(license) - 1) < 0)
-               return -EFAULT;
-       license[sizeof(license) - 1] = 0;
-       /* eBPF programs must be GPL compatible to use GPL-ed functions */
-       is_gpl = license_is_gpl_compatible(license);
+       /* Intent here is for unprivileged_bpf_disabled to block BPF program
+        * creation for unprivileged users; other actions depend
+        * on fd availability and access to bpffs, so are dependent on
+        * object creation success. Even with unprivileged BPF disabled,
+        * capability checks are still carried out for these
+        * and other operations.
+        */
+       if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+               return -EPERM;
  
        if (attr->insn_cnt == 0 ||
            attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
                             make_bpfptr(attr->insns, uattr.is_kernel),
                             bpf_prog_insn_size(prog)) != 0)
                goto free_prog_sec;
+       /* copy eBPF program license from user space */
+       if (strncpy_from_bpfptr(license,
+                               make_bpfptr(attr->license, uattr.is_kernel),
+                               sizeof(license) - 1) < 0)
+               goto free_prog_sec;
+       license[sizeof(license) - 1] = 0;
+       /* eBPF programs must be GPL compatible to use GPL-ed functions */
+       prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
  
        prog->orig_prog = NULL;
        prog->jited = 0;
  
        atomic64_set(&prog->aux->refcnt, 1);
-       prog->gpl_compatible = is_gpl ? 1 : 0;
  
        if (bpf_prog_is_dev_bound(prog->aux)) {
                err = bpf_prog_dev_bound_init(prog, attr);
@@@ -2797,28 -2847,31 +2851,31 @@@ static void bpf_link_put_deferred(struc
        bpf_link_free(link);
  }
  
- /* bpf_link_put can be called from atomic context, but ensures that resources
-  * are freed from process context
+ /* bpf_link_put might be called from atomic context. It needs to be called
+  * from sleepable context in order to acquire sleeping locks during the process.
   */
  void bpf_link_put(struct bpf_link *link)
  {
        if (!atomic64_dec_and_test(&link->refcnt))
                return;
  
-       if (in_atomic()) {
-               INIT_WORK(&link->work, bpf_link_put_deferred);
-               schedule_work(&link->work);
-       } else {
-               bpf_link_free(link);
-       }
+       INIT_WORK(&link->work, bpf_link_put_deferred);
+       schedule_work(&link->work);
  }
  EXPORT_SYMBOL(bpf_link_put);
  
+ static void bpf_link_put_direct(struct bpf_link *link)
+ {
+       if (!atomic64_dec_and_test(&link->refcnt))
+               return;
+       bpf_link_free(link);
+ }
  static int bpf_link_release(struct inode *inode, struct file *filp)
  {
        struct bpf_link *link = filp->private_data;
  
-       bpf_link_put(link);
+       bpf_link_put_direct(link);
        return 0;
  }
  
@@@ -3463,11 -3516,6 +3520,11 @@@ static int bpf_prog_attach_check_attach
                return prog->enforce_expected_attach_type &&
                        prog->expected_attach_type != attach_type ?
                        -EINVAL : 0;
 +      case BPF_PROG_TYPE_KPROBE:
 +              if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
 +                  attach_type != BPF_TRACE_KPROBE_MULTI)
 +                      return -EINVAL;
 +              return 0;
        default:
                return 0;
        }
@@@ -4622,12 -4670,7 +4679,12 @@@ static int link_create(union bpf_attr *
  
        switch (prog->type) {
        case BPF_PROG_TYPE_EXT:
 +              break;
        case BPF_PROG_TYPE_NETFILTER:
 +              if (attr->link_create.attach_type != BPF_NETFILTER) {
 +                      ret = -EINVAL;
 +                      goto out;
 +              }
                break;
        case BPF_PROG_TYPE_PERF_EVENT:
        case BPF_PROG_TYPE_TRACEPOINT:
@@@ -4801,7 -4844,7 +4858,7 @@@ out_put_progs
        if (ret)
                bpf_prog_put(new_prog);
  out_put_link:
-       bpf_link_put(link);
+       bpf_link_put_direct(link);
        return ret;
  }
  
@@@ -4824,7 -4867,7 +4881,7 @@@ static int link_detach(union bpf_attr *
        else
                ret = -EOPNOTSUPP;
  
-       bpf_link_put(link);
+       bpf_link_put_direct(link);
        return ret;
  }
  
@@@ -4894,7 -4937,7 +4951,7 @@@ static int bpf_link_get_fd_by_id(const 
  
        fd = bpf_link_new_fd(link);
        if (fd < 0)
-               bpf_link_put(link);
+               bpf_link_put_direct(link);
  
        return fd;
  }
@@@ -4971,7 -5014,7 +5028,7 @@@ static int bpf_iter_create(union bpf_at
                return PTR_ERR(link);
  
        err = bpf_iter_new_fd(link);
-       bpf_link_put(link);
+       bpf_link_put_direct(link);
  
        return err;
  }
@@@ -5041,23 -5084,8 +5098,8 @@@ out_prog_put
  static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
  {
        union bpf_attr attr;
-       bool capable;
        int err;
  
-       capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
-       /* Intent here is for unprivileged_bpf_disabled to block key object
-        * creation commands for unprivileged users; other actions depend
-        * of fd availability and access to bpffs, so are dependent on
-        * object creation success.  Capabilities are later verified for
-        * operations such as load and map create, so even with unprivileged
-        * BPF disabled, capability checks are still carried out for these
-        * and other operations.
-        */
-       if (!capable &&
-           (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
-               return -EPERM;
        err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
        if (err)
                return err;
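
The map_create()/bpf_prog_load() permission refactor above can be observed from user space. Below is a minimal sketch, assuming libbpf >= 0.7 and kernel.unprivileged_bpf_disabled=0; run unprivileged, the ARRAY map (in the "unprivileged" group of the switch above) should be created, while LRU_HASH (in the bpf_capable() group) should fail with EPERM. The file name and map names are illustrative.

/* build: cc -o map_perm_demo map_perm_demo.c -lbpf */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <bpf/bpf.h>

static void try_create(enum bpf_map_type type, const char *name)
{
	int fd = bpf_map_create(type, name, sizeof(int), sizeof(long), 16, NULL);

	printf("%-10s: %s\n", name, fd >= 0 ? "created" : strerror(errno));
	if (fd >= 0)
		close(fd);
}

int main(void)
{
	try_create(BPF_MAP_TYPE_ARRAY, "demo_array");	/* "unprivileged" map type */
	try_create(BPF_MAP_TYPE_LRU_HASH, "demo_lru");	/* requires bpf_capable() */
	return 0;
}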
diff --combined kernel/bpf/verifier.c
index b54193de762ba6167619326ff2f2b52f699790f4,fa43dc8e85b99cfc13afc55291233f3f48074022..11e54dd8b6ddcc2afc9d54824e0832c364e557e0
@@@ -197,6 -197,7 +197,7 @@@ static int ref_set_non_owning(struct bp
                              struct bpf_reg_state *reg);
  static void specialize_kfunc(struct bpf_verifier_env *env,
                             u32 func_id, u16 offset, unsigned long *addr);
+ static bool is_trusted_reg(const struct bpf_reg_state *reg);
  
  static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
  {
@@@ -298,16 -299,19 +299,19 @@@ struct bpf_kfunc_call_arg_meta 
                bool found;
        } arg_constant;
  
-       /* arg_btf and arg_btf_id are used by kfunc-specific handling,
+       /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
         * generally to pass info about user-defined local kptr types to later
         * verification logic
         *   bpf_obj_drop
         *     Record the local kptr type to be drop'd
         *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
-        *     Record the local kptr type to be refcount_incr'd
+        *     Record the local kptr type to be refcount_incr'd and use
+        *     arg_owning_ref to determine whether refcount_acquire should be
+        *     fallible
         */
        struct btf *arg_btf;
        u32 arg_btf_id;
+       bool arg_owning_ref;
  
        struct {
                struct btf_field *field;
@@@ -439,8 -443,11 +443,11 @@@ static bool type_may_be_null(u32 type
        return type & PTR_MAYBE_NULL;
  }
  
- static bool reg_type_not_null(enum bpf_reg_type type)
+ static bool reg_not_null(const struct bpf_reg_state *reg)
  {
+       enum bpf_reg_type type;
+       type = reg->type;
        if (type_may_be_null(type))
                return false;
  
                type == PTR_TO_MAP_VALUE ||
                type == PTR_TO_MAP_KEY ||
                type == PTR_TO_SOCK_COMMON ||
+               (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
                type == PTR_TO_MEM;
  }
  
@@@ -3771,6 -3779,96 +3779,96 @@@ static void mark_all_scalars_imprecise(
        }
  }
  
+ static bool idset_contains(struct bpf_idset *s, u32 id)
+ {
+       u32 i;
+       for (i = 0; i < s->count; ++i)
+               if (s->ids[i] == id)
+                       return true;
+       return false;
+ }
+ static int idset_push(struct bpf_idset *s, u32 id)
+ {
+       if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
+               return -EFAULT;
+       s->ids[s->count++] = id;
+       return 0;
+ }
+ static void idset_reset(struct bpf_idset *s)
+ {
+       s->count = 0;
+ }
+ /* Collect a set of IDs for all registers currently marked as precise in env->bt.
+  * Mark all registers with these IDs as precise.
+  */
+ static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+ {
+       struct bpf_idset *precise_ids = &env->idset_scratch;
+       struct backtrack_state *bt = &env->bt;
+       struct bpf_func_state *func;
+       struct bpf_reg_state *reg;
+       DECLARE_BITMAP(mask, 64);
+       int i, fr;
+       idset_reset(precise_ids);
+       for (fr = bt->frame; fr >= 0; fr--) {
+               func = st->frame[fr];
+               bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
+               for_each_set_bit(i, mask, 32) {
+                       reg = &func->regs[i];
+                       if (!reg->id || reg->type != SCALAR_VALUE)
+                               continue;
+                       if (idset_push(precise_ids, reg->id))
+                               return -EFAULT;
+               }
+               bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
+               for_each_set_bit(i, mask, 64) {
+                       if (i >= func->allocated_stack / BPF_REG_SIZE)
+                               break;
+                       if (!is_spilled_scalar_reg(&func->stack[i]))
+                               continue;
+                       reg = &func->stack[i].spilled_ptr;
+                       if (!reg->id)
+                               continue;
+                       if (idset_push(precise_ids, reg->id))
+                               return -EFAULT;
+               }
+       }
+       for (fr = 0; fr <= st->curframe; ++fr) {
+               func = st->frame[fr];
+               for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
+                       reg = &func->regs[i];
+                       if (!reg->id)
+                               continue;
+                       if (!idset_contains(precise_ids, reg->id))
+                               continue;
+                       bt_set_frame_reg(bt, fr, i);
+               }
+               for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
+                       if (!is_spilled_scalar_reg(&func->stack[i]))
+                               continue;
+                       reg = &func->stack[i].spilled_ptr;
+                       if (!reg->id)
+                               continue;
+                       if (!idset_contains(precise_ids, reg->id))
+                               continue;
+                       bt_set_frame_slot(bt, fr, i);
+               }
+       }
+       return 0;
+ }
  /*
   * __mark_chain_precision() backtracks BPF program instruction sequence and
   * chain of verifier states making sure that register *regno* (if regno >= 0)
@@@ -3902,6 -4000,31 +4000,31 @@@ static int __mark_chain_precision(struc
                                bt->frame, last_idx, first_idx, subseq_idx);
                }
  
+               /* If some register with scalar ID is marked as precise,
+                * make sure that all registers sharing this ID are also precise.
+                * This is needed to estimate effect of find_equal_scalars().
+                * Do this at the last instruction of each state,
+                * bpf_reg_state::id fields are valid for these instructions.
+                *
+                * Allows to track precision in situation like below:
+                *
+                *     r2 = unknown value
+                *     ...
+                *   --- state #0 ---
+                *     ...
+                *     r1 = r2                 // r1 and r2 now share the same ID
+                *     ...
+                *   --- state #1 {r1.id = A, r2.id = A} ---
+                *     ...
+                *     if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
+                *     ...
+                *   --- state #2 {r1.id = A, r2.id = A} ---
+                *     r3 = r10
+                *     r3 += r1                // need to mark both r1 and r2
+                */
+               if (mark_precise_scalar_ids(env, st))
+                       return -EFAULT;
                if (last_idx < 0) {
                        /* we are at the entry into subprog, which
                         * is expected for global funcs, but only if
@@@ -4222,9 -4345,6 +4345,9 @@@ static int check_stack_write_fixed_off(
                                return err;
                }
                save_register_state(state, spi, reg, size);
 +              /* Break the relation on a narrowing spill. */
 +              if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
 +                      state->stack[spi].spilled_ptr.id = 0;
        } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
                   insn->imm != 0 && env->bpf_capable) {
                struct bpf_reg_state fake_reg = {};
@@@ -5894,7 -6014,7 +6017,7 @@@ static int check_ptr_to_btf_access(stru
                 * program allocated objects (which always have ref_obj_id > 0),
                 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
                 */
-               if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+               if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
                        verbose(env, "only read is supported\n");
                        return -EACCES;
                }
@@@ -7514,7 -7634,7 +7637,7 @@@ static int check_reg_type(struct bpf_ve
        if (base_type(arg_type) == ARG_PTR_TO_MEM)
                type &= ~DYNPTR_TYPE_FLAG_MASK;
  
-       if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC)
+       if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
                type &= ~MEM_ALLOC;
  
        for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
@@@ -9681,11 -9801,6 +9804,6 @@@ static bool is_kfunc_acquire(struct bpf
        return meta->kfunc_flags & KF_ACQUIRE;
  }
  
- static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
- {
-       return meta->kfunc_flags & KF_RET_NULL;
- }
  static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
  {
        return meta->kfunc_flags & KF_RELEASE;
@@@ -10001,6 -10116,16 +10119,16 @@@ BTF_ID(func, bpf_dynptr_slice
  BTF_ID(func, bpf_dynptr_slice_rdwr)
  BTF_ID(func, bpf_dynptr_clone)
  
+ static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
+ {
+       if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
+           meta->arg_owning_ref) {
+               return false;
+       }
+       return meta->kfunc_flags & KF_RET_NULL;
+ }
  static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
  {
        return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
@@@ -10478,6 -10603,8 +10606,8 @@@ __process_kf_arg_ptr_to_graph_node(stru
                        node_off, btf_name_by_offset(reg->btf, t->name_off));
                return -EINVAL;
        }
+       meta->arg_btf = reg->btf;
+       meta->arg_btf_id = reg->btf_id;
  
        if (node_off != field->graph_root.node_offset) {
                verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
@@@ -10881,10 -11008,12 +11011,12 @@@ static int check_kfunc_args(struct bpf_
                        meta->subprogno = reg->subprogno;
                        break;
                case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
-                       if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) {
+                       if (!type_is_ptr_alloc_obj(reg->type)) {
                                verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
                                return -EINVAL;
                        }
+                       if (!type_is_non_owning_ref(reg->type))
+                               meta->arg_owning_ref = true;
  
                        rec = reg_btf_record(reg);
                        if (!rec) {
@@@ -11047,6 -11176,7 +11179,7 @@@ static int check_kfunc_call(struct bpf_
            meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
                release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
                insn_aux->insert_off = regs[BPF_REG_2].off;
+               insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
                err = ref_convert_owning_non_owning(env, release_ref_obj_id);
                if (err) {
                        verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
@@@ -12804,12 -12934,14 +12937,14 @@@ static int check_alu_op(struct bpf_veri
                if (BPF_SRC(insn->code) == BPF_X) {
                        struct bpf_reg_state *src_reg = regs + insn->src_reg;
                        struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
+                       bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
+                                      !tnum_is_const(src_reg->var_off);
  
                        if (BPF_CLASS(insn->code) == BPF_ALU64) {
                                /* case: R1 = R2
                                 * copy register state to dest reg
                                 */
-                               if (src_reg->type == SCALAR_VALUE && !src_reg->id)
+                               if (need_id)
                                        /* Assign src and dst registers the same ID
                                         * that will be used by find_equal_scalars()
                                         * to propagate min/max range.
                                } else if (src_reg->type == SCALAR_VALUE) {
                                        bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
  
-                                       if (is_src_reg_u32 && !src_reg->id)
+                                       if (is_src_reg_u32 && need_id)
                                                src_reg->id = ++env->id_gen;
                                        copy_register_state(dst_reg, src_reg);
                                        /* Make sure ID is cleared if src_reg is not in u32 range otherwise
@@@ -13160,7 -13292,7 +13295,7 @@@ static int is_branch_taken(struct bpf_r
                           bool is_jmp32)
  {
        if (__is_pointer_value(false, reg)) {
-               if (!reg_type_not_null(reg->type))
+               if (!reg_not_null(reg))
                        return -1;
  
                /* If pointer is valid tests against zero will fail so we can
@@@ -14984,8 -15116,9 +15119,9 @@@ static bool range_within(struct bpf_reg
   * So we look through our idmap to see if this old id has been seen before.  If
   * so, we require the new id to match; otherwise, we add the id pair to the map.
   */
- static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
+ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
  {
+       struct bpf_id_pair *map = idmap->map;
        unsigned int i;
  
        /* either both IDs should be set or both should be zero */
                return true;
  
        for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
-               if (!idmap[i].old) {
+               if (!map[i].old) {
                        /* Reached an empty slot; haven't seen this id before */
-                       idmap[i].old = old_id;
-                       idmap[i].cur = cur_id;
+                       map[i].old = old_id;
+                       map[i].cur = cur_id;
                        return true;
                }
-               if (idmap[i].old == old_id)
-                       return idmap[i].cur == cur_id;
+               if (map[i].old == old_id)
+                       return map[i].cur == cur_id;
+               if (map[i].cur == cur_id)
+                       return false;
        }
        /* We ran out of idmap slots, which should be impossible */
        WARN_ON_ONCE(1);
        return false;
  }
  
+ /* Similar to check_ids(), but allocate a unique temporary ID
+  * for 'old_id' or 'cur_id' of zero.
+  * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
+  */
+ static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+ {
+       old_id = old_id ? old_id : ++idmap->tmp_id_gen;
+       cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
+       return check_ids(old_id, cur_id, idmap);
+ }
  static void clean_func_state(struct bpf_verifier_env *env,
                             struct bpf_func_state *st)
  {
@@@ -15108,7 -15255,7 +15258,7 @@@ next
  
  static bool regs_exact(const struct bpf_reg_state *rold,
                       const struct bpf_reg_state *rcur,
-                      struct bpf_id_pair *idmap)
+                      struct bpf_idmap *idmap)
  {
        return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
               check_ids(rold->id, rcur->id, idmap) &&
  
  /* Returns true if (rold safe implies rcur safe) */
  static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
-                   struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
+                   struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
  {
        if (!(rold->live & REG_LIVE_READ))
                /* explored state didn't use this */
  
        switch (base_type(rold->type)) {
        case SCALAR_VALUE:
-               if (regs_exact(rold, rcur, idmap))
-                       return true;
-               if (env->explore_alu_limits)
-                       return false;
+               if (env->explore_alu_limits) {
+                       /* explore_alu_limits disables tnum_in() and range_within()
+                        * logic and requires everything to be strict
+                        */
+                       return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+                              check_scalar_ids(rold->id, rcur->id, idmap);
+               }
                if (!rold->precise)
                        return true;
-               /* new val must satisfy old val knowledge */
+               /* Why check_ids() for scalar registers?
+                *
+                * Consider the following BPF code:
+                *   1: r6 = ... unbound scalar, ID=a ...
+                *   2: r7 = ... unbound scalar, ID=b ...
+                *   3: if (r6 > r7) goto +1
+                *   4: r6 = r7
+                *   5: if (r6 > X) goto ...
+                *   6: ... memory operation using r7 ...
+                *
+                * First verification path is [1-6]:
+                * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
+                * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
+                *   r7 <= X, because r6 and r7 share same id.
+                * Next verification path is [1-4, 6].
+                *
+                * Instruction (6) would be reached in two states:
+                *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
+                *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
+                *
+                * Use check_ids() to distinguish these states.
+                * ---
+                * Also verify that new value satisfies old value range knowledge.
+                */
                return range_within(rold, rcur) &&
-                      tnum_in(rold->var_off, rcur->var_off);
+                      tnum_in(rold->var_off, rcur->var_off) &&
+                      check_scalar_ids(rold->id, rcur->id, idmap);
        case PTR_TO_MAP_KEY:
        case PTR_TO_MAP_VALUE:
        case PTR_TO_MEM:
  }
  
  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
-                     struct bpf_func_state *cur, struct bpf_id_pair *idmap)
+                     struct bpf_func_state *cur, struct bpf_idmap *idmap)
  {
        int i, spi;
  
  }
  
  static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
-                   struct bpf_id_pair *idmap)
+                   struct bpf_idmap *idmap)
  {
        int i;
  
@@@ -15359,13 -15533,13 +15536,13 @@@ static bool func_states_equal(struct bp
  
        for (i = 0; i < MAX_BPF_REG; i++)
                if (!regsafe(env, &old->regs[i], &cur->regs[i],
-                            env->idmap_scratch))
+                            &env->idmap_scratch))
                        return false;
  
-       if (!stacksafe(env, old, cur, env->idmap_scratch))
+       if (!stacksafe(env, old, cur, &env->idmap_scratch))
                return false;
  
-       if (!refsafe(old, cur, env->idmap_scratch))
+       if (!refsafe(old, cur, &env->idmap_scratch))
                return false;
  
        return true;
@@@ -15380,7 -15554,8 +15557,8 @@@ static bool states_equal(struct bpf_ver
        if (old->curframe != cur->curframe)
                return false;
  
-       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+       env->idmap_scratch.tmp_id_gen = env->id_gen;
+       memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
  
        /* Verification state from speculative execution simulation
         * must never prune a non-speculative execution one.
                return false;
  
        if (old->active_lock.id &&
-           !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
+           !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
                return false;
  
        if (old->active_rcu_lock != cur->active_rcu_lock)
@@@ -17616,10 -17791,9 +17794,10 @@@ static int jit_subprogs(struct bpf_veri
        }
  
        /* finally lock prog and jit images for all functions and
 -       * populate kallsysm
 +       * populate kallsysm. Begin at the first subprogram, since
 +       * bpf_prog_load will add the kallsyms for the main program.
         */
 -      for (i = 0; i < env->subprog_cnt; i++) {
 +      for (i = 1; i < env->subprog_cnt; i++) {
                bpf_prog_lock_ro(func[i]);
                bpf_prog_kallsyms_add(func[i]);
        }
        prog->jited = 1;
        prog->bpf_func = func[0]->bpf_func;
        prog->jited_len = func[0]->jited_len;
 +      prog->aux->extable = func[0]->aux->extable;
 +      prog->aux->num_exentries = func[0]->aux->num_exentries;
        prog->aux->func = func;
        prog->aux->func_cnt = env->subprog_cnt;
        bpf_prog_jit_attempt_done(prog);
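
As a small C-level illustration of the scalar-ID tracking that the regsafe()/check_scalar_ids() and precision changes above reason about, consider the sketch below; the names and section type are assumptions. Depending on register allocation clang may keep x and y in one register, but when it emits a register-to-register move the verifier assigns both the same scalar ID, and the bounds check on y then also bounds x via find_equal_scalars(), which is what lets the array store verify.

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__u32 vals[16];		/* global .bss array, accessed with a variable index */

SEC("socket")
int scalar_id_demo(struct __sk_buff *skb)
{
	__u64 x = bpf_get_prandom_u32();	/* unbounded scalar */
	__u64 y = x;				/* may share x's scalar ID */

	if (y < 16)		/* range on y propagates to x (shared ID) */
		vals[x] = 1;
	return 0;
}

char _license[] SEC("license") = "GPL";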
diff --combined tools/include/uapi/linux/bpf.h
index 6961a7b700281037cd6fbb8e05a63c21978ff5c5,a7b5e91dd768e7d5b716272488295f828aa9aa1c..60a9d59beeabba9bdbaa94946aa0c28e3f435463
@@@ -1035,7 -1035,6 +1035,7 @@@ enum bpf_attach_type 
        BPF_TRACE_KPROBE_MULTI,
        BPF_LSM_CGROUP,
        BPF_STRUCT_OPS,
 +      BPF_NETFILTER,
        __MAX_BPF_ATTACH_TYPE
  };
  
@@@ -3178,6 -3177,10 +3178,10 @@@ union bpf_attr 
   *            **BPF_FIB_LOOKUP_DIRECT**
   *                    Do a direct table lookup vs full lookup using FIB
   *                    rules.
+  *            **BPF_FIB_LOOKUP_TBID**
+  *                    Used with BPF_FIB_LOOKUP_DIRECT.
+  *                    Use the routing table ID present in *params*->tbid
+  *                    for the fib lookup.
   *            **BPF_FIB_LOOKUP_OUTPUT**
   *                    Perform lookup from an egress perspective (default is
   *                    ingress).
@@@ -6832,6 -6835,7 +6836,7 @@@ enum 
        BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
        BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
        BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+       BPF_FIB_LOOKUP_TBID    = (1U << 3),
  };
  
  enum {
@@@ -6892,9 -6896,19 +6897,19 @@@ struct bpf_fib_lookup 
                __u32           ipv6_dst[4];  /* in6_addr; network order */
        };
  
-       /* output */
-       __be16  h_vlan_proto;
-       __be16  h_vlan_TCI;
+       union {
+               struct {
+                       /* output */
+                       __be16  h_vlan_proto;
+                       __be16  h_vlan_TCI;
+               };
+               /* input: when accompanied with the
+                * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+                * specific routing table to use for the fib lookup.
+                */
+               __u32   tbid;
+       };
        __u8    smac[6];     /* ETH_ALEN */
        __u8    dmac[6];     /* ETH_ALEN */
  };