1 // SPDX-License-Identifier: GPL-2.0-only
3 #include <linux/if_bridge.h>
5 #include <linux/list.h>
6 #include <linux/netdevice.h>
7 #include <linux/netlink.h>
8 #include <linux/rhashtable.h>
9 #include <linux/rhashtable-types.h>
10 #include <linux/rtnetlink.h>
11 #include <linux/skbuff.h>
12 #include <linux/types.h>
13 #include <net/netlink.h>
14 #include <net/vxlan.h>
16 #include "vxlan_private.h"
/* Lookup key of an MDB entry. It is embedded in struct vxlan_mdb_entry as
 * 'key' and hashed as a whole by vxlan_mdb_rht_params (key_len is the size
 * of this struct). Code in this file reads its 'src', 'dst' and 'vni'
 * members.
 * NOTE(review): the member declarations are not visible in this listing.
 */
18 struct vxlan_mdb_entry_key {
/* One MDB entry: a group key plus the remotes traffic for it is sent to.
 * rhnode:   linkage in the per-device hash table (vxlan->mdb_tbl).
 * remotes:  list of struct vxlan_mdb_remote, linked through their 'list'.
 * key:      hash key, see vxlan_mdb_rht_params.
 * mdb_node: linkage in vxlan->mdb_list, the list walked by the dump code.
 * An 'rcu' member is also used in this file (kfree_rcu() in
 * vxlan_mdb_entry_put()) but is not shown here.
 */
24 struct vxlan_mdb_entry {
25 struct rhash_head rhnode;
26 struct list_head remotes;
27 struct vxlan_mdb_entry_key key;
28 struct hlist_node mdb_node;
/* Remote flag: reported to user space as MDB_FLAGS_BLOCKED and tested by
 * vxlan_mdb_xmit() when selecting the remotes to transmit to.
 */
32 #define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0)
/* A remote destination attached to an MDB entry.
 * list:     linkage in vxlan_mdb_entry::remotes.
 * rd:       RCU-protected pointer to the remote's struct vxlan_rdst.
 * src_list: hlist of struct vxlan_mdb_src_entry belonging to this remote.
 * Further members used in this file (flags, filter_mode, rt_protocol, rcu)
 * are not shown in this listing.
 */
34 struct vxlan_mdb_remote {
35 struct list_head list;
36 struct vxlan_rdst __rcu *rd;
40 struct hlist_head src_list;
/* Source entry flag used by vxlan_mdb_remote_srcs_replace(): set on every
 * existing source before the new list is added; entries that still carry it
 * afterwards are deleted.
 */
44 #define VXLAN_SGRP_F_DELETE BIT(0)
/* One source address in a remote's source list.
 * node: linkage in vxlan_mdb_remote::src_list.
 * addr: the source address.
 * A 'flags' member holding VXLAN_SGRP_F_* is also used in this file but is
 * not shown in this listing.
 */
46 struct vxlan_mdb_src_entry {
47 struct hlist_node node;
48 union vxlan_addr addr;
/* Dump resume state kept in netlink_callback::ctx (see vxlan_mdb_dump()).
 * The fill helpers read and update its 'entry_idx' and 'remote_idx' members.
 * NOTE(review): the member declarations are not visible in this listing.
 */
52 struct vxlan_mdb_dump_ctx {
/* A source address parsed from a netlink request.
 * addr: the parsed address.
 * node: linkage in vxlan_mdb_config::src_list.
 */
58 struct vxlan_mdb_config_src_entry {
59 union vxlan_addr addr;
60 struct list_head node;
/* Parsed form of an MDB add / delete request.
 * vxlan:     target VXLAN device.
 * group:     key of the MDB entry to operate on.
 * src_list:  list of struct vxlan_mdb_config_src_entry.
 * remote_ip: IP address of the remote to add / delete.
 * Further members used in this file (remote_port, remote_vni,
 * remote_ifindex, nlflags, flags, filter_mode, rt_protocol) are not shown in
 * this listing.
 */
63 struct vxlan_mdb_config {
64 struct vxlan_dev *vxlan;
65 struct vxlan_mdb_entry_key group;
66 struct list_head src_list;
67 union vxlan_addr remote_ip;
/* Hash table parameters for vxlan->mdb_tbl: entries are struct
 * vxlan_mdb_entry objects linked through 'rhnode' and keyed by the complete
 * 'key' member. Automatic shrinking is enabled.
 */
77 static const struct rhashtable_params vxlan_mdb_rht_params = {
78 .head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
79 .key_offset = offsetof(struct vxlan_mdb_entry, key),
80 .key_len = sizeof(struct vxlan_mdb_entry_key),
81 .automatic_shrinking = true,
/* Forward declarations: vxlan_mdb_remote_src_fwd_add() and
 * vxlan_mdb_remote_src_fwd_del() call these before they are defined.
 */
84 static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
85 struct netlink_ext_ack *extack);
86 static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
87 struct netlink_ext_ack *extack);
/* Fill a struct br_mdb_entry for one (MDB entry, remote) pair.
 * @vxlan:     VXLAN device; its ifindex is reported in e->ifindex.
 * @mdb_entry: MDB entry whose group address (key.dst) is reported.
 * @remote:    remote whose BLOCKED flag is mirrored into e->flags.
 * @e:         output; zeroed first, state is always MDB_PERMANENT.
 * The address family of the group selects the IPv4 or IPv6 encoding of
 * e->addr.
 * NOTE(review): the case labels of the switch are not visible in this
 * listing.
 */
89 static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan,
90 const struct vxlan_mdb_entry *mdb_entry,
91 const struct vxlan_mdb_remote *remote,
92 struct br_mdb_entry *e)
94 const union vxlan_addr *dst = &mdb_entry->key.dst;
96 memset(e, 0, sizeof(*e));
97 e->ifindex = vxlan->dev->ifindex;
98 e->state = MDB_PERMANENT;
100 if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED)
101 e->flags |= MDB_FLAGS_BLOCKED;
103 switch (dst->sa.sa_family) {
105 e->addr.u.ip4 = dst->sin.sin_addr.s_addr;
106 e->addr.proto = htons(ETH_P_IP);
108 #if IS_ENABLED(CONFIG_IPV6)
110 e->addr.u.ip6 = dst->sin6.sin6_addr;
111 e->addr.proto = htons(ETH_P_IPV6);
/* Append the source list of @remote to @skb as an MDBA_MDB_EATTR_SRC_LIST
 * nest. Every source entry becomes an MDBA_MDB_SRCLIST_ENTRY nest holding an
 * MDBA_MDB_SRCATTR_ADDRESS attribute and an MDBA_MDB_SRCATTR_TIMER of 0.
 * nla_nest_cancel() undoes the outer nest, presumably on the failure path.
 * NOTE(review): the early exit taken for an empty list, the error labels and
 * the return values are not visible in this listing.
 */
117 static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb,
118 const struct vxlan_mdb_remote *remote)
120 struct vxlan_mdb_src_entry *ent;
123 if (hlist_empty(&remote->src_list))
126 nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
130 hlist_for_each_entry(ent, &remote->src_list, node) {
131 struct nlattr *nest_ent;
133 nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
137 if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
139 nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0))
142 nla_nest_end(skb, nest_ent);
145 nla_nest_end(skb, nest);
150 nla_nest_cancel(skb, nest);
/* Emit one MDBA_MDB_ENTRY_INFO nest describing @remote of @mdb_entry.
 * The nest starts with a raw struct br_mdb_entry (written without an
 * attribute header) followed by MDBA_MDB_EATTR_* attributes:
 *  - TIMER, always 0;
 *  - SOURCE, only when the entry key has a source address;
 *  - RTPROT, GROUP_MODE, the source list (via
 *    vxlan_mdb_entry_info_fill_srcs()) and DST;
 *  - DST_PORT, only when set and different from the device's dst_port;
 *  - VNI, only when different from the device's default remote VNI;
 *  - IFINDEX, only when set;
 *  - SRC_VNI, only in collect-metadata mode with a non-zero key VNI.
 * Port and VNIs are converted from big endian to CPU byte order.
 * remote->rd is read with rtnl_dereference(), i.e. the caller is expected to
 * hold RTNL.
 * NOTE(review): the error labels and return values are not visible in this
 * listing; nla_nest_cancel() presumably runs on the failure path.
 */
154 static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan,
156 const struct vxlan_mdb_entry *mdb_entry,
157 const struct vxlan_mdb_remote *remote)
159 struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
160 struct br_mdb_entry e;
163 nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO);
167 vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e);
169 if (nla_put_nohdr(skb, sizeof(e), &e) ||
170 nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0))
173 if (!vxlan_addr_any(&mdb_entry->key.src) &&
174 vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src))
177 if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) ||
178 nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) ||
179 vxlan_mdb_entry_info_fill_srcs(skb, remote) ||
180 vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip))
183 if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port &&
184 nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT,
185 be16_to_cpu(rd->remote_port)))
188 if (rd->remote_vni != vxlan->default_dst.remote_vni &&
189 nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni)))
192 if (rd->remote_ifindex &&
193 nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex))
196 if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) &&
197 mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI,
198 be32_to_cpu(mdb_entry->key.vni)))
201 nla_nest_end(skb, nest);
206 nla_nest_cancel(skb, nest);
/* Dump the remotes of @mdb_entry into an MDBA_MDB_ENTRY nest, resuming a
 * previous partial dump at ctx->remote_idx.
 * Remotes whose index is below the saved resume index are presumably skipped
 * (the skip statement is not visible in this listing). After the loop
 * ctx->remote_idx is set to the index at which filling stopped when an error
 * occurred, or reset to 0 otherwise.
 */
210 static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan,
212 struct vxlan_mdb_dump_ctx *ctx,
213 const struct vxlan_mdb_entry *mdb_entry)
215 int remote_idx = 0, s_remote_idx = ctx->remote_idx;
216 struct vxlan_mdb_remote *remote;
220 nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
224 list_for_each_entry(remote, &mdb_entry->remotes, list) {
225 if (remote_idx < s_remote_idx)
228 err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote);
235 ctx->remote_idx = err ? remote_idx : 0;
236 nla_nest_end(skb, nest);
/* Dump the entries on vxlan->mdb_list into an MDBA_MDB nest, resuming a
 * previous partial dump at ctx->entry_idx.
 * Entries whose index is below the saved resume index are presumably skipped
 * (the skip statement is not visible in this listing). After the loop
 * ctx->entry_idx is set to the index at which filling stopped when an error
 * occurred, or reset to 0 otherwise.
 */
240 static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb,
241 struct vxlan_mdb_dump_ctx *ctx)
243 int entry_idx = 0, s_entry_idx = ctx->entry_idx;
244 struct vxlan_mdb_entry *mdb_entry;
248 nest = nla_nest_start_noflag(skb, MDBA_MDB);
252 hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) {
253 if (entry_idx < s_entry_idx)
256 err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry);
263 ctx->entry_idx = err ? entry_idx : 0;
264 nla_nest_end(skb, nest);
/* Dump the MDB of VXLAN device @dev into @skb as an RTM_NEWMDB message.
 * @dev: VXLAN net device.
 * @skb: dump buffer.
 * @cb:  netlink dump callback; cb->ctx holds the struct vxlan_mdb_dump_ctx
 *       resume state (NL_ASSERT_DUMP_CTX_FITS checks that it fits).
 * The message payload starts with a zeroed struct br_port_msg carrying
 * AF_BRIDGE and the device ifindex, followed by the output of
 * vxlan_mdb_fill(). cb->seq is taken from vxlan->mdb_seq before
 * nl_dump_check_consistent() is called.
 * NOTE(review): error handling and the return value are not visible in this
 * listing.
 */
268 int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
269 struct netlink_callback *cb)
271 struct vxlan_mdb_dump_ctx *ctx = (void *)cb->ctx;
272 struct vxlan_dev *vxlan = netdev_priv(dev);
273 struct br_port_msg *bpm;
274 struct nlmsghdr *nlh;
279 NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx);
281 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
282 cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm),
287 bpm = nlmsg_data(nlh);
288 memset(bpm, 0, sizeof(*bpm));
289 bpm->family = AF_BRIDGE;
290 bpm->ifindex = dev->ifindex;
292 err = vxlan_mdb_fill(vxlan, skb, ctx);
296 cb->seq = vxlan->mdb_seq;
297 nl_dump_check_consistent(cb, nlh);
/* Policy for one source list entry: the address is binary data whose length
 * lies between the IPv4 and the IPv6 address size. The exact length for the
 * protocol in use is checked later by vxlan_mdb_is_valid_source().
 */
302 static const struct nla_policy
303 vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = {
304 [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY,
305 sizeof(struct in_addr),
306 sizeof(struct in6_addr)),
/* Policy for the source list: each MDBE_SRC_LIST_ENTRY is a nest validated
 * against vxlan_mdbe_src_list_entry_pol.
 */
309 static const struct nla_policy
310 vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = {
311 [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol),
/* Range for VNI attributes: the maximum is VXLAN_N_VID - 1. Referenced by
 * the MDBE_ATTR_VNI and MDBE_ATTR_SRC_VNI policies.
 */
314 static struct netlink_range_validation vni_range = {
315 .max = VXLAN_N_VID - 1,
/* Policy for the attributes nested in MDBA_SET_ENTRY_ATTRS:
 *  - SOURCE / DST: binary, length between the IPv4 and IPv6 address size;
 *  - GROUP_MODE:   u8 range starting at MCAST_EXCLUDE (upper bound is on a
 *                  line not visible in this listing);
 *  - SRC_LIST:     nest validated against vxlan_mdbe_src_list_pol;
 *  - RTPROT:       u8, at least RTPROT_STATIC;
 *  - DST_PORT:     u16;
 *  - VNI, SRC_VNI: u32 limited by vni_range;
 *  - IFINDEX:      s32, at least 1.
 */
318 static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
319 [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
320 sizeof(struct in_addr),
321 sizeof(struct in6_addr)),
322 [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE,
324 [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol),
325 [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
326 [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
327 sizeof(struct in_addr),
328 sizeof(struct in6_addr)),
329 [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
330 [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
331 [MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
332 [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
/* Validate a source address attribute against the protocol of the group.
 * @attr:   netlink attribute holding the source address.
 * @proto:  protocol of the group address, compared against htons(ETH_P_IP)
 *          and htons(ETH_P_IPV6).
 * @extack: receives a message describing why the address was rejected.
 * The attribute length must equal the address size of the protocol and the
 * address must not be multicast. Any other protocol is reported as invalid.
 * NOTE(review): the switch statement and the return statements are not
 * visible in this listing.
 */
335 static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto,
336 struct netlink_ext_ack *extack)
339 case htons(ETH_P_IP):
340 if (nla_len(attr) != sizeof(struct in_addr)) {
341 NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
344 if (ipv4_is_multicast(nla_get_in_addr(attr))) {
345 NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
349 #if IS_ENABLED(CONFIG_IPV6)
350 case htons(ETH_P_IPV6): {
353 if (nla_len(attr) != sizeof(struct in6_addr)) {
354 NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
357 src = nla_get_in6_addr(attr);
358 if (ipv6_addr_is_multicast(&src)) {
359 NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
366 NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
/* Set the group key of @cfg from the request.
 * @cfg:         configuration whose 'group' member is filled.
 * @entry:       request entry providing the group address and protocol.
 * @source_attr: source address attribute, parsed into group->src.
 * The group address family is AF_INET or AF_INET6 depending on
 * entry->addr.proto.
 * NOTE(review): any guard around the vxlan_nla_get_addr() call for a missing
 * @source_attr is not visible in this listing.
 */
373 static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg,
374 const struct br_mdb_entry *entry,
375 const struct nlattr *source_attr)
377 struct vxlan_mdb_entry_key *group = &cfg->group;
379 switch (entry->addr.proto) {
380 case htons(ETH_P_IP):
381 group->dst.sa.sa_family = AF_INET;
382 group->dst.sin.sin_addr.s_addr = entry->addr.u.ip4;
384 #if IS_ENABLED(CONFIG_IPV6)
385 case htons(ETH_P_IPV6):
386 group->dst.sa.sa_family = AF_INET6;
387 group->dst.sin6.sin6_addr = entry->addr.u.ip6;
393 vxlan_nla_get_addr(&group->src, source_attr);
/* Return true for a (*, G) key: a group address is set and the source
 * address is the any-address.
 */
396 static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group)
398 return !vxlan_addr_any(&group->dst) && vxlan_addr_any(&group->src);
/* Return true for an (S, G) key: both the group address and the source
 * address are set (neither is the any-address).
 */
401 static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group)
403 return !vxlan_addr_any(&group->dst) && !vxlan_addr_any(&group->src);
/* Parse one source list entry from the request and queue it on
 * cfg->src_list.
 * @cfg:       configuration that receives the new entry.
 * @src_entry: nested attribute, parsed against
 *             vxlan_mdbe_src_list_entry_pol.
 * @extack:    receives an error message on rejection.
 * MDBE_SRCATTR_ADDRESS is mandatory and must pass
 * vxlan_mdb_is_valid_source() for 'proto'. The entry is allocated with
 * GFP_KERNEL and appended to the tail of the list.
 * NOTE(review): the 'proto' parameter line, the error handling and the
 * return values are not visible in this listing.
 */
406 static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg,
408 const struct nlattr *src_entry,
409 struct netlink_ext_ack *extack)
411 struct nlattr *tb[MDBE_SRCATTR_MAX + 1];
412 struct vxlan_mdb_config_src_entry *src;
415 err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry,
416 vxlan_mdbe_src_list_entry_pol, extack);
420 if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS))
423 if (!vxlan_mdb_is_valid_source(tb[MDBE_SRCATTR_ADDRESS], proto,
427 src = kzalloc(sizeof(*src), GFP_KERNEL);
431 err = vxlan_nla_get_addr(&src->addr, tb[MDBE_SRCATTR_ADDRESS]);
435 list_add_tail(&src->node, &cfg->src_list);
/* Unlink @src from the configuration's source list.
 * NOTE(review): the return type line and the statement releasing @src are
 * not visible in this listing - presumably the entry is freed here; confirm.
 */
445 vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src)
447 list_del(&src->node);
/* Build cfg->src_list from the nested source list attribute.
 * @cfg:      configuration that receives the entries.
 * @src_list: nested attribute; each nested element is handed to
 *            vxlan_mdb_config_src_entry_init().
 * @extack:   receives an error message on rejection.
 * When an element fails, control jumps to err_src_entry_init; the reverse
 * walk that tears down every queued entry presumably belongs to that label
 * (the label line itself is not visible in this listing).
 * NOTE(review): the 'proto' parameter line and the return values are not
 * visible in this listing.
 */
451 static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg,
453 const struct nlattr *src_list,
454 struct netlink_ext_ack *extack)
456 struct vxlan_mdb_config_src_entry *src, *tmp;
457 struct nlattr *src_entry;
460 nla_for_each_nested(src_entry, src_list, rem) {
461 err = vxlan_mdb_config_src_entry_init(cfg, proto, src_entry,
464 goto err_src_entry_init;
470 list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node)
471 vxlan_mdb_config_src_entry_fini(src);
/* Tear down every entry on cfg->src_list, walking the list in reverse. The
 * _safe iterator is required because each entry is unlinked while walking.
 */
475 static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg)
477 struct vxlan_mdb_config_src_entry *src, *tmp;
479 list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node)
480 vxlan_mdb_config_src_entry_fini(src);
/* Parse the attributes nested in MDBA_SET_ENTRY_ATTRS into @cfg.
 * @cfg:       configuration being built; cfg->vxlan is read for the
 *             outgoing interface lookup.
 * @entry:     request entry providing the group address and protocol.
 * @set_attrs: nested attribute, parsed against vxlan_mdbe_attrs_pol.
 * @extack:    receives an error message on rejection.
 * Checks performed, in order:
 *  - MDBE_ATTR_DST (the remote IP address) is mandatory;
 *  - an optional source must pass vxlan_mdb_is_valid_source();
 *  - the group must be a multicast address or the all-zeros address;
 *  - no source may be given together with the all-zeros group;
 *  - (S, G) entries get filter mode MCAST_INCLUDE;
 *  - filter mode and source list are only accepted for (*, G) entries, and
 *    a source list requires an explicit filter mode;
 *  - a (*, G) INCLUDE entry needs a non-empty source list;
 *  - the outgoing ifindex must name a device in cfg->vxlan->net.
 * Port and VNI values are converted from CPU to big endian byte order.
 * Failures after the source list was built jump to err_src_list_fini, which
 * presumably runs vxlan_mdb_config_src_list_fini().
 * NOTE(review): error codes, assignment targets of some conversions and the
 * return statements are not visible in this listing.
 */
483 static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg,
484 const struct br_mdb_entry *entry,
485 const struct nlattr *set_attrs,
486 struct netlink_ext_ack *extack)
488 struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
491 err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs,
492 vxlan_mdbe_attrs_pol, extack);
496 if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) {
497 NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address");
501 if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
502 !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE],
503 entry->addr.proto, extack))
506 vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
508 /* rtnetlink code only validates that IPv4 group address is
511 if (!vxlan_addr_is_multicast(&cfg->group.dst) &&
512 !vxlan_addr_any(&cfg->group.dst)) {
513 NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast");
517 if (vxlan_addr_any(&cfg->group.dst) &&
518 mdbe_attrs[MDBE_ATTR_SOURCE]) {
519 NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry");
523 if (vxlan_mdb_is_sg(&cfg->group))
524 cfg->filter_mode = MCAST_INCLUDE;
526 if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
527 if (!vxlan_mdb_is_star_g(&cfg->group)) {
528 NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries");
531 cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]);
534 if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) {
535 if (!vxlan_mdb_is_star_g(&cfg->group)) {
536 NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries");
539 if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
540 NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode");
543 err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto,
544 mdbe_attrs[MDBE_ATTR_SRC_LIST],
550 if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) &&
551 cfg->filter_mode == MCAST_INCLUDE) {
552 NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list");
556 if (mdbe_attrs[MDBE_ATTR_RTPROT])
557 cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
559 err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]);
561 NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address");
562 goto err_src_list_fini;
565 if (mdbe_attrs[MDBE_ATTR_DST_PORT])
567 cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));
569 if (mdbe_attrs[MDBE_ATTR_VNI])
571 cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));
573 if (mdbe_attrs[MDBE_ATTR_IFINDEX]) {
574 cfg->remote_ifindex =
575 nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]);
576 if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) {
577 NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found");
579 goto err_src_list_fini;
583 if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
585 cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));
590 vxlan_mdb_config_src_list_fini(cfg);
/* Initialise @cfg from a netlink request; used by vxlan_mdb_add() and
 * vxlan_mdb_del().
 * @cfg:    output, zeroed first.
 * @dev:    VXLAN net device the request targets.
 * @tb:     top-level attributes; tb[MDBA_SET_ENTRY] holds the struct
 *          br_mdb_entry and tb[MDBA_SET_ENTRY_ATTRS] the nested attributes.
 * @extack: receives an error message on rejection.
 * Defaults: the group VNI, remote VNI and remote port come from the device;
 * filter mode is MCAST_EXCLUDE and the routing protocol RTPROT_STATIC.
 * Validation: the entry ifindex must be the VXLAN device, the state must be
 * MDB_PERMANENT when NLM_F_CREATE or NLM_F_REPLACE is set, the group
 * protocol must be IPv4 or IPv6, and MDBA_SET_ENTRY_ATTRS must be present.
 * The remaining work is delegated to vxlan_mdb_config_attrs_init().
 * NOTE(review): the 'nlmsg_flags' parameter line, the conditions guarding
 * the "Invalid MDB entry flags" and "VID must not be specified" messages,
 * and the error codes are not visible in this listing.
 */
594 static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg,
595 struct net_device *dev, struct nlattr *tb[],
597 struct netlink_ext_ack *extack)
599 struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
600 struct vxlan_dev *vxlan = netdev_priv(dev);
602 memset(cfg, 0, sizeof(*cfg));
604 cfg->group.vni = vxlan->default_dst.remote_vni;
605 INIT_LIST_HEAD(&cfg->src_list);
606 cfg->nlflags = nlmsg_flags;
607 cfg->filter_mode = MCAST_EXCLUDE;
608 cfg->rt_protocol = RTPROT_STATIC;
609 cfg->remote_vni = vxlan->default_dst.remote_vni;
610 cfg->remote_port = vxlan->cfg.dst_port;
612 if (entry->ifindex != dev->ifindex) {
613 NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device");
617 /* State is not part of the entry key and can be ignored on deletion
620 if ((nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) &&
621 entry->state != MDB_PERMANENT) {
622 NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent");
627 NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags");
632 NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
636 if (entry->addr.proto != htons(ETH_P_IP) &&
637 entry->addr.proto != htons(ETH_P_IPV6)) {
638 NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address");
642 if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) {
643 NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute");
647 return vxlan_mdb_config_attrs_init(cfg, entry, tb[MDBA_SET_ENTRY_ATTRS],
/* Release what vxlan_mdb_config_init() built: currently only the source
 * list.
 */
651 static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg)
653 vxlan_mdb_config_src_list_fini(cfg);
/* Look up the MDB entry matching @group in the device's hash table.
 * Returns the result of rhashtable_lookup_fast(), i.e. the entry or NULL.
 */
656 static struct vxlan_mdb_entry *
657 vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan,
658 const struct vxlan_mdb_entry_key *group)
660 return rhashtable_lookup_fast(&vxlan->mdb_tbl, group,
661 vxlan_mdb_rht_params);
/* Find the remote of @mdb_entry whose remote IP equals @addr by walking the
 * remotes list. remote->rd is read with rtnl_dereference(), i.e. the caller
 * is expected to hold RTNL.
 * NOTE(review): the return statements are not visible in this listing -
 * presumably the matching remote, or NULL when none matches.
 */
664 static struct vxlan_mdb_remote *
665 vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry,
666 const union vxlan_addr *addr)
668 struct vxlan_mdb_remote *remote;
670 list_for_each_entry(remote, &mdb_entry->remotes, list) {
671 struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
673 if (vxlan_addr_equal(addr, &rd->remote_ip))
/* RCU callback queued by vxlan_mdb_remote_rdst_fini(): recovers the struct
 * vxlan_rdst from its rcu head and destroys its dst cache.
 * NOTE(review): the statement freeing the structure itself is not visible in
 * this listing.
 */
680 static void vxlan_mdb_rdst_free(struct rcu_head *head)
682 struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
684 dst_cache_destroy(&rd->dst_cache);
/* Allocate a struct vxlan_rdst for @remote from the parameters in @cfg.
 * The structure and its dst cache are allocated with GFP_KERNEL; the remote
 * IP, port, VNI and ifindex are copied from @cfg, and the result is
 * published in remote->rd with rcu_assign_pointer().
 * NOTE(review): the allocation failure handling and the return values are
 * not visible in this listing.
 */
688 static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg,
689 struct vxlan_mdb_remote *remote)
691 struct vxlan_rdst *rd;
694 rd = kzalloc(sizeof(*rd), GFP_KERNEL);
698 err = dst_cache_init(&rd->dst_cache, GFP_KERNEL);
702 rd->remote_ip = cfg->remote_ip;
703 rd->remote_port = cfg->remote_port;
704 rd->remote_vni = cfg->remote_vni;
705 rd->remote_ifindex = cfg->remote_ifindex;
706 rcu_assign_pointer(remote->rd, rd);
/* Queue @rd for release through vxlan_mdb_rdst_free() after an RCU grace
 * period.
 */
715 static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd)
717 call_rcu(&rd->rcu, vxlan_mdb_rdst_free);
/* Initialise a newly allocated @remote from @cfg: create its rdst, copy the
 * flags, filter mode and routing protocol, and start with an empty source
 * list.
 * NOTE(review): the error check after vxlan_mdb_remote_rdst_init() and the
 * return values are not visible in this listing.
 */
720 static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg,
721 struct vxlan_mdb_remote *remote)
725 err = vxlan_mdb_remote_rdst_init(cfg, remote);
729 remote->flags = cfg->flags;
730 remote->filter_mode = cfg->filter_mode;
731 remote->rt_protocol = cfg->rt_protocol;
732 INIT_HLIST_HEAD(&remote->src_list);
/* Undo vxlan_mdb_remote_init(): warns once if the source list is not empty
 * and queues the remote's rdst for RCU-deferred release. @vxlan is not used
 * by the visible statements.
 */
737 static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan,
738 struct vxlan_mdb_remote *remote)
740 WARN_ON_ONCE(!hlist_empty(&remote->src_list));
741 vxlan_mdb_remote_rdst_fini(rtnl_dereference(remote->rd));
/* Search the source list of @remote for an entry whose address equals @addr.
 * NOTE(review): the return statements are not visible in this listing -
 * presumably the matching entry, or NULL when none matches.
 */
744 static struct vxlan_mdb_src_entry *
745 vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote,
746 const union vxlan_addr *addr)
748 struct vxlan_mdb_src_entry *ent;
750 hlist_for_each_entry(ent, &remote->src_list, node) {
751 if (vxlan_addr_equal(&ent->addr, addr))
/* Allocate a zeroed source entry with GFP_KERNEL and insert it at the head
 * of the source list of @remote.
 * NOTE(review): the allocation failure check, the copy of @addr into the
 * entry and the return statement are not visible in this listing.
 */
758 static struct vxlan_mdb_src_entry *
759 vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote,
760 const union vxlan_addr *addr)
762 struct vxlan_mdb_src_entry *ent;
764 ent = kzalloc(sizeof(*ent), GFP_KERNEL);
769 hlist_add_head(&ent->node, &remote->src_list);
/* Unlink @ent from its remote's source list.
 * NOTE(review): the return type line and the statement releasing @ent are
 * not visible in this listing - presumably the entry is freed here; confirm.
 */
775 vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent)
777 hlist_del(&ent->node);
/* Install the (S, G) entry that backs source @addr of a (*, G) remote.
 * @cfg:    configuration of the (*, G) remote being added or replaced.
 * @addr:   source address; becomes the source of the (S, G) key.
 * @extack: passed through to __vxlan_mdb_add().
 * A temporary configuration is built with the same group address, VNI,
 * remote parameters, netlink flags and routing protocol as @cfg. Its filter
 * mode is always MCAST_INCLUDE; when @cfg is in EXCLUDE mode the remote is
 * additionally marked VXLAN_MDB_REMOTE_F_BLOCKED.
 * NOTE(review): the return type line is not visible in this listing.
 */
782 vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg,
783 const union vxlan_addr *addr,
784 struct netlink_ext_ack *extack)
786 struct vxlan_mdb_config sg_cfg;
788 memset(&sg_cfg, 0, sizeof(sg_cfg));
789 sg_cfg.vxlan = cfg->vxlan;
790 sg_cfg.group.src = *addr;
791 sg_cfg.group.dst = cfg->group.dst;
792 sg_cfg.group.vni = cfg->group.vni;
793 INIT_LIST_HEAD(&sg_cfg.src_list);
794 sg_cfg.remote_ip = cfg->remote_ip;
795 sg_cfg.remote_ifindex = cfg->remote_ifindex;
796 sg_cfg.remote_vni = cfg->remote_vni;
797 sg_cfg.remote_port = cfg->remote_port;
798 sg_cfg.nlflags = cfg->nlflags;
799 sg_cfg.filter_mode = MCAST_INCLUDE;
800 if (cfg->filter_mode == MCAST_EXCLUDE)
801 sg_cfg.flags = VXLAN_MDB_REMOTE_F_BLOCKED;
802 sg_cfg.rt_protocol = cfg->rt_protocol;
804 return __vxlan_mdb_add(&sg_cfg, extack);
/* Remove the (S, G) entry that backs source @addr of @remote.
 * @vxlan:  VXLAN device.
 * @group:  key of the (*, G) entry; its group address and VNI are reused.
 * @remote: remote whose IP address identifies the (S, G) remote to delete.
 * @addr:   source address; becomes the source of the (S, G) key.
 * Only the key and the remote IP are filled in the temporary configuration.
 * __vxlan_mdb_del() is called without extack and its result is not used by
 * the visible statements.
 * NOTE(review): the return type line is not visible in this listing.
 */
808 vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan,
809 const struct vxlan_mdb_entry_key *group,
810 const struct vxlan_mdb_remote *remote,
811 const union vxlan_addr *addr)
813 struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
814 struct vxlan_mdb_config sg_cfg;
816 memset(&sg_cfg, 0, sizeof(sg_cfg));
817 sg_cfg.vxlan = vxlan;
818 sg_cfg.group.src = *addr;
819 sg_cfg.group.dst = group->dst;
820 sg_cfg.group.vni = group->vni;
821 INIT_LIST_HEAD(&sg_cfg.src_list);
822 sg_cfg.remote_ip = rd->remote_ip;
824 __vxlan_mdb_del(&sg_cfg, NULL);
/* Add source @src to @remote, or refresh it when replacing.
 * @cfg:    configuration of the request.
 * @remote: remote that owns the source list.
 * @src:    source taken from the request.
 * @extack: receives an error message on rejection.
 * The source is looked up first and created when the lookup does not find
 * it; a source that already exists is only accepted when NLM_F_REPLACE is
 * set. The backing (S, G) entry is then installed with
 * vxlan_mdb_remote_src_fwd_add().
 * NOTE(review): the return type line, the branch conditions around the
 * lookup, the error labels and the return values are not visible in this
 * listing; vxlan_mdb_remote_src_entry_del() presumably runs on the failure
 * path.
 */
828 vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg,
829 struct vxlan_mdb_remote *remote,
830 const struct vxlan_mdb_config_src_entry *src,
831 struct netlink_ext_ack *extack)
833 struct vxlan_mdb_src_entry *ent;
836 ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr);
838 ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr);
841 } else if (!(cfg->nlflags & NLM_F_REPLACE)) {
842 NL_SET_ERR_MSG_MOD(extack, "Source entry already exists");
846 err = vxlan_mdb_remote_src_fwd_add(cfg, &ent->addr, extack);
850 /* Clear flags in case source entry was marked for deletion as part of
858 vxlan_mdb_remote_src_entry_del(ent);
/* Delete source @ent of @remote: first remove the backing (S, G) entry,
 * then unlink the source entry itself.
 */
862 static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan,
863 const struct vxlan_mdb_entry_key *group,
864 const struct vxlan_mdb_remote *remote,
865 struct vxlan_mdb_src_entry *ent)
867 vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, &ent->addr);
868 vxlan_mdb_remote_src_entry_del(ent);
/* Add every source on cfg->src_list to @remote.
 * The trailing loop deletes all sources of @remote; it presumably belongs to
 * the error path taken when adding one source fails.
 * NOTE(review): the error check, the label and the return values are not
 * visible in this listing.
 */
871 static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg,
872 struct vxlan_mdb_remote *remote,
873 struct netlink_ext_ack *extack)
875 struct vxlan_mdb_config_src_entry *src;
876 struct vxlan_mdb_src_entry *ent;
877 struct hlist_node *tmp;
880 list_for_each_entry(src, &cfg->src_list, node) {
881 err = vxlan_mdb_remote_src_add(cfg, remote, src, extack);
889 hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node)
890 vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group, remote, ent);
/* Delete every source of @remote together with its backing (S, G) entry.
 * The _safe iterator is required because entries are unlinked while walking.
 */
894 static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan,
895 const struct vxlan_mdb_entry_key *group,
896 struct vxlan_mdb_remote *remote)
898 struct vxlan_mdb_src_entry *ent;
899 struct hlist_node *tmp;
901 hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node)
902 vxlan_mdb_remote_src_del(vxlan, group, remote, ent);
/* Compute the netlink space needed for the source list of @remote: one
 * empty-payload nest for the list plus, per source, an entry nest, an
 * address sized after the group address, and a timer.
 * NOTE(review): the timer is sized as u8 here while
 * vxlan_mdb_entry_info_fill_srcs() emits it with nla_put_u32(). The two
 * agree only if nla_total_size() pads the payload to 4 bytes; consider
 * sizeof(u32) for clarity.
 * NOTE(review): the return type line, the early exit for an empty list and
 * the return statement are not visible in this listing.
 */
906 vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group,
907 const struct vxlan_mdb_remote *remote)
909 struct vxlan_mdb_src_entry *ent;
912 if (hlist_empty(&remote->src_list))
915 /* MDBA_MDB_EATTR_SRC_LIST */
916 nlmsg_size = nla_total_size(0);
918 hlist_for_each_entry(ent, &remote->src_list, node) {
919 /* MDBA_MDB_SRCLIST_ENTRY */
920 nlmsg_size += nla_total_size(0) +
921 /* MDBA_MDB_SRCATTR_ADDRESS */
922 nla_total_size(vxlan_addr_size(&group->dst)) +
923 /* MDBA_MDB_SRCATTR_TIMER */
924 nla_total_size(sizeof(u8));
/* Compute the size of the notification message for one (entry, remote)
 * pair. The optional attributes are counted under the same conditions that
 * vxlan_mdb_entry_info_fill() uses to emit them (destination port, VNI,
 * ifindex, source VNI); the source attribute is counted for (S, G) keys.
 * remote->rd is read with rtnl_dereference(), i.e. the caller is expected to
 * hold RTNL.
 * NOTE(review): part of the initial sum (between the br_port_msg header and
 * MDBA_MDB_ENTRY_INFO) and the return statement are not visible in this
 * listing.
 */
930 static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
931 const struct vxlan_mdb_entry *mdb_entry,
932 const struct vxlan_mdb_remote *remote)
934 const struct vxlan_mdb_entry_key *group = &mdb_entry->key;
935 struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
938 nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
943 /* MDBA_MDB_ENTRY_INFO */
944 nla_total_size(sizeof(struct br_mdb_entry)) +
945 /* MDBA_MDB_EATTR_TIMER */
946 nla_total_size(sizeof(u32));
947 /* MDBA_MDB_EATTR_SOURCE */
948 if (vxlan_mdb_is_sg(group))
949 nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst));
950 /* MDBA_MDB_EATTR_RTPROT */
951 nlmsg_size += nla_total_size(sizeof(u8));
952 /* MDBA_MDB_EATTR_SRC_LIST */
953 nlmsg_size += vxlan_mdb_nlmsg_src_list_size(group, remote);
954 /* MDBA_MDB_EATTR_GROUP_MODE */
955 nlmsg_size += nla_total_size(sizeof(u8));
956 /* MDBA_MDB_EATTR_DST */
957 nlmsg_size += nla_total_size(vxlan_addr_size(&rd->remote_ip));
958 /* MDBA_MDB_EATTR_DST_PORT */
959 if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port)
960 nlmsg_size += nla_total_size(sizeof(u16));
961 /* MDBA_MDB_EATTR_VNI */
962 if (rd->remote_vni != vxlan->default_dst.remote_vni)
963 nlmsg_size += nla_total_size(sizeof(u32));
964 /* MDBA_MDB_EATTR_IFINDEX */
965 if (rd->remote_ifindex)
966 nlmsg_size += nla_total_size(sizeof(u32));
967 /* MDBA_MDB_EATTR_SRC_VNI */
968 if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && group->vni)
969 nlmsg_size += nla_total_size(sizeof(u32));
/* Build a notification message of the given type for one (entry, remote)
 * pair: a zeroed struct br_port_msg with AF_BRIDGE and the device ifindex,
 * then MDBA_MDB > MDBA_MDB_ENTRY > the entry info produced by
 * vxlan_mdb_entry_info_fill(). The message is created with portid 0,
 * sequence number 0 and no flags.
 * NOTE(review): the 'skb' and 'type' parameter lines, the error labels and
 * the return values are not visible in this listing; nlmsg_cancel()
 * presumably runs on the failure path.
 */
974 static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan,
976 const struct vxlan_mdb_entry *mdb_entry,
977 const struct vxlan_mdb_remote *remote,
980 struct nlattr *mdb_nest, *mdb_entry_nest;
981 struct br_port_msg *bpm;
982 struct nlmsghdr *nlh;
984 nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0);
988 bpm = nlmsg_data(nlh);
989 memset(bpm, 0, sizeof(*bpm));
990 bpm->family = AF_BRIDGE;
991 bpm->ifindex = vxlan->dev->ifindex;
993 mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
996 mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
1000 if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote))
1003 nla_nest_end(skb, mdb_entry_nest);
1004 nla_nest_end(skb, mdb_nest);
1005 nlmsg_end(skb, nlh);
1010 nlmsg_cancel(skb, nlh);
/* Notify RTNLGRP_MDB listeners about @remote of @mdb_entry.
 * A message sized by vxlan_mdb_nlmsg_size() is allocated, filled by
 * vxlan_mdb_nlmsg_fill() and sent with rtnl_notify(); rtnl_set_sk_err()
 * reports an error to the group, presumably when allocation or filling
 * fails.
 * NOTE(review): the 'type' parameter line, the allocation flags and the
 * error branches are not visible in this listing.
 */
1014 static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan,
1015 const struct vxlan_mdb_entry *mdb_entry,
1016 const struct vxlan_mdb_remote *remote,
1019 struct net *net = dev_net(vxlan->dev);
1020 struct sk_buff *skb;
1023 skb = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote),
1028 err = vxlan_mdb_nlmsg_fill(vxlan, skb, mdb_entry, remote, type);
1034 rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL);
1037 rtnl_set_sk_err(net, RTNLGRP_MDB, err);
/* Replace the source list of @remote with the one in @cfg.
 * Every existing source is first marked VXLAN_SGRP_F_DELETE, then the
 * sources from @cfg are added. Sources that still carry the mark afterwards
 * were not part of the new list and are deleted. On failure control jumps to
 * err_clear_delete; the final loop, which clears the mark on all sources,
 * presumably belongs to that label.
 * NOTE(review): the return type line, the label line and the return values
 * are not visible in this listing.
 */
1041 vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg,
1042 const struct vxlan_mdb_entry *mdb_entry,
1043 struct vxlan_mdb_remote *remote,
1044 struct netlink_ext_ack *extack)
1046 struct vxlan_dev *vxlan = cfg->vxlan;
1047 struct vxlan_mdb_src_entry *ent;
1048 struct hlist_node *tmp;
1051 hlist_for_each_entry(ent, &remote->src_list, node)
1052 ent->flags |= VXLAN_SGRP_F_DELETE;
1054 err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
1056 goto err_clear_delete;
1058 hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) {
1059 if (ent->flags & VXLAN_SGRP_F_DELETE)
1060 vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote,
1067 hlist_for_each_entry(ent, &remote->src_list, node)
1068 ent->flags &= ~VXLAN_SGRP_F_DELETE;
/* Replace the parameters of an existing @remote with those in @cfg.
 * A new rdst is created and published in remote->rd while the old one is
 * remembered. After the source list was replaced, flags and filter mode are
 * stored with WRITE_ONCE() (they are read with READ_ONCE() in
 * vxlan_mdb_xmit()), an RTM_NEWMDB notification is sent and the old rdst is
 * released. On failure control jumps to err_rdst_reset; the statements that
 * restore the old rdst pointer and release the new one presumably belong to
 * that label.
 * NOTE(review): the error checks, the label line and the return values are
 * not visible in this listing.
 */
1072 static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg,
1073 const struct vxlan_mdb_entry *mdb_entry,
1074 struct vxlan_mdb_remote *remote,
1075 struct netlink_ext_ack *extack)
1077 struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd);
1078 struct vxlan_dev *vxlan = cfg->vxlan;
1081 err = vxlan_mdb_remote_rdst_init(cfg, remote);
1084 new_rd = rtnl_dereference(remote->rd);
1086 err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack);
1088 goto err_rdst_reset;
1090 WRITE_ONCE(remote->flags, cfg->flags);
1091 WRITE_ONCE(remote->filter_mode, cfg->filter_mode);
1092 remote->rt_protocol = cfg->rt_protocol;
1093 vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB);
1095 vxlan_mdb_remote_rdst_fini(old_rd);
1100 rcu_assign_pointer(remote->rd, old_rd);
1101 vxlan_mdb_remote_rdst_fini(new_rd);
/* Add the remote described by @cfg to @mdb_entry, or replace it.
 * When a remote with the same IP address already exists the request must
 * carry NLM_F_REPLACE and is handed to vxlan_mdb_remote_replace(). Otherwise
 * NLM_F_CREATE is required: a remote is allocated with GFP_KERNEL,
 * initialised, given its sources, linked into the remotes list with
 * list_add_rcu() and announced with RTM_NEWMDB.
 * Failures jump to err_remote_fini / err_free_remote.
 * NOTE(review): the branch on the lookup result, the labels, the release of
 * the remote on failure and the return values are not visible in this
 * listing.
 */
1105 static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg,
1106 struct vxlan_mdb_entry *mdb_entry,
1107 struct netlink_ext_ack *extack)
1109 struct vxlan_mdb_remote *remote;
1112 remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip);
1114 if (!(cfg->nlflags & NLM_F_REPLACE)) {
1115 NL_SET_ERR_MSG_MOD(extack, "Replace not specified and MDB remote entry already exists");
1118 return vxlan_mdb_remote_replace(cfg, mdb_entry, remote, extack);
1121 if (!(cfg->nlflags & NLM_F_CREATE)) {
1122 NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not exist");
1126 remote = kzalloc(sizeof(*remote), GFP_KERNEL);
1130 err = vxlan_mdb_remote_init(cfg, remote);
1132 NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry");
1133 goto err_free_remote;
1136 err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
1138 goto err_remote_fini;
1140 list_add_rcu(&remote->list, &mdb_entry->remotes);
1141 vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, remote, RTM_NEWMDB);
1146 vxlan_mdb_remote_fini(cfg->vxlan, remote);
/* Remove @remote from @mdb_entry.
 * The RTM_DELMDB notification is sent first, while the remote is still
 * intact. The remote is then unlinked with list_del_rcu(), its sources and
 * rdst are torn down, and the structure is released with kfree_rcu().
 */
1152 static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan,
1153 struct vxlan_mdb_entry *mdb_entry,
1154 struct vxlan_mdb_remote *remote)
1156 vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB);
1157 list_del_rcu(&remote->list);
1158 vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote);
1159 vxlan_mdb_remote_fini(vxlan, remote);
1160 kfree_rcu(remote, rcu);
/* Return the MDB entry for @group, creating it when it does not exist.
 * A new entry is allocated with GFP_KERNEL, gets an empty remotes list and a
 * copy of @group as key, is added to the head of vxlan->mdb_list and
 * inserted into the hash table. VXLAN_F_MDB is set on the device when the
 * new entry is the only node on the list. If the hash insertion fails,
 * control jumps to err_free_entry; the statement that unlinks the entry
 * presumably belongs to that label, and ERR_PTR(err) is returned.
 * Allocation failure yields ERR_PTR(-ENOMEM).
 * NOTE(review): the branch on the lookup result, the label line, the
 * statement freeing the entry and the success return are not visible in
 * this listing.
 */
1163 static struct vxlan_mdb_entry *
1164 vxlan_mdb_entry_get(struct vxlan_dev *vxlan,
1165 const struct vxlan_mdb_entry_key *group)
1167 struct vxlan_mdb_entry *mdb_entry;
1170 mdb_entry = vxlan_mdb_entry_lookup(vxlan, group);
1174 mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL);
1176 return ERR_PTR(-ENOMEM);
1178 INIT_LIST_HEAD(&mdb_entry->remotes);
1179 memcpy(&mdb_entry->key, group, sizeof(mdb_entry->key));
1180 hlist_add_head(&mdb_entry->mdb_node, &vxlan->mdb_list);
1182 err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl,
1184 vxlan_mdb_rht_params);
1186 goto err_free_entry;
1188 if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
1189 vxlan->cfg.flags |= VXLAN_F_MDB;
1194 hlist_del(&mdb_entry->mdb_node);
1196 return ERR_PTR(err);
/* Release @mdb_entry once it has no remotes left.
 * VXLAN_F_MDB is cleared on the device when the entry is the only node on
 * vxlan->mdb_list. The entry is then removed from the hash table and the
 * list and released with kfree_rcu().
 * NOTE(review): the statement taken when the remotes list is not empty is
 * not visible in this listing - presumably an early return.
 */
1199 static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan,
1200 struct vxlan_mdb_entry *mdb_entry)
1202 if (!list_empty(&mdb_entry->remotes))
1205 if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
1206 vxlan->cfg.flags &= ~VXLAN_F_MDB;
1208 rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode,
1209 vxlan_mdb_rht_params);
1210 hlist_del(&mdb_entry->mdb_node);
1211 kfree_rcu(mdb_entry, rcu);
/* Add or replace the remote described by @cfg.
 * The MDB entry for cfg->group is looked up or created, then the remote is
 * added to it. vxlan_mdb_entry_put() drops the entry again if it has no
 * remotes, presumably on the failure path.
 * NOTE(review): the error check after vxlan_mdb_remote_add(), the label and
 * the return values are not visible in this listing.
 */
1214 static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
1215 struct netlink_ext_ack *extack)
1217 struct vxlan_dev *vxlan = cfg->vxlan;
1218 struct vxlan_mdb_entry *mdb_entry;
1221 mdb_entry = vxlan_mdb_entry_get(vxlan, &cfg->group);
1222 if (IS_ERR(mdb_entry))
1223 return PTR_ERR(mdb_entry);
1225 err = vxlan_mdb_remote_add(cfg, mdb_entry, extack);
1234 vxlan_mdb_entry_put(vxlan, mdb_entry);
/* Delete the remote described by @cfg.
 * Both the MDB entry for cfg->group and the remote with cfg->remote_ip must
 * exist; otherwise an extack message is set. After the remote was deleted,
 * vxlan_mdb_entry_put() releases the entry if no remotes remain.
 * NOTE(review): the NULL checks, the error codes and the return values are
 * not visible in this listing.
 */
1238 static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
1239 struct netlink_ext_ack *extack)
1241 struct vxlan_dev *vxlan = cfg->vxlan;
1242 struct vxlan_mdb_entry *mdb_entry;
1243 struct vxlan_mdb_remote *remote;
1245 mdb_entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group);
1247 NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry");
1251 remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip);
1253 NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry");
1257 vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
1258 vxlan_mdb_entry_put(vxlan, mdb_entry);
/* Handle an MDB add request for VXLAN device @dev.
 * @dev:         VXLAN net device.
 * @tb:          top-level netlink attributes of the request.
 * @nlmsg_flags: netlink message flags (NLM_F_CREATE / NLM_F_REPLACE are
 *               evaluated further down the call chain).
 * @extack:      receives an error message on rejection.
 * The request is parsed into a configuration, applied with
 * __vxlan_mdb_add(), and the configuration is released afterwards.
 * NOTE(review): the error check after vxlan_mdb_config_init() and the return
 * statement are not visible in this listing.
 */
1265 int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
1266 struct netlink_ext_ack *extack)
1268 struct vxlan_mdb_config cfg;
1273 err = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack);
1277 err = __vxlan_mdb_add(&cfg, extack);
1279 vxlan_mdb_config_fini(&cfg);
/* Handle an MDB delete request for VXLAN device @dev.
 * @dev:    VXLAN net device.
 * @tb:     top-level netlink attributes of the request.
 * @extack: receives an error message on rejection.
 * The request is parsed with netlink flags 0, applied with
 * __vxlan_mdb_del(), and the configuration is released afterwards.
 * NOTE(review): the error check after vxlan_mdb_config_init() and the return
 * statement are not visible in this listing.
 */
1283 int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
1284 struct netlink_ext_ack *extack)
1286 struct vxlan_mdb_config cfg;
1291 err = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack);
1295 err = __vxlan_mdb_del(&cfg, extack);
1297 vxlan_mdb_config_fini(&cfg);
/* Find the MDB entry that applies to packet @skb.
 * @vxlan: VXLAN device.
 * @skb:   packet; its Ethernet destination and IP header are inspected.
 * The Ethernet destination is tested for multicast and for broadcast before
 * anything else. The lookup key is built from the source VNI (replaced by
 * the device's default VNI outside collect-metadata mode) and the IP source
 * and destination addresses; pskb_may_pull() is called for the IP header
 * before the addresses are read.
 * Lookups are tried in this order:
 *  1. (S, G) with the packet's source and destination addresses;
 *  2. (*, G) with the source cleared;
 *  3. the all-zeros entry with the destination cleared as well, guarded by
 *     a link-local multicast test on the destination.
 * NOTE(review): the 'src_vni' parameter line, the statements taken when a
 * test fails or a lookup succeeds, and the default cases of the switches are
 * not visible in this listing.
 */
1301 struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
1302 struct sk_buff *skb,
1305 struct vxlan_mdb_entry *mdb_entry;
1306 struct vxlan_mdb_entry_key group;
1308 if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) ||
1309 is_broadcast_ether_addr(eth_hdr(skb)->h_dest))
1312 /* When not in collect metadata mode, 'src_vni' is zero, but MDB
1313 * entries are stored with the VNI of the VXLAN device.
1315 if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA))
1316 src_vni = vxlan->default_dst.remote_vni;
1318 memset(&group, 0, sizeof(group));
1319 group.vni = src_vni;
1321 switch (skb->protocol) {
1322 case htons(ETH_P_IP):
1323 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
1325 group.dst.sa.sa_family = AF_INET;
1326 group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
1327 group.src.sa.sa_family = AF_INET;
1328 group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
1330 #if IS_ENABLED(CONFIG_IPV6)
1331 case htons(ETH_P_IPV6):
1332 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
1334 group.dst.sa.sa_family = AF_INET6;
1335 group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr;
1336 group.src.sa.sa_family = AF_INET6;
1337 group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
1344 mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
1348 memset(&group.src, 0, sizeof(group.src));
1349 mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
1353 /* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if
1354 * the destination IP address is not link-local multicast since we want
1355 * to transmit such traffic together with broadcast and unknown unicast
1358 switch (skb->protocol) {
1359 case htons(ETH_P_IP):
1360 if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr))
1362 group.dst.sin.sin_addr.s_addr = 0;
1364 #if IS_ENABLED(CONFIG_IPV6)
1365 case htons(ETH_P_IPV6):
1366 if (ipv6_addr_type(&group.dst.sin6.sin6_addr) &
1367 IPV6_ADDR_LINKLOCAL)
1369 memset(&group.dst.sin6.sin6_addr, 0,
1370 sizeof(group.dst.sin6.sin6_addr));
1377 return vxlan_mdb_entry_lookup(vxlan, &group);
/* Transmit @skb to the remotes of @mdb_entry.
 * @vxlan:     VXLAN device.
 * @mdb_entry: entry returned by the lookup; its key VNI is used as the
 *             source VNI.
 * @skb:       packet to replicate.
 * The remotes list is walked with list_for_each_entry_rcu(), and flags,
 * filter mode and the rdst pointer are read with READ_ONCE() /
 * rcu_dereference(). A remote is tested for two conditions: a (*, G) entry
 * in INCLUDE mode, and the BLOCKED flag. Clones of @skb are sent with
 * vxlan_xmit_one(); the original @skb is sent to 'fremote' after the loop.
 * Always returns NETDEV_TX_OK.
 * NOTE(review): the statement taken when the filter test matches, the
 * selection of 'fremote', the clone failure check and the handling of the
 * case without any eligible remote are not visible in this listing.
 */
1380 netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
1381 const struct vxlan_mdb_entry *mdb_entry,
1382 struct sk_buff *skb)
1384 struct vxlan_mdb_remote *remote, *fremote = NULL;
1385 __be32 src_vni = mdb_entry->key.vni;
1387 list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) {
1388 struct sk_buff *skb1;
1390 if ((vxlan_mdb_is_star_g(&mdb_entry->key) &&
1391 READ_ONCE(remote->filter_mode) == MCAST_INCLUDE) ||
1392 (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED))
1400 skb1 = skb_clone(skb, GFP_ATOMIC);
1402 vxlan_xmit_one(skb1, vxlan->dev, src_vni,
1403 rcu_dereference(remote->rd), false);
1407 vxlan_xmit_one(skb, vxlan->dev, src_vni,
1408 rcu_dereference(fremote->rd), false);
1412 return NETDEV_TX_OK;
/* Per-element callback handed to rhashtable_free_and_destroy() by
 * vxlan_mdb_fini().
 * NOTE(review): the body is not visible in this listing; judging by the name
 * it flags entries that are still in the table at destruction - confirm.
 */
1415 static void vxlan_mdb_check_empty(void *ptr, void *arg)
/* Delete every remote of @mdb_entry. The _safe iterator is required because
 * each remote is unlinked while walking.
 */
1420 static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
1421 struct vxlan_mdb_entry *mdb_entry)
1423 struct vxlan_mdb_remote *remote, *tmp;
1425 list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list)
1426 vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
/* Remove all MDB entries of @vxlan: for each entry on vxlan->mdb_list the
 * remotes are flushed and the entry is then released through
 * vxlan_mdb_entry_put().
 */
1429 static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
1431 struct vxlan_mdb_entry *mdb_entry;
1432 struct hlist_node *tmp;
1434 /* The removal of an entry cannot trigger the removal of another entry
1435 * since entries are always added to the head of the list.
1437 hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) {
1438 vxlan_mdb_remotes_flush(vxlan, mdb_entry);
1439 vxlan_mdb_entry_put(vxlan, mdb_entry);
/* Set up the MDB state of @vxlan: initialise the hash table with
 * vxlan_mdb_rht_params and the entry list head.
 * NOTE(review): the error check after rhashtable_init() and the return
 * values are not visible in this listing.
 */
1443 int vxlan_mdb_init(struct vxlan_dev *vxlan)
1447 err = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params);
1451 INIT_HLIST_HEAD(&vxlan->mdb_list);
1456 void vxlan_mdb_fini(struct vxlan_dev *vxlan)
1458 vxlan_mdb_entries_flush(vxlan);
1459 WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
1460 rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,