2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
6 * LSIIT Laboratory, Strasbourg, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
19 #include <linux/uaccess.h>
20 #include <linux/types.h>
21 #include <linux/sched.h>
22 #include <linux/errno.h>
24 #include <linux/kernel.h>
25 #include <linux/fcntl.h>
26 #include <linux/stat.h>
27 #include <linux/socket.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/inetdevice.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/init.h>
34 #include <linux/compat.h>
35 #include <linux/rhashtable.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
39 #include <linux/notifier.h>
40 #include <linux/if_arp.h>
41 #include <net/checksum.h>
42 #include <net/netlink.h>
43 #include <net/fib_rules.h>
46 #include <net/ip6_route.h>
47 #include <linux/mroute6.h>
48 #include <linux/pim.h>
49 #include <net/addrconf.h>
50 #include <linux/netfilter_ipv6.h>
51 #include <linux/export.h>
52 #include <net/ip6_checksum.h>
53 #include <linux/netconf.h>
/* NOTE(review): this file is an extraction of the kernel's net/ipv6/ip6mr.c
 * in which the original source line numbers have been fused into the text
 * and many lines (braces, blank lines, statements) are missing.  Comments
 * added below document only the code that is visible; gaps are flagged,
 * never reconstructed. */

/* Tail of struct ip6mr_rule (the struct opening is missing): embeds the
 * generic fib_rule so the fib_rules_* core can manage ip6mr policy rules. */
56 struct fib_rule common;

63 /* Big lock, protecting vif table, mrt cache and mroute socket state.
64 Note that the changes are semaphored via rtnl_lock.
67 static DEFINE_RWLOCK(mrt_lock);

69 /* Multicast router control variables */

71 /* Special spinlock for queue of unresolved entries */
72 static DEFINE_SPINLOCK(mfc_unres_lock);

74 /* We return to original Alan's scheme. Hash table of resolved
75 entries is changed only in process context and protected
76 with weak lock mrt_lock. Queue of unresolved entries is protected
77 with strong spinlock mfc_unres_lock.
79 In this case data path is free of exclusive locks at all.

/* Slab cache for struct mfc6_cache entries (created in ip6_mr_init()). */
82 static struct kmem_cache *mrt_cachep __read_mostly;

/* Forward declarations.  NOTE(review): the mr6_netlink_event() prototype is
 * visibly truncated — its final parameter line is missing from this
 * extraction. */
84 static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
85 static void ip6mr_free_table(struct mr_table *mrt);
87 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
88 struct sk_buff *skb, struct mfc6_cache *cache);
89 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
90 mifi_t mifi, int assert);
91 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
93 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
94 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
95 struct netlink_callback *cb);
96 static void mroute_clean_tables(struct mr_table *mrt, bool all);
97 static void ipmr_expire_process(struct timer_list *t);
/* ---- CONFIG_IPV6_MROUTE_MULTIPLE_TABLES: policy routing of multicast ----
 * NOTE(review): function bodies in this section are fragmentary; the
 * extraction dropped braces and several statements. */
99 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* Iterate over every mr_table in the netns, under RCU. */
100 #define ip6mr_for_each_table(mrt, net) \
101 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

/* Return the table following @mrt on net->ipv6.mr6_tables (mrt == NULL
 * restarts from the head); the end-of-list NULL return is visible below. */
103 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
104 struct mr_table *mrt)
106 struct mr_table *ret;
109 ret = list_entry_rcu(net->ipv6.mr6_tables.next,
110 struct mr_table, list);
112 ret = list_entry_rcu(mrt->list.next,
113 struct mr_table, list);
115 if (&ret->list == &net->ipv6.mr6_tables)

/* Look up an mr_table by id; body truncated (the id match and return
 * lines are missing from this extraction). */
120 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
122 struct mr_table *mrt;
124 ip6mr_for_each_table(mrt, net) {

/* Resolve the mr_table for a flow via the fib-rules engine.
 * FIB_LOOKUP_NOREF: result table is not reference-counted. */
131 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
132 struct mr_table **mrt)
135 struct ip6mr_result res;
136 struct fib_lookup_arg arg = {
138 .flags = FIB_LOOKUP_NOREF,
141 err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
142 flowi6_to_flowi(flp6), 0, &arg);

/* fib_rules .action callback: map the rule's action to a table lookup or
 * an error (the per-case return lines are missing here). */
149 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
150 int flags, struct fib_lookup_arg *arg)
152 struct ip6mr_result *res = arg->result;
153 struct mr_table *mrt;
155 switch (rule->action) {
158 case FR_ACT_UNREACHABLE:
160 case FR_ACT_PROHIBIT:
162 case FR_ACT_BLACKHOLE:
167 mrt = ip6mr_get_table(rule->fr_net, rule->table);

/* fib_rules .match callback; body missing from this extraction. */
174 static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)

/* Netlink attribute policy for ip6mr rules (no extra attributes visible). */
179 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {

/* .configure / .compare / .fill callbacks; bodies missing from this
 * extraction. */
183 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
184 struct fib_rule_hdr *frh, struct nlattr **tb,
185 struct netlink_ext_ack *extack)

190 static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,

196 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
197 struct fib_rule_hdr *frh)

/* fib_rules ops template, instantiated per netns by ip6mr_rules_init(). */
205 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
206 .family = RTNL_FAMILY_IP6MR,
207 .rule_size = sizeof(struct ip6mr_rule),
208 .addr_size = sizeof(struct in6_addr),
209 .action = ip6mr_rule_action,
210 .match = ip6mr_rule_match,
211 .configure = ip6mr_rule_configure,
212 .compare = ip6mr_rule_compare,
213 .fill = ip6mr_rule_fill,
214 .nlgroup = RTNLGRP_IPV6_RULE,
215 .policy = ip6mr_rule_policy,
216 .owner = THIS_MODULE,

/* Per-netns init: register the rule ops, create the default table and a
 * catch-all (0x7fff priority) rule pointing at it.  The error-unwind goto
 * lines are missing from this extraction; the unwind statements appear at
 * the bottom. */
219 static int __net_init ip6mr_rules_init(struct net *net)
221 struct fib_rules_ops *ops;
222 struct mr_table *mrt;
225 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
229 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
231 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
237 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
241 net->ipv6.mr6_rules_ops = ops;
245 ip6mr_free_table(mrt);
247 fib_rules_unregister(ops);

/* Per-netns teardown: free every table, then drop the rule ops. */
251 static void __net_exit ip6mr_rules_exit(struct net *net)
253 struct mr_table *mrt, *next;
256 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
257 list_del(&mrt->list);
258 ip6mr_free_table(mrt);
260 fib_rules_unregister(net->ipv6.mr6_rules_ops);

/* Dump / sequence-count hooks used by the FIB notifier infrastructure. */
264 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
266 return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);

269 static unsigned int ip6mr_rules_seq_read(struct net *net)
271 return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);

/* True iff @rule is the auto-installed match-all rule targeting the
 * default table (and not bound to an l3mdev). */
274 bool ip6mr_rule_default(const struct fib_rule *rule)
276 return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
277 rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
279 EXPORT_SYMBOL(ip6mr_rule_default);
/* ---- Single-table build (the #else/#endif of
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES; those preprocessor lines were lost
 * in extraction): everything maps onto the one table net->ipv6.mrt6. ---- */
281 #define ip6mr_for_each_table(mrt, net) \
282 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

/* Iterator degenerates to "the one table"; the NULL-returning branch for
 * subsequent calls is missing from this extraction. */
284 static struct mr_table *ip6mr_mr_table_iter(struct net *net,
285 struct mr_table *mrt)
288 return net->ipv6.mrt6;

/* @id is ignored: only one table exists per netns in this configuration. */
292 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
294 return net->ipv6.mrt6;

297 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
298 struct mr_table **mrt)
300 *mrt = net->ipv6.mrt6;

/* Per-netns init/exit without fib-rules: just create/free the one table. */
304 static int __net_init ip6mr_rules_init(struct net *net)
306 struct mr_table *mrt;
308 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
311 net->ipv6.mrt6 = mrt;

315 static void __net_exit ip6mr_rules_exit(struct net *net)
318 ip6mr_free_table(net->ipv6.mrt6);
319 net->ipv6.mrt6 = NULL;

/* Nothing to dump and no rule sequence counter in single-table builds;
 * the return statements are missing from this extraction. */
323 static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)

328 static unsigned int ip6mr_rules_seq_read(struct net *net)
/* rhashtable compare callback: entries are keyed on (origin, mcastgrp);
 * returns non-zero when either address differs, i.e. 0 means match. */
334 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
337 const struct mfc6_cache_cmp_arg *cmparg = arg->key;
338 struct mfc6_cache *c = (struct mfc6_cache *)ptr;
340 return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
341 !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);

/* Hash-table layout for the resolved MFC cache. */
344 static const struct rhashtable_params ip6mr_rht_params = {
345 .head_offset = offsetof(struct mr_mfc, mnode),
346 .key_offset = offsetof(struct mfc6_cache, cmparg),
347 .key_len = sizeof(struct mfc6_cache_cmp_arg),
350 .obj_cmpfn = ip6mr_hash_cmp,
351 .automatic_shrinking = true,

/* Hook passed to mr_table_alloc(): link a new table into the per-netns
 * list (multiple-tables builds only). */
354 static void ip6mr_new_table_set(struct mr_table *mrt,
357 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
358 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);

/* (*,*) wildcard key used for (*,G)/(*,*) lookups. */
362 static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
363 .mf6c_origin = IN6ADDR_ANY_INIT,
364 .mf6c_mcastgrp = IN6ADDR_ANY_INIT,

367 static struct mr_table_ops ip6mr_mr_table_ops = {
368 .rht_params = &ip6mr_rht_params,
369 .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,

/* Return the existing table with @id, or allocate a fresh one through the
 * shared mr_table_alloc() helper.  The early-return for an existing table
 * is missing from this extraction. */
372 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
374 struct mr_table *mrt;
376 mrt = ip6mr_get_table(net, id);
380 return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
381 ipmr_expire_process, ip6mr_new_table_set);

/* Tear down one table: stop the unresolved-queue timer, flush all vifs
 * and cache entries, destroy the hash.  NOTE(review): upstream also
 * kfree()s the table here — that line is absent from this extraction;
 * confirm against the real source. */
384 static void ip6mr_free_table(struct mr_table *mrt)
386 del_timer_sync(&mrt->ipmr_expire_timer);
387 mroute_clean_tables(mrt, true);
388 rhltable_destroy(&mrt->mfc_hash);
392 #ifdef CONFIG_PROC_FS
393 /* The /proc interfaces to multicast routing
394 * /proc/ip6_mr_cache /proc/ip6_mr_vif

/* seq_file start for /proc/net/ip6_mr_vif: pin the default table into the
 * iterator and hold mrt_lock for the duration of the walk (released in
 * ip6mr_vif_seq_stop()). */
397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
400 struct mr_vif_iter *iter = seq->private;
401 struct net *net = seq_file_net(seq);
402 struct mr_table *mrt;
404 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
406 return ERR_PTR(-ENOENT);
410 read_lock(&mrt_lock);
411 return mr_vif_seq_start(seq, pos);

/* Drop the lock taken in ip6mr_vif_seq_start(). */
414 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
417 read_unlock(&mrt_lock);

/* One row per vif: index, device name, byte/packet counters, flags. */
420 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
422 struct mr_vif_iter *iter = seq->private;
423 struct mr_table *mrt = iter->mrt;
425 if (v == SEQ_START_TOKEN) {
427 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
429 const struct vif_device *vif = v;
430 const char *name = vif->dev ? vif->dev->name : "none";
433 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
434 vif - mrt->vif_table,
435 name, vif->bytes_in, vif->pkt_in,
436 vif->bytes_out, vif->pkt_out,

442 static const struct seq_operations ip6mr_vif_seq_ops = {
443 .start = ip6mr_vif_seq_start,
444 .next = mr_vif_seq_next,
445 .stop = ip6mr_vif_seq_stop,
446 .show = ip6mr_vif_seq_show,

/* seq_file start for /proc/net/ip6_mr_cache; the shared mr_mfc_seq_start()
 * walks both the resolved hash and the unresolved queue. */
449 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
451 struct net *net = seq_file_net(seq);
452 struct mr_table *mrt;
454 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
456 return ERR_PTR(-ENOENT);
458 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);

/* One row per cache entry; unresolved entries print zeroed counters. */
461 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
465 if (v == SEQ_START_TOKEN) {
469 "Iif Pkts Bytes Wrong Oifs\n");
471 const struct mfc6_cache *mfc = v;
472 const struct mr_mfc_iter *it = seq->private;
473 struct mr_table *mrt = it->mrt;
475 seq_printf(seq, "%pI6 %pI6 %-3hd",
476 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
479 if (it->cache != &mrt->mfc_unres_queue) {
480 seq_printf(seq, " %8lu %8lu %8lu",
481 mfc->_c.mfc_un.res.pkt,
482 mfc->_c.mfc_un.res.bytes,
483 mfc->_c.mfc_un.res.wrong_if);
/* Emit an entry for every forwarding vif with a real TTL threshold
 * (255 means "do not forward"). */
484 for (n = mfc->_c.mfc_un.res.minvif;
485 n < mfc->_c.mfc_un.res.maxvif; n++) {
486 if (VIF_EXISTS(mrt, n) &&
487 mfc->_c.mfc_un.res.ttls[n] < 255)
490 mfc->_c.mfc_un.res.ttls[n]);
493 /* unresolved mfc_caches don't contain
494 * pkt, bytes and wrong_if values
496 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);

503 static const struct seq_operations ipmr_mfc_seq_ops = {
504 .start = ipmr_mfc_seq_start,
505 .next = mr_mfc_seq_next,
506 .stop = mr_mfc_seq_stop,
507 .show = ipmr_mfc_seq_show,
511 #ifdef CONFIG_IPV6_PIMSM_V2

/* Receive a PIM register packet: validate type/flags/checksum, check that
 * the encapsulated packet is destined to a multicast group, then re-inject
 * it on the register vif as if it had arrived there.  Several lines
 * (error gotos, the final return) are missing from this extraction. */
513 static int pim6_rcv(struct sk_buff *skb)
515 struct pimreghdr *pim;
516 struct ipv6hdr *encap;
517 struct net_device *reg_dev = NULL;
518 struct net *net = dev_net(skb->dev);
519 struct mr_table *mrt;
520 struct flowi6 fl6 = {
521 .flowi6_iif = skb->dev->ifindex,
522 .flowi6_mark = skb->mark,
526 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
529 pim = (struct pimreghdr *)skb_transport_header(skb);
/* Accept only a PIMv2 REGISTER (not NULL-register) whose checksum
 * validates either over the header alone or over the whole packet. */
530 if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
531 (pim->flags & PIM_NULL_REGISTER) ||
532 (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
533 sizeof(*pim), IPPROTO_PIM,
534 csum_partial((void *)pim, sizeof(*pim), 0)) &&
535 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
538 /* check if the inner packet is destined to mcast group */
539 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
542 if (!ipv6_addr_is_multicast(&encap->daddr) ||
543 encap->payload_len == 0 ||
544 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
547 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
549 reg_vif_num = mrt->mroute_reg_vif_num;
/* Snapshot the register device under mrt_lock. */
551 read_lock(&mrt_lock);
552 if (reg_vif_num >= 0)
553 reg_dev = mrt->vif_table[reg_vif_num].dev;
556 read_unlock(&mrt_lock);
/* Strip the outer IPv6 + PIM headers and hand the inner packet to the
 * register device via the tunnel-rx path. */
561 skb->mac_header = skb->network_header;
562 skb_pull(skb, (u8 *)encap - skb->data);
563 skb_reset_network_header(skb);
564 skb->protocol = htons(ETH_P_IPV6);
565 skb->ip_summed = CHECKSUM_NONE;
567 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

/* inet6 protocol handler for IPPROTO_PIM; the .handler initializer line is
 * missing from this extraction. */
578 static const struct inet6_protocol pim6_protocol = {

582 /* Service routines creating virtual interfaces: PIMREG */

/* Transmit on the pim6reg device: packets sent here are reported to the
 * daemon as MRT6MSG_WHOLEPKT rather than going on the wire. */
584 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
585 struct net_device *dev)
587 struct net *net = dev_net(dev);
588 struct mr_table *mrt;
589 struct flowi6 fl6 = {
590 .flowi6_oif = dev->ifindex,
591 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
592 .flowi6_mark = skb->mark,
596 err = ip6mr_fib_lookup(net, &fl6, &mrt);
602 read_lock(&mrt_lock);
603 dev->stats.tx_bytes += skb->len;
604 dev->stats.tx_packets++;
605 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
606 read_unlock(&mrt_lock);

/* .ndo_get_iflink for the register device; body missing (upstream: 0). */
611 static int reg_vif_get_iflink(const struct net_device *dev)

616 static const struct net_device_ops reg_vif_netdev_ops = {
617 .ndo_start_xmit = reg_vif_xmit,
618 .ndo_get_iflink = reg_vif_get_iflink,
/* netdev setup callback for the pim6reg pseudo-device.  The MTU leaves
 * room for the outer IPv6 header plus the 8-byte PIM register header.
 * Fix: "®_vif_netdev_ops" was HTML-entity mojibake ("&reg" rendered as
 * the ® character); restored to taking the address of reg_vif_netdev_ops
 * as declared above. */
621 static void reg_vif_setup(struct net_device *dev)
623 dev->type = ARPHRD_PIMREG;
624 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
625 dev->flags = IFF_NOARP;
626 dev->netdev_ops = &reg_vif_netdev_ops;
627 dev->needs_free_netdev = true;
628 dev->features |= NETIF_F_NETNS_LOCAL;
/* Create and register the per-table "pim6reg[N]" device (default table
 * gets the bare "pim6reg" name).  Returns the device, or NULL on failure;
 * parts of the error path are missing from this extraction. */
631 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
633 struct net_device *dev;
636 if (mrt->id == RT6_TABLE_DFLT)
637 sprintf(name, "pim6reg");
639 sprintf(name, "pim6reg%u", mrt->id);
641 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
645 dev_net_set(dev, net);
647 if (register_netdevice(dev)) {
659 unregister_netdevice(dev);

/* Fire FIB notifier chain events for vif / MFC entry changes, bumping the
 * per-netns ipmr sequence counter consumed by ip6mr_seq_read(). */
664 static int call_ip6mr_vif_entry_notifiers(struct net *net,
665 enum fib_event_type event_type,
666 struct vif_device *vif,
667 mifi_t vif_index, u32 tb_id)
669 return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
670 vif, vif_index, tb_id,
671 &net->ipv6.ipmr_seq);

674 static int call_ip6mr_mfc_entry_notifiers(struct net *net,
675 enum fib_event_type event_type,
676 struct mfc6_cache *mfc, u32 tb_id)
678 return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
679 &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
682 /* Delete a VIF entry */
/* Unlink vif @vifi from @mrt under mrt_lock, drop allmulti on its device,
 * update mc_forwarding accounting, and queue MIFF_REGISTER devices for
 * unregistration on @head.  NOTE(review): the lines detaching v->dev and
 * the final return are missing from this extraction. */
683 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
684 struct list_head *head)
686 struct vif_device *v;
687 struct net_device *dev;
688 struct inet6_dev *in6_dev;
690 if (vifi < 0 || vifi >= mrt->maxvif)
691 return -EADDRNOTAVAIL;
693 v = &mrt->vif_table[vifi];
695 if (VIF_EXISTS(mrt, vifi))
696 call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
697 FIB_EVENT_VIF_DEL, v, vifi,
700 write_lock_bh(&mrt_lock);
705 write_unlock_bh(&mrt_lock);
706 return -EADDRNOTAVAIL;
709 #ifdef CONFIG_IPV6_PIMSM_V2
710 if (vifi == mrt->mroute_reg_vif_num)
711 mrt->mroute_reg_vif_num = -1;
/* If the highest vif was removed, shrink maxvif to the highest survivor. */
714 if (vifi + 1 == mrt->maxvif) {
716 for (tmp = vifi - 1; tmp >= 0; tmp--) {
717 if (VIF_EXISTS(mrt, tmp))
720 mrt->maxvif = tmp + 1;
723 write_unlock_bh(&mrt_lock);
725 dev_set_allmulti(dev, -1);
727 in6_dev = __in6_dev_get(dev);
729 in6_dev->cnf.mc_forwarding--;
730 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
731 NETCONFA_MC_FORWARDING,
732 dev->ifindex, &in6_dev->cnf);
735 if ((v->flags & MIFF_REGISTER) && !notify)
736 unregister_netdevice_queue(dev, head);

/* RCU callback: return the cache entry to the slab. */
742 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
744 struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
746 kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);

/* Free a cache entry after an RCU grace period. */
749 static inline void ip6mr_cache_free(struct mfc6_cache *c)
751 call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
754 /* Destroy an unresolved cache entry, killing queued skbs
755 and reporting error to netlink readers.

758 static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
760 struct net *net = read_pnet(&mrt->net);
763 atomic_dec(&mrt->cache_resolve_queue_len);
/* Queued skbs whose "IPv6 version" is 0 are stashed netlink requests:
 * rewrite them into an -ETIMEDOUT NLMSG_ERROR reply.  The else-branch
 * dropping real packets is missing from this extraction. */
765 while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
766 if (ipv6_hdr(skb)->version == 0) {
767 struct nlmsghdr *nlh = skb_pull(skb,
768 sizeof(struct ipv6hdr));
769 nlh->nlmsg_type = NLMSG_ERROR;
770 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
771 skb_trim(skb, nlh->nlmsg_len);
772 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
773 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);

782 /* Timer process for all the unresolved queue. */

/* Walk the unresolved queue (mfc_unres_lock held by the caller), destroy
 * expired entries, and re-arm the timer for the earliest remaining
 * expiry (default horizon 10*HZ). */
784 static void ipmr_do_expire_process(struct mr_table *mrt)
786 unsigned long now = jiffies;
787 unsigned long expires = 10 * HZ;
788 struct mr_mfc *c, *next;
790 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
791 if (time_after(c->mfc_un.unres.expires, now)) {
793 unsigned long interval = c->mfc_un.unres.expires - now;
794 if (interval < expires)
800 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
801 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
804 if (!list_empty(&mrt->mfc_unres_queue))
805 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

/* Timer handler: if the lock is contended, retry in one jiffy rather
 * than spin in timer context. */
808 static void ipmr_expire_process(struct timer_list *t)
810 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
812 if (!spin_trylock(&mfc_unres_lock)) {
813 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
817 if (!list_empty(&mrt->mfc_unres_queue))
818 ipmr_do_expire_process(mrt);
820 spin_unlock(&mfc_unres_lock);

823 /* Fill oifs list. It is called under write locked mrt_lock. */

/* Recompute minvif/maxvif and the per-vif TTL thresholds from @ttls;
 * a threshold of 255 means "do not forward on this vif". */
825 static void ip6mr_update_thresholds(struct mr_table *mrt,
826 struct mr_mfc *cache,
831 cache->mfc_un.res.minvif = MAXMIFS;
832 cache->mfc_un.res.maxvif = 0;
833 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
835 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
836 if (VIF_EXISTS(mrt, vifi) &&
837 ttls[vifi] && ttls[vifi] < 255) {
838 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
839 if (cache->mfc_un.res.minvif > vifi)
840 cache->mfc_un.res.minvif = vifi;
841 if (cache->mfc_un.res.maxvif <= vifi)
842 cache->mfc_un.res.maxvif = vifi + 1;
845 cache->mfc_un.res.lastuse = jiffies;
/* Add a multicast virtual interface (MRT6_ADD_MIF).  For MIFF_REGISTER the
 * pim6reg device is created on demand; otherwise the physical device named
 * by mif6c_pifi is used.  Error paths and the final return are partly
 * missing from this extraction. */
848 static int mif6_add(struct net *net, struct mr_table *mrt,
849 struct mif6ctl *vifc, int mrtsock)
851 int vifi = vifc->mif6c_mifi;
852 struct vif_device *v = &mrt->vif_table[vifi];
853 struct net_device *dev;
854 struct inet6_dev *in6_dev;
858 if (VIF_EXISTS(mrt, vifi))
861 switch (vifc->mif6c_flags) {
862 #ifdef CONFIG_IPV6_PIMSM_V2
865 * Special Purpose VIF in PIM
866 * All the packets will be sent to the daemon
/* Only one register vif is allowed per table. */
868 if (mrt->mroute_reg_vif_num >= 0)
870 dev = ip6mr_reg_vif(net, mrt);
873 err = dev_set_allmulti(dev, 1);
875 unregister_netdevice(dev);
882 dev = dev_get_by_index(net, vifc->mif6c_pifi);
884 return -EADDRNOTAVAIL;
885 err = dev_set_allmulti(dev, 1);
/* Account the device as a multicast forwarder and notify netconf. */
895 in6_dev = __in6_dev_get(dev);
897 in6_dev->cnf.mc_forwarding++;
898 inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
899 NETCONFA_MC_FORWARDING,
900 dev->ifindex, &in6_dev->cnf);
903 /* Fill in the VIF structures */
904 vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
905 vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
908 /* And finish update writing critical data */
909 write_lock_bh(&mrt_lock);
911 #ifdef CONFIG_IPV6_PIMSM_V2
912 if (v->flags & MIFF_REGISTER)
913 mrt->mroute_reg_vif_num = vifi;
915 if (vifi + 1 > mrt->maxvif)
916 mrt->maxvif = vifi + 1;
917 write_unlock_bh(&mrt_lock);
918 call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,

/* Exact (S,G) lookup in the resolved cache. */
923 static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
924 const struct in6_addr *origin,
925 const struct in6_addr *mcastgrp)
927 struct mfc6_cache_cmp_arg arg = {
928 .mf6c_origin = *origin,
929 .mf6c_mcastgrp = *mcastgrp,
932 return mr_mfc_find(mrt, &arg);

935 /* Look for a (*,G) entry */
936 static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
937 struct in6_addr *mcastgrp,
940 struct mfc6_cache_cmp_arg arg = {
941 .mf6c_origin = in6addr_any,
942 .mf6c_mcastgrp = *mcastgrp,
/* A fully wildcard group falls back to the (*,*) parent lookup. */
945 if (ipv6_addr_any(mcastgrp))
946 return mr_mfc_find_any_parent(mrt, mifi);
947 return mr_mfc_find_any(mrt, mifi, &arg);

950 /* Look for a (S,G,iif) entry if parent != -1 */
951 static struct mfc6_cache *
952 ip6mr_cache_find_parent(struct mr_table *mrt,
953 const struct in6_addr *origin,
954 const struct in6_addr *mcastgrp,
957 struct mfc6_cache_cmp_arg arg = {
958 .mf6c_origin = *origin,
959 .mf6c_mcastgrp = *mcastgrp,
962 return mr_mfc_find_parent(mrt, &arg, parent);
965 /* Allocate a multicast cache entry */
966 static struct mfc6_cache *ip6mr_cache_alloc(void)
968 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
/* NOTE(review): the NULL-check after allocation is missing from this
 * extraction. */
971 c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
972 c->_c.mfc_un.res.minvif = MAXMIFS;
973 c->_c.free = ip6mr_cache_free_rcu;
974 refcount_set(&c->_c.mfc_un.res.refcount, 1);

/* Atomic-context variant used for unresolved entries; the entry expires
 * 10 seconds after creation. */
978 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
980 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
983 skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
984 c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;

989 * A cache entry has gone into a resolved state from queued

/* Replay every packet queued on unresolved entry @uc through the freshly
 * resolved entry @c; stashed netlink requests are answered in place. */
992 static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
993 struct mfc6_cache *uc, struct mfc6_cache *c)
998 * Play the pending entries through our router
1001 while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1002 if (ipv6_hdr(skb)->version == 0) {
1003 struct nlmsghdr *nlh = skb_pull(skb,
1004 sizeof(struct ipv6hdr));
1006 if (mr_fill_mroute(mrt, skb, &c->_c,
1007 nlmsg_data(nlh)) > 0) {
1008 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
/* Route did not fit: answer the request with -EMSGSIZE instead. */
1010 nlh->nlmsg_type = NLMSG_ERROR;
1011 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1012 skb_trim(skb, nlh->nlmsg_len);
1013 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1015 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1017 ip6_mr_forward(net, mrt, skb, c);

1022 * Bounce a cache query up to pim6sd and netlink.
1024 * Called under mrt_lock.

/* Build an MRT6MSG and queue it on the mroute control socket.  WHOLEPKT
 * reports (PIM register path) carry the full packet; other asserts carry
 * only the IPv6 header.  Several error-path lines are missing from this
 * extraction. */
1027 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1028 mifi_t mifi, int assert)
1030 struct sock *mroute6_sk;
1031 struct sk_buff *skb;
1032 struct mrt6msg *msg;
1035 #ifdef CONFIG_IPV6_PIMSM_V2
1036 if (assert == MRT6MSG_WHOLEPKT)
1037 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1041 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1046 /* I suppose that internal messages
1047 * do not require checksums */
1049 skb->ip_summed = CHECKSUM_UNNECESSARY;
1051 #ifdef CONFIG_IPV6_PIMSM_V2
1052 if (assert == MRT6MSG_WHOLEPKT) {
1053 /* Ugly, but we have no choice with this interface.
1054 Duplicate old header, fix length etc.
1055 And all this only to mangle msg->im6_msgtype and
1056 to set msg->im6_mbz to "mbz" :-)
1058 skb_push(skb, -skb_network_offset(pkt));
1060 skb_push(skb, sizeof(*msg));
1061 skb_reset_transport_header(skb);
1062 msg = (struct mrt6msg *)skb_transport_header(skb);
1064 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1065 msg->im6_mif = mrt->mroute_reg_vif_num;
1067 msg->im6_src = ipv6_hdr(pkt)->saddr;
1068 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1070 skb->ip_summed = CHECKSUM_UNNECESSARY;
1075 * Copy the IP header
1078 skb_put(skb, sizeof(struct ipv6hdr));
1079 skb_reset_network_header(skb);
1080 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
/* Append the mrt6msg trailer after the copied IPv6 header. */
1085 skb_put(skb, sizeof(*msg));
1086 skb_reset_transport_header(skb);
1087 msg = (struct mrt6msg *)skb_transport_header(skb);
1090 msg->im6_msgtype = assert;
1091 msg->im6_mif = mifi;
1093 msg->im6_src = ipv6_hdr(pkt)->saddr;
1094 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1096 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1097 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Deliver to the daemon; fails if no control socket is attached (that
 * error branch is missing here). */
1101 mroute6_sk = rcu_dereference(mrt->mroute_sk);
1108 mrt6msg_netlink_event(mrt, skb);
1110 /* Deliver to user space multicast routing algorithms */
1111 ret = sock_queue_rcv_skb(mroute6_sk, skb);
1114 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1121 /* Queue a packet for resolution. It gets locked cache entry! */
/* No (S,G) route yet: park @skb on an unresolved entry, creating one and
 * reporting MRT6MSG_NOCACHE to the daemon if needed.  Limits visible
 * below: at most 10 pending entries per table, 3 queued skbs per entry. */
1122 static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1123 struct sk_buff *skb)
1125 struct mfc6_cache *c;
1129 spin_lock_bh(&mfc_unres_lock);
1130 list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1131 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1132 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1140 * Create a new entry if allowable
1143 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1144 (c = ip6mr_cache_alloc_unres()) == NULL) {
1145 spin_unlock_bh(&mfc_unres_lock);
1151 /* Fill in the new cache entry */
1152 c->_c.mfc_parent = -1;
1153 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1154 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1157 * Reflect first query at pim6sd
1159 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1161 /* If the report failed throw the cache entry
1164 spin_unlock_bh(&mfc_unres_lock);
1166 ip6mr_cache_free(c);
/* Entry accepted: queue it and (re)arm the expiry machinery. */
1171 atomic_inc(&mrt->cache_resolve_queue_len);
1172 list_add(&c->_c.list, &mrt->mfc_unres_queue);
1173 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1175 ipmr_do_expire_process(mrt);
1178 /* See if we can append the packet */
1179 if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1183 skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1187 spin_unlock_bh(&mfc_unres_lock);

1192 * MFC6 cache manipulation by user space

/* MRT6_DEL_MFC: remove an (S,G[,parent]) entry and notify listeners. */
1195 static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1198 struct mfc6_cache *c;
1200 /* The entries are added/deleted only under RTNL */
1202 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1203 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1207 rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1208 list_del_rcu(&c->_c.list);
1210 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1211 FIB_EVENT_ENTRY_DEL, c, mrt->id);
1212 mr6_netlink_event(mrt, c, RTM_DELROUTE);
1213 mr_cache_put(&c->_c);

/* netdevice notifier: on NETDEV_UNREGISTER, delete every vif bound to the
 * departing device in every table (the dev comparison line is missing). */
1217 static int ip6mr_device_event(struct notifier_block *this,
1218 unsigned long event, void *ptr)
1220 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1221 struct net *net = dev_net(dev);
1222 struct mr_table *mrt;
1223 struct vif_device *v;
1226 if (event != NETDEV_UNREGISTER)
1229 ip6mr_for_each_table(mrt, net) {
1230 v = &mrt->vif_table[0];
1231 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1233 mif6_delete(mrt, ct, 1, NULL);
/* Combined sequence counter seen by FIB notifier consumers: table-entry
 * changes plus rule changes. */
1240 static unsigned int ip6mr_seq_read(struct net *net)
1244 return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);

1247 static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1249 return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1250 ip6mr_mr_table_iter, &mrt_lock);

1253 static struct notifier_block ip6_mr_notifier = {
1254 .notifier_call = ip6mr_device_event

1257 static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1258 .family = RTNL_FAMILY_IP6MR,
1259 .fib_seq_read = ip6mr_seq_read,
1260 .fib_dump = ip6mr_dump,
1261 .owner = THIS_MODULE,

/* Register the per-netns FIB notifier ops; sequence counter starts at 0. */
1264 static int __net_init ip6mr_notifier_init(struct net *net)
1266 struct fib_notifier_ops *ops;
1268 net->ipv6.ipmr_seq = 0;
1270 ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1272 return PTR_ERR(ops);
1274 net->ipv6.ip6mr_notifier_ops = ops;

1279 static void __net_exit ip6mr_notifier_exit(struct net *net)
1281 fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1282 net->ipv6.ip6mr_notifier_ops = NULL;

1285 /* Setup for IP multicast routing */
/* Per-netns init: notifier ops, rules/tables, then the two proc files.
 * Unwind statements appear at the bottom; several goto lines are missing
 * from this extraction. */
1286 static int __net_init ip6mr_net_init(struct net *net)
1290 err = ip6mr_notifier_init(net);
1294 err = ip6mr_rules_init(net);
1296 goto ip6mr_rules_fail;
1298 #ifdef CONFIG_PROC_FS
1300 if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1301 sizeof(struct mr_vif_iter)))
1303 if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1304 sizeof(struct mr_mfc_iter)))
1305 goto proc_cache_fail;
1310 #ifdef CONFIG_PROC_FS
1312 remove_proc_entry("ip6_mr_vif", net->proc_net);
1314 ip6mr_rules_exit(net);
1317 ip6mr_notifier_exit(net);

/* Per-netns teardown: reverse order of ip6mr_net_init(). */
1321 static void __net_exit ip6mr_net_exit(struct net *net)
1323 #ifdef CONFIG_PROC_FS
1324 remove_proc_entry("ip6_mr_cache", net->proc_net);
1325 remove_proc_entry("ip6_mr_vif", net->proc_net);
1327 ip6mr_rules_exit(net);
1328 ip6mr_notifier_exit(net);

1331 static struct pernet_operations ip6mr_net_ops = {
1332 .init = ip6mr_net_init,
1333 .exit = ip6mr_net_exit,

/* Module init: slab cache, pernet ops, netdev notifier, PIM protocol
 * handler, RTM_GETROUTE dump handler.  Unwinds in reverse on failure;
 * some goto labels are missing from this extraction. */
1336 int __init ip6_mr_init(void)
1340 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1341 sizeof(struct mfc6_cache),
1342 0, SLAB_HWCACHE_ALIGN,
1347 err = register_pernet_subsys(&ip6mr_net_ops);
1349 goto reg_pernet_fail;
1351 err = register_netdevice_notifier(&ip6_mr_notifier);
1353 goto reg_notif_fail;
1354 #ifdef CONFIG_IPV6_PIMSM_V2
1355 if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1356 pr_err("%s: can't add PIM protocol\n", __func__);
1358 goto add_proto_fail;
1361 err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1362 NULL, ip6mr_rtm_dumproute, 0);
1366 #ifdef CONFIG_IPV6_PIMSM_V2
1367 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1369 unregister_netdevice_notifier(&ip6_mr_notifier);
1372 unregister_pernet_subsys(&ip6mr_net_ops);
1374 kmem_cache_destroy(mrt_cachep);

/* Module exit: undo ip6_mr_init() in reverse order. */
1378 void ip6_mr_cleanup(void)
1380 rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1381 #ifdef CONFIG_IPV6_PIMSM_V2
1382 inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1384 unregister_netdevice_notifier(&ip6_mr_notifier);
1385 unregister_pernet_subsys(&ip6mr_net_ops);
1386 kmem_cache_destroy(mrt_cachep);
1389 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1390 struct mf6cctl *mfc, int mrtsock, int parent)
1392 unsigned char ttls[MAXMIFS];
1393 struct mfc6_cache *uc, *c;
1398 if (mfc->mf6cc_parent >= MAXMIFS)
1401 memset(ttls, 255, MAXMIFS);
1402 for (i = 0; i < MAXMIFS; i++) {
1403 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1407 /* The entries are added/deleted only under RTNL */
1409 c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1410 &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1413 write_lock_bh(&mrt_lock);
1414 c->_c.mfc_parent = mfc->mf6cc_parent;
1415 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1417 c->_c.mfc_flags |= MFC_STATIC;
1418 write_unlock_bh(&mrt_lock);
1419 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1421 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1425 if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1426 !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1429 c = ip6mr_cache_alloc();
1433 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1434 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1435 c->_c.mfc_parent = mfc->mf6cc_parent;
1436 ip6mr_update_thresholds(mrt, &c->_c, ttls);
1438 c->_c.mfc_flags |= MFC_STATIC;
1440 err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1443 pr_err("ip6mr: rhtable insert error %d\n", err);
1444 ip6mr_cache_free(c);
1447 list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1449 /* Check to see if we resolved a queued list. If so we
1450 * need to send on the frames and tidy up.
1453 spin_lock_bh(&mfc_unres_lock);
1454 list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1455 uc = (struct mfc6_cache *)_uc;
1456 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1457 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1458 list_del(&_uc->list);
1459 atomic_dec(&mrt->cache_resolve_queue_len);
1464 if (list_empty(&mrt->mfc_unres_queue))
1465 del_timer(&mrt->ipmr_expire_timer);
1466 spin_unlock_bh(&mfc_unres_lock);
1469 ip6mr_cache_resolve(net, mrt, uc, c);
1470 ip6mr_cache_free(uc);
1472 call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1474 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1479 * Close the multicast socket, and clear the vif tables etc
/*
 * mroute_clean_tables() - tear down the vif table and MFC cache of @mrt.
 * @all: when false, vifs flagged VIFF_STATIC and cache entries flagged
 *       MFC_STATIC are preserved (socket-close path); when true,
 *       everything is removed (table destruction).
 * NOTE(review): this chunk is line-sampled; closing braces and some
 * statements are missing from view.
 */
1482 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1484 struct mr_mfc *c, *tmp;
1488 /* Shut down all active vif entries */
1489 for (i = 0; i < mrt->maxvif; i++) {
1490 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1492 mif6_delete(mrt, i, 0, &list);
/* unregister any devices mif6_delete() queued on @list in one batch */
1494 unregister_netdevice_many(&list);
1496 /* Wipe the cache */
1497 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1498 if (!all && (c->mfc_flags & MFC_STATIC))
1500 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1502 list_del_rcu(&c->list);
1502 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
/* Flush any still-unresolved entries under their dedicated spinlock */
1506 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1507 spin_lock_bh(&mfc_unres_lock);
1508 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1510 call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1511 FIB_EVENT_ENTRY_DEL,
1512 (struct mfc6_cache *)c,
1514 mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1516 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1518 spin_unlock_bh(&mfc_unres_lock);
/*
 * ip6mr_sk_init() - install @sk as the multicast-router control socket
 * for table @mrt.  Fails (branch body not visible here) if a socket is
 * already registered; otherwise publishes the socket via RCU, bumps the
 * per-netns mc_forwarding count and notifies netconf listeners.
 */
1522 static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1525 struct net *net = sock_net(sk);
1528 write_lock_bh(&mrt_lock);
1529 if (rtnl_dereference(mrt->mroute_sk)) {
1532 rcu_assign_pointer(mrt->mroute_sk, sk);
/* SOCK_RCU_FREE defers freeing of sk by an RCU grace period, so RCU
 * readers of mrt->mroute_sk never see a freed socket.
 */
1533 sock_set_flag(sk, SOCK_RCU_FREE);
1534 net->ipv6.devconf_all->mc_forwarding++;
1536 write_unlock_bh(&mrt_lock);
/* Broadcast the mc_forwarding change via RTM_NEWNETCONF */
1539 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1540 NETCONFA_MC_FORWARDING,
1541 NETCONFA_IFINDEX_ALL,
1542 net->ipv6.devconf_all);
/*
 * ip6mr_sk_done() - inverse of ip6mr_sk_init(): detach @sk as the mroute
 * control socket of whichever table it owns, drop mc_forwarding, notify
 * netconf listeners, and clean the table's non-static state.
 * Only raw ICMPv6 sockets can be mroute sockets; anything else bails out
 * early (return path not visible in this truncated view).
 */
1548 int ip6mr_sk_done(struct sock *sk)
1551 struct net *net = sock_net(sk);
1552 struct mr_table *mrt;
1554 if (sk->sk_type != SOCK_RAW ||
1555 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
/* Scan every table in the netns for one owned by @sk */
1559 ip6mr_for_each_table(mrt, net) {
1560 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1561 write_lock_bh(&mrt_lock);
1562 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1563 /* Note that mroute_sk had SOCK_RCU_FREE set,
1564 * so the RCU grace period before sk freeing
1565 * is guaranteed by sk_destruct()
1567 net->ipv6.devconf_all->mc_forwarding--;
1568 write_unlock_bh(&mrt_lock);
1569 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1570 NETCONFA_MC_FORWARDING,
1571 NETCONFA_IFINDEX_ALL,
1572 net->ipv6.devconf_all);
/* false: keep static vifs/MFC entries across socket close */
1574 mroute_clean_tables(mrt, false);
/*
 * mroute6_is_socket() - true if the table matched by @skb's flow has an
 * active mroute control socket.  Looks up the table by iif/oif/mark via
 * fib rules; returns the RCU pointer value (non-NULL => socket present).
 */
1584 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1586 struct mr_table *mrt;
1587 struct flowi6 fl6 = {
/* fall back to loopback when skb has no recorded input interface */
1588 .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
1589 .flowi6_oif = skb->dev->ifindex,
1590 .flowi6_mark = skb->mark,
1593 if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1596 return rcu_access_pointer(mrt->mroute_sk);
1598 EXPORT_SYMBOL(mroute6_is_socket);
1601 * Socket options and virtual interface manipulation. The whole
1602 * virtual interface system is a complete heap, but unfortunately
1603 * that's how BSD mrouted happens to think. Maybe one day with a proper
1604 * MOSPF/PIM router set up we can clean this up.
/*
 * ip6_mroute_setsockopt() - handle the MRT6_* socket options that drive
 * the multicast routing engine: MRT6_INIT/DONE (register/unregister the
 * control socket), MRT6_ADD/DEL_MIF (vif table), MRT6_ADD/DEL_MFC[_PROXY]
 * (forwarding cache), MRT6_ASSERT, MRT6_PIM and MRT6_TABLE.
 * Callers other than the registered mroute socket need CAP_NET_ADMIN.
 * NOTE(review): switch/case labels and several return statements are
 * missing from this truncated view.
 */
1607 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1609 int ret, parent = 0;
1613 struct net *net = sock_net(sk);
1614 struct mr_table *mrt;
/* only raw ICMPv6 sockets may control multicast routing */
1616 if (sk->sk_type != SOCK_RAW ||
1617 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1620 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
/* everything but MRT6_INIT requires ownership or CAP_NET_ADMIN */
1624 if (optname != MRT6_INIT) {
1625 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1626 !ns_capable(net->user_ns, CAP_NET_ADMIN))
1632 if (optlen < sizeof(int))
1635 return ip6mr_sk_init(mrt, sk);
1638 return ip6mr_sk_done(sk);
/* MRT6_ADD_MIF: validate and add one virtual interface */
1641 if (optlen < sizeof(vif))
1643 if (copy_from_user(&vif, optval, sizeof(vif)))
1645 if (vif.mif6c_mifi >= MAXMIFS)
1648 ret = mif6_add(net, mrt, &vif,
1649 sk == rtnl_dereference(mrt->mroute_sk));
/* MRT6_DEL_MIF */
1654 if (optlen < sizeof(mifi_t))
1656 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1659 ret = mif6_delete(mrt, mifi, 0, NULL);
1664 * Manipulate the forwarding caches. These live
1665 * in a sort of kernel/user symbiosis.
1671 case MRT6_ADD_MFC_PROXY:
1672 case MRT6_DEL_MFC_PROXY:
1673 if (optlen < sizeof(mfc))
1675 if (copy_from_user(&mfc, optval, sizeof(mfc)))
/* PROXY variants carry the parent vif in the request (presumably the
 * non-proxy path leaves parent == 0/unset — confirm against full file)
 */
1678 parent = mfc.mf6cc_parent;
1680 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1681 ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1683 ret = ip6mr_mfc_add(net, mrt, &mfc,
1685 rtnl_dereference(mrt->mroute_sk),
1691 * Control PIM assert (to activate pim will activate assert)
1697 if (optlen != sizeof(v))
1699 if (get_user(v, (int __user *)optval))
1701 mrt->mroute_do_assert = v;
1705 #ifdef CONFIG_IPV6_PIMSM_V2
/* MRT6_PIM: enabling PIM implies enabling asserts */
1710 if (optlen != sizeof(v))
1712 if (get_user(v, (int __user *)optval))
1717 if (v != mrt->mroute_do_pim) {
1718 mrt->mroute_do_pim = v;
1719 mrt->mroute_do_assert = v;
1726 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
/* MRT6_TABLE: bind this socket to a specific routing table id */
1731 if (optlen != sizeof(u32))
1733 if (get_user(v, (u32 __user *)optval))
1735 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1736 if (v != RT_TABLE_DEFAULT && v >= 100000000)
/* cannot retarget the table once this socket already owns one */
1738 if (sk == rcu_access_pointer(mrt->mroute_sk))
1743 mrt = ip6mr_new_table(net, v);
1747 raw6_sk(sk)->ip6mr_table = v;
1753 * Spurious command, or MRT6_VERSION which you cannot
1757 return -ENOPROTOOPT;
1762 * Getsock opt support for the multicast routing system.
/*
 * ip6_mroute_getsockopt() - report the current MRT6_PIM / MRT6_ASSERT
 * setting of this socket's table back to userspace as an int, clamped
 * to the caller-supplied option length.
 */
1765 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1770 struct net *net = sock_net(sk);
1771 struct mr_table *mrt;
1773 if (sk->sk_type != SOCK_RAW ||
1774 inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1777 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1785 #ifdef CONFIG_IPV6_PIMSM_V2
1787 val = mrt->mroute_do_pim;
1791 val = mrt->mroute_do_assert;
1794 return -ENOPROTOOPT;
/* copy out at most sizeof(int) bytes, honoring the user's buffer size */
1797 if (get_user(olr, optlen))
1800 olr = min_t(int, olr, sizeof(int));
1804 if (put_user(olr, optlen))
1806 if (copy_to_user(optval, &val, olr))
1812 * The IP multicast ioctl support routines.
/*
 * ip6mr_ioctl() - per-vif (SIOCGETMIFCNT_IN6) and per-(S,G)
 * (SIOCGETSGCNT_IN6) statistics readout for userspace daemons.
 * Counters are snapshotted under read_lock(&mrt_lock) and copied out
 * after the lock is dropped.
 */
1815 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1817 struct sioc_sg_req6 sr;
1818 struct sioc_mif_req6 vr;
1819 struct vif_device *vif;
1820 struct mfc6_cache *c;
1821 struct net *net = sock_net(sk);
1822 struct mr_table *mrt;
1824 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1829 case SIOCGETMIFCNT_IN6:
1830 if (copy_from_user(&vr, arg, sizeof(vr)))
1832 if (vr.mifi >= mrt->maxvif)
1834 read_lock(&mrt_lock);
1835 vif = &mrt->vif_table[vr.mifi];
1836 if (VIF_EXISTS(mrt, vr.mifi)) {
1837 vr.icount = vif->pkt_in;
1838 vr.ocount = vif->pkt_out;
1839 vr.ibytes = vif->bytes_in;
1840 vr.obytes = vif->bytes_out;
1841 read_unlock(&mrt_lock);
1843 if (copy_to_user(arg, &vr, sizeof(vr)))
/* vif slot not in use */
1847 read_unlock(&mrt_lock);
1848 return -EADDRNOTAVAIL;
1849 case SIOCGETSGCNT_IN6:
1850 if (copy_from_user(&sr, arg, sizeof(sr)))
1854 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1856 sr.pktcnt = c->_c.mfc_un.res.pkt;
1857 sr.bytecnt = c->_c.mfc_un.res.bytes;
1858 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1861 if (copy_to_user(arg, &sr, sizeof(sr)))
/* no cache entry for the requested (S,G) */
1866 return -EADDRNOTAVAIL;
1868 return -ENOIOCTLCMD;
1872 #ifdef CONFIG_COMPAT
/*
 * 32-bit-compat mirrors of sioc_sg_req6 / sioc_mif_req6: the counter
 * fields are compat_ulong_t (32-bit) instead of native unsigned long,
 * so 32-bit userspace layouts are matched on 64-bit kernels.
 */
1873 struct compat_sioc_sg_req6 {
1874 struct sockaddr_in6 src;
1875 struct sockaddr_in6 grp;
1876 compat_ulong_t pktcnt;
1877 compat_ulong_t bytecnt;
1878 compat_ulong_t wrong_if;
1881 struct compat_sioc_mif_req6 {
1883 compat_ulong_t icount;
1884 compat_ulong_t ocount;
1885 compat_ulong_t ibytes;
1886 compat_ulong_t obytes;
/*
 * ip6mr_compat_ioctl() - 32-bit-compat twin of ip6mr_ioctl(); identical
 * logic, but copies results through the compat_* request structs above.
 */
1889 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1891 struct compat_sioc_sg_req6 sr;
1892 struct compat_sioc_mif_req6 vr;
1893 struct vif_device *vif;
1894 struct mfc6_cache *c;
1895 struct net *net = sock_net(sk);
1896 struct mr_table *mrt;
1898 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1903 case SIOCGETMIFCNT_IN6:
1904 if (copy_from_user(&vr, arg, sizeof(vr)))
1906 if (vr.mifi >= mrt->maxvif)
1908 read_lock(&mrt_lock);
1909 vif = &mrt->vif_table[vr.mifi];
1910 if (VIF_EXISTS(mrt, vr.mifi)) {
1911 vr.icount = vif->pkt_in;
1912 vr.ocount = vif->pkt_out;
1913 vr.ibytes = vif->bytes_in;
1914 vr.obytes = vif->bytes_out;
1915 read_unlock(&mrt_lock);
1917 if (copy_to_user(arg, &vr, sizeof(vr)))
/* vif slot not in use */
1921 read_unlock(&mrt_lock);
1922 return -EADDRNOTAVAIL;
1923 case SIOCGETSGCNT_IN6:
1924 if (copy_from_user(&sr, arg, sizeof(sr)))
1928 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1930 sr.pktcnt = c->_c.mfc_un.res.pkt;
1931 sr.bytecnt = c->_c.mfc_un.res.bytes;
1932 sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1935 if (copy_to_user(arg, &sr, sizeof(sr)))
/* no cache entry for the requested (S,G) */
1940 return -EADDRNOTAVAIL;
1942 return -ENOIOCTLCMD;
/*
 * ip6mr_forward2_finish() - netfilter okfn for the FORWARD hook in
 * ip6mr_forward2(): bump forwarded-datagram/octet SNMP counters, then
 * hand the skb to dst_output() for transmission.
 */
1947 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1949 __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1950 IPSTATS_MIB_OUTFORWDATAGRAMS);
1951 __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1952 IPSTATS_MIB_OUTOCTETS, skb->len);
1953 return dst_output(net, sk, skb);
1957 * Processing handlers for ip6mr_forward
/*
 * ip6mr_forward2() - transmit one copy of @skb out through vif @vifi.
 * Register vifs (PIM) get the whole packet reported to the daemon
 * instead of being transmitted.  Otherwise a route is looked up for the
 * destination, the skb re-targeted, and the packet pushed through the
 * NF_INET_FORWARD hook to ip6mr_forward2_finish().
 * NOTE(review): error/cleanup paths are missing from this truncated view.
 */
1960 static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1961 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1963 struct ipv6hdr *ipv6h;
1964 struct vif_device *vif = &mrt->vif_table[vifi];
1965 struct net_device *dev;
1966 struct dst_entry *dst;
1972 #ifdef CONFIG_IPV6_PIMSM_V2
/* PIM register vif: account, then punt the whole packet to userspace */
1973 if (vif->flags & MIFF_REGISTER) {
1975 vif->bytes_out += skb->len;
1976 vif->dev->stats.tx_bytes += skb->len;
1977 vif->dev->stats.tx_packets++;
1978 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1983 ipv6h = ipv6_hdr(skb);
1985 fl6 = (struct flowi6) {
1986 .flowi6_oif = vif->link,
1987 .daddr = ipv6h->daddr,
1990 dst = ip6_route_output(net, NULL, &fl6);
1997 skb_dst_set(skb, dst);
2000 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2001 * not only before forwarding, but after forwarding on all output
2002 * interfaces. It is clear, if mrouter runs a multicasting
2003 * program, it should receive packets not depending to what interface
2004 * program is joined.
2005 * If we will not make it, the program will have to join on all
2006 * interfaces. On the other hand, multihoming host (or router, but
2007 * not mrouter) cannot join to more than one interface - it will
2008 * result in receiving multiple packets.
2013 vif->bytes_out += skb->len;
2015 /* We are about to write */
2016 /* XXX: extension headers? */
2017 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2020 ipv6h = ipv6_hdr(skb);
/* mark as already forwarded so input path won't loop it */
2023 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2025 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2026 net, NULL, skb, skb->dev, dev,
2027 ip6mr_forward2_finish);
/*
 * ip6mr_find_vif() - map a net_device back to its vif index in @mrt by
 * scanning the vif table from the top; returns the index on a match
 * (return of the not-found case is outside this truncated view).
 */
2034 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2038 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2039 if (mrt->vif_table[ct].dev == dev)
/*
 * ip6_mr_forward() - fan a multicast packet out according to cache
 * entry @c: update per-entry counters, run the wrong-interface / PIM
 * assert checks, then clone @skb to every output vif whose TTL
 * threshold the packet's hop limit exceeds (the last copy is sent
 * without cloning).
 * NOTE(review): several branch bodies/labels are missing from this
 * truncated view.
 */
2045 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2046 struct sk_buff *skb, struct mfc6_cache *c)
2050 int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2052 vif = c->_c.mfc_parent;
2053 c->_c.mfc_un.res.pkt++;
2054 c->_c.mfc_un.res.bytes += skb->len;
2055 c->_c.mfc_un.res.lastuse = jiffies;
2057 if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2058 struct mfc6_cache *cache_proxy;
2060 /* For an (*,G) entry, we only check that the incoming
2061 * interface is part of the static tree.
2064 cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2066 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2074 * Wrong interface: drop packet and (maybe) send PIM assert.
2076 if (mrt->vif_table[vif].dev != skb->dev) {
2077 c->_c.mfc_un.res.wrong_if++;
2079 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2080 /* pimsm uses asserts, when switching from RPT to SPT,
2081 so that we cannot check that packet arrived on an oif.
2082 It is bad, but otherwise we would need to move pretty
2083 large chunk of pimd to kernel. Ough... --ANK
2085 (mrt->mroute_do_pim ||
2086 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
/* rate-limit asserts to one per MFC_ASSERT_THRESH */
2088 c->_c.mfc_un.res.last_assert +
2089 MFC_ASSERT_THRESH)) {
2090 c->_c.mfc_un.res.last_assert = jiffies;
2091 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
/* input accounting on the parent vif */
2097 mrt->vif_table[vif].pkt_in++;
2098 mrt->vif_table[vif].bytes_in += skb->len;
2103 if (ipv6_addr_any(&c->mf6c_origin) &&
2104 ipv6_addr_any(&c->mf6c_mcastgrp)) {
2105 if (true_vifi >= 0 &&
2106 true_vifi != c->_c.mfc_parent &&
2107 ipv6_hdr(skb)->hop_limit >
2108 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2109 /* It's an (*,*) entry and the packet is not coming from
2110 * the upstream: forward the packet to the upstream
2113 psend = c->_c.mfc_parent;
/* iterate output vifs high-to-low; psend holds the previous candidate
 * so all but the final copy are clones
 */
2118 for (ct = c->_c.mfc_un.res.maxvif - 1;
2119 ct >= c->_c.mfc_un.res.minvif; ct--) {
2120 /* For (*,G) entry, don't forward to the incoming interface */
2121 if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2122 ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2124 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2126 ip6mr_forward2(net, mrt, skb2,
/* final copy consumes the original skb */
2134 ip6mr_forward2(net, mrt, skb, c, psend);
2144 * Multicast packets for forwarding arrive here
/*
 * ip6_mr_input() - entry point for multicast packets to be forwarded:
 * find the table for the skb's flow, look up the (S,G) cache entry
 * (falling back to an any-source entry), queue the packet as unresolved
 * if no entry exists, otherwise forward via ip6_mr_forward().
 */
2147 int ip6_mr_input(struct sk_buff *skb)
2149 struct mfc6_cache *cache;
2150 struct net *net = dev_net(skb->dev);
2151 struct mr_table *mrt;
2152 struct flowi6 fl6 = {
2153 .flowi6_iif = skb->dev->ifindex,
2154 .flowi6_mark = skb->mark,
2158 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2164 read_lock(&mrt_lock);
2165 cache = ip6mr_cache_find(mrt,
2166 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
/* no exact (S,G): try a (*,G) entry for the incoming vif */
2168 int vif = ip6mr_find_vif(mrt, skb->dev);
2171 cache = ip6mr_cache_find_any(mrt,
2172 &ipv6_hdr(skb)->daddr,
2177 * No usable cache entry
2182 vif = ip6mr_find_vif(mrt, skb->dev);
/* queue the packet and ask the daemon to resolve the route */
2184 int err = ip6mr_cache_unresolved(mrt, vif, skb);
2185 read_unlock(&mrt_lock);
2189 read_unlock(&mrt_lock);
2194 ip6_mr_forward(net, mrt, skb, cache);
2196 read_unlock(&mrt_lock);
/*
 * ip6mr_get_route() - fill @rtm for an RTM_GETROUTE query that resolved
 * to a multicast route.  If no cache entry exists yet, fabricate a
 * minimal IPv6 header in a fresh skb and queue it as unresolved so the
 * routing daemon is asked to install the route; otherwise fill the
 * rtnetlink reply from the cached entry.
 */
2201 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2205 struct mr_table *mrt;
2206 struct mfc6_cache *cache;
2207 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
/* rtnetlink queries always use the default table here */
2209 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2213 read_lock(&mrt_lock);
2214 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2215 if (!cache && skb->dev) {
2216 int vif = ip6mr_find_vif(mrt, skb->dev);
2219 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2224 struct sk_buff *skb2;
2225 struct ipv6hdr *iph;
2226 struct net_device *dev;
2230 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2231 read_unlock(&mrt_lock);
2235 /* really correct? */
2236 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2238 read_unlock(&mrt_lock);
2242 NETLINK_CB(skb2).portid = portid;
2243 skb_reset_transport_header(skb2);
2245 skb_put(skb2, sizeof(struct ipv6hdr));
2246 skb_reset_network_header(skb2);
/* synthesize a bare IPv6 header carrying only src/dst of the route */
2248 iph = ipv6_hdr(skb2);
2251 iph->flow_lbl[0] = 0;
2252 iph->flow_lbl[1] = 0;
2253 iph->flow_lbl[2] = 0;
2254 iph->payload_len = 0;
2255 iph->nexthdr = IPPROTO_NONE;
2257 iph->saddr = rt->rt6i_src.addr;
2258 iph->daddr = rt->rt6i_dst.addr;
2260 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2261 read_unlock(&mrt_lock);
/* resolved entry: fill the netlink reply from the cache */
2266 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2267 read_unlock(&mrt_lock);
/*
 * ip6mr_fill_mroute() - emit one RTNL_FAMILY_IP6MR route message for
 * cache entry @c into @skb: rtmsg header, RTA_TABLE, RTA_SRC/RTA_DST
 * (both /128), then the shared mr_fill_mroute() attributes.
 * An unresolved entry (-ENOENT from mr_fill_mroute) is not treated as
 * an error so dumps are not interrupted.
 */
2271 static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2272 u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2275 struct nlmsghdr *nlh;
2279 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2283 rtm = nlmsg_data(nlh);
2284 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2285 rtm->rtm_dst_len = 128;
2286 rtm->rtm_src_len = 128;
2288 rtm->rtm_table = mrt->id;
2289 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2290 goto nla_put_failure;
2291 rtm->rtm_type = RTN_MULTICAST;
2292 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* report route origin: static (user-installed) vs daemon-installed */
2293 if (c->_c.mfc_flags & MFC_STATIC)
2294 rtm->rtm_protocol = RTPROT_STATIC;
2296 rtm->rtm_protocol = RTPROT_MROUTED;
2299 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2300 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2301 goto nla_put_failure;
2302 err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2303 /* do not break the dump if cache is unresolved */
2304 if (err < 0 && err != -ENOENT)
2305 goto nla_put_failure;
2307 nlmsg_end(skb, nlh);
2311 nlmsg_cancel(skb, nlh);
/*
 * _ip6mr_fill_mroute() - adapter with the generic mr_mfc-based
 * signature expected by the shared dump helpers; forwards to
 * ip6mr_fill_mroute() after downcasting to mfc6_cache.
 */
2315 static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2316 u32 portid, u32 seq, struct mr_mfc *c,
2319 return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
/*
 * mr6_msgsize() - worst-case netlink message size for one IPv6 mroute
 * notification; @maxvif sizes the RTA_MULTIPATH nexthop list, and
 * resolved entries (@unresolved false) add the stats attributes.
 */
2323 static int mr6_msgsize(bool unresolved, int maxvif)
2326 NLMSG_ALIGN(sizeof(struct rtmsg))
2327 + nla_total_size(4) /* RTA_TABLE */
2328 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2329 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2334 + nla_total_size(4) /* RTA_IIF */
2335 + nla_total_size(0) /* RTA_MULTIPATH */
2336 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2338 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
/*
 * mr6_netlink_event() - broadcast an RTM_NEWROUTE/RTM_DELROUTE event
 * for @mfc to RTNLGRP_IPV6_MROUTE listeners; on failure the group's
 * socket error is set instead.
 */
2344 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2347 struct net *net = read_pnet(&mrt->net);
2348 struct sk_buff *skb;
/* parent >= MAXMIFS marks an unresolved entry for sizing purposes */
2351 skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2356 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2360 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2366 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
/*
 * mrt6msg_netlink_msgsize() - size of an RTM_NEWCACHEREPORT message
 * carrying a cache-report of @payloadlen packet bytes plus its
 * IP6MRA_CREPORT_* attributes.
 */
2369 static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2372 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2373 + nla_total_size(1) /* IP6MRA_CREPORT_MSGTYPE */
2374 + nla_total_size(4) /* IP6MRA_CREPORT_MIF_ID */
2375 /* IP6MRA_CREPORT_SRC_ADDR */
2376 + nla_total_size(sizeof(struct in6_addr))
2377 /* IP6MRA_CREPORT_DST_ADDR */
2378 + nla_total_size(sizeof(struct in6_addr))
2379 /* IP6MRA_CREPORT_PKT */
2380 + nla_total_size(payloadlen)
/*
 * mrt6msg_netlink_event() - mirror a kernel->daemon mrt6msg cache
 * report (@pkt, mrt6msg header at the transport offset) onto the
 * RTNLGRP_IPV6_MROUTE_R netlink group as an RTM_NEWCACHEREPORT message
 * with IP6MRA_CREPORT_* attributes and the raw packet payload.
 */
2386 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2388 struct net *net = read_pnet(&mrt->net);
2389 struct nlmsghdr *nlh;
2390 struct rtgenmsg *rtgenm;
2391 struct mrt6msg *msg;
2392 struct sk_buff *skb;
/* payload = original packet minus the mrt6msg header */
2396 payloadlen = pkt->len - sizeof(struct mrt6msg);
2397 msg = (struct mrt6msg *)skb_transport_header(pkt);
2399 skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2403 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2404 sizeof(struct rtgenmsg), 0);
2407 rtgenm = nlmsg_data(nlh);
2408 rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2409 if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2410 nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2411 nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2413 nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2415 goto nla_put_failure;
/* copy the packet bytes after the mrt6msg header into the attribute */
2417 nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2418 if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2419 nla_data(nla), payloadlen))
2420 goto nla_put_failure;
2422 nlmsg_end(skb, nlh);
2424 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2428 nlmsg_cancel(skb, nlh);
2431 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
/*
 * ip6mr_rtm_dumproute() - RTM_GETROUTE dump callback; delegates to the
 * generic mr_rtm_dumproute() with the IPv6 table iterator and fill
 * helper, serializing unresolved entries via mfc_unres_lock.
 */
2434 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2436 return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2437 _ip6mr_fill_mroute, &mfc_unres_lock);