/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	LSIIT Laboratory, Strasbourg, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */
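/* Illustrative sketch (not from the original file): a data-path reader
 * only ever takes the shared lock, e.g.
 *
 *	read_lock(&mrt_lock);
 *	cache = ip6mr_cache_find(mrt, &src, &grp);
 *	read_unlock(&mrt_lock);
 *
 * ('src'/'grp' are hypothetical locals), while control-path writers such
 * as mif6_add() and ip6mr_mfc_add() take write_lock_bh(&mrt_lock) under
 * rtnl_lock, and only the unresolved queue needs the mfc_unres_lock
 * spinlock.
 */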
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (id == mrt->id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
	struct mfc6_cache *c = (struct mfc6_cache *)ptr;

	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
}

static const struct rhashtable_params ip6mr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc6_cache, cmparg),
	.key_len = sizeof(struct mfc6_cache_cmp_arg),
	.obj_cmpfn = ip6mr_hash_cmp,
	.automatic_shrinking = true,
};

static void ip6mr_new_table_set(struct mr_table *mrt,
				struct net *net)
{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
}

static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
	.mf6c_origin = IN6ADDR_ANY_INIT,
	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};

static struct mr_table_ops ip6mr_mr_table_ops = {
	.rht_params = &ip6mr_rht_params,
	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
};

static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
			      ipmr_expire_process, ip6mr_new_table_set);
}

static void ip6mr_free_table(struct mr_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	rhltable_destroy(&mrt->mfc_hash);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing
 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 */
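/* Illustrative sample (not part of the original file; the numbers are made
 * up): the vif table reads as a fixed-width listing whose columns follow
 * the seq_printf() formats below, e.g.
 *
 *	$ cat /proc/net/ip6_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 pim6reg           0       0         0       0 00004
 *	 1 eth0          12345      67     89012      34 00000
 */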
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return mr_vif_seq_start(seq, pos);
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct mr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct mr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct mr_mfc_iter *it = seq->private;
		struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->_c.mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->_c.mfc_un.res.pkt,
				   mfc->_c.mfc_un.res.bytes,
				   mfc->_c.mfc_un.res.wrong_if);
			for (n = mfc->_c.mfc_un.res.minvif;
			     n < mfc->_c.mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->_c.mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d", n,
						   mfc->_c.mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = mr_mfc_seq_next,
	.stop  = mr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct mr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink	= reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}

/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mr_mfc *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr_table *mrt,
				    struct mr_mfc *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	cache->mfc_un.res.lastuse = jiffies;
}

static int mif6_add(struct net *net, struct mr_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
			MIFF_REGISTER);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = in6addr_any,
		.mf6c_mcastgrp = *mcastgrp,
	};

	if (ipv6_addr_any(mcastgrp))
		return mr_mfc_find_any_parent(mrt, mifi);
	return mr_mfc_find_any(mrt, mifi, &arg);
}
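/* Illustrative note (not from the original file): lookups fall back from
 * the exact key to wildcard entries, e.g. a packet from 2001:db8::1 to
 * ff0e::123 can be matched by
 *
 *	(2001:db8::1, ff0e::123)	exact (S,G) entry
 *	(::,          ff0e::123)	(*,G) proxy entry
 *	(::,          ::)		(*,*) proxy entry
 *
 * which is why ip6mr_cache_find_any() keys on in6addr_any.
 */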
/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc6_cache *
ip6mr_cache_find_parent(struct mr_table *mrt,
			const struct in6_addr *origin,
			const struct in6_addr *mcastgrp,
			int parent)
{
	struct mfc6_cache_cmp_arg arg = {
		.mf6c_origin = *origin,
		.mf6c_mcastgrp = *mcastgrp,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (!c)
		return NULL;
	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->_c.mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (!c)
		return NULL;
	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd and netlink.
 *
 *	Called under mrt_lock.
 */
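/* Orientation note (illustrative, not from the original file): the upcall
 * queued to mrt->mroute_sk carries a struct mrt6msg whose fields are
 * filled in below; pim6sd looks at im6_msgtype (MRT6MSG_NOCACHE,
 * MRT6MSG_WRONGMIF or MRT6MSG_WHOLEPKT), im6_mif, im6_src and im6_dst to
 * decide how to react.
 */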
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sock *mroute6_sk;
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix length etc.
		 * And all this only to mangle msg->im6_msgtype and
		 * to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	rcu_read_lock();
	mroute6_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute6_sk) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	mrt6msg_netlink_event(mrt, skb);

	/* Deliver to user space multicast routing algorithms */
	ret = sock_queue_rcv_skb(mroute6_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
				  struct sk_buff *skb)
{
	struct mfc6_cache *c;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			 * out.
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/* See if we can append the packet */
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */
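/* Illustrative userspace sketch (not part of this file): a daemon installs
 * or removes an (S,G) forwarding entry with setsockopt() on its
 * IPPROTO_ICMPV6 raw socket, e.g.
 *
 *	struct mf6cctl mc = { 0 };
 *	mc.mf6cc_origin.sin6_addr   = src;	(S)
 *	mc.mf6cc_mcastgrp.sin6_addr = grp;	(G)
 *	mc.mf6cc_parent = iif;			(upstream mif index)
 *	IF_SET(oif, &mc.mf6cc_ifset);		(downstream mifs)
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mc, sizeof(mc));
 *
 * ('src', 'grp', 'iif', 'oif' and 's' are hypothetical.)  MRT6_DEL_MFC
 * takes the same structure and lands in ip6mr_mfc_delete() below.
 */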
static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	struct mfc6_cache *c;

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
	list_del_rcu(&c->_c.list);

	mr6_netlink_event(mrt, c, RTM_DELROUTE);
	ip6mr_cache_free(c);
	return 0;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
				   NULL, ip6mr_rtm_dumproute, 0);
	if (err == 0)
		return 0;

#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	unsigned char ttls[MAXMIFS];
	struct mfc6_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int i, err;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	/* The entries are added/deleted only under RTNL */
	rcu_read_lock();
	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
	rcu_read_unlock();
	if (c) {
		write_lock_bh(&mrt_lock);
		c->_c.mfc_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, &c->_c, ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->_c.mfc_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, &c->_c, ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ip6mr_rht_params);
	if (err) {
		pr_err("ip6mr: rhtable insert error %d\n", err);
		ip6mr_cache_free(c);
		return err;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc6_cache *)_uc;
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
	struct mr_mfc *c, *tmp;
	LIST_HEAD(list);
	int i;

	/* Shut down all active vif entries */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/* Wipe the cache */
	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
		if (!all && (c->mfc_flags & MFC_STATIC))
			continue;
		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
		list_del_rcu(&c->list);
		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
		ip6mr_cache_free((struct mfc6_cache *)c);
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
					  RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (rtnl_dereference(mrt->mroute_sk)) {
		err = -EADDRINUSE;
	} else {
		rcu_assign_pointer(mrt->mroute_sk, sk);
		net->ipv6.devconf_all->mc_forwarding++;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
					     NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return err;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			write_lock_bh(&mrt_lock);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return false;

	return rcu_access_pointer(mrt->mroute_sk);
}
EXPORT_SYMBOL(mroute6_is_socket);

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
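/* Illustrative userspace sketch (not part of this file): a routing daemon
 * such as pim6sd becomes the mrouted socket roughly like this:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	struct mif6ctl mif = { .mif6c_mifi = 0, .mif6c_pifi = ifindex };
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif));
 *
 * ('ifindex' is a hypothetical physical interface index.)  Upcalls then
 * arrive as struct mrt6msg on this socket, and close() tears everything
 * down via ip6mr_sk_done().
 */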
int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif,
			       sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk ==
					    rtnl_dereference(mrt->mroute_sk),
					    parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == rcu_access_pointer(mrt->mroute_sk))
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */
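/* Illustrative userspace sketch (not part of this file): per-flow counters
 * can be polled with SIOCGETSGCNT_IN6 on the mrouted socket, e.g.
 *
 *	struct sioc_sg_req6 sr = { 0 };
 *	sr.src.sin6_addr = src;
 *	sr.grp.sin6_addr = grp;
 *	if (ioctl(s, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
 *		       sr.pktcnt, sr.bytecnt, sr.wrong_if);
 *
 * ('src', 'grp' and 's' are hypothetical.)  SIOCGETMIFCNT_IN6 works the
 * same way with struct sioc_mif_req6.
 */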
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct vif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.mifi];
		if (VIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->_c.mfc_un.res.pkt;
			sr.bytecnt = c->_c.mfc_un.res.bytes;
			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
			IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *c)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = c->_c.mfc_parent;
	c->_c.mfc_un.res.pkt++;
	c->_c.mfc_un.res.bytes += skb->len;
	c->_c.mfc_un.res.lastuse = jiffies;

	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		rcu_read_lock();
		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
			rcu_read_unlock();
			goto forward;
		}
		rcu_read_unlock();
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif_table[vif].dev != skb->dev) {
		c->_c.mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		     * so that we cannot check that packet arrived on an oif.
		     * It is bad, but otherwise we would need to move pretty
		     * large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       c->_c.mfc_un.res.last_assert +
			       MFC_ASSERT_THRESH)) {
			c->_c.mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif_table[vif].pkt_in++;
	mrt->vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&c->mf6c_origin) &&
	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != c->_c.mfc_parent &&
		    ipv6_hdr(skb)->hop_limit >
				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = c->_c.mfc_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = c->_c.mfc_un.res.maxvif - 1;
	     ct >= c->_c.mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2,
						       c, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, c, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}

/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}

static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	struct rta_mfc_stats mfcs;
	struct nlattr *mp_attr;
	struct rtnexthop *nhp;
	unsigned long lastuse;
	int ct;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->_c.mfc_parent >= MAXMIFS) {
		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
		return -ENOENT;
	}

	if (VIF_EXISTS(mrt, c->_c.mfc_parent) &&
	    nla_put_u32(skb, RTA_IIF,
			mrt->vif_table[c->_c.mfc_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	for (ct = c->_c.mfc_un.res.minvif;
	     ct < c->_c.mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->_c.mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->_c.mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	lastuse = READ_ONCE(c->_c.mfc_un.res.lastuse);
	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;

	mfcs.mfcs_packets = c->_c.mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->_c.mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->_c.mfc_un.res.wrong_if;
	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
			      RTA_PAD))
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}

int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    u32 portid)
{
	int err;
	struct mr_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		cache->_c.mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type     = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->_c.mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
		;

	return len;
}

static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}

static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtgenmsg))
		+ nla_total_size(1)	/* IP6MRA_CREPORT_MSGTYPE */
		+ nla_total_size(4)	/* IP6MRA_CREPORT_MIF_ID */
					/* IP6MRA_CREPORT_SRC_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_DST_ADDR */
		+ nla_total_size(sizeof(struct in6_addr))
					/* IP6MRA_CREPORT_PKT */
		+ nla_total_size(payloadlen)
		;

	return len;
}

static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
	struct net *net = read_pnet(&mrt->net);
	struct nlmsghdr *nlh;
	struct rtgenmsg *rtgenm;
	struct mrt6msg *msg;
	struct sk_buff *skb;
	struct nlattr *nla;
	int payloadlen;

	payloadlen = pkt->len - sizeof(struct mrt6msg);
	msg = (struct mrt6msg *)skb_transport_header(pkt);

	skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
	if (!skb)
		goto errout;

	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
			sizeof(struct rtgenmsg), 0);
	if (!nlh)
		goto errout;
	rtgenm = nlmsg_data(nlh);
	rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
	if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
	    nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
			     &msg->im6_src) ||
	    nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
			     &msg->im6_dst))
		goto nla_put_failure;

	nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
	if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
				  nla_data(nla), payloadlen))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
	return;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
errout:
	kfree_skb(skb);
	rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
}

static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int t = 0, s_t;
	unsigned int e = 0, s_e;
	struct mr_table *mrt;
	struct mr_mfc *mfc;

	s_t = cb->args[0];
	s_e = cb->args[1];

	rcu_read_lock();
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
			if (e < s_e)
				goto next_entry;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      (struct mfc6_cache *)mfc,
					      RTM_NEWROUTE, NLM_F_MULTI) < 0)
				goto done;
next_entry:
			e++;
		}
		e = 0;
		s_e = 0;

		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      (struct mfc6_cache *)mfc,
					      RTM_NEWROUTE, NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);