2 * Linux INET6 implementation
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 #define CLONE_OFFLINK_ROUTE 0
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
/*
 * dst_ops template for IPv6 routes. Hooks up the cache-validation,
 * teardown, device-down, negative-advice, link-failure and PMTU callbacks
 * declared earlier in this file; local output goes through __ip6_local_out.
 * NOTE(review): several initializer lines are absent from this listing.
 */
100 static struct dst_ops ip6_dst_ops_template = {
102 .protocol = cpu_to_be16(ETH_P_IPV6),
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out,
/*
 * PMTU-update hook installed as ip6_dst_blackhole_ops.update_pmtu.
 * NOTE(review): the function body is not present in this listing.
 */
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
/*
 * dst_ops used for the entries allocated by ip6_dst_blackhole().
 * Shares ip6_dst_destroy and ip6_dst_check with the regular ops and
 * installs the blackhole-specific update_pmtu hook.
 */
118 static struct dst_ops ip6_dst_blackhole_ops = {
120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
/*
 * Template for the "null" route entry: a reject route (RTF_REJECT |
 * RTF_NONEXTHOP) whose error is -ENETUNREACH and whose input/output
 * handlers are the ip6_pkt_discard variants. The metric is the largest
 * u32 value, the hop-limit metric is 255, and both reference counters
 * (__refcnt, rt6i_ref) start at 1.
 */
126 static struct rt6_info ip6_null_entry_template = {
128 .__refcnt = ATOMIC_INIT(1),
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
137 .rt6i_protocol = RTPROT_KERNEL,
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
144 static int ip6_pkt_prohibit(struct sk_buff *skb);
145 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
/*
 * Template for the "prohibit" route entry (only under
 * CONFIG_IPV6_MULTIPLE_TABLES): a reject route whose input/output handlers
 * are the ip6_pkt_prohibit variants. Same flags, metric, hop-limit metric
 * and initial reference counts as the null entry template.
 * NOTE(review): the .error initializer is not visible in this listing.
 */
147 static struct rt6_info ip6_prohibit_entry_template = {
149 .__refcnt = ATOMIC_INIT(1),
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
158 .rt6i_protocol = RTPROT_KERNEL,
159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Template for the "blackhole" route entry: a reject route whose input and
 * output handlers are both dst_discard. Flags, metric, hop-limit metric and
 * initial reference counts match the other entry templates.
 * NOTE(review): the .error initializer is not visible in this listing.
 */
163 static struct rt6_info ip6_blk_hole_entry_template = {
165 .__refcnt = ATOMIC_INIT(1),
169 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
170 .input = dst_discard,
171 .output = dst_discard,
173 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
174 .rt6i_protocol = RTPROT_KERNEL,
175 .rt6i_metric = ~(u32) 0,
176 .rt6i_ref = ATOMIC_INIT(1),
181 /* allocate dst with ip6_dst_ops */
/*
 * Thin wrapper around dst_alloc(): returns its result cast to
 * struct rt6_info *. The result is passed through unchecked, so callers
 * test it for NULL themselves (see icmp6_dst_alloc()).
 */
182 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
184 return (struct rt6_info *)dst_alloc(ops);
/*
 * dst_ops.destroy callback: detaches the inet6_dev from the route by
 * clearing rt->rt6i_idev.
 * NOTE(review): the lines around the assignment are missing from this
 * listing; presumably they guard on idev != NULL and drop the idev
 * reference -- confirm against the full source.
 */
187 static void ip6_dst_destroy(struct dst_entry *dst)
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
193 rt->rt6i_idev = NULL;
/*
 * dst_ops.ifdown callback. When the device going down is not the netns
 * loopback device and the route's inet6_dev belongs to that device, a
 * reference on the loopback device's inet6_dev is taken (in6_dev_get) and
 * the route is re-pointed at it.
 * NOTE(review): release of the previous idev is presumably on a line
 * missing from this listing -- confirm.
 */
198 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
201 struct rt6_info *rt = (struct rt6_info *)dst;
202 struct inet6_dev *idev = rt->rt6i_idev;
203 struct net_device *loopback_dev =
204 dev_net(dev)->loopback_dev;
206 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
207 struct inet6_dev *loopback_idev =
208 in6_dev_get(loopback_dev);
209 if (loopback_idev != NULL) {
210 rt->rt6i_idev = loopback_idev;
/*
 * Returns non-zero when the route carries RTF_EXPIRES and the current
 * jiffies value is already past rt->rt6i_expires; zero otherwise.
 */
216 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
218 return (rt->rt6i_flags & RTF_EXPIRES) &&
219 time_after(jiffies, rt->rt6i_expires);
/*
 * Returns non-zero when daddr is a multicast, link-local or loopback
 * address. Callers in this file use the result to decide whether to add
 * RT6_LOOKUP_F_IFACE to the lookup flags.
 */
222 static inline int rt6_need_strict(struct in6_addr *daddr)
224 return ipv6_addr_type(daddr) &
225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
229 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walks the sibling list that starts at rt (linked through dst.rt6_next)
 * looking for a route that fits the requested outgoing interface oif, or,
 * per the ipv6_chk_addr() call, whose device owns saddr. Loopback devices
 * are compared through rt6i_idev->dev rather than rt6i_dev, and a "local"
 * candidate is tracked for them. On the fall-through path visible here the
 * per-net null entry is returned when RT6_LOOKUP_F_IFACE is set.
 * NOTE(review): several branch bodies and return statements are missing
 * from this listing, so the exact selection order cannot be confirmed here.
 */
232 static inline struct rt6_info *rt6_device_match(struct net *net,
234 struct in6_addr *saddr,
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
241 if (!oif && ipv6_addr_any(saddr))
244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
245 struct net_device *dev = sprt->rt6i_dev;
248 if (dev->ifindex == oif)
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
253 if (flags & RT6_LOOKUP_F_IFACE && oif)
255 if (local && (!oif ||
256 local->rt6i_idev->dev->ifindex == oif))
262 if (ipv6_chk_addr(net, saddr, dev,
263 flags & RT6_LOOKUP_F_IFACE))
272 if (flags & RT6_LOOKUP_F_IFACE)
273 return net->ipv6.ip6_null_entry;
/*
 * Router reachability probing. With CONFIG_IPV6_ROUTER_PREF: if the route
 * has a nexthop neighbour that is not in a NUD_VALID state and more than
 * rtr_probe_interval jiffies have passed since neigh->updated, the
 * timestamp is refreshed and a neighbour solicitation for the neighbour's
 * primary key is sent to the address computed by
 * addrconf_addr_solict_mult(). A second definition with the same name
 * follows at the end of this block; its body is not visible in this
 * listing (presumably the stub used when the option is off).
 */
279 #ifdef CONFIG_IPV6_ROUTER_PREF
280 static void rt6_probe(struct rt6_info *rt)
282 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
284 * Okay, this does not seem to be appropriate
285 * for now, however, we need to check if it
286 * is really so; aka Router Reachability Probing.
288 * Router Reachability Probe MUST be rate-limited
289 * to no more than one per minute.
291 if (!neigh || (neigh->nud_state & NUD_VALID))
293 read_lock_bh(&neigh->lock);
294 if (!(neigh->nud_state & NUD_VALID) &&
295 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
296 struct in6_addr mcaddr;
297 struct in6_addr *target;
/* NOTE(review): neigh->updated is written while only the read side of neigh->lock is held. */
299 neigh->updated = jiffies;
300 read_unlock_bh(&neigh->lock);
302 target = (struct in6_addr *)&neigh->primary_key;
303 addrconf_addr_solict_mult(target, &mcaddr);
304 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
306 read_unlock_bh(&neigh->lock);
309 static inline void rt6_probe(struct rt6_info *rt)
315 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Device part of the route score. A route fits when no oif was requested
 * or its device has ifindex oif; a loopback-device route also fits when
 * its rt6i_idev's device has ifindex oif.
 * NOTE(review): the return statements (and thus the score values) are
 * missing from this listing.
 */
317 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
319 struct net_device *dev = rt->rt6i_dev;
320 if (!oif || dev->ifindex == oif)
322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * Neighbour part of the route score. Routes flagged RTF_NONEXTHOP or
 * lacking RTF_GATEWAY are handled by the first branch without looking at
 * the neighbour; otherwise the nexthop's nud_state is inspected under
 * read_lock_bh(&neigh->lock): NUD_VALID first, and with
 * CONFIG_IPV6_ROUTER_PREF also NUD_FAILED.
 * NOTE(review): the assigned/returned values are missing from this listing.
 */
328 static inline int rt6_check_neigh(struct rt6_info *rt)
330 struct neighbour *neigh = rt->rt6i_nexthop;
332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
339 #ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
345 read_unlock_bh(&neigh->lock);
/*
 * Combines the device score (rt6_check_dev) and, with
 * CONFIG_IPV6_ROUTER_PREF, the decoded router preference shifted left by
 * two bits into m. A zero device score under RT6_LOOKUP_F_IFACE, or a zero
 * neighbour score (rt6_check_neigh) under RT6_LOOKUP_F_REACHABLE, takes an
 * early exit.
 * NOTE(review): the early-exit values and the final return are missing
 * from this listing.
 */
351 static int rt6_score_route(struct rt6_info *rt, int oif,
356 m = rt6_check_dev(rt, oif);
357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
362 n = rt6_check_neigh(rt);
363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/*
 * Evaluates one candidate route for find_rr_leaf(): expired routes are
 * checked first via rt6_check_expired(), the rest are scored with
 * rt6_score_route(). mpri and match carry the best result so far in and
 * out of the call.
 * NOTE(review): the comparison against *mpri, the bodies of both
 * RT6_LOOKUP_F_REACHABLE branches and the return are missing from this
 * listing.
 */
368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
373 if (rt6_check_expired(rt))
376 m = rt6_score_route(rt, oif, strict);
381 if (strict & RT6_LOOKUP_F_REACHABLE)
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * Runs find_match() over every route of the given metric attached to fn,
 * in round-robin order: first from rr_head to the end of the run of routes
 * with that metric, then from fn->leaf up to (not including) rr_head.
 * NOTE(review): initialisation of match/mpri and the return statement are
 * missing from this listing.
 */
393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
397 struct rt6_info *rt, *match;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 rt = rt->dst.rt6_next)
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 rt = rt->dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
/*
 * Picks a route from the leaves of fn. rt0 is the round-robin start (the
 * visible assignment resets fn->rr_ptr to fn->leaf; the condition for that
 * is on a missing line). find_rr_leaf() is asked for the best match among
 * routes sharing rt0's metric. When RT6_LOOKUP_F_REACHABLE is set and
 * the (not visible) first half of the condition holds, the round-robin
 * pointer handling below runs. Returns the match, or the per-net null
 * entry when there is none.
 */
411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413 struct rt6_info *match, *rt0;
416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 __func__, fn->leaf, oif);
421 fn->rr_ptr = rt0 = fn->leaf;
423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
426 (strict & RT6_LOOKUP_F_REACHABLE)) {
427 struct rt6_info *next = rt0->dst.rt6_next;
429 /* no entries matched; do round-robin */
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
437 RT6_TRACE("%s() => %p\n",
440 net = dev_net(rt0->rt6i_dev);
441 return match ? match : net->ipv6.ip6_null_entry;
444 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handles a received route information option (opt/len) announced by
 * gateway gwaddr on dev.
 *
 * Visible steps:
 *  - reject options shorter than struct route_info, and options whose
 *    length field is inconsistent with prefix_len (length > 3,
 *    prefix_len > 128, prefix_len > 64 with length < 2, prefix_len > 0
 *    with length < 1);
 *  - read the route preference and test it against
 *    ICMPV6_ROUTER_PREF_INVALID;
 *  - convert the lifetime with addrconf_timeout_fixup();
 *  - use the option's prefix directly when length == 3, otherwise copy it
 *    through ipv6_addr_prefix() into a local buffer;
 *  - look the route up with rt6_get_route_info(), add it with
 *    rt6_add_route_info() when needed, then set RTF_ROUTEINFO, the
 *    preference bits and the expiry (RTF_EXPIRES is cleared for a
 *    non-finite lifetime), and drop the reference with dst_release().
 * NOTE(review): the error returns and the handling of an existing route
 * with zero lifetime are on lines missing from this listing.
 */
445 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
446 struct in6_addr *gwaddr)
448 struct net *net = dev_net(dev);
449 struct route_info *rinfo = (struct route_info *) opt;
450 struct in6_addr prefix_buf, *prefix;
452 unsigned long lifetime;
455 if (len < sizeof(struct route_info)) {
459 /* Sanity check for prefix_len and length */
460 if (rinfo->length > 3) {
462 } else if (rinfo->prefix_len > 128) {
464 } else if (rinfo->prefix_len > 64) {
465 if (rinfo->length < 2) {
468 } else if (rinfo->prefix_len > 0) {
469 if (rinfo->length < 1) {
474 pref = rinfo->route_pref;
475 if (pref == ICMPV6_ROUTER_PREF_INVALID)
478 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
480 if (rinfo->length == 3)
481 prefix = (struct in6_addr *)rinfo->prefix;
483 /* this function is safe */
484 ipv6_addr_prefix(&prefix_buf,
485 (struct in6_addr *)rinfo->prefix,
487 prefix = &prefix_buf;
490 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
493 if (rt && !lifetime) {
499 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
502 rt->rt6i_flags = RTF_ROUTEINFO |
503 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
506 if (!addrconf_finite_timeout(lifetime)) {
507 rt->rt6i_flags &= ~RTF_EXPIRES;
509 rt->rt6i_expires = jiffies + HZ * lifetime;
510 rt->rt6i_flags |= RTF_EXPIRES;
512 dst_release(&rt->dst);
/*
 * Lookup helper macro. It relies on variables named rt and fn in the
 * expanding function. When rt is the per-net null entry it works with a
 * node pointer pn: it tests fn for RTN_TL_ROOT, re-runs fib6_lookup() in
 * pn's subtree when that subtree exists and is not fn itself, and tests
 * fn for RTN_RTINFO.
 * NOTE(review): the loop structure, how pn is obtained and the goto
 * targets are on lines missing from this listing.
 */
518 #define BACKTRACK(__net, saddr) \
520 if (rt == __net->ipv6.ip6_null_entry) { \
521 struct fib6_node *pn; \
523 if (fn->fn_flags & RTN_TL_ROOT) \
526 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
527 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
530 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Per-table lookup callback used by rt6_lookup(). Under the table's read
 * lock it finds the fib6 node for the flow's destination/source, narrows
 * the candidates with rt6_device_match(), backtracks through BACKTRACK()
 * when only the null entry matched, and marks the chosen route used via
 * dst_use() before unlocking.
 * NOTE(review): the assignment of rt from the node and the return are
 * missing from this listing.
 */
536 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
537 struct fib6_table *table,
538 struct flowi *fl, int flags)
540 struct fib6_node *fn;
543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
548 BACKTRACK(net, &fl->fl6_src);
550 dst_use(&rt->dst, jiffies);
551 read_unlock_bh(&table->tb6_lock);
/*
 * Exported route lookup. A non-zero strict becomes RT6_LOOKUP_F_IFACE;
 * the source address is copied into the flow and
 * RT6_LOOKUP_F_HAS_SADDR is added (the guard for that is on a missing
 * line, presumably "if (saddr)"). The lookup itself goes through
 * fib6_rule_lookup() with ip6_pol_route_lookup as the per-table callback.
 * NOTE(review): the flowi initialiser and the handling of a dst with an
 * error are missing from this listing.
 */
556 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
557 const struct in6_addr *saddr, int oif, int strict)
567 struct dst_entry *dst;
568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
575 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
577 return (struct rt6_info *) dst;
584 EXPORT_SYMBOL(rt6_lookup);
586 /* ip6_ins_rt is called with table->tb6_lock NOT held.
587 It takes a new route entry; if the addition fails for any reason, the
588 route is freed. In any case, if the caller does not hold it, it may
/*
 * Inserts rt into its own table (rt->rt6i_table): takes the table's write
 * lock, calls fib6_add() on the table root with the netlink info, and
 * releases the lock. err holds fib6_add()'s result.
 * NOTE(review): the return statement is missing from this listing.
 */
592 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
595 struct fib6_table *table;
597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
599 err = fib6_add(&table->tb6_root, rt, info);
600 write_unlock_bh(&table->tb6_lock);
/*
 * Convenience wrapper around __ip6_ins_rt(): builds an nl_info whose
 * nl_net is the network namespace of the route's device and returns the
 * insert result.
 */
605 int ip6_ins_rt(struct rt6_info *rt)
607 struct nl_info info = {
608 .nl_net = dev_net(rt->rt6i_dev),
610 return __ip6_ins_rt(rt, &info);
/*
 * Creates a host (/128) cache copy of ort for daddr.
 *
 * The copy comes from ip6_rt_copy(). For non-gateway routes the gateway is
 * set to daddr itself, and RTF_ANYCAST is added when the original prefix
 * is shorter than /128 and its address equals daddr. The copy gets
 * RTF_CACHE and DST_HOST; with CONFIG_IPV6_SUBTREES a source-specific
 * route is also narrowed to saddr/128. Finally a neighbour entry for the
 * gateway is obtained with ndisc_get_neigh() and stored as the nexthop.
 *
 * If the neighbour lookup fails, one forced garbage-collection pass is
 * attempted (only when not in softirq context, see "attempts") before the
 * "Neighbour table overflow" message path.
 * NOTE(review): the retry jump, the error return and the final return are
 * on lines missing from this listing.
 */
613 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
622 rt = ip6_rt_copy(ort);
625 struct neighbour *neigh;
626 int attempts = !in_softirq();
628 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
629 if (rt->rt6i_dst.plen != 128 &&
630 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
631 rt->rt6i_flags |= RTF_ANYCAST;
632 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
635 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
636 rt->rt6i_dst.plen = 128;
637 rt->rt6i_flags |= RTF_CACHE;
638 rt->dst.flags |= DST_HOST;
640 #ifdef CONFIG_IPV6_SUBTREES
641 if (rt->rt6i_src.plen && saddr) {
642 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
643 rt->rt6i_src.plen = 128;
648 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650 struct net *net = dev_net(rt->rt6i_dev);
651 int saved_rt_min_interval =
652 net->ipv6.sysctl.ip6_rt_gc_min_interval;
653 int saved_rt_elasticity =
654 net->ipv6.sysctl.ip6_rt_gc_elasticity;
656 if (attempts-- > 0) {
/* NOTE(review): the per-net gc sysctls are overwritten for the duration of the forced gc and restored afterwards; no locking is visible around this window. */
657 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
658 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
660 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
662 net->ipv6.sysctl.ip6_rt_gc_elasticity =
664 net->ipv6.sysctl.ip6_rt_gc_min_interval =
665 saved_rt_min_interval;
671 "ipv6: Neighbour table overflow.\n");
675 rt->rt6i_nexthop = neigh;
/*
 * Creates a host (/128) cache clone of ort for daddr: copies the route
 * with ip6_rt_copy(), sets the destination to daddr/128, adds RTF_CACHE
 * and DST_HOST, and shares the original's nexthop through neigh_clone().
 * NOTE(review): the NULL check on the copy and the return are missing
 * from this listing.
 */
682 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
684 struct rt6_info *rt = ip6_rt_copy(ort);
686 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
687 rt->rt6i_dst.plen = 128;
688 rt->rt6i_flags |= RTF_CACHE;
689 rt->dst.flags |= DST_HOST;
690 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * Under the table read lock the fib6 node for the flow is found and
 * rt6_select() picks a route; RT6_LOOKUP_F_REACHABLE is requested only
 * when forwarding is disabled in devconf_all. If the result is the null
 * entry or already a cache entry (RTF_CACHE), no cache entry is created.
 * Otherwise the lock is dropped and a cache entry is built with
 * rt6_alloc_cow() (route without nexthop and without RTF_NONEXTHOP) or,
 * when CLONE_OFFLINK_ROUTE is enabled, rt6_alloc_clone(), and inserted
 * with ip6_ins_rt(). As the in-function comment explains, another context
 * may insert the same route while the lock is released, in which case the
 * lookup is repeated.
 * NOTE(review): labels, gotos, reference handling and the return are on
 * lines missing from this listing.
 */
695 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
696 struct flowi *fl, int flags)
698 struct fib6_node *fn;
699 struct rt6_info *rt, *nrt;
703 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
705 strict |= flags & RT6_LOOKUP_F_IFACE;
708 read_lock_bh(&table->tb6_lock);
711 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
714 rt = rt6_select(fn, oif, strict | reachable);
716 BACKTRACK(net, &fl->fl6_src);
717 if (rt == net->ipv6.ip6_null_entry ||
718 rt->rt6i_flags & RTF_CACHE)
722 read_unlock_bh(&table->tb6_lock);
724 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
725 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
727 #if CLONE_OFFLINK_ROUTE
728 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
734 dst_release(&rt->dst);
735 rt = nrt ? : net->ipv6.ip6_null_entry;
739 err = ip6_ins_rt(nrt);
748 * Race condition! In the gap, when table->tb6_lock was
749 * released someone could insert this route. Relookup.
751 dst_release(&rt->dst);
760 read_unlock_bh(&table->tb6_lock);
762 rt->dst.lastuse = jiffies;
/*
 * Input-path adapter for fib6_rule_lookup(): calls ip6_pol_route() with
 * the flow's incoming interface (fl->iif) as the interface argument.
 */
768 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
769 struct flowi *fl, int flags)
771 return ip6_pol_route(net, table, fl->iif, fl, flags);
/*
 * Routes a received packet: builds a flow from the IPv6 header (incoming
 * ifindex, flow label bits masked with IPV6_FLOWINFO_MASK, next header),
 * adds RT6_LOOKUP_F_IFACE when the destination needs a strict interface
 * match and the device is not of type ARPHRD_PIMREG, and attaches the
 * result of fib6_rule_lookup() to the skb as its dst.
 * NOTE(review): most of the flowi initialiser is missing from this listing.
 */
774 void ip6_route_input(struct sk_buff *skb)
776 struct ipv6hdr *iph = ipv6_hdr(skb);
777 struct net *net = dev_net(skb->dev);
778 int flags = RT6_LOOKUP_F_HAS_SADDR;
780 .iif = skb->dev->ifindex,
785 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
789 .proto = iph->nexthdr,
792 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
793 flags |= RT6_LOOKUP_F_IFACE;
795 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
/*
 * Output-path adapter for fib6_rule_lookup(): calls ip6_pol_route() with
 * the flow's outgoing interface (fl->oif) as the interface argument.
 */
798 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
799 struct flowi *fl, int flags)
801 return ip6_pol_route(net, table, fl->oif, fl, flags);
/*
 * Exported output route lookup. RT6_LOOKUP_F_IFACE is requested when the
 * socket is bound to a device or the destination needs a strict interface
 * match; RT6_LOOKUP_F_HAS_SADDR is requested when the flow has a source
 * address. Source-address preference flags are derived from the socket's
 * srcprefs.
 * NOTE(review): the guard in front of the inet6_sk(sk) dereference is on a
 * line missing from this listing -- confirm sk cannot be NULL there.
 */
804 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
809 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
810 flags |= RT6_LOOKUP_F_IFACE;
812 if (!ipv6_addr_any(&fl->fl6_src))
813 flags |= RT6_LOOKUP_F_HAS_SADDR;
815 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
817 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
820 EXPORT_SYMBOL(ip6_route_output);
/*
 * Builds a blackhole copy of the route in *dstp, allocated from
 * ip6_dst_blackhole_ops. The copy starts with a reference count of 1,
 * discards on both input and output, and inherits the original's metrics,
 * device, inet6_dev (with an extra hold), gateway, destination key (and
 * source key with CONFIG_IPV6_SUBTREES) and flags minus RTF_EXPIRES; its
 * expiry is reset to 0.
 * Returns 0 when a new entry was produced, -ENOMEM otherwise.
 * NOTE(review): the allocation check and the update of *dstp are on lines
 * missing from this listing.
 */
822 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
824 struct rt6_info *ort = (struct rt6_info *) *dstp;
825 struct rt6_info *rt = (struct rt6_info *)
826 dst_alloc(&ip6_dst_blackhole_ops);
827 struct dst_entry *new = NULL;
832 atomic_set(&new->__refcnt, 1);
834 new->input = dst_discard;
835 new->output = dst_discard;
837 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
838 new->dev = ort->dst.dev;
841 rt->rt6i_idev = ort->rt6i_idev;
843 in6_dev_hold(rt->rt6i_idev);
844 rt->rt6i_expires = 0;
846 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
847 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
850 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
851 #ifdef CONFIG_IPV6_SUBTREES
852 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
860 return new ? 0 : -ENOMEM;
865 * Destination cache support functions
/*
 * dst_ops.check callback: tests whether the cached route is still attached
 * to a fib6 node whose serial number (fn_sernum) equals the caller's
 * cookie.
 * NOTE(review): both return statements are missing from this listing.
 */
868 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
872 rt = (struct rt6_info *) dst;
874 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops.negative_advice callback. Only cache entries (RTF_CACHE) are
 * examined, and among those the expired ones get special handling.
 * NOTE(review): what is done with an expired cache entry, and the return
 * value, are on lines missing from this listing.
 */
880 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
882 struct rt6_info *rt = (struct rt6_info *) dst;
885 if (rt->rt6i_flags & RTF_CACHE) {
886 if (rt6_check_expired(rt)) {
/*
 * dst_ops.link_failure callback. Sends an ICMPv6 destination-unreachable
 * (address unreachable) error for the packet, then invalidates the route
 * attached to the skb: a cache entry is set to expire immediately, while a
 * default route attached to a fib6 node has the node's serial number set
 * to -1.
 * NOTE(review): a NULL check on rt is presumably on a missing line.
 */
898 static void ip6_link_failure(struct sk_buff *skb)
902 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
904 rt = (struct rt6_info *) skb_dst(skb);
906 if (rt->rt6i_flags&RTF_CACHE) {
907 dst_set_expires(&rt->dst, 0);
908 rt->rt6i_flags |= RTF_EXPIRES;
909 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
910 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops.update_pmtu callback. Acts only on host routes (/128) and only
 * when the new mtu is smaller than the current one: marks the route
 * RTF_MODIFIED, sets RTAX_FEATURE_ALLFRAG when the reported mtu is below
 * IPV6_MIN_MTU, stores the mtu metric and raises NETEVENT_PMTU_UPDATE.
 * NOTE(review): the adjustment of mtu inside the "< IPV6_MIN_MTU" branch
 * is on a line missing from this listing.
 */
914 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
916 struct rt6_info *rt6 = (struct rt6_info*)dst;
918 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
919 rt6->rt6i_flags |= RTF_MODIFIED;
920 if (mtu < IPV6_MIN_MTU) {
922 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
924 dst->metrics[RTAX_MTU-1] = mtu;
925 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
929 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derives the advertised MSS from a link mtu: subtracts the IPv6 and TCP
 * header sizes, raises the result to the per-net ip6_rt_min_advmss sysctl
 * when it is smaller, and special-cases values above
 * IPV6_MAXPLEN - sizeof(struct tcphdr).
 * NOTE(review): mtu is unsigned, so an mtu smaller than the two headers
 * wraps around instead of going negative; the value assigned in the final
 * branch and the return are on lines missing from this listing.
 */
931 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
933 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
935 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
936 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
939 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
940 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
941 * IPV6_MAXPLEN is also valid and means: "any MSS,
942 * rely only on pmtu discovery"
944 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
949 static struct dst_entry *icmp6_dst_gc_list;
950 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Allocates a standalone dst for sending to addr on dev (not inserted
 * into any fib6 table in the visible code). Takes a reference on the
 * device's inet6_dev, allocates from the per-net ip6_dst_ops, resolves a
 * neighbour with ndisc_get_neigh() (the condition for that call is on a
 * missing line), and fills in refcount 1, hop limit 255, mtu and advmss
 * metrics, ip6_output as the output handler and addr/128 as destination.
 * The entry is then pushed onto icmp6_dst_gc_list under icmp6_dst_lock and
 * the fib6 garbage collector is kicked.
 * NOTE(review): the failure paths and the return are on lines missing from
 * this listing.
 */
952 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
953 struct neighbour *neigh,
954 const struct in6_addr *addr)
957 struct inet6_dev *idev = in6_dev_get(dev);
958 struct net *net = dev_net(dev);
960 if (unlikely(idev == NULL))
963 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
964 if (unlikely(rt == NULL)) {
973 neigh = ndisc_get_neigh(dev, addr);
979 rt->rt6i_idev = idev;
980 rt->rt6i_nexthop = neigh;
981 atomic_set(&rt->dst.__refcnt, 1);
982 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
983 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
984 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
985 rt->dst.output = ip6_output;
987 #if 0 /* there's no chance to use these for ndisc */
988 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
992 rt->rt6i_dst.plen = 128;
995 spin_lock_bh(&icmp6_dst_lock);
996 rt->dst.next = icmp6_dst_gc_list;
997 icmp6_dst_gc_list = &rt->dst;
998 spin_unlock_bh(&icmp6_dst_lock);
1000 fib6_force_start_gc(net);
/*
 * Garbage-collects icmp6_dst_gc_list: walks the list under icmp6_dst_lock
 * and treats entries whose reference count has dropped to zero as
 * collectable.
 * NOTE(review): the unlink/free statements, the handling of still
 * referenced entries and the return value are on lines missing from this
 * listing.
 */
1006 int icmp6_dst_gc(void)
1008 struct dst_entry *dst, *next, **pprev;
1013 spin_lock_bh(&icmp6_dst_lock);
1014 pprev = &icmp6_dst_gc_list;
1016 while ((dst = *pprev) != NULL) {
1017 if (!atomic_read(&dst->__refcnt)) {
/*
 * Applies func(rt, arg) to every entry on icmp6_dst_gc_list while holding
 * icmp6_dst_lock; entries for which func returns non-zero are handled by
 * the branch body.
 * NOTE(review): that branch body (presumably unlink and free) and the list
 * advance are on lines missing from this listing.
 */
1031 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1034 struct dst_entry *dst, **pprev;
1036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
1038 while ((dst = *pprev) != NULL) {
1039 struct rt6_info *rt = (struct rt6_info *) dst;
1040 if (func(rt, arg)) {
/*
 * Garbage collector for the per-net IPv6 dst cache.
 *
 * The owning struct net is recovered from ops with container_of(). A pass
 * is skipped (skip target on a missing line) while the minimum gc interval
 * since the last run has not elapsed and the fast entry count does not
 * exceed ip6_rt_max_size. Otherwise ip6_rt_gc_expire is incremented,
 * fib6_run_gc() is run with it, and the run time is recorded. If the exact
 * entry count afterwards is below ops->gc_thresh, ip6_rt_gc_expire is
 * reset to half of ip6_rt_gc_timeout. Finally ip6_rt_gc_expire is decayed
 * by its own value shifted right by the elasticity sysctl.
 *
 * Returns non-zero when the cache still holds more than ip6_rt_max_size
 * entries.
 */
1050 static int ip6_dst_gc(struct dst_ops *ops)
1052 unsigned long now = jiffies;
1053 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1054 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1055 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1061 entries = dst_entries_get_fast(ops);
1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 entries <= rt_max_size)
1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now;
1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1074 return entries > rt_max_size;
1077 /* Clean the host part of a prefix. Not necessary in a radix tree,
1078 but results in cleaner routing tables.
1080 Remove it only when everything works!
/*
 * Returns the IPv6 mtu configured for dev (idev->cnf.mtu6), starting from
 * IPV6_MIN_MTU as the default.
 * NOTE(review): the locking around __in6_dev_get(), the NULL check on idev
 * and the return are on lines missing from this listing.
 */
1083 static int ipv6_get_mtu(struct net_device *dev)
1085 int mtu = IPV6_MIN_MTU;
1086 struct inet6_dev *idev;
1089 idev = __in6_dev_get(dev);
1091 mtu = idev->cnf.mtu6;
/*
 * Returns the hop limit to use for dst. The RTAX_HOPLIMIT metric is read
 * first; the fallbacks visible here are the device's inet6_dev setting
 * (cnf.hop_limit) and the namespace-wide devconf_all->hop_limit.
 * NOTE(review): the conditions selecting between metric and fallbacks are
 * on lines missing from this listing.
 */
1096 int ip6_dst_hoplimit(struct dst_entry *dst)
1098 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1100 struct net_device *dev = dst->dev;
1101 struct inet6_dev *idev;
1104 idev = __in6_dev_get(dev);
1106 hoplimit = idev->cnf.hop_limit;
1108 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
/*
 * Builds a route from a fib6_config and inserts it with __ip6_ins_rt().
 *
 * Visible steps, in order:
 *  - validate prefix lengths (<= 128; a source prefix is also tested when
 *    CONFIG_IPV6_SUBTREES is off);
 *  - resolve dev/idev from cfg->fc_ifindex when one is given;
 *  - default the metric to IP6_RT_PRIO_USER and the protocol to
 *    RTPROT_BOOT;
 *  - get (or create) the table and allocate the rt6_info;
 *  - choose the input handler by destination type: multicast ->
 *    ip6_mc_input, RTF_LOCAL -> ip6_input, otherwise ip6_forward;
 *  - store the masked destination (and source) prefix; /128 destinations
 *    get DST_HOST;
 *  - turn RTF_REJECT routes, and routes via a loopback device to a
 *    non-loopback, non-local destination, into reject routes bound to the
 *    netns loopback device (error -ENETUNREACH, discard handlers);
 *  - for RTF_GATEWAY routes, copy the gateway and, when it is not a
 *    link-local unicast address, resolve it with rt6_lookup() and adopt
 *    that route's device;
 *  - look up a neighbour entry for RTF_GATEWAY / RTF_NONEXTHOP routes;
 *  - copy metrics from the fc_mx netlink attributes (types above RTAX_MAX
 *    are rejected) and fill defaults: a zero hop-limit metric becomes -1,
 *    missing mtu/advmss are derived from the device.
 * NOTE(review): all error paths, the cleanup labels and several guards are
 * on lines missing from this listing; the visible code writes
 * cfg->fc_metric, cfg->fc_protocol and cfg->fc_nlinfo.nl_net.
 */
1118 int ip6_route_add(struct fib6_config *cfg)
1121 struct net *net = cfg->fc_nlinfo.nl_net;
1122 struct rt6_info *rt = NULL;
1123 struct net_device *dev = NULL;
1124 struct inet6_dev *idev = NULL;
1125 struct fib6_table *table;
1128 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1130 #ifndef CONFIG_IPV6_SUBTREES
1131 if (cfg->fc_src_len)
1134 if (cfg->fc_ifindex) {
1136 dev = dev_get_by_index(net, cfg->fc_ifindex);
1139 idev = in6_dev_get(dev);
1144 if (cfg->fc_metric == 0)
1145 cfg->fc_metric = IP6_RT_PRIO_USER;
1147 table = fib6_new_table(net, cfg->fc_table);
1148 if (table == NULL) {
1153 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1160 rt->dst.obsolete = -1;
1161 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1162 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1165 if (cfg->fc_protocol == RTPROT_UNSPEC)
1166 cfg->fc_protocol = RTPROT_BOOT;
1167 rt->rt6i_protocol = cfg->fc_protocol;
1169 addr_type = ipv6_addr_type(&cfg->fc_dst);
1171 if (addr_type & IPV6_ADDR_MULTICAST)
1172 rt->dst.input = ip6_mc_input;
1173 else if (cfg->fc_flags & RTF_LOCAL)
1174 rt->dst.input = ip6_input;
1176 rt->dst.input = ip6_forward;
1178 rt->dst.output = ip6_output;
1180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1181 rt->rt6i_dst.plen = cfg->fc_dst_len;
1182 if (rt->rt6i_dst.plen == 128)
1183 rt->dst.flags = DST_HOST;
1185 #ifdef CONFIG_IPV6_SUBTREES
1186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1187 rt->rt6i_src.plen = cfg->fc_src_len;
1190 rt->rt6i_metric = cfg->fc_metric;
1192 /* We cannot add true routes via loopback here,
1193 they would result in kernel looping; promote them to reject routes
1195 if ((cfg->fc_flags & RTF_REJECT) ||
1196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1197 && !(cfg->fc_flags&RTF_LOCAL))) {
1198 /* hold loopback dev/idev if we haven't done so. */
1199 if (dev != net->loopback_dev) {
1204 dev = net->loopback_dev;
1206 idev = in6_dev_get(dev);
1212 rt->dst.output = ip6_pkt_discard_out;
1213 rt->dst.input = ip6_pkt_discard;
1214 rt->dst.error = -ENETUNREACH;
1215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1219 if (cfg->fc_flags & RTF_GATEWAY) {
1220 struct in6_addr *gw_addr;
1223 gw_addr = &cfg->fc_gateway;
1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1225 gwa_type = ipv6_addr_type(gw_addr);
1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1228 struct rt6_info *grt;
1230 /* IPv6 strictly inhibits using not link-local
1231 addresses as nexthop address.
1232 Otherwise, router will not able to send redirects.
1233 It is very good, but in some (rare!) circumstances
1234 (SIT, PtP, NBMA NOARP links) it is handy to allow
1235 some exceptions. --ANK
1238 if (!(gwa_type&IPV6_ADDR_UNICAST))
1241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1243 err = -EHOSTUNREACH;
1247 if (dev != grt->rt6i_dev) {
1248 dst_release(&grt->dst);
1252 dev = grt->rt6i_dev;
1253 idev = grt->rt6i_idev;
1255 in6_dev_hold(grt->rt6i_idev);
1257 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 dst_release(&grt->dst);
1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1275 if (IS_ERR(rt->rt6i_nexthop)) {
1276 err = PTR_ERR(rt->rt6i_nexthop);
1277 rt->rt6i_nexthop = NULL;
1282 rt->rt6i_flags = cfg->fc_flags;
1289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1290 int type = nla_type(nla);
1293 if (type > RTAX_MAX) {
1298 rt->dst.metrics[type - 1] = nla_get_u32(nla);
1303 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1304 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1305 if (!dst_mtu(&rt->dst))
1306 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1307 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1308 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1310 rt->rt6i_idev = idev;
1311 rt->rt6i_table = table;
1313 cfg->fc_nlinfo.nl_net = dev_net(dev);
1315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/*
 * Removes rt from its table. The per-net null entry is special-cased up
 * front (result on a missing line). Otherwise fib6_del() is called under
 * the table's write lock and one reference on the route is dropped with
 * dst_release() before the lock is released, so the caller's reference is
 * consumed on this path.
 * NOTE(review): the return statements are missing from this listing.
 */
1327 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1330 struct fib6_table *table;
1331 struct net *net = dev_net(rt->rt6i_dev);
1333 if (rt == net->ipv6.ip6_null_entry)
1336 table = rt->rt6i_table;
1337 write_lock_bh(&table->tb6_lock);
1339 err = fib6_del(rt, info);
1340 dst_release(&rt->dst);
1342 write_unlock_bh(&table->tb6_lock);
/*
 * Convenience wrapper around __ip6_del_rt(): builds an nl_info whose
 * nl_net is the network namespace of the route's device and returns the
 * delete result.
 */
1347 int ip6_del_rt(struct rt6_info *rt)
1349 struct nl_info info = {
1350 .nl_net = dev_net(rt->rt6i_dev),
1352 return __ip6_del_rt(rt, &info);
/*
 * Deletes the route described by cfg. The table is looked up with
 * fib6_get_table(), the exact node is located with fib6_locate() under the
 * table read lock, and the node's leaves are filtered by interface index,
 * gateway (only when RTF_GATEWAY is requested) and metric (only when
 * non-zero). The first route passing all filters is deleted through
 * __ip6_del_rt() after the read lock is released.
 * NOTE(review): the reference taken on the matching route before
 * unlocking, the "continue" statements and the not-found return are on
 * lines missing from this listing.
 */
1355 static int ip6_route_del(struct fib6_config *cfg)
1357 struct fib6_table *table;
1358 struct fib6_node *fn;
1359 struct rt6_info *rt;
1362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1366 read_lock_bh(&table->tb6_lock);
1368 fn = fib6_locate(&table->tb6_root,
1369 &cfg->fc_dst, cfg->fc_dst_len,
1370 &cfg->fc_src, cfg->fc_src_len);
1373 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1374 if (cfg->fc_ifindex &&
1375 (rt->rt6i_dev == NULL ||
1376 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1378 if (cfg->fc_flags & RTF_GATEWAY &&
1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1384 read_unlock_bh(&table->tb6_lock);
1386 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1389 read_unlock_bh(&table->tb6_lock);
/*
 * Flow key used for redirect validation: carries the address of the
 * redirecting gateway alongside the flow. ip6_route_redirect() passes it
 * to fib6_rule_lookup() cast to struct flowi *, and
 * __ip6_route_redirect() casts it back.
 * NOTE(review): the leading member is on a line missing from this listing;
 * the casts only work if it is a struct flowi placed first -- confirm.
 */
1397 struct ip6rd_flowi {
1399 struct in6_addr gateway;
/*
 * Per-table lookup callback for redirect validation. Under the table read
 * lock it finds the fib6 node for the flow and scans its leaves for a
 * route that is not expired, is a gateway route, uses the interface the
 * redirect arrived on (fl->oif) and whose gateway equals the redirect's
 * sender (rdfl->gateway). When the scan finds nothing, rt is set to the
 * per-net null entry and BACKTRACK() moves the search up the tree.
 * NOTE(review): the "continue"/"break" statements, the reference taken on
 * the result and the return are on lines missing from this listing.
 */
1402 static struct rt6_info *__ip6_route_redirect(struct net *net,
1403 struct fib6_table *table,
1407 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1408 struct rt6_info *rt;
1409 struct fib6_node *fn;
1412 * Get the "current" route for this destination and
1413 * check if the redirect has come from approriate router.
1415 * RFC 2461 specifies that redirects should only be
1416 * accepted if they come from the nexthop to the target.
1417 * Due to the way the routes are chosen, this notion
1418 * is a bit fuzzy and one might need to check all possible
1422 read_lock_bh(&table->tb6_lock);
1423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1425 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1427 * Current route is on-link; redirect is always invalid.
1429 * Seems, previous statement is not true. It could
1430 * be node, which looks for us as on-link (f.e. proxy ndisc)
1431 * But then router serving it might decide, that we should
1432 * know truth 8)8) --ANK (980726).
1434 if (rt6_check_expired(rt))
1436 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 if (fl->oif != rt->rt6i_dev->ifindex)
1440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1446 rt = net->ipv6.ip6_null_entry;
1447 BACKTRACK(net, &fl->fl6_src);
1451 read_unlock_bh(&table->tb6_lock);
/*
 * Finds the route a redirect applies to. Builds an ip6rd_flowi with the
 * receiving device as oif and the redirecting router as gateway, requests
 * RT6_LOOKUP_F_IFACE when dest needs a strict interface match, and runs
 * fib6_rule_lookup() with __ip6_route_redirect as the per-table callback.
 * NOTE(review): the destination/source members of the flow initialiser are
 * on lines missing from this listing.
 */
1456 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1457 struct in6_addr *src,
1458 struct in6_addr *gateway,
1459 struct net_device *dev)
1461 int flags = RT6_LOOKUP_F_HAS_SADDR;
1462 struct net *net = dev_net(dev);
1463 struct ip6rd_flowi rdfl = {
1465 .oif = dev->ifindex,
1475 ipv6_addr_copy(&rdfl.gateway, gateway);
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1481 flags, __ip6_route_redirect);
/*
 * Processes an accepted ICMPv6 redirect for dest.
 *
 * The current route is looked up with ip6_route_redirect(); if only the
 * null entry is found, the redirect is logged (rate-limited) as coming
 * from an invalid nexthop. Otherwise the new neighbour is updated to
 * NUD_STALE with the supplied link-layer address (router flags are set
 * only when the target is not on-link), the old route is confirmed, and
 * unless the neighbour is already the route's nexthop a /128 cache copy
 * is created with ip6_rt_copy(): flagged
 * RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE (RTF_GATEWAY is cleared again
 * under a condition on a missing line, presumably on_link), pointing at
 * the new neighbour, with mtu and advmss re-derived from the neighbour's
 * device. The copy is inserted with ip6_ins_rt() and a NETEVENT_REDIRECT
 * notification carrying old and new dst is raised. An old route that was
 * itself a cache entry gets extra handling, and the reference on the old
 * route is dropped at the end.
 * NOTE(review): the goto targets, the allocation check and the body of the
 * final RTF_CACHE branch are on lines missing from this listing.
 */
1484 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
1490 struct net *net = dev_net(neigh->dev);
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494 if (rt == net->ipv6.ip6_null_entry) {
1495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
1502 * We have finally decided to accept it.
1505 neigh_update(neigh, lladdr, NUD_STALE,
1506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1517 dst_confirm(&rt->dst);
1519 /* Duplicate redirect: silently ignore. */
1520 if (neigh == rt->dst.neighbour)
1523 nrt = ip6_rt_copy(rt);
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
1533 nrt->dst.flags |= DST_HOST;
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
1538 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540 dst_mtu(&nrt->dst));
1542 if (ip6_ins_rt(nrt))
1545 netevent.old = &rt->dst;
1546 netevent.new = &nrt->dst;
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1549 if (rt->rt6i_flags&RTF_CACHE) {
1555 dst_release(&rt->dst);
1559 * Handle ICMP "packet too big" messages
1560 * i.e. Path MTU discovery
/*
 * Applies a reported path MTU (pmtu) to the route towards daddr as seen
 * from saddr/ifindex in namespace net.
 *
 * The route is found with rt6_lookup(). Reports that do not lower the
 * current mtu are tested for first; values below IPV6_MIN_MTU are raised
 * to IPV6_MIN_MTU. The route is confirmed, then:
 *  - an existing cache entry (RTF_CACHE) is updated in place: new mtu
 *    metric, expiry of ip6_rt_mtu_expires, RTF_MODIFIED|RTF_EXPIRES;
 *  - otherwise a host cache entry is created with rt6_alloc_cow()
 *    (route without nexthop and without RTF_NONEXTHOP) or
 *    rt6_alloc_clone(), given the new mtu, the same expiry and
 *    RTF_DYNAMIC|RTF_EXPIRES.
 * RTAX_FEATURE_ALLFRAG is set on either path under a condition on a
 * missing line. The reference from the lookup is dropped at the end.
 * NOTE(review): the insertion of the new entry, the goto targets and the
 * allfrag condition are on lines missing from this listing.
 */
1563 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1564 struct net *net, u32 pmtu, int ifindex)
1566 struct rt6_info *rt, *nrt;
1569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1573 if (pmtu >= dst_mtu(&rt->dst))
1576 if (pmtu < IPV6_MIN_MTU) {
1578 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1579 * MTU (1280) and a fragment header should always be included
1580 * after a node receiving Too Big message reporting PMTU is
1581 * less than the IPv6 Minimum Link MTU.
1583 pmtu = IPV6_MIN_MTU;
1587 /* New mtu received -> path was valid.
1588 They are sent only in response to data packets,
1589 so that this nexthop apparently is reachable. --ANK
1591 dst_confirm(&rt->dst);
1593 /* Host route. If it is static, it would be better
1594 not to override it, but add new one, so that
1595 when cache entry will expire old pmtu
1596 would return automatically.
1598 if (rt->rt6i_flags & RTF_CACHE) {
1599 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1601 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1602 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1603 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1608 Two cases are possible:
1609 1. It is connected route. Action: COW
1610 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1612 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1613 nrt = rt6_alloc_cow(rt, daddr, saddr);
1615 nrt = rt6_alloc_clone(rt, daddr);
1618 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
1620 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1622 /* According to RFC 1981, detecting PMTU increase shouldn't be
1623 * happened within 5 mins, the recommended timer is 10 mins.
1624 * Here this route expiration time is set to ip6_rt_mtu_expires
1625 * which is 10 mins. After 10 mins the decreased pmtu is expired
1626 * and detecting PMTU increase will be automatically happened.
1628 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1629 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1634 dst_release(&rt->dst);
1637 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1638 struct net_device *dev, u32 pmtu)
1640 struct net *net = dev_net(dev);
1643 * RFC 1981 states that a node "MUST reduce the size of the packets it
1644 * is sending along the path" that caused the Packet Too Big message.
1645 * Since it's not possible in the general case to determine which
1646 * interface was used to send the original packet, we update the MTU
1647 * on the interface that will be used to send future packets. We also
1648 * update the MTU on the interface that received the Packet Too Big in
1649 * case the original packet was forced out that interface with
1650 * SO_BINDTODEVICE or similar. This is the next best thing to the
1651 * correct behaviour, which would be to update the MTU on all
1654 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1655 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1659 * Misc support functions
/*
 * Allocate a new rt6_info from the dst_ops of @ort's network namespace and
 * populate it from @ort.
 *
 * Copied unchanged: the dst input/output handlers, the whole metrics array,
 * dst.error, dst.dev, rt6i_idev, the gateway address, the destination key
 * (plus the source key under CONFIG_IPV6_SUBTREES) and the owning table.
 * dev_hold() and in6_dev_hold() are called for the copied device and
 * inet6_dev pointers.  Not inherited: lastuse is set to the current jiffies,
 * rt6i_expires and rt6i_metric are zeroed, and RTF_EXPIRES is masked out of
 * the copied flags.
 *
 * NOTE(review): any allocation-failure or NULL guards and the return
 * statement are on lines this listing omits -- confirm in the full source.
 */
1662 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1664 struct net *net = dev_net(ort->rt6i_dev);
1665 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1668 rt->dst.input = ort->dst.input;
1669 rt->dst.output = ort->dst.output;
1671 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1672 rt->dst.error = ort->dst.error;
1673 rt->dst.dev = ort->dst.dev;
1675 dev_hold(rt->dst.dev);
1676 rt->rt6i_idev = ort->rt6i_idev;
1678 in6_dev_hold(rt->rt6i_idev);
1679 rt->dst.lastuse = jiffies;
1680 rt->rt6i_expires = 0;
1682 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1683 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1684 rt->rt6i_metric = 0;
1686 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1687 #ifdef CONFIG_IPV6_SUBTREES
1688 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1690 rt->rt6i_table = ort->rt6i_table;
1695 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Search RT6_TABLE_INFO of @net for the route to @prefix/@prefixlen that
 * leaves through interface @ifindex via gateway @gwaddr.
 *
 * fib6_locate() finds the node for the prefix, then the node's leaf list is
 * walked testing three things per route: the device ifindex, that both
 * RTF_ROUTEINFO and RTF_GATEWAY are set, and that the gateway equals
 * @gwaddr.  The table lock is held with write_lock_bh() around the lookup
 * and the walk.
 *
 * NOTE(review): the continue/break statements, what is done with a matching
 * route, the handling of a missing table or node, and the return statement
 * are on lines this listing omits -- confirm in the full source.
 */
1696 static struct rt6_info *rt6_get_route_info(struct net *net,
1697 struct in6_addr *prefix, int prefixlen,
1698 struct in6_addr *gwaddr, int ifindex)
1700 struct fib6_node *fn;
1701 struct rt6_info *rt = NULL;
1702 struct fib6_table *table;
1704 table = fib6_get_table(net, RT6_TABLE_INFO);
1708 write_lock_bh(&table->tb6_lock);
1709 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1713 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1714 if (rt->rt6i_dev->ifindex != ifindex)
1716 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1718 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1724 write_unlock_bh(&table->tb6_lock);
/*
 * Add a route for @prefix/@prefixlen via @gwaddr on interface @ifindex to
 * RT6_TABLE_INFO, then return whatever rt6_get_route_info() finds for the
 * same key.
 *
 * The route is requested with metric IP6_RT_PRIO_USER and the flags
 * RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref).
 * RTF_DEFAULT is additionally set for the case described by the inline
 * comment (prefix length 0).  The return value of ip6_route_add() is not
 * examined; the caller learns the outcome from the follow-up lookup.
 *
 * NOTE(review): the declaration of the `pref` parameter and the prefix
 * length test guarding RTF_DEFAULT are on lines this listing omits.
 */
1728 static struct rt6_info *rt6_add_route_info(struct net *net,
1729 struct in6_addr *prefix, int prefixlen,
1730 struct in6_addr *gwaddr, int ifindex,
1733 struct fib6_config cfg = {
1734 .fc_table = RT6_TABLE_INFO,
1735 .fc_metric = IP6_RT_PRIO_USER,
1736 .fc_ifindex = ifindex,
1737 .fc_dst_len = prefixlen,
1738 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1739 RTF_UP | RTF_PREF(pref),
1741 .fc_nlinfo.nlh = NULL,
1742 .fc_nlinfo.nl_net = net,
1745 ipv6_addr_copy(&cfg.fc_dst, prefix);
1746 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1748 /* We should treat it as a default route if prefix length is 0. */
1750 cfg.fc_flags |= RTF_DEFAULT;
1752 ip6_route_add(&cfg);
1754 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * Look for the default-router route through gateway @addr on @dev.
 *
 * The leaf list hanging off the root of RT6_TABLE_DFLT (in the namespace of
 * @dev) is walked under write_lock_bh().  A route matches when it uses
 * @dev, has both RTF_ADDRCONF and RTF_DEFAULT set, and its gateway equals
 * @addr.
 *
 * NOTE(review): the handling of a missing table, the action taken on a
 * match and the return statement are on lines this listing omits.
 */
1758 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1760 struct rt6_info *rt;
1761 struct fib6_table *table;
1763 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1767 write_lock_bh(&table->tb6_lock);
1768 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1769 if (dev == rt->rt6i_dev &&
1770 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1771 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1776 write_unlock_bh(&table->tb6_lock);
/*
 * Add a default route via @gwaddr on @dev to RT6_TABLE_DFLT and return the
 * entry that rt6_get_dflt_router() subsequently finds.
 *
 * The route is requested with metric IP6_RT_PRIO_USER and the flags
 * RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
 * RTF_PREF(pref), in the namespace of @dev.  No destination prefix is set,
 * so fc_dst and fc_dst_len keep their zero initial values.  The return
 * value of ip6_route_add() is not examined.
 *
 * NOTE(review): the declaration of the `pref` parameter is on a line this
 * listing omits.
 */
1780 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1781 struct net_device *dev,
1784 struct fib6_config cfg = {
1785 .fc_table = RT6_TABLE_DFLT,
1786 .fc_metric = IP6_RT_PRIO_USER,
1787 .fc_ifindex = dev->ifindex,
1788 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1789 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1791 .fc_nlinfo.nlh = NULL,
1792 .fc_nlinfo.nl_net = dev_net(dev),
1795 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1797 ip6_route_add(&cfg);
1799 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Purge default-router entries from RT6_TABLE_DFLT of @net.
 *
 * The root leaf list is scanned under read_lock_bh() for a route that has
 * RTF_DEFAULT or RTF_ADDRCONF set.  Note that either flag is enough here,
 * whereas rt6_get_dflt_router() requires both.  The lock is dropped inside
 * the loop body for a matching route and again after the loop.
 *
 * NOTE(review): the removal of the matching route and the control flow
 * after the in-loop unlock (presumably a restart of the scan) are on lines
 * this listing omits -- confirm in the full source.
 */
1802 void rt6_purge_dflt_routers(struct net *net)
1804 struct rt6_info *rt;
1805 struct fib6_table *table;
1807 /* NOTE: Keep consistent with rt6_get_dflt_router */
1808 table = fib6_get_table(net, RT6_TABLE_DFLT);
1813 read_lock_bh(&table->tb6_lock);
1814 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1815 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1817 read_unlock_bh(&table->tb6_lock);
1822 read_unlock_bh(&table->tb6_lock);
1825 static void rtmsg_to_fib6_config(struct net *net,
1826 struct in6_rtmsg *rtmsg,
1827 struct fib6_config *cfg)
1829 memset(cfg, 0, sizeof(*cfg));
1831 cfg->fc_table = RT6_TABLE_MAIN;
1832 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1833 cfg->fc_metric = rtmsg->rtmsg_metric;
1834 cfg->fc_expires = rtmsg->rtmsg_info;
1835 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1836 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1837 cfg->fc_flags = rtmsg->rtmsg_flags;
1839 cfg->fc_nlinfo.nl_net = net;
1841 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1842 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1843 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
/*
 * ioctl entry point for the legacy route commands SIOCADDRT and SIOCDELRT.
 *
 * For both commands the caller is checked for CAP_NET_ADMIN, a struct
 * in6_rtmsg is copied in from user pointer @arg and converted with
 * rtmsg_to_fib6_config(); the request is then passed to ip6_route_add() or
 * ip6_route_del().
 *
 * NOTE(review): the switch statement itself, the error returns, any locking
 * around the add/delete calls and the handling of other commands are on
 * lines this listing omits -- confirm in the full source.
 */
1846 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1848 struct fib6_config cfg;
1849 struct in6_rtmsg rtmsg;
1853 case SIOCADDRT: /* Add a route */
1854 case SIOCDELRT: /* Delete a route */
1855 if (!capable(CAP_NET_ADMIN))
1857 err = copy_from_user(&rtmsg, arg,
1858 sizeof(struct in6_rtmsg));
1862 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1867 err = ip6_route_add(&cfg);
1870 err = ip6_route_del(&cfg);
1884 * Drop the packet on the floor
/*
 * Common drop path for packets that hit a reject-style route.
 *
 * @code is the ICMPv6 Destination Unreachable code sent back to the
 * sender; @ipstats_mib_noroutes selects the statistics counter.  For
 * IPSTATS_MIB_INNOROUTES the destination address type is examined and an
 * unspecified (IPV6_ADDR_ANY) destination is counted as
 * IPSTATS_MIB_INADDRERRORS.  Under the IPSTATS_MIB_OUTNOROUTES label the
 * counter named by @ipstats_mib_noroutes itself is incremented.
 *
 * NOTE(review): the break/fall-through between the two cases, the freeing
 * of @skb and the return value are on lines this listing omits.
 */
1887 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1890 struct dst_entry *dst = skb_dst(skb);
1891 switch (ipstats_mib_noroutes) {
1892 case IPSTATS_MIB_INNOROUTES:
1893 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1894 if (type == IPV6_ADDR_ANY) {
1895 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1896 IPSTATS_MIB_INADDRERRORS);
1900 case IPSTATS_MIB_OUTNOROUTES:
1901 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1902 ipstats_mib_noroutes);
1905 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1910 static int ip6_pkt_discard(struct sk_buff *skb)
1912 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1915 static int ip6_pkt_discard_out(struct sk_buff *skb)
1917 skb->dev = skb_dst(skb)->dev;
1918 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1921 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1923 static int ip6_pkt_prohibit(struct sk_buff *skb)
1925 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1928 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1930 skb->dev = skb_dst(skb)->dev;
1931 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1937 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build the host route (prefix length 128) for local address @addr owned
 * by @idev.
 *
 * A rt6_info is allocated from the namespace's ip6_dst_ops; when that
 * fails a rate-limited warning is printed and ERR_PTR(-ENOMEM) returned.
 * The route is bound to the namespace's loopback device (a device
 * reference is taken) while rt6i_idev is set to @idev.  It uses
 * ip6_input/ip6_output, takes MTU and advertised MSS from the loopback
 * device, sets the hop-limit metric to -1 and dst.obsolete to -1, and
 * starts with flags RTF_UP | RTF_NONEXTHOP plus RTF_ANYCAST or RTF_LOCAL.
 * A neighbour entry is obtained with ndisc_get_neigh(); if that yields an
 * error pointer, the same error pointer is returned to the caller.  The
 * route is assigned to RT6_TABLE_LOCAL and its reference count is set
 * to 1.
 *
 * NOTE(review): the third parameter's declaration, the condition choosing
 * between RTF_ANYCAST and RTF_LOCAL, the cleanup on the neighbour error
 * path and the final return are on lines this listing omits.
 */
1940 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1941 const struct in6_addr *addr,
1944 struct net *net = dev_net(idev->dev);
1945 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1946 struct neighbour *neigh;
1949 if (net_ratelimit())
1950 pr_warning("IPv6: Maximum number of routes reached,"
1951 " consider increasing route/max_size.\n");
1952 return ERR_PTR(-ENOMEM);
1955 dev_hold(net->loopback_dev);
1958 rt->dst.flags = DST_HOST;
1959 rt->dst.input = ip6_input;
1960 rt->dst.output = ip6_output;
1961 rt->rt6i_dev = net->loopback_dev;
1962 rt->rt6i_idev = idev;
1963 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1964 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1965 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1966 rt->dst.obsolete = -1;
1968 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1970 rt->rt6i_flags |= RTF_ANYCAST;
1972 rt->rt6i_flags |= RTF_LOCAL;
1973 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1974 if (IS_ERR(neigh)) {
1977 /* We are casting this because that is the return
1978 * value type. But an errno encoded pointer is the
1979 * same regardless of the underlying pointer type,
1980 * and that's what we are returning. So this is OK.
1982 return (struct rt6_info *) neigh;
1984 rt->rt6i_nexthop = neigh;
1986 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1987 rt->rt6i_dst.plen = 128;
1988 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1990 atomic_set(&rt->dst.__refcnt, 1);
/*
 * Argument bundle passed through the void pointer of fib6_ifdown().
 * `dev` is the device being matched; fib6_ifdown() treats NULL as "any
 * device".  fib6_ifdown() also reads a `net` member whose declaration is
 * on a line this listing omits.
 */
1995 struct arg_dev_net {
1996 struct net_device *dev;
/*
 * Per-route callback used by rt6_ifdown().  @arg is a struct arg_dev_net.
 *
 * A route is selected when it uses the given device (or the device is
 * NULL, meaning every device) and it is not the namespace's
 * ip6_null_entry; selected routes are reported through RT6_TRACE.
 *
 * NOTE(review): the return values, which tell the walker what to do with
 * the route, are on lines this listing omits.
 */
2000 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2002 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2003 struct net *net = ((struct arg_dev_net *)arg)->net;
2005 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2006 rt != net->ipv6.ip6_null_entry) {
2007 RT6_TRACE("deleted by ifdown %p\n", rt);
/*
 * Run fib6_ifdown() over every route of @net via fib6_clean_all(), and
 * over the entries visited by icmp6_clean_all(), on behalf of @dev.
 *
 * NOTE(review): the initialiser of `adn` is on lines this listing omits;
 * presumably it carries @dev and @net -- confirm in the full source.
 */
2013 void rt6_ifdown(struct net *net, struct net_device *dev)
2015 struct arg_dev_net adn = {
2020 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2021 icmp6_clean_all(fib6_ifdown, &adn);
/*
 * Argument bundle passed through the void pointer of
 * rt6_mtu_change_route().  `dev` is the device whose MTU changed;
 * rt6_mtu_change_route() also reads an `mtu` member whose declaration is
 * on a line this listing omits.
 */
2024 struct rt6_mtu_change_arg
2026 struct net_device *dev;
/*
 * Per-route callback used by rt6_mtu_change().  @p_arg is a struct
 * rt6_mtu_change_arg naming the device and its new MTU.
 *
 * The route's RTAX_MTU metric is set to the new MTU, and RTAX_ADVMSS is
 * recomputed from it with ipv6_advmss(), only when all of these hold:
 *   - the route uses the device named in the argument,
 *   - the RTAX_MTU metric is not locked, and
 *   - either the route MTU is >= the new MTU (a decrease), or it is below
 *     the new MTU and equal to idev->cnf.mtu6 (the route was limited by
 *     this device, so the increase applies).
 *
 * NOTE(review): the NULL check on `idev` and the return value are on lines
 * this listing omits.
 */
2030 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2032 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2033 struct inet6_dev *idev;
2034 struct net *net = dev_net(arg->dev);
2036 /* In IPv6 pmtu discovery is not optional,
2037 so that RTAX_MTU lock cannot disable it.
2038 We still use this lock to block changes
2039 caused by addrconf/ndisc.
2042 idev = __in6_dev_get(arg->dev);
2046 /* For administrative MTU increase, there is no way to discover
2047 IPv6 PMTU increase, so PMTU increase should be updated here.
2048 Since RFC 1981 doesn't include administrative MTU increase
2049 update PMTU increase is a MUST. (i.e. jumbo frame)
2052 If new MTU is less than route PMTU, this new MTU will be the
2053 lowest MTU in the path, update the route PMTU to reflect PMTU
2054 decreases; if new MTU is greater than route PMTU, and the
2055 old MTU is the lowest MTU in the path, update the route PMTU
2056 to reflect the increase. In this case if the other nodes' MTU
2057 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2060 if (rt->rt6i_dev == arg->dev &&
2061 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2062 (dst_mtu(&rt->dst) >= arg->mtu ||
2063 (dst_mtu(&rt->dst) < arg->mtu &&
2064 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2065 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2066 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
/*
 * Propagate a device MTU change to the routing tables: runs
 * rt6_mtu_change_route() over every route in the namespace of @dev via
 * fib6_clean_all().
 *
 * NOTE(review): the initialiser of `arg` is on lines this listing omits;
 * presumably it carries @dev and @mtu -- confirm in the full source.
 */
2071 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2073 struct rt6_mtu_change_arg arg = {
2078 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/*
 * Netlink attribute policy handed to nlmsg_parse() by rtm_to_fib6_config()
 * and inet6_rtm_getroute().  RTA_GATEWAY is described by a length of one
 * struct in6_addr; RTA_OIF, RTA_IIF and RTA_PRIORITY are u32;
 * RTA_METRICS is a nested attribute.  Attributes without an entry here
 * (for example RTA_DST, RTA_SRC, RTA_TABLE) get the zero-initialised
 * default policy slot.
 */
2081 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2082 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2083 [RTA_OIF] = { .type = NLA_U32 },
2084 [RTA_IIF] = { .type = NLA_U32 },
2085 [RTA_PRIORITY] = { .type = NLA_U32 },
2086 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * Convert the rtnetlink route message @nlh (received in @skb) into @cfg.
 *
 * The attributes are parsed against rtm_ipv6_policy, @cfg is zeroed, and
 * the fixed rtmsg header fields are copied.  Flags start as RTF_UP, with
 * RTF_REJECT added for RTN_UNREACHABLE and RTF_LOCAL for RTN_LOCAL.
 *
 * NOTE(review): the error returns, the `if (tb[...])` guards in front of
 * the RTA_DST, RTA_SRC, RTA_OIF and RTA_TABLE handling, and the final
 * return are on lines this listing omits.
 */
2089 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2090 struct fib6_config *cfg)
2093 struct nlattr *tb[RTA_MAX+1];
2096 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2101 rtm = nlmsg_data(nlh);
2102 memset(cfg, 0, sizeof(*cfg));
2104 cfg->fc_table = rtm->rtm_table;
2105 cfg->fc_dst_len = rtm->rtm_dst_len;
2106 cfg->fc_src_len = rtm->rtm_src_len;
2107 cfg->fc_flags = RTF_UP;
2108 cfg->fc_protocol = rtm->rtm_protocol;
2110 if (rtm->rtm_type == RTN_UNREACHABLE)
2111 cfg->fc_flags |= RTF_REJECT;
2113 if (rtm->rtm_type == RTN_LOCAL)
2114 cfg->fc_flags |= RTF_LOCAL;
/* Remember who asked, so notifications can be attributed to the request. */
2116 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2117 cfg->fc_nlinfo.nlh = nlh;
2118 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
/* A gateway attribute implies a gatewayed route. */
2120 if (tb[RTA_GATEWAY]) {
2121 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2122 cfg->fc_flags |= RTF_GATEWAY;
/* Only the bytes covered by the prefix length are required and copied. */
2126 int plen = (rtm->rtm_dst_len + 7) >> 3;
2128 if (nla_len(tb[RTA_DST]) < plen)
2131 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2135 int plen = (rtm->rtm_src_len + 7) >> 3;
2137 if (nla_len(tb[RTA_SRC]) < plen)
2140 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2144 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2146 if (tb[RTA_PRIORITY])
2147 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
/* fc_mx points into the message itself; the metrics are not copied here. */
2149 if (tb[RTA_METRICS]) {
2150 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2151 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
/* The u32 RTA_TABLE attribute replaces the table id from the header. */
2155 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/*
 * rtnetlink handler registered for RTM_DELROUTE (see ip6_route_init()):
 * converts the message with rtm_to_fib6_config() and passes the result to
 * ip6_route_del(), whose return value is returned.  @arg is unused in the
 * visible code.
 *
 * NOTE(review): the check of the conversion result is on lines this
 * listing omits.
 */
2162 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2164 struct fib6_config cfg;
2167 err = rtm_to_fib6_config(skb, nlh, &cfg);
2171 return ip6_route_del(&cfg);
/*
 * rtnetlink handler registered for RTM_NEWROUTE (see ip6_route_init()):
 * converts the message with rtm_to_fib6_config() and passes the result to
 * ip6_route_add(), whose return value is returned.  @arg is unused in the
 * visible code.
 *
 * NOTE(review): the check of the conversion result is on lines this
 * listing omits.
 */
2174 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2176 struct fib6_config cfg;
2179 err = rtm_to_fib6_config(skb, nlh, &cfg);
2183 return ip6_route_add(&cfg);
2186 static inline size_t rt6_nlmsg_size(void)
2188 return NLMSG_ALIGN(sizeof(struct rtmsg))
2189 + nla_total_size(16) /* RTA_SRC */
2190 + nla_total_size(16) /* RTA_DST */
2191 + nla_total_size(16) /* RTA_GATEWAY */
2192 + nla_total_size(16) /* RTA_PREFSRC */
2193 + nla_total_size(4) /* RTA_TABLE */
2194 + nla_total_size(4) /* RTA_IIF */
2195 + nla_total_size(4) /* RTA_OIF */
2196 + nla_total_size(4) /* RTA_PRIORITY */
2197 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2198 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * Serialise route @rt into a netlink route message appended to @skb.
 *
 * @dst / @src: when given, they are emitted as RTA_DST / RTA_SRC with a
 *              prefix length of 128 instead of the route's own keys.
 * @iif:        value emitted as RTA_IIF.
 * @type, @pid, @seq, @flags: passed to nlmsg_put() for the message header.
 * @prefix:     when non-zero only routes with RTF_PREFIX_RT are of
 *              interest; other routes are skipped and reported as success.
 * @nowait:     forwarded to ip6mr_get_route() for multicast destinations.
 *
 * On success the result of nlmsg_end() is returned.  Attribute failures
 * jump to nla_put_failure, where the partial message is cancelled.
 *
 * NOTE(review): several conditions (the `if (dst)` / `if (src)` / `if
 * (iif)` tests, the table NULL check, the multicast error handling), the
 * early returns and the value returned after nlmsg_cancel() are on lines
 * this listing omits -- confirm in the full source.
 */
2201 static int rt6_fill_node(struct net *net,
2202 struct sk_buff *skb, struct rt6_info *rt,
2203 struct in6_addr *dst, struct in6_addr *src,
2204 int iif, int type, u32 pid, u32 seq,
2205 int prefix, int nowait, unsigned int flags)
2208 struct nlmsghdr *nlh;
2212 if (prefix) { /* user wants prefix routes only */
2213 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2214 /* success since this is not a prefix route */
2219 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2223 rtm = nlmsg_data(nlh);
2224 rtm->rtm_family = AF_INET6;
2225 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2226 rtm->rtm_src_len = rt->rt6i_src.plen;
/* The table id goes both into the header and into a u32 RTA_TABLE. */
2229 table = rt->rt6i_table->tb6_id;
2231 table = RT6_TABLE_UNSPEC;
2232 rtm->rtm_table = table;
2233 NLA_PUT_U32(skb, RTA_TABLE, table);
/* Route type: reject beats local; loopback-device routes count as local. */
2234 if (rt->rt6i_flags&RTF_REJECT)
2235 rtm->rtm_type = RTN_UNREACHABLE;
2236 else if (rt->rt6i_flags&RTF_LOCAL)
2237 rtm->rtm_type = RTN_LOCAL;
2238 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2239 rtm->rtm_type = RTN_LOCAL;
2241 rtm->rtm_type = RTN_UNICAST;
2243 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* The stored protocol is overridden from the flags, RTF_DYNAMIC first. */
2244 rtm->rtm_protocol = rt->rt6i_protocol;
2245 if (rt->rt6i_flags&RTF_DYNAMIC)
2246 rtm->rtm_protocol = RTPROT_REDIRECT;
2247 else if (rt->rt6i_flags & RTF_ADDRCONF)
2248 rtm->rtm_protocol = RTPROT_KERNEL;
2249 else if (rt->rt6i_flags&RTF_DEFAULT)
2250 rtm->rtm_protocol = RTPROT_RA;
2252 if (rt->rt6i_flags&RTF_CACHE)
2253 rtm->rtm_flags |= RTM_F_CLONED;
2256 NLA_PUT(skb, RTA_DST, 16, dst);
2257 rtm->rtm_dst_len = 128;
2258 } else if (rtm->rtm_dst_len)
2259 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2260 #ifdef CONFIG_IPV6_SUBTREES
2262 NLA_PUT(skb, RTA_SRC, 16, src);
2263 rtm->rtm_src_len = 128;
2264 } else if (rtm->rtm_src_len)
2265 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2268 #ifdef CONFIG_IPV6_MROUTE
2269 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2270 int err = ip6mr_get_route(net, skb, rtm, nowait);
2275 goto nla_put_failure;
2277 if (err == -EMSGSIZE)
2278 goto nla_put_failure;
2283 NLA_PUT_U32(skb, RTA_IIF, iif);
/* RTA_PREFSRC is only added when source address selection succeeds. */
2285 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2286 struct in6_addr saddr_buf;
2287 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2288 dst, 0, &saddr_buf) == 0)
2289 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2292 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2293 goto nla_put_failure;
/* The gateway is reported from the neighbour entry's key. */
2295 if (rt->dst.neighbour)
2296 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2299 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2301 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
/* Remaining lifetime in jiffies, used only while it is below INT_MAX. */
2303 if (!(rt->rt6i_flags & RTF_EXPIRES))
2305 else if (rt->rt6i_expires - jiffies < INT_MAX)
2306 expires = rt->rt6i_expires - jiffies;
2310 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2311 expires, rt->dst.error) < 0)
2312 goto nla_put_failure;
2314 return nlmsg_end(skb, nlh);
2317 nlmsg_cancel(skb, nlh);
/*
 * Emit one route of a netlink dump.  @p_arg is a struct rt6_rtnl_dump_arg
 * carrying the namespace, the output skb and the dump callback state.
 *
 * If the dump request is long enough to hold a struct rtmsg, its
 * RTM_F_PREFIX flag decides whether only prefix routes are wanted.  The
 * route is then written with rt6_fill_node() as an RTM_NEWROUTE message
 * flagged NLM_F_MULTI, addressed with the requester's pid and sequence
 * number; that function's return value is returned.
 *
 * NOTE(review): the declaration of `prefix` and its value for short
 * requests are on lines this listing omits.
 */
2321 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2323 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2326 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2327 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2328 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2332 return rt6_fill_node(arg->net,
2333 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2334 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2335 prefix, 0, NLM_F_MULTI);
/*
 * rtnetlink handler registered for RTM_GETROUTE (see ip6_route_init()):
 * resolve a route for the flow described by the request and send the
 * answer back to the requester.
 *
 * The attributes are parsed against rtm_ipv6_policy and a zeroed flow is
 * filled from RTA_SRC, RTA_DST (each must hold a full struct in6_addr) and
 * RTA_OIF; RTA_IIF is read into `iif` and looked up with
 * __dev_get_by_index().  A reply skb of NLMSG_GOODSIZE is allocated and
 * given headroom, the route is resolved with ip6_route_output() and
 * attached to the skb, the answer is built by rt6_fill_node() as an
 * RTM_NEWROUTE message, and it is delivered with rtnl_unicast().
 *
 * NOTE(review): the `if (tb[...])` guards, all error paths (including what
 * happens to the skb on failure) and the return are on lines this listing
 * omits -- confirm in the full source.
 */
2338 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2340 struct net *net = sock_net(in_skb->sk);
2341 struct nlattr *tb[RTA_MAX+1];
2342 struct rt6_info *rt;
2343 struct sk_buff *skb;
2348 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2353 memset(&fl, 0, sizeof(fl));
2356 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2359 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2363 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2366 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2370 iif = nla_get_u32(tb[RTA_IIF]);
2373 fl.oif = nla_get_u32(tb[RTA_OIF]);
2376 struct net_device *dev;
2377 dev = __dev_get_by_index(net, iif);
2384 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2390 /* Reserve room for dummy headers, this skb can pass
2391 through good chunk of routing engine.
2393 skb_reset_mac_header(skb);
2394 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2396 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2397 skb_dst_set(skb, &rt->dst);
2399 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2400 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2401 nlh->nlmsg_seq, 0, 0, 0);
2407 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
/*
 * Announce a change to route @rt on the RTNLGRP_IPV6_ROUTE multicast
 * group.  @event becomes the netlink message type; @info supplies the
 * namespace, the originating pid and, when a request header is present,
 * its sequence number (0 otherwise).
 *
 * The message buffer is sized by rt6_nlmsg_size(), so an -EMSGSIZE from
 * rt6_fill_node() indicates a bug in that size calculation and triggers
 * WARN_ON.  When the notification cannot be sent, the error is recorded
 * for listeners with rtnl_set_sk_err().
 *
 * NOTE(review): the initial value of `err`, the allocation-failure check,
 * the freeing of the skb on the fill error path and the early return after
 * a successful rtnl_notify() are on lines this listing omits.
 */
2412 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2414 struct sk_buff *skb;
2415 struct net *net = info->nl_net;
2420 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2422 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2426 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2427 event, info->pid, seq, 0, 0, 0);
2429 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2430 WARN_ON(err == -EMSGSIZE);
2434 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2435 info->nlh, gfp_any());
2439 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * Netdevice notifier callback (installed through ip6_route_dev_notifier).
 *
 * When a loopback device is registered, the namespace's special route
 * entries -- ip6_null_entry, plus ip6_prohibit_entry and
 * ip6_blk_hole_entry under CONFIG_IPV6_MULTIPLE_TABLES -- are pointed at
 * that device, and each takes its own inet6_dev reference via
 * in6_dev_get().  Other events and devices leave the entries untouched.
 *
 * NOTE(review): the return value is on a line this listing omits.
 */
2442 static int ip6_route_dev_notify(struct notifier_block *this,
2443 unsigned long event, void *data)
2445 struct net_device *dev = (struct net_device *)data;
2446 struct net *net = dev_net(dev);
2448 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2449 net->ipv6.ip6_null_entry->dst.dev = dev;
2450 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2451 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2452 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2453 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2454 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2455 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2466 #ifdef CONFIG_PROC_FS
/*
 * NOTE(review): appears to be the byte length of one line as printed by
 * rt6_info_route() (address fields of 32 hex digits, prefix lengths,
 * four 8-digit words, device name, newline).  No use of this macro is
 * visible in this part of the file -- confirm before relying on it.
 */
2468 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * Print one route as a line of the "ipv6_route" proc file.  @p_arg is the
 * seq_file being written (see ipv6_route_show()).
 *
 * Fields, in order: destination address and prefix length; source address
 * and prefix length (real values under CONFIG_IPV6_SUBTREES, an all-zero
 * placeholder is also present in the code); next-hop address taken from
 * the neighbour entry, or all zeros when there is none; then metric,
 * reference count, use count and flags as 8-digit hex words, and the
 * device name (empty when the route has no device).
 *
 * NOTE(review): the #else/#endif lines around the source field and the
 * return value are on lines this listing omits.
 */
2479 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2481 struct seq_file *m = p_arg;
2483 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2485 #ifdef CONFIG_IPV6_SUBTREES
2486 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2488 seq_puts(m, "00000000000000000000000000000000 00 ");
2491 if (rt->rt6i_nexthop) {
2492 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2494 seq_puts(m, "00000000000000000000000000000000");
2496 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2497 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2498 rt->dst.__use, rt->rt6i_flags,
2499 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * seq_file show callback for the "ipv6_route" proc file: runs
 * rt6_info_route() over every route of the namespace stored in
 * m->private, writing into @m.  @v is unused in the visible code.
 *
 * NOTE(review): the return value is on a line this listing omits.
 */
2503 static int ipv6_route_show(struct seq_file *m, void *v)
2505 struct net *net = (struct net *)m->private;
2506 fib6_clean_all(net, rt6_info_route, 0, m);
2510 static int ipv6_route_open(struct inode *inode, struct file *file)
2512 return single_open_net(inode, file, ipv6_route_show);
/*
 * File operations for the "ipv6_route" proc entry created in
 * ip6_route_net_init(): opened through ipv6_route_open(), seekable with
 * seq_lseek, released with single_release_net.
 *
 * NOTE(review): at least one member line (between .open and .llseek) is
 * omitted from this listing.
 */
2515 static const struct file_operations ipv6_route_proc_fops = {
2516 .owner = THIS_MODULE,
2517 .open = ipv6_route_open,
2519 .llseek = seq_lseek,
2520 .release = single_release_net,
/*
 * seq_file show callback for the "rt6_stats" proc file.  Prints one line
 * of seven hex fields for the namespace in seq->private: fib_nodes,
 * fib_route_nodes, fib_rt_alloc, fib_rt_entries, fib_rt_cache, the current
 * number of dst entries (dst_entries_get_slow()) and
 * fib_discarded_routes.  @v is unused in the visible code.
 *
 * NOTE(review): the return value is on a line this listing omits.
 */
2523 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2525 struct net *net = (struct net *)seq->private;
2526 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2527 net->ipv6.rt6_stats->fib_nodes,
2528 net->ipv6.rt6_stats->fib_route_nodes,
2529 net->ipv6.rt6_stats->fib_rt_alloc,
2530 net->ipv6.rt6_stats->fib_rt_entries,
2531 net->ipv6.rt6_stats->fib_rt_cache,
2532 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2533 net->ipv6.rt6_stats->fib_discarded_routes);
2538 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2540 return single_open_net(inode, file, rt6_stats_seq_show);
/*
 * File operations for the "rt6_stats" proc entry created in
 * ip6_route_net_init(): opened through rt6_stats_seq_open(), seekable
 * with seq_lseek, released with single_release_net.
 *
 * NOTE(review): at least one member line (between .open and .llseek) is
 * omitted from this listing.
 */
2543 static const struct file_operations rt6_stats_seq_fops = {
2544 .owner = THIS_MODULE,
2545 .open = rt6_stats_seq_open,
2547 .llseek = seq_lseek,
2548 .release = single_release_net,
2550 #endif /* CONFIG_PROC_FS */
2552 #ifdef CONFIG_SYSCTL
2555 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2556 void __user *buffer, size_t *lenp, loff_t *ppos)
2558 struct net *net = current->nsproxy->net_ns;
2559 int delay = net->ipv6.sysctl.flush_delay;
2561 proc_dointvec(ctl, write, buffer, lenp, ppos);
2562 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template for the per-namespace IPv6 route sysctl table.  The .data
 * pointers here refer to init_net (and to the dst_ops template for
 * gc_thresh); ipv6_route_sysctl_init() duplicates the table and repoints
 * .data by array index, so the ORDER of the entries below must stay in
 * step with the indices used there.
 *
 * Entry handlers: "flush" uses ipv6_sysctl_rtcache_flush; plain integers
 * use proc_dointvec; the gc intervals, gc_timeout and mtu_expires are
 * converted by proc_dointvec_jiffies.  "gc_min_interval" and
 * "gc_min_interval_ms" expose the same variable, the latter through
 * proc_dointvec_ms_jiffies.
 *
 * NOTE(review): the braces around each entry, the .mode members and the
 * table terminator are on lines this listing omits.
 */
2568 ctl_table ipv6_route_table_template[] = {
2570 .procname = "flush",
2571 .data = &init_net.ipv6.sysctl.flush_delay,
2572 .maxlen = sizeof(int),
2574 .proc_handler = ipv6_sysctl_rtcache_flush
2577 .procname = "gc_thresh",
2578 .data = &ip6_dst_ops_template.gc_thresh,
2579 .maxlen = sizeof(int),
2581 .proc_handler = proc_dointvec,
2584 .procname = "max_size",
2585 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2586 .maxlen = sizeof(int),
2588 .proc_handler = proc_dointvec,
2591 .procname = "gc_min_interval",
2592 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2593 .maxlen = sizeof(int),
2595 .proc_handler = proc_dointvec_jiffies,
2598 .procname = "gc_timeout",
2599 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2600 .maxlen = sizeof(int),
2602 .proc_handler = proc_dointvec_jiffies,
2605 .procname = "gc_interval",
2606 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2607 .maxlen = sizeof(int),
2609 .proc_handler = proc_dointvec_jiffies,
2612 .procname = "gc_elasticity",
2613 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2614 .maxlen = sizeof(int),
2616 .proc_handler = proc_dointvec,
2619 .procname = "mtu_expires",
2620 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2621 .maxlen = sizeof(int),
2623 .proc_handler = proc_dointvec_jiffies,
2626 .procname = "min_adv_mss",
2627 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2628 .maxlen = sizeof(int),
2630 .proc_handler = proc_dointvec,
2633 .procname = "gc_min_interval_ms",
2634 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2635 .maxlen = sizeof(int),
2637 .proc_handler = proc_dointvec_ms_jiffies,
/*
 * Create the route sysctl table for namespace @net.
 *
 * ipv6_route_table_template is duplicated with kmemdup() and the .data
 * pointer of entries 0..9 is redirected to the corresponding per-namespace
 * variable.  The indices follow the template's entry order: 0 flush,
 * 1 gc_thresh, 2 max_size, 3 gc_min_interval, 4 gc_timeout, 5 gc_interval,
 * 6 gc_elasticity, 7 mtu_expires, 8 min_adv_mss, 9 gc_min_interval_ms
 * (same variable as entry 3).
 *
 * NOTE(review): the allocation flags, the NULL check on the duplicate and
 * the return statement are on lines this listing omits.
 */
2642 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2644 struct ctl_table *table;
2646 table = kmemdup(ipv6_route_table_template,
2647 sizeof(ipv6_route_table_template),
2651 table[0].data = &net->ipv6.sysctl.flush_delay;
2652 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2653 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2654 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2655 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2656 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2657 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2658 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2659 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2660 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
/*
 * Per-namespace initialisation of IPv6 routing (the .init hook of
 * ip6_route_net_ops).
 *
 * Steps, in order:
 *   1. copy ip6_dst_ops_template into the namespace and initialise its
 *      dst entry counter;
 *   2. duplicate the ip6_null_entry template -- and, under
 *      CONFIG_IPV6_MULTIPLE_TABLES, the prohibit and blackhole templates --
 *      making each entry's dst.path point at itself and dst.ops at the
 *      namespace's dst_ops;
 *   3. set the sysctl defaults (max_size 4096, gc_min_interval HZ/2,
 *      gc_timeout 60s, gc_interval 30s, gc_elasticity 9, mtu_expires
 *      10 min, min_advmss IPV6_MIN_MTU - 20 - 40);
 *   4. create the "ipv6_route" and "rt6_stats" proc files when
 *      CONFIG_PROC_FS is enabled, and set ip6_rt_gc_expire to 30s.
 *
 * Failures unwind through the labels at the end, freeing what was
 * allocated so far in reverse order.
 *
 * NOTE(review): the declaration and final value of the return code, the
 * allocation flags, several #endif lines and some labels are on lines this
 * listing omits.  The return values of the proc_net_fops_create() calls
 * are not checked in the visible code.
 */
2667 static int __net_init ip6_route_net_init(struct net *net)
2671 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2672 sizeof(net->ipv6.ip6_dst_ops));
2674 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2675 goto out_ip6_dst_ops;
2677 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2678 sizeof(*net->ipv6.ip6_null_entry),
2680 if (!net->ipv6.ip6_null_entry)
2681 goto out_ip6_dst_entries;
2682 net->ipv6.ip6_null_entry->dst.path =
2683 (struct dst_entry *)net->ipv6.ip6_null_entry;
2684 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2686 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2688 sizeof(*net->ipv6.ip6_prohibit_entry),
2690 if (!net->ipv6.ip6_prohibit_entry)
2691 goto out_ip6_null_entry;
2692 net->ipv6.ip6_prohibit_entry->dst.path =
2693 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2694 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2696 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2697 sizeof(*net->ipv6.ip6_blk_hole_entry),
2699 if (!net->ipv6.ip6_blk_hole_entry)
2700 goto out_ip6_prohibit_entry;
2701 net->ipv6.ip6_blk_hole_entry->dst.path =
2702 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2703 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2706 net->ipv6.sysctl.flush_delay = 0;
2707 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2708 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2709 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2710 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2711 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2712 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2713 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2715 #ifdef CONFIG_PROC_FS
2716 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2717 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2719 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2725 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2726 out_ip6_prohibit_entry:
2727 kfree(net->ipv6.ip6_prohibit_entry);
2729 kfree(net->ipv6.ip6_null_entry);
2731 out_ip6_dst_entries:
2732 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
/*
 * Per-namespace teardown of IPv6 routing (the .exit hook of
 * ip6_route_net_ops); undoes ip6_route_net_init().  Removes the
 * "ipv6_route" and "rt6_stats" proc files when CONFIG_PROC_FS is enabled,
 * frees the namespace's null entry (plus the prohibit and blackhole
 * entries under CONFIG_IPV6_MULTIPLE_TABLES) and destroys the dst entry
 * counter.
 */
2737 static void __net_exit ip6_route_net_exit(struct net *net)
2739 #ifdef CONFIG_PROC_FS
2740 proc_net_remove(net, "ipv6_route");
2741 proc_net_remove(net, "rt6_stats");
2743 kfree(net->ipv6.ip6_null_entry);
2744 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2745 kfree(net->ipv6.ip6_prohibit_entry);
2746 kfree(net->ipv6.ip6_blk_hole_entry);
2748 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2751 static struct pernet_operations ip6_route_net_ops = {
2752 .init = ip6_route_net_init,
2753 .exit = ip6_route_net_exit,
/*
 * Netdevice notifier block that routes device events to
 * ip6_route_dev_notify(); registered in ip6_route_init() and unregistered
 * in ip6_route_cleanup().
 *
 * NOTE(review): further member lines and the closing brace are omitted
 * from this listing.
 */
2756 static struct notifier_block ip6_route_dev_notifier = {
2757 .notifier_call = ip6_route_dev_notify,
/*
 * Boot-time initialisation of the IPv6 routing subsystem.
 *
 * Visible steps, in order:
 *   1. create the "ip6_dst_cache" slab cache for struct rt6_info;
 *   2. initialise the dst entry counter of ip6_dst_blackhole_ops;
 *   3. register the per-namespace operations (ip6_route_net_ops);
 *   4. let the blackhole dst_ops share the same slab cache;
 *   5. attach init_net's special route entries to the loopback device by
 *      hand, because loopback was registered before the notifier exists;
 *   6. initialise the policy rules (fib6_rules_init);
 *   7. register the rtnetlink handlers for RTM_NEWROUTE, RTM_DELROUTE and
 *      RTM_GETROUTE;
 *   8. register the netdevice notifier.
 *
 * On failure, the labels at the end undo the completed steps in reverse
 * order.
 *
 * NOTE(review): the declaration and assignments of `ret`, several
 * initialisation calls between steps 5 and 6, most `if (ret)` tests, some
 * labels and the final return are on lines this listing omits -- confirm
 * in the full source.
 */
2761 int __init ip6_route_init(void)
2766 ip6_dst_ops_template.kmem_cachep =
2767 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2768 SLAB_HWCACHE_ALIGN, NULL);
2769 if (!ip6_dst_ops_template.kmem_cachep)
2772 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2774 goto out_kmem_cache;
2776 ret = register_pernet_subsys(&ip6_route_net_ops);
2778 goto out_dst_entries;
2780 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2782 /* Registering of the loopback is done before this portion of code,
2783 * the loopback reference in rt6_info will not be taken, do it
2784 * manually for init_net */
2785 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2786 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2787 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2788 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2789 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2790 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2791 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2795 goto out_register_subsys;
2801 ret = fib6_rules_init();
2806 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2807 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2808 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2809 goto fib6_rules_init;
2811 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2813 goto fib6_rules_init;
2819 fib6_rules_cleanup();
2824 out_register_subsys:
2825 unregister_pernet_subsys(&ip6_route_net_ops);
2827 dst_entries_destroy(&ip6_dst_blackhole_ops);
2829 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
/*
 * Tear down what ip6_route_init() set up, in reverse order: unregister the
 * netdevice notifier, clean up the policy rules, unregister the
 * per-namespace operations, destroy the blackhole dst entry counter and
 * finally destroy the rt6_info slab cache.
 *
 * NOTE(review): two statements between fib6_rules_cleanup() and
 * unregister_pernet_subsys() are omitted from this listing.
 */
2833 void ip6_route_cleanup(void)
2835 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2836 fib6_rules_cleanup();
2839 unregister_pernet_subsys(&ip6_route_net_ops);
2840 dst_entries_destroy(&ip6_dst_blackhole_ops);
2841 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);