/*
 *	Linux INET6 implementation
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *
 *	Fixed routing subtrees.
 */
#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

static int ip6_rt_type_to_error(u8 fib6_type);

#define CREATE_TRACE_POINTS
#include <trace/events/fib6.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
#undef CREATE_TRACE_POINTS
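/* Return codes used when scoring a candidate route against the
 * reachability of its gateway: RT6_NUD_FAIL_HARD rejects the route
 * outright, RT6_NUD_FAIL_PROBE scores it below any valid route while
 * a reachability probe is warranted, and RT6_NUD_FAIL_DO_RR tells the
 * caller to round-robin to the next router (see rt6_check_neigh() and
 * rt6_score_route() below).
 */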
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct fib6_info *rt6_add_route_info(struct net *net,
					    const struct in6_addr *prefix, int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev,
					    unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					    const struct in6_addr *prefix, int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
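/* Routes whose dst is not owned by the fib6 tree (the RTF_CACHE clones
 * created for FLOWI_FLAG_KNOWN_NH in ip6_pol_route() and the dsts made
 * by icmp6_dst_alloc()) are tracked on these per-cpu lists so that
 * rt6_uncached_list_flush_dev() can re-point them at the loopback
 * device when their original device is unregistered.
 */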
void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static inline const void *choose_neigh_daddr(const struct in6_addr *p,
					     struct sk_buff *skb,
					     const void *daddr)
{
	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
				   struct net_device *dev,
				   struct sk_buff *skb,
				   const void *daddr)
{
	struct neighbour *n;

	daddr = choose_neigh_daddr(gw, skb, daddr);
	n = __ipv6_neigh_lookup(dev, daddr);
	if (n)
		return n;

	n = neigh_create(&nd_tbl, daddr, dev);
	return IS_ERR(n) ? NULL : n;
}

static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
					      struct sk_buff *skb,
					      const void *daddr)
{
	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);

	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
}

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
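/* dst_ops for the blackhole entries handed out by ip6_blackhole_route();
 * PMTU updates and redirects are deliberately ignored for these.
 */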
static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol	= RTPROT_KERNEL,
	.fib6_metric	= ~(u32)0,
	.fib6_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct fib6_info *from;
	struct inet6_dev *idev;

	ip_dst_metrics_put(dst);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	rcu_read_lock();
	from = rcu_dereference(rt->from);
	rcu_assign_pointer(rt->from, NULL);
	fib6_info_release(from);
	rcu_read_unlock();
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);

		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	struct fib6_info *from;

	from = rcu_dereference(rt->from);

	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
			fib6_check_expired(from);
	}
	return false;
}
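/* Multipath nexthop selection uses hash-threshold (cf. RFC 2992): each
 * sibling carries a precomputed upper bound in fib_nh_upper_bound, and
 * the flow hash selects the first sibling whose bound it does not
 * exceed.
 */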
struct fib6_info *fib6_multipath_select(const struct net *net,
					struct fib6_info *match,
					struct flowi6 *fl6, int oif,
					const struct sk_buff *skb,
					int strict)
{
	struct fib6_info *sibling, *next_sibling;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
		return match;

	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
				 fib6_siblings) {
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
		if (fl6->mp_hash > nh_upper_bound)
			continue;
		if (rt6_score_route(sibling, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

	return match;
}

/*
 *	Route lookup. rcu_read_lock() should be held.
 */

static inline struct fib6_info *rt6_device_match(struct net *net,
						 struct fib6_info *rt,
						 const struct in6_addr *saddr,
						 int oif,
						 int flags)
{
	struct fib6_info *sprt;

	if (!oif && ipv6_addr_any(saddr) &&
	    !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
		const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;

		if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif && flags & RT6_LOOKUP_F_IFACE)
		return net->ipv6.fib6_null_entry;

	return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct fib6_info *rt)
{
	struct __rt6_probe_work *work = NULL;
	const struct in6_addr *nh_gw;
	struct neighbour *neigh;
	struct net_device *dev;
	struct inet6_dev *idev;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !rt->fib6_nh.fib_nh_has_gw)
		return;

	nh_gw = &rt->fib6_nh.fib_nh_gw6;
	dev = rt->fib6_nh.fib_nh_dev;
	rcu_read_lock_bh();
	idev = __in6_dev_get(dev);
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated + idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else if (time_after(jiffies, rt->last_probe +
				       idev->cnf.rtr_probe_interval)) {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		rt->last_probe = jiffies;
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		dev_hold(dev);
		work->dev = dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct fib6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct fib6_info *rt, int oif)
{
	const struct net_device *dev = rt->fib6_nh.fib_nh_dev;

	if (!oif || dev->ifindex == oif)
		return 2;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	if (rt->fib6_flags & RTF_NONEXTHOP ||
	    !rt->fib6_nh.fib_nh_has_gw)
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev,
					  &rt->fib6_nh.fib_nh_gw6);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
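/* A route's score packs the interface match into bit 1 (rt6_check_dev()
 * returns 2 on a match) and, with CONFIG_IPV6_ROUTER_PREF, the decoded
 * RA preference into bits 2 and up; negative RT6_NUD_* values from
 * rt6_check_neigh() are passed straight through to the caller.
 */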
static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
				    int *mpri, struct fib6_info *match,
				    bool *do_rr)
{
	int m;
	bool match_do_rr = false;

	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
		goto out;

	if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
	    rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (fib6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
				      struct fib6_info *leaf,
				      struct fib6_info *rr_head,
				      u32 metric, int oif, int strict,
				      bool *do_rr)
{
	struct fib6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
		if (rt->fib6_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->fib6_next)) {
		if (rt->fib6_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
				    int oif, int strict)
{
	struct fib6_info *leaf = rcu_dereference(fn->leaf);
	struct fib6_info *match, *rt0;
	bool do_rr = false;
	int key_plen;

	if (!leaf || leaf == net->ipv6.fib6_null_entry)
		return net->ipv6.fib6_null_entry;

	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not point to its child's leaf
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->fib6_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->fib6_src.plen)
		key_plen = rt0->fib6_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		return net->ipv6.fib6_null_entry;

	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct fib6_info *next = rcu_dereference(rt0->fib6_next);

		/* no entries matched; do round-robin */
		if (!next || next->fib6_metric != rt0->fib6_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->fib6_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->fib6_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
		}
	}

	return match ? match : net->ipv6.fib6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
{
	return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct fib6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(net, rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			fib6_clean_expires(rt);
		else
			fib6_set_expires(rt, jiffies + HZ * lifetime);

		fib6_info_release(rt);
	}
	return 0;
}
#endif

/*
 *	Misc support functions
 */

/* called with rcu_lock held */
static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
{
	struct net_device *dev = rt->fib6_nh.fib_nh_dev;

	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->fib6_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}

static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]	= 0,
	[RTN_UNICAST]	= 0,
	[RTN_LOCAL]	= 0,
	[RTN_BROADCAST]	= 0,
	[RTN_ANYCAST]	= 0,
	[RTN_MULTICAST]	= 0,
	[RTN_BLACKHOLE]	= -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]	= -EACCES,
	[RTN_THROW]	= -EAGAIN,
	[RTN_NAT]	= -EINVAL,
	[RTN_XRESOLVE]	= -EINVAL,
};

static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}

static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
{
	unsigned short flags = 0;

	if (rt->dst_nocount)
		flags |= DST_NOCOUNT;
	if (rt->dst_nopolicy)
		flags |= DST_NOPOLICY;
	if (rt->dst_host)
		flags |= DST_HOST;

	return flags;
}

static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
{
	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);

	switch (ort->fib6_type) {
	case RTN_BLACKHOLE:
		rt->dst.output = dst_discard_out;
		rt->dst.input = dst_discard;
		break;
	case RTN_PROHIBIT:
		rt->dst.output = ip6_pkt_prohibit_out;
		rt->dst.input = ip6_pkt_prohibit;
		break;
	case RTN_THROW:
	case RTN_UNREACHABLE:
	default:
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		break;
	}
}

static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
{
	if (ort->fib6_flags & RTF_REJECT) {
		ip6_rt_init_dst_reject(rt, ort);
		return;
	}

	rt->dst.error = 0;
	rt->dst.output = ip6_output;

	if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
		rt->dst.input = ip6_input;
	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
		rt->dst.input = ip6_mc_input;
	} else {
		rt->dst.input = ip6_forward;
	}

	if (ort->fib6_nh.fib_nh_lws) {
		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
		lwtunnel_set_redirect(&rt->dst);
	}

	rt->dst.lastuse = jiffies;
}

/* Caller must already hold reference to @from */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	rt->rt6i_flags &= ~RTF_EXPIRES;
	rcu_assign_pointer(rt->from, from);
	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}

/* Caller must already hold reference to @ort */
static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
{
	struct net_device *dev = fib6_info_nh_dev(ort);

	ip6_rt_init_dst(rt, ort);

	rt->rt6i_dst = ort->fib6_dst;
	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
	rt->rt6i_flags = ort->fib6_flags;
	if (ort->fib6_nh.fib_nh_has_gw) {
		rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
		rt->rt6i_flags |= RTF_GATEWAY;
	}
	rt6_set_from(rt, ort);
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->fib6_src;
#endif
}
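/* Walk back up the trie when a lookup dead-ends: climb to the parent
 * node, descending into its source-address subtree first when one
 * exists, until a node carrying routes (RTN_RTINFO) or the tree root
 * is reached.
 */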
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn, *sn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = rcu_dereference(fn->parent);
		sn = FIB6_SUBTREE(pn);
		if (sn && sn != fn)
			fn = fib6_node_lookup(sn, NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
{
	struct rt6_info *rt = *prt;

	if (dst_hold_safe(&rt->dst))
		return true;
	if (net) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = NULL;
	}
	*prt = rt;
	return false;
}

/* called with rcu_lock held */
static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
{
	unsigned short flags = fib6_info_dst_flags(rt);
	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
	struct rt6_info *nrt;

	if (!fib6_info_hold_safe(rt))
		goto fallback;

	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
	if (!nrt) {
		fib6_info_release(rt);
		goto fallback;
	}

	ip6_rt_copy_init(nrt, rt);
	return nrt;

fallback:
	nrt = dev_net(dev)->ipv6.ip6_null_entry;
	dst_hold(&nrt->dst);
	return nrt;
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	struct fib6_info *f6i;
	struct fib6_node *fn;
	struct rt6_info *rt;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	rcu_read_lock();
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	f6i = rcu_dereference(fn->leaf);
	if (!f6i) {
		f6i = net->ipv6.fib6_null_entry;
	} else {
		f6i = rt6_device_match(net, f6i, &fl6->saddr,
				       fl6->flowi6_oif, flags);
		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
			f6i = fib6_multipath_select(net, f6i, fl6,
						    fl6->flowi6_oif, skb,
						    flags);
	}
	if (f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

	trace_fib6_table_lookup(net, f6i, table, fl6);

	/* Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt))
			dst_use_noref(&rt->dst, jiffies);
	} else if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = ip6_create_rt_rcu(f6i);
	}

	rcu_read_unlock();

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb, int flags)
{
	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif,
			    const struct sk_buff *skb, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */

static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->fib6_table;
	spin_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, extack);
	spin_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct net *net, struct fib6_info *rt)
{
	struct nl_info info = { .nl_net = net, };

	return __ip6_ins_rt(rt, &info, NULL);
}

static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (!fib6_info_hold_safe(ort))
		return NULL;

	dev = ip6_rt_get_dev_rcu(ort);
	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
	if (!rt) {
		fib6_info_release(ort);
		return NULL;
	}

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->fib6_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
{
	unsigned short flags = fib6_info_dst_flags(rt);
	struct net_device *dev;
	struct rt6_info *pcpu_rt;

	if (!fib6_info_hold_safe(rt))
		return NULL;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(rt);
	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
	rcu_read_unlock();
	if (!pcpu_rt) {
		fib6_info_release(rt);
		return NULL;
	}
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
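/* Each fib6_info keeps a per-cpu array of rt6_info clones (rt6i_pcpu).
 * Readers take the cpu-local entry when present; rt6_make_pcpu_route()
 * installs a new clone with cmpxchg(), so only one writer can win for
 * a given slot.
 */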
/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt)
		ip6_hold_safe(NULL, &pcpu_rt);

	return pcpu_rt;
}

static struct rt6_info *rt6_make_pcpu_route(struct net *net,
					    struct fib6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	dst_hold(&pcpu_rt->dst);
	p = this_cpu_ptr(rt->rt6i_pcpu);
	prev = cmpxchg(p, NULL, pcpu_rt);
	BUG_ON(prev);

	return pcpu_rt;
}

/* exception hash table implementation
 */
static DEFINE_SPINLOCK(rt6_exception_lock);

/* Remove rt6_ex from hash table and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				 struct rt6_exception *rt6_ex)
{
	struct fib6_info *from;
	struct net *net;

	if (!bucket || !rt6_ex)
		return;

	net = dev_net(rt6_ex->rt6i->dst.dev);
	net->ipv6.rt6_stats->fib_rt_cache--;

	/* purge completely the exception to allow releasing the held resources:
	 * some [sk] cache may keep the dst around for unlimited time
	 */
	from = rcu_dereference_protected(rt6_ex->rt6i->from,
					 lockdep_is_held(&rt6_exception_lock));
	rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
	fib6_info_release(from);
	dst_dev_put(&rt6_ex->rt6i->dst);

	hlist_del_rcu(&rt6_ex->hlist);
	dst_release(&rt6_ex->rt6i->dst);
	kfree_rcu(rt6_ex, rcu);
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
}

/* Remove oldest rt6_ex in bucket and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
{
	struct rt6_exception *rt6_ex, *oldest = NULL;

	if (!bucket)
		return;

	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
			oldest = rt6_ex;
	}
	rt6_remove_exception(bucket, oldest);
}
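/* Exceptions (RTF_CACHE clones created by PMTU updates and redirects)
 * hang off their parent fib6_info in a small hash table keyed by the
 * destination address, plus the source address when the parent lives
 * in a source-routing subtree.
 */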
static u32 rt6_exception_hash(const struct in6_addr *dst,
			      const struct in6_addr *src)
{
	static u32 seed __read_mostly;
	u32 val;

	net_get_random_once(&seed, sizeof(seed));
	val = jhash(dst, sizeof(*dst), seed);

#ifdef CONFIG_IPV6_SUBTREES
	if (src)
		val = jhash(src, sizeof(*src), val);
#endif

	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rt6_exception_lock
 */
static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
			      const struct in6_addr *daddr,
			      const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rcu_read_lock()
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

static unsigned int fib6_mtu(const struct fib6_info *rt)
{
	unsigned int mtu;

	if (rt->fib6_pmtu) {
		mtu = rt->fib6_pmtu;
	} else {
		struct net_device *dev = fib6_info_nh_dev(rt);
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		mtu = idev->cnf.mtu6;
		rcu_read_unlock();
	}

	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
}
static int rt6_insert_exception(struct rt6_info *nrt,
				struct fib6_info *ort)
{
	struct net *net = dev_net(nrt->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	spin_lock_bh(&rt6_exception_lock);

	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->fib6_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
		err = -EINVAL;
		goto out;
	}

	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		spin_lock_bh(&ort->fib6_table->tb6_lock);
		fib6_update_sernum(net, ort);
		spin_unlock_bh(&ort->fib6_table->tb6_lock);
		fib6_force_start_gc(net);
	}

	return err;
}

void rt6_flush_exceptions(struct fib6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	spin_lock_bh(&rt6_exception_lock);
	/* Prevent rt6_insert_exception() from recreating the bucket list */
	rt->exception_bucket_flushed = 1;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		goto out;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
			rt6_remove_exception(bucket, rt6_ex);
		WARN_ON_ONCE(bucket->depth);
		bucket++;
	}

out:
	spin_unlock_bh(&rt6_exception_lock);
}
/* Find cached rt in the hash table inside passed in rt
 * Caller has to hold rcu_read_lock()
 */
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct rt6_info *res = NULL;

	bucket = rcu_dereference(rt->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates rt is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (rt->fib6_src.plen)
		src_key = saddr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);

	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
		res = rt6_ex->rt6i;

	return res;
}

/* Remove the passed in cached rt from the hash table that contains it */
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct fib6_info *from;
	int err;

	from = rcu_dereference(rt->from);
	if (!from ||
	    !(rt->rt6i_flags & RTF_CACHE))
		return -EINVAL;

	if (!rcu_access_pointer(from->rt6i_exception_bucket))
		return -ENOENT;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->fib6_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_spinlock(&bucket,
					       &rt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex) {
		rt6_remove_exception(bucket, rt6_ex);
		err = 0;
	} else {
		err = -ENOENT;
	}

	spin_unlock_bh(&rt6_exception_lock);
	return err;
}

/* Find rt6_ex which contains the passed in rt cache and
 * refresh its stamp
 */
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	struct fib6_info *from;

	rcu_read_lock();
	from = rcu_dereference(rt->from);
	if (!from || !(rt->rt6i_flags & RTF_CACHE))
		goto unlock;

	bucket = rcu_dereference(from->rt6i_exception_bucket);

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates 'from' is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (from->fib6_src.plen)
		src_key = &rt->rt6i_src.addr;
#endif
	rt6_ex = __rt6_find_exception_rcu(&bucket,
					  &rt->rt6i_dst.addr,
					  src_key);
	if (rt6_ex)
		rt6_ex->stamp = jiffies;

unlock:
	rcu_read_unlock();
}
static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
					 struct rt6_info *rt, int mtu)
{
	/* If the new MTU is lower than the route PMTU, this new MTU will be the
	 * lowest MTU in the path: always allow updating the route PMTU to
	 * reflect PMTU decreases.
	 *
	 * If the new MTU is higher, and the route PMTU is equal to the local
	 * MTU, this means the old MTU is the lowest in the path, so allow
	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
	 * handle this.
	 */

	if (dst_mtu(&rt->dst) >= mtu)
		return true;

	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
		return true;

	return false;
}

static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
				       struct fib6_info *rt, int mtu)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i;

	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket)
		return;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
			struct rt6_info *entry = rt6_ex->rt6i;

			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
			 * route), the metrics of its rt->from have already
			 * been updated.
			 */
			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
			    rt6_mtu_change_route_allowed(idev, entry, mtu))
				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
		}
		bucket++;
	}
}

#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
					struct in6_addr *gateway)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	spin_lock_bh(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				     lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				struct rt6_info *entry = rt6_ex->rt6i;

				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
				    RTF_CACHE_GATEWAY &&
				    ipv6_addr_equal(gateway,
						    &entry->rt6i_gateway)) {
					rt6_remove_exception(bucket, rt6_ex);
				}
			}
			bucket++;
		}
	}

	spin_unlock_bh(&rt6_exception_lock);
}
static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
				      struct rt6_exception *rt6_ex,
				      struct fib6_gc_args *gc_args,
				      unsigned long now)
{
	struct rt6_info *rt = rt6_ex->rt6i;

	/* we are pruning and obsoleting aged-out and non gateway exceptions
	 * even if others have still references to them, so that on next
	 * dst_check() such references can be dropped.
	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
	 * expired, independently from their aging, as per RFC 8201 section 4
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
			RT6_TRACE("aging clone %p\n", rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	} else if (time_after(jiffies, rt->dst.expires)) {
		RT6_TRACE("purging expired route %p\n", rt);
		rt6_remove_exception(bucket, rt6_ex);
		return;
	}

	if (rt->rt6i_flags & RTF_GATEWAY) {
		struct neighbour *neigh;
		__u8 neigh_flags = 0;

		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
		if (neigh)
			neigh_flags = neigh->flags;

		if (!(neigh_flags & NTF_ROUTER)) {
			RT6_TRACE("purging route %p via non-router but gateway\n",
				  rt);
			rt6_remove_exception(bucket, rt6_ex);
			return;
		}
	}

	gc_args->more++;
}

void rt6_age_exceptions(struct fib6_info *rt,
			struct fib6_gc_args *gc_args,
			unsigned long now)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct hlist_node *tmp;
	int i;

	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
		return;

	rcu_read_lock_bh();
	spin_lock(&rt6_exception_lock);
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
				    lockdep_is_held(&rt6_exception_lock));

	if (bucket) {
		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
			hlist_for_each_entry_safe(rt6_ex, tmp,
						  &bucket->chain, hlist) {
				rt6_age_examine_exception(bucket, rt6_ex,
							  gc_args, now);
			}
			bucket++;
		}
	}
	spin_unlock(&rt6_exception_lock);
	rcu_read_unlock_bh();
}
/* must be called with rcu lock held */
struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
				    int oif, struct flowi6 *fl6, int strict)
{
	struct fib6_node *fn, *saved_fn;
	struct fib6_info *f6i;

	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	f6i = rt6_select(net, fn, oif, strict);
	if (f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	trace_fib6_table_lookup(net, f6i, table, fl6);

	return f6i;
}

struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6,
			       const struct sk_buff *skb, int flags)
{
	struct fib6_info *f6i;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	rcu_read_lock();

	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
	if (f6i->fib6_nsiblings)
		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);

	if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		rcu_read_unlock();
		dst_hold(&rt->dst);
		return rt;
	}

	/* Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt))
			dst_use_noref(&rt->dst, jiffies);

		rcu_read_unlock();
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !f6i->fib6_nh.fib_nh_has_gw)) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */
		struct rt6_info *uncached_rt;

		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);

		rcu_read_unlock();

		if (uncached_rt) {
			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
			 * No need for another dst_hold()
			 */
			rt6_uncached_list_add(uncached_rt);
			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
		} else {
			uncached_rt = net->ipv6.ip6_null_entry;
			dst_hold(&uncached_rt->dst);
		}

		return uncached_rt;
	} else {
		/* Get a percpu copy */
		struct rt6_info *pcpu_rt;

		local_bh_disable();
		pcpu_rt = rt6_get_pcpu_route(f6i);

		if (!pcpu_rt)
			pcpu_rt = rt6_make_pcpu_route(net, f6i);

		local_bh_enable();
		rcu_read_unlock();

		return pcpu_rt;
	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
static struct rt6_info *ip6_pol_route_input(struct net *net,
					    struct fib6_table *table,
					    struct flowi6 *fl6,
					    const struct sk_buff *skb,
					    int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}

struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
					 struct flowi6 *fl6,
					 const struct sk_buff *skb,
					 int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
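/* For ICMPv6 errors the multipath hash must be computed over the
 * offending (inner) packet, so that the error travels the same path
 * as the flow that triggered it; ip6_multipath_l3_keys() extracts
 * those inner L3 keys.
 */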
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
				  struct flow_keys *keys,
				  struct flow_keys *flkeys)
{
	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
	const struct ipv6hdr *key_iph = outer_iph;
	struct flow_keys *_flkeys = flkeys;
	const struct ipv6hdr *inner_iph;
	const struct icmp6hdr *icmph;
	struct ipv6hdr _inner_iph;
	struct icmp6hdr _icmph;

	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
		goto out;

	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
				   sizeof(_icmph), &_icmph);
	if (!icmph)
		goto out;

	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
	    icmph->icmp6_type != ICMPV6_PARAMPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       skb_transport_offset(skb) + sizeof(*icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
	_flkeys = NULL;
out:
	if (_flkeys) {
		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
		keys->tags.flow_label = _flkeys->tags.flow_label;
		keys->basic.ip_proto = _flkeys->basic.ip_proto;
	} else {
		keys->addrs.v6addrs.src = key_iph->saddr;
		keys->addrs.v6addrs.dst = key_iph->daddr;
		keys->tags.flow_label = ip6_flowlabel(key_iph);
		keys->basic.ip_proto = key_iph->nexthdr;
	}
}
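/* Hash policy 0 hashes on L3 keys only (addresses, flow label, next
 * header); policy 1 hashes on the L4 five-tuple. The policy comes from
 * ip6_multipath_hash_policy(), i.e. the per-namespace
 * fib_multipath_hash_policy sysctl.
 */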
/* if skb is set it will be used and fl6 can be NULL */
u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{
	struct flow_keys hash_keys;
	u32 mhash;

	switch (ip6_multipath_hash_policy(net)) {
	case 0:
		memset(&hash_keys, 0, sizeof(hash_keys));
		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		if (skb) {
			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
		} else {
			hash_keys.addrs.v6addrs.src = fl6->saddr;
			hash_keys.addrs.v6addrs.dst = fl6->daddr;
			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
			hash_keys.basic.ip_proto = fl6->flowi6_proto;
		}
		break;
	case 1:
		if (skb) {
			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
			struct flow_keys keys;

			/* short-circuit if we already have L4 hash present */
			if (skb->l4_hash)
				return skb_get_hash_raw(skb) >> 1;

			memset(&hash_keys, 0, sizeof(hash_keys));

			if (!flkeys) {
				skb_flow_dissect_flow_keys(skb, &keys, flag);
				flkeys = &keys;
			}
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
			hash_keys.ports.src = flkeys->ports.src;
			hash_keys.ports.dst = flkeys->ports.dst;
			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
		} else {
			memset(&hash_keys, 0, sizeof(hash_keys));
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
			hash_keys.addrs.v6addrs.src = fl6->saddr;
			hash_keys.addrs.v6addrs.dst = fl6->daddr;
			hash_keys.ports.src = fl6->fl6_sport;
			hash_keys.ports.dst = fl6->fl6_dport;
			hash_keys.basic.ip_proto = fl6->flowi6_proto;
		}
		break;
	}
	mhash = flow_hash_from_keys(&hash_keys);

	return mhash >> 1;
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};
	struct flow_keys *flkeys = NULL, _flkeys;

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;

	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
		flkeys = &_flkeys;

	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);

	skb_dst_drop(skb);
	skb_dst_set(skb,
		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
static struct rt6_info *ip6_pol_route_output(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}

struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	if (ipv6_addr_type(&fl6->daddr) &
	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct net_device *loopback_dev = net->loopback_dev;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
		       DST_OBSOLETE_DEAD, 0);
	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);

		rt->rt6i_idev = in6_dev_get(loopback_dev);
		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
/*
 *	Destination cache support functions
 */

static bool fib6_check(struct fib6_info *f6i, u32 cookie)
{
	u32 rt_cookie = 0;

	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
		return false;

	if (fib6_check_expired(f6i))
		return false;

	return true;
}

static struct dst_entry *rt6_check(struct rt6_info *rt,
				   struct fib6_info *from,
				   u32 cookie)
{
	u32 rt_cookie = 0;

	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
	    rt_cookie != cookie)
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
					    struct fib6_info *from,
					    u32 cookie)
{
	if (!__rt6_check_expired(rt) &&
	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    fib6_check(from, cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct dst_entry *dst_ret;
	struct fib6_info *from;
	struct rt6_info *rt;

	rt = container_of(dst, struct rt6_info, dst);

	rcu_read_lock();

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	from = rcu_dereference(rt->from);

	if (from && (rt->rt6i_flags & RTF_PCPU ||
	    unlikely(!list_empty(&rt->rt6i_uncached))))
		dst_ret = rt6_dst_from_check(rt, from, cookie);
	else
		dst_ret = rt6_check(rt, from, cookie);

	rcu_read_unlock();

	return dst_ret;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			rcu_read_lock();
			if (rt6_check_expired(rt)) {
				rt6_remove_exception_rt(rt);
				dst = NULL;
			}
			rcu_read_unlock();
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		rcu_read_lock();
		if (rt->rt6i_flags & RTF_CACHE) {
			rt6_remove_exception_rt(rt);
		} else {
			struct fib6_info *from;
			struct fib6_node *fn;

			from = rcu_dereference(rt->from);
			if (from) {
				fn = rcu_dereference(from->fib6_node);
				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
					fn->fn_sernum = -1;
			}
		}
		rcu_read_unlock();
	}
}

static void rt6_update_expires(struct rt6_info *rt0, int timeout)
{
	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
		struct fib6_info *from;

		rcu_read_lock();
		from = rcu_dereference(rt0->from);
		if (from)
			rt0->dst.expires = from->expires;
		rcu_read_unlock();
	}

	dst_set_expires(&rt0->dst, timeout);
	rt0->rt6i_flags |= RTF_EXPIRES;
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
	rt->rt6i_flags |= RTF_MODIFIED;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
	return !(rt->rt6i_flags & RTF_CACHE) &&
		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
}
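/* A PMTU update is applied in place on RTF_CACHE clones (refreshing
 * their exception stamp); for routes still backed by the FIB a new
 * RTF_CACHE exception carrying the learned MTU is created instead,
 * expiring after ip6_rt_mtu_expires.
 */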
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	dst_confirm_neigh(dst, daddr);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
		/* update rt6_ex->stamp for cache */
		if (rt6->rt6i_flags & RTF_CACHE)
			rt6_update_exception_stamp_rt(rt6);
	} else if (daddr) {
		struct fib6_info *from;
		struct rt6_info *nrt6;

		rcu_read_lock();
		from = rcu_dereference(rt6->from);
		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);
			if (rt6_insert_exception(nrt6, from))
				dst_release_immediate(&nrt6->dst);
		}
		rcu_read_unlock();
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	int oif = sk->sk_bound_dev_if;
	struct dst_entry *dst;

	if (!oif && skb->dev)
		oif = l3mdev_master_ifindex(skb->dev);

	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);

	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
			   const struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_SUBTREES
	struct ipv6_pinfo *np = inet6_sk(sk);
#endif

	ip6_dst_store(sk, dst,
		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
		      &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
		      &np->saddr :
#endif
		      NULL);
}
/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *ret = NULL, *rt_cache;
	struct fib6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	rcu_read_lock();
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for_each_fib6_node_rt_rcu(fn) {
		if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
			continue;
		if (fib6_check_expired(rt))
			continue;
		if (rt->fib6_flags & RTF_REJECT)
			break;
		if (!rt->fib6_nh.fib_nh_has_gw)
			continue;
		if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
			continue;
		/* rt_cache's gateway might be different from its 'parent'
		 * in the case of an ip redirect.
		 * So we keep searching in the exception table if the gateway
		 * is different.
		 */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
			rt_cache = rt6_find_cached_rt(rt,
						      &fl6->daddr,
						      &fl6->saddr);
			if (rt_cache &&
			    ipv6_addr_equal(&rdfl->gateway,
					    &rt_cache->rt6i_gateway)) {
				ret = rt_cache;
				break;
			}
			continue;
		}
		break;
	}

	if (!rt)
		rt = net->ipv6.fib6_null_entry;
	else if (rt->fib6_flags & RTF_REJECT) {
		ret = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	if (ret)
		ip6_hold_safe(net, &ret);
	else
		ret = ip6_create_rt_rcu(rt);

	rcu_read_unlock();

	trace_fib6_table_lookup(net, rt, table, fl6);
	return ret;
}

static struct dst_entry *ip6_route_redirect(struct net *net,
					    const struct flowi6 *fl6,
					    const struct sk_buff *skb,
					    const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6, skb,
				flags, __ip6_route_redirect);
}
2497 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2500 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2501 struct dst_entry *dst;
2502 struct flowi6 fl6 = {
2503 .flowi6_iif = LOOPBACK_IFINDEX,
2505 .flowi6_mark = mark,
2506 .daddr = iph->daddr,
2507 .saddr = iph->saddr,
2508 .flowlabel = ip6_flowinfo(iph),
2512 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2513 rt6_do_redirect(dst, NULL, skb);
2516 EXPORT_SYMBOL_GPL(ip6_redirect);
2518 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2520 const struct ipv6hdr *iph = ipv6_hdr(skb);
2521 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2522 struct dst_entry *dst;
2523 struct flowi6 fl6 = {
2524 .flowi6_iif = LOOPBACK_IFINDEX,
2527 .saddr = iph->daddr,
2528 .flowi6_uid = sock_net_uid(net, NULL),
2531 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2532 rt6_do_redirect(dst, NULL, skb);
2536 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2538 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2541 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2543 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2545 struct net_device *dev = dst->dev;
2546 unsigned int mtu = dst_mtu(dst);
2547 struct net *net = dev_net(dev);
2549 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2551 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2552 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2555 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and the
2556 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2557 * IPV6_MAXPLEN is also valid and means: "any MSS,
2558 * rely only on pmtu discovery"
2560 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
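/* A standalone sketch of the advmss computation above, assuming (from the
 * elided lines) that an MSS above IPV6_MAXPLEN - sizeof(struct tcphdr) is
 * reported as IPV6_MAXPLEN ("any MSS, rely only on pmtu discovery").
 * For a 1500-byte MTU: 1500 - 40 (IPv6 header) - 20 (TCP header) = 1440.
 * The EX_* constants and the 1220-byte floor are illustrative.
 */
#include <stdio.h>

#define EX_IPV6_HDRLEN	40
#define EX_TCP_HDRLEN	20
#define EX_IPV6_MAXPLEN	65535

static unsigned int ex_default_advmss(unsigned int mtu, unsigned int min_advmss)
{
	unsigned int mss = mtu - EX_IPV6_HDRLEN - EX_TCP_HDRLEN;

	if (mss < min_advmss)
		mss = min_advmss;
	if (mss > EX_IPV6_MAXPLEN - EX_TCP_HDRLEN)
		mss = EX_IPV6_MAXPLEN;
	return mss;
}

int main(void)
{
	printf("%u\n", ex_default_advmss(1500, 1220));	/* prints 1440 */
	return 0;
}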
2565 static unsigned int ip6_mtu(const struct dst_entry *dst)
2567 struct inet6_dev *idev;
2570 mtu = dst_metric_raw(dst, RTAX_MTU);
2577 idev = __in6_dev_get(dst->dev);
2579 mtu = idev->cnf.mtu6;
2583 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2585 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2588 /* MTU selection:
2589 * 1. mtu on route is locked - use it
2590 * 2. mtu from nexthop exception
2591 * 3. mtu from egress device
2593 * based on ip6_dst_mtu_forward and exception logic of
2594 * rt6_find_cached_rt; called with rcu_read_lock
2596 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2597 struct in6_addr *saddr)
2599 struct rt6_exception_bucket *bucket;
2600 struct rt6_exception *rt6_ex;
2601 struct in6_addr *src_key;
2602 struct inet6_dev *idev;
2605 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2606 mtu = f6i->fib6_pmtu;
2612 #ifdef CONFIG_IPV6_SUBTREES
2613 if (f6i->fib6_src.plen)
2617 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2618 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2619 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2620 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2623 struct net_device *dev = fib6_info_nh_dev(f6i);
2626 idev = __in6_dev_get(dev);
2627 if (idev && idev->cnf.mtu6 > mtu)
2628 mtu = idev->cnf.mtu6;
2631 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2633 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
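/* A simplified standalone sketch of the precedence ip6_mtu_from_fib6()
 * implements: a locked route metric wins, then an unexpired nexthop
 * exception, then the egress device MTU, with the result clamped and
 * reduced by any lwtunnel headroom. Parameter names are hypothetical and
 * zero stands in for "not available", which the kernel expresses
 * differently.
 */
static unsigned int ex_effective_mtu(unsigned int locked_mtu,
				     unsigned int exception_mtu,
				     unsigned int device_mtu,
				     unsigned int headroom,
				     unsigned int max_mtu)
{
	unsigned int mtu;

	if (locked_mtu)			/* 1. mtu on route is locked */
		mtu = locked_mtu;
	else if (exception_mtu)		/* 2. mtu from nexthop exception */
		mtu = exception_mtu;
	else				/* 3. mtu from egress device */
		mtu = device_mtu;

	if (mtu > max_mtu)
		mtu = max_mtu;
	return mtu - headroom;
}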
2636 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2639 struct dst_entry *dst;
2640 struct rt6_info *rt;
2641 struct inet6_dev *idev = in6_dev_get(dev);
2642 struct net *net = dev_net(dev);
2644 if (unlikely(!idev))
2645 return ERR_PTR(-ENODEV);
2647 rt = ip6_dst_alloc(net, dev, 0);
2648 if (unlikely(!rt)) {
2650 dst = ERR_PTR(-ENOMEM);
2654 rt->dst.flags |= DST_HOST;
2655 rt->dst.input = ip6_input;
2656 rt->dst.output = ip6_output;
2657 rt->rt6i_gateway = fl6->daddr;
2658 rt->rt6i_dst.addr = fl6->daddr;
2659 rt->rt6i_dst.plen = 128;
2660 rt->rt6i_idev = idev;
2661 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2663 /* Add this dst into uncached_list so that rt6_disable_ip() can
2664 * properly release the net_device
2666 rt6_uncached_list_add(rt);
2667 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2669 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2675 static int ip6_dst_gc(struct dst_ops *ops)
2677 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2678 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2679 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2680 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2681 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2682 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2685 entries = dst_entries_get_fast(ops);
2686 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2687 entries <= rt_max_size)
2690 net->ipv6.ip6_rt_gc_expire++;
2691 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2692 entries = dst_entries_get_slow(ops);
2693 if (entries < ops->gc_thresh)
2694 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
2696 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
2697 return entries > rt_max_size;
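/* A standalone sketch of how the gc "expire" pressure above evolves. A
 * forced gc pass bumps it by one, a shrink below gc_thresh resets it to
 * half the gc timeout, and the decay on the last line runs on every call:
 * with elasticity e the value loses 1/2^e of itself each time (ignoring
 * the increment, 1024 -> 896 -> 784 -> 686 for e = 3). Names are
 * hypothetical.
 */
static unsigned long ex_gc_expire_step(unsigned long expire, int below_thresh,
				       int gc_timeout, int elasticity)
{
	expire++;				/* one more forced gc pass */
	if (below_thresh)			/* table shrank below gc_thresh */
		expire = gc_timeout >> 1;
	expire -= expire >> elasticity;		/* geometric decay */
	return expire;
}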
2700 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2701 struct fib6_config *cfg,
2702 const struct in6_addr *gw_addr,
2703 u32 tbid, int flags)
2705 struct flowi6 fl6 = {
2706 .flowi6_oif = cfg->fc_ifindex,
2708 .saddr = cfg->fc_prefsrc,
2710 struct fib6_table *table;
2711 struct rt6_info *rt;
2713 table = fib6_get_table(net, tbid);
2717 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2718 flags |= RT6_LOOKUP_F_HAS_SADDR;
2720 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2721 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2723 /* if table lookup failed, fall back to full lookup */
2724 if (rt == net->ipv6.ip6_null_entry) {
2732 static int ip6_route_check_nh_onlink(struct net *net,
2733 struct fib6_config *cfg,
2734 const struct net_device *dev,
2735 struct netlink_ext_ack *extack)
2737 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2738 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2739 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2740 struct fib6_info *from;
2741 struct rt6_info *grt;
2745 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2748 from = rcu_dereference(grt->from);
2749 if (!grt->dst.error &&
2750 /* ignore match if it is the default route */
2751 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
2752 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2753 NL_SET_ERR_MSG(extack,
2754 "Nexthop has invalid gateway or device mismatch");
2765 static int ip6_route_check_nh(struct net *net,
2766 struct fib6_config *cfg,
2767 struct net_device **_dev,
2768 struct inet6_dev **idev)
2770 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2771 struct net_device *dev = _dev ? *_dev : NULL;
2772 struct rt6_info *grt = NULL;
2773 int err = -EHOSTUNREACH;
2775 if (cfg->fc_table) {
2776 int flags = RT6_LOOKUP_F_IFACE;
2778 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2779 cfg->fc_table, flags);
2781 if (grt->rt6i_flags & RTF_GATEWAY ||
2782 (dev && dev != grt->dst.dev)) {
2790 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2796 if (dev != grt->dst.dev) {
2801 *_dev = dev = grt->dst.dev;
2802 *idev = grt->rt6i_idev;
2804 in6_dev_hold(grt->rt6i_idev);
2807 if (!(grt->rt6i_flags & RTF_GATEWAY))
2816 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2817 struct net_device **_dev, struct inet6_dev **idev,
2818 struct netlink_ext_ack *extack)
2820 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2821 int gwa_type = ipv6_addr_type(gw_addr);
2822 bool skip_dev = !(gwa_type & IPV6_ADDR_LINKLOCAL);
2823 const struct net_device *dev = *_dev;
2824 bool need_addr_check = !dev;
2827 /* if gw_addr is local we will fail to detect this in case
2828 * the address is still TENTATIVE (DAD in progress). rt6_lookup()
2829 * will return the already-added prefix route via the interface that
2830 * the prefix route was assigned to, which might be non-loopback.
2833 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2834 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2838 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2839 /* IPv6 strictly inhibits using non-link-local
2840 * addresses as a nexthop address.
2841 * Otherwise, a router will not be able to send redirects.
2842 * It is very good, but in some (rare!) circumstances
2843 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2844 * some exceptions. --ANK
2845 * We allow IPv4-mapped nexthops to support RFC4798-type
2846 * addressing.
2848 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2849 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2853 if (cfg->fc_flags & RTNH_F_ONLINK)
2854 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2856 err = ip6_route_check_nh(net, cfg, _dev, idev);
2862 /* reload in case device was changed */
2867 NL_SET_ERR_MSG(extack, "Egress device not specified");
2869 } else if (dev->flags & IFF_LOOPBACK) {
2870 NL_SET_ERR_MSG(extack,
2871 "Egress device can not be loopback device for this route");
2875 /* if we did not check gw_addr above, do so now that the
2876 * egress device has been resolved.
2878 if (need_addr_check &&
2879 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2880 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
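/* For reference, the RTNH_F_ONLINK path validated above corresponds to
 * iproute2's "onlink" keyword, which asserts that an off-subnet gateway
 * is directly reachable on the egress device, e.g.:
 *
 *	ip -6 route add 2001:db8:1::/64 via 2001:db8:ffff::1 dev eth0 onlink
 *
 * Without "onlink", the gateway must be resolvable through an existing
 * route, which is what ip6_route_check_nh() verifies. The addresses are
 * documentation prefixes and the device name is illustrative.
 */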
2889 static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2891 if ((flags & RTF_REJECT) ||
2892 (dev && (dev->flags & IFF_LOOPBACK) &&
2893 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2894 !(flags & RTF_LOCAL)))
2900 int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2901 struct fib6_config *cfg, gfp_t gfp_flags,
2902 struct netlink_ext_ack *extack)
2904 struct net_device *dev = NULL;
2905 struct inet6_dev *idev = NULL;
2909 fib6_nh->fib_nh_family = AF_INET6;
2912 if (cfg->fc_ifindex) {
2913 dev = dev_get_by_index(net, cfg->fc_ifindex);
2916 idev = in6_dev_get(dev);
2921 if (cfg->fc_flags & RTNH_F_ONLINK) {
2923 NL_SET_ERR_MSG(extack,
2924 "Nexthop device required for onlink");
2928 if (!(dev->flags & IFF_UP)) {
2929 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2934 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
2937 fib6_nh->fib_nh_weight = 1;
2939 /* We cannot add true routes via loopback here;
2940 * they would result in kernel looping. Promote them to reject routes.
2942 addr_type = ipv6_addr_type(&cfg->fc_dst);
2943 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
2944 /* hold loopback dev/idev if we haven't done so. */
2945 if (dev != net->loopback_dev) {
2950 dev = net->loopback_dev;
2952 idev = in6_dev_get(dev);
2961 if (cfg->fc_flags & RTF_GATEWAY) {
2962 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2966 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
2967 fib6_nh->fib_nh_has_gw = 1;
2974 if (idev->cnf.disable_ipv6) {
2975 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2980 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
2981 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2986 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2987 !netif_carrier_ok(dev))
2988 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
2990 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
2991 cfg->fc_encap_type, cfg, gfp_flags, extack);
2995 fib6_nh->fib_nh_dev = dev;
2996 fib6_nh->fib_nh_oif = dev->ifindex;
3003 lwtstate_put(fib6_nh->fib_nh_lws);
3004 fib6_nh->fib_nh_lws = NULL;
3012 void fib6_nh_release(struct fib6_nh *fib6_nh)
3014 fib_nh_common_release(&fib6_nh->nh_common);
3017 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3019 struct netlink_ext_ack *extack)
3021 struct net *net = cfg->fc_nlinfo.nl_net;
3022 struct fib6_info *rt = NULL;
3023 struct fib6_table *table;
3027 /* RTF_PCPU is an internal flag; it cannot be set by userspace */
3028 if (cfg->fc_flags & RTF_PCPU) {
3029 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3033 /* RTF_CACHE is an internal flag; it cannot be set by userspace */
3034 if (cfg->fc_flags & RTF_CACHE) {
3035 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3039 if (cfg->fc_type > RTN_MAX) {
3040 NL_SET_ERR_MSG(extack, "Invalid route type");
3044 if (cfg->fc_dst_len > 128) {
3045 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3048 if (cfg->fc_src_len > 128) {
3049 NL_SET_ERR_MSG(extack, "Invalid source address length");
3052 #ifndef CONFIG_IPV6_SUBTREES
3053 if (cfg->fc_src_len) {
3054 NL_SET_ERR_MSG(extack,
3055 "Specifying source address requires IPV6_SUBTREES to be enabled");
3061 if (cfg->fc_nlinfo.nlh &&
3062 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3063 table = fib6_get_table(net, cfg->fc_table);
3065 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3066 table = fib6_new_table(net, cfg->fc_table);
3069 table = fib6_new_table(net, cfg->fc_table);
3076 rt = fib6_info_alloc(gfp_flags);
3080 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3082 if (IS_ERR(rt->fib6_metrics)) {
3083 err = PTR_ERR(rt->fib6_metrics);
3084 /* Do not leave garbage there. */
3085 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3089 if (cfg->fc_flags & RTF_ADDRCONF)
3090 rt->dst_nocount = true;
3092 if (cfg->fc_flags & RTF_EXPIRES)
3093 fib6_set_expires(rt, jiffies +
3094 clock_t_to_jiffies(cfg->fc_expires));
3096 fib6_clean_expires(rt);
3098 if (cfg->fc_protocol == RTPROT_UNSPEC)
3099 cfg->fc_protocol = RTPROT_BOOT;
3100 rt->fib6_protocol = cfg->fc_protocol;
3102 rt->fib6_table = table;
3103 rt->fib6_metric = cfg->fc_metric;
3104 rt->fib6_type = cfg->fc_type;
3105 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3107 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3108 rt->fib6_dst.plen = cfg->fc_dst_len;
3109 if (rt->fib6_dst.plen == 128)
3110 rt->dst_host = true;
3112 #ifdef CONFIG_IPV6_SUBTREES
3113 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3114 rt->fib6_src.plen = cfg->fc_src_len;
3116 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3120 /* We cannot add true routes via loopback here;
3121 * they would result in kernel looping. Promote them to reject routes.
3123 addr_type = ipv6_addr_type(&cfg->fc_dst);
3124 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
3125 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3127 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3128 struct net_device *dev = fib6_info_nh_dev(rt);
3130 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3131 NL_SET_ERR_MSG(extack, "Invalid source address");
3135 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3136 rt->fib6_prefsrc.plen = 128;
3138 rt->fib6_prefsrc.plen = 0;
3142 fib6_info_release(rt);
3143 return ERR_PTR(err);
3146 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3147 struct netlink_ext_ack *extack)
3149 struct fib6_info *rt;
3152 rt = ip6_route_info_create(cfg, gfp_flags, extack);
3156 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3157 fib6_info_release(rt);
3162 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3164 struct net *net = info->nl_net;
3165 struct fib6_table *table;
3168 if (rt == net->ipv6.fib6_null_entry) {
3173 table = rt->fib6_table;
3174 spin_lock_bh(&table->tb6_lock);
3175 err = fib6_del(rt, info);
3176 spin_unlock_bh(&table->tb6_lock);
3179 fib6_info_release(rt);
3183 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3185 struct nl_info info = { .nl_net = net };
3187 return __ip6_del_rt(rt, &info);
3190 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3192 struct nl_info *info = &cfg->fc_nlinfo;
3193 struct net *net = info->nl_net;
3194 struct sk_buff *skb = NULL;
3195 struct fib6_table *table;
3198 if (rt == net->ipv6.fib6_null_entry)
3200 table = rt->fib6_table;
3201 spin_lock_bh(&table->tb6_lock);
3203 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3204 struct fib6_info *sibling, *next_sibling;
3206 /* prefer to send a single notification with all hops */
3207 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3209 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3211 if (rt6_fill_node(net, skb, rt, NULL,
3212 NULL, NULL, 0, RTM_DELROUTE,
3213 info->portid, seq, 0) < 0) {
3217 info->skip_notify = 1;
3220 list_for_each_entry_safe(sibling, next_sibling,
3223 err = fib6_del(sibling, info);
3229 err = fib6_del(rt, info);
3231 spin_unlock_bh(&table->tb6_lock);
3233 fib6_info_release(rt);
3236 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3237 info->nlh, gfp_any());
3242 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3246 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3249 if (cfg->fc_flags & RTF_GATEWAY &&
3250 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3253 rc = rt6_remove_exception_rt(rt);
3258 static int ip6_route_del(struct fib6_config *cfg,
3259 struct netlink_ext_ack *extack)
3261 struct rt6_info *rt_cache;
3262 struct fib6_table *table;
3263 struct fib6_info *rt;
3264 struct fib6_node *fn;
3267 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3269 NL_SET_ERR_MSG(extack, "FIB table does not exist");
3275 fn = fib6_locate(&table->tb6_root,
3276 &cfg->fc_dst, cfg->fc_dst_len,
3277 &cfg->fc_src, cfg->fc_src_len,
3278 !(cfg->fc_flags & RTF_CACHE));
3281 for_each_fib6_node_rt_rcu(fn) {
3284 if (cfg->fc_flags & RTF_CACHE) {
3287 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3290 rc = ip6_del_cached_rt(rt_cache, cfg);
3300 if (cfg->fc_ifindex &&
3302 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
3304 if (cfg->fc_flags & RTF_GATEWAY &&
3305 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
3307 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3309 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3311 if (!fib6_info_hold_safe(rt))
3315 /* if a gateway was specified, only delete the one hop */
3316 if (cfg->fc_flags & RTF_GATEWAY)
3317 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3319 return __ip6_del_rt_siblings(rt, cfg);
3327 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3329 struct netevent_redirect netevent;
3330 struct rt6_info *rt, *nrt = NULL;
3331 struct ndisc_options ndopts;
3332 struct inet6_dev *in6_dev;
3333 struct neighbour *neigh;
3334 struct fib6_info *from;
3336 int optlen, on_link;
3339 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3340 optlen -= sizeof(*msg);
3343 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3347 msg = (struct rd_msg *)icmp6_hdr(skb);
3349 if (ipv6_addr_is_multicast(&msg->dest)) {
3350 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3355 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3357 } else if (ipv6_addr_type(&msg->target) !=
3358 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3359 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3363 in6_dev = __in6_dev_get(skb->dev);
3366 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3370 * The IP source address of the Redirect MUST be the same as the current
3371 * first-hop router for the specified ICMP Destination Address.
3374 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3375 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3380 if (ndopts.nd_opts_tgt_lladdr) {
3381 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3384 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3389 rt = (struct rt6_info *) dst;
3390 if (rt->rt6i_flags & RTF_REJECT) {
3391 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3395 /* Redirect received -> path was valid.
3396 * Look, redirects are sent only in response to data packets,
3397 * so this nexthop apparently is reachable. --ANK
3399 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3401 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3406 * We have finally decided to accept it.
3409 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3410 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3411 NEIGH_UPDATE_F_OVERRIDE|
3412 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3413 NEIGH_UPDATE_F_ISROUTER)),
3414 NDISC_REDIRECT, &ndopts);
3417 from = rcu_dereference(rt->from);
3418 /* This fib6_info_hold() is safe here because we hold a reference to rt
3419 * and rt already holds a reference to its fib6_info.
3421 fib6_info_hold(from);
3424 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3428 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3430 nrt->rt6i_flags &= ~RTF_GATEWAY;
3432 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3434 /* No need to remove rt from the exception table if rt is
3435 * a cached route, because rt6_insert_exception() will take care of it.
3438 if (rt6_insert_exception(nrt, from)) {
3439 dst_release_immediate(&nrt->dst);
3443 netevent.old = &rt->dst;
3444 netevent.new = &nrt->dst;
3445 netevent.daddr = &msg->dest;
3446 netevent.neigh = neigh;
3447 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3450 fib6_info_release(from);
3451 neigh_release(neigh);
3454 #ifdef CONFIG_IPV6_ROUTE_INFO
3455 static struct fib6_info *rt6_get_route_info(struct net *net,
3456 const struct in6_addr *prefix, int prefixlen,
3457 const struct in6_addr *gwaddr,
3458 struct net_device *dev)
3460 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3461 int ifindex = dev->ifindex;
3462 struct fib6_node *fn;
3463 struct fib6_info *rt = NULL;
3464 struct fib6_table *table;
3466 table = fib6_get_table(net, tb_id);
3471 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3475 for_each_fib6_node_rt_rcu(fn) {
3476 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
3478 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
3479 !rt->fib6_nh.fib_nh_has_gw)
3481 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
3483 if (!fib6_info_hold_safe(rt))
3492 static struct fib6_info *rt6_add_route_info(struct net *net,
3493 const struct in6_addr *prefix, int prefixlen,
3494 const struct in6_addr *gwaddr,
3495 struct net_device *dev,
3498 struct fib6_config cfg = {
3499 .fc_metric = IP6_RT_PRIO_USER,
3500 .fc_ifindex = dev->ifindex,
3501 .fc_dst_len = prefixlen,
3502 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3503 RTF_UP | RTF_PREF(pref),
3504 .fc_protocol = RTPROT_RA,
3505 .fc_type = RTN_UNICAST,
3506 .fc_nlinfo.portid = 0,
3507 .fc_nlinfo.nlh = NULL,
3508 .fc_nlinfo.nl_net = net,
3511 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3512 cfg.fc_dst = *prefix;
3513 cfg.fc_gateway = *gwaddr;
3515 /* We should treat it as a default route if prefix length is 0. */
3517 cfg.fc_flags |= RTF_DEFAULT;
3519 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3521 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3525 struct fib6_info *rt6_get_dflt_router(struct net *net,
3526 const struct in6_addr *addr,
3527 struct net_device *dev)
3529 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3530 struct fib6_info *rt;
3531 struct fib6_table *table;
3533 table = fib6_get_table(net, tb_id);
3538 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3539 struct fib6_nh *nh = &rt->fib6_nh;
3541 if (dev == nh->fib_nh_dev &&
3542 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3543 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
3546 if (rt && !fib6_info_hold_safe(rt))
3552 struct fib6_info *rt6_add_dflt_router(struct net *net,
3553 const struct in6_addr *gwaddr,
3554 struct net_device *dev,
3557 struct fib6_config cfg = {
3558 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3559 .fc_metric = IP6_RT_PRIO_USER,
3560 .fc_ifindex = dev->ifindex,
3561 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3562 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3563 .fc_protocol = RTPROT_RA,
3564 .fc_type = RTN_UNICAST,
3565 .fc_nlinfo.portid = 0,
3566 .fc_nlinfo.nlh = NULL,
3567 .fc_nlinfo.nl_net = net,
3570 cfg.fc_gateway = *gwaddr;
3572 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3573 struct fib6_table *table;
3575 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3577 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3580 return rt6_get_dflt_router(net, gwaddr, dev);
3583 static void __rt6_purge_dflt_routers(struct net *net,
3584 struct fib6_table *table)
3586 struct fib6_info *rt;
3590 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3591 struct net_device *dev = fib6_info_nh_dev(rt);
3592 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3594 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3595 (!idev || idev->cnf.accept_ra != 2) &&
3596 fib6_info_hold_safe(rt)) {
3598 ip6_del_rt(net, rt);
3604 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3607 void rt6_purge_dflt_routers(struct net *net)
3609 struct fib6_table *table;
3610 struct hlist_head *head;
3615 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3616 head = &net->ipv6.fib_table_hash[h];
3617 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3618 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3619 __rt6_purge_dflt_routers(net, table);
3626 static void rtmsg_to_fib6_config(struct net *net,
3627 struct in6_rtmsg *rtmsg,
3628 struct fib6_config *cfg)
3630 *cfg = (struct fib6_config){
3631 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3633 .fc_ifindex = rtmsg->rtmsg_ifindex,
3634 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
3635 .fc_expires = rtmsg->rtmsg_info,
3636 .fc_dst_len = rtmsg->rtmsg_dst_len,
3637 .fc_src_len = rtmsg->rtmsg_src_len,
3638 .fc_flags = rtmsg->rtmsg_flags,
3639 .fc_type = rtmsg->rtmsg_type,
3641 .fc_nlinfo.nl_net = net,
3643 .fc_dst = rtmsg->rtmsg_dst,
3644 .fc_src = rtmsg->rtmsg_src,
3645 .fc_gateway = rtmsg->rtmsg_gateway,
3649 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3651 struct fib6_config cfg;
3652 struct in6_rtmsg rtmsg;
3656 case SIOCADDRT: /* Add a route */
3657 case SIOCDELRT: /* Delete a route */
3658 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3660 err = copy_from_user(&rtmsg, arg,
3661 sizeof(struct in6_rtmsg));
3665 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3670 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3673 err = ip6_route_del(&cfg, NULL);
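/* A hedged userspace sketch of the legacy ioctl path handled above: it
 * fills the same struct in6_rtmsg that rtmsg_to_fib6_config() consumes.
 * CAP_NET_ADMIN is required and rtnetlink is the preferred interface
 * today; the prefix and device name are illustrative, and header
 * interplay (<linux/ipv6_route.h> vs. libc headers) can vary by system.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>		/* SIOCADDRT */
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>		/* if_nametoindex() */
#include <net/route.h>		/* RTF_UP */
#include <linux/ipv6_route.h>	/* struct in6_rtmsg */

int main(void)
{
	struct in6_rtmsg rt;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&rt, 0, sizeof(rt));
	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
	rt.rtmsg_dst_len = 64;
	rt.rtmsg_flags = RTF_UP;
	rt.rtmsg_metric = 1;	/* 0 would be replaced by IP6_RT_PRIO_USER */
	rt.rtmsg_ifindex = if_nametoindex("eth0");

	if (ioctl(fd, SIOCADDRT, &rt) < 0)
		perror("SIOCADDRT");
	close(fd);
	return 0;
}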
3687 * Drop the packet on the floor
3690 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3693 struct dst_entry *dst = skb_dst(skb);
3694 switch (ipstats_mib_noroutes) {
3695 case IPSTATS_MIB_INNOROUTES:
3696 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3697 if (type == IPV6_ADDR_ANY) {
3698 IP6_INC_STATS(dev_net(dst->dev),
3699 __in6_dev_get_safely(skb->dev),
3700 IPSTATS_MIB_INADDRERRORS);
3704 case IPSTATS_MIB_OUTNOROUTES:
3705 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3706 ipstats_mib_noroutes);
3709 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3714 static int ip6_pkt_discard(struct sk_buff *skb)
3716 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3719 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3721 skb->dev = skb_dst(skb)->dev;
3722 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3725 static int ip6_pkt_prohibit(struct sk_buff *skb)
3727 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3730 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3732 skb->dev = skb_dst(skb)->dev;
3733 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3737 * Allocate a dst for local (unicast / anycast) address.
3740 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3741 struct inet6_dev *idev,
3742 const struct in6_addr *addr,
3743 bool anycast, gfp_t gfp_flags)
3745 struct fib6_config cfg = {
3746 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3747 .fc_ifindex = idev->dev->ifindex,
3748 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3751 .fc_protocol = RTPROT_KERNEL,
3752 .fc_nlinfo.nl_net = net,
3753 .fc_ignore_dev_down = true,
3757 cfg.fc_type = RTN_ANYCAST;
3758 cfg.fc_flags |= RTF_ANYCAST;
3760 cfg.fc_type = RTN_LOCAL;
3761 cfg.fc_flags |= RTF_LOCAL;
3764 return ip6_route_info_create(&cfg, gfp_flags, NULL);
3767 /* remove a deleted IP from prefsrc entries */
3768 struct arg_dev_net_ip {
3769 struct net_device *dev;
3771 struct in6_addr *addr;
3774 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3776 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3777 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3778 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3780 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
3781 rt != net->ipv6.fib6_null_entry &&
3782 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3783 spin_lock_bh(&rt6_exception_lock);
3784 /* remove prefsrc entry */
3785 rt->fib6_prefsrc.plen = 0;
3786 spin_unlock_bh(&rt6_exception_lock);
3791 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3793 struct net *net = dev_net(ifp->idev->dev);
3794 struct arg_dev_net_ip adni = {
3795 .dev = ifp->idev->dev,
3799 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3802 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
3804 /* Remove routers and update dst entries when a gateway turns into a host. */
3805 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3807 struct in6_addr *gateway = (struct in6_addr *)arg;
3809 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3810 rt->fib6_nh.fib_nh_has_gw &&
3811 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
3815 /* Further clean up cached routes in the exception table.
3816 * This is needed because a cached route may have a different
3817 * gateway than its 'parent' in the case of an IP redirect.
3819 rt6_exceptions_clean_tohost(rt, gateway);
3824 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3826 fib6_clean_all(net, fib6_clean_tohost, gateway);
3829 struct arg_netdev_event {
3830 const struct net_device *dev;
3832 unsigned int nh_flags;
3833 unsigned long event;
3837 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3839 struct fib6_info *iter;
3840 struct fib6_node *fn;
3842 fn = rcu_dereference_protected(rt->fib6_node,
3843 lockdep_is_held(&rt->fib6_table->tb6_lock));
3844 iter = rcu_dereference_protected(fn->leaf,
3845 lockdep_is_held(&rt->fib6_table->tb6_lock));
3847 if (iter->fib6_metric == rt->fib6_metric &&
3848 rt6_qualify_for_ecmp(iter))
3850 iter = rcu_dereference_protected(iter->fib6_next,
3851 lockdep_is_held(&rt->fib6_table->tb6_lock));
3857 static bool rt6_is_dead(const struct fib6_info *rt)
3859 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3860 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3861 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
3867 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3869 struct fib6_info *iter;
3872 if (!rt6_is_dead(rt))
3873 total += rt->fib6_nh.fib_nh_weight;
3875 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3876 if (!rt6_is_dead(iter))
3877 total += iter->fib6_nh.fib_nh_weight;
3883 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3885 int upper_bound = -1;
3887 if (!rt6_is_dead(rt)) {
3888 *weight += rt->fib6_nh.fib_nh_weight;
3889 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3892 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
3895 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3897 struct fib6_info *iter;
3900 rt6_upper_bound_set(rt, &weight, total);
3902 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3903 rt6_upper_bound_set(iter, &weight, total);
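/* A standalone sketch of the cumulative upper-bound scheme set up above.
 * For sibling weights {1, 2} (total 3) the bounds land at roughly 1/3 and
 * 3/3 of the 31-bit hash space; a flow hash h selects the first nexthop
 * whose bound is >= h, and dead hops (bound -1) are never chosen. Names
 * are hypothetical; call ex_upper_bound() with cum_weight >= 1 only.
 */
#include <stdint.h>

static int32_t ex_upper_bound(uint64_t cum_weight, uint64_t total)
{
	/* DIV_ROUND_CLOSEST_ULL(cum_weight << 31, total) - 1 */
	uint64_t bound = ((cum_weight << 31) + total / 2) / total;

	return (int32_t)(bound - 1);
}

static int ex_select_nexthop(const int32_t *bounds, int n, int32_t hash)
{
	int i;

	for (i = 0; i < n; i++)
		if (hash <= bounds[i])
			return i;
	return n - 1;
}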
3906 void rt6_multipath_rebalance(struct fib6_info *rt)
3908 struct fib6_info *first;
3911 /* If the entire multipath route was marked for flushing,
3912 * there is no need to rebalance upon the removal of every sibling route.
3915 if (!rt->fib6_nsiblings || rt->should_flush)
3918 /* During lookup routes are evaluated in order, so we need to
3919 * make sure upper bounds are assigned from the first sibling onwards.
3922 first = rt6_multipath_first_sibling(rt);
3923 if (WARN_ON_ONCE(!first))
3926 total = rt6_multipath_total_weight(first);
3927 rt6_multipath_upper_bound_set(first, total);
3930 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3932 const struct arg_netdev_event *arg = p_arg;
3933 struct net *net = dev_net(arg->dev);
3935 if (rt != net->ipv6.fib6_null_entry &&
3936 rt->fib6_nh.fib_nh_dev == arg->dev) {
3937 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
3938 fib6_update_sernum_upto_root(net, rt);
3939 rt6_multipath_rebalance(rt);
3945 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3947 struct arg_netdev_event arg = {
3950 .nh_flags = nh_flags,
3954 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3955 arg.nh_flags |= RTNH_F_LINKDOWN;
3957 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3960 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
3961 const struct net_device *dev)
3963 struct fib6_info *iter;
3965 if (rt->fib6_nh.fib_nh_dev == dev)
3967 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3968 if (iter->fib6_nh.fib_nh_dev == dev)
3974 static void rt6_multipath_flush(struct fib6_info *rt)
3976 struct fib6_info *iter;
3978 rt->should_flush = 1;
3979 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3980 iter->should_flush = 1;
3983 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
3984 const struct net_device *down_dev)
3986 struct fib6_info *iter;
3987 unsigned int dead = 0;
3989 if (rt->fib6_nh.fib_nh_dev == down_dev ||
3990 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
3992 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3993 if (iter->fib6_nh.fib_nh_dev == down_dev ||
3994 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
4000 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4001 const struct net_device *dev,
4002 unsigned int nh_flags)
4004 struct fib6_info *iter;
4006 if (rt->fib6_nh.fib_nh_dev == dev)
4007 rt->fib6_nh.fib_nh_flags |= nh_flags;
4008 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4009 if (iter->fib6_nh.fib_nh_dev == dev)
4010 iter->fib6_nh.fib_nh_flags |= nh_flags;
4013 /* called with write lock held for table with rt */
4014 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4016 const struct arg_netdev_event *arg = p_arg;
4017 const struct net_device *dev = arg->dev;
4018 struct net *net = dev_net(dev);
4020 if (rt == net->ipv6.fib6_null_entry)
4023 switch (arg->event) {
4024 case NETDEV_UNREGISTER:
4025 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
4027 if (rt->should_flush)
4029 if (!rt->fib6_nsiblings)
4030 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
4031 if (rt6_multipath_uses_dev(rt, dev)) {
4034 count = rt6_multipath_dead_count(rt, dev);
4035 if (rt->fib6_nsiblings + 1 == count) {
4036 rt6_multipath_flush(rt);
4039 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4041 fib6_update_sernum(net, rt);
4042 rt6_multipath_rebalance(rt);
4046 if (rt->fib6_nh.fib_nh_dev != dev ||
4047 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4049 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
4050 rt6_multipath_rebalance(rt);
4057 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4059 struct arg_netdev_event arg = {
4065 struct net *net = dev_net(dev);
4067 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4068 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4070 fib6_clean_all(net, fib6_ifdown, &arg);
4073 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4075 rt6_sync_down_dev(dev, event);
4076 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4077 neigh_ifdown(&nd_tbl, dev);
4080 struct rt6_mtu_change_arg {
4081 struct net_device *dev;
4085 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4087 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4088 struct inet6_dev *idev;
4090 /* In IPv6, PMTU discovery is not optional,
4091 so the RTAX_MTU lock cannot disable it.
4092 We still use this lock to block changes
4093 caused by addrconf/ndisc.
4096 idev = __in6_dev_get(arg->dev);
4100 /* For an administrative MTU increase, there is no way to discover
4101 an IPv6 PMTU increase, so the PMTU increase should be updated here.
4102 Since RFC 1981 doesn't cover administrative MTU increases,
4103 updating the PMTU on such an increase is a MUST (e.g. for jumbo frames).
4105 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
4106 !fib6_metric_locked(rt, RTAX_MTU)) {
4107 u32 mtu = rt->fib6_pmtu;
4109 if (mtu >= arg->mtu ||
4110 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4111 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4113 spin_lock_bh(&rt6_exception_lock);
4114 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4115 spin_unlock_bh(&rt6_exception_lock);
4120 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4122 struct rt6_mtu_change_arg arg = {
4127 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4130 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4131 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
4132 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
4133 [RTA_OIF] = { .type = NLA_U32 },
4134 [RTA_IIF] = { .type = NLA_U32 },
4135 [RTA_PRIORITY] = { .type = NLA_U32 },
4136 [RTA_METRICS] = { .type = NLA_NESTED },
4137 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4138 [RTA_PREF] = { .type = NLA_U8 },
4139 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4140 [RTA_ENCAP] = { .type = NLA_NESTED },
4141 [RTA_EXPIRES] = { .type = NLA_U32 },
4142 [RTA_UID] = { .type = NLA_U32 },
4143 [RTA_MARK] = { .type = NLA_U32 },
4144 [RTA_TABLE] = { .type = NLA_U32 },
4145 [RTA_IP_PROTO] = { .type = NLA_U8 },
4146 [RTA_SPORT] = { .type = NLA_U16 },
4147 [RTA_DPORT] = { .type = NLA_U16 },
4150 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4151 struct fib6_config *cfg,
4152 struct netlink_ext_ack *extack)
4155 struct nlattr *tb[RTA_MAX+1];
4159 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4165 rtm = nlmsg_data(nlh);
4167 *cfg = (struct fib6_config){
4168 .fc_table = rtm->rtm_table,
4169 .fc_dst_len = rtm->rtm_dst_len,
4170 .fc_src_len = rtm->rtm_src_len,
4172 .fc_protocol = rtm->rtm_protocol,
4173 .fc_type = rtm->rtm_type,
4175 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4176 .fc_nlinfo.nlh = nlh,
4177 .fc_nlinfo.nl_net = sock_net(skb->sk),
4180 if (rtm->rtm_type == RTN_UNREACHABLE ||
4181 rtm->rtm_type == RTN_BLACKHOLE ||
4182 rtm->rtm_type == RTN_PROHIBIT ||
4183 rtm->rtm_type == RTN_THROW)
4184 cfg->fc_flags |= RTF_REJECT;
4186 if (rtm->rtm_type == RTN_LOCAL)
4187 cfg->fc_flags |= RTF_LOCAL;
4189 if (rtm->rtm_flags & RTM_F_CLONED)
4190 cfg->fc_flags |= RTF_CACHE;
4192 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4194 if (tb[RTA_GATEWAY]) {
4195 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4196 cfg->fc_flags |= RTF_GATEWAY;
4199 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4204 int plen = (rtm->rtm_dst_len + 7) >> 3;
4206 if (nla_len(tb[RTA_DST]) < plen)
4209 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4213 int plen = (rtm->rtm_src_len + 7) >> 3;
4215 if (nla_len(tb[RTA_SRC]) < plen)
4218 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4221 if (tb[RTA_PREFSRC])
4222 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4225 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4227 if (tb[RTA_PRIORITY])
4228 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4230 if (tb[RTA_METRICS]) {
4231 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4232 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4236 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4238 if (tb[RTA_MULTIPATH]) {
4239 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4240 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4242 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4243 cfg->fc_mp_len, extack);
4249 pref = nla_get_u8(tb[RTA_PREF]);
4250 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4251 pref != ICMPV6_ROUTER_PREF_HIGH)
4252 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4253 cfg->fc_flags |= RTF_PREF(pref);
4257 cfg->fc_encap = tb[RTA_ENCAP];
4259 if (tb[RTA_ENCAP_TYPE]) {
4260 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4262 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4267 if (tb[RTA_EXPIRES]) {
4268 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4270 if (addrconf_finite_timeout(timeout)) {
4271 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4272 cfg->fc_flags |= RTF_EXPIRES;
4282 struct fib6_info *fib6_info;
4283 struct fib6_config r_cfg;
4284 struct list_head next;
4287 static int ip6_route_info_append(struct net *net,
4288 struct list_head *rt6_nh_list,
4289 struct fib6_info *rt,
4290 struct fib6_config *r_cfg)
4295 list_for_each_entry(nh, rt6_nh_list, next) {
4296 /* check if fib6_info already exists */
4297 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4301 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4305 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4306 list_add_tail(&nh->next, rt6_nh_list);
4311 static void ip6_route_mpath_notify(struct fib6_info *rt,
4312 struct fib6_info *rt_last,
4313 struct nl_info *info,
4316 /* if this is an APPEND route, then rt points to the first route
4317 * inserted and rt_last points to the last route inserted. Userspace
4318 * wants a consistent dump of the route which starts at the first
4319 * nexthop. Since sibling routes are always added at the end of
4320 * the list, find the first sibling of the last route appended
4322 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4323 rt = list_first_entry(&rt_last->fib6_siblings,
4329 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4332 static int ip6_route_multipath_add(struct fib6_config *cfg,
4333 struct netlink_ext_ack *extack)
4335 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4336 struct nl_info *info = &cfg->fc_nlinfo;
4337 struct fib6_config r_cfg;
4338 struct rtnexthop *rtnh;
4339 struct fib6_info *rt;
4340 struct rt6_nh *err_nh;
4341 struct rt6_nh *nh, *nh_safe;
4347 int replace = (cfg->fc_nlinfo.nlh &&
4348 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4349 LIST_HEAD(rt6_nh_list);
4351 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4352 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4353 nlflags |= NLM_F_APPEND;
4355 remaining = cfg->fc_mp_len;
4356 rtnh = (struct rtnexthop *)cfg->fc_mp;
4358 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4359 * fib6_info structs per nexthop
4361 while (rtnh_ok(rtnh, remaining)) {
4362 memcpy(&r_cfg, cfg, sizeof(*cfg));
4363 if (rtnh->rtnh_ifindex)
4364 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4366 attrlen = rtnh_attrlen(rtnh);
4368 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4370 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4372 r_cfg.fc_gateway = nla_get_in6_addr(nla);
4373 r_cfg.fc_flags |= RTF_GATEWAY;
4375 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4376 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4378 r_cfg.fc_encap_type = nla_get_u16(nla);
4381 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4382 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4388 if (!rt6_qualify_for_ecmp(rt)) {
4390 NL_SET_ERR_MSG(extack,
4391 "Device only routes can not be added for IPv6 using the multipath API.");
4392 fib6_info_release(rt);
4396 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
4398 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4401 fib6_info_release(rt);
4405 rtnh = rtnh_next(rtnh, &remaining);
4408 /* for add and replace, send one notification with all nexthops.
4409 * Skip the notification in fib6_add_rt2node and send one with
4410 * the full route when done
4412 info->skip_notify = 1;
4415 list_for_each_entry(nh, &rt6_nh_list, next) {
4416 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4417 fib6_info_release(nh->fib6_info);
4420 /* save reference to last route successfully inserted */
4421 rt_last = nh->fib6_info;
4423 /* save reference to first route for notification */
4425 rt_notif = nh->fib6_info;
4428 /* nh->fib6_info is used or freed at this point; reset it to NULL */
4429 nh->fib6_info = NULL;
4432 NL_SET_ERR_MSG_MOD(extack,
4433 "multipath route replace failed (check consistency of installed routes)");
4438 /* Because each route is added like a single route, we remove
4439 * these flags after the first nexthop: if there is a collision,
4440 * we have already failed to add the first nexthop:
4441 * fib6_add_rt2node() has rejected it; when replacing, the old
4442 * nexthops have been replaced by the first new one, and the rest should be added to it.
4445 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4450 /* success ... tell user about new route */
4451 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4455 /* send notification for routes that were added so that
4456 * the delete notifications sent by ip6_route_del are coherent.
4460 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4462 /* Delete routes that were already added */
4463 list_for_each_entry(nh, &rt6_nh_list, next) {
4466 ip6_route_del(&nh->r_cfg, extack);
4470 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4472 fib6_info_release(nh->fib6_info);
4473 list_del(&nh->next);
4480 static int ip6_route_multipath_del(struct fib6_config *cfg,
4481 struct netlink_ext_ack *extack)
4483 struct fib6_config r_cfg;
4484 struct rtnexthop *rtnh;
4487 int err = 1, last_err = 0;
4489 remaining = cfg->fc_mp_len;
4490 rtnh = (struct rtnexthop *)cfg->fc_mp;
4492 /* Parse a Multipath Entry */
4493 while (rtnh_ok(rtnh, remaining)) {
4494 memcpy(&r_cfg, cfg, sizeof(*cfg));
4495 if (rtnh->rtnh_ifindex)
4496 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4498 attrlen = rtnh_attrlen(rtnh);
4500 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4502 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4504 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4505 r_cfg.fc_flags |= RTF_GATEWAY;
4508 err = ip6_route_del(&r_cfg, extack);
4512 rtnh = rtnh_next(rtnh, &remaining);
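/* A userspace-flavoured sketch of the RTA_MULTIPATH walk performed by
 * ip6_route_multipath_add()/_del() above, using the RTNH_* macros from
 * <linux/rtnetlink.h>; the kernel-internal rtnh_ok()/rtnh_next() helpers
 * mirror them. The callback is hypothetical.
 */
#include <linux/rtnetlink.h>

static void ex_walk_nexthops(struct rtnexthop *rtnh, int remaining,
			     void (*cb)(const struct rtnexthop *))
{
	while (RTNH_OK(rtnh, remaining)) {
		cb(rtnh);	/* one hop: rtnh_ifindex, rtnh_hops, flags */
		remaining -= RTNH_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}
}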
4518 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4519 struct netlink_ext_ack *extack)
4521 struct fib6_config cfg;
4524 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4529 return ip6_route_multipath_del(&cfg, extack);
4531 cfg.fc_delete_all_nh = 1;
4532 return ip6_route_del(&cfg, extack);
4536 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4537 struct netlink_ext_ack *extack)
4539 struct fib6_config cfg;
4542 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4546 if (cfg.fc_metric == 0)
4547 cfg.fc_metric = IP6_RT_PRIO_USER;
4550 return ip6_route_multipath_add(&cfg, extack);
4552 return ip6_route_add(&cfg, GFP_KERNEL, extack);
4555 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4557 int nexthop_len = 0;
4559 if (rt->fib6_nsiblings) {
4560 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4561 + NLA_ALIGN(sizeof(struct rtnexthop))
4562 + nla_total_size(16) /* RTA_GATEWAY */
4563 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
4565 nexthop_len *= rt->fib6_nsiblings;
4568 return NLMSG_ALIGN(sizeof(struct rtmsg))
4569 + nla_total_size(16) /* RTA_SRC */
4570 + nla_total_size(16) /* RTA_DST */
4571 + nla_total_size(16) /* RTA_GATEWAY */
4572 + nla_total_size(16) /* RTA_PREFSRC */
4573 + nla_total_size(4) /* RTA_TABLE */
4574 + nla_total_size(4) /* RTA_IIF */
4575 + nla_total_size(4) /* RTA_OIF */
4576 + nla_total_size(4) /* RTA_PRIORITY */
4577 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4578 + nla_total_size(sizeof(struct rta_cacheinfo))
4579 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4580 + nla_total_size(1) /* RTA_PREF */
4581 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
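/* The estimate above leans on nla_total_size(), which charges the 4-byte
 * attribute header plus padding to 4-byte alignment; a 16-byte IPv6
 * address attribute therefore costs 20 bytes. A standalone sketch of that
 * arithmetic, with the kernel constants inlined:
 */
#define EX_NLA_ALIGN(n)	(((n) + 3) & ~3)
#define EX_NLA_HDRLEN	4

static int ex_nla_total_size(int payload)
{
	/* ex_nla_total_size(16) == 20, as for RTA_GATEWAY or RTA_DST */
	return EX_NLA_ALIGN(EX_NLA_HDRLEN + payload);
}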
4585 static int rt6_nexthop_info(struct sk_buff *skb, const struct fib6_nh *fib6_nh,
4586 unsigned int *flags, bool skip_oif)
4588 if (fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4589 *flags |= RTNH_F_DEAD;
4591 if (fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4592 *flags |= RTNH_F_LINKDOWN;
4595 if (ip6_ignore_linkdown(fib6_nh->fib_nh_dev))
4596 *flags |= RTNH_F_DEAD;
4600 if (fib6_nh->fib_nh_has_gw) {
4601 if (nla_put_in6_addr(skb, RTA_GATEWAY, &fib6_nh->fib_nh_gw6) < 0)
4602 goto nla_put_failure;
4605 *flags |= (fib6_nh->fib_nh_flags & RTNH_F_ONLINK);
4606 if (fib6_nh->fib_nh_flags & RTNH_F_OFFLOAD)
4607 *flags |= RTNH_F_OFFLOAD;
4609 /* not needed for multipath encoding because it has a rtnexthop struct */
4610 if (!skip_oif && fib6_nh->fib_nh_dev &&
4611 nla_put_u32(skb, RTA_OIF, fib6_nh->fib_nh_dev->ifindex))
4612 goto nla_put_failure;
4614 if (fib6_nh->fib_nh_lws &&
4615 lwtunnel_fill_encap(skb, fib6_nh->fib_nh_lws) < 0)
4616 goto nla_put_failure;
4624 /* add multipath next hop */
4625 static int rt6_add_nexthop(struct sk_buff *skb, const struct fib6_nh *fib6_nh)
4627 const struct net_device *dev = fib6_nh->fib_nh_dev;
4628 struct rtnexthop *rtnh;
4629 unsigned int flags = 0;
4631 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4633 goto nla_put_failure;
4635 rtnh->rtnh_hops = fib6_nh->fib_nh_weight - 1;
4636 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4638 if (rt6_nexthop_info(skb, fib6_nh, &flags, true) < 0)
4639 goto nla_put_failure;
4641 rtnh->rtnh_flags = flags;
4643 /* length of rtnetlink header + attributes */
4644 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4652 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4653 struct fib6_info *rt, struct dst_entry *dst,
4654 struct in6_addr *dest, struct in6_addr *src,
4655 int iif, int type, u32 portid, u32 seq,
4658 struct rt6_info *rt6 = (struct rt6_info *)dst;
4659 struct rt6key *rt6_dst, *rt6_src;
4660 u32 *pmetrics, table, rt6_flags;
4661 struct nlmsghdr *nlh;
4665 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4670 rt6_dst = &rt6->rt6i_dst;
4671 rt6_src = &rt6->rt6i_src;
4672 rt6_flags = rt6->rt6i_flags;
4674 rt6_dst = &rt->fib6_dst;
4675 rt6_src = &rt->fib6_src;
4676 rt6_flags = rt->fib6_flags;
4679 rtm = nlmsg_data(nlh);
4680 rtm->rtm_family = AF_INET6;
4681 rtm->rtm_dst_len = rt6_dst->plen;
4682 rtm->rtm_src_len = rt6_src->plen;
4685 table = rt->fib6_table->tb6_id;
4687 table = RT6_TABLE_UNSPEC;
4688 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4689 if (nla_put_u32(skb, RTA_TABLE, table))
4690 goto nla_put_failure;
4692 rtm->rtm_type = rt->fib6_type;
4694 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4695 rtm->rtm_protocol = rt->fib6_protocol;
4697 if (rt6_flags & RTF_CACHE)
4698 rtm->rtm_flags |= RTM_F_CLONED;
4701 if (nla_put_in6_addr(skb, RTA_DST, dest))
4702 goto nla_put_failure;
4703 rtm->rtm_dst_len = 128;
4704 } else if (rtm->rtm_dst_len)
4705 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4706 goto nla_put_failure;
4707 #ifdef CONFIG_IPV6_SUBTREES
4709 if (nla_put_in6_addr(skb, RTA_SRC, src))
4710 goto nla_put_failure;
4711 rtm->rtm_src_len = 128;
4712 } else if (rtm->rtm_src_len &&
4713 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4714 goto nla_put_failure;
4717 #ifdef CONFIG_IPV6_MROUTE
4718 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4719 int err = ip6mr_get_route(net, skb, rtm, portid);
4724 goto nla_put_failure;
4727 if (nla_put_u32(skb, RTA_IIF, iif))
4728 goto nla_put_failure;
4730 struct in6_addr saddr_buf;
4731 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4732 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4733 goto nla_put_failure;
4736 if (rt->fib6_prefsrc.plen) {
4737 struct in6_addr saddr_buf;
4738 saddr_buf = rt->fib6_prefsrc.addr;
4739 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4740 goto nla_put_failure;
4743 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4744 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4745 goto nla_put_failure;
4747 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4748 goto nla_put_failure;
4750 /* For multipath routes, walk the siblings list and add
4751 * each as a nexthop within RTA_MULTIPATH.
4754 if (rt6_flags & RTF_GATEWAY &&
4755 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4756 goto nla_put_failure;
4758 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4759 goto nla_put_failure;
4760 } else if (rt->fib6_nsiblings) {
4761 struct fib6_info *sibling, *next_sibling;
4764 mp = nla_nest_start(skb, RTA_MULTIPATH);
4766 goto nla_put_failure;
4768 if (rt6_add_nexthop(skb, &rt->fib6_nh) < 0)
4769 goto nla_put_failure;
4771 list_for_each_entry_safe(sibling, next_sibling,
4772 &rt->fib6_siblings, fib6_siblings) {
4773 if (rt6_add_nexthop(skb, &sibling->fib6_nh) < 0)
4774 goto nla_put_failure;
4777 nla_nest_end(skb, mp);
4779 if (rt6_nexthop_info(skb, &rt->fib6_nh, &rtm->rtm_flags,
4781 goto nla_put_failure;
4784 if (rt6_flags & RTF_EXPIRES) {
4785 expires = dst ? dst->expires : rt->expires;
4789 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4790 goto nla_put_failure;
4792 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4793 goto nla_put_failure;
4796 nlmsg_end(skb, nlh);
4800 nlmsg_cancel(skb, nlh);
4804 static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4805 const struct net_device *dev)
4807 if (f6i->fib6_nh.fib_nh_dev == dev)
4810 if (f6i->fib6_nsiblings) {
4811 struct fib6_info *sibling, *next_sibling;
4813 list_for_each_entry_safe(sibling, next_sibling,
4814 &f6i->fib6_siblings, fib6_siblings) {
4815 if (sibling->fib6_nh.fib_nh_dev == dev)
4823 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4825 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4826 struct fib_dump_filter *filter = &arg->filter;
4827 unsigned int flags = NLM_F_MULTI;
4828 struct net *net = arg->net;
4830 if (rt == net->ipv6.fib6_null_entry)
4833 if ((filter->flags & RTM_F_PREFIX) &&
4834 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4835 /* success since this is not a prefix route */
4838 if (filter->filter_set) {
4839 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4840 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4841 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
4844 flags |= NLM_F_DUMP_FILTERED;
4847 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4848 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4849 arg->cb->nlh->nlmsg_seq, flags);
4852 static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4853 const struct nlmsghdr *nlh,
4855 struct netlink_ext_ack *extack)
4860 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4861 NL_SET_ERR_MSG_MOD(extack,
4862 "Invalid header for get route request");
4866 if (!netlink_strict_get_check(skb))
4867 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4868 rtm_ipv6_policy, extack);
4870 rtm = nlmsg_data(nlh);
4871 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4872 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4873 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4875 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4878 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4879 NL_SET_ERR_MSG_MOD(extack,
4880 "Invalid flags for get route request");
4884 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4885 rtm_ipv6_policy, extack);
4889 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4890 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4891 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4895 for (i = 0; i <= RTA_MAX; i++) {
4911 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4919 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4920 struct netlink_ext_ack *extack)
4922 struct net *net = sock_net(in_skb->sk);
4923 struct nlattr *tb[RTA_MAX+1];
4924 int err, iif = 0, oif = 0;
4925 struct fib6_info *from;
4926 struct dst_entry *dst;
4927 struct rt6_info *rt;
4928 struct sk_buff *skb;
4930 struct flowi6 fl6 = {};
4933 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4938 rtm = nlmsg_data(nlh);
4939 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4940 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4943 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4946 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4950 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4953 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4957 iif = nla_get_u32(tb[RTA_IIF]);
4960 oif = nla_get_u32(tb[RTA_OIF]);
4963 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4966 fl6.flowi6_uid = make_kuid(current_user_ns(),
4967 nla_get_u32(tb[RTA_UID]));
4969 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4972 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4975 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4977 if (tb[RTA_IP_PROTO]) {
4978 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4979 &fl6.flowi6_proto, AF_INET6,
4986 struct net_device *dev;
4991 dev = dev_get_by_index_rcu(net, iif);
4998 fl6.flowi6_iif = iif;
5000 if (!ipv6_addr_any(&fl6.saddr))
5001 flags |= RT6_LOOKUP_F_HAS_SADDR;
5003 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5007 fl6.flowi6_oif = oif;
5009 dst = ip6_route_output(net, NULL, &fl6);
5013 rt = container_of(dst, struct rt6_info, dst);
5014 if (rt->dst.error) {
5015 err = rt->dst.error;
5020 if (rt == net->ipv6.ip6_null_entry) {
5021 err = rt->dst.error;
5026 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5033 skb_dst_set(skb, &rt->dst);
5036 from = rcu_dereference(rt->from);
5039 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
5040 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5043 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5044 &fl6.saddr, iif, RTM_NEWROUTE,
5045 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5054 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5059 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5060 unsigned int nlm_flags)
5062 struct sk_buff *skb;
5063 struct net *net = info->nl_net;
5068 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5070 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5074 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5075 event, info->portid, seq, nlm_flags);
5077 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5078 WARN_ON(err == -EMSGSIZE);
5082 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5083 info->nlh, gfp_any());
5087 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER could be fired for multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}

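/* Rationale (editorial note): the null/prohibit/blackhole entries need some
 * valid device behind dst.dev; loopback is used because it is registered
 * first in every netns and outlives every other device in that namespace.
 */
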
#ifdef CONFIG_PROC_FS
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;

	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
#endif	/* CONFIG_PROC_FS */

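/* Illustrative output (values are made up): seven hex fields in the order
 * printed above - fib_nodes, fib_route_nodes, fib_rt_alloc, fib_rt_entries,
 * fib_rt_cache, dst entries, fib_discarded_routes:
 *
 *	$ cat /proc/net/rt6_stats
 *	0012 0008 001b 000f 0000 0003 0000
 */
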
#ifdef CONFIG_SYSCTL

static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
{
	struct net *net;
	int delay, ret;

	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}

static struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	= "flush",
		.data		= &init_net.ipv6.sysctl.flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv6_sysctl_rtcache_flush
	},
	{
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops_template.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "skip_notify_on_dev_down",
		.data		= &init_net.ipv6.sysctl.skip_notify_on_dev_down,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

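/* Illustrative usage (not part of the source): the template is exposed per
 * netns under /proc/sys/net/ipv6/route/, e.g.
 *
 *	sysctl -w net.ipv6.route.flush=1   # write-only (0200); runs the gc above
 *	sysctl net.ipv6.route.gc_thresh
 */
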
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
#endif	/* CONFIG_SYSCTL */

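/* Maintenance note (editorial): the table[] indices above are positionally
 * coupled to ipv6_route_table_template; reordering the template entries
 * requires updating these assignments in lockstep.
 */
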
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
					    sizeof(*net->ipv6.fib6_null_entry),
					    GFP_KERNEL);
	if (!net->ipv6.fib6_null_entry)
		goto out_ip6_dst_entries;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
	net->ipv6.sysctl.skip_notify_on_dev_down = 0;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

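/* Note on the unwind order above (editorial): the labels run in reverse
 * allocation order, and out_ip6_dst_ops only jumps back to out because
 * ip6_dst_ops is copied into the netns by memcpy(), not separately
 * allocated, so there is nothing left to free at that point.
 */
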
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
			sizeof(struct ipv6_route_iter));
	proc_create_net_single("rt6_stats", 0444, net->proc_net,
			       rt6_stats_seq_show, NULL);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};

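/* Ordering note (editorial): notifier chains invoke callbacks in descending
 * priority, so ADDRCONF_NOTIFY_PRIORITY - 10 is assumed to make this
 * notifier run after the addrconf notifier has handled the same event.
 */
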
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net
	 */
	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}

int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
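
/* Teardown note (editorial): cleanup mirrors ip6_route_init() in reverse;
 * the netdevice notifier is removed first so device events can no longer
 * touch per-netns route state while it is being torn down.
 */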