[linux.git] / net / ipv6 / route.c
net: inet: Support UID-based routing in IP protocols.
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <[email protected]>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <asm/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
73 enum rt6_nud_state {
74         RT6_NUD_FAIL_HARD = -3,
75         RT6_NUD_FAIL_PROBE = -2,
76         RT6_NUD_FAIL_DO_RR = -1,
77         RT6_NUD_SUCCEED = 1
78 };
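/*
 * Editor's note (not part of the original source): these values drive the
 * scoring path below.  rt6_check_neigh() returns one of them, and
 * rt6_score_route() passes negative results through when
 * RT6_LOOKUP_F_REACHABLE is requested.  find_match() then treats
 * RT6_NUD_FAIL_HARD as "skip this route", RT6_NUD_FAIL_DO_RR as "usable,
 * but rotate the round-robin pointer", while RT6_NUD_FAIL_PROBE simply
 * scores too low to win the comparison against *mpri.
 */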
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int      ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void             ip6_dst_destroy(struct dst_entry *);
86 static void             ip6_dst_ifdown(struct dst_entry *,
87                                        struct net_device *dev, int how);
88 static int               ip6_dst_gc(struct dst_ops *ops);
89
90 static int              ip6_pkt_discard(struct sk_buff *skb);
91 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int              ip6_pkt_prohibit(struct sk_buff *skb);
93 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void             ip6_link_failure(struct sk_buff *skb);
95 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96                                            struct sk_buff *skb, u32 mtu);
97 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98                                         struct sk_buff *skb);
99 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104                                            const struct in6_addr *prefix, int prefixlen,
105                                            const struct in6_addr *gwaddr,
106                                            struct net_device *dev,
107                                            unsigned int pref);
108 static struct rt6_info *rt6_get_route_info(struct net *net,
109                                            const struct in6_addr *prefix, int prefixlen,
110                                            const struct in6_addr *gwaddr,
111                                            struct net_device *dev);
112 #endif
113
114 struct uncached_list {
115         spinlock_t              lock;
116         struct list_head        head;
117 };
118
119 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
121 static void rt6_uncached_list_add(struct rt6_info *rt)
122 {
123         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
124
125         rt->dst.flags |= DST_NOCACHE;
126         rt->rt6i_uncached_list = ul;
127
128         spin_lock_bh(&ul->lock);
129         list_add_tail(&rt->rt6i_uncached, &ul->head);
130         spin_unlock_bh(&ul->lock);
131 }
132
133 static void rt6_uncached_list_del(struct rt6_info *rt)
134 {
135         if (!list_empty(&rt->rt6i_uncached)) {
136                 struct uncached_list *ul = rt->rt6i_uncached_list;
137
138                 spin_lock_bh(&ul->lock);
139                 list_del(&rt->rt6i_uncached);
140                 spin_unlock_bh(&ul->lock);
141         }
142 }
143
144 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
145 {
146         struct net_device *loopback_dev = net->loopback_dev;
147         int cpu;
148
149         if (dev == loopback_dev)
150                 return;
151
152         for_each_possible_cpu(cpu) {
153                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
154                 struct rt6_info *rt;
155
156                 spin_lock_bh(&ul->lock);
157                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
158                         struct inet6_dev *rt_idev = rt->rt6i_idev;
159                         struct net_device *rt_dev = rt->dst.dev;
160
161                         if (rt_idev->dev == dev) {
162                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
163                                 in6_dev_put(rt_idev);
164                         }
165
166                         if (rt_dev == dev) {
167                                 rt->dst.dev = loopback_dev;
168                                 dev_hold(rt->dst.dev);
169                                 dev_put(rt_dev);
170                         }
171                 }
172                 spin_unlock_bh(&ul->lock);
173         }
174 }
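/*
 * Editor's note (not part of the original source): the per-cpu
 * rt6_uncached_list tracks DST_NOCACHE route clones that are not linked
 * into the fib6 tree (see the RTF_CACHE clone path in ip6_pol_route()).
 * When a device other than loopback goes away, the flush above re-points
 * any such entries at the loopback device so their references do not pin
 * the disappearing netdevice.
 */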
175
176 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
177 {
178         return dst_metrics_write_ptr(rt->dst.from);
179 }
180
181 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182 {
183         struct rt6_info *rt = (struct rt6_info *)dst;
184
185         if (rt->rt6i_flags & RTF_PCPU)
186                 return rt6_pcpu_cow_metrics(rt);
187         else if (rt->rt6i_flags & RTF_CACHE)
188                 return NULL;
189         else
190                 return dst_cow_metrics_generic(dst, old);
191 }
192
193 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194                                              struct sk_buff *skb,
195                                              const void *daddr)
196 {
197         struct in6_addr *p = &rt->rt6i_gateway;
198
199         if (!ipv6_addr_any(p))
200                 return (const void *) p;
201         else if (skb)
202                 return &ipv6_hdr(skb)->daddr;
203         return daddr;
204 }
205
206 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207                                           struct sk_buff *skb,
208                                           const void *daddr)
209 {
210         struct rt6_info *rt = (struct rt6_info *) dst;
211         struct neighbour *n;
212
213         daddr = choose_neigh_daddr(rt, skb, daddr);
214         n = __ipv6_neigh_lookup(dst->dev, daddr);
215         if (n)
216                 return n;
217         return neigh_create(&nd_tbl, daddr, dst->dev);
218 }
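/*
 * Editor's note (not part of the original source): the neighbour key is
 * chosen in order of preference: the route's gateway if one is set,
 * otherwise the destination address of the packet being sent, otherwise
 * the address supplied by the caller.
 */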
219
220 static struct dst_ops ip6_dst_ops_template = {
221         .family                 =       AF_INET6,
222         .gc                     =       ip6_dst_gc,
223         .gc_thresh              =       1024,
224         .check                  =       ip6_dst_check,
225         .default_advmss         =       ip6_default_advmss,
226         .mtu                    =       ip6_mtu,
227         .cow_metrics            =       ipv6_cow_metrics,
228         .destroy                =       ip6_dst_destroy,
229         .ifdown                 =       ip6_dst_ifdown,
230         .negative_advice        =       ip6_negative_advice,
231         .link_failure           =       ip6_link_failure,
232         .update_pmtu            =       ip6_rt_update_pmtu,
233         .redirect               =       rt6_do_redirect,
234         .local_out              =       __ip6_local_out,
235         .neigh_lookup           =       ip6_neigh_lookup,
236 };
237
238 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
239 {
240         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
241
242         return mtu ? : dst->dev->mtu;
243 }
244
245 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
246                                          struct sk_buff *skb, u32 mtu)
247 {
248 }
249
250 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
251                                       struct sk_buff *skb)
252 {
253 }
254
255 static struct dst_ops ip6_dst_blackhole_ops = {
256         .family                 =       AF_INET6,
257         .destroy                =       ip6_dst_destroy,
258         .check                  =       ip6_dst_check,
259         .mtu                    =       ip6_blackhole_mtu,
260         .default_advmss         =       ip6_default_advmss,
261         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
262         .redirect               =       ip6_rt_blackhole_redirect,
263         .cow_metrics            =       dst_cow_metrics_generic,
264         .neigh_lookup           =       ip6_neigh_lookup,
265 };
266
267 static const u32 ip6_template_metrics[RTAX_MAX] = {
268         [RTAX_HOPLIMIT - 1] = 0,
269 };
270
271 static const struct rt6_info ip6_null_entry_template = {
272         .dst = {
273                 .__refcnt       = ATOMIC_INIT(1),
274                 .__use          = 1,
275                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
276                 .error          = -ENETUNREACH,
277                 .input          = ip6_pkt_discard,
278                 .output         = ip6_pkt_discard_out,
279         },
280         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
281         .rt6i_protocol  = RTPROT_KERNEL,
282         .rt6i_metric    = ~(u32) 0,
283         .rt6i_ref       = ATOMIC_INIT(1),
284 };
285
286 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
287
288 static const struct rt6_info ip6_prohibit_entry_template = {
289         .dst = {
290                 .__refcnt       = ATOMIC_INIT(1),
291                 .__use          = 1,
292                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
293                 .error          = -EACCES,
294                 .input          = ip6_pkt_prohibit,
295                 .output         = ip6_pkt_prohibit_out,
296         },
297         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
298         .rt6i_protocol  = RTPROT_KERNEL,
299         .rt6i_metric    = ~(u32) 0,
300         .rt6i_ref       = ATOMIC_INIT(1),
301 };
302
303 static const struct rt6_info ip6_blk_hole_entry_template = {
304         .dst = {
305                 .__refcnt       = ATOMIC_INIT(1),
306                 .__use          = 1,
307                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
308                 .error          = -EINVAL,
309                 .input          = dst_discard,
310                 .output         = dst_discard_out,
311         },
312         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
313         .rt6i_protocol  = RTPROT_KERNEL,
314         .rt6i_metric    = ~(u32) 0,
315         .rt6i_ref       = ATOMIC_INIT(1),
316 };
317
318 #endif
319
320 static void rt6_info_init(struct rt6_info *rt)
321 {
322         struct dst_entry *dst = &rt->dst;
323
324         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
325         INIT_LIST_HEAD(&rt->rt6i_siblings);
326         INIT_LIST_HEAD(&rt->rt6i_uncached);
327 }
328
329 /* allocate dst with ip6_dst_ops */
330 static struct rt6_info *__ip6_dst_alloc(struct net *net,
331                                         struct net_device *dev,
332                                         int flags)
333 {
334         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
335                                         0, DST_OBSOLETE_FORCE_CHK, flags);
336
337         if (rt)
338                 rt6_info_init(rt);
339
340         return rt;
341 }
342
343 struct rt6_info *ip6_dst_alloc(struct net *net,
344                                struct net_device *dev,
345                                int flags)
346 {
347         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
348
349         if (rt) {
350                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
351                 if (rt->rt6i_pcpu) {
352                         int cpu;
353
354                         for_each_possible_cpu(cpu) {
355                                 struct rt6_info **p;
356
357                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
358                                 /* no one shares rt */
359                                 *p =  NULL;
360                         }
361                 } else {
362                         dst_destroy((struct dst_entry *)rt);
363                         return NULL;
364                 }
365         }
366
367         return rt;
368 }
369 EXPORT_SYMBOL(ip6_dst_alloc);
370
371 static void ip6_dst_destroy(struct dst_entry *dst)
372 {
373         struct rt6_info *rt = (struct rt6_info *)dst;
374         struct dst_entry *from = dst->from;
375         struct inet6_dev *idev;
376
377         dst_destroy_metrics_generic(dst);
378         free_percpu(rt->rt6i_pcpu);
379         rt6_uncached_list_del(rt);
380
381         idev = rt->rt6i_idev;
382         if (idev) {
383                 rt->rt6i_idev = NULL;
384                 in6_dev_put(idev);
385         }
386
387         dst->from = NULL;
388         dst_release(from);
389 }
390
391 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
392                            int how)
393 {
394         struct rt6_info *rt = (struct rt6_info *)dst;
395         struct inet6_dev *idev = rt->rt6i_idev;
396         struct net_device *loopback_dev =
397                 dev_net(dev)->loopback_dev;
398
399         if (dev != loopback_dev) {
400                 if (idev && idev->dev == dev) {
401                         struct inet6_dev *loopback_idev =
402                                 in6_dev_get(loopback_dev);
403                         if (loopback_idev) {
404                                 rt->rt6i_idev = loopback_idev;
405                                 in6_dev_put(idev);
406                         }
407                 }
408         }
409 }
410
411 static bool __rt6_check_expired(const struct rt6_info *rt)
412 {
413         if (rt->rt6i_flags & RTF_EXPIRES)
414                 return time_after(jiffies, rt->dst.expires);
415         else
416                 return false;
417 }
418
419 static bool rt6_check_expired(const struct rt6_info *rt)
420 {
421         if (rt->rt6i_flags & RTF_EXPIRES) {
422                 if (time_after(jiffies, rt->dst.expires))
423                         return true;
424         } else if (rt->dst.from) {
425                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
426         }
427         return false;
428 }
429
430 /* Multipath route selection:
431  *   Hash based function using packet header and flowlabel.
432  * Adapted from fib_info_hashfn()
433  */
434 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
435                                const struct flowi6 *fl6)
436 {
437         return get_hash_from_flowi6(fl6) % candidate_count;
438 }
439
440 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
441                                              struct flowi6 *fl6, int oif,
442                                              int strict)
443 {
444         struct rt6_info *sibling, *next_sibling;
445         int route_choosen;
446
447         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
448         /* Don't change the route if route_choosen == 0
449          * (the siblings list does not include this route itself)
450          */
451         if (route_choosen)
452                 list_for_each_entry_safe(sibling, next_sibling,
453                                 &match->rt6i_siblings, rt6i_siblings) {
454                         route_choosen--;
455                         if (route_choosen == 0) {
456                                 if (rt6_score_route(sibling, oif, strict) < 0)
457                                         break;
458                                 match = sibling;
459                                 break;
460                         }
461                 }
462         return match;
463 }
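/*
 * Editor's note (illustrative, not part of the original source): the
 * selection above is a simple modulo over the flow hash.  With a matched
 * route that has two siblings (candidate_count == 3), a flow hashing to 7
 * yields 7 % 3 == 1, so the walk over rt6i_siblings stops at the first
 * sibling; an index of 0 keeps the originally matched route.  A sibling is
 * only substituted if rt6_score_route() does not reject it.
 */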
464
465 /*
466  *      Route lookup. Any table->tb6_lock is implied.
467  */
468
469 static inline struct rt6_info *rt6_device_match(struct net *net,
470                                                     struct rt6_info *rt,
471                                                     const struct in6_addr *saddr,
472                                                     int oif,
473                                                     int flags)
474 {
475         struct rt6_info *local = NULL;
476         struct rt6_info *sprt;
477
478         if (!oif && ipv6_addr_any(saddr))
479                 goto out;
480
481         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
482                 struct net_device *dev = sprt->dst.dev;
483
484                 if (oif) {
485                         if (dev->ifindex == oif)
486                                 return sprt;
487                         if (dev->flags & IFF_LOOPBACK) {
488                                 if (!sprt->rt6i_idev ||
489                                     sprt->rt6i_idev->dev->ifindex != oif) {
490                                         if (flags & RT6_LOOKUP_F_IFACE)
491                                                 continue;
492                                         if (local &&
493                                             local->rt6i_idev->dev->ifindex == oif)
494                                                 continue;
495                                 }
496                                 local = sprt;
497                         }
498                 } else {
499                         if (ipv6_chk_addr(net, saddr, dev,
500                                           flags & RT6_LOOKUP_F_IFACE))
501                                 return sprt;
502                 }
503         }
504
505         if (oif) {
506                 if (local)
507                         return local;
508
509                 if (flags & RT6_LOOKUP_F_IFACE)
510                         return net->ipv6.ip6_null_entry;
511         }
512 out:
513         return rt;
514 }
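/*
 * Editor's note (not part of the original source): with an output
 * interface given, an exact ifindex match wins immediately, and a route
 * via the loopback device is remembered as a fallback unless
 * RT6_LOOKUP_F_IFACE demands a strict match.  Without an oif, the first
 * route whose device owns the source address is returned.  With an oif,
 * no match and strict matching requested, ip6_null_entry is returned;
 * otherwise the original list head falls through unchanged.
 */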
515
516 #ifdef CONFIG_IPV6_ROUTER_PREF
517 struct __rt6_probe_work {
518         struct work_struct work;
519         struct in6_addr target;
520         struct net_device *dev;
521 };
522
523 static void rt6_probe_deferred(struct work_struct *w)
524 {
525         struct in6_addr mcaddr;
526         struct __rt6_probe_work *work =
527                 container_of(w, struct __rt6_probe_work, work);
528
529         addrconf_addr_solict_mult(&work->target, &mcaddr);
530         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
531         dev_put(work->dev);
532         kfree(work);
533 }
534
535 static void rt6_probe(struct rt6_info *rt)
536 {
537         struct __rt6_probe_work *work;
538         struct neighbour *neigh;
539         /*
540          * Okay, this does not seem appropriate for now;
541          * however, we need to check whether it really is,
542          * aka Router Reachability Probing.
543          *
544          * Router Reachability Probe MUST be rate-limited
545          * to no more than one per minute.
546          */
547         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
548                 return;
549         rcu_read_lock_bh();
550         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
551         if (neigh) {
552                 if (neigh->nud_state & NUD_VALID)
553                         goto out;
554
555                 work = NULL;
556                 write_lock(&neigh->lock);
557                 if (!(neigh->nud_state & NUD_VALID) &&
558                     time_after(jiffies,
559                                neigh->updated +
560                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
561                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
562                         if (work)
563                                 __neigh_set_probe_once(neigh);
564                 }
565                 write_unlock(&neigh->lock);
566         } else {
567                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
568         }
569
570         if (work) {
571                 INIT_WORK(&work->work, rt6_probe_deferred);
572                 work->target = rt->rt6i_gateway;
573                 dev_hold(rt->dst.dev);
574                 work->dev = rt->dst.dev;
575                 schedule_work(&work->work);
576         }
577
578 out:
579         rcu_read_unlock_bh();
580 }
581 #else
582 static inline void rt6_probe(struct rt6_info *rt)
583 {
584 }
585 #endif
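/*
 * Editor's note (not part of the original source): rt6_probe() only acts
 * on gateway routes and rate-limits itself by comparing jiffies against
 * neigh->updated + rtr_probe_interval.  The neighbour solicitation itself
 * is handed off to a workqueue (rt6_probe_deferred), so it is sent outside
 * the rcu_read_lock_bh()/neighbour-lock sections taken here; the work item
 * holds a device reference until it has run.
 */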
586
587 /*
588  * Default Router Selection (RFC 2461 6.3.6)
589  */
590 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
591 {
592         struct net_device *dev = rt->dst.dev;
593         if (!oif || dev->ifindex == oif)
594                 return 2;
595         if ((dev->flags & IFF_LOOPBACK) &&
596             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
597                 return 1;
598         return 0;
599 }
600
601 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
602 {
603         struct neighbour *neigh;
604         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
605
606         if (rt->rt6i_flags & RTF_NONEXTHOP ||
607             !(rt->rt6i_flags & RTF_GATEWAY))
608                 return RT6_NUD_SUCCEED;
609
610         rcu_read_lock_bh();
611         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
612         if (neigh) {
613                 read_lock(&neigh->lock);
614                 if (neigh->nud_state & NUD_VALID)
615                         ret = RT6_NUD_SUCCEED;
616 #ifdef CONFIG_IPV6_ROUTER_PREF
617                 else if (!(neigh->nud_state & NUD_FAILED))
618                         ret = RT6_NUD_SUCCEED;
619                 else
620                         ret = RT6_NUD_FAIL_PROBE;
621 #endif
622                 read_unlock(&neigh->lock);
623         } else {
624                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
625                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
626         }
627         rcu_read_unlock_bh();
628
629         return ret;
630 }
631
632 static int rt6_score_route(struct rt6_info *rt, int oif,
633                            int strict)
634 {
635         int m;
636
637         m = rt6_check_dev(rt, oif);
638         if (!m && (strict & RT6_LOOKUP_F_IFACE))
639                 return RT6_NUD_FAIL_HARD;
640 #ifdef CONFIG_IPV6_ROUTER_PREF
641         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
642 #endif
643         if (strict & RT6_LOOKUP_F_REACHABLE) {
644                 int n = rt6_check_neigh(rt);
645                 if (n < 0)
646                         return n;
647         }
648         return m;
649 }
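/*
 * Editor's note (not part of the original source): the score combines an
 * interface match (2 for the requested or any device, 1 for a loopback
 * route whose rt6i_idev matches the oif) with, under
 * CONFIG_IPV6_ROUTER_PREF, the decoded router preference shifted into the
 * bits above the interface score.  Negative NUD results are only returned
 * when the caller asked for RT6_LOOKUP_F_REACHABLE.
 */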
650
651 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
652                                    int *mpri, struct rt6_info *match,
653                                    bool *do_rr)
654 {
655         int m;
656         bool match_do_rr = false;
657         struct inet6_dev *idev = rt->rt6i_idev;
658         struct net_device *dev = rt->dst.dev;
659
660         if (dev && !netif_carrier_ok(dev) &&
661             idev->cnf.ignore_routes_with_linkdown &&
662             !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
663                 goto out;
664
665         if (rt6_check_expired(rt))
666                 goto out;
667
668         m = rt6_score_route(rt, oif, strict);
669         if (m == RT6_NUD_FAIL_DO_RR) {
670                 match_do_rr = true;
671                 m = 0; /* lowest valid score */
672         } else if (m == RT6_NUD_FAIL_HARD) {
673                 goto out;
674         }
675
676         if (strict & RT6_LOOKUP_F_REACHABLE)
677                 rt6_probe(rt);
678
679         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
680         if (m > *mpri) {
681                 *do_rr = match_do_rr;
682                 *mpri = m;
683                 match = rt;
684         }
685 out:
686         return match;
687 }
688
689 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
690                                      struct rt6_info *rr_head,
691                                      u32 metric, int oif, int strict,
692                                      bool *do_rr)
693 {
694         struct rt6_info *rt, *match, *cont;
695         int mpri = -1;
696
697         match = NULL;
698         cont = NULL;
699         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
700                 if (rt->rt6i_metric != metric) {
701                         cont = rt;
702                         break;
703                 }
704
705                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
706         }
707
708         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
709                 if (rt->rt6i_metric != metric) {
710                         cont = rt;
711                         break;
712                 }
713
714                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
715         }
716
717         if (match || !cont)
718                 return match;
719
720         for (rt = cont; rt; rt = rt->dst.rt6_next)
721                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
722
723         return match;
724 }
725
726 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
727 {
728         struct rt6_info *match, *rt0;
729         struct net *net;
730         bool do_rr = false;
731
732         rt0 = fn->rr_ptr;
733         if (!rt0)
734                 fn->rr_ptr = rt0 = fn->leaf;
735
736         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
737                              &do_rr);
738
739         if (do_rr) {
740                 struct rt6_info *next = rt0->dst.rt6_next;
741
742                 /* no entries matched; do round-robin */
743                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
744                         next = fn->leaf;
745
746                 if (next != rt0)
747                         fn->rr_ptr = next;
748         }
749
750         net = dev_net(rt0->dst.dev);
751         return match ? match : net->ipv6.ip6_null_entry;
752 }
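/*
 * Editor's note (not part of the original source): fn->rr_ptr implements
 * round-robin among same-metric routes.  When find_match() reported
 * RT6_NUD_FAIL_DO_RR for the best candidate, the pointer is advanced to
 * the next route of the same metric (wrapping back to fn->leaf), so a
 * subsequent lookup starts from a different default router.
 */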
753
754 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
755 {
756         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
757 }
758
759 #ifdef CONFIG_IPV6_ROUTE_INFO
760 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
761                   const struct in6_addr *gwaddr)
762 {
763         struct net *net = dev_net(dev);
764         struct route_info *rinfo = (struct route_info *) opt;
765         struct in6_addr prefix_buf, *prefix;
766         unsigned int pref;
767         unsigned long lifetime;
768         struct rt6_info *rt;
769
770         if (len < sizeof(struct route_info)) {
771                 return -EINVAL;
772         }
773
774         /* Sanity check for prefix_len and length */
775         if (rinfo->length > 3) {
776                 return -EINVAL;
777         } else if (rinfo->prefix_len > 128) {
778                 return -EINVAL;
779         } else if (rinfo->prefix_len > 64) {
780                 if (rinfo->length < 2) {
781                         return -EINVAL;
782                 }
783         } else if (rinfo->prefix_len > 0) {
784                 if (rinfo->length < 1) {
785                         return -EINVAL;
786                 }
787         }
788
789         pref = rinfo->route_pref;
790         if (pref == ICMPV6_ROUTER_PREF_INVALID)
791                 return -EINVAL;
792
793         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
794
795         if (rinfo->length == 3)
796                 prefix = (struct in6_addr *)rinfo->prefix;
797         else {
798                 /* this function is safe */
799                 ipv6_addr_prefix(&prefix_buf,
800                                  (struct in6_addr *)rinfo->prefix,
801                                  rinfo->prefix_len);
802                 prefix = &prefix_buf;
803         }
804
805         if (rinfo->prefix_len == 0)
806                 rt = rt6_get_dflt_router(gwaddr, dev);
807         else
808                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
809                                         gwaddr, dev);
810
811         if (rt && !lifetime) {
812                 ip6_del_rt(rt);
813                 rt = NULL;
814         }
815
816         if (!rt && lifetime)
817                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
818                                         dev, pref);
819         else if (rt)
820                 rt->rt6i_flags = RTF_ROUTEINFO |
821                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
822
823         if (rt) {
824                 if (!addrconf_finite_timeout(lifetime))
825                         rt6_clean_expires(rt);
826                 else
827                         rt6_set_expires(rt, jiffies + HZ * lifetime);
828
829                 ip6_rt_put(rt);
830         }
831         return 0;
832 }
833 #endif
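/*
 * Editor's note (not part of the original source): rt6_route_rcv()
 * processes the RFC 4191 Route Information option from a Router
 * Advertisement.  The length field is in units of 8 octets: 1 carries no
 * prefix bits, 2 carries up to 64 bits and 3 a full 128-bit prefix, which
 * is what the sanity checks above enforce.  A zero lifetime removes the
 * route; otherwise it is added or refreshed with the advertised preference
 * and, for finite lifetimes, an expiry of jiffies + HZ * lifetime.
 */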
834
835 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
836                                         struct in6_addr *saddr)
837 {
838         struct fib6_node *pn;
839         while (1) {
840                 if (fn->fn_flags & RTN_TL_ROOT)
841                         return NULL;
842                 pn = fn->parent;
843                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
844                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
845                 else
846                         fn = pn;
847                 if (fn->fn_flags & RTN_RTINFO)
848                         return fn;
849         }
850 }
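/*
 * Editor's note (not part of the original source): when a lookup dead-ends,
 * fib6_backtrack() walks up toward the tree root, re-descending into a
 * parent's source-address subtree (FIB6_SUBTREE) where one exists, and
 * stops at the first node that actually carries routes (RTN_RTINFO).
 * Reaching the top-level root returns NULL and ends the search.
 */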
851
852 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
853                                              struct fib6_table *table,
854                                              struct flowi6 *fl6, int flags)
855 {
856         struct fib6_node *fn;
857         struct rt6_info *rt;
858
859         read_lock_bh(&table->tb6_lock);
860         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
861 restart:
862         rt = fn->leaf;
863         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
864         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
865                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
866         if (rt == net->ipv6.ip6_null_entry) {
867                 fn = fib6_backtrack(fn, &fl6->saddr);
868                 if (fn)
869                         goto restart;
870         }
871         dst_use(&rt->dst, jiffies);
872         read_unlock_bh(&table->tb6_lock);
873
874         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
875
876         return rt;
877
878 }
879
880 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
881                                     int flags)
882 {
883         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
884 }
885 EXPORT_SYMBOL_GPL(ip6_route_lookup);
886
887 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
888                             const struct in6_addr *saddr, int oif, int strict)
889 {
890         struct flowi6 fl6 = {
891                 .flowi6_oif = oif,
892                 .daddr = *daddr,
893         };
894         struct dst_entry *dst;
895         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
896
897         if (saddr) {
898                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
899                 flags |= RT6_LOOKUP_F_HAS_SADDR;
900         }
901
902         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
903         if (dst->error == 0)
904                 return (struct rt6_info *) dst;
905
906         dst_release(dst);
907
908         return NULL;
909 }
910 EXPORT_SYMBOL(rt6_lookup);
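/*
 * Editor's note (illustrative sketch, not part of the original source): a
 * typical caller resolves a destination and drops the reference when done:
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, 0, 0);
 *
 *	if (rt) {
 *		... use rt->dst ...
 *		ip6_rt_put(rt);
 *	}
 *
 * rt6_lookup() returns a referenced entry on success and NULL on failure,
 * so every successful lookup must be balanced with ip6_rt_put().
 */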
911
912 /* ip6_ins_rt is called with the table->tb6_lock NOT held (free).
913    It takes a new route entry; if the addition fails for any reason,
914    the route is freed.  In any case, if the caller does not hold a
915    reference to it, it may be destroyed.
916  */
917
918 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
919                         struct mx6_config *mxc)
920 {
921         int err;
922         struct fib6_table *table;
923
924         table = rt->rt6i_table;
925         write_lock_bh(&table->tb6_lock);
926         err = fib6_add(&table->tb6_root, rt, info, mxc);
927         write_unlock_bh(&table->tb6_lock);
928
929         return err;
930 }
931
932 int ip6_ins_rt(struct rt6_info *rt)
933 {
934         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
935         struct mx6_config mxc = { .mx = NULL, };
936
937         return __ip6_ins_rt(rt, &info, &mxc);
938 }
939
940 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
941                                            const struct in6_addr *daddr,
942                                            const struct in6_addr *saddr)
943 {
944         struct rt6_info *rt;
945
946         /*
947          *      Clone the route.
948          */
949
950         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
951                 ort = (struct rt6_info *)ort->dst.from;
952
953         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
954
955         if (!rt)
956                 return NULL;
957
958         ip6_rt_copy_init(rt, ort);
959         rt->rt6i_flags |= RTF_CACHE;
960         rt->rt6i_metric = 0;
961         rt->dst.flags |= DST_HOST;
962         rt->rt6i_dst.addr = *daddr;
963         rt->rt6i_dst.plen = 128;
964
965         if (!rt6_is_gw_or_nonexthop(ort)) {
966                 if (ort->rt6i_dst.plen != 128 &&
967                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
968                         rt->rt6i_flags |= RTF_ANYCAST;
969 #ifdef CONFIG_IPV6_SUBTREES
970                 if (rt->rt6i_src.plen && saddr) {
971                         rt->rt6i_src.addr = *saddr;
972                         rt->rt6i_src.plen = 128;
973                 }
974 #endif
975         }
976
977         return rt;
978 }
979
980 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
981 {
982         struct rt6_info *pcpu_rt;
983
984         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
985                                   rt->dst.dev, rt->dst.flags);
986
987         if (!pcpu_rt)
988                 return NULL;
989         ip6_rt_copy_init(pcpu_rt, rt);
990         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
991         pcpu_rt->rt6i_flags |= RTF_PCPU;
992         return pcpu_rt;
993 }
994
995 /* It should be called with read_lock_bh(&tb6_lock) acquired */
996 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
997 {
998         struct rt6_info *pcpu_rt, **p;
999
1000         p = this_cpu_ptr(rt->rt6i_pcpu);
1001         pcpu_rt = *p;
1002
1003         if (pcpu_rt) {
1004                 dst_hold(&pcpu_rt->dst);
1005                 rt6_dst_from_metrics_check(pcpu_rt);
1006         }
1007         return pcpu_rt;
1008 }
1009
1010 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1011 {
1012         struct fib6_table *table = rt->rt6i_table;
1013         struct rt6_info *pcpu_rt, *prev, **p;
1014
1015         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1016         if (!pcpu_rt) {
1017                 struct net *net = dev_net(rt->dst.dev);
1018
1019                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1020                 return net->ipv6.ip6_null_entry;
1021         }
1022
1023         read_lock_bh(&table->tb6_lock);
1024         if (rt->rt6i_pcpu) {
1025                 p = this_cpu_ptr(rt->rt6i_pcpu);
1026                 prev = cmpxchg(p, NULL, pcpu_rt);
1027                 if (prev) {
1028                         /* If someone did it before us, return prev instead */
1029                         dst_destroy(&pcpu_rt->dst);
1030                         pcpu_rt = prev;
1031                 }
1032         } else {
1033                 /* rt has been removed from the fib6 tree
1034                  * before we have a chance to acquire the read_lock.
1035                  * In this case, don't bother to create a pcpu rt
1036                  * since rt is going away anyway.  The next
1037                  * dst_check() will trigger a re-lookup.
1038                  */
1039                 dst_destroy(&pcpu_rt->dst);
1040                 pcpu_rt = rt;
1041         }
1042         dst_hold(&pcpu_rt->dst);
1043         rt6_dst_from_metrics_check(pcpu_rt);
1044         read_unlock_bh(&table->tb6_lock);
1045         return pcpu_rt;
1046 }
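/*
 * Editor's note (not part of the original source): per-cpu route caching.
 * rt6_get_pcpu_route() returns this CPU's cached clone if one exists;
 * otherwise rt6_make_pcpu_route() allocates one and installs it with
 * cmpxchg(), so a concurrent winner's clone is reused and ours destroyed.
 * If rt->rt6i_pcpu is NULL the route has already been removed from the
 * fib6 tree, in which case the original rt is returned and the next
 * dst_check() forces a fresh lookup.
 */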
1047
1048 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1049                                int oif, struct flowi6 *fl6, int flags)
1050 {
1051         struct fib6_node *fn, *saved_fn;
1052         struct rt6_info *rt;
1053         int strict = 0;
1054
1055         strict |= flags & RT6_LOOKUP_F_IFACE;
1056         strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1057         if (net->ipv6.devconf_all->forwarding == 0)
1058                 strict |= RT6_LOOKUP_F_REACHABLE;
1059
1060         read_lock_bh(&table->tb6_lock);
1061
1062         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063         saved_fn = fn;
1064
1065         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1066                 oif = 0;
1067
1068 redo_rt6_select:
1069         rt = rt6_select(fn, oif, strict);
1070         if (rt->rt6i_nsiblings)
1071                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1072         if (rt == net->ipv6.ip6_null_entry) {
1073                 fn = fib6_backtrack(fn, &fl6->saddr);
1074                 if (fn)
1075                         goto redo_rt6_select;
1076                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1077                         /* also consider unreachable route */
1078                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1079                         fn = saved_fn;
1080                         goto redo_rt6_select;
1081                 }
1082         }
1083
1084
1085         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1086                 dst_use(&rt->dst, jiffies);
1087                 read_unlock_bh(&table->tb6_lock);
1088
1089                 rt6_dst_from_metrics_check(rt);
1090
1091                 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1092                 return rt;
1093         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1094                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1095                 /* Create a RTF_CACHE clone which will not be
1096                  * owned by the fib6 tree.  It is for the special case where
1097                  * the daddr in the skb during the neighbor look-up is different
1098                  * from the fl6->daddr used to look-up route here.
1099                  */
1100
1101                 struct rt6_info *uncached_rt;
1102
1103                 dst_use(&rt->dst, jiffies);
1104                 read_unlock_bh(&table->tb6_lock);
1105
1106                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1107                 dst_release(&rt->dst);
1108
1109                 if (uncached_rt)
1110                         rt6_uncached_list_add(uncached_rt);
1111                 else
1112                         uncached_rt = net->ipv6.ip6_null_entry;
1113
1114                 dst_hold(&uncached_rt->dst);
1115
1116                 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1117                 return uncached_rt;
1118
1119         } else {
1120                 /* Get a percpu copy */
1121
1122                 struct rt6_info *pcpu_rt;
1123
1124                 rt->dst.lastuse = jiffies;
1125                 rt->dst.__use++;
1126                 pcpu_rt = rt6_get_pcpu_route(rt);
1127
1128                 if (pcpu_rt) {
1129                         read_unlock_bh(&table->tb6_lock);
1130                 } else {
1131                         /* We have to do the read_unlock first
1132                          * because rt6_make_pcpu_route() may trigger
1133                          * ip6_dst_gc() which will take the write_lock.
1134                          */
1135                         dst_hold(&rt->dst);
1136                         read_unlock_bh(&table->tb6_lock);
1137                         pcpu_rt = rt6_make_pcpu_route(rt);
1138                         dst_release(&rt->dst);
1139                 }
1140
1141                 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1142                 return pcpu_rt;
1143
1144         }
1145 }
1146 EXPORT_SYMBOL_GPL(ip6_pol_route);
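/*
 * Editor's note (not part of the original source): ip6_pol_route() hands
 * back one of three things: the fib6-owned entry itself for ip6_null_entry
 * and RTF_CACHE routes, a new uncached RTF_CACHE clone when
 * FLOWI_FLAG_KNOWN_NH is set on a route without a gateway (the clone goes
 * on the rt6_uncached_list), or, in the common case, a per-cpu copy
 * obtained via rt6_get_pcpu_route()/rt6_make_pcpu_route().
 */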
1147
1148 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1149                                             struct flowi6 *fl6, int flags)
1150 {
1151         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1152 }
1153
1154 struct dst_entry *ip6_route_input_lookup(struct net *net,
1155                                          struct net_device *dev,
1156                                          struct flowi6 *fl6, int flags)
1157 {
1158         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1159                 flags |= RT6_LOOKUP_F_IFACE;
1160
1161         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1162 }
1163 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1164
1165 void ip6_route_input(struct sk_buff *skb)
1166 {
1167         const struct ipv6hdr *iph = ipv6_hdr(skb);
1168         struct net *net = dev_net(skb->dev);
1169         int flags = RT6_LOOKUP_F_HAS_SADDR;
1170         struct ip_tunnel_info *tun_info;
1171         struct flowi6 fl6 = {
1172                 .flowi6_iif = skb->dev->ifindex,
1173                 .daddr = iph->daddr,
1174                 .saddr = iph->saddr,
1175                 .flowlabel = ip6_flowinfo(iph),
1176                 .flowi6_mark = skb->mark,
1177                 .flowi6_proto = iph->nexthdr,
1178         };
1179
1180         tun_info = skb_tunnel_info(skb);
1181         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1182                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1183         skb_dst_drop(skb);
1184         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1185 }
1186
1187 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1188                                              struct flowi6 *fl6, int flags)
1189 {
1190         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1191 }
1192
1193 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1194                                          struct flowi6 *fl6, int flags)
1195 {
1196         bool any_src;
1197
1198         if (rt6_need_strict(&fl6->daddr)) {
1199                 struct dst_entry *dst;
1200
1201                 dst = l3mdev_link_scope_lookup(net, fl6);
1202                 if (dst)
1203                         return dst;
1204         }
1205
1206         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1207
1208         any_src = ipv6_addr_any(&fl6->saddr);
1209         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1210             (fl6->flowi6_oif && any_src))
1211                 flags |= RT6_LOOKUP_F_IFACE;
1212
1213         if (!any_src)
1214                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1215         else if (sk)
1216                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1217
1218         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1219 }
1220 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1221
1222 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1223 {
1224         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1225         struct dst_entry *new = NULL;
1226
1227         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1228         if (rt) {
1229                 rt6_info_init(rt);
1230
1231                 new = &rt->dst;
1232                 new->__use = 1;
1233                 new->input = dst_discard;
1234                 new->output = dst_discard_out;
1235
1236                 dst_copy_metrics(new, &ort->dst);
1237                 rt->rt6i_idev = ort->rt6i_idev;
1238                 if (rt->rt6i_idev)
1239                         in6_dev_hold(rt->rt6i_idev);
1240
1241                 rt->rt6i_gateway = ort->rt6i_gateway;
1242                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1243                 rt->rt6i_metric = 0;
1244
1245                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1246 #ifdef CONFIG_IPV6_SUBTREES
1247                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1248 #endif
1249
1250                 dst_free(new);
1251         }
1252
1253         dst_release(dst_orig);
1254         return new ? new : ERR_PTR(-ENOMEM);
1255 }
1256
1257 /*
1258  *      Destination cache support functions
1259  */
1260
1261 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1262 {
1263         if (rt->dst.from &&
1264             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1265                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1266 }
1267
1268 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1269 {
1270         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1271                 return NULL;
1272
1273         if (rt6_check_expired(rt))
1274                 return NULL;
1275
1276         return &rt->dst;
1277 }
1278
1279 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1280 {
1281         if (!__rt6_check_expired(rt) &&
1282             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1283             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1284                 return &rt->dst;
1285         else
1286                 return NULL;
1287 }
1288
1289 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1290 {
1291         struct rt6_info *rt;
1292
1293         rt = (struct rt6_info *) dst;
1294
1295         /* All IPV6 dsts are created with ->obsolete set to the value
1296          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1297          * into this function always.
1298          */
1299
1300         rt6_dst_from_metrics_check(rt);
1301
1302         if (rt->rt6i_flags & RTF_PCPU ||
1303             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1304                 return rt6_dst_from_check(rt, cookie);
1305         else
1306                 return rt6_check(rt, cookie);
1307 }
1308
1309 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1310 {
1311         struct rt6_info *rt = (struct rt6_info *) dst;
1312
1313         if (rt) {
1314                 if (rt->rt6i_flags & RTF_CACHE) {
1315                         if (rt6_check_expired(rt)) {
1316                                 ip6_del_rt(rt);
1317                                 dst = NULL;
1318                         }
1319                 } else {
1320                         dst_release(dst);
1321                         dst = NULL;
1322                 }
1323         }
1324         return dst;
1325 }
1326
1327 static void ip6_link_failure(struct sk_buff *skb)
1328 {
1329         struct rt6_info *rt;
1330
1331         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1332
1333         rt = (struct rt6_info *) skb_dst(skb);
1334         if (rt) {
1335                 if (rt->rt6i_flags & RTF_CACHE) {
1336                         dst_hold(&rt->dst);
1337                         ip6_del_rt(rt);
1338                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1339                         rt->rt6i_node->fn_sernum = -1;
1340                 }
1341         }
1342 }
1343
1344 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1345 {
1346         struct net *net = dev_net(rt->dst.dev);
1347
1348         rt->rt6i_flags |= RTF_MODIFIED;
1349         rt->rt6i_pmtu = mtu;
1350         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1351 }
1352
1353 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1354 {
1355         return !(rt->rt6i_flags & RTF_CACHE) &&
1356                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1357 }
1358
1359 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1360                                  const struct ipv6hdr *iph, u32 mtu)
1361 {
1362         struct rt6_info *rt6 = (struct rt6_info *)dst;
1363
1364         if (rt6->rt6i_flags & RTF_LOCAL)
1365                 return;
1366
1367         dst_confirm(dst);
1368         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1369         if (mtu >= dst_mtu(dst))
1370                 return;
1371
1372         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1373                 rt6_do_update_pmtu(rt6, mtu);
1374         } else {
1375                 const struct in6_addr *daddr, *saddr;
1376                 struct rt6_info *nrt6;
1377
1378                 if (iph) {
1379                         daddr = &iph->daddr;
1380                         saddr = &iph->saddr;
1381                 } else if (sk) {
1382                         daddr = &sk->sk_v6_daddr;
1383                         saddr = &inet6_sk(sk)->saddr;
1384                 } else {
1385                         return;
1386                 }
1387                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1388                 if (nrt6) {
1389                         rt6_do_update_pmtu(nrt6, mtu);
1390
1391                         /* ip6_ins_rt(nrt6) will bump the
1392                          * rt6->rt6i_node->fn_sernum
1393                          * which will fail the next rt6_check() and
1394                          * invalidate the sk->sk_dst_cache.
1395                          */
1396                         ip6_ins_rt(nrt6);
1397                 }
1398         }
1399 }
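/*
 * Editor's note (not part of the original source): a PMTU report only
 * takes effect if it is below the current dst_mtu() (and at least
 * IPV6_MIN_MTU).  Routes that are already RTF_CACHE clones (or otherwise
 * ineligible for cloning) are updated in place with RTF_MODIFIED and an
 * expiring rt6i_pmtu; for tree-owned or per-cpu routes a fresh RTF_CACHE
 * clone is created for the daddr/saddr pair and inserted with
 * ip6_ins_rt(), which bumps the fib6 node serial number and so invalidates
 * stale socket dst caches.
 */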
1400
1401 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1402                                struct sk_buff *skb, u32 mtu)
1403 {
1404         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1405 }
1406
1407 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1408                      int oif, u32 mark, kuid_t uid)
1409 {
1410         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1411         struct dst_entry *dst;
1412         struct flowi6 fl6;
1413
1414         memset(&fl6, 0, sizeof(fl6));
1415         fl6.flowi6_oif = oif;
1416         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1417         fl6.daddr = iph->daddr;
1418         fl6.saddr = iph->saddr;
1419         fl6.flowlabel = ip6_flowinfo(iph);
1420         fl6.flowi6_uid = uid;
1421
1422         dst = ip6_route_output(net, NULL, &fl6);
1423         if (!dst->error)
1424                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1425         dst_release(dst);
1426 }
1427 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1428
1429 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1430 {
1431         struct dst_entry *dst;
1432
1433         ip6_update_pmtu(skb, sock_net(sk), mtu,
1434                         sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
1435
1436         dst = __sk_dst_get(sk);
1437         if (!dst || !dst->obsolete ||
1438             dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1439                 return;
1440
1441         bh_lock_sock(sk);
1442         if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1443                 ip6_datagram_dst_update(sk, false);
1444         bh_unlock_sock(sk);
1445 }
1446 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1447
1448 /* Handle redirects */
1449 struct ip6rd_flowi {
1450         struct flowi6 fl6;
1451         struct in6_addr gateway;
1452 };
1453
1454 static struct rt6_info *__ip6_route_redirect(struct net *net,
1455                                              struct fib6_table *table,
1456                                              struct flowi6 *fl6,
1457                                              int flags)
1458 {
1459         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1460         struct rt6_info *rt;
1461         struct fib6_node *fn;
1462
1463         /* Get the "current" route for this destination and
1464          * check if the redirect has come from an appropriate router.
1465          *
1466          * RFC 4861 specifies that redirects should only be
1467          * accepted if they come from the nexthop to the target.
1468          * Due to the way the routes are chosen, this notion
1469          * is a bit fuzzy and one might need to check all possible
1470          * routes.
1471          */
1472
1473         read_lock_bh(&table->tb6_lock);
1474         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1475 restart:
1476         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1477                 if (rt6_check_expired(rt))
1478                         continue;
1479                 if (rt->dst.error)
1480                         break;
1481                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1482                         continue;
1483                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1484                         continue;
1485                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1486                         continue;
1487                 break;
1488         }
1489
1490         if (!rt)
1491                 rt = net->ipv6.ip6_null_entry;
1492         else if (rt->dst.error) {
1493                 rt = net->ipv6.ip6_null_entry;
1494                 goto out;
1495         }
1496
1497         if (rt == net->ipv6.ip6_null_entry) {
1498                 fn = fib6_backtrack(fn, &fl6->saddr);
1499                 if (fn)
1500                         goto restart;
1501         }
1502
1503 out:
1504         dst_hold(&rt->dst);
1505
1506         read_unlock_bh(&table->tb6_lock);
1507
1508         trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1509         return rt;
1510 };
1511
1512 static struct dst_entry *ip6_route_redirect(struct net *net,
1513                                         const struct flowi6 *fl6,
1514                                         const struct in6_addr *gateway)
1515 {
1516         int flags = RT6_LOOKUP_F_HAS_SADDR;
1517         struct ip6rd_flowi rdfl;
1518
1519         rdfl.fl6 = *fl6;
1520         rdfl.gateway = *gateway;
1521
1522         return fib6_rule_lookup(net, &rdfl.fl6,
1523                                 flags, __ip6_route_redirect);
1524 }
1525
1526 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1527                   kuid_t uid)
1528 {
1529         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1530         struct dst_entry *dst;
1531         struct flowi6 fl6;
1532
1533         memset(&fl6, 0, sizeof(fl6));
1534         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1535         fl6.flowi6_oif = oif;
1536         fl6.flowi6_mark = mark;
1537         fl6.daddr = iph->daddr;
1538         fl6.saddr = iph->saddr;
1539         fl6.flowlabel = ip6_flowinfo(iph);
1540         fl6.flowi6_uid = uid;
1541
1542         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1543         rt6_do_redirect(dst, NULL, skb);
1544         dst_release(dst);
1545 }
1546 EXPORT_SYMBOL_GPL(ip6_redirect);
1547
1548 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1549                             u32 mark)
1550 {
1551         const struct ipv6hdr *iph = ipv6_hdr(skb);
1552         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1553         struct dst_entry *dst;
1554         struct flowi6 fl6;
1555
1556         memset(&fl6, 0, sizeof(fl6));
1557         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1558         fl6.flowi6_oif = oif;
1559         fl6.flowi6_mark = mark;
1560         fl6.daddr = msg->dest;
1561         fl6.saddr = iph->daddr;
1562         fl6.flowi6_uid = sock_net_uid(net, NULL);
1563
1564         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1565         rt6_do_redirect(dst, NULL, skb);
1566         dst_release(dst);
1567 }
1568
1569 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1570 {
1571         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1572                      sk->sk_uid);
1573 }
1574 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1575
1576 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1577 {
1578         struct net_device *dev = dst->dev;
1579         unsigned int mtu = dst_mtu(dst);
1580         struct net *net = dev_net(dev);
1581
1582         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1583
1584         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1585                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1586
1587         /*
1588          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1589          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1590          * IPV6_MAXPLEN is also valid and means: "any MSS,
1591          * rely only on pmtu discovery"
1592          */
1593         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1594                 mtu = IPV6_MAXPLEN;
1595         return mtu;
1596 }
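/* Illustrative arithmetic (not from the original source): for a standard
 * 1500-byte Ethernet MTU with default sysctls, ip6_default_advmss() above
 * advertises
 *
 *	advmss = 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 *	       = 1500 - 40 - 20 = 1440 bytes,
 *
 * which is well below IPV6_MAXPLEN - sizeof(struct tcphdr), so the
 * "rely only on PMTU discovery" clamp to IPV6_MAXPLEN is not taken.
 */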
1597
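/* ip6_mtu() below picks the MTU in this order: a learned per-route PMTU
 * (rt6i_pmtu), then an explicit RTAX_MTU metric, then the device's IPv6 MTU
 * (falling back to IPV6_MIN_MTU when no inet6_dev is attached); the result is
 * capped at IP6_MAX_MTU and reduced by any lightweight-tunnel encap headroom.
 */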
1598 static unsigned int ip6_mtu(const struct dst_entry *dst)
1599 {
1600         const struct rt6_info *rt = (const struct rt6_info *)dst;
1601         unsigned int mtu = rt->rt6i_pmtu;
1602         struct inet6_dev *idev;
1603
1604         if (mtu)
1605                 goto out;
1606
1607         mtu = dst_metric_raw(dst, RTAX_MTU);
1608         if (mtu)
1609                 goto out;
1610
1611         mtu = IPV6_MIN_MTU;
1612
1613         rcu_read_lock();
1614         idev = __in6_dev_get(dst->dev);
1615         if (idev)
1616                 mtu = idev->cnf.mtu6;
1617         rcu_read_unlock();
1618
1619 out:
1620         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1621
1622         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1623 }
1624
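/* ICMPv6 replies use throwaway dst entries that never enter the FIB;
 * icmp6_dst_alloc() chains them on icmp6_dst_gc_list so that icmp6_dst_gc()
 * can free the unreferenced ones and icmp6_clean_all() can purge them on
 * events such as interface shutdown (see rt6_ifdown()).
 */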
1625 static struct dst_entry *icmp6_dst_gc_list;
1626 static DEFINE_SPINLOCK(icmp6_dst_lock);
1627
1628 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1629                                   struct flowi6 *fl6)
1630 {
1631         struct dst_entry *dst;
1632         struct rt6_info *rt;
1633         struct inet6_dev *idev = in6_dev_get(dev);
1634         struct net *net = dev_net(dev);
1635
1636         if (unlikely(!idev))
1637                 return ERR_PTR(-ENODEV);
1638
1639         rt = ip6_dst_alloc(net, dev, 0);
1640         if (unlikely(!rt)) {
1641                 in6_dev_put(idev);
1642                 dst = ERR_PTR(-ENOMEM);
1643                 goto out;
1644         }
1645
1646         rt->dst.flags |= DST_HOST;
1647         rt->dst.output  = ip6_output;
1648         atomic_set(&rt->dst.__refcnt, 1);
1649         rt->rt6i_gateway  = fl6->daddr;
1650         rt->rt6i_dst.addr = fl6->daddr;
1651         rt->rt6i_dst.plen = 128;
1652         rt->rt6i_idev     = idev;
1653         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1654
1655         spin_lock_bh(&icmp6_dst_lock);
1656         rt->dst.next = icmp6_dst_gc_list;
1657         icmp6_dst_gc_list = &rt->dst;
1658         spin_unlock_bh(&icmp6_dst_lock);
1659
1660         fib6_force_start_gc(net);
1661
1662         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1663
1664 out:
1665         return dst;
1666 }
1667
1668 int icmp6_dst_gc(void)
1669 {
1670         struct dst_entry *dst, **pprev;
1671         int more = 0;
1672
1673         spin_lock_bh(&icmp6_dst_lock);
1674         pprev = &icmp6_dst_gc_list;
1675
1676         while ((dst = *pprev) != NULL) {
1677                 if (!atomic_read(&dst->__refcnt)) {
1678                         *pprev = dst->next;
1679                         dst_free(dst);
1680                 } else {
1681                         pprev = &dst->next;
1682                         ++more;
1683                 }
1684         }
1685
1686         spin_unlock_bh(&icmp6_dst_lock);
1687
1688         return more;
1689 }
1690
1691 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1692                             void *arg)
1693 {
1694         struct dst_entry *dst, **pprev;
1695
1696         spin_lock_bh(&icmp6_dst_lock);
1697         pprev = &icmp6_dst_gc_list;
1698         while ((dst = *pprev) != NULL) {
1699                 struct rt6_info *rt = (struct rt6_info *) dst;
1700                 if (func(rt, arg)) {
1701                         *pprev = dst->next;
1702                         dst_free(dst);
1703                 } else {
1704                         pprev = &dst->next;
1705                 }
1706         }
1707         spin_unlock_bh(&icmp6_dst_lock);
1708 }
1709
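/* dst garbage collection is rate limited by the ip6_rt_gc_min_interval sysctl
 * unless the number of dst entries already exceeds ip6_rt_max_size; each
 * forced run bumps ip6_rt_gc_expire so fib6_run_gc() becomes more aggressive,
 * and the counter decays again via the ip6_rt_gc_elasticity shift below.
 */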
1710 static int ip6_dst_gc(struct dst_ops *ops)
1711 {
1712         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1713         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1714         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1715         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1716         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1717         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1718         int entries;
1719
1720         entries = dst_entries_get_fast(ops);
1721         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1722             entries <= rt_max_size)
1723                 goto out;
1724
1725         net->ipv6.ip6_rt_gc_expire++;
1726         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1727         entries = dst_entries_get_slow(ops);
1728         if (entries < ops->gc_thresh)
1729                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1730 out:
1731         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1732         return entries > rt_max_size;
1733 }
1734
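/* ip6_convert_metrics() flattens the nested RTA_METRICS netlink attributes
 * (RTAX_MTU, RTAX_HOPLIMIT, RTAX_CC_ALGO, ...) into the mx6_config array
 * consumed by __ip6_ins_rt()/fib6_add(); congestion-control algorithm names
 * are resolved to keys via tcp_ca_get_key_by_name().
 */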
1735 static int ip6_convert_metrics(struct mx6_config *mxc,
1736                                const struct fib6_config *cfg)
1737 {
1738         bool ecn_ca = false;
1739         struct nlattr *nla;
1740         int remaining;
1741         u32 *mp;
1742
1743         if (!cfg->fc_mx)
1744                 return 0;
1745
1746         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1747         if (unlikely(!mp))
1748                 return -ENOMEM;
1749
1750         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1751                 int type = nla_type(nla);
1752                 u32 val;
1753
1754                 if (!type)
1755                         continue;
1756                 if (unlikely(type > RTAX_MAX))
1757                         goto err;
1758
1759                 if (type == RTAX_CC_ALGO) {
1760                         char tmp[TCP_CA_NAME_MAX];
1761
1762                         nla_strlcpy(tmp, nla, sizeof(tmp));
1763                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1764                         if (val == TCP_CA_UNSPEC)
1765                                 goto err;
1766                 } else {
1767                         val = nla_get_u32(nla);
1768                 }
1769                 if (type == RTAX_HOPLIMIT && val > 255)
1770                         val = 255;
1771                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1772                         goto err;
1773
1774                 mp[type - 1] = val;
1775                 __set_bit(type - 1, mxc->mx_valid);
1776         }
1777
1778         if (ecn_ca) {
1779                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1780                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1781         }
1782
1783         mxc->mx = mp;
1784         return 0;
1785  err:
1786         kfree(mp);
1787         return -EINVAL;
1788 }
1789
1790 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1791                                             struct fib6_config *cfg,
1792                                             const struct in6_addr *gw_addr)
1793 {
1794         struct flowi6 fl6 = {
1795                 .flowi6_oif = cfg->fc_ifindex,
1796                 .daddr = *gw_addr,
1797                 .saddr = cfg->fc_prefsrc,
1798         };
1799         struct fib6_table *table;
1800         struct rt6_info *rt;
1801         int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1802
1803         table = fib6_get_table(net, cfg->fc_table);
1804         if (!table)
1805                 return NULL;
1806
1807         if (!ipv6_addr_any(&cfg->fc_prefsrc))
1808                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1809
1810         rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1811
1812         /* if table lookup failed, fall back to full lookup */
1813         if (rt == net->ipv6.ip6_null_entry) {
1814                 ip6_rt_put(rt);
1815                 rt = NULL;
1816         }
1817
1818         return rt;
1819 }
1820
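/* ip6_route_info_create() turns a fib6_config (from netlink, the ioctl path
 * or addrconf) into an unlinked rt6_info: it resolves the output device,
 * validates any gateway, promotes loopback routes to reject routes and wires
 * up the dst input/output handlers; callers insert the result into the FIB
 * with __ip6_ins_rt().
 */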
1821 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1822 {
1823         struct net *net = cfg->fc_nlinfo.nl_net;
1824         struct rt6_info *rt = NULL;
1825         struct net_device *dev = NULL;
1826         struct inet6_dev *idev = NULL;
1827         struct fib6_table *table;
1828         int addr_type;
1829         int err = -EINVAL;
1830
1831         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1832                 goto out;
1833 #ifndef CONFIG_IPV6_SUBTREES
1834         if (cfg->fc_src_len)
1835                 goto out;
1836 #endif
1837         if (cfg->fc_ifindex) {
1838                 err = -ENODEV;
1839                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1840                 if (!dev)
1841                         goto out;
1842                 idev = in6_dev_get(dev);
1843                 if (!idev)
1844                         goto out;
1845         }
1846
1847         if (cfg->fc_metric == 0)
1848                 cfg->fc_metric = IP6_RT_PRIO_USER;
1849
1850         err = -ENOBUFS;
1851         if (cfg->fc_nlinfo.nlh &&
1852             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1853                 table = fib6_get_table(net, cfg->fc_table);
1854                 if (!table) {
1855                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1856                         table = fib6_new_table(net, cfg->fc_table);
1857                 }
1858         } else {
1859                 table = fib6_new_table(net, cfg->fc_table);
1860         }
1861
1862         if (!table)
1863                 goto out;
1864
1865         rt = ip6_dst_alloc(net, NULL,
1866                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1867
1868         if (!rt) {
1869                 err = -ENOMEM;
1870                 goto out;
1871         }
1872
1873         if (cfg->fc_flags & RTF_EXPIRES)
1874                 rt6_set_expires(rt, jiffies +
1875                                 clock_t_to_jiffies(cfg->fc_expires));
1876         else
1877                 rt6_clean_expires(rt);
1878
1879         if (cfg->fc_protocol == RTPROT_UNSPEC)
1880                 cfg->fc_protocol = RTPROT_BOOT;
1881         rt->rt6i_protocol = cfg->fc_protocol;
1882
1883         addr_type = ipv6_addr_type(&cfg->fc_dst);
1884
1885         if (addr_type & IPV6_ADDR_MULTICAST)
1886                 rt->dst.input = ip6_mc_input;
1887         else if (cfg->fc_flags & RTF_LOCAL)
1888                 rt->dst.input = ip6_input;
1889         else
1890                 rt->dst.input = ip6_forward;
1891
1892         rt->dst.output = ip6_output;
1893
1894         if (cfg->fc_encap) {
1895                 struct lwtunnel_state *lwtstate;
1896
1897                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1898                                            cfg->fc_encap, AF_INET6, cfg,
1899                                            &lwtstate);
1900                 if (err)
1901                         goto out;
1902                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1903                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1904                         rt->dst.lwtstate->orig_output = rt->dst.output;
1905                         rt->dst.output = lwtunnel_output;
1906                 }
1907                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1908                         rt->dst.lwtstate->orig_input = rt->dst.input;
1909                         rt->dst.input = lwtunnel_input;
1910                 }
1911         }
1912
1913         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1914         rt->rt6i_dst.plen = cfg->fc_dst_len;
1915         if (rt->rt6i_dst.plen == 128)
1916                 rt->dst.flags |= DST_HOST;
1917
1918 #ifdef CONFIG_IPV6_SUBTREES
1919         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1920         rt->rt6i_src.plen = cfg->fc_src_len;
1921 #endif
1922
1923         rt->rt6i_metric = cfg->fc_metric;
1924
1925         /* We cannot add true routes via loopback here;
1926            they would result in kernel looping. Promote them to reject routes.
1927          */
1928         if ((cfg->fc_flags & RTF_REJECT) ||
1929             (dev && (dev->flags & IFF_LOOPBACK) &&
1930              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1931              !(cfg->fc_flags & RTF_LOCAL))) {
1932                 /* hold loopback dev/idev if we haven't done so. */
1933                 if (dev != net->loopback_dev) {
1934                         if (dev) {
1935                                 dev_put(dev);
1936                                 in6_dev_put(idev);
1937                         }
1938                         dev = net->loopback_dev;
1939                         dev_hold(dev);
1940                         idev = in6_dev_get(dev);
1941                         if (!idev) {
1942                                 err = -ENODEV;
1943                                 goto out;
1944                         }
1945                 }
1946                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1947                 switch (cfg->fc_type) {
1948                 case RTN_BLACKHOLE:
1949                         rt->dst.error = -EINVAL;
1950                         rt->dst.output = dst_discard_out;
1951                         rt->dst.input = dst_discard;
1952                         break;
1953                 case RTN_PROHIBIT:
1954                         rt->dst.error = -EACCES;
1955                         rt->dst.output = ip6_pkt_prohibit_out;
1956                         rt->dst.input = ip6_pkt_prohibit;
1957                         break;
1958                 case RTN_THROW:
1959                 case RTN_UNREACHABLE:
1960                 default:
1961                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1962                                         : (cfg->fc_type == RTN_UNREACHABLE)
1963                                         ? -EHOSTUNREACH : -ENETUNREACH;
1964                         rt->dst.output = ip6_pkt_discard_out;
1965                         rt->dst.input = ip6_pkt_discard;
1966                         break;
1967                 }
1968                 goto install_route;
1969         }
1970
1971         if (cfg->fc_flags & RTF_GATEWAY) {
1972                 const struct in6_addr *gw_addr;
1973                 int gwa_type;
1974
1975                 gw_addr = &cfg->fc_gateway;
1976                 gwa_type = ipv6_addr_type(gw_addr);
1977
1978                 /* if gw_addr is local we will fail to detect this in case
1979                  * address is still TENTATIVE (DAD in progress). rt6_lookup()
1980                  * will return already-added prefix route via interface that
1981                  * prefix route was assigned to, which might be non-loopback.
1982                  */
1983                 err = -EINVAL;
1984                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1985                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1986                                             dev : NULL, 0, 0))
1987                         goto out;
1988
1989                 rt->rt6i_gateway = *gw_addr;
1990
1991                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1992                         struct rt6_info *grt = NULL;
1993
1994                         /* IPv6 strictly inhibits using non-link-local
1995                            addresses as nexthop addresses.
1996                            Otherwise, a router will not be able to send redirects.
1997                            That is usually good, but in some (rare!) circumstances
1998                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1999                            some exceptions. --ANK
2000                          */
2001                         if (!(gwa_type & IPV6_ADDR_UNICAST))
2002                                 goto out;
2003
2004                         if (cfg->fc_table) {
2005                                 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2006
2007                                 if (grt) {
2008                                         if (grt->rt6i_flags & RTF_GATEWAY ||
2009                                             (dev && dev != grt->dst.dev)) {
2010                                                 ip6_rt_put(grt);
2011                                                 grt = NULL;
2012                                         }
2013                                 }
2014                         }
2015
2016                         if (!grt)
2017                                 grt = rt6_lookup(net, gw_addr, NULL,
2018                                                  cfg->fc_ifindex, 1);
2019
2020                         err = -EHOSTUNREACH;
2021                         if (!grt)
2022                                 goto out;
2023                         if (dev) {
2024                                 if (dev != grt->dst.dev) {
2025                                         ip6_rt_put(grt);
2026                                         goto out;
2027                                 }
2028                         } else {
2029                                 dev = grt->dst.dev;
2030                                 idev = grt->rt6i_idev;
2031                                 dev_hold(dev);
2032                                 in6_dev_hold(grt->rt6i_idev);
2033                         }
2034                         if (!(grt->rt6i_flags & RTF_GATEWAY))
2035                                 err = 0;
2036                         ip6_rt_put(grt);
2037
2038                         if (err)
2039                                 goto out;
2040                 }
2041                 err = -EINVAL;
2042                 if (!dev || (dev->flags & IFF_LOOPBACK))
2043                         goto out;
2044         }
2045
2046         err = -ENODEV;
2047         if (!dev)
2048                 goto out;
2049
2050         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2051                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2052                         err = -EINVAL;
2053                         goto out;
2054                 }
2055                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2056                 rt->rt6i_prefsrc.plen = 128;
2057         } else
2058                 rt->rt6i_prefsrc.plen = 0;
2059
2060         rt->rt6i_flags = cfg->fc_flags;
2061
2062 install_route:
2063         rt->dst.dev = dev;
2064         rt->rt6i_idev = idev;
2065         rt->rt6i_table = table;
2066
2067         cfg->fc_nlinfo.nl_net = dev_net(dev);
2068
2069         return rt;
2070 out:
2071         if (dev)
2072                 dev_put(dev);
2073         if (idev)
2074                 in6_dev_put(idev);
2075         if (rt)
2076                 dst_free(&rt->dst);
2077
2078         return ERR_PTR(err);
2079 }
2080
2081 int ip6_route_add(struct fib6_config *cfg)
2082 {
2083         struct mx6_config mxc = { .mx = NULL, };
2084         struct rt6_info *rt;
2085         int err;
2086
2087         rt = ip6_route_info_create(cfg);
2088         if (IS_ERR(rt)) {
2089                 err = PTR_ERR(rt);
2090                 rt = NULL;
2091                 goto out;
2092         }
2093
2094         err = ip6_convert_metrics(&mxc, cfg);
2095         if (err)
2096                 goto out;
2097
2098         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2099
2100         kfree(mxc.mx);
2101
2102         return err;
2103 out:
2104         if (rt)
2105                 dst_free(&rt->dst);
2106
2107         return err;
2108 }
2109
2110 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2111 {
2112         int err;
2113         struct fib6_table *table;
2114         struct net *net = dev_net(rt->dst.dev);
2115
2116         if (rt == net->ipv6.ip6_null_entry ||
2117             rt->dst.flags & DST_NOCACHE) {
2118                 err = -ENOENT;
2119                 goto out;
2120         }
2121
2122         table = rt->rt6i_table;
2123         write_lock_bh(&table->tb6_lock);
2124         err = fib6_del(rt, info);
2125         write_unlock_bh(&table->tb6_lock);
2126
2127 out:
2128         ip6_rt_put(rt);
2129         return err;
2130 }
2131
2132 int ip6_del_rt(struct rt6_info *rt)
2133 {
2134         struct nl_info info = {
2135                 .nl_net = dev_net(rt->dst.dev),
2136         };
2137         return __ip6_del_rt(rt, &info);
2138 }
2139
2140 static int ip6_route_del(struct fib6_config *cfg)
2141 {
2142         struct fib6_table *table;
2143         struct fib6_node *fn;
2144         struct rt6_info *rt;
2145         int err = -ESRCH;
2146
2147         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2148         if (!table)
2149                 return err;
2150
2151         read_lock_bh(&table->tb6_lock);
2152
2153         fn = fib6_locate(&table->tb6_root,
2154                          &cfg->fc_dst, cfg->fc_dst_len,
2155                          &cfg->fc_src, cfg->fc_src_len);
2156
2157         if (fn) {
2158                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2159                         if ((rt->rt6i_flags & RTF_CACHE) &&
2160                             !(cfg->fc_flags & RTF_CACHE))
2161                                 continue;
2162                         if (cfg->fc_ifindex &&
2163                             (!rt->dst.dev ||
2164                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2165                                 continue;
2166                         if (cfg->fc_flags & RTF_GATEWAY &&
2167                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2168                                 continue;
2169                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2170                                 continue;
2171                         dst_hold(&rt->dst);
2172                         read_unlock_bh(&table->tb6_lock);
2173
2174                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2175                 }
2176         }
2177         read_unlock_bh(&table->tb6_lock);
2178
2179         return err;
2180 }
2181
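/* rt6_do_redirect() validates an ICMPv6 Redirect per RFC 4861 section 8
 * (length, non-multicast destination, link-local unicast target,
 * accept_redirects/forwarding settings, ND options), then updates the
 * neighbour cache and installs an RTF_CACHE clone pointing at the new
 * first hop announced by the router.
 */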
2182 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2183 {
2184         struct netevent_redirect netevent;
2185         struct rt6_info *rt, *nrt = NULL;
2186         struct ndisc_options ndopts;
2187         struct inet6_dev *in6_dev;
2188         struct neighbour *neigh;
2189         struct rd_msg *msg;
2190         int optlen, on_link;
2191         u8 *lladdr;
2192
2193         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2194         optlen -= sizeof(*msg);
2195
2196         if (optlen < 0) {
2197                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2198                 return;
2199         }
2200
2201         msg = (struct rd_msg *)icmp6_hdr(skb);
2202
2203         if (ipv6_addr_is_multicast(&msg->dest)) {
2204                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2205                 return;
2206         }
2207
2208         on_link = 0;
2209         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2210                 on_link = 1;
2211         } else if (ipv6_addr_type(&msg->target) !=
2212                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2213                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2214                 return;
2215         }
2216
2217         in6_dev = __in6_dev_get(skb->dev);
2218         if (!in6_dev)
2219                 return;
2220         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2221                 return;
2222
2223         /* RFC2461 8.1:
2224          *      The IP source address of the Redirect MUST be the same as the current
2225          *      first-hop router for the specified ICMP Destination Address.
2226          */
2227
2228         if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2229                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2230                 return;
2231         }
2232
2233         lladdr = NULL;
2234         if (ndopts.nd_opts_tgt_lladdr) {
2235                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2236                                              skb->dev);
2237                 if (!lladdr) {
2238                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2239                         return;
2240                 }
2241         }
2242
2243         rt = (struct rt6_info *) dst;
2244         if (rt->rt6i_flags & RTF_REJECT) {
2245                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2246                 return;
2247         }
2248
2249         /* Redirect received -> path was valid.
2250          * Look, redirects are sent only in response to data packets,
2251          * so that this nexthop apparently is reachable. --ANK
2252          */
2253         dst_confirm(&rt->dst);
2254
2255         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2256         if (!neigh)
2257                 return;
2258
2259         /*
2260          *      We have finally decided to accept it.
2261          */
2262
2263         ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2264                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2265                      NEIGH_UPDATE_F_OVERRIDE|
2266                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2267                                      NEIGH_UPDATE_F_ISROUTER)),
2268                      NDISC_REDIRECT, &ndopts);
2269
2270         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2271         if (!nrt)
2272                 goto out;
2273
2274         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2275         if (on_link)
2276                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2277
2278         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2279
2280         if (ip6_ins_rt(nrt))
2281                 goto out;
2282
2283         netevent.old = &rt->dst;
2284         netevent.new = &nrt->dst;
2285         netevent.daddr = &msg->dest;
2286         netevent.neigh = neigh;
2287         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2288
2289         if (rt->rt6i_flags & RTF_CACHE) {
2290                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2291                 ip6_del_rt(rt);
2292         }
2293
2294 out:
2295         neigh_release(neigh);
2296 }
2297
2298 /*
2299  *      Misc support functions
2300  */
2301
2302 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2303 {
2304         BUG_ON(from->dst.from);
2305
2306         rt->rt6i_flags &= ~RTF_EXPIRES;
2307         dst_hold(&from->dst);
2308         rt->dst.from = &from->dst;
2309         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2310 }
2311
2312 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2313 {
2314         rt->dst.input = ort->dst.input;
2315         rt->dst.output = ort->dst.output;
2316         rt->rt6i_dst = ort->rt6i_dst;
2317         rt->dst.error = ort->dst.error;
2318         rt->rt6i_idev = ort->rt6i_idev;
2319         if (rt->rt6i_idev)
2320                 in6_dev_hold(rt->rt6i_idev);
2321         rt->dst.lastuse = jiffies;
2322         rt->rt6i_gateway = ort->rt6i_gateway;
2323         rt->rt6i_flags = ort->rt6i_flags;
2324         rt6_set_from(rt, ort);
2325         rt->rt6i_metric = ort->rt6i_metric;
2326 #ifdef CONFIG_IPV6_SUBTREES
2327         rt->rt6i_src = ort->rt6i_src;
2328 #endif
2329         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2330         rt->rt6i_table = ort->rt6i_table;
2331         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2332 }
2333
2334 #ifdef CONFIG_IPV6_ROUTE_INFO
2335 static struct rt6_info *rt6_get_route_info(struct net *net,
2336                                            const struct in6_addr *prefix, int prefixlen,
2337                                            const struct in6_addr *gwaddr,
2338                                            struct net_device *dev)
2339 {
2340         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2341         int ifindex = dev->ifindex;
2342         struct fib6_node *fn;
2343         struct rt6_info *rt = NULL;
2344         struct fib6_table *table;
2345
2346         table = fib6_get_table(net, tb_id);
2347         if (!table)
2348                 return NULL;
2349
2350         read_lock_bh(&table->tb6_lock);
2351         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2352         if (!fn)
2353                 goto out;
2354
2355         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2356                 if (rt->dst.dev->ifindex != ifindex)
2357                         continue;
2358                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2359                         continue;
2360                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2361                         continue;
2362                 dst_hold(&rt->dst);
2363                 break;
2364         }
2365 out:
2366         read_unlock_bh(&table->tb6_lock);
2367         return rt;
2368 }
2369
2370 static struct rt6_info *rt6_add_route_info(struct net *net,
2371                                            const struct in6_addr *prefix, int prefixlen,
2372                                            const struct in6_addr *gwaddr,
2373                                            struct net_device *dev,
2374                                            unsigned int pref)
2375 {
2376         struct fib6_config cfg = {
2377                 .fc_metric      = IP6_RT_PRIO_USER,
2378                 .fc_ifindex     = dev->ifindex,
2379                 .fc_dst_len     = prefixlen,
2380                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2381                                   RTF_UP | RTF_PREF(pref),
2382                 .fc_nlinfo.portid = 0,
2383                 .fc_nlinfo.nlh = NULL,
2384                 .fc_nlinfo.nl_net = net,
2385         };
2386
2387         cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2388         cfg.fc_dst = *prefix;
2389         cfg.fc_gateway = *gwaddr;
2390
2391         /* We should treat it as a default route if prefix length is 0. */
2392         if (!prefixlen)
2393                 cfg.fc_flags |= RTF_DEFAULT;
2394
2395         ip6_route_add(&cfg);
2396
2397         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2398 }
2399 #endif
2400
2401 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2402 {
2403         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2404         struct rt6_info *rt;
2405         struct fib6_table *table;
2406
2407         table = fib6_get_table(dev_net(dev), tb_id);
2408         if (!table)
2409                 return NULL;
2410
2411         read_lock_bh(&table->tb6_lock);
2412         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2413                 if (dev == rt->dst.dev &&
2414                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2415                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2416                         break;
2417         }
2418         if (rt)
2419                 dst_hold(&rt->dst);
2420         read_unlock_bh(&table->tb6_lock);
2421         return rt;
2422 }
2423
2424 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2425                                      struct net_device *dev,
2426                                      unsigned int pref)
2427 {
2428         struct fib6_config cfg = {
2429                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2430                 .fc_metric      = IP6_RT_PRIO_USER,
2431                 .fc_ifindex     = dev->ifindex,
2432                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2433                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2434                 .fc_nlinfo.portid = 0,
2435                 .fc_nlinfo.nlh = NULL,
2436                 .fc_nlinfo.nl_net = dev_net(dev),
2437         };
2438
2439         cfg.fc_gateway = *gwaddr;
2440
2441         if (!ip6_route_add(&cfg)) {
2442                 struct fib6_table *table;
2443
2444                 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2445                 if (table)
2446                         table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2447         }
2448
2449         return rt6_get_dflt_router(gwaddr, dev);
2450 }
2451
2452 static void __rt6_purge_dflt_routers(struct fib6_table *table)
2453 {
2454         struct rt6_info *rt;
2455
2456 restart:
2457         read_lock_bh(&table->tb6_lock);
2458         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2459                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2460                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2461                         dst_hold(&rt->dst);
2462                         read_unlock_bh(&table->tb6_lock);
2463                         ip6_del_rt(rt);
2464                         goto restart;
2465                 }
2466         }
2467         read_unlock_bh(&table->tb6_lock);
2468
2469         table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2470 }
2471
2472 void rt6_purge_dflt_routers(struct net *net)
2473 {
2474         struct fib6_table *table;
2475         struct hlist_head *head;
2476         unsigned int h;
2477
2478         rcu_read_lock();
2479
2480         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2481                 head = &net->ipv6.fib_table_hash[h];
2482                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2483                         if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2484                                 __rt6_purge_dflt_routers(table);
2485                 }
2486         }
2487
2488         rcu_read_unlock();
2489 }
2490
2491 static void rtmsg_to_fib6_config(struct net *net,
2492                                  struct in6_rtmsg *rtmsg,
2493                                  struct fib6_config *cfg)
2494 {
2495         memset(cfg, 0, sizeof(*cfg));
2496
2497         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2498                          : RT6_TABLE_MAIN;
2499         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2500         cfg->fc_metric = rtmsg->rtmsg_metric;
2501         cfg->fc_expires = rtmsg->rtmsg_info;
2502         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2503         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2504         cfg->fc_flags = rtmsg->rtmsg_flags;
2505
2506         cfg->fc_nlinfo.nl_net = net;
2507
2508         cfg->fc_dst = rtmsg->rtmsg_dst;
2509         cfg->fc_src = rtmsg->rtmsg_src;
2510         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2511 }
2512
2513 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2514 {
2515         struct fib6_config cfg;
2516         struct in6_rtmsg rtmsg;
2517         int err;
2518
2519         switch (cmd) {
2520         case SIOCADDRT:         /* Add a route */
2521         case SIOCDELRT:         /* Delete a route */
2522                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2523                         return -EPERM;
2524                 err = copy_from_user(&rtmsg, arg,
2525                                      sizeof(struct in6_rtmsg));
2526                 if (err)
2527                         return -EFAULT;
2528
2529                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2530
2531                 rtnl_lock();
2532                 switch (cmd) {
2533                 case SIOCADDRT:
2534                         err = ip6_route_add(&cfg);
2535                         break;
2536                 case SIOCDELRT:
2537                         err = ip6_route_del(&cfg);
2538                         break;
2539                 default:
2540                         err = -EINVAL;
2541                 }
2542                 rtnl_unlock();
2543
2544                 return err;
2545         }
2546
2547         return -EINVAL;
2548 }
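/* A minimal userspace sketch (not part of this file) of the legacy ioctl
 * interface handled above, assuming a hypothetical interface named "eth0"
 * and the 2001:db8::/64 documentation prefix:
 *
 *	#include <net/route.h>		// struct in6_rtmsg, RTF_UP, RTF_GATEWAY
 *	#include <net/if.h>		// if_nametoindex()
 *	#include <arpa/inet.h>		// inet_pton()
 *	#include <sys/ioctl.h>		// SIOCADDRT
 *	#include <sys/socket.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int add_v6_route(void)
 *	{
 *		struct in6_rtmsg rt;
 *		int err, fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&rt, 0, sizeof(rt));
 *		inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
 *		rt.rtmsg_dst_len = 64;
 *		inet_pton(AF_INET6, "fe80::1", &rt.rtmsg_gateway);
 *		rt.rtmsg_ifindex = if_nametoindex("eth0");
 *		rt.rtmsg_metric  = 1;
 *		rt.rtmsg_flags   = RTF_UP | RTF_GATEWAY;
 *
 *		err = ioctl(fd, SIOCADDRT, &rt);	// lands in ipv6_route_ioctl()
 *		close(fd);
 *		return err;
 *	}
 *
 * The kernel copies the struct in6_rtmsg, converts it with
 * rtmsg_to_fib6_config() and calls ip6_route_add()/ip6_route_del() under the
 * RTNL lock, as shown above.
 */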
2549
2550 /*
2551  *      Drop the packet on the floor
2552  */
2553
2554 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2555 {
2556         int type;
2557         struct dst_entry *dst = skb_dst(skb);
2558         switch (ipstats_mib_noroutes) {
2559         case IPSTATS_MIB_INNOROUTES:
2560                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2561                 if (type == IPV6_ADDR_ANY) {
2562                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2563                                       IPSTATS_MIB_INADDRERRORS);
2564                         break;
2565                 }
2566                 /* FALLTHROUGH */
2567         case IPSTATS_MIB_OUTNOROUTES:
2568                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2569                               ipstats_mib_noroutes);
2570                 break;
2571         }
2572         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2573         kfree_skb(skb);
2574         return 0;
2575 }
2576
2577 static int ip6_pkt_discard(struct sk_buff *skb)
2578 {
2579         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2580 }
2581
2582 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2583 {
2584         skb->dev = skb_dst(skb)->dev;
2585         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2586 }
2587
2588 static int ip6_pkt_prohibit(struct sk_buff *skb)
2589 {
2590         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2591 }
2592
2593 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2594 {
2595         skb->dev = skb_dst(skb)->dev;
2596         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2597 }
2598
2599 /*
2600  *      Allocate a dst for local (unicast / anycast) address.
2601  */
2602
2603 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2604                                     const struct in6_addr *addr,
2605                                     bool anycast)
2606 {
2607         u32 tb_id;
2608         struct net *net = dev_net(idev->dev);
2609         struct net_device *dev = net->loopback_dev;
2610         struct rt6_info *rt;
2611
2612         /* Use the L3 master device instead of loopback for host routes if
2613          * the device is enslaved and the address is not link-local or multicast.
2614          */
2615         if (!rt6_need_strict(addr))
2616                 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2617
2618         rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2619         if (!rt)
2620                 return ERR_PTR(-ENOMEM);
2621
2622         in6_dev_hold(idev);
2623
2624         rt->dst.flags |= DST_HOST;
2625         rt->dst.input = ip6_input;
2626         rt->dst.output = ip6_output;
2627         rt->rt6i_idev = idev;
2628
2629         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2630         if (anycast)
2631                 rt->rt6i_flags |= RTF_ANYCAST;
2632         else
2633                 rt->rt6i_flags |= RTF_LOCAL;
2634
2635         rt->rt6i_gateway  = *addr;
2636         rt->rt6i_dst.addr = *addr;
2637         rt->rt6i_dst.plen = 128;
2638         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2639         rt->rt6i_table = fib6_get_table(net, tb_id);
2640         rt->dst.flags |= DST_NOCACHE;
2641
2642         atomic_set(&rt->dst.__refcnt, 1);
2643
2644         return rt;
2645 }
2646
2647 /* Remove a deleted IP address from prefsrc entries. */
2648 struct arg_dev_net_ip {
2649         struct net_device *dev;
2650         struct net *net;
2651         struct in6_addr *addr;
2652 };
2653
2654 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2655 {
2656         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2657         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2658         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2659
2660         if (((void *)rt->dst.dev == dev || !dev) &&
2661             rt != net->ipv6.ip6_null_entry &&
2662             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2663                 /* remove prefsrc entry */
2664                 rt->rt6i_prefsrc.plen = 0;
2665         }
2666         return 0;
2667 }
2668
2669 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2670 {
2671         struct net *net = dev_net(ifp->idev->dev);
2672         struct arg_dev_net_ip adni = {
2673                 .dev = ifp->idev->dev,
2674                 .net = net,
2675                 .addr = &ifp->addr,
2676         };
2677         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2678 }
2679
2680 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2681 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2682
2683 /* Remove routers and update dst entries when a gateway turns into a host. */
2684 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2685 {
2686         struct in6_addr *gateway = (struct in6_addr *)arg;
2687
2688         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2689              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2690              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2691                 return -1;
2692         }
2693         return 0;
2694 }
2695
2696 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2697 {
2698         fib6_clean_all(net, fib6_clean_tohost, gateway);
2699 }
2700
2701 struct arg_dev_net {
2702         struct net_device *dev;
2703         struct net *net;
2704 };
2705
2706 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2707 {
2708         const struct arg_dev_net *adn = arg;
2709         const struct net_device *dev = adn->dev;
2710
2711         if ((rt->dst.dev == dev || !dev) &&
2712             rt != adn->net->ipv6.ip6_null_entry)
2713                 return -1;
2714
2715         return 0;
2716 }
2717
2718 void rt6_ifdown(struct net *net, struct net_device *dev)
2719 {
2720         struct arg_dev_net adn = {
2721                 .dev = dev,
2722                 .net = net,
2723         };
2724
2725         fib6_clean_all(net, fib6_ifdown, &adn);
2726         icmp6_clean_all(fib6_ifdown, &adn);
2727         if (dev)
2728                 rt6_uncached_list_flush_dev(net, dev);
2729 }
2730
2731 struct rt6_mtu_change_arg {
2732         struct net_device *dev;
2733         unsigned int mtu;
2734 };
2735
2736 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2737 {
2738         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2739         struct inet6_dev *idev;
2740
2741         /* In IPv6, PMTU discovery is not optional,
2742            so the RTAX_MTU lock cannot disable it.
2743            We still use this lock to block changes
2744            caused by addrconf/ndisc.
2745         */
2746
2747         idev = __in6_dev_get(arg->dev);
2748         if (!idev)
2749                 return 0;
2750
2751         /* For an administrative MTU increase, there is no way to discover
2752            an IPv6 PMTU increase, so the PMTU must be updated here.
2753            Since RFC 1981 doesn't cover administrative MTU increases,
2754            updating the PMTU on increase is a MUST (e.g. jumbo frames).
2755          */
2756         /*
2757            If the new MTU is less than the route PMTU, this new MTU will be
2758            the lowest MTU in the path; update the route PMTU to reflect the
2759            decrease. If the new MTU is greater than the route PMTU, and the
2760            old MTU was the lowest MTU in the path, update the route PMTU
2761            to reflect the increase. In that case, if another node on the
2762            path still has a lower MTU, a Packet Too Big message will trigger
2763            PMTU discovery again.
2764          */
2765         if (rt->dst.dev == arg->dev &&
2766             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2767                 if (rt->rt6i_flags & RTF_CACHE) {
2768                         /* For RTF_CACHE with rt6i_pmtu == 0
2769                          * (i.e. a redirected route),
2770                          * the metrics of its rt->dst.from have already
2771                          * been updated.
2772                          */
2773                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2774                                 rt->rt6i_pmtu = arg->mtu;
2775                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2776                            (dst_mtu(&rt->dst) < arg->mtu &&
2777                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2778                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2779                 }
2780         }
2781         return 0;
2782 }
2783
2784 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2785 {
2786         struct rt6_mtu_change_arg arg = {
2787                 .dev = dev,
2788                 .mtu = mtu,
2789         };
2790
2791         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2792 }
2793
2794 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2795         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2796         [RTA_OIF]               = { .type = NLA_U32 },
2797         [RTA_IIF]               = { .type = NLA_U32 },
2798         [RTA_PRIORITY]          = { .type = NLA_U32 },
2799         [RTA_METRICS]           = { .type = NLA_NESTED },
2800         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2801         [RTA_PREF]              = { .type = NLA_U8 },
2802         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2803         [RTA_ENCAP]             = { .type = NLA_NESTED },
2804         [RTA_EXPIRES]           = { .type = NLA_U32 },
2805         [RTA_UID]               = { .type = NLA_U32 },
2806 };
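/* RTA_UID ties in with the UID-based routing this change introduces: it lets
 * a netlink route request carry the user ID on whose behalf the lookup is
 * performed, matching the flowi6_uid initialization in ip6_redirect() above.
 */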
2807
2808 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2809                               struct fib6_config *cfg)
2810 {
2811         struct rtmsg *rtm;
2812         struct nlattr *tb[RTA_MAX+1];
2813         unsigned int pref;
2814         int err;
2815
2816         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2817         if (err < 0)
2818                 goto errout;
2819
2820         err = -EINVAL;
2821         rtm = nlmsg_data(nlh);
2822         memset(cfg, 0, sizeof(*cfg));
2823
2824         cfg->fc_table = rtm->rtm_table;
2825         cfg->fc_dst_len = rtm->rtm_dst_len;
2826         cfg->fc_src_len = rtm->rtm_src_len;
2827         cfg->fc_flags = RTF_UP;
2828         cfg->fc_protocol = rtm->rtm_protocol;
2829         cfg->fc_type = rtm->rtm_type;
2830
2831         if (rtm->rtm_type == RTN_UNREACHABLE ||
2832             rtm->rtm_type == RTN_BLACKHOLE ||
2833             rtm->rtm_type == RTN_PROHIBIT ||
2834             rtm->rtm_type == RTN_THROW)
2835                 cfg->fc_flags |= RTF_REJECT;
2836
2837         if (rtm->rtm_type == RTN_LOCAL)
2838                 cfg->fc_flags |= RTF_LOCAL;
2839
2840         if (rtm->rtm_flags & RTM_F_CLONED)
2841                 cfg->fc_flags |= RTF_CACHE;
2842
2843         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2844         cfg->fc_nlinfo.nlh = nlh;
2845         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2846
2847         if (tb[RTA_GATEWAY]) {
2848                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2849                 cfg->fc_flags |= RTF_GATEWAY;
2850         }
2851
2852         if (tb[RTA_DST]) {
2853                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2854
2855                 if (nla_len(tb[RTA_DST]) < plen)
2856                         goto errout;
2857
2858                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2859         }
2860
2861         if (tb[RTA_SRC]) {
2862                 int plen = (rtm->rtm_src_len + 7) >> 3;
2863
2864                 if (nla_len(tb[RTA_SRC]) < plen)
2865                         goto errout;
2866
2867                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2868         }
2869
2870         if (tb[RTA_PREFSRC])
2871                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2872
2873         if (tb[RTA_OIF])
2874                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2875
2876         if (tb[RTA_PRIORITY])
2877                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2878
2879         if (tb[RTA_METRICS]) {
2880                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2881                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2882         }
2883
2884         if (tb[RTA_TABLE])
2885                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2886
2887         if (tb[RTA_MULTIPATH]) {
2888                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2889                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2890         }
2891
2892         if (tb[RTA_PREF]) {
2893                 pref = nla_get_u8(tb[RTA_PREF]);
2894                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2895                     pref != ICMPV6_ROUTER_PREF_HIGH)
2896                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2897                 cfg->fc_flags |= RTF_PREF(pref);
2898         }
2899
2900         if (tb[RTA_ENCAP])
2901                 cfg->fc_encap = tb[RTA_ENCAP];
2902
2903         if (tb[RTA_ENCAP_TYPE])
2904                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2905
2906         if (tb[RTA_EXPIRES]) {
2907                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2908
2909                 if (addrconf_finite_timeout(timeout)) {
2910                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2911                         cfg->fc_flags |= RTF_EXPIRES;
2912                 }
2913         }
2914
2915         err = 0;
2916 errout:
2917         return err;
2918 }
2919
2920 struct rt6_nh {
2921         struct rt6_info *rt6_info;
2922         struct fib6_config r_cfg;
2923         struct mx6_config mxc;
2924         struct list_head next;
2925 };
2926
2927 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2928 {
2929         struct rt6_nh *nh;
2930
2931         list_for_each_entry(nh, rt6_nh_list, next) {
2932                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2933                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2934                         nh->r_cfg.fc_ifindex);
2935         }
2936 }
2937
2938 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2939                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2940 {
2941         struct rt6_nh *nh;
2942         struct rt6_info *rtnh;
2943         int err = -EEXIST;
2944
2945         list_for_each_entry(nh, rt6_nh_list, next) {
2946                 /* check if rt6_info already exists */
2947                 rtnh = nh->rt6_info;
2948
2949                 if (rtnh->dst.dev == rt->dst.dev &&
2950                     rtnh->rt6i_idev == rt->rt6i_idev &&
2951                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2952                                     &rt->rt6i_gateway))
2953                         return err;
2954         }
2955
2956         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2957         if (!nh)
2958                 return -ENOMEM;
2959         nh->rt6_info = rt;
2960         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2961         if (err) {
2962                 kfree(nh);
2963                 return err;
2964         }
2965         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2966         list_add_tail(&nh->next, rt6_nh_list);
2967
2968         return 0;
2969 }
2970
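/* An RTA_MULTIPATH attribute carries a packed sequence of struct rtnexthop
 * entries (rtnh_len, rtnh_flags, rtnh_hops, rtnh_ifindex), each optionally
 * followed by its own nested attributes such as RTA_GATEWAY or RTA_ENCAP;
 * the parsers below walk that sequence with rtnh_ok()/rtnh_next() and act on
 * one nexthop at a time.
 */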
2971 static int ip6_route_multipath_add(struct fib6_config *cfg)
2972 {
2973         struct fib6_config r_cfg;
2974         struct rtnexthop *rtnh;
2975         struct rt6_info *rt;
2976         struct rt6_nh *err_nh;
2977         struct rt6_nh *nh, *nh_safe;
2978         int remaining;
2979         int attrlen;
2980         int err = 1;
2981         int nhn = 0;
2982         int replace = (cfg->fc_nlinfo.nlh &&
2983                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2984         LIST_HEAD(rt6_nh_list);
2985
2986         remaining = cfg->fc_mp_len;
2987         rtnh = (struct rtnexthop *)cfg->fc_mp;
2988
2989         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2990          * rt6_info structs per nexthop
2991          */
2992         while (rtnh_ok(rtnh, remaining)) {
2993                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2994                 if (rtnh->rtnh_ifindex)
2995                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2996
2997                 attrlen = rtnh_attrlen(rtnh);
2998                 if (attrlen > 0) {
2999                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3000
3001                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3002                         if (nla) {
3003                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
3004                                 r_cfg.fc_flags |= RTF_GATEWAY;
3005                         }
3006                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3007                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3008                         if (nla)
3009                                 r_cfg.fc_encap_type = nla_get_u16(nla);
3010                 }
3011
3012                 rt = ip6_route_info_create(&r_cfg);
3013                 if (IS_ERR(rt)) {
3014                         err = PTR_ERR(rt);
3015                         rt = NULL;
3016                         goto cleanup;
3017                 }
3018
3019                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3020                 if (err) {
3021                         dst_free(&rt->dst);
3022                         goto cleanup;
3023                 }
3024
3025                 rtnh = rtnh_next(rtnh, &remaining);
3026         }
3027
3028         err_nh = NULL;
3029         list_for_each_entry(nh, &rt6_nh_list, next) {
3030                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3031                 /* nh->rt6_info is used or freed at this point, reset to NULL */
3032                 nh->rt6_info = NULL;
3033                 if (err) {
3034                         if (replace && nhn)
3035                                 ip6_print_replace_route_err(&rt6_nh_list);
3036                         err_nh = nh;
3037                         goto add_errout;
3038                 }
3039
3040                 /* Because each route is added like a single route, we remove
3041                  * these flags after the first nexthop: if there is a collision,
3042                  * we have already failed to add the first nexthop, since
3043                  * fib6_add_rt2node() has rejected it; when replacing, the old
3044                  * nexthops have already been replaced by the first new one, and
3045                  * the rest should simply be added to it.
3046                  */
3047                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3048                                                      NLM_F_REPLACE);
3049                 nhn++;
3050         }
3051
3052         goto cleanup;
3053
3054 add_errout:
3055         /* Delete routes that were already added */
3056         list_for_each_entry(nh, &rt6_nh_list, next) {
3057                 if (err_nh == nh)
3058                         break;
3059                 ip6_route_del(&nh->r_cfg);
3060         }
3061
3062 cleanup:
3063         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3064                 if (nh->rt6_info)
3065                         dst_free(&nh->rt6_info->dst);
3066                 kfree(nh->mxc.mx);
3067                 list_del(&nh->next);
3068                 kfree(nh);
3069         }
3070
3071         return err;
3072 }
3073
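/* Delete a multipath route: each rtnexthop in cfg->fc_mp is deleted as an
 * individual route.  Deletion continues past failures; the last error seen,
 * if any, is returned.
 */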
3074 static int ip6_route_multipath_del(struct fib6_config *cfg)
3075 {
3076         struct fib6_config r_cfg;
3077         struct rtnexthop *rtnh;
3078         int remaining;
3079         int attrlen;
3080         int err = 1, last_err = 0;
3081
3082         remaining = cfg->fc_mp_len;
3083         rtnh = (struct rtnexthop *)cfg->fc_mp;
3084
3085         /* Parse a Multipath Entry */
3086         while (rtnh_ok(rtnh, remaining)) {
3087                 memcpy(&r_cfg, cfg, sizeof(*cfg));
3088                 if (rtnh->rtnh_ifindex)
3089                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3090
3091                 attrlen = rtnh_attrlen(rtnh);
3092                 if (attrlen > 0) {
3093                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3094
3095                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3096                         if (nla) {
3097                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3098                                 r_cfg.fc_flags |= RTF_GATEWAY;
3099                         }
3100                 }
3101                 err = ip6_route_del(&r_cfg);
3102                 if (err)
3103                         last_err = err;
3104
3105                 rtnh = rtnh_next(rtnh, &remaining);
3106         }
3107
3108         return last_err;
3109 }
3110
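/* RTM_DELROUTE handler: translate the netlink request into a fib6_config and
 * dispatch to the multipath or single-route delete path.
 */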
3111 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3112 {
3113         struct fib6_config cfg;
3114         int err;
3115
3116         err = rtm_to_fib6_config(skb, nlh, &cfg);
3117         if (err < 0)
3118                 return err;
3119
3120         if (cfg.fc_mp)
3121                 return ip6_route_multipath_del(&cfg);
3122         else
3123                 return ip6_route_del(&cfg);
3124 }
3125
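/* RTM_NEWROUTE handler: translate the netlink request into a fib6_config and
 * dispatch to the multipath or single-route add path.
 */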
3126 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3127 {
3128         struct fib6_config cfg;
3129         int err;
3130
3131         err = rtm_to_fib6_config(skb, nlh, &cfg);
3132         if (err < 0)
3133                 return err;
3134
3135         if (cfg.fc_mp)
3136                 return ip6_route_multipath_add(&cfg);
3137         else
3138                 return ip6_route_add(&cfg);
3139 }
3140
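/* Upper bound on the netlink message size rt6_fill_node() may emit for @rt;
 * used to size the skb allocated in inet6_rt_notify().
 */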
3141 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3142 {
3143         return NLMSG_ALIGN(sizeof(struct rtmsg))
3144                + nla_total_size(16) /* RTA_SRC */
3145                + nla_total_size(16) /* RTA_DST */
3146                + nla_total_size(16) /* RTA_GATEWAY */
3147                + nla_total_size(16) /* RTA_PREFSRC */
3148                + nla_total_size(4) /* RTA_TABLE */
3149                + nla_total_size(4) /* RTA_IIF */
3150                + nla_total_size(4) /* RTA_OIF */
3151                + nla_total_size(4) /* RTA_PRIORITY */
3152                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3153                + nla_total_size(sizeof(struct rta_cacheinfo))
3154                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3155                + nla_total_size(1) /* RTA_PREF */
3156                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3157 }
3158
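/* Fill one route netlink message (@type, e.g. RTM_NEWROUTE) for @rt into
 * @skb.  Returns 1 when the route is filtered out of a prefix-only dump,
 * -EMSGSIZE when the skb runs out of room, and 0 otherwise.
 */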
3159 static int rt6_fill_node(struct net *net,
3160                          struct sk_buff *skb, struct rt6_info *rt,
3161                          struct in6_addr *dst, struct in6_addr *src,
3162                          int iif, int type, u32 portid, u32 seq,
3163                          int prefix, int nowait, unsigned int flags)
3164 {
3165         u32 metrics[RTAX_MAX];
3166         struct rtmsg *rtm;
3167         struct nlmsghdr *nlh;
3168         long expires;
3169         u32 table;
3170
3171         if (prefix) {   /* user wants prefix routes only */
3172                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3173                         /* success since this is not a prefix route */
3174                         return 1;
3175                 }
3176         }
3177
3178         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3179         if (!nlh)
3180                 return -EMSGSIZE;
3181
3182         rtm = nlmsg_data(nlh);
3183         rtm->rtm_family = AF_INET6;
3184         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3185         rtm->rtm_src_len = rt->rt6i_src.plen;
3186         rtm->rtm_tos = 0;
3187         if (rt->rt6i_table)
3188                 table = rt->rt6i_table->tb6_id;
3189         else
3190                 table = RT6_TABLE_UNSPEC;
3191         rtm->rtm_table = table;
3192         if (nla_put_u32(skb, RTA_TABLE, table))
3193                 goto nla_put_failure;
3194         if (rt->rt6i_flags & RTF_REJECT) {
3195                 switch (rt->dst.error) {
3196                 case -EINVAL:
3197                         rtm->rtm_type = RTN_BLACKHOLE;
3198                         break;
3199                 case -EACCES:
3200                         rtm->rtm_type = RTN_PROHIBIT;
3201                         break;
3202                 case -EAGAIN:
3203                         rtm->rtm_type = RTN_THROW;
3204                         break;
3205                 default:
3206                         rtm->rtm_type = RTN_UNREACHABLE;
3207                         break;
3208                 }
3209         }
3210         else if (rt->rt6i_flags & RTF_LOCAL)
3211                 rtm->rtm_type = RTN_LOCAL;
3212         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3213                 rtm->rtm_type = RTN_LOCAL;
3214         else
3215                 rtm->rtm_type = RTN_UNICAST;
3216         rtm->rtm_flags = 0;
3217         if (!netif_carrier_ok(rt->dst.dev)) {
3218                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3219                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3220                         rtm->rtm_flags |= RTNH_F_DEAD;
3221         }
3222         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3223         rtm->rtm_protocol = rt->rt6i_protocol;
3224         if (rt->rt6i_flags & RTF_DYNAMIC)
3225                 rtm->rtm_protocol = RTPROT_REDIRECT;
3226         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3227                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3228                         rtm->rtm_protocol = RTPROT_RA;
3229                 else
3230                         rtm->rtm_protocol = RTPROT_KERNEL;
3231         }
3232
3233         if (rt->rt6i_flags & RTF_CACHE)
3234                 rtm->rtm_flags |= RTM_F_CLONED;
3235
3236         if (dst) {
3237                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3238                         goto nla_put_failure;
3239                 rtm->rtm_dst_len = 128;
3240         } else if (rtm->rtm_dst_len)
3241                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3242                         goto nla_put_failure;
3243 #ifdef CONFIG_IPV6_SUBTREES
3244         if (src) {
3245                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3246                         goto nla_put_failure;
3247                 rtm->rtm_src_len = 128;
3248         } else if (rtm->rtm_src_len &&
3249                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3250                 goto nla_put_failure;
3251 #endif
3252         if (iif) {
3253 #ifdef CONFIG_IPV6_MROUTE
3254                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3255                         int err = ip6mr_get_route(net, skb, rtm, nowait,
3256                                                   portid);
3257
3258                         if (err <= 0) {
3259                                 if (!nowait) {
3260                                         if (err == 0)
3261                                                 return 0;
3262                                         goto nla_put_failure;
3263                                 } else {
3264                                         if (err == -EMSGSIZE)
3265                                                 goto nla_put_failure;
3266                                 }
3267                         }
3268                 } else
3269 #endif
3270                         if (nla_put_u32(skb, RTA_IIF, iif))
3271                                 goto nla_put_failure;
3272         } else if (dst) {
3273                 struct in6_addr saddr_buf;
3274                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3275                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3276                         goto nla_put_failure;
3277         }
3278
3279         if (rt->rt6i_prefsrc.plen) {
3280                 struct in6_addr saddr_buf;
3281                 saddr_buf = rt->rt6i_prefsrc.addr;
3282                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3283                         goto nla_put_failure;
3284         }
3285
3286         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3287         if (rt->rt6i_pmtu)
3288                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3289         if (rtnetlink_put_metrics(skb, metrics) < 0)
3290                 goto nla_put_failure;
3291
3292         if (rt->rt6i_flags & RTF_GATEWAY) {
3293                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3294                         goto nla_put_failure;
3295         }
3296
3297         if (rt->dst.dev &&
3298             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3299                 goto nla_put_failure;
3300         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3301                 goto nla_put_failure;
3302
3303         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3304
3305         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3306                 goto nla_put_failure;
3307
3308         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3309                 goto nla_put_failure;
3310
3311         lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3312
3313         nlmsg_end(skb, nlh);
3314         return 0;
3315
3316 nla_put_failure:
3317         nlmsg_cancel(skb, nlh);
3318         return -EMSGSIZE;
3319 }
3320
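/* Per-route callback used while dumping the FIB to netlink: honours the
 * RTM_F_PREFIX filter from the request and emits the route via
 * rt6_fill_node() with NLM_F_MULTI set.
 */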
3321 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3322 {
3323         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3324         int prefix;
3325
3326         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3327                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3328                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3329         } else
3330                 prefix = 0;
3331
3332         return rt6_fill_node(arg->net,
3333                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3334                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3335                      prefix, 0, NLM_F_MULTI);
3336 }
3337
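/* RTM_GETROUTE handler: build a flow from the request attributes (source,
 * destination, iif, oif, fwmark, uid), perform an input or output route
 * lookup and unicast the resulting route back to the requester.
 * Illustrative userspace query (assuming iproute2 syntax):
 *   ip -6 route get 2001:db8::1 mark 0x10
 */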
3338 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3339 {
3340         struct net *net = sock_net(in_skb->sk);
3341         struct nlattr *tb[RTA_MAX+1];
3342         struct rt6_info *rt;
3343         struct sk_buff *skb;
3344         struct rtmsg *rtm;
3345         struct flowi6 fl6;
3346         int err, iif = 0, oif = 0;
3347
3348         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3349         if (err < 0)
3350                 goto errout;
3351
3352         err = -EINVAL;
3353         memset(&fl6, 0, sizeof(fl6));
3354         rtm = nlmsg_data(nlh);
3355         fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3356
3357         if (tb[RTA_SRC]) {
3358                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3359                         goto errout;
3360
3361                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3362         }
3363
3364         if (tb[RTA_DST]) {
3365                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3366                         goto errout;
3367
3368                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3369         }
3370
3371         if (tb[RTA_IIF])
3372                 iif = nla_get_u32(tb[RTA_IIF]);
3373
3374         if (tb[RTA_OIF])
3375                 oif = nla_get_u32(tb[RTA_OIF]);
3376
3377         if (tb[RTA_MARK])
3378                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3379
3380         if (tb[RTA_UID])
3381                 fl6.flowi6_uid = make_kuid(current_user_ns(),
3382                                            nla_get_u32(tb[RTA_UID]));
3383         else
3384                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3385
3386         if (iif) {
3387                 struct net_device *dev;
3388                 int flags = 0;
3389
3390                 dev = __dev_get_by_index(net, iif);
3391                 if (!dev) {
3392                         err = -ENODEV;
3393                         goto errout;
3394                 }
3395
3396                 fl6.flowi6_iif = iif;
3397
3398                 if (!ipv6_addr_any(&fl6.saddr))
3399                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3400
3401                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3402                                                                flags);
3403         } else {
3404                 fl6.flowi6_oif = oif;
3405
3406                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3407         }
3408
3409         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3410         if (!skb) {
3411                 ip6_rt_put(rt);
3412                 err = -ENOBUFS;
3413                 goto errout;
3414         }
3415
3416         /* Reserve room for dummy headers; this skb can pass
3417          * through a good chunk of the routing engine.
3418          */
3419         skb_reset_mac_header(skb);
3420         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3421
3422         skb_dst_set(skb, &rt->dst);
3423
3424         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3425                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3426                             nlh->nlmsg_seq, 0, 0, 0);
3427         if (err < 0) {
3428                 kfree_skb(skb);
3429                 goto errout;
3430         }
3431
3432         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3433 errout:
3434         return err;
3435 }
3436
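/* Notify RTNLGRP_IPV6_ROUTE listeners of a route change.  The skb is sized
 * via rt6_nlmsg_size(), so -EMSGSIZE from rt6_fill_node() here means that
 * estimate is buggy (hence the WARN_ON below).
 */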
3437 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3438                      unsigned int nlm_flags)
3439 {
3440         struct sk_buff *skb;
3441         struct net *net = info->nl_net;
3442         u32 seq;
3443         int err;
3444
3445         err = -ENOBUFS;
3446         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3447
3448         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3449         if (!skb)
3450                 goto errout;
3451
3452         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3453                                 event, info->portid, seq, 0, 0, nlm_flags);
3454         if (err < 0) {
3455                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3456                 WARN_ON(err == -EMSGSIZE);
3457                 kfree_skb(skb);
3458                 goto errout;
3459         }
3460         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3461                     info->nlh, gfp_any());
3462         return;
3463 errout:
3464         if (err < 0)
3465                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3466 }
3467
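/* Netdevice notifier: once a namespace's loopback device is registered,
 * point the null (and, with CONFIG_IPV6_MULTIPLE_TABLES, the prohibit and
 * blackhole) template routes at it.
 */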
3468 static int ip6_route_dev_notify(struct notifier_block *this,
3469                                 unsigned long event, void *ptr)
3470 {
3471         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3472         struct net *net = dev_net(dev);
3473
3474         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3475                 net->ipv6.ip6_null_entry->dst.dev = dev;
3476                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3477 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3478                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3479                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3480                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3481                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3482 #endif
3483         }
3484
3485         return NOTIFY_OK;
3486 }
3487
3488 /*
3489  *      /proc
3490  */
3491
3492 #ifdef CONFIG_PROC_FS
3493
3494 static const struct file_operations ipv6_route_proc_fops = {
3495         .owner          = THIS_MODULE,
3496         .open           = ipv6_route_open,
3497         .read           = seq_read,
3498         .llseek         = seq_lseek,
3499         .release        = seq_release_net,
3500 };
3501
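/* /proc/net/rt6_stats: seven hex fields - fib nodes, route nodes, allocated
 * rt6_info structs, route entries, cached routes, dst entries in use and
 * discarded routes.
 */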
3502 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3503 {
3504         struct net *net = (struct net *)seq->private;
3505         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3506                    net->ipv6.rt6_stats->fib_nodes,
3507                    net->ipv6.rt6_stats->fib_route_nodes,
3508                    net->ipv6.rt6_stats->fib_rt_alloc,
3509                    net->ipv6.rt6_stats->fib_rt_entries,
3510                    net->ipv6.rt6_stats->fib_rt_cache,
3511                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3512                    net->ipv6.rt6_stats->fib_discarded_routes);
3513
3514         return 0;
3515 }
3516
3517 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3518 {
3519         return single_open_net(inode, file, rt6_stats_seq_show);
3520 }
3521
3522 static const struct file_operations rt6_stats_seq_fops = {
3523         .owner   = THIS_MODULE,
3524         .open    = rt6_stats_seq_open,
3525         .read    = seq_read,
3526         .llseek  = seq_lseek,
3527         .release = single_release_net,
3528 };
3529 #endif  /* CONFIG_PROC_FS */
3530
3531 #ifdef CONFIG_SYSCTL
3532
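/* Handler for the write-only "flush" sysctl (typically exposed as
 * /proc/sys/net/ipv6/route/flush; the exact path depends on where the table
 * is registered): any write kicks the FIB6 garbage collector.  Illustrative
 * use:
 *   echo 1 > /proc/sys/net/ipv6/route/flush
 */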
3533 static
3534 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3535                               void __user *buffer, size_t *lenp, loff_t *ppos)
3536 {
3537         struct net *net;
3538         int delay;
3539         if (!write)
3540                 return -EINVAL;
3541
3542         net = (struct net *)ctl->extra1;
3543         delay = net->ipv6.sysctl.flush_delay;
3544         proc_dointvec(ctl, write, buffer, lenp, ppos);
3545         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3546         return 0;
3547 }
3548
3549 struct ctl_table ipv6_route_table_template[] = {
3550         {
3551                 .procname       =       "flush",
3552                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3553                 .maxlen         =       sizeof(int),
3554                 .mode           =       0200,
3555                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3556         },
3557         {
3558                 .procname       =       "gc_thresh",
3559                 .data           =       &ip6_dst_ops_template.gc_thresh,
3560                 .maxlen         =       sizeof(int),
3561                 .mode           =       0644,
3562                 .proc_handler   =       proc_dointvec,
3563         },
3564         {
3565                 .procname       =       "max_size",
3566                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3567                 .maxlen         =       sizeof(int),
3568                 .mode           =       0644,
3569                 .proc_handler   =       proc_dointvec,
3570         },
3571         {
3572                 .procname       =       "gc_min_interval",
3573                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3574                 .maxlen         =       sizeof(int),
3575                 .mode           =       0644,
3576                 .proc_handler   =       proc_dointvec_jiffies,
3577         },
3578         {
3579                 .procname       =       "gc_timeout",
3580                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3581                 .maxlen         =       sizeof(int),
3582                 .mode           =       0644,
3583                 .proc_handler   =       proc_dointvec_jiffies,
3584         },
3585         {
3586                 .procname       =       "gc_interval",
3587                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3588                 .maxlen         =       sizeof(int),
3589                 .mode           =       0644,
3590                 .proc_handler   =       proc_dointvec_jiffies,
3591         },
3592         {
3593                 .procname       =       "gc_elasticity",
3594                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3595                 .maxlen         =       sizeof(int),
3596                 .mode           =       0644,
3597                 .proc_handler   =       proc_dointvec,
3598         },
3599         {
3600                 .procname       =       "mtu_expires",
3601                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3602                 .maxlen         =       sizeof(int),
3603                 .mode           =       0644,
3604                 .proc_handler   =       proc_dointvec_jiffies,
3605         },
3606         {
3607                 .procname       =       "min_adv_mss",
3608                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3609                 .maxlen         =       sizeof(int),
3610                 .mode           =       0644,
3611                 .proc_handler   =       proc_dointvec,
3612         },
3613         {
3614                 .procname       =       "gc_min_interval_ms",
3615                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3616                 .maxlen         =       sizeof(int),
3617                 .mode           =       0644,
3618                 .proc_handler   =       proc_dointvec_ms_jiffies,
3619         },
3620         { }
3621 };
3622
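/* Duplicate the sysctl template for a namespace and rebind each entry's
 * ->data to the per-netns fields; for namespaces other than the initial user
 * namespace the first procname is cleared so the sysctls are not exported.
 */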
3623 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3624 {
3625         struct ctl_table *table;
3626
3627         table = kmemdup(ipv6_route_table_template,
3628                         sizeof(ipv6_route_table_template),
3629                         GFP_KERNEL);
3630
3631         if (table) {
3632                 table[0].data = &net->ipv6.sysctl.flush_delay;
3633                 table[0].extra1 = net;
3634                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3635                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3636                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3637                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3638                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3639                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3640                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3641                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3642                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3643
3644                 /* Don't export sysctls to unprivileged users */
3645                 if (net->user_ns != &init_user_ns)
3646                         table[0].procname = NULL;
3647         }
3648
3649         return table;
3650 }
3651 #endif
3652
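/* Per-namespace init: clone the dst_ops and the null (plus, with multiple
 * tables, prohibit and blackhole) template routes, and set this netns'
 * default GC, MTU-expiry and advmss sysctl values.
 */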
3653 static int __net_init ip6_route_net_init(struct net *net)
3654 {
3655         int ret = -ENOMEM;
3656
3657         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3658                sizeof(net->ipv6.ip6_dst_ops));
3659
3660         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3661                 goto out_ip6_dst_ops;
3662
3663         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3664                                            sizeof(*net->ipv6.ip6_null_entry),
3665                                            GFP_KERNEL);
3666         if (!net->ipv6.ip6_null_entry)
3667                 goto out_ip6_dst_entries;
3668         net->ipv6.ip6_null_entry->dst.path =
3669                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3670         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3671         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3672                          ip6_template_metrics, true);
3673
3674 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3675         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3676                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3677                                                GFP_KERNEL);
3678         if (!net->ipv6.ip6_prohibit_entry)
3679                 goto out_ip6_null_entry;
3680         net->ipv6.ip6_prohibit_entry->dst.path =
3681                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3682         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3683         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3684                          ip6_template_metrics, true);
3685
3686         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3687                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3688                                                GFP_KERNEL);
3689         if (!net->ipv6.ip6_blk_hole_entry)
3690                 goto out_ip6_prohibit_entry;
3691         net->ipv6.ip6_blk_hole_entry->dst.path =
3692                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3693         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3694         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3695                          ip6_template_metrics, true);
3696 #endif
3697
3698         net->ipv6.sysctl.flush_delay = 0;
3699         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3700         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3701         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3702         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3703         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3704         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3705         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3706
3707         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3708
3709         ret = 0;
3710 out:
3711         return ret;
3712
3713 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3714 out_ip6_prohibit_entry:
3715         kfree(net->ipv6.ip6_prohibit_entry);
3716 out_ip6_null_entry:
3717         kfree(net->ipv6.ip6_null_entry);
3718 #endif
3719 out_ip6_dst_entries:
3720         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3721 out_ip6_dst_ops:
3722         goto out;
3723 }
3724
3725 static void __net_exit ip6_route_net_exit(struct net *net)
3726 {
3727         kfree(net->ipv6.ip6_null_entry);
3728 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3729         kfree(net->ipv6.ip6_prohibit_entry);
3730         kfree(net->ipv6.ip6_blk_hole_entry);
3731 #endif
3732         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3733 }
3734
3735 static int __net_init ip6_route_net_init_late(struct net *net)
3736 {
3737 #ifdef CONFIG_PROC_FS
3738         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3739         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3740 #endif
3741         return 0;
3742 }
3743
3744 static void __net_exit ip6_route_net_exit_late(struct net *net)
3745 {
3746 #ifdef CONFIG_PROC_FS
3747         remove_proc_entry("ipv6_route", net->proc_net);
3748         remove_proc_entry("rt6_stats", net->proc_net);
3749 #endif
3750 }
3751
3752 static struct pernet_operations ip6_route_net_ops = {
3753         .init = ip6_route_net_init,
3754         .exit = ip6_route_net_exit,
3755 };
3756
3757 static int __net_init ipv6_inetpeer_init(struct net *net)
3758 {
3759         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3760
3761         if (!bp)
3762                 return -ENOMEM;
3763         inet_peer_base_init(bp);
3764         net->ipv6.peers = bp;
3765         return 0;
3766 }
3767
3768 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3769 {
3770         struct inet_peer_base *bp = net->ipv6.peers;
3771
3772         net->ipv6.peers = NULL;
3773         inetpeer_invalidate_tree(bp);
3774         kfree(bp);
3775 }
3776
3777 static struct pernet_operations ipv6_inetpeer_ops = {
3778         .init   =       ipv6_inetpeer_init,
3779         .exit   =       ipv6_inetpeer_exit,
3780 };
3781
3782 static struct pernet_operations ip6_route_net_late_ops = {
3783         .init = ip6_route_net_init_late,
3784         .exit = ip6_route_net_exit_late,
3785 };
3786
3787 static struct notifier_block ip6_route_dev_notifier = {
3788         .notifier_call = ip6_route_dev_notify,
3789         .priority = 0,
3790 };
3791
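/* Subsystem init: create the "ip6_dst_cache" slab, register the pernet and
 * inetpeer ops, bring up fib6, xfrm6 and policy rules, hook up the
 * RTM_{NEW,DEL,GET}ROUTE handlers and the netdevice notifier, and initialise
 * the per-cpu uncached-route lists.  Errors unwind in reverse order.
 */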
3792 int __init ip6_route_init(void)
3793 {
3794         int ret;
3795         int cpu;
3796
3797         ret = -ENOMEM;
3798         ip6_dst_ops_template.kmem_cachep =
3799                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3800                                   SLAB_HWCACHE_ALIGN, NULL);
3801         if (!ip6_dst_ops_template.kmem_cachep)
3802                 goto out;
3803
3804         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3805         if (ret)
3806                 goto out_kmem_cache;
3807
3808         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3809         if (ret)
3810                 goto out_dst_entries;
3811
3812         ret = register_pernet_subsys(&ip6_route_net_ops);
3813         if (ret)
3814                 goto out_register_inetpeer;
3815
3816         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3817
3818         /* The loopback device is registered before this code runs, so the
3819          * loopback reference in rt6_info is not taken automatically; take it
3820          * manually for init_net. */
3821         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3822         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3823 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3824         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3825         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3826         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3827         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3828 #endif
3829         ret = fib6_init();
3830         if (ret)
3831                 goto out_register_subsys;
3832
3833         ret = xfrm6_init();
3834         if (ret)
3835                 goto out_fib6_init;
3836
3837         ret = fib6_rules_init();
3838         if (ret)
3839                 goto xfrm6_init;
3840
3841         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3842         if (ret)
3843                 goto fib6_rules_init;
3844
3845         ret = -ENOBUFS;
3846         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3847             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3848             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3849                 goto out_register_late_subsys;
3850
3851         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3852         if (ret)
3853                 goto out_register_late_subsys;
3854
3855         for_each_possible_cpu(cpu) {
3856                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3857
3858                 INIT_LIST_HEAD(&ul->head);
3859                 spin_lock_init(&ul->lock);
3860         }
3861
3862 out:
3863         return ret;
3864
3865 out_register_late_subsys:
3866         unregister_pernet_subsys(&ip6_route_net_late_ops);
3867 fib6_rules_init:
3868         fib6_rules_cleanup();
3869 xfrm6_init:
3870         xfrm6_fini();
3871 out_fib6_init:
3872         fib6_gc_cleanup();
3873 out_register_subsys:
3874         unregister_pernet_subsys(&ip6_route_net_ops);
3875 out_register_inetpeer:
3876         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3877 out_dst_entries:
3878         dst_entries_destroy(&ip6_dst_blackhole_ops);
3879 out_kmem_cache:
3880         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3881         goto out;
3882 }
3883
3884 void ip6_route_cleanup(void)
3885 {
3886         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3887         unregister_pernet_subsys(&ip6_route_net_late_ops);
3888         fib6_rules_cleanup();
3889         xfrm6_fini();
3890         fib6_gc_cleanup();
3891         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3892         unregister_pernet_subsys(&ip6_route_net_ops);
3893         dst_entries_destroy(&ip6_dst_blackhole_ops);
3894         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3895 }