]> Git Repo - linux.git/blob - net/ipv6/route.c
net: Allow userns root to control ipv6
[linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <[email protected]>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* redirect callback for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= (__force u32)fl6->daddr.s6_addr32[0];
392         val ^= (__force u32)fl6->daddr.s6_addr32[1];
393         val ^= (__force u32)fl6->daddr.s6_addr32[2];
394         val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396         val ^= (__force u32)fl6->saddr.s6_addr32[0];
397         val ^= (__force u32)fl6->saddr.s6_addr32[1];
398         val ^= (__force u32)fl6->saddr.s6_addr32[2];
399         val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401         /* Work only if this not encapsulated */
402         switch (fl6->flowi6_proto) {
403         case IPPROTO_UDP:
404         case IPPROTO_TCP:
405         case IPPROTO_SCTP:
406                 val ^= (__force u16)fl6->fl6_sport;
407                 val ^= (__force u16)fl6->fl6_dport;
408                 break;
409
410         case IPPROTO_ICMPV6:
411                 val ^= (__force u16)fl6->fl6_icmp_type;
412                 val ^= (__force u16)fl6->fl6_icmp_code;
413                 break;
414         }
415         /* RFC6438 recommands to use flowlabel */
416         val ^= (__force u32)fl6->flowlabel;
417
418         /* Perhaps, we need to tune, this function? */
419         val = val ^ (val >> 7) ^ (val >> 12);
420         return val % candidate_count;
421 }
422
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424                                              struct flowi6 *fl6)
425 {
426         struct rt6_info *sibling, *next_sibling;
427         int route_choosen;
428
429         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430         /* Don't change the route, if route_choosen == 0
431          * (siblings does not include ourself)
432          */
433         if (route_choosen)
434                 list_for_each_entry_safe(sibling, next_sibling,
435                                 &match->rt6i_siblings, rt6i_siblings) {
436                         route_choosen--;
437                         if (route_choosen == 0) {
438                                 match = sibling;
439                                 break;
440                         }
441                 }
442         return match;
443 }
444
445 /*
446  *      Route lookup. Any table->tb6_lock is implied.
447  */
448
449 static inline struct rt6_info *rt6_device_match(struct net *net,
450                                                     struct rt6_info *rt,
451                                                     const struct in6_addr *saddr,
452                                                     int oif,
453                                                     int flags)
454 {
455         struct rt6_info *local = NULL;
456         struct rt6_info *sprt;
457
458         if (!oif && ipv6_addr_any(saddr))
459                 goto out;
460
461         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462                 struct net_device *dev = sprt->dst.dev;
463
464                 if (oif) {
465                         if (dev->ifindex == oif)
466                                 return sprt;
467                         if (dev->flags & IFF_LOOPBACK) {
468                                 if (!sprt->rt6i_idev ||
469                                     sprt->rt6i_idev->dev->ifindex != oif) {
470                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
471                                                 continue;
472                                         if (local && (!oif ||
473                                                       local->rt6i_idev->dev->ifindex == oif))
474                                                 continue;
475                                 }
476                                 local = sprt;
477                         }
478                 } else {
479                         if (ipv6_chk_addr(net, saddr, dev,
480                                           flags & RT6_LOOKUP_F_IFACE))
481                                 return sprt;
482                 }
483         }
484
485         if (oif) {
486                 if (local)
487                         return local;
488
489                 if (flags & RT6_LOOKUP_F_IFACE)
490                         return net->ipv6.ip6_null_entry;
491         }
492 out:
493         return rt;
494 }
495
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour bound to @rt is not in a NUD_VALID state and the
 * last update of the neighbour is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation for it.  A NULL
 * @rt or a route without a bound neighbour is silently ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; neigh->updated serves as the rate-limit timestamp.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write side of the
	 * lock is required: with only the read side held, concurrent
	 * callers could all pass the time check and each send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation with the neighbour lock released. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
533 /*
534  * Default Router Selection (RFC 2461 6.3.6)
535  */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537 {
538         struct net_device *dev = rt->dst.dev;
539         if (!oif || dev->ifindex == oif)
540                 return 2;
541         if ((dev->flags & IFF_LOOPBACK) &&
542             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543                 return 1;
544         return 0;
545 }
546
547 static inline int rt6_check_neigh(struct rt6_info *rt)
548 {
549         struct neighbour *neigh;
550         int m;
551
552         neigh = rt->n;
553         if (rt->rt6i_flags & RTF_NONEXTHOP ||
554             !(rt->rt6i_flags & RTF_GATEWAY))
555                 m = 1;
556         else if (neigh) {
557                 read_lock_bh(&neigh->lock);
558                 if (neigh->nud_state & NUD_VALID)
559                         m = 2;
560 #ifdef CONFIG_IPV6_ROUTER_PREF
561                 else if (neigh->nud_state & NUD_FAILED)
562                         m = 0;
563 #endif
564                 else
565                         m = 1;
566                 read_unlock_bh(&neigh->lock);
567         } else
568                 m = 0;
569         return m;
570 }
571
572 static int rt6_score_route(struct rt6_info *rt, int oif,
573                            int strict)
574 {
575         int m, n;
576
577         m = rt6_check_dev(rt, oif);
578         if (!m && (strict & RT6_LOOKUP_F_IFACE))
579                 return -1;
580 #ifdef CONFIG_IPV6_ROUTER_PREF
581         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
582 #endif
583         n = rt6_check_neigh(rt);
584         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
585                 return -1;
586         return m;
587 }
588
589 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
590                                    int *mpri, struct rt6_info *match)
591 {
592         int m;
593
594         if (rt6_check_expired(rt))
595                 goto out;
596
597         m = rt6_score_route(rt, oif, strict);
598         if (m < 0)
599                 goto out;
600
601         if (m > *mpri) {
602                 if (strict & RT6_LOOKUP_F_REACHABLE)
603                         rt6_probe(match);
604                 *mpri = m;
605                 match = rt;
606         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
607                 rt6_probe(rt);
608         }
609
610 out:
611         return match;
612 }
613
614 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
615                                      struct rt6_info *rr_head,
616                                      u32 metric, int oif, int strict)
617 {
618         struct rt6_info *rt, *match;
619         int mpri = -1;
620
621         match = NULL;
622         for (rt = rr_head; rt && rt->rt6i_metric == metric;
623              rt = rt->dst.rt6_next)
624                 match = find_match(rt, oif, strict, &mpri, match);
625         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
626              rt = rt->dst.rt6_next)
627                 match = find_match(rt, oif, strict, &mpri, match);
628
629         return match;
630 }
631
632 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
633 {
634         struct rt6_info *match, *rt0;
635         struct net *net;
636
637         rt0 = fn->rr_ptr;
638         if (!rt0)
639                 fn->rr_ptr = rt0 = fn->leaf;
640
641         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
642
643         if (!match &&
644             (strict & RT6_LOOKUP_F_REACHABLE)) {
645                 struct rt6_info *next = rt0->dst.rt6_next;
646
647                 /* no entries matched; do round-robin */
648                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
649                         next = fn->leaf;
650
651                 if (next != rt0)
652                         fn->rr_ptr = next;
653         }
654
655         net = dev_net(rt0->dst.dev);
656         return match ? match : net->ipv6.ip6_null_entry;
657 }
658
659 #ifdef CONFIG_IPV6_ROUTE_INFO
660 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
661                   const struct in6_addr *gwaddr)
662 {
663         struct net *net = dev_net(dev);
664         struct route_info *rinfo = (struct route_info *) opt;
665         struct in6_addr prefix_buf, *prefix;
666         unsigned int pref;
667         unsigned long lifetime;
668         struct rt6_info *rt;
669
670         if (len < sizeof(struct route_info)) {
671                 return -EINVAL;
672         }
673
674         /* Sanity check for prefix_len and length */
675         if (rinfo->length > 3) {
676                 return -EINVAL;
677         } else if (rinfo->prefix_len > 128) {
678                 return -EINVAL;
679         } else if (rinfo->prefix_len > 64) {
680                 if (rinfo->length < 2) {
681                         return -EINVAL;
682                 }
683         } else if (rinfo->prefix_len > 0) {
684                 if (rinfo->length < 1) {
685                         return -EINVAL;
686                 }
687         }
688
689         pref = rinfo->route_pref;
690         if (pref == ICMPV6_ROUTER_PREF_INVALID)
691                 return -EINVAL;
692
693         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
694
695         if (rinfo->length == 3)
696                 prefix = (struct in6_addr *)rinfo->prefix;
697         else {
698                 /* this function is safe */
699                 ipv6_addr_prefix(&prefix_buf,
700                                  (struct in6_addr *)rinfo->prefix,
701                                  rinfo->prefix_len);
702                 prefix = &prefix_buf;
703         }
704
705         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
706                                 dev->ifindex);
707
708         if (rt && !lifetime) {
709                 ip6_del_rt(rt);
710                 rt = NULL;
711         }
712
713         if (!rt && lifetime)
714                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
715                                         pref);
716         else if (rt)
717                 rt->rt6i_flags = RTF_ROUTEINFO |
718                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
719
720         if (rt) {
721                 if (!addrconf_finite_timeout(lifetime))
722                         rt6_clean_expires(rt);
723                 else
724                         rt6_set_expires(rt, jiffies + HZ * lifetime);
725
726                 ip6_rt_put(rt);
727         }
728         return 0;
729 }
730 #endif
731
/*
 * BACKTRACK - climb the fib6 tree when the lookup hit the null entry.
 *
 * Relies on the enclosing function providing the variables "rt" and
 * "fn" and the labels "restart" and "out".  Jumps to "out" on reaching
 * the top-level root, or to "restart" at the first node carrying route
 * info.  A parent's source subtree (other than the one we came from)
 * is searched by @saddr.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *parent_fn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			parent_fn = fn->parent; \
			fn = (FIB6_SUBTREE(parent_fn) && \
			      FIB6_SUBTREE(parent_fn) != fn) ? \
				fib6_lookup(FIB6_SUBTREE(parent_fn), NULL, saddr) : \
				parent_fn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
749
750 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
751                                              struct fib6_table *table,
752                                              struct flowi6 *fl6, int flags)
753 {
754         struct fib6_node *fn;
755         struct rt6_info *rt;
756
757         read_lock_bh(&table->tb6_lock);
758         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
759 restart:
760         rt = fn->leaf;
761         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
762         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
763                 rt = rt6_multipath_select(rt, fl6);
764         BACKTRACK(net, &fl6->saddr);
765 out:
766         dst_use(&rt->dst, jiffies);
767         read_unlock_bh(&table->tb6_lock);
768         return rt;
769
770 }
771
772 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
773                                     int flags)
774 {
775         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
776 }
777 EXPORT_SYMBOL_GPL(ip6_route_lookup);
778
779 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
780                             const struct in6_addr *saddr, int oif, int strict)
781 {
782         struct flowi6 fl6 = {
783                 .flowi6_oif = oif,
784                 .daddr = *daddr,
785         };
786         struct dst_entry *dst;
787         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
788
789         if (saddr) {
790                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
791                 flags |= RT6_LOOKUP_F_HAS_SADDR;
792         }
793
794         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
795         if (dst->error == 0)
796                 return (struct rt6_info *) dst;
797
798         dst_release(dst);
799
800         return NULL;
801 }
802
803 EXPORT_SYMBOL(rt6_lookup);
804
/* ip6_ins_rt is called without table->tb6_lock held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold the
   route, it may be destroyed.
 */
810
811 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
812 {
813         int err;
814         struct fib6_table *table;
815
816         table = rt->rt6i_table;
817         write_lock_bh(&table->tb6_lock);
818         err = fib6_add(&table->tb6_root, rt, info);
819         write_unlock_bh(&table->tb6_lock);
820
821         return err;
822 }
823
824 int ip6_ins_rt(struct rt6_info *rt)
825 {
826         struct nl_info info = {
827                 .nl_net = dev_net(rt->dst.dev),
828         };
829         return __ip6_ins_rt(rt, &info);
830 }
831
832 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
833                                       const struct in6_addr *daddr,
834                                       const struct in6_addr *saddr)
835 {
836         struct rt6_info *rt;
837
838         /*
839          *      Clone the route.
840          */
841
842         rt = ip6_rt_copy(ort, daddr);
843
844         if (rt) {
845                 int attempts = !in_softirq();
846
847                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
848                         if (ort->rt6i_dst.plen != 128 &&
849                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
850                                 rt->rt6i_flags |= RTF_ANYCAST;
851                         rt->rt6i_gateway = *daddr;
852                 }
853
854                 rt->rt6i_flags |= RTF_CACHE;
855
856 #ifdef CONFIG_IPV6_SUBTREES
857                 if (rt->rt6i_src.plen && saddr) {
858                         rt->rt6i_src.addr = *saddr;
859                         rt->rt6i_src.plen = 128;
860                 }
861 #endif
862
863         retry:
864                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
865                         struct net *net = dev_net(rt->dst.dev);
866                         int saved_rt_min_interval =
867                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
868                         int saved_rt_elasticity =
869                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
870
871                         if (attempts-- > 0) {
872                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
873                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
874
875                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
876
877                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
878                                         saved_rt_elasticity;
879                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
880                                         saved_rt_min_interval;
881                                 goto retry;
882                         }
883
884                         net_warn_ratelimited("Neighbour table overflow\n");
885                         dst_free(&rt->dst);
886                         return NULL;
887                 }
888         }
889
890         return rt;
891 }
892
893 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
894                                         const struct in6_addr *daddr)
895 {
896         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
897
898         if (rt) {
899                 rt->rt6i_flags |= RTF_CACHE;
900                 rt->n = neigh_clone(ort->n);
901         }
902         return rt;
903 }
904
905 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
906                                       struct flowi6 *fl6, int flags)
907 {
908         struct fib6_node *fn;
909         struct rt6_info *rt, *nrt;
910         int strict = 0;
911         int attempts = 3;
912         int err;
913         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
914
915         strict |= flags & RT6_LOOKUP_F_IFACE;
916
917 relookup:
918         read_lock_bh(&table->tb6_lock);
919
920 restart_2:
921         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
922
923 restart:
924         rt = rt6_select(fn, oif, strict | reachable);
925         if (rt->rt6i_nsiblings && oif == 0)
926                 rt = rt6_multipath_select(rt, fl6);
927         BACKTRACK(net, &fl6->saddr);
928         if (rt == net->ipv6.ip6_null_entry ||
929             rt->rt6i_flags & RTF_CACHE)
930                 goto out;
931
932         dst_hold(&rt->dst);
933         read_unlock_bh(&table->tb6_lock);
934
935         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
936                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
937         else if (!(rt->dst.flags & DST_HOST))
938                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
939         else
940                 goto out2;
941
942         ip6_rt_put(rt);
943         rt = nrt ? : net->ipv6.ip6_null_entry;
944
945         dst_hold(&rt->dst);
946         if (nrt) {
947                 err = ip6_ins_rt(nrt);
948                 if (!err)
949                         goto out2;
950         }
951
952         if (--attempts <= 0)
953                 goto out2;
954
955         /*
956          * Race condition! In the gap, when table->tb6_lock was
957          * released someone could insert this route.  Relookup.
958          */
959         ip6_rt_put(rt);
960         goto relookup;
961
962 out:
963         if (reachable) {
964                 reachable = 0;
965                 goto restart_2;
966         }
967         dst_hold(&rt->dst);
968         read_unlock_bh(&table->tb6_lock);
969 out2:
970         rt->dst.lastuse = jiffies;
971         rt->dst.__use++;
972
973         return rt;
974 }
975
976 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
977                                             struct flowi6 *fl6, int flags)
978 {
979         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
980 }
981
982 static struct dst_entry *ip6_route_input_lookup(struct net *net,
983                                                 struct net_device *dev,
984                                                 struct flowi6 *fl6, int flags)
985 {
986         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
987                 flags |= RT6_LOOKUP_F_IFACE;
988
989         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
990 }
991
992 void ip6_route_input(struct sk_buff *skb)
993 {
994         const struct ipv6hdr *iph = ipv6_hdr(skb);
995         struct net *net = dev_net(skb->dev);
996         int flags = RT6_LOOKUP_F_HAS_SADDR;
997         struct flowi6 fl6 = {
998                 .flowi6_iif = skb->dev->ifindex,
999                 .daddr = iph->daddr,
1000                 .saddr = iph->saddr,
1001                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1002                 .flowi6_mark = skb->mark,
1003                 .flowi6_proto = iph->nexthdr,
1004         };
1005
1006         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1007 }
1008
1009 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1010                                              struct flowi6 *fl6, int flags)
1011 {
1012         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1013 }
1014
1015 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1016                                     struct flowi6 *fl6)
1017 {
1018         int flags = 0;
1019
1020         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1021
1022         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1023                 flags |= RT6_LOOKUP_F_IFACE;
1024
1025         if (!ipv6_addr_any(&fl6->saddr))
1026                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1027         else if (sk)
1028                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1029
1030         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1031 }
1032
1033 EXPORT_SYMBOL(ip6_route_output);
1034
1035 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1036 {
1037         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1038         struct dst_entry *new = NULL;
1039
1040         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1041         if (rt) {
1042                 new = &rt->dst;
1043
1044                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1045                 rt6_init_peer(rt, net->ipv6.peers);
1046
1047                 new->__use = 1;
1048                 new->input = dst_discard;
1049                 new->output = dst_discard;
1050
1051                 if (dst_metrics_read_only(&ort->dst))
1052                         new->_metrics = ort->dst._metrics;
1053                 else
1054                         dst_copy_metrics(new, &ort->dst);
1055                 rt->rt6i_idev = ort->rt6i_idev;
1056                 if (rt->rt6i_idev)
1057                         in6_dev_hold(rt->rt6i_idev);
1058
1059                 rt->rt6i_gateway = ort->rt6i_gateway;
1060                 rt->rt6i_flags = ort->rt6i_flags;
1061                 rt6_clean_expires(rt);
1062                 rt->rt6i_metric = 0;
1063
1064                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1065 #ifdef CONFIG_IPV6_SUBTREES
1066                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1067 #endif
1068
1069                 dst_free(new);
1070         }
1071
1072         dst_release(dst_orig);
1073         return new ? new : ERR_PTR(-ENOMEM);
1074 }
1075
1076 /*
1077  *      Destination cache support functions
1078  */
1079
1080 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1081 {
1082         struct rt6_info *rt;
1083
1084         rt = (struct rt6_info *) dst;
1085
1086         /* All IPV6 dsts are created with ->obsolete set to the value
1087          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1088          * into this function always.
1089          */
1090         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1091                 return NULL;
1092
1093         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1094                 return dst;
1095
1096         return NULL;
1097 }
1098
1099 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1100 {
1101         struct rt6_info *rt = (struct rt6_info *) dst;
1102
1103         if (rt) {
1104                 if (rt->rt6i_flags & RTF_CACHE) {
1105                         if (rt6_check_expired(rt)) {
1106                                 ip6_del_rt(rt);
1107                                 dst = NULL;
1108                         }
1109                 } else {
1110                         dst_release(dst);
1111                         dst = NULL;
1112                 }
1113         }
1114         return dst;
1115 }
1116
1117 static void ip6_link_failure(struct sk_buff *skb)
1118 {
1119         struct rt6_info *rt;
1120
1121         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1122
1123         rt = (struct rt6_info *) skb_dst(skb);
1124         if (rt) {
1125                 if (rt->rt6i_flags & RTF_CACHE)
1126                         rt6_update_expires(rt, 0);
1127                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1128                         rt->rt6i_node->fn_sernum = -1;
1129         }
1130 }
1131
1132 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1133                                struct sk_buff *skb, u32 mtu)
1134 {
1135         struct rt6_info *rt6 = (struct rt6_info*)dst;
1136
1137         dst_confirm(dst);
1138         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1139                 struct net *net = dev_net(dst->dev);
1140
1141                 rt6->rt6i_flags |= RTF_MODIFIED;
1142                 if (mtu < IPV6_MIN_MTU) {
1143                         u32 features = dst_metric(dst, RTAX_FEATURES);
1144                         mtu = IPV6_MIN_MTU;
1145                         features |= RTAX_FEATURE_ALLFRAG;
1146                         dst_metric_set(dst, RTAX_FEATURES, features);
1147                 }
1148                 dst_metric_set(dst, RTAX_MTU, mtu);
1149                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1150         }
1151 }
1152
1153 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1154                      int oif, u32 mark)
1155 {
1156         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1157         struct dst_entry *dst;
1158         struct flowi6 fl6;
1159
1160         memset(&fl6, 0, sizeof(fl6));
1161         fl6.flowi6_oif = oif;
1162         fl6.flowi6_mark = mark;
1163         fl6.flowi6_flags = 0;
1164         fl6.daddr = iph->daddr;
1165         fl6.saddr = iph->saddr;
1166         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1167
1168         dst = ip6_route_output(net, NULL, &fl6);
1169         if (!dst->error)
1170                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1171         dst_release(dst);
1172 }
1173 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1174
1175 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1176 {
1177         ip6_update_pmtu(skb, sock_net(sk), mtu,
1178                         sk->sk_bound_dev_if, sk->sk_mark);
1179 }
1180 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1181
1182 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1183 {
1184         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1185         struct dst_entry *dst;
1186         struct flowi6 fl6;
1187
1188         memset(&fl6, 0, sizeof(fl6));
1189         fl6.flowi6_oif = oif;
1190         fl6.flowi6_mark = mark;
1191         fl6.flowi6_flags = 0;
1192         fl6.daddr = iph->daddr;
1193         fl6.saddr = iph->saddr;
1194         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1195
1196         dst = ip6_route_output(net, NULL, &fl6);
1197         if (!dst->error)
1198                 rt6_do_redirect(dst, NULL, skb);
1199         dst_release(dst);
1200 }
1201 EXPORT_SYMBOL_GPL(ip6_redirect);
1202
1203 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1204 {
1205         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1206 }
1207 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1208
1209 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1210 {
1211         struct net_device *dev = dst->dev;
1212         unsigned int mtu = dst_mtu(dst);
1213         struct net *net = dev_net(dev);
1214
1215         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1216
1217         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1218                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1219
1220         /*
1221          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1222          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1223          * IPV6_MAXPLEN is also valid and means: "any MSS,
1224          * rely only on pmtu discovery"
1225          */
1226         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1227                 mtu = IPV6_MAXPLEN;
1228         return mtu;
1229 }
1230
1231 static unsigned int ip6_mtu(const struct dst_entry *dst)
1232 {
1233         struct inet6_dev *idev;
1234         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1235
1236         if (mtu)
1237                 return mtu;
1238
1239         mtu = IPV6_MIN_MTU;
1240
1241         rcu_read_lock();
1242         idev = __in6_dev_get(dst->dev);
1243         if (idev)
1244                 mtu = idev->cnf.mtu6;
1245         rcu_read_unlock();
1246
1247         return mtu;
1248 }
1249
1250 static struct dst_entry *icmp6_dst_gc_list;
1251 static DEFINE_SPINLOCK(icmp6_dst_lock);
1252
1253 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1254                                   struct neighbour *neigh,
1255                                   struct flowi6 *fl6)
1256 {
1257         struct dst_entry *dst;
1258         struct rt6_info *rt;
1259         struct inet6_dev *idev = in6_dev_get(dev);
1260         struct net *net = dev_net(dev);
1261
1262         if (unlikely(!idev))
1263                 return ERR_PTR(-ENODEV);
1264
1265         rt = ip6_dst_alloc(net, dev, 0, NULL);
1266         if (unlikely(!rt)) {
1267                 in6_dev_put(idev);
1268                 dst = ERR_PTR(-ENOMEM);
1269                 goto out;
1270         }
1271
1272         if (neigh)
1273                 neigh_hold(neigh);
1274         else {
1275                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1276                 if (IS_ERR(neigh)) {
1277                         in6_dev_put(idev);
1278                         dst_free(&rt->dst);
1279                         return ERR_CAST(neigh);
1280                 }
1281         }
1282
1283         rt->dst.flags |= DST_HOST;
1284         rt->dst.output  = ip6_output;
1285         rt->n = neigh;
1286         atomic_set(&rt->dst.__refcnt, 1);
1287         rt->rt6i_dst.addr = fl6->daddr;
1288         rt->rt6i_dst.plen = 128;
1289         rt->rt6i_idev     = idev;
1290         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1291
1292         spin_lock_bh(&icmp6_dst_lock);
1293         rt->dst.next = icmp6_dst_gc_list;
1294         icmp6_dst_gc_list = &rt->dst;
1295         spin_unlock_bh(&icmp6_dst_lock);
1296
1297         fib6_force_start_gc(net);
1298
1299         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1300
1301 out:
1302         return dst;
1303 }
1304
1305 int icmp6_dst_gc(void)
1306 {
1307         struct dst_entry *dst, **pprev;
1308         int more = 0;
1309
1310         spin_lock_bh(&icmp6_dst_lock);
1311         pprev = &icmp6_dst_gc_list;
1312
1313         while ((dst = *pprev) != NULL) {
1314                 if (!atomic_read(&dst->__refcnt)) {
1315                         *pprev = dst->next;
1316                         dst_free(dst);
1317                 } else {
1318                         pprev = &dst->next;
1319                         ++more;
1320                 }
1321         }
1322
1323         spin_unlock_bh(&icmp6_dst_lock);
1324
1325         return more;
1326 }
1327
1328 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1329                             void *arg)
1330 {
1331         struct dst_entry *dst, **pprev;
1332
1333         spin_lock_bh(&icmp6_dst_lock);
1334         pprev = &icmp6_dst_gc_list;
1335         while ((dst = *pprev) != NULL) {
1336                 struct rt6_info *rt = (struct rt6_info *) dst;
1337                 if (func(rt, arg)) {
1338                         *pprev = dst->next;
1339                         dst_free(dst);
1340                 } else {
1341                         pprev = &dst->next;
1342                 }
1343         }
1344         spin_unlock_bh(&icmp6_dst_lock);
1345 }
1346
1347 static int ip6_dst_gc(struct dst_ops *ops)
1348 {
1349         unsigned long now = jiffies;
1350         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1351         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1352         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1353         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1354         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1355         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1356         int entries;
1357
1358         entries = dst_entries_get_fast(ops);
1359         if (time_after(rt_last_gc + rt_min_interval, now) &&
1360             entries <= rt_max_size)
1361                 goto out;
1362
1363         net->ipv6.ip6_rt_gc_expire++;
1364         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1365         net->ipv6.ip6_rt_last_gc = now;
1366         entries = dst_entries_get_slow(ops);
1367         if (entries < ops->gc_thresh)
1368                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1369 out:
1370         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1371         return entries > rt_max_size;
1372 }
1373
1374 int ip6_dst_hoplimit(struct dst_entry *dst)
1375 {
1376         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1377         if (hoplimit == 0) {
1378                 struct net_device *dev = dst->dev;
1379                 struct inet6_dev *idev;
1380
1381                 rcu_read_lock();
1382                 idev = __in6_dev_get(dev);
1383                 if (idev)
1384                         hoplimit = idev->cnf.hop_limit;
1385                 else
1386                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1387                 rcu_read_unlock();
1388         }
1389         return hoplimit;
1390 }
1391 EXPORT_SYMBOL(ip6_dst_hoplimit);
1392
1393 /*
1394  *
1395  */
1396
1397 int ip6_route_add(struct fib6_config *cfg)
1398 {
1399         int err;
1400         struct net *net = cfg->fc_nlinfo.nl_net;
1401         struct rt6_info *rt = NULL;
1402         struct net_device *dev = NULL;
1403         struct inet6_dev *idev = NULL;
1404         struct fib6_table *table;
1405         int addr_type;
1406
1407         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1408                 return -EINVAL;
1409 #ifndef CONFIG_IPV6_SUBTREES
1410         if (cfg->fc_src_len)
1411                 return -EINVAL;
1412 #endif
1413         if (cfg->fc_ifindex) {
1414                 err = -ENODEV;
1415                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1416                 if (!dev)
1417                         goto out;
1418                 idev = in6_dev_get(dev);
1419                 if (!idev)
1420                         goto out;
1421         }
1422
1423         if (cfg->fc_metric == 0)
1424                 cfg->fc_metric = IP6_RT_PRIO_USER;
1425
1426         err = -ENOBUFS;
1427         if (cfg->fc_nlinfo.nlh &&
1428             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1429                 table = fib6_get_table(net, cfg->fc_table);
1430                 if (!table) {
1431                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1432                         table = fib6_new_table(net, cfg->fc_table);
1433                 }
1434         } else {
1435                 table = fib6_new_table(net, cfg->fc_table);
1436         }
1437
1438         if (!table)
1439                 goto out;
1440
1441         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1442
1443         if (!rt) {
1444                 err = -ENOMEM;
1445                 goto out;
1446         }
1447
1448         if (cfg->fc_flags & RTF_EXPIRES)
1449                 rt6_set_expires(rt, jiffies +
1450                                 clock_t_to_jiffies(cfg->fc_expires));
1451         else
1452                 rt6_clean_expires(rt);
1453
1454         if (cfg->fc_protocol == RTPROT_UNSPEC)
1455                 cfg->fc_protocol = RTPROT_BOOT;
1456         rt->rt6i_protocol = cfg->fc_protocol;
1457
1458         addr_type = ipv6_addr_type(&cfg->fc_dst);
1459
1460         if (addr_type & IPV6_ADDR_MULTICAST)
1461                 rt->dst.input = ip6_mc_input;
1462         else if (cfg->fc_flags & RTF_LOCAL)
1463                 rt->dst.input = ip6_input;
1464         else
1465                 rt->dst.input = ip6_forward;
1466
1467         rt->dst.output = ip6_output;
1468
1469         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1470         rt->rt6i_dst.plen = cfg->fc_dst_len;
1471         if (rt->rt6i_dst.plen == 128)
1472                rt->dst.flags |= DST_HOST;
1473
1474         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1475                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1476                 if (!metrics) {
1477                         err = -ENOMEM;
1478                         goto out;
1479                 }
1480                 dst_init_metrics(&rt->dst, metrics, 0);
1481         }
1482 #ifdef CONFIG_IPV6_SUBTREES
1483         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1484         rt->rt6i_src.plen = cfg->fc_src_len;
1485 #endif
1486
1487         rt->rt6i_metric = cfg->fc_metric;
1488
1489         /* We cannot add true routes via loopback here,
1490            they would result in kernel looping; promote them to reject routes
1491          */
1492         if ((cfg->fc_flags & RTF_REJECT) ||
1493             (dev && (dev->flags & IFF_LOOPBACK) &&
1494              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1495              !(cfg->fc_flags & RTF_LOCAL))) {
1496                 /* hold loopback dev/idev if we haven't done so. */
1497                 if (dev != net->loopback_dev) {
1498                         if (dev) {
1499                                 dev_put(dev);
1500                                 in6_dev_put(idev);
1501                         }
1502                         dev = net->loopback_dev;
1503                         dev_hold(dev);
1504                         idev = in6_dev_get(dev);
1505                         if (!idev) {
1506                                 err = -ENODEV;
1507                                 goto out;
1508                         }
1509                 }
1510                 rt->dst.output = ip6_pkt_discard_out;
1511                 rt->dst.input = ip6_pkt_discard;
1512                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1513                 switch (cfg->fc_type) {
1514                 case RTN_BLACKHOLE:
1515                         rt->dst.error = -EINVAL;
1516                         break;
1517                 case RTN_PROHIBIT:
1518                         rt->dst.error = -EACCES;
1519                         break;
1520                 case RTN_THROW:
1521                         rt->dst.error = -EAGAIN;
1522                         break;
1523                 default:
1524                         rt->dst.error = -ENETUNREACH;
1525                         break;
1526                 }
1527                 goto install_route;
1528         }
1529
1530         if (cfg->fc_flags & RTF_GATEWAY) {
1531                 const struct in6_addr *gw_addr;
1532                 int gwa_type;
1533
1534                 gw_addr = &cfg->fc_gateway;
1535                 rt->rt6i_gateway = *gw_addr;
1536                 gwa_type = ipv6_addr_type(gw_addr);
1537
1538                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1539                         struct rt6_info *grt;
1540
1541                         /* IPv6 strictly inhibits using not link-local
1542                            addresses as nexthop address.
1543                            Otherwise, router will not able to send redirects.
1544                            It is very good, but in some (rare!) circumstances
1545                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1546                            some exceptions. --ANK
1547                          */
1548                         err = -EINVAL;
1549                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1550                                 goto out;
1551
1552                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1553
1554                         err = -EHOSTUNREACH;
1555                         if (!grt)
1556                                 goto out;
1557                         if (dev) {
1558                                 if (dev != grt->dst.dev) {
1559                                         ip6_rt_put(grt);
1560                                         goto out;
1561                                 }
1562                         } else {
1563                                 dev = grt->dst.dev;
1564                                 idev = grt->rt6i_idev;
1565                                 dev_hold(dev);
1566                                 in6_dev_hold(grt->rt6i_idev);
1567                         }
1568                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1569                                 err = 0;
1570                         ip6_rt_put(grt);
1571
1572                         if (err)
1573                                 goto out;
1574                 }
1575                 err = -EINVAL;
1576                 if (!dev || (dev->flags & IFF_LOOPBACK))
1577                         goto out;
1578         }
1579
1580         err = -ENODEV;
1581         if (!dev)
1582                 goto out;
1583
1584         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1585                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1586                         err = -EINVAL;
1587                         goto out;
1588                 }
1589                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1590                 rt->rt6i_prefsrc.plen = 128;
1591         } else
1592                 rt->rt6i_prefsrc.plen = 0;
1593
1594         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1595                 err = rt6_bind_neighbour(rt, dev);
1596                 if (err)
1597                         goto out;
1598         }
1599
1600         rt->rt6i_flags = cfg->fc_flags;
1601
1602 install_route:
1603         if (cfg->fc_mx) {
1604                 struct nlattr *nla;
1605                 int remaining;
1606
1607                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1608                         int type = nla_type(nla);
1609
1610                         if (type) {
1611                                 if (type > RTAX_MAX) {
1612                                         err = -EINVAL;
1613                                         goto out;
1614                                 }
1615
1616                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1617                         }
1618                 }
1619         }
1620
1621         rt->dst.dev = dev;
1622         rt->rt6i_idev = idev;
1623         rt->rt6i_table = table;
1624
1625         cfg->fc_nlinfo.nl_net = dev_net(dev);
1626
1627         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1628
1629 out:
1630         if (dev)
1631                 dev_put(dev);
1632         if (idev)
1633                 in6_dev_put(idev);
1634         if (rt)
1635                 dst_free(&rt->dst);
1636         return err;
1637 }
1638
1639 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1640 {
1641         int err;
1642         struct fib6_table *table;
1643         struct net *net = dev_net(rt->dst.dev);
1644
1645         if (rt == net->ipv6.ip6_null_entry) {
1646                 err = -ENOENT;
1647                 goto out;
1648         }
1649
1650         table = rt->rt6i_table;
1651         write_lock_bh(&table->tb6_lock);
1652         err = fib6_del(rt, info);
1653         write_unlock_bh(&table->tb6_lock);
1654
1655 out:
1656         ip6_rt_put(rt);
1657         return err;
1658 }
1659
1660 int ip6_del_rt(struct rt6_info *rt)
1661 {
1662         struct nl_info info = {
1663                 .nl_net = dev_net(rt->dst.dev),
1664         };
1665         return __ip6_del_rt(rt, &info);
1666 }
1667
1668 static int ip6_route_del(struct fib6_config *cfg)
1669 {
1670         struct fib6_table *table;
1671         struct fib6_node *fn;
1672         struct rt6_info *rt;
1673         int err = -ESRCH;
1674
1675         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1676         if (!table)
1677                 return err;
1678
1679         read_lock_bh(&table->tb6_lock);
1680
1681         fn = fib6_locate(&table->tb6_root,
1682                          &cfg->fc_dst, cfg->fc_dst_len,
1683                          &cfg->fc_src, cfg->fc_src_len);
1684
1685         if (fn) {
1686                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1687                         if (cfg->fc_ifindex &&
1688                             (!rt->dst.dev ||
1689                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1690                                 continue;
1691                         if (cfg->fc_flags & RTF_GATEWAY &&
1692                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1693                                 continue;
1694                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1695                                 continue;
1696                         dst_hold(&rt->dst);
1697                         read_unlock_bh(&table->tb6_lock);
1698
1699                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1700                 }
1701         }
1702         read_unlock_bh(&table->tb6_lock);
1703
1704         return err;
1705 }
1706
1707 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1708 {
1709         struct net *net = dev_net(skb->dev);
1710         struct netevent_redirect netevent;
1711         struct rt6_info *rt, *nrt = NULL;
1712         const struct in6_addr *target;
1713         struct ndisc_options ndopts;
1714         const struct in6_addr *dest;
1715         struct neighbour *old_neigh;
1716         struct inet6_dev *in6_dev;
1717         struct neighbour *neigh;
1718         struct icmp6hdr *icmph;
1719         int optlen, on_link;
1720         u8 *lladdr;
1721
1722         optlen = skb->tail - skb->transport_header;
1723         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1724
1725         if (optlen < 0) {
1726                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1727                 return;
1728         }
1729
1730         icmph = icmp6_hdr(skb);
1731         target = (const struct in6_addr *) (icmph + 1);
1732         dest = target + 1;
1733
1734         if (ipv6_addr_is_multicast(dest)) {
1735                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1736                 return;
1737         }
1738
1739         on_link = 0;
1740         if (ipv6_addr_equal(dest, target)) {
1741                 on_link = 1;
1742         } else if (ipv6_addr_type(target) !=
1743                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1744                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1745                 return;
1746         }
1747
1748         in6_dev = __in6_dev_get(skb->dev);
1749         if (!in6_dev)
1750                 return;
1751         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1752                 return;
1753
1754         /* RFC2461 8.1:
1755          *      The IP source address of the Redirect MUST be the same as the current
1756          *      first-hop router for the specified ICMP Destination Address.
1757          */
1758
1759         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1760                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1761                 return;
1762         }
1763
1764         lladdr = NULL;
1765         if (ndopts.nd_opts_tgt_lladdr) {
1766                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1767                                              skb->dev);
1768                 if (!lladdr) {
1769                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1770                         return;
1771                 }
1772         }
1773
1774         rt = (struct rt6_info *) dst;
1775         if (rt == net->ipv6.ip6_null_entry) {
1776                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1777                 return;
1778         }
1779
1780         /* Redirect received -> path was valid.
1781          * Look, redirects are sent only in response to data packets,
1782          * so that this nexthop apparently is reachable. --ANK
1783          */
1784         dst_confirm(&rt->dst);
1785
1786         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1787         if (!neigh)
1788                 return;
1789
1790         /* Duplicate redirect: silently ignore. */
1791         old_neigh = rt->n;
1792         if (neigh == old_neigh)
1793                 goto out;
1794
1795         /*
1796          *      We have finally decided to accept it.
1797          */
1798
1799         neigh_update(neigh, lladdr, NUD_STALE,
1800                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1801                      NEIGH_UPDATE_F_OVERRIDE|
1802                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1803                                      NEIGH_UPDATE_F_ISROUTER))
1804                      );
1805
1806         nrt = ip6_rt_copy(rt, dest);
1807         if (!nrt)
1808                 goto out;
1809
1810         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1811         if (on_link)
1812                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1813
1814         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1815         nrt->n = neigh_clone(neigh);
1816
1817         if (ip6_ins_rt(nrt))
1818                 goto out;
1819
1820         netevent.old = &rt->dst;
1821         netevent.old_neigh = old_neigh;
1822         netevent.new = &nrt->dst;
1823         netevent.new_neigh = neigh;
1824         netevent.daddr = dest;
1825         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1826
1827         if (rt->rt6i_flags & RTF_CACHE) {
1828                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1829                 ip6_del_rt(rt);
1830         }
1831
1832 out:
1833         neigh_release(neigh);
1834 }
1835
1836 /*
1837  *      Misc support functions
1838  */
1839
1840 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1841                                     const struct in6_addr *dest)
1842 {
1843         struct net *net = dev_net(ort->dst.dev);
1844         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1845                                             ort->rt6i_table);
1846
1847         if (rt) {
1848                 rt->dst.input = ort->dst.input;
1849                 rt->dst.output = ort->dst.output;
1850                 rt->dst.flags |= DST_HOST;
1851
1852                 rt->rt6i_dst.addr = *dest;
1853                 rt->rt6i_dst.plen = 128;
1854                 dst_copy_metrics(&rt->dst, &ort->dst);
1855                 rt->dst.error = ort->dst.error;
1856                 rt->rt6i_idev = ort->rt6i_idev;
1857                 if (rt->rt6i_idev)
1858                         in6_dev_hold(rt->rt6i_idev);
1859                 rt->dst.lastuse = jiffies;
1860
1861                 rt->rt6i_gateway = ort->rt6i_gateway;
1862                 rt->rt6i_flags = ort->rt6i_flags;
1863                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1864                     (RTF_DEFAULT | RTF_ADDRCONF))
1865                         rt6_set_from(rt, ort);
1866                 else
1867                         rt6_clean_expires(rt);
1868                 rt->rt6i_metric = 0;
1869
1870 #ifdef CONFIG_IPV6_SUBTREES
1871                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1872 #endif
1873                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1874                 rt->rt6i_table = ort->rt6i_table;
1875         }
1876         return rt;
1877 }
1878
1879 #ifdef CONFIG_IPV6_ROUTE_INFO
1880 static struct rt6_info *rt6_get_route_info(struct net *net,
1881                                            const struct in6_addr *prefix, int prefixlen,
1882                                            const struct in6_addr *gwaddr, int ifindex)
1883 {
1884         struct fib6_node *fn;
1885         struct rt6_info *rt = NULL;
1886         struct fib6_table *table;
1887
1888         table = fib6_get_table(net, RT6_TABLE_INFO);
1889         if (!table)
1890                 return NULL;
1891
1892         read_lock_bh(&table->tb6_lock);
1893         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1894         if (!fn)
1895                 goto out;
1896
1897         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1898                 if (rt->dst.dev->ifindex != ifindex)
1899                         continue;
1900                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1901                         continue;
1902                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1903                         continue;
1904                 dst_hold(&rt->dst);
1905                 break;
1906         }
1907 out:
1908         read_unlock_bh(&table->tb6_lock);
1909         return rt;
1910 }
1911
1912 static struct rt6_info *rt6_add_route_info(struct net *net,
1913                                            const struct in6_addr *prefix, int prefixlen,
1914                                            const struct in6_addr *gwaddr, int ifindex,
1915                                            unsigned int pref)
1916 {
1917         struct fib6_config cfg = {
1918                 .fc_table       = RT6_TABLE_INFO,
1919                 .fc_metric      = IP6_RT_PRIO_USER,
1920                 .fc_ifindex     = ifindex,
1921                 .fc_dst_len     = prefixlen,
1922                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1923                                   RTF_UP | RTF_PREF(pref),
1924                 .fc_nlinfo.portid = 0,
1925                 .fc_nlinfo.nlh = NULL,
1926                 .fc_nlinfo.nl_net = net,
1927         };
1928
1929         cfg.fc_dst = *prefix;
1930         cfg.fc_gateway = *gwaddr;
1931
1932         /* We should treat it as a default route if prefix length is 0. */
1933         if (!prefixlen)
1934                 cfg.fc_flags |= RTF_DEFAULT;
1935
1936         ip6_route_add(&cfg);
1937
1938         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1939 }
1940 #endif
1941
1942 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1943 {
1944         struct rt6_info *rt;
1945         struct fib6_table *table;
1946
1947         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1948         if (!table)
1949                 return NULL;
1950
1951         read_lock_bh(&table->tb6_lock);
1952         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1953                 if (dev == rt->dst.dev &&
1954                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1955                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1956                         break;
1957         }
1958         if (rt)
1959                 dst_hold(&rt->dst);
1960         read_unlock_bh(&table->tb6_lock);
1961         return rt;
1962 }
1963
1964 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1965                                      struct net_device *dev,
1966                                      unsigned int pref)
1967 {
1968         struct fib6_config cfg = {
1969                 .fc_table       = RT6_TABLE_DFLT,
1970                 .fc_metric      = IP6_RT_PRIO_USER,
1971                 .fc_ifindex     = dev->ifindex,
1972                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1973                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1974                 .fc_nlinfo.portid = 0,
1975                 .fc_nlinfo.nlh = NULL,
1976                 .fc_nlinfo.nl_net = dev_net(dev),
1977         };
1978
1979         cfg.fc_gateway = *gwaddr;
1980
1981         ip6_route_add(&cfg);
1982
1983         return rt6_get_dflt_router(gwaddr, dev);
1984 }
1985
1986 void rt6_purge_dflt_routers(struct net *net)
1987 {
1988         struct rt6_info *rt;
1989         struct fib6_table *table;
1990
1991         /* NOTE: Keep consistent with rt6_get_dflt_router */
1992         table = fib6_get_table(net, RT6_TABLE_DFLT);
1993         if (!table)
1994                 return;
1995
1996 restart:
1997         read_lock_bh(&table->tb6_lock);
1998         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1999                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2000                         dst_hold(&rt->dst);
2001                         read_unlock_bh(&table->tb6_lock);
2002                         ip6_del_rt(rt);
2003                         goto restart;
2004                 }
2005         }
2006         read_unlock_bh(&table->tb6_lock);
2007 }
2008
2009 static void rtmsg_to_fib6_config(struct net *net,
2010                                  struct in6_rtmsg *rtmsg,
2011                                  struct fib6_config *cfg)
2012 {
2013         memset(cfg, 0, sizeof(*cfg));
2014
2015         cfg->fc_table = RT6_TABLE_MAIN;
2016         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2017         cfg->fc_metric = rtmsg->rtmsg_metric;
2018         cfg->fc_expires = rtmsg->rtmsg_info;
2019         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2020         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2021         cfg->fc_flags = rtmsg->rtmsg_flags;
2022
2023         cfg->fc_nlinfo.nl_net = net;
2024
2025         cfg->fc_dst = rtmsg->rtmsg_dst;
2026         cfg->fc_src = rtmsg->rtmsg_src;
2027         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2028 }
2029
2030 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2031 {
2032         struct fib6_config cfg;
2033         struct in6_rtmsg rtmsg;
2034         int err;
2035
2036         switch(cmd) {
2037         case SIOCADDRT:         /* Add a route */
2038         case SIOCDELRT:         /* Delete a route */
2039                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2040                         return -EPERM;
2041                 err = copy_from_user(&rtmsg, arg,
2042                                      sizeof(struct in6_rtmsg));
2043                 if (err)
2044                         return -EFAULT;
2045
2046                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2047
2048                 rtnl_lock();
2049                 switch (cmd) {
2050                 case SIOCADDRT:
2051                         err = ip6_route_add(&cfg);
2052                         break;
2053                 case SIOCDELRT:
2054                         err = ip6_route_del(&cfg);
2055                         break;
2056                 default:
2057                         err = -EINVAL;
2058                 }
2059                 rtnl_unlock();
2060
2061                 return err;
2062         }
2063
2064         return -EINVAL;
2065 }
2066
2067 /*
2068  *      Drop the packet on the floor
2069  */
2070
2071 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2072 {
2073         int type;
2074         struct dst_entry *dst = skb_dst(skb);
2075         switch (ipstats_mib_noroutes) {
2076         case IPSTATS_MIB_INNOROUTES:
2077                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2078                 if (type == IPV6_ADDR_ANY) {
2079                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2080                                       IPSTATS_MIB_INADDRERRORS);
2081                         break;
2082                 }
2083                 /* FALLTHROUGH */
2084         case IPSTATS_MIB_OUTNOROUTES:
2085                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2086                               ipstats_mib_noroutes);
2087                 break;
2088         }
2089         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2090         kfree_skb(skb);
2091         return 0;
2092 }
2093
2094 static int ip6_pkt_discard(struct sk_buff *skb)
2095 {
2096         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2097 }
2098
2099 static int ip6_pkt_discard_out(struct sk_buff *skb)
2100 {
2101         skb->dev = skb_dst(skb)->dev;
2102         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2103 }
2104
2105 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2106
2107 static int ip6_pkt_prohibit(struct sk_buff *skb)
2108 {
2109         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2110 }
2111
2112 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2113 {
2114         skb->dev = skb_dst(skb)->dev;
2115         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2116 }
2117
2118 #endif
2119
2120 /*
2121  *      Allocate a dst for local (unicast / anycast) address.
2122  */
2123
2124 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2125                                     const struct in6_addr *addr,
2126                                     bool anycast)
2127 {
2128         struct net *net = dev_net(idev->dev);
2129         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2130         int err;
2131
2132         if (!rt) {
2133                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2134                 return ERR_PTR(-ENOMEM);
2135         }
2136
2137         in6_dev_hold(idev);
2138
2139         rt->dst.flags |= DST_HOST;
2140         rt->dst.input = ip6_input;
2141         rt->dst.output = ip6_output;
2142         rt->rt6i_idev = idev;
2143
2144         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2145         if (anycast)
2146                 rt->rt6i_flags |= RTF_ANYCAST;
2147         else
2148                 rt->rt6i_flags |= RTF_LOCAL;
2149         err = rt6_bind_neighbour(rt, rt->dst.dev);
2150         if (err) {
2151                 dst_free(&rt->dst);
2152                 return ERR_PTR(err);
2153         }
2154
2155         rt->rt6i_dst.addr = *addr;
2156         rt->rt6i_dst.plen = 128;
2157         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2158
2159         atomic_set(&rt->dst.__refcnt, 1);
2160
2161         return rt;
2162 }
2163
2164 int ip6_route_get_saddr(struct net *net,
2165                         struct rt6_info *rt,
2166                         const struct in6_addr *daddr,
2167                         unsigned int prefs,
2168                         struct in6_addr *saddr)
2169 {
2170         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2171         int err = 0;
2172         if (rt->rt6i_prefsrc.plen)
2173                 *saddr = rt->rt6i_prefsrc.addr;
2174         else
2175                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2176                                          daddr, prefs, saddr);
2177         return err;
2178 }
2179
2180 /* remove deleted ip from prefsrc entries */
2181 struct arg_dev_net_ip {
2182         struct net_device *dev;
2183         struct net *net;
2184         struct in6_addr *addr;
2185 };
2186
2187 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2188 {
2189         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2190         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2191         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2192
2193         if (((void *)rt->dst.dev == dev || !dev) &&
2194             rt != net->ipv6.ip6_null_entry &&
2195             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2196                 /* remove prefsrc entry */
2197                 rt->rt6i_prefsrc.plen = 0;
2198         }
2199         return 0;
2200 }
2201
2202 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2203 {
2204         struct net *net = dev_net(ifp->idev->dev);
2205         struct arg_dev_net_ip adni = {
2206                 .dev = ifp->idev->dev,
2207                 .net = net,
2208                 .addr = &ifp->addr,
2209         };
2210         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2211 }
2212
2213 struct arg_dev_net {
2214         struct net_device *dev;
2215         struct net *net;
2216 };
2217
2218 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2219 {
2220         const struct arg_dev_net *adn = arg;
2221         const struct net_device *dev = adn->dev;
2222
2223         if ((rt->dst.dev == dev || !dev) &&
2224             rt != adn->net->ipv6.ip6_null_entry)
2225                 return -1;
2226
2227         return 0;
2228 }
2229
2230 void rt6_ifdown(struct net *net, struct net_device *dev)
2231 {
2232         struct arg_dev_net adn = {
2233                 .dev = dev,
2234                 .net = net,
2235         };
2236
2237         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2238         icmp6_clean_all(fib6_ifdown, &adn);
2239 }
2240
2241 struct rt6_mtu_change_arg {
2242         struct net_device *dev;
2243         unsigned int mtu;
2244 };
2245
2246 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2247 {
2248         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2249         struct inet6_dev *idev;
2250
2251         /* In IPv6 pmtu discovery is not optional,
2252            so that RTAX_MTU lock cannot disable it.
2253            We still use this lock to block changes
2254            caused by addrconf/ndisc.
2255         */
2256
2257         idev = __in6_dev_get(arg->dev);
2258         if (!idev)
2259                 return 0;
2260
2261         /* For administrative MTU increase, there is no way to discover
2262            IPv6 PMTU increase, so PMTU increase should be updated here.
2263            Since RFC 1981 doesn't include administrative MTU increase
2264            update PMTU increase is a MUST. (i.e. jumbo frame)
2265          */
2266         /*
2267            If new MTU is less than route PMTU, this new MTU will be the
2268            lowest MTU in the path, update the route PMTU to reflect PMTU
2269            decreases; if new MTU is greater than route PMTU, and the
2270            old MTU is the lowest MTU in the path, update the route PMTU
2271            to reflect the increase. In this case if the other nodes' MTU
2272            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2273            PMTU discouvery.
2274          */
2275         if (rt->dst.dev == arg->dev &&
2276             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2277             (dst_mtu(&rt->dst) >= arg->mtu ||
2278              (dst_mtu(&rt->dst) < arg->mtu &&
2279               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2280                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2281         }
2282         return 0;
2283 }
2284
2285 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2286 {
2287         struct rt6_mtu_change_arg arg = {
2288                 .dev = dev,
2289                 .mtu = mtu,
2290         };
2291
2292         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2293 }
2294
2295 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2296         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2297         [RTA_OIF]               = { .type = NLA_U32 },
2298         [RTA_IIF]               = { .type = NLA_U32 },
2299         [RTA_PRIORITY]          = { .type = NLA_U32 },
2300         [RTA_METRICS]           = { .type = NLA_NESTED },
2301         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2302 };
2303
2304 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2305                               struct fib6_config *cfg)
2306 {
2307         struct rtmsg *rtm;
2308         struct nlattr *tb[RTA_MAX+1];
2309         int err;
2310
2311         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2312         if (err < 0)
2313                 goto errout;
2314
2315         err = -EINVAL;
2316         rtm = nlmsg_data(nlh);
2317         memset(cfg, 0, sizeof(*cfg));
2318
2319         cfg->fc_table = rtm->rtm_table;
2320         cfg->fc_dst_len = rtm->rtm_dst_len;
2321         cfg->fc_src_len = rtm->rtm_src_len;
2322         cfg->fc_flags = RTF_UP;
2323         cfg->fc_protocol = rtm->rtm_protocol;
2324         cfg->fc_type = rtm->rtm_type;
2325
2326         if (rtm->rtm_type == RTN_UNREACHABLE ||
2327             rtm->rtm_type == RTN_BLACKHOLE ||
2328             rtm->rtm_type == RTN_PROHIBIT ||
2329             rtm->rtm_type == RTN_THROW)
2330                 cfg->fc_flags |= RTF_REJECT;
2331
2332         if (rtm->rtm_type == RTN_LOCAL)
2333                 cfg->fc_flags |= RTF_LOCAL;
2334
2335         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2336         cfg->fc_nlinfo.nlh = nlh;
2337         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2338
2339         if (tb[RTA_GATEWAY]) {
2340                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2341                 cfg->fc_flags |= RTF_GATEWAY;
2342         }
2343
2344         if (tb[RTA_DST]) {
2345                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2346
2347                 if (nla_len(tb[RTA_DST]) < plen)
2348                         goto errout;
2349
2350                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2351         }
2352
2353         if (tb[RTA_SRC]) {
2354                 int plen = (rtm->rtm_src_len + 7) >> 3;
2355
2356                 if (nla_len(tb[RTA_SRC]) < plen)
2357                         goto errout;
2358
2359                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2360         }
2361
2362         if (tb[RTA_PREFSRC])
2363                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2364
2365         if (tb[RTA_OIF])
2366                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2367
2368         if (tb[RTA_PRIORITY])
2369                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2370
2371         if (tb[RTA_METRICS]) {
2372                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2373                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2374         }
2375
2376         if (tb[RTA_TABLE])
2377                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2378
2379         if (tb[RTA_MULTIPATH]) {
2380                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2381                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2382         }
2383
2384         err = 0;
2385 errout:
2386         return err;
2387 }
2388
2389 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2390 {
2391         struct fib6_config r_cfg;
2392         struct rtnexthop *rtnh;
2393         int remaining;
2394         int attrlen;
2395         int err = 0, last_err = 0;
2396
2397 beginning:
2398         rtnh = (struct rtnexthop *)cfg->fc_mp;
2399         remaining = cfg->fc_mp_len;
2400
2401         /* Parse a Multipath Entry */
2402         while (rtnh_ok(rtnh, remaining)) {
2403                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2404                 if (rtnh->rtnh_ifindex)
2405                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2406
2407                 attrlen = rtnh_attrlen(rtnh);
2408                 if (attrlen > 0) {
2409                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2410
2411                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2412                         if (nla) {
2413                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2414                                 r_cfg.fc_flags |= RTF_GATEWAY;
2415                         }
2416                 }
2417                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2418                 if (err) {
2419                         last_err = err;
2420                         /* If we are trying to remove a route, do not stop the
2421                          * loop when ip6_route_del() fails (because next hop is
2422                          * already gone), we should try to remove all next hops.
2423                          */
2424                         if (add) {
2425                                 /* If add fails, we should try to delete all
2426                                  * next hops that have been already added.
2427                                  */
2428                                 add = 0;
2429                                 goto beginning;
2430                         }
2431                 }
2432                 /* Because each route is added like a single route we remove
2433                  * this flag after the first nexthop (if there is a collision,
2434                  * we have already fail to add the first nexthop:
2435                  * fib6_add_rt2node() has reject it).
2436                  */
2437                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2438                 rtnh = rtnh_next(rtnh, &remaining);
2439         }
2440
2441         return last_err;
2442 }
2443
2444 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2445 {
2446         struct fib6_config cfg;
2447         int err;
2448
2449         if (!capable(CAP_NET_ADMIN))
2450                 return -EPERM;
2451
2452         err = rtm_to_fib6_config(skb, nlh, &cfg);
2453         if (err < 0)
2454                 return err;
2455
2456         if (cfg.fc_mp)
2457                 return ip6_route_multipath(&cfg, 0);
2458         else
2459                 return ip6_route_del(&cfg);
2460 }
2461
2462 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2463 {
2464         struct fib6_config cfg;
2465         int err;
2466
2467         if (!capable(CAP_NET_ADMIN))
2468                 return -EPERM;
2469
2470         err = rtm_to_fib6_config(skb, nlh, &cfg);
2471         if (err < 0)
2472                 return err;
2473
2474         if (cfg.fc_mp)
2475                 return ip6_route_multipath(&cfg, 1);
2476         else
2477                 return ip6_route_add(&cfg);
2478 }
2479
2480 static inline size_t rt6_nlmsg_size(void)
2481 {
2482         return NLMSG_ALIGN(sizeof(struct rtmsg))
2483                + nla_total_size(16) /* RTA_SRC */
2484                + nla_total_size(16) /* RTA_DST */
2485                + nla_total_size(16) /* RTA_GATEWAY */
2486                + nla_total_size(16) /* RTA_PREFSRC */
2487                + nla_total_size(4) /* RTA_TABLE */
2488                + nla_total_size(4) /* RTA_IIF */
2489                + nla_total_size(4) /* RTA_OIF */
2490                + nla_total_size(4) /* RTA_PRIORITY */
2491                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2492                + nla_total_size(sizeof(struct rta_cacheinfo));
2493 }
2494
2495 static int rt6_fill_node(struct net *net,
2496                          struct sk_buff *skb, struct rt6_info *rt,
2497                          struct in6_addr *dst, struct in6_addr *src,
2498                          int iif, int type, u32 portid, u32 seq,
2499                          int prefix, int nowait, unsigned int flags)
2500 {
2501         struct rtmsg *rtm;
2502         struct nlmsghdr *nlh;
2503         long expires;
2504         u32 table;
2505         struct neighbour *n;
2506
2507         if (prefix) {   /* user wants prefix routes only */
2508                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2509                         /* success since this is not a prefix route */
2510                         return 1;
2511                 }
2512         }
2513
2514         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2515         if (!nlh)
2516                 return -EMSGSIZE;
2517
2518         rtm = nlmsg_data(nlh);
2519         rtm->rtm_family = AF_INET6;
2520         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2521         rtm->rtm_src_len = rt->rt6i_src.plen;
2522         rtm->rtm_tos = 0;
2523         if (rt->rt6i_table)
2524                 table = rt->rt6i_table->tb6_id;
2525         else
2526                 table = RT6_TABLE_UNSPEC;
2527         rtm->rtm_table = table;
2528         if (nla_put_u32(skb, RTA_TABLE, table))
2529                 goto nla_put_failure;
2530         if (rt->rt6i_flags & RTF_REJECT) {
2531                 switch (rt->dst.error) {
2532                 case -EINVAL:
2533                         rtm->rtm_type = RTN_BLACKHOLE;
2534                         break;
2535                 case -EACCES:
2536                         rtm->rtm_type = RTN_PROHIBIT;
2537                         break;
2538                 case -EAGAIN:
2539                         rtm->rtm_type = RTN_THROW;
2540                         break;
2541                 default:
2542                         rtm->rtm_type = RTN_UNREACHABLE;
2543                         break;
2544                 }
2545         }
2546         else if (rt->rt6i_flags & RTF_LOCAL)
2547                 rtm->rtm_type = RTN_LOCAL;
2548         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2549                 rtm->rtm_type = RTN_LOCAL;
2550         else
2551                 rtm->rtm_type = RTN_UNICAST;
2552         rtm->rtm_flags = 0;
2553         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2554         rtm->rtm_protocol = rt->rt6i_protocol;
2555         if (rt->rt6i_flags & RTF_DYNAMIC)
2556                 rtm->rtm_protocol = RTPROT_REDIRECT;
2557         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2558                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2559                         rtm->rtm_protocol = RTPROT_RA;
2560                 else
2561                         rtm->rtm_protocol = RTPROT_KERNEL;
2562         }
2563
2564         if (rt->rt6i_flags & RTF_CACHE)
2565                 rtm->rtm_flags |= RTM_F_CLONED;
2566
2567         if (dst) {
2568                 if (nla_put(skb, RTA_DST, 16, dst))
2569                         goto nla_put_failure;
2570                 rtm->rtm_dst_len = 128;
2571         } else if (rtm->rtm_dst_len)
2572                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2573                         goto nla_put_failure;
2574 #ifdef CONFIG_IPV6_SUBTREES
2575         if (src) {
2576                 if (nla_put(skb, RTA_SRC, 16, src))
2577                         goto nla_put_failure;
2578                 rtm->rtm_src_len = 128;
2579         } else if (rtm->rtm_src_len &&
2580                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2581                 goto nla_put_failure;
2582 #endif
2583         if (iif) {
2584 #ifdef CONFIG_IPV6_MROUTE
2585                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2586                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2587                         if (err <= 0) {
2588                                 if (!nowait) {
2589                                         if (err == 0)
2590                                                 return 0;
2591                                         goto nla_put_failure;
2592                                 } else {
2593                                         if (err == -EMSGSIZE)
2594                                                 goto nla_put_failure;
2595                                 }
2596                         }
2597                 } else
2598 #endif
2599                         if (nla_put_u32(skb, RTA_IIF, iif))
2600                                 goto nla_put_failure;
2601         } else if (dst) {
2602                 struct in6_addr saddr_buf;
2603                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2604                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2605                         goto nla_put_failure;
2606         }
2607
2608         if (rt->rt6i_prefsrc.plen) {
2609                 struct in6_addr saddr_buf;
2610                 saddr_buf = rt->rt6i_prefsrc.addr;
2611                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2612                         goto nla_put_failure;
2613         }
2614
2615         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2616                 goto nla_put_failure;
2617
2618         n = rt->n;
2619         if (n) {
2620                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2621                         goto nla_put_failure;
2622         }
2623
2624         if (rt->dst.dev &&
2625             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2626                 goto nla_put_failure;
2627         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2628                 goto nla_put_failure;
2629
2630         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2631
2632         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2633                 goto nla_put_failure;
2634
2635         return nlmsg_end(skb, nlh);
2636
2637 nla_put_failure:
2638         nlmsg_cancel(skb, nlh);
2639         return -EMSGSIZE;
2640 }
2641
2642 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2643 {
2644         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2645         int prefix;
2646
2647         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2648                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2649                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2650         } else
2651                 prefix = 0;
2652
2653         return rt6_fill_node(arg->net,
2654                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2655                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2656                      prefix, 0, NLM_F_MULTI);
2657 }
2658
2659 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2660 {
2661         struct net *net = sock_net(in_skb->sk);
2662         struct nlattr *tb[RTA_MAX+1];
2663         struct rt6_info *rt;
2664         struct sk_buff *skb;
2665         struct rtmsg *rtm;
2666         struct flowi6 fl6;
2667         int err, iif = 0, oif = 0;
2668
2669         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2670         if (err < 0)
2671                 goto errout;
2672
2673         err = -EINVAL;
2674         memset(&fl6, 0, sizeof(fl6));
2675
2676         if (tb[RTA_SRC]) {
2677                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2678                         goto errout;
2679
2680                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2681         }
2682
2683         if (tb[RTA_DST]) {
2684                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2685                         goto errout;
2686
2687                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2688         }
2689
2690         if (tb[RTA_IIF])
2691                 iif = nla_get_u32(tb[RTA_IIF]);
2692
2693         if (tb[RTA_OIF])
2694                 oif = nla_get_u32(tb[RTA_OIF]);
2695
2696         if (iif) {
2697                 struct net_device *dev;
2698                 int flags = 0;
2699
2700                 dev = __dev_get_by_index(net, iif);
2701                 if (!dev) {
2702                         err = -ENODEV;
2703                         goto errout;
2704                 }
2705
2706                 fl6.flowi6_iif = iif;
2707
2708                 if (!ipv6_addr_any(&fl6.saddr))
2709                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2710
2711                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2712                                                                flags);
2713         } else {
2714                 fl6.flowi6_oif = oif;
2715
2716                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2717         }
2718
2719         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2720         if (!skb) {
2721                 ip6_rt_put(rt);
2722                 err = -ENOBUFS;
2723                 goto errout;
2724         }
2725
2726         /* Reserve room for dummy headers, this skb can pass
2727            through good chunk of routing engine.
2728          */
2729         skb_reset_mac_header(skb);
2730         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2731
2732         skb_dst_set(skb, &rt->dst);
2733
2734         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2735                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2736                             nlh->nlmsg_seq, 0, 0, 0);
2737         if (err < 0) {
2738                 kfree_skb(skb);
2739                 goto errout;
2740         }
2741
2742         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2743 errout:
2744         return err;
2745 }
2746
2747 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2748 {
2749         struct sk_buff *skb;
2750         struct net *net = info->nl_net;
2751         u32 seq;
2752         int err;
2753
2754         err = -ENOBUFS;
2755         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2756
2757         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2758         if (!skb)
2759                 goto errout;
2760
2761         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2762                                 event, info->portid, seq, 0, 0, 0);
2763         if (err < 0) {
2764                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2765                 WARN_ON(err == -EMSGSIZE);
2766                 kfree_skb(skb);
2767                 goto errout;
2768         }
2769         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2770                     info->nlh, gfp_any());
2771         return;
2772 errout:
2773         if (err < 0)
2774                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2775 }
2776
2777 static int ip6_route_dev_notify(struct notifier_block *this,
2778                                 unsigned long event, void *data)
2779 {
2780         struct net_device *dev = (struct net_device *)data;
2781         struct net *net = dev_net(dev);
2782
2783         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2784                 net->ipv6.ip6_null_entry->dst.dev = dev;
2785                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2786 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2787                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2788                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2789                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2790                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2791 #endif
2792         }
2793
2794         return NOTIFY_OK;
2795 }
2796
2797 /*
2798  *      /proc
2799  */
2800
2801 #ifdef CONFIG_PROC_FS
2802
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the /proc handlers below use seq_file) - confirm whether it
 * is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2811
2812 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2813 {
2814         struct seq_file *m = p_arg;
2815         struct neighbour *n;
2816
2817         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2818
2819 #ifdef CONFIG_IPV6_SUBTREES
2820         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2821 #else
2822         seq_puts(m, "00000000000000000000000000000000 00 ");
2823 #endif
2824         n = rt->n;
2825         if (n) {
2826                 seq_printf(m, "%pi6", n->primary_key);
2827         } else {
2828                 seq_puts(m, "00000000000000000000000000000000");
2829         }
2830         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2831                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2832                    rt->dst.__use, rt->rt6i_flags,
2833                    rt->dst.dev ? rt->dst.dev->name : "");
2834         return 0;
2835 }
2836
2837 static int ipv6_route_show(struct seq_file *m, void *v)
2838 {
2839         struct net *net = (struct net *)m->private;
2840         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2841         return 0;
2842 }
2843
2844 static int ipv6_route_open(struct inode *inode, struct file *file)
2845 {
2846         return single_open_net(inode, file, ipv6_route_show);
2847 }
2848
2849 static const struct file_operations ipv6_route_proc_fops = {
2850         .owner          = THIS_MODULE,
2851         .open           = ipv6_route_open,
2852         .read           = seq_read,
2853         .llseek         = seq_lseek,
2854         .release        = single_release_net,
2855 };
2856
2857 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2858 {
2859         struct net *net = (struct net *)seq->private;
2860         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2861                    net->ipv6.rt6_stats->fib_nodes,
2862                    net->ipv6.rt6_stats->fib_route_nodes,
2863                    net->ipv6.rt6_stats->fib_rt_alloc,
2864                    net->ipv6.rt6_stats->fib_rt_entries,
2865                    net->ipv6.rt6_stats->fib_rt_cache,
2866                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2867                    net->ipv6.rt6_stats->fib_discarded_routes);
2868
2869         return 0;
2870 }
2871
2872 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2873 {
2874         return single_open_net(inode, file, rt6_stats_seq_show);
2875 }
2876
2877 static const struct file_operations rt6_stats_seq_fops = {
2878         .owner   = THIS_MODULE,
2879         .open    = rt6_stats_seq_open,
2880         .read    = seq_read,
2881         .llseek  = seq_lseek,
2882         .release = single_release_net,
2883 };
2884 #endif  /* CONFIG_PROC_FS */
2885
2886 #ifdef CONFIG_SYSCTL
2887
2888 static
2889 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2890                               void __user *buffer, size_t *lenp, loff_t *ppos)
2891 {
2892         struct net *net;
2893         int delay;
2894         if (!write)
2895                 return -EINVAL;
2896
2897         net = (struct net *)ctl->extra1;
2898         delay = net->ipv6.sysctl.flush_delay;
2899         proc_dointvec(ctl, write, buffer, lenp, ppos);
2900         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2901         return 0;
2902 }
2903
2904 ctl_table ipv6_route_table_template[] = {
2905         {
2906                 .procname       =       "flush",
2907                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2908                 .maxlen         =       sizeof(int),
2909                 .mode           =       0200,
2910                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2911         },
2912         {
2913                 .procname       =       "gc_thresh",
2914                 .data           =       &ip6_dst_ops_template.gc_thresh,
2915                 .maxlen         =       sizeof(int),
2916                 .mode           =       0644,
2917                 .proc_handler   =       proc_dointvec,
2918         },
2919         {
2920                 .procname       =       "max_size",
2921                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2922                 .maxlen         =       sizeof(int),
2923                 .mode           =       0644,
2924                 .proc_handler   =       proc_dointvec,
2925         },
2926         {
2927                 .procname       =       "gc_min_interval",
2928                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2929                 .maxlen         =       sizeof(int),
2930                 .mode           =       0644,
2931                 .proc_handler   =       proc_dointvec_jiffies,
2932         },
2933         {
2934                 .procname       =       "gc_timeout",
2935                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2936                 .maxlen         =       sizeof(int),
2937                 .mode           =       0644,
2938                 .proc_handler   =       proc_dointvec_jiffies,
2939         },
2940         {
2941                 .procname       =       "gc_interval",
2942                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2943                 .maxlen         =       sizeof(int),
2944                 .mode           =       0644,
2945                 .proc_handler   =       proc_dointvec_jiffies,
2946         },
2947         {
2948                 .procname       =       "gc_elasticity",
2949                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2950                 .maxlen         =       sizeof(int),
2951                 .mode           =       0644,
2952                 .proc_handler   =       proc_dointvec,
2953         },
2954         {
2955                 .procname       =       "mtu_expires",
2956                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2957                 .maxlen         =       sizeof(int),
2958                 .mode           =       0644,
2959                 .proc_handler   =       proc_dointvec_jiffies,
2960         },
2961         {
2962                 .procname       =       "min_adv_mss",
2963                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2964                 .maxlen         =       sizeof(int),
2965                 .mode           =       0644,
2966                 .proc_handler   =       proc_dointvec,
2967         },
2968         {
2969                 .procname       =       "gc_min_interval_ms",
2970                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2971                 .maxlen         =       sizeof(int),
2972                 .mode           =       0644,
2973                 .proc_handler   =       proc_dointvec_ms_jiffies,
2974         },
2975         { }
2976 };
2977
2978 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2979 {
2980         struct ctl_table *table;
2981
2982         table = kmemdup(ipv6_route_table_template,
2983                         sizeof(ipv6_route_table_template),
2984                         GFP_KERNEL);
2985
2986         if (table) {
2987                 table[0].data = &net->ipv6.sysctl.flush_delay;
2988                 table[0].extra1 = net;
2989                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2990                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2991                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2992                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2993                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2994                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2995                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2996                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2997                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2998
2999                 /* Don't export sysctls to unprivileged users */
3000                 if (net->user_ns != &init_user_ns)
3001                         table[0].procname = NULL;
3002         }
3003
3004         return table;
3005 }
3006 #endif
3007
3008 static int __net_init ip6_route_net_init(struct net *net)
3009 {
3010         int ret = -ENOMEM;
3011
3012         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3013                sizeof(net->ipv6.ip6_dst_ops));
3014
3015         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3016                 goto out_ip6_dst_ops;
3017
3018         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3019                                            sizeof(*net->ipv6.ip6_null_entry),
3020                                            GFP_KERNEL);
3021         if (!net->ipv6.ip6_null_entry)
3022                 goto out_ip6_dst_entries;
3023         net->ipv6.ip6_null_entry->dst.path =
3024                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3025         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3026         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3027                          ip6_template_metrics, true);
3028
3029 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3030         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3031                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3032                                                GFP_KERNEL);
3033         if (!net->ipv6.ip6_prohibit_entry)
3034                 goto out_ip6_null_entry;
3035         net->ipv6.ip6_prohibit_entry->dst.path =
3036                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3037         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3038         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3039                          ip6_template_metrics, true);
3040
3041         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3042                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3043                                                GFP_KERNEL);
3044         if (!net->ipv6.ip6_blk_hole_entry)
3045                 goto out_ip6_prohibit_entry;
3046         net->ipv6.ip6_blk_hole_entry->dst.path =
3047                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3048         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3049         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3050                          ip6_template_metrics, true);
3051 #endif
3052
3053         net->ipv6.sysctl.flush_delay = 0;
3054         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3055         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3056         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3057         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3058         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3059         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3060         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3061
3062         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3063
3064         ret = 0;
3065 out:
3066         return ret;
3067
3068 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3069 out_ip6_prohibit_entry:
3070         kfree(net->ipv6.ip6_prohibit_entry);
3071 out_ip6_null_entry:
3072         kfree(net->ipv6.ip6_null_entry);
3073 #endif
3074 out_ip6_dst_entries:
3075         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3076 out_ip6_dst_ops:
3077         goto out;
3078 }
3079
3080 static void __net_exit ip6_route_net_exit(struct net *net)
3081 {
3082         kfree(net->ipv6.ip6_null_entry);
3083 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3084         kfree(net->ipv6.ip6_prohibit_entry);
3085         kfree(net->ipv6.ip6_blk_hole_entry);
3086 #endif
3087         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3088 }
3089
3090 static int __net_init ip6_route_net_init_late(struct net *net)
3091 {
3092 #ifdef CONFIG_PROC_FS
3093         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3094         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3095 #endif
3096         return 0;
3097 }
3098
3099 static void __net_exit ip6_route_net_exit_late(struct net *net)
3100 {
3101 #ifdef CONFIG_PROC_FS
3102         proc_net_remove(net, "ipv6_route");
3103         proc_net_remove(net, "rt6_stats");
3104 #endif
3105 }
3106
3107 static struct pernet_operations ip6_route_net_ops = {
3108         .init = ip6_route_net_init,
3109         .exit = ip6_route_net_exit,
3110 };
3111
3112 static int __net_init ipv6_inetpeer_init(struct net *net)
3113 {
3114         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3115
3116         if (!bp)
3117                 return -ENOMEM;
3118         inet_peer_base_init(bp);
3119         net->ipv6.peers = bp;
3120         return 0;
3121 }
3122
3123 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3124 {
3125         struct inet_peer_base *bp = net->ipv6.peers;
3126
3127         net->ipv6.peers = NULL;
3128         inetpeer_invalidate_tree(bp);
3129         kfree(bp);
3130 }
3131
3132 static struct pernet_operations ipv6_inetpeer_ops = {
3133         .init   =       ipv6_inetpeer_init,
3134         .exit   =       ipv6_inetpeer_exit,
3135 };
3136
3137 static struct pernet_operations ip6_route_net_late_ops = {
3138         .init = ip6_route_net_init_late,
3139         .exit = ip6_route_net_exit_late,
3140 };
3141
3142 static struct notifier_block ip6_route_dev_notifier = {
3143         .notifier_call = ip6_route_dev_notify,
3144         .priority = 0,
3145 };
3146
3147 int __init ip6_route_init(void)
3148 {
3149         int ret;
3150
3151         ret = -ENOMEM;
3152         ip6_dst_ops_template.kmem_cachep =
3153                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3154                                   SLAB_HWCACHE_ALIGN, NULL);
3155         if (!ip6_dst_ops_template.kmem_cachep)
3156                 goto out;
3157
3158         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3159         if (ret)
3160                 goto out_kmem_cache;
3161
3162         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3163         if (ret)
3164                 goto out_dst_entries;
3165
3166         ret = register_pernet_subsys(&ip6_route_net_ops);
3167         if (ret)
3168                 goto out_register_inetpeer;
3169
3170         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3171
3172         /* Registering of the loopback is done before this portion of code,
3173          * the loopback reference in rt6_info will not be taken, do it
3174          * manually for init_net */
3175         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3176         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3177   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3178         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3179         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3180         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3181         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3182   #endif
3183         ret = fib6_init();
3184         if (ret)
3185                 goto out_register_subsys;
3186
3187         ret = xfrm6_init();
3188         if (ret)
3189                 goto out_fib6_init;
3190
3191         ret = fib6_rules_init();
3192         if (ret)
3193                 goto xfrm6_init;
3194
3195         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3196         if (ret)
3197                 goto fib6_rules_init;
3198
3199         ret = -ENOBUFS;
3200         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3201             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3202             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3203                 goto out_register_late_subsys;
3204
3205         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3206         if (ret)
3207                 goto out_register_late_subsys;
3208
3209 out:
3210         return ret;
3211
3212 out_register_late_subsys:
3213         unregister_pernet_subsys(&ip6_route_net_late_ops);
3214 fib6_rules_init:
3215         fib6_rules_cleanup();
3216 xfrm6_init:
3217         xfrm6_fini();
3218 out_fib6_init:
3219         fib6_gc_cleanup();
3220 out_register_subsys:
3221         unregister_pernet_subsys(&ip6_route_net_ops);
3222 out_register_inetpeer:
3223         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3224 out_dst_entries:
3225         dst_entries_destroy(&ip6_dst_blackhole_ops);
3226 out_kmem_cache:
3227         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3228         goto out;
3229 }
3230
3231 void ip6_route_cleanup(void)
3232 {
3233         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3234         unregister_pernet_subsys(&ip6_route_net_late_ops);
3235         fib6_rules_cleanup();
3236         xfrm6_fini();
3237         fib6_gc_cleanup();
3238         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3239         unregister_pernet_subsys(&ip6_route_net_ops);
3240         dst_entries_destroy(&ip6_dst_blackhole_ops);
3241         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3242 }
This page took 0.232006 seconds and 4 git commands to generate.