]> Git Repo - linux.git/blame - net/ipv6/route.c
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 151
[linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
3c618c1d 62#include <net/rtnh.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/* Result of checking the neighbour entry behind a next hop.
 *
 * The failure codes are negative: rt6_score_route() hands them back
 * unchanged in place of a (non-negative) route score, and find_match()
 * tells them apart by value.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
702cea56
DA
105static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
106 int strict);
8d1c802b 107static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 108static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 109 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 110 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
111 int iif, int type, u32 portid, u32 seq,
112 unsigned int flags);
7e4b5128 113static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
114 const struct in6_addr *daddr,
115 const struct in6_addr *saddr);
1da177e4 116
70ceb4f5 117#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 118static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev,
95c96174 122 unsigned int pref);
8d1c802b 123static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 124 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
125 const struct in6_addr *gwaddr,
126 struct net_device *dev);
70ceb4f5
YH
127#endif
128
8d0b94af
MKL
129struct uncached_list {
130 spinlock_t lock;
131 struct list_head head;
132};
133
134static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
135
510c321b 136void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
137{
138 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
139
8d0b94af
MKL
140 rt->rt6i_uncached_list = ul;
141
142 spin_lock_bh(&ul->lock);
143 list_add_tail(&rt->rt6i_uncached, &ul->head);
144 spin_unlock_bh(&ul->lock);
145}
146
510c321b 147void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
148{
149 if (!list_empty(&rt->rt6i_uncached)) {
150 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 151 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
152
153 spin_lock_bh(&ul->lock);
154 list_del(&rt->rt6i_uncached);
81eb8447 155 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
156 spin_unlock_bh(&ul->lock);
157 }
158}
159
160static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
161{
162 struct net_device *loopback_dev = net->loopback_dev;
163 int cpu;
164
e332bc67
EB
165 if (dev == loopback_dev)
166 return;
167
8d0b94af
MKL
168 for_each_possible_cpu(cpu) {
169 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
170 struct rt6_info *rt;
171
172 spin_lock_bh(&ul->lock);
173 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
174 struct inet6_dev *rt_idev = rt->rt6i_idev;
175 struct net_device *rt_dev = rt->dst.dev;
176
e332bc67 177 if (rt_idev->dev == dev) {
8d0b94af
MKL
178 rt->rt6i_idev = in6_dev_get(loopback_dev);
179 in6_dev_put(rt_idev);
180 }
181
e332bc67 182 if (rt_dev == dev) {
8d0b94af
MKL
183 rt->dst.dev = loopback_dev;
184 dev_hold(rt->dst.dev);
185 dev_put(rt_dev);
186 }
187 }
188 spin_unlock_bh(&ul->lock);
189 }
190}
191
f8a1b43b 192static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
193 struct sk_buff *skb,
194 const void *daddr)
39232973 195{
a7563f34 196 if (!ipv6_addr_any(p))
39232973 197 return (const void *) p;
f894cbf8
DM
198 else if (skb)
199 return &ipv6_hdr(skb)->daddr;
39232973
DM
200 return daddr;
201}
202
f8a1b43b
DA
203struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204 struct net_device *dev,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct neighbour *n;
209
f8a1b43b
DA
210 daddr = choose_neigh_daddr(gw, skb, daddr);
211 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
212 if (n)
213 return n;
7adf3246
SB
214
215 n = neigh_create(&nd_tbl, daddr, dev);
216 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
217}
218
219static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220 struct sk_buff *skb,
221 const void *daddr)
222{
223 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224
225 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
226}
227
63fca65d
JA
228static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
229{
230 struct net_device *dev = dst->dev;
231 struct rt6_info *rt = (struct rt6_info *)dst;
232
f8a1b43b 233 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
234 if (!daddr)
235 return;
236 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
237 return;
238 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
239 return;
240 __ipv6_confirm_neigh(dev, daddr);
241}
242
9a7ec3a9 243static struct dst_ops ip6_dst_ops_template = {
1da177e4 244 .family = AF_INET6,
1da177e4
LT
245 .gc = ip6_dst_gc,
246 .gc_thresh = 1024,
247 .check = ip6_dst_check,
0dbaee3b 248 .default_advmss = ip6_default_advmss,
ebb762f2 249 .mtu = ip6_mtu,
d4ead6b3 250 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
251 .destroy = ip6_dst_destroy,
252 .ifdown = ip6_dst_ifdown,
253 .negative_advice = ip6_negative_advice,
254 .link_failure = ip6_link_failure,
255 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 256 .redirect = rt6_do_redirect,
9f8955cc 257 .local_out = __ip6_local_out,
f8a1b43b 258 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 259 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
260};
261
ebb762f2 262static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 263{
618f9bc7
SK
264 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265
266 return mtu ? : dst->dev->mtu;
ec831ea7
RD
267}
268
6700c270
DM
269static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
270 struct sk_buff *skb, u32 mtu)
14e50e57
DM
271{
272}
273
6700c270
DM
/* dst_ops->redirect for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* Redirects are ignored on blackhole routes. */
}
278
14e50e57
DM
279static struct dst_ops ip6_dst_blackhole_ops = {
280 .family = AF_INET6,
14e50e57
DM
281 .destroy = ip6_dst_destroy,
282 .check = ip6_dst_check,
ebb762f2 283 .mtu = ip6_blackhole_mtu,
214f45c9 284 .default_advmss = ip6_default_advmss,
14e50e57 285 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 286 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 287 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 288 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
289};
290
62fa8a84 291static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 292 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
293};
294
8d1c802b 295static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
296 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
297 .fib6_protocol = RTPROT_KERNEL,
298 .fib6_metric = ~(u32)0,
f05713e0 299 .fib6_ref = REFCOUNT_INIT(1),
421842ed
DA
300 .fib6_type = RTN_UNREACHABLE,
301 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
302};
303
fb0af4c7 304static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
305 .dst = {
306 .__refcnt = ATOMIC_INIT(1),
307 .__use = 1,
2c20cbd7 308 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 309 .error = -ENETUNREACH,
d8d1f30b
CG
310 .input = ip6_pkt_discard,
311 .output = ip6_pkt_discard_out,
1da177e4
LT
312 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
314};
315
101367c2
TG
316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
317
fb0af4c7 318static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
319 .dst = {
320 .__refcnt = ATOMIC_INIT(1),
321 .__use = 1,
2c20cbd7 322 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 323 .error = -EACCES,
d8d1f30b
CG
324 .input = ip6_pkt_prohibit,
325 .output = ip6_pkt_prohibit_out,
101367c2
TG
326 },
327 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
340};
341
342#endif
343
ebfa45f0
MKL
344static void rt6_info_init(struct rt6_info *rt)
345{
346 struct dst_entry *dst = &rt->dst;
347
348 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
349 INIT_LIST_HEAD(&rt->rt6i_uncached);
350}
351
1da177e4 352/* allocate dst with ip6_dst_ops */
93531c67
DA
353struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
354 int flags)
1da177e4 355{
97bab73f 356 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 357 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 358
81eb8447 359 if (rt) {
ebfa45f0 360 rt6_info_init(rt);
81eb8447
WW
361 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
362 }
8104891b 363
cf911662 364 return rt;
1da177e4 365}
9ab179d8 366EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 367
1da177e4
LT
368static void ip6_dst_destroy(struct dst_entry *dst)
369{
370 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 371 struct fib6_info *from;
8d0b94af 372 struct inet6_dev *idev;
1da177e4 373
1620a336 374 ip_dst_metrics_put(dst);
8d0b94af
MKL
375 rt6_uncached_list_del(rt);
376
377 idev = rt->rt6i_idev;
38308473 378 if (idev) {
1da177e4
LT
379 rt->rt6i_idev = NULL;
380 in6_dev_put(idev);
1ab1457c 381 }
1716a961 382
0e233874 383 from = xchg((__force struct fib6_info **)&rt->from, NULL);
93531c67 384 fib6_info_release(from);
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
e5645f51
WW
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
97cac082 400 }
1da177e4
LT
401 }
402}
403
5973fb1e
MKL
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
a68886a6
DA
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
a68886a6 421 } else if (from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 423 fib6_check_expired(from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
b1d40991
DA
428void fib6_select_path(const struct net *net, struct fib6_result *res,
429 struct flowi6 *fl6, int oif, bool have_oif_match,
430 const struct sk_buff *skb, int strict)
51ebd318 431{
8d1c802b 432 struct fib6_info *sibling, *next_sibling;
b1d40991
DA
433 struct fib6_info *match = res->f6i;
434
435 if (!match->fib6_nsiblings || have_oif_match)
436 goto out;
51ebd318 437
b673d6cc
JS
438 /* We might have already computed the hash for ICMPv6 errors. In such
439 * case it will always be non-zero. Otherwise now is the time to do it.
440 */
441 if (!fl6->mp_hash)
b4bac172 442 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 443
ad1601ae 444 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
b1d40991 445 goto out;
3d709f69 446
93c2fb25
DA
447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
448 fib6_siblings) {
702cea56 449 const struct fib6_nh *nh = &sibling->fib6_nh;
5e670d84
DA
450 int nh_upper_bound;
451
702cea56 452 nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
5e670d84 453 if (fl6->mp_hash > nh_upper_bound)
3d709f69 454 continue;
702cea56 455 if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
3d709f69
IS
456 break;
457 match = sibling;
458 break;
459 }
460
b1d40991
DA
461out:
462 res->f6i = match;
463 res->nh = &match->fib6_nh;
51ebd318
ND
464}
465
1da177e4 466/*
66f5d6ce 467 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
468 */
469
0c59d006
DA
470static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
471 const struct in6_addr *saddr, int oif, int flags)
472{
473 const struct net_device *dev;
474
475 if (nh->fib_nh_flags & RTNH_F_DEAD)
476 return false;
477
478 dev = nh->fib_nh_dev;
479 if (oif) {
480 if (dev->ifindex == oif)
481 return true;
482 } else {
483 if (ipv6_chk_addr(net, saddr, dev,
484 flags & RT6_LOOKUP_F_IFACE))
485 return true;
486 }
487
488 return false;
489}
490
75ef7389
DA
491static void rt6_device_match(struct net *net, struct fib6_result *res,
492 const struct in6_addr *saddr, int oif, int flags)
1da177e4 493{
75ef7389
DA
494 struct fib6_info *f6i = res->f6i;
495 struct fib6_info *spf6i;
496 struct fib6_nh *nh;
1da177e4 497
75ef7389
DA
498 if (!oif && ipv6_addr_any(saddr)) {
499 nh = &f6i->fib6_nh;
7d21fec9
DA
500 if (!(nh->fib_nh_flags & RTNH_F_DEAD))
501 goto out;
75ef7389 502 }
dd3abc4e 503
75ef7389
DA
504 for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
505 nh = &spf6i->fib6_nh;
506 if (__rt6_device_match(net, nh, saddr, oif, flags)) {
507 res->f6i = spf6i;
7d21fec9 508 goto out;
75ef7389 509 }
dd3abc4e 510 }
1da177e4 511
75ef7389
DA
512 if (oif && flags & RT6_LOOKUP_F_IFACE) {
513 res->f6i = net->ipv6.fib6_null_entry;
7d21fec9
DA
514 nh = &res->f6i->fib6_nh;
515 goto out;
75ef7389 516 }
8067bb8c 517
7d21fec9
DA
518 nh = &f6i->fib6_nh;
519 if (nh->fib_nh_flags & RTNH_F_DEAD) {
75ef7389 520 res->f6i = net->ipv6.fib6_null_entry;
7d21fec9 521 nh = &res->f6i->fib6_nh;
75ef7389 522 }
7d21fec9
DA
523out:
524 res->nh = nh;
525 res->fib6_type = res->f6i->fib6_type;
526 res->fib6_flags = res->f6i->fib6_flags;
1da177e4
LT
527}
528
27097255 529#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
530struct __rt6_probe_work {
531 struct work_struct work;
532 struct in6_addr target;
533 struct net_device *dev;
534};
535
536static void rt6_probe_deferred(struct work_struct *w)
537{
538 struct in6_addr mcaddr;
539 struct __rt6_probe_work *work =
540 container_of(w, struct __rt6_probe_work, work);
541
542 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 543 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 544 dev_put(work->dev);
662f5533 545 kfree(work);
c2f17e82
HFS
546}
547
cc3a86c8 548static void rt6_probe(struct fib6_nh *fib6_nh)
27097255 549{
f547fac6 550 struct __rt6_probe_work *work = NULL;
5e670d84 551 const struct in6_addr *nh_gw;
f2c31e32 552 struct neighbour *neigh;
5e670d84 553 struct net_device *dev;
f547fac6 554 struct inet6_dev *idev;
5e670d84 555
27097255
YH
556 /*
557 * Okay, this does not seem to be appropriate
558 * for now, however, we need to check if it
559 * is really so; aka Router Reachability Probing.
560 *
561 * Router Reachability Probe MUST be rate-limited
562 * to no more than one per minute.
563 */
cc3a86c8 564 if (fib6_nh->fib_nh_gw_family)
7ff74a59 565 return;
5e670d84 566
cc3a86c8
DA
567 nh_gw = &fib6_nh->fib_nh_gw6;
568 dev = fib6_nh->fib_nh_dev;
2152caea 569 rcu_read_lock_bh();
f547fac6 570 idev = __in6_dev_get(dev);
5e670d84 571 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 572 if (neigh) {
8d6c31bf
MKL
573 if (neigh->nud_state & NUD_VALID)
574 goto out;
575
2152caea 576 write_lock(&neigh->lock);
990edb42
MKL
577 if (!(neigh->nud_state & NUD_VALID) &&
578 time_after(jiffies,
dcd1f572 579 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
580 work = kmalloc(sizeof(*work), GFP_ATOMIC);
581 if (work)
582 __neigh_set_probe_once(neigh);
c2f17e82 583 }
2152caea 584 write_unlock(&neigh->lock);
cc3a86c8 585 } else if (time_after(jiffies, fib6_nh->last_probe +
f547fac6 586 idev->cnf.rtr_probe_interval)) {
990edb42 587 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 588 }
990edb42
MKL
589
590 if (work) {
cc3a86c8 591 fib6_nh->last_probe = jiffies;
990edb42 592 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
593 work->target = *nh_gw;
594 dev_hold(dev);
595 work->dev = dev;
990edb42
MKL
596 schedule_work(&work->work);
597 }
598
8d6c31bf 599out:
2152caea 600 rcu_read_unlock_bh();
27097255
YH
601}
602#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* nothing to do */
}
606#endif
607
1da177e4 608/*
554cfb7e 609 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 610 */
1ba9a895 611static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
1da177e4 612{
afc154e9 613 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 614 struct neighbour *neigh;
f2c31e32 615
145a3621 616 rcu_read_lock_bh();
1ba9a895
DA
617 neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
618 &fib6_nh->fib_nh_gw6);
145a3621
YH
619 if (neigh) {
620 read_lock(&neigh->lock);
554cfb7e 621 if (neigh->nud_state & NUD_VALID)
afc154e9 622 ret = RT6_NUD_SUCCEED;
398bcbeb 623#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 624 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 625 ret = RT6_NUD_SUCCEED;
7e980569
JB
626 else
627 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 628#endif
145a3621 629 read_unlock(&neigh->lock);
afc154e9
HFS
630 } else {
631 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 632 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 633 }
145a3621
YH
634 rcu_read_unlock_bh();
635
a5a81f0b 636 return ret;
1da177e4
LT
637}
638
702cea56
DA
639static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
640 int strict)
1da177e4 641{
6e1809a5
DA
642 int m = 0;
643
644 if (!oif || nh->fib_nh_dev->ifindex == oif)
645 m = 2;
1ab1457c 646
77d16f45 647 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 648 return RT6_NUD_FAIL_HARD;
ebacaaa0 649#ifdef CONFIG_IPV6_ROUTER_PREF
702cea56 650 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
ebacaaa0 651#endif
1ba9a895 652 if ((strict & RT6_LOOKUP_F_REACHABLE) &&
702cea56 653 !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
1ba9a895 654 int n = rt6_check_neigh(nh);
afc154e9
HFS
655 if (n < 0)
656 return n;
657 }
554cfb7e
YH
658 return m;
659}
660
28679ed1
DA
661static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
662 int oif, int strict, int *mpri, bool *do_rr)
554cfb7e 663{
afc154e9 664 bool match_do_rr = false;
28679ed1
DA
665 bool rc = false;
666 int m;
35103d11 667
28679ed1 668 if (nh->fib_nh_flags & RTNH_F_DEAD)
8067bb8c
IS
669 goto out;
670
28679ed1
DA
671 if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
672 nh->fib_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 673 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 674 goto out;
f11e6659 675
28679ed1 676 m = rt6_score_route(nh, fib6_flags, oif, strict);
7e980569 677 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
678 match_do_rr = true;
679 m = 0; /* lowest valid score */
7e980569 680 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 681 goto out;
afc154e9
HFS
682 }
683
684 if (strict & RT6_LOOKUP_F_REACHABLE)
28679ed1 685 rt6_probe(nh);
f11e6659 686
7e980569 687 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 688 if (m > *mpri) {
afc154e9 689 *do_rr = match_do_rr;
f11e6659 690 *mpri = m;
28679ed1 691 rc = true;
f11e6659 692 }
f11e6659 693out:
28679ed1 694 return rc;
f11e6659
DM
695}
696
b7bc4b6a 697static void __find_rr_leaf(struct fib6_info *f6i_start,
30c15f03 698 struct fib6_info *nomatch, u32 metric,
b7bc4b6a 699 struct fib6_result *res, struct fib6_info **cont,
30c15f03 700 int oif, int strict, bool *do_rr, int *mpri)
f11e6659 701{
b7bc4b6a 702 struct fib6_info *f6i;
1da177e4 703
b7bc4b6a
DA
704 for (f6i = f6i_start;
705 f6i && f6i != nomatch;
706 f6i = rcu_dereference(f6i->fib6_next)) {
30c15f03
DA
707 struct fib6_nh *nh;
708
b7bc4b6a
DA
709 if (cont && f6i->fib6_metric != metric) {
710 *cont = f6i;
30c15f03 711 return;
9fbdcfaf
SK
712 }
713
b7bc4b6a 714 if (fib6_check_expired(f6i))
28679ed1
DA
715 continue;
716
b7bc4b6a
DA
717 nh = &f6i->fib6_nh;
718 if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
719 res->f6i = f6i;
720 res->nh = nh;
7d21fec9
DA
721 res->fib6_flags = f6i->fib6_flags;
722 res->fib6_type = f6i->fib6_type;
b7bc4b6a 723 }
9fbdcfaf 724 }
30c15f03 725}
9fbdcfaf 726
b7bc4b6a
DA
727static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
728 struct fib6_info *rr_head, int oif, int strict,
729 bool *do_rr, struct fib6_result *res)
30c15f03 730{
b7bc4b6a
DA
731 u32 metric = rr_head->fib6_metric;
732 struct fib6_info *cont = NULL;
30c15f03 733 int mpri = -1;
9fbdcfaf 734
b7bc4b6a 735 __find_rr_leaf(rr_head, NULL, metric, res, &cont,
30c15f03 736 oif, strict, do_rr, &mpri);
28679ed1 737
b7bc4b6a 738 __find_rr_leaf(leaf, rr_head, metric, res, &cont,
30c15f03 739 oif, strict, do_rr, &mpri);
9fbdcfaf 740
b7bc4b6a
DA
741 if (res->f6i || !cont)
742 return;
9fbdcfaf 743
b7bc4b6a 744 __find_rr_leaf(cont, NULL, metric, res, NULL,
30c15f03 745 oif, strict, do_rr, &mpri);
f11e6659 746}
1da177e4 747
b7bc4b6a
DA
748static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
749 struct fib6_result *res, int strict)
f11e6659 750{
8d1c802b 751 struct fib6_info *leaf = rcu_dereference(fn->leaf);
b7bc4b6a 752 struct fib6_info *rt0;
afc154e9 753 bool do_rr = false;
17ecf590 754 int key_plen;
1da177e4 755
b7bc4b6a
DA
756 /* make sure this function or its helpers sets f6i */
757 res->f6i = NULL;
758
421842ed 759 if (!leaf || leaf == net->ipv6.fib6_null_entry)
b7bc4b6a 760 goto out;
8d1040e8 761
66f5d6ce 762 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 763 if (!rt0)
66f5d6ce 764 rt0 = leaf;
1da177e4 765
17ecf590
WW
766 /* Double check to make sure fn is not an intermediate node
767 * and fn->leaf does not points to its child's leaf
768 * (This might happen if all routes under fn are deleted from
769 * the tree and fib6_repair_tree() is called on the node.)
770 */
93c2fb25 771 key_plen = rt0->fib6_dst.plen;
17ecf590 772#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
773 if (rt0->fib6_src.plen)
774 key_plen = rt0->fib6_src.plen;
17ecf590
WW
775#endif
776 if (fn->fn_bit != key_plen)
b7bc4b6a 777 goto out;
1da177e4 778
b7bc4b6a 779 find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
afc154e9 780 if (do_rr) {
8fb11a9a 781 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 782
554cfb7e 783 /* no entries matched; do round-robin */
93c2fb25 784 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 785 next = leaf;
f11e6659 786
66f5d6ce 787 if (next != rt0) {
93c2fb25 788 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 789 /* make sure next is not being deleted from the tree */
93c2fb25 790 if (next->fib6_node)
66f5d6ce 791 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 792 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 793 }
1da177e4 794 }
1da177e4 795
b7bc4b6a
DA
796out:
797 if (!res->f6i) {
798 res->f6i = net->ipv6.fib6_null_entry;
799 res->nh = &res->f6i->fib6_nh;
7d21fec9
DA
800 res->fib6_flags = res->f6i->fib6_flags;
801 res->fib6_type = res->f6i->fib6_type;
b7bc4b6a 802 }
1da177e4
LT
803}
804
85bd05de 805static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
8b9df265 806{
85bd05de
DA
807 return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
808 res->nh->fib_nh_gw_family;
8b9df265
MKL
809}
810
70ceb4f5
YH
811#ifdef CONFIG_IPV6_ROUTE_INFO
812int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 813 const struct in6_addr *gwaddr)
70ceb4f5 814{
c346dca1 815 struct net *net = dev_net(dev);
70ceb4f5
YH
816 struct route_info *rinfo = (struct route_info *) opt;
817 struct in6_addr prefix_buf, *prefix;
818 unsigned int pref;
4bed72e4 819 unsigned long lifetime;
8d1c802b 820 struct fib6_info *rt;
70ceb4f5
YH
821
822 if (len < sizeof(struct route_info)) {
823 return -EINVAL;
824 }
825
826 /* Sanity check for prefix_len and length */
827 if (rinfo->length > 3) {
828 return -EINVAL;
829 } else if (rinfo->prefix_len > 128) {
830 return -EINVAL;
831 } else if (rinfo->prefix_len > 64) {
832 if (rinfo->length < 2) {
833 return -EINVAL;
834 }
835 } else if (rinfo->prefix_len > 0) {
836 if (rinfo->length < 1) {
837 return -EINVAL;
838 }
839 }
840
841 pref = rinfo->route_pref;
842 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 843 return -EINVAL;
70ceb4f5 844
4bed72e4 845 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
846
847 if (rinfo->length == 3)
848 prefix = (struct in6_addr *)rinfo->prefix;
849 else {
850 /* this function is safe */
851 ipv6_addr_prefix(&prefix_buf,
852 (struct in6_addr *)rinfo->prefix,
853 rinfo->prefix_len);
854 prefix = &prefix_buf;
855 }
856
f104a567 857 if (rinfo->prefix_len == 0)
afb1d4b5 858 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
859 else
860 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 861 gwaddr, dev);
70ceb4f5
YH
862
863 if (rt && !lifetime) {
afb1d4b5 864 ip6_del_rt(net, rt);
70ceb4f5
YH
865 rt = NULL;
866 }
867
868 if (!rt && lifetime)
830218c1
DA
869 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
870 dev, pref);
70ceb4f5 871 else if (rt)
93c2fb25
DA
872 rt->fib6_flags = RTF_ROUTEINFO |
873 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
874
875 if (rt) {
1716a961 876 if (!addrconf_finite_timeout(lifetime))
14895687 877 fib6_clean_expires(rt);
1716a961 878 else
14895687 879 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 880
93531c67 881 fib6_info_release(rt);
70ceb4f5
YH
882 }
883 return 0;
884}
885#endif
886
ae90d867
DA
887/*
888 * Misc support functions
889 */
890
891/* called with rcu_lock held */
0d161581 892static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
ae90d867 893{
0d161581 894 struct net_device *dev = res->nh->fib_nh_dev;
ae90d867 895
7d21fec9 896 if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
897 /* for copies of local routes, dst->dev needs to be the
898 * device if it is a master device, the master device if
899 * device is enslaved, and the loopback as the default
900 */
901 if (netif_is_l3_slave(dev) &&
7d21fec9 902 !rt6_need_strict(&res->f6i->fib6_dst.addr))
ae90d867
DA
903 dev = l3mdev_master_dev_rcu(dev);
904 else if (!netif_is_l3_master(dev))
905 dev = dev_net(dev)->loopback_dev;
906 /* last case is netif_is_l3_master(dev) is true in which
907 * case we want dev returned to be dev
908 */
909 }
910
911 return dev;
912}
913
6edb3c96
DA
914static const int fib6_prop[RTN_MAX + 1] = {
915 [RTN_UNSPEC] = 0,
916 [RTN_UNICAST] = 0,
917 [RTN_LOCAL] = 0,
918 [RTN_BROADCAST] = 0,
919 [RTN_ANYCAST] = 0,
920 [RTN_MULTICAST] = 0,
921 [RTN_BLACKHOLE] = -EINVAL,
922 [RTN_UNREACHABLE] = -EHOSTUNREACH,
923 [RTN_PROHIBIT] = -EACCES,
924 [RTN_THROW] = -EAGAIN,
925 [RTN_NAT] = -EINVAL,
926 [RTN_XRESOLVE] = -EINVAL,
927};
928
929static int ip6_rt_type_to_error(u8 fib6_type)
930{
931 return fib6_prop[fib6_type];
932}
933
8d1c802b 934static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
935{
936 unsigned short flags = 0;
937
938 if (rt->dst_nocount)
939 flags |= DST_NOCOUNT;
940 if (rt->dst_nopolicy)
941 flags |= DST_NOPOLICY;
942 if (rt->dst_host)
943 flags |= DST_HOST;
944
945 return flags;
946}
947
7d21fec9 948static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
6edb3c96 949{
7d21fec9 950 rt->dst.error = ip6_rt_type_to_error(fib6_type);
6edb3c96 951
7d21fec9 952 switch (fib6_type) {
6edb3c96
DA
953 case RTN_BLACKHOLE:
954 rt->dst.output = dst_discard_out;
955 rt->dst.input = dst_discard;
956 break;
957 case RTN_PROHIBIT:
958 rt->dst.output = ip6_pkt_prohibit_out;
959 rt->dst.input = ip6_pkt_prohibit;
960 break;
961 case RTN_THROW:
962 case RTN_UNREACHABLE:
963 default:
964 rt->dst.output = ip6_pkt_discard_out;
965 rt->dst.input = ip6_pkt_discard;
966 break;
967 }
968}
969
0d161581 970static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
6edb3c96 971{
7d21fec9 972 struct fib6_info *f6i = res->f6i;
0d161581 973
7d21fec9
DA
974 if (res->fib6_flags & RTF_REJECT) {
975 ip6_rt_init_dst_reject(rt, res->fib6_type);
6edb3c96
DA
976 return;
977 }
978
979 rt->dst.error = 0;
980 rt->dst.output = ip6_output;
981
7d21fec9 982 if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
6edb3c96 983 rt->dst.input = ip6_input;
7d21fec9 984 } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
985 rt->dst.input = ip6_mc_input;
986 } else {
987 rt->dst.input = ip6_forward;
988 }
989
0d161581
DA
990 if (res->nh->fib_nh_lws) {
991 rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
6edb3c96
DA
992 lwtunnel_set_redirect(&rt->dst);
993 }
994
995 rt->dst.lastuse = jiffies;
996}
997
e873e4b9 998/* Caller must already hold reference to @from */
8d1c802b 999static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 1000{
ae90d867 1001 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 1002 rcu_assign_pointer(rt->from, from);
e1255ed4 1003 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
1004}
1005
0d161581
DA
1006/* Caller must already hold reference to f6i in result */
1007static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
ae90d867 1008{
0d161581
DA
1009 const struct fib6_nh *nh = res->nh;
1010 const struct net_device *dev = nh->fib_nh_dev;
1011 struct fib6_info *f6i = res->f6i;
dcd1f572 1012
0d161581 1013 ip6_rt_init_dst(rt, res);
6edb3c96 1014
0d161581 1015 rt->rt6i_dst = f6i->fib6_dst;
dcd1f572 1016 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
7d21fec9 1017 rt->rt6i_flags = res->fib6_flags;
0d161581
DA
1018 if (nh->fib_nh_gw_family) {
1019 rt->rt6i_gateway = nh->fib_nh_gw6;
2b2450ca
DA
1020 rt->rt6i_flags |= RTF_GATEWAY;
1021 }
0d161581 1022 rt6_set_from(rt, f6i);
ae90d867 1023#ifdef CONFIG_IPV6_SUBTREES
0d161581 1024 rt->rt6i_src = f6i->fib6_src;
ae90d867 1025#endif
ae90d867
DA
1026}
1027
a3c00e46
MKL
1028static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1029 struct in6_addr *saddr)
1030{
66f5d6ce 1031 struct fib6_node *pn, *sn;
a3c00e46
MKL
1032 while (1) {
1033 if (fn->fn_flags & RTN_TL_ROOT)
1034 return NULL;
66f5d6ce
WW
1035 pn = rcu_dereference(fn->parent);
1036 sn = FIB6_SUBTREE(pn);
1037 if (sn && sn != fn)
6454743b 1038 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1039 else
1040 fn = pn;
1041 if (fn->fn_flags & RTN_RTINFO)
1042 return fn;
1043 }
1044}
c71099ac 1045
10585b43 1046static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1047{
1048 struct rt6_info *rt = *prt;
1049
1050 if (dst_hold_safe(&rt->dst))
1051 return true;
10585b43 1052 if (net) {
d3843fe5
WW
1053 rt = net->ipv6.ip6_null_entry;
1054 dst_hold(&rt->dst);
1055 } else {
1056 rt = NULL;
1057 }
1058 *prt = rt;
1059 return false;
1060}
1061
dec9b0e2 1062/* called with rcu_lock held */
9b6b35ab 1063static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
dec9b0e2 1064{
9b6b35ab
DA
1065 struct net_device *dev = res->nh->fib_nh_dev;
1066 struct fib6_info *f6i = res->f6i;
1067 unsigned short flags;
dec9b0e2
DA
1068 struct rt6_info *nrt;
1069
9b6b35ab 1070 if (!fib6_info_hold_safe(f6i))
1c87e79a 1071 goto fallback;
e873e4b9 1072
9b6b35ab 1073 flags = fib6_info_dst_flags(f6i);
93531c67 1074 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1075 if (!nrt) {
9b6b35ab 1076 fib6_info_release(f6i);
1c87e79a
XL
1077 goto fallback;
1078 }
dec9b0e2 1079
0d161581 1080 ip6_rt_copy_init(nrt, res);
1c87e79a
XL
1081 return nrt;
1082
1083fallback:
1084 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1085 dst_hold(&nrt->dst);
dec9b0e2
DA
1086 return nrt;
1087}
1088
8ed67789
DL
1089static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1090 struct fib6_table *table,
b75cc8f9
DA
1091 struct flowi6 *fl6,
1092 const struct sk_buff *skb,
1093 int flags)
1da177e4 1094{
b1d40991 1095 struct fib6_result res = {};
1da177e4 1096 struct fib6_node *fn;
23fb93a4 1097 struct rt6_info *rt;
1da177e4 1098
b6cdbc85
DA
1099 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1100 flags &= ~RT6_LOOKUP_F_IFACE;
1101
66f5d6ce 1102 rcu_read_lock();
6454743b 1103 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1104restart:
b1d40991
DA
1105 res.f6i = rcu_dereference(fn->leaf);
1106 if (!res.f6i)
1107 res.f6i = net->ipv6.fib6_null_entry;
af52a52c 1108 else
75ef7389
DA
1109 rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1110 flags);
af52a52c 1111
b1d40991 1112 if (res.f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1113 fn = fib6_backtrack(fn, &fl6->saddr);
1114 if (fn)
1115 goto restart;
2b760fcf 1116
af52a52c
DA
1117 rt = net->ipv6.ip6_null_entry;
1118 dst_hold(&rt->dst);
1119 goto out;
1120 }
d3843fe5 1121
b1d40991
DA
1122 fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1123 fl6->flowi6_oif != 0, skb, flags);
1124
2b760fcf 1125 /* Search through exception table */
7e4b5128 1126 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1127 if (rt) {
10585b43 1128 if (ip6_hold_safe(net, &rt))
dec9b0e2 1129 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1130 } else {
9b6b35ab 1131 rt = ip6_create_rt_rcu(&res);
dec9b0e2 1132 }
b811580d 1133
af52a52c 1134out:
8ff2e5b2 1135 trace_fib6_table_lookup(net, &res, table, fl6);
af52a52c 1136
66f5d6ce 1137 rcu_read_unlock();
b811580d 1138
c71099ac 1139 return rt;
c71099ac
TG
1140}
1141
67ba4152 1142struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1143 const struct sk_buff *skb, int flags)
ea6e574e 1144{
b75cc8f9 1145 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1146}
1147EXPORT_SYMBOL_GPL(ip6_route_lookup);
1148
9acd9f3a 1149struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1150 const struct in6_addr *saddr, int oif,
1151 const struct sk_buff *skb, int strict)
c71099ac 1152{
4c9483b2
DM
1153 struct flowi6 fl6 = {
1154 .flowi6_oif = oif,
1155 .daddr = *daddr,
c71099ac
TG
1156 };
1157 struct dst_entry *dst;
77d16f45 1158 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1159
adaa70bb 1160 if (saddr) {
4c9483b2 1161 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1162 flags |= RT6_LOOKUP_F_HAS_SADDR;
1163 }
1164
b75cc8f9 1165 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1166 if (dst->error == 0)
1167 return (struct rt6_info *) dst;
1168
1169 dst_release(dst);
1170
1da177e4
LT
1171 return NULL;
1172}
7159039a
YH
1173EXPORT_SYMBOL(rt6_lookup);
1174
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * Caller must hold dst before calling it.
 */
1180
8d1c802b 1181static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1182 struct netlink_ext_ack *extack)
1da177e4
LT
1183{
1184 int err;
c71099ac 1185 struct fib6_table *table;
1da177e4 1186
93c2fb25 1187 table = rt->fib6_table;
66f5d6ce 1188 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1189 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1190 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1191
1192 return err;
1193}
1194
8d1c802b 1195int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1196{
afb1d4b5 1197 struct nl_info info = { .nl_net = net, };
e715b6d3 1198
d4ead6b3 1199 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1200}
1201
85bd05de 1202static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
8b9df265
MKL
1203 const struct in6_addr *daddr,
1204 const struct in6_addr *saddr)
1da177e4 1205{
85bd05de 1206 struct fib6_info *f6i = res->f6i;
4832c30d 1207 struct net_device *dev;
1da177e4
LT
1208 struct rt6_info *rt;
1209
1210 /*
1211 * Clone the route.
1212 */
1213
85bd05de 1214 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1215 return NULL;
1216
0d161581 1217 dev = ip6_rt_get_dev_rcu(res);
93531c67 1218 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9 1219 if (!rt) {
85bd05de 1220 fib6_info_release(f6i);
83a09abd 1221 return NULL;
e873e4b9 1222 }
83a09abd 1223
0d161581 1224 ip6_rt_copy_init(rt, res);
83a09abd 1225 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1226 rt->dst.flags |= DST_HOST;
1227 rt->rt6i_dst.addr = *daddr;
1228 rt->rt6i_dst.plen = 128;
1da177e4 1229
85bd05de
DA
1230 if (!rt6_is_gw_or_nonexthop(res)) {
1231 if (f6i->fib6_dst.plen != 128 &&
1232 ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
83a09abd 1233 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1234#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1235 if (rt->rt6i_src.plen && saddr) {
1236 rt->rt6i_src.addr = *saddr;
1237 rt->rt6i_src.plen = 128;
8b9df265 1238 }
83a09abd 1239#endif
95a9a5ba 1240 }
1da177e4 1241
95a9a5ba
YH
1242 return rt;
1243}
1da177e4 1244
db3fedee 1245static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
d52d3997 1246{
db3fedee
DA
1247 struct fib6_info *f6i = res->f6i;
1248 unsigned short flags = fib6_info_dst_flags(f6i);
4832c30d 1249 struct net_device *dev;
d52d3997
MKL
1250 struct rt6_info *pcpu_rt;
1251
db3fedee 1252 if (!fib6_info_hold_safe(f6i))
e873e4b9
WW
1253 return NULL;
1254
4832c30d 1255 rcu_read_lock();
0d161581 1256 dev = ip6_rt_get_dev_rcu(res);
93531c67 1257 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1258 rcu_read_unlock();
e873e4b9 1259 if (!pcpu_rt) {
db3fedee 1260 fib6_info_release(f6i);
d52d3997 1261 return NULL;
e873e4b9 1262 }
0d161581 1263 ip6_rt_copy_init(pcpu_rt, res);
d52d3997
MKL
1264 pcpu_rt->rt6i_flags |= RTF_PCPU;
1265 return pcpu_rt;
1266}
1267
66f5d6ce 1268/* It should be called with rcu_read_lock() acquired */
db3fedee 1269static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
d52d3997 1270{
a73e4195 1271 struct rt6_info *pcpu_rt, **p;
d52d3997 1272
db3fedee 1273 p = this_cpu_ptr(res->f6i->rt6i_pcpu);
d52d3997
MKL
1274 pcpu_rt = *p;
1275
d4ead6b3 1276 if (pcpu_rt)
10585b43 1277 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1278
a73e4195
MKL
1279 return pcpu_rt;
1280}
1281
afb1d4b5 1282static struct rt6_info *rt6_make_pcpu_route(struct net *net,
db3fedee 1283 const struct fib6_result *res)
a73e4195
MKL
1284{
1285 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997 1286
db3fedee 1287 pcpu_rt = ip6_rt_pcpu_alloc(res);
d52d3997 1288 if (!pcpu_rt) {
9c7370a1
MKL
1289 dst_hold(&net->ipv6.ip6_null_entry->dst);
1290 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1291 }
1292
a94b9367 1293 dst_hold(&pcpu_rt->dst);
db3fedee 1294 p = this_cpu_ptr(res->f6i->rt6i_pcpu);
a94b9367 1295 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1296 BUG_ON(prev);
a94b9367 1297
61fb0d01
ED
1298 if (res->f6i->fib6_destroying) {
1299 struct fib6_info *from;
1300
1301 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1302 fib6_info_release(from);
1303 }
1304
d52d3997
MKL
1305 return pcpu_rt;
1306}
1307
35732d01
WW
1308/* exception hash table implementation
1309 */
1310static DEFINE_SPINLOCK(rt6_exception_lock);
1311
1312/* Remove rt6_ex from hash table and free the memory
1313 * Caller must hold rt6_exception_lock
1314 */
1315static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1316 struct rt6_exception *rt6_ex)
1317{
f5b51fe8 1318 struct fib6_info *from;
b2427e67 1319 struct net *net;
81eb8447 1320
35732d01
WW
1321 if (!bucket || !rt6_ex)
1322 return;
b2427e67
CIK
1323
1324 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1325 net->ipv6.rt6_stats->fib_rt_cache--;
1326
1327 /* purge completely the exception to allow releasing the held resources:
1328 * some [sk] cache may keep the dst around for unlimited time
1329 */
0e233874 1330 from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
f5b51fe8
PA
1331 fib6_info_release(from);
1332 dst_dev_put(&rt6_ex->rt6i->dst);
1333
35732d01 1334 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1335 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1336 kfree_rcu(rt6_ex, rcu);
1337 WARN_ON_ONCE(!bucket->depth);
1338 bucket->depth--;
1339}
1340
1341/* Remove oldest rt6_ex in bucket and free the memory
1342 * Caller must hold rt6_exception_lock
1343 */
1344static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1345{
1346 struct rt6_exception *rt6_ex, *oldest = NULL;
1347
1348 if (!bucket)
1349 return;
1350
1351 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1352 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1353 oldest = rt6_ex;
1354 }
1355 rt6_remove_exception(bucket, oldest);
1356}
1357
1358static u32 rt6_exception_hash(const struct in6_addr *dst,
1359 const struct in6_addr *src)
1360{
1361 static u32 seed __read_mostly;
1362 u32 val;
1363
1364 net_get_random_once(&seed, sizeof(seed));
1365 val = jhash(dst, sizeof(*dst), seed);
1366
1367#ifdef CONFIG_IPV6_SUBTREES
1368 if (src)
1369 val = jhash(src, sizeof(*src), val);
1370#endif
1371 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1372}
1373
1374/* Helper function to find the cached rt in the hash table
1375 * and update bucket pointer to point to the bucket for this
1376 * (daddr, saddr) pair
1377 * Caller must hold rt6_exception_lock
1378 */
1379static struct rt6_exception *
1380__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1381 const struct in6_addr *daddr,
1382 const struct in6_addr *saddr)
1383{
1384 struct rt6_exception *rt6_ex;
1385 u32 hval;
1386
1387 if (!(*bucket) || !daddr)
1388 return NULL;
1389
1390 hval = rt6_exception_hash(daddr, saddr);
1391 *bucket += hval;
1392
1393 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1394 struct rt6_info *rt6 = rt6_ex->rt6i;
1395 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1396
1397#ifdef CONFIG_IPV6_SUBTREES
1398 if (matched && saddr)
1399 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1400#endif
1401 if (matched)
1402 return rt6_ex;
1403 }
1404 return NULL;
1405}
1406
1407/* Helper function to find the cached rt in the hash table
1408 * and update bucket pointer to point to the bucket for this
1409 * (daddr, saddr) pair
1410 * Caller must hold rcu_read_lock()
1411 */
1412static struct rt6_exception *
1413__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1414 const struct in6_addr *daddr,
1415 const struct in6_addr *saddr)
1416{
1417 struct rt6_exception *rt6_ex;
1418 u32 hval;
1419
1420 WARN_ON_ONCE(!rcu_read_lock_held());
1421
1422 if (!(*bucket) || !daddr)
1423 return NULL;
1424
1425 hval = rt6_exception_hash(daddr, saddr);
1426 *bucket += hval;
1427
1428 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1429 struct rt6_info *rt6 = rt6_ex->rt6i;
1430 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1431
1432#ifdef CONFIG_IPV6_SUBTREES
1433 if (matched && saddr)
1434 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1435#endif
1436 if (matched)
1437 return rt6_ex;
1438 }
1439 return NULL;
1440}
1441
b748f260 1442static unsigned int fib6_mtu(const struct fib6_result *res)
d4ead6b3 1443{
b748f260 1444 const struct fib6_nh *nh = res->nh;
d4ead6b3
DA
1445 unsigned int mtu;
1446
b748f260
DA
1447 if (res->f6i->fib6_pmtu) {
1448 mtu = res->f6i->fib6_pmtu;
dcd1f572 1449 } else {
b748f260 1450 struct net_device *dev = nh->fib_nh_dev;
dcd1f572
DA
1451 struct inet6_dev *idev;
1452
1453 rcu_read_lock();
1454 idev = __in6_dev_get(dev);
1455 mtu = idev->cnf.mtu6;
1456 rcu_read_unlock();
1457 }
1458
d4ead6b3
DA
1459 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1460
b748f260 1461 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
d4ead6b3
DA
1462}
1463
35732d01 1464static int rt6_insert_exception(struct rt6_info *nrt,
5012f0a5 1465 const struct fib6_result *res)
35732d01 1466{
5e670d84 1467 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1468 struct rt6_exception_bucket *bucket;
1469 struct in6_addr *src_key = NULL;
1470 struct rt6_exception *rt6_ex;
5012f0a5 1471 struct fib6_info *f6i = res->f6i;
35732d01
WW
1472 int err = 0;
1473
35732d01
WW
1474 spin_lock_bh(&rt6_exception_lock);
1475
5012f0a5 1476 if (f6i->exception_bucket_flushed) {
35732d01
WW
1477 err = -EINVAL;
1478 goto out;
1479 }
1480
5012f0a5 1481 bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
35732d01
WW
1482 lockdep_is_held(&rt6_exception_lock));
1483 if (!bucket) {
1484 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1485 GFP_ATOMIC);
1486 if (!bucket) {
1487 err = -ENOMEM;
1488 goto out;
1489 }
5012f0a5 1490 rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
35732d01
WW
1491 }
1492
1493#ifdef CONFIG_IPV6_SUBTREES
5012f0a5 1494 /* fib6_src.plen != 0 indicates f6i is in subtree
35732d01 1495 * and exception table is indexed by a hash of
5012f0a5 1496 * both fib6_dst and fib6_src.
35732d01 1497 * Otherwise, the exception table is indexed by
5012f0a5 1498 * a hash of only fib6_dst.
35732d01 1499 */
5012f0a5 1500 if (f6i->fib6_src.plen)
35732d01
WW
1501 src_key = &nrt->rt6i_src.addr;
1502#endif
5012f0a5 1503 /* rt6_mtu_change() might lower mtu on f6i.
f5bbe7ee 1504 * Only insert this exception route if its mtu
5012f0a5 1505 * is less than f6i's mtu value.
f5bbe7ee 1506 */
b748f260 1507 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
f5bbe7ee
WW
1508 err = -EINVAL;
1509 goto out;
1510 }
60006a48 1511
35732d01
WW
1512 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1513 src_key);
1514 if (rt6_ex)
1515 rt6_remove_exception(bucket, rt6_ex);
1516
1517 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1518 if (!rt6_ex) {
1519 err = -ENOMEM;
1520 goto out;
1521 }
1522 rt6_ex->rt6i = nrt;
1523 rt6_ex->stamp = jiffies;
35732d01
WW
1524 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1525 bucket->depth++;
81eb8447 1526 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1527
1528 if (bucket->depth > FIB6_MAX_DEPTH)
1529 rt6_exception_remove_oldest(bucket);
1530
1531out:
1532 spin_unlock_bh(&rt6_exception_lock);
1533
1534 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1535 if (!err) {
5012f0a5
DA
1536 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1537 fib6_update_sernum(net, f6i);
1538 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
b886d5f2
PA
1539 fib6_force_start_gc(net);
1540 }
35732d01
WW
1541
1542 return err;
1543}
1544
8d1c802b 1545void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1546{
1547 struct rt6_exception_bucket *bucket;
1548 struct rt6_exception *rt6_ex;
1549 struct hlist_node *tmp;
1550 int i;
1551
1552 spin_lock_bh(&rt6_exception_lock);
1553 /* Prevent rt6_insert_exception() to recreate the bucket list */
1554 rt->exception_bucket_flushed = 1;
1555
1556 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1557 lockdep_is_held(&rt6_exception_lock));
1558 if (!bucket)
1559 goto out;
1560
1561 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1562 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1563 rt6_remove_exception(bucket, rt6_ex);
1564 WARN_ON_ONCE(bucket->depth);
1565 bucket++;
1566 }
1567
1568out:
1569 spin_unlock_bh(&rt6_exception_lock);
1570}
1571
1572/* Find cached rt in the hash table inside passed in rt
1573 * Caller has to hold rcu_read_lock()
1574 */
7e4b5128 1575static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
510e2ced
WW
1576 const struct in6_addr *daddr,
1577 const struct in6_addr *saddr)
35732d01 1578{
510e2ced 1579 const struct in6_addr *src_key = NULL;
35732d01 1580 struct rt6_exception_bucket *bucket;
35732d01 1581 struct rt6_exception *rt6_ex;
7e4b5128 1582 struct rt6_info *ret = NULL;
35732d01 1583
35732d01 1584#ifdef CONFIG_IPV6_SUBTREES
7e4b5128 1585 /* fib6i_src.plen != 0 indicates f6i is in subtree
35732d01 1586 * and exception table is indexed by a hash of
7e4b5128 1587 * both fib6_dst and fib6_src.
510e2ced
WW
1588 * However, the src addr used to create the hash
1589 * might not be exactly the passed in saddr which
1590 * is a /128 addr from the flow.
1591 * So we need to use f6i->fib6_src to redo lookup
1592 * if the passed in saddr does not find anything.
1593 * (See the logic in ip6_rt_cache_alloc() on how
1594 * rt->rt6i_src is updated.)
35732d01 1595 */
7e4b5128 1596 if (res->f6i->fib6_src.plen)
35732d01 1597 src_key = saddr;
510e2ced 1598find_ex:
35732d01 1599#endif
510e2ced 1600 bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
35732d01
WW
1601 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1602
1603 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
7e4b5128 1604 ret = rt6_ex->rt6i;
35732d01 1605
510e2ced
WW
1606#ifdef CONFIG_IPV6_SUBTREES
1607 /* Use fib6_src as src_key and redo lookup */
1608 if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1609 src_key = &res->f6i->fib6_src.addr;
1610 goto find_ex;
1611 }
1612#endif
1613
7e4b5128 1614 return ret;
35732d01
WW
1615}
1616
1617/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1618static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1619{
35732d01
WW
1620 struct rt6_exception_bucket *bucket;
1621 struct in6_addr *src_key = NULL;
1622 struct rt6_exception *rt6_ex;
8a14e46f 1623 struct fib6_info *from;
35732d01
WW
1624 int err;
1625
091311de 1626 from = rcu_dereference(rt->from);
35732d01 1627 if (!from ||
442d713b 1628 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1629 return -EINVAL;
1630
1631 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1632 return -ENOENT;
1633
1634 spin_lock_bh(&rt6_exception_lock);
1635 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1636 lockdep_is_held(&rt6_exception_lock));
1637#ifdef CONFIG_IPV6_SUBTREES
1638 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1639 * and exception table is indexed by a hash of
1640 * both rt6i_dst and rt6i_src.
1641 * Otherwise, the exception table is indexed by
1642 * a hash of only rt6i_dst.
1643 */
93c2fb25 1644 if (from->fib6_src.plen)
35732d01
WW
1645 src_key = &rt->rt6i_src.addr;
1646#endif
1647 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1648 &rt->rt6i_dst.addr,
1649 src_key);
1650 if (rt6_ex) {
1651 rt6_remove_exception(bucket, rt6_ex);
1652 err = 0;
1653 } else {
1654 err = -ENOENT;
1655 }
1656
1657 spin_unlock_bh(&rt6_exception_lock);
1658 return err;
1659}
1660
1661/* Find rt6_ex which contains the passed in rt cache and
1662 * refresh its stamp
1663 */
1664static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1665{
35732d01
WW
1666 struct rt6_exception_bucket *bucket;
1667 struct in6_addr *src_key = NULL;
1668 struct rt6_exception *rt6_ex;
193f3685 1669 struct fib6_info *from;
35732d01
WW
1670
1671 rcu_read_lock();
193f3685
PA
1672 from = rcu_dereference(rt->from);
1673 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1674 goto unlock;
1675
35732d01
WW
1676 bucket = rcu_dereference(from->rt6i_exception_bucket);
1677
1678#ifdef CONFIG_IPV6_SUBTREES
1679 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1680 * and exception table is indexed by a hash of
1681 * both rt6i_dst and rt6i_src.
1682 * Otherwise, the exception table is indexed by
1683 * a hash of only rt6i_dst.
1684 */
93c2fb25 1685 if (from->fib6_src.plen)
35732d01
WW
1686 src_key = &rt->rt6i_src.addr;
1687#endif
1688 rt6_ex = __rt6_find_exception_rcu(&bucket,
1689 &rt->rt6i_dst.addr,
1690 src_key);
1691 if (rt6_ex)
1692 rt6_ex->stamp = jiffies;
1693
193f3685 1694unlock:
35732d01
WW
1695 rcu_read_unlock();
1696}
1697
e9fa1495
SB
1698static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1699 struct rt6_info *rt, int mtu)
1700{
1701 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1702 * lowest MTU in the path: always allow updating the route PMTU to
1703 * reflect PMTU decreases.
1704 *
1705 * If the new MTU is higher, and the route PMTU is equal to the local
1706 * MTU, this means the old MTU is the lowest in the path, so allow
1707 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1708 * handle this.
1709 */
1710
1711 if (dst_mtu(&rt->dst) >= mtu)
1712 return true;
1713
1714 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1715 return true;
1716
1717 return false;
1718}
1719
1720static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1721 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1722{
1723 struct rt6_exception_bucket *bucket;
1724 struct rt6_exception *rt6_ex;
1725 int i;
1726
1727 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1728 lockdep_is_held(&rt6_exception_lock));
1729
e9fa1495
SB
1730 if (!bucket)
1731 return;
1732
1733 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1734 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1735 struct rt6_info *entry = rt6_ex->rt6i;
1736
1737 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1738 * route), the metrics of its rt->from have already
e9fa1495
SB
1739 * been updated.
1740 */
d4ead6b3 1741 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1742 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1743 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1744 }
e9fa1495 1745 bucket++;
f5bbe7ee
WW
1746 }
1747}
1748
b16cb459
WW
1749#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1750
8d1c802b 1751static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1752 struct in6_addr *gateway)
1753{
1754 struct rt6_exception_bucket *bucket;
1755 struct rt6_exception *rt6_ex;
1756 struct hlist_node *tmp;
1757 int i;
1758
1759 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1760 return;
1761
1762 spin_lock_bh(&rt6_exception_lock);
1763 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1764 lockdep_is_held(&rt6_exception_lock));
1765
1766 if (bucket) {
1767 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1768 hlist_for_each_entry_safe(rt6_ex, tmp,
1769 &bucket->chain, hlist) {
1770 struct rt6_info *entry = rt6_ex->rt6i;
1771
1772 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1773 RTF_CACHE_GATEWAY &&
1774 ipv6_addr_equal(gateway,
1775 &entry->rt6i_gateway)) {
1776 rt6_remove_exception(bucket, rt6_ex);
1777 }
1778 }
1779 bucket++;
1780 }
1781 }
1782
1783 spin_unlock_bh(&rt6_exception_lock);
1784}
1785
c757faa8
WW
1786static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1787 struct rt6_exception *rt6_ex,
1788 struct fib6_gc_args *gc_args,
1789 unsigned long now)
1790{
1791 struct rt6_info *rt = rt6_ex->rt6i;
1792
1859bac0
PA
1793 /* we are pruning and obsoleting aged-out and non gateway exceptions
1794 * even if others have still references to them, so that on next
1795 * dst_check() such references can be dropped.
1796 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1797 * expired, independently from their aging, as per RFC 8201 section 4
1798 */
31afeb42
WW
1799 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1800 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1801 RT6_TRACE("aging clone %p\n", rt);
1802 rt6_remove_exception(bucket, rt6_ex);
1803 return;
1804 }
1805 } else if (time_after(jiffies, rt->dst.expires)) {
1806 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1807 rt6_remove_exception(bucket, rt6_ex);
1808 return;
31afeb42
WW
1809 }
1810
1811 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1812 struct neighbour *neigh;
1813 __u8 neigh_flags = 0;
1814
1bfa26ff
ED
1815 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1816 if (neigh)
c757faa8 1817 neigh_flags = neigh->flags;
1bfa26ff 1818
c757faa8
WW
1819 if (!(neigh_flags & NTF_ROUTER)) {
1820 RT6_TRACE("purging route %p via non-router but gateway\n",
1821 rt);
1822 rt6_remove_exception(bucket, rt6_ex);
1823 return;
1824 }
1825 }
31afeb42 1826
c757faa8
WW
1827 gc_args->more++;
1828}
1829
8d1c802b 1830void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1831 struct fib6_gc_args *gc_args,
1832 unsigned long now)
1833{
1834 struct rt6_exception_bucket *bucket;
1835 struct rt6_exception *rt6_ex;
1836 struct hlist_node *tmp;
1837 int i;
1838
1839 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1840 return;
1841
1bfa26ff
ED
1842 rcu_read_lock_bh();
1843 spin_lock(&rt6_exception_lock);
c757faa8
WW
1844 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1845 lockdep_is_held(&rt6_exception_lock));
1846
1847 if (bucket) {
1848 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1849 hlist_for_each_entry_safe(rt6_ex, tmp,
1850 &bucket->chain, hlist) {
1851 rt6_age_examine_exception(bucket, rt6_ex,
1852 gc_args, now);
1853 }
1854 bucket++;
1855 }
1856 }
1bfa26ff
ED
1857 spin_unlock(&rt6_exception_lock);
1858 rcu_read_unlock_bh();
c757faa8
WW
1859}
1860
1d053da9 1861/* must be called with rcu lock held */
effda4dd
DA
1862int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
1863 struct flowi6 *fl6, struct fib6_result *res, int strict)
1da177e4 1864{
367efcb9 1865 struct fib6_node *fn, *saved_fn;
1da177e4 1866
6454743b 1867 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1868 saved_fn = fn;
1da177e4 1869
ca254490
DA
1870 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1871 oif = 0;
1872
a3c00e46 1873redo_rt6_select:
effda4dd
DA
1874 rt6_select(net, fn, oif, res, strict);
1875 if (res->f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1876 fn = fib6_backtrack(fn, &fl6->saddr);
1877 if (fn)
1878 goto redo_rt6_select;
367efcb9
MKL
1879 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1880 /* also consider unreachable route */
1881 strict &= ~RT6_LOOKUP_F_REACHABLE;
1882 fn = saved_fn;
1883 goto redo_rt6_select;
367efcb9 1884 }
a3c00e46
MKL
1885 }
1886
effda4dd 1887 trace_fib6_table_lookup(net, res, table, fl6);
fb9de91e 1888
effda4dd 1889 return 0;
1d053da9
DA
1890}
1891
1892struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1893 int oif, struct flowi6 *fl6,
1894 const struct sk_buff *skb, int flags)
1895{
b1d40991 1896 struct fib6_result res = {};
1d053da9
DA
1897 struct rt6_info *rt;
1898 int strict = 0;
1899
1900 strict |= flags & RT6_LOOKUP_F_IFACE;
1901 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1902 if (net->ipv6.devconf_all->forwarding == 0)
1903 strict |= RT6_LOOKUP_F_REACHABLE;
1904
1905 rcu_read_lock();
1906
effda4dd 1907 fib6_table_lookup(net, table, oif, fl6, &res, strict);
b1d40991 1908 if (res.f6i == net->ipv6.fib6_null_entry) {
421842ed 1909 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1910 rcu_read_unlock();
d3843fe5 1911 dst_hold(&rt->dst);
d3843fe5 1912 return rt;
23fb93a4
DA
1913 }
1914
b1d40991 1915 fib6_select_path(net, &res, fl6, oif, false, skb, strict);
d83009d4 1916
23fb93a4 1917 /*Search through exception table */
7e4b5128 1918 rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
23fb93a4 1919 if (rt) {
10585b43 1920 if (ip6_hold_safe(net, &rt))
d3843fe5 1921 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1922
66f5d6ce 1923 rcu_read_unlock();
d52d3997 1924 return rt;
3da59bd9 1925 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
b1d40991 1926 !res.nh->fib_nh_gw_family)) {
3da59bd9
MKL
1927 /* Create a RTF_CACHE clone which will not be
1928 * owned by the fib6 tree. It is for the special case where
1929 * the daddr in the skb during the neighbor look-up is different
1930 * from the fl6->daddr used to look-up route here.
1931 */
3da59bd9
MKL
1932 struct rt6_info *uncached_rt;
1933
85bd05de 1934 uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
d52d3997 1935
4d85cd0c 1936 rcu_read_unlock();
c71099ac 1937
1cfb71ee
WW
1938 if (uncached_rt) {
1939 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1940 * No need for another dst_hold()
1941 */
8d0b94af 1942 rt6_uncached_list_add(uncached_rt);
81eb8447 1943 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1944 } else {
3da59bd9 1945 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1946 dst_hold(&uncached_rt->dst);
1947 }
b811580d 1948
3da59bd9 1949 return uncached_rt;
d52d3997
MKL
1950 } else {
1951 /* Get a percpu copy */
1952
1953 struct rt6_info *pcpu_rt;
1954
951f788a 1955 local_bh_disable();
db3fedee 1956 pcpu_rt = rt6_get_pcpu_route(&res);
d52d3997 1957
93531c67 1958 if (!pcpu_rt)
db3fedee 1959 pcpu_rt = rt6_make_pcpu_route(net, &res);
93531c67 1960
951f788a
ED
1961 local_bh_enable();
1962 rcu_read_unlock();
d4bea421 1963
d52d3997
MKL
1964 return pcpu_rt;
1965 }
1da177e4 1966}
9ff74384 1967EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1968
b75cc8f9
DA
1969static struct rt6_info *ip6_pol_route_input(struct net *net,
1970 struct fib6_table *table,
1971 struct flowi6 *fl6,
1972 const struct sk_buff *skb,
1973 int flags)
4acad72d 1974{
b75cc8f9 1975 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1976}
1977
d409b847
MB
1978struct dst_entry *ip6_route_input_lookup(struct net *net,
1979 struct net_device *dev,
b75cc8f9
DA
1980 struct flowi6 *fl6,
1981 const struct sk_buff *skb,
1982 int flags)
72331bc0
SL
1983{
1984 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1985 flags |= RT6_LOOKUP_F_IFACE;
1986
b75cc8f9 1987 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1988}
d409b847 1989EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1990
23aebdac 1991static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1992 struct flow_keys *keys,
1993 struct flow_keys *flkeys)
23aebdac
JS
1994{
1995 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1996 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1997 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1998 const struct ipv6hdr *inner_iph;
1999 const struct icmp6hdr *icmph;
2000 struct ipv6hdr _inner_iph;
cea67a2d 2001 struct icmp6hdr _icmph;
23aebdac
JS
2002
2003 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2004 goto out;
2005
cea67a2d
ED
2006 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2007 sizeof(_icmph), &_icmph);
2008 if (!icmph)
2009 goto out;
2010
23aebdac
JS
2011 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2012 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2013 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2014 icmph->icmp6_type != ICMPV6_PARAMPROB)
2015 goto out;
2016
2017 inner_iph = skb_header_pointer(skb,
2018 skb_transport_offset(skb) + sizeof(*icmph),
2019 sizeof(_inner_iph), &_inner_iph);
2020 if (!inner_iph)
2021 goto out;
2022
2023 key_iph = inner_iph;
5e5d6fed 2024 _flkeys = NULL;
23aebdac 2025out:
5e5d6fed
RP
2026 if (_flkeys) {
2027 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2028 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2029 keys->tags.flow_label = _flkeys->tags.flow_label;
2030 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2031 } else {
2032 keys->addrs.v6addrs.src = key_iph->saddr;
2033 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 2034 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
2035 keys->basic.ip_proto = key_iph->nexthdr;
2036 }
23aebdac
JS
2037}
2038
2039/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2040u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2041 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2042{
2043 struct flow_keys hash_keys;
9a2a537a 2044 u32 mhash;
23aebdac 2045
bbfa047a 2046 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2047 case 0:
2048 memset(&hash_keys, 0, sizeof(hash_keys));
2049 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2050 if (skb) {
2051 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2052 } else {
2053 hash_keys.addrs.v6addrs.src = fl6->saddr;
2054 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2055 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2056 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2057 }
2058 break;
2059 case 1:
2060 if (skb) {
2061 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2062 struct flow_keys keys;
2063
2064 /* short-circuit if we already have L4 hash present */
2065 if (skb->l4_hash)
2066 return skb_get_hash_raw(skb) >> 1;
2067
2068 memset(&hash_keys, 0, sizeof(hash_keys));
2069
2070 if (!flkeys) {
2071 skb_flow_dissect_flow_keys(skb, &keys, flag);
2072 flkeys = &keys;
2073 }
2074 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2075 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2076 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2077 hash_keys.ports.src = flkeys->ports.src;
2078 hash_keys.ports.dst = flkeys->ports.dst;
2079 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2080 } else {
2081 memset(&hash_keys, 0, sizeof(hash_keys));
2082 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2083 hash_keys.addrs.v6addrs.src = fl6->saddr;
2084 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2085 hash_keys.ports.src = fl6->fl6_sport;
2086 hash_keys.ports.dst = fl6->fl6_dport;
2087 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2088 }
2089 break;
23aebdac 2090 }
9a2a537a 2091 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2092
9a2a537a 2093 return mhash >> 1;
23aebdac
JS
2094}
2095
c71099ac
TG
2096void ip6_route_input(struct sk_buff *skb)
2097{
b71d1d42 2098 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2099 struct net *net = dev_net(skb->dev);
adaa70bb 2100 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2101 struct ip_tunnel_info *tun_info;
4c9483b2 2102 struct flowi6 fl6 = {
e0d56fdd 2103 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2104 .daddr = iph->daddr,
2105 .saddr = iph->saddr,
6502ca52 2106 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2107 .flowi6_mark = skb->mark,
2108 .flowi6_proto = iph->nexthdr,
c71099ac 2109 };
5e5d6fed 2110 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2111
904af04d 2112 tun_info = skb_tunnel_info(skb);
46fa062a 2113 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2114 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2115
2116 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2117 flkeys = &_flkeys;
2118
23aebdac 2119 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2120 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2121 skb_dst_drop(skb);
b75cc8f9
DA
2122 skb_dst_set(skb,
2123 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2124}
2125
b75cc8f9
DA
2126static struct rt6_info *ip6_pol_route_output(struct net *net,
2127 struct fib6_table *table,
2128 struct flowi6 *fl6,
2129 const struct sk_buff *skb,
2130 int flags)
1da177e4 2131{
b75cc8f9 2132 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2133}
2134
6f21c96a
PA
2135struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2136 struct flowi6 *fl6, int flags)
c71099ac 2137{
d46a9d67 2138 bool any_src;
c71099ac 2139
3ede0bbc
RS
2140 if (ipv6_addr_type(&fl6->daddr) &
2141 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2142 struct dst_entry *dst;
2143
2144 dst = l3mdev_link_scope_lookup(net, fl6);
2145 if (dst)
2146 return dst;
2147 }
ca254490 2148
1fb9489b 2149 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2150
d46a9d67 2151 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2152 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2153 (fl6->flowi6_oif && any_src))
77d16f45 2154 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2155
d46a9d67 2156 if (!any_src)
adaa70bb 2157 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2158 else if (sk)
2159 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2160
b75cc8f9 2161 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2162}
6f21c96a 2163EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2164
2774c131 2165struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2166{
5c1e6aa3 2167 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2168 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2169 struct dst_entry *new = NULL;
2170
1dbe3252 2171 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2172 DST_OBSOLETE_DEAD, 0);
14e50e57 2173 if (rt) {
0a1f5962 2174 rt6_info_init(rt);
81eb8447 2175 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2176
0a1f5962 2177 new = &rt->dst;
14e50e57 2178 new->__use = 1;
352e512c 2179 new->input = dst_discard;
ede2059d 2180 new->output = dst_discard_out;
14e50e57 2181
0a1f5962 2182 dst_copy_metrics(new, &ort->dst);
14e50e57 2183
1dbe3252 2184 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2185 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2186 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2187
2188 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2189#ifdef CONFIG_IPV6_SUBTREES
2190 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2191#endif
14e50e57
DM
2192 }
2193
69ead7af
DM
2194 dst_release(dst_orig);
2195 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2196}
14e50e57 2197
1da177e4
LT
2198/*
2199 * Destination cache support functions
2200 */
2201
8d1c802b 2202static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2203{
93531c67
DA
2204 u32 rt_cookie = 0;
2205
8ae86971 2206 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2207 return false;
2208
2209 if (fib6_check_expired(f6i))
2210 return false;
2211
2212 return true;
4b32b5ad
MKL
2213}
2214
a68886a6
DA
2215static struct dst_entry *rt6_check(struct rt6_info *rt,
2216 struct fib6_info *from,
2217 u32 cookie)
3da59bd9 2218{
36143645 2219 u32 rt_cookie = 0;
c5cff856 2220
a68886a6 2221 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2222 rt_cookie != cookie)
3da59bd9
MKL
2223 return NULL;
2224
2225 if (rt6_check_expired(rt))
2226 return NULL;
2227
2228 return &rt->dst;
2229}
2230
a68886a6
DA
2231static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2232 struct fib6_info *from,
2233 u32 cookie)
3da59bd9 2234{
5973fb1e
MKL
2235 if (!__rt6_check_expired(rt) &&
2236 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2237 fib6_check(from, cookie))
3da59bd9
MKL
2238 return &rt->dst;
2239 else
2240 return NULL;
2241}
2242
1da177e4
LT
2243static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2244{
a87b7dc9 2245 struct dst_entry *dst_ret;
a68886a6 2246 struct fib6_info *from;
1da177e4
LT
2247 struct rt6_info *rt;
2248
a87b7dc9
DA
2249 rt = container_of(dst, struct rt6_info, dst);
2250
2251 rcu_read_lock();
1da177e4 2252
6f3118b5
ND
2253 /* All IPV6 dsts are created with ->obsolete set to the value
2254 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2255 * into this function always.
2256 */
e3bc10bd 2257
a68886a6 2258 from = rcu_dereference(rt->from);
4b32b5ad 2259
a68886a6
DA
2260 if (from && (rt->rt6i_flags & RTF_PCPU ||
2261 unlikely(!list_empty(&rt->rt6i_uncached))))
2262 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2263 else
a68886a6 2264 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2265
2266 rcu_read_unlock();
2267
2268 return dst_ret;
1da177e4
LT
2269}
2270
2271static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2272{
2273 struct rt6_info *rt = (struct rt6_info *) dst;
2274
2275 if (rt) {
54c1a859 2276 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2277 rcu_read_lock();
54c1a859 2278 if (rt6_check_expired(rt)) {
93531c67 2279 rt6_remove_exception_rt(rt);
54c1a859
YH
2280 dst = NULL;
2281 }
c3c14da0 2282 rcu_read_unlock();
54c1a859 2283 } else {
1da177e4 2284 dst_release(dst);
54c1a859
YH
2285 dst = NULL;
2286 }
1da177e4 2287 }
54c1a859 2288 return dst;
1da177e4
LT
2289}
2290
2291static void ip6_link_failure(struct sk_buff *skb)
2292{
2293 struct rt6_info *rt;
2294
3ffe533c 2295 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2296
adf30907 2297 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2298 if (rt) {
8a14e46f 2299 rcu_read_lock();
1eb4f758 2300 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2301 rt6_remove_exception_rt(rt);
c5cff856 2302 } else {
a68886a6 2303 struct fib6_info *from;
c5cff856
WW
2304 struct fib6_node *fn;
2305
a68886a6
DA
2306 from = rcu_dereference(rt->from);
2307 if (from) {
2308 fn = rcu_dereference(from->fib6_node);
2309 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2310 fn->fn_sernum = -1;
2311 }
1eb4f758 2312 }
8a14e46f 2313 rcu_read_unlock();
1da177e4
LT
2314 }
2315}
2316
6a3e030f
DA
2317static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2318{
a68886a6
DA
2319 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2320 struct fib6_info *from;
2321
2322 rcu_read_lock();
2323 from = rcu_dereference(rt0->from);
2324 if (from)
2325 rt0->dst.expires = from->expires;
2326 rcu_read_unlock();
2327 }
6a3e030f
DA
2328
2329 dst_set_expires(&rt0->dst, timeout);
2330 rt0->rt6i_flags |= RTF_EXPIRES;
2331}
2332
45e4fd26
MKL
2333static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2334{
2335 struct net *net = dev_net(rt->dst.dev);
2336
d4ead6b3 2337 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2338 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2339 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2340}
2341
0d3f6d29
MKL
2342static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2343{
2344 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2345 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2346}
2347
45e4fd26
MKL
2348static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2349 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2350{
0dec879f 2351 const struct in6_addr *daddr, *saddr;
67ba4152 2352 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2353
19bda36c
XL
2354 if (dst_metric_locked(dst, RTAX_MTU))
2355 return;
2356
0dec879f
JA
2357 if (iph) {
2358 daddr = &iph->daddr;
2359 saddr = &iph->saddr;
2360 } else if (sk) {
2361 daddr = &sk->sk_v6_daddr;
2362 saddr = &inet6_sk(sk)->saddr;
2363 } else {
2364 daddr = NULL;
2365 saddr = NULL;
2366 }
2367 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2368 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2369 if (mtu >= dst_mtu(dst))
2370 return;
9d289715 2371
0d3f6d29 2372 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2373 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2374 /* update rt6_ex->stamp for cache */
2375 if (rt6->rt6i_flags & RTF_CACHE)
2376 rt6_update_exception_stamp_rt(rt6);
0dec879f 2377 } else if (daddr) {
85bd05de 2378 struct fib6_result res = {};
45e4fd26
MKL
2379 struct rt6_info *nrt6;
2380
4d85cd0c 2381 rcu_read_lock();
85bd05de
DA
2382 res.f6i = rcu_dereference(rt6->from);
2383 if (!res.f6i) {
9c69a132
JL
2384 rcu_read_unlock();
2385 return;
2386 }
85bd05de 2387 res.nh = &res.f6i->fib6_nh;
7d21fec9
DA
2388 res.fib6_flags = res.f6i->fib6_flags;
2389 res.fib6_type = res.f6i->fib6_type;
2390
85bd05de 2391 nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
45e4fd26
MKL
2392 if (nrt6) {
2393 rt6_do_update_pmtu(nrt6, mtu);
5012f0a5 2394 if (rt6_insert_exception(nrt6, &res))
2b760fcf 2395 dst_release_immediate(&nrt6->dst);
45e4fd26 2396 }
a68886a6 2397 rcu_read_unlock();
1da177e4
LT
2398 }
2399}
2400
45e4fd26
MKL
2401static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2402 struct sk_buff *skb, u32 mtu)
2403{
2404 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2405}
2406
42ae66c8 2407void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2408 int oif, u32 mark, kuid_t uid)
81aded24
DM
2409{
2410 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2411 struct dst_entry *dst;
dc92095d
2412 struct flowi6 fl6 = {
2413 .flowi6_oif = oif,
2414 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2415 .daddr = iph->daddr,
2416 .saddr = iph->saddr,
2417 .flowlabel = ip6_flowinfo(iph),
2418 .flowi6_uid = uid,
2419 };
81aded24
DM
2420
2421 dst = ip6_route_output(net, NULL, &fl6);
2422 if (!dst->error)
45e4fd26 2423 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2424 dst_release(dst);
2425}
2426EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2427
2428void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2429{
7ddacfa5 2430 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2431 struct dst_entry *dst;
2432
7ddacfa5
DA
2433 if (!oif && skb->dev)
2434 oif = l3mdev_master_ifindex(skb->dev);
2435
2436 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2437
2438 dst = __sk_dst_get(sk);
2439 if (!dst || !dst->obsolete ||
2440 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2441 return;
2442
2443 bh_lock_sock(sk);
2444 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2445 ip6_datagram_dst_update(sk, false);
2446 bh_unlock_sock(sk);
81aded24
DM
2447}
2448EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2449
7d6850f7
AK
2450void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2451 const struct flowi6 *fl6)
2452{
2453#ifdef CONFIG_IPV6_SUBTREES
2454 struct ipv6_pinfo *np = inet6_sk(sk);
2455#endif
2456
2457 ip6_dst_store(sk, dst,
2458 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2459 &sk->sk_v6_daddr : NULL,
2460#ifdef CONFIG_IPV6_SUBTREES
2461 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2462 &np->saddr :
2463#endif
2464 NULL);
2465}
2466
9b6b35ab 2467static bool ip6_redirect_nh_match(const struct fib6_result *res,
0b34eb00
DA
2468 struct flowi6 *fl6,
2469 const struct in6_addr *gw,
2470 struct rt6_info **ret)
2471{
9b6b35ab
DA
2472 const struct fib6_nh *nh = res->nh;
2473
0b34eb00
DA
2474 if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2475 fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2476 return false;
2477
2478 /* rt_cache's gateway might be different from its 'parent'
2479 * in the case of an ip redirect.
2480 * So we keep searching in the exception table if the gateway
2481 * is different.
2482 */
2483 if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2484 struct rt6_info *rt_cache;
2485
9b6b35ab 2486 rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
0b34eb00
DA
2487 if (rt_cache &&
2488 ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2489 *ret = rt_cache;
2490 return true;
2491 }
2492 return false;
2493 }
2494 return true;
2495}
2496
b55b76b2
DJ
2497/* Handle redirects */
2498struct ip6rd_flowi {
2499 struct flowi6 fl6;
2500 struct in6_addr gateway;
2501};
2502
2503static struct rt6_info *__ip6_route_redirect(struct net *net,
2504 struct fib6_table *table,
2505 struct flowi6 *fl6,
b75cc8f9 2506 const struct sk_buff *skb,
b55b76b2
DJ
2507 int flags)
2508{
2509 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
0b34eb00 2510 struct rt6_info *ret = NULL;
9b6b35ab 2511 struct fib6_result res = {};
8d1c802b 2512 struct fib6_info *rt;
b55b76b2
DJ
2513 struct fib6_node *fn;
2514
2515 /* Get the "current" route for this destination and
67c408cf 2516 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2517 *
2518 * RFC 4861 specifies that redirects should only be
2519 * accepted if they come from the nexthop to the target.
2520 * Due to the way the routes are chosen, this notion
2521 * is a bit fuzzy and one might need to check all possible
2522 * routes.
2523 */
2524
66f5d6ce 2525 rcu_read_lock();
6454743b 2526 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2527restart:
66f5d6ce 2528 for_each_fib6_node_rt_rcu(fn) {
9b6b35ab
DA
2529 res.f6i = rt;
2530 res.nh = &rt->fib6_nh;
2531
14895687 2532 if (fib6_check_expired(rt))
b55b76b2 2533 continue;
93c2fb25 2534 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2535 break;
9b6b35ab 2536 if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
0b34eb00 2537 goto out;
b55b76b2
DJ
2538 }
2539
2540 if (!rt)
421842ed 2541 rt = net->ipv6.fib6_null_entry;
93c2fb25 2542 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2543 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2544 goto out;
2545 }
2546
421842ed 2547 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2548 fn = fib6_backtrack(fn, &fl6->saddr);
2549 if (fn)
2550 goto restart;
b55b76b2 2551 }
a3c00e46 2552
9b6b35ab
DA
2553 res.f6i = rt;
2554 res.nh = &rt->fib6_nh;
b0a1ba59 2555out:
7d21fec9 2556 if (ret) {
10585b43 2557 ip6_hold_safe(net, &ret);
7d21fec9
DA
2558 } else {
2559 res.fib6_flags = res.f6i->fib6_flags;
2560 res.fib6_type = res.f6i->fib6_type;
9b6b35ab 2561 ret = ip6_create_rt_rcu(&res);
7d21fec9 2562 }
b55b76b2 2563
66f5d6ce 2564 rcu_read_unlock();
b55b76b2 2565
8ff2e5b2 2566 trace_fib6_table_lookup(net, &res, table, fl6);
23fb93a4 2567 return ret;
b55b76b2
DJ
2568};
2569
2570static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2571 const struct flowi6 *fl6,
2572 const struct sk_buff *skb,
2573 const struct in6_addr *gateway)
b55b76b2
DJ
2574{
2575 int flags = RT6_LOOKUP_F_HAS_SADDR;
2576 struct ip6rd_flowi rdfl;
2577
2578 rdfl.fl6 = *fl6;
2579 rdfl.gateway = *gateway;
2580
b75cc8f9 2581 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2582 flags, __ip6_route_redirect);
2583}
2584
e2d118a1
LC
2585void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2586 kuid_t uid)
3a5ad2ee
DM
2587{
2588 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2589 struct dst_entry *dst;
1f7f10ac
2590 struct flowi6 fl6 = {
2591 .flowi6_iif = LOOPBACK_IFINDEX,
2592 .flowi6_oif = oif,
2593 .flowi6_mark = mark,
2594 .daddr = iph->daddr,
2595 .saddr = iph->saddr,
2596 .flowlabel = ip6_flowinfo(iph),
2597 .flowi6_uid = uid,
2598 };
3a5ad2ee 2599
b75cc8f9 2600 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2601 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2602 dst_release(dst);
2603}
2604EXPORT_SYMBOL_GPL(ip6_redirect);
2605
d456336d 2606void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2607{
2608 const struct ipv6hdr *iph = ipv6_hdr(skb);
2609 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2610 struct dst_entry *dst;
0b26fb17
2611 struct flowi6 fl6 = {
2612 .flowi6_iif = LOOPBACK_IFINDEX,
2613 .flowi6_oif = oif,
0b26fb17
2614 .daddr = msg->dest,
2615 .saddr = iph->daddr,
2616 .flowi6_uid = sock_net_uid(net, NULL),
2617 };
c92a59ec 2618
b75cc8f9 2619 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2620 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2621 dst_release(dst);
2622}
2623
3a5ad2ee
DM
2624void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2625{
e2d118a1
LC
2626 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2627 sk->sk_uid);
3a5ad2ee
DM
2628}
2629EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2630
0dbaee3b 2631static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2632{
0dbaee3b
DM
2633 struct net_device *dev = dst->dev;
2634 unsigned int mtu = dst_mtu(dst);
2635 struct net *net = dev_net(dev);
2636
1da177e4
LT
2637 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2638
5578689a
DL
2639 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2640 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2641
2642 /*
1ab1457c
YH
2643 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2644 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2645 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2646 * rely only on pmtu discovery"
2647 */
2648 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2649 mtu = IPV6_MAXPLEN;
2650 return mtu;
2651}
2652
ebb762f2 2653static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2654{
d33e4553 2655 struct inet6_dev *idev;
d4ead6b3 2656 unsigned int mtu;
4b32b5ad
MKL
2657
2658 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2659 if (mtu)
30f78d8e 2660 goto out;
618f9bc7
SK
2661
2662 mtu = IPV6_MIN_MTU;
d33e4553
DM
2663
2664 rcu_read_lock();
2665 idev = __in6_dev_get(dst->dev);
2666 if (idev)
2667 mtu = idev->cnf.mtu6;
2668 rcu_read_unlock();
2669
30f78d8e 2670out:
14972cbd
RP
2671 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2672
2673 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2674}
2675
901731b8
DA
2676/* MTU selection:
2677 * 1. mtu on route is locked - use it
2678 * 2. mtu from nexthop exception
2679 * 3. mtu from egress device
2680 *
2681 * based on ip6_dst_mtu_forward and exception logic of
2682 * rt6_find_cached_rt; called with rcu_read_lock
2683 */
b748f260
DA
2684u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2685 const struct in6_addr *daddr,
2686 const struct in6_addr *saddr)
901731b8 2687{
b748f260
DA
2688 const struct fib6_nh *nh = res->nh;
2689 struct fib6_info *f6i = res->f6i;
901731b8 2690 struct inet6_dev *idev;
510e2ced 2691 struct rt6_info *rt;
901731b8
DA
2692 u32 mtu = 0;
2693
2694 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2695 mtu = f6i->fib6_pmtu;
2696 if (mtu)
2697 goto out;
2698 }
2699
510e2ced
WW
2700 rt = rt6_find_cached_rt(res, daddr, saddr);
2701 if (unlikely(rt)) {
2702 mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
2703 } else {
b748f260 2704 struct net_device *dev = nh->fib_nh_dev;
901731b8
DA
2705
2706 mtu = IPV6_MIN_MTU;
2707 idev = __in6_dev_get(dev);
2708 if (idev && idev->cnf.mtu6 > mtu)
2709 mtu = idev->cnf.mtu6;
2710 }
2711
2712 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2713out:
b748f260 2714 return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
901731b8
DA
2715}
2716
3b00944c 2717struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2718 struct flowi6 *fl6)
1da177e4 2719{
87a11578 2720 struct dst_entry *dst;
1da177e4
LT
2721 struct rt6_info *rt;
2722 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2723 struct net *net = dev_net(dev);
1da177e4 2724
38308473 2725 if (unlikely(!idev))
122bdf67 2726 return ERR_PTR(-ENODEV);
1da177e4 2727
ad706862 2728 rt = ip6_dst_alloc(net, dev, 0);
38308473 2729 if (unlikely(!rt)) {
1da177e4 2730 in6_dev_put(idev);
87a11578 2731 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2732 goto out;
2733 }
2734
8e2ec639 2735 rt->dst.flags |= DST_HOST;
588753f1 2736 rt->dst.input = ip6_input;
8e2ec639 2737 rt->dst.output = ip6_output;
550bab42 2738 rt->rt6i_gateway = fl6->daddr;
87a11578 2739 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2740 rt->rt6i_dst.plen = 128;
2741 rt->rt6i_idev = idev;
14edd87d 2742 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2743
4c981e28 2744 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2745 * do proper release of the net_device
2746 */
2747 rt6_uncached_list_add(rt);
81eb8447 2748 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2749
87a11578
DM
2750 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2751
1da177e4 2752out:
87a11578 2753 return dst;
1da177e4
LT
2754}
2755
569d3645 2756static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2757{
86393e52 2758 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2759 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2760 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2761 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2762 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2763 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2764 int entries;
7019b78e 2765
fc66f95c 2766 entries = dst_entries_get_fast(ops);
49a18d86 2767 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2768 entries <= rt_max_size)
1da177e4
LT
2769 goto out;
2770
6891a346 2771 net->ipv6.ip6_rt_gc_expire++;
14956643 2772 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2773 entries = dst_entries_get_slow(ops);
2774 if (entries < ops->gc_thresh)
7019b78e 2775 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2776out:
7019b78e 2777 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2778 return entries > rt_max_size;
1da177e4
LT
2779}
2780
8c14586f
DA
2781static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2782 struct fib6_config *cfg,
f4797b33
DA
2783 const struct in6_addr *gw_addr,
2784 u32 tbid, int flags)
8c14586f
DA
2785{
2786 struct flowi6 fl6 = {
2787 .flowi6_oif = cfg->fc_ifindex,
2788 .daddr = *gw_addr,
2789 .saddr = cfg->fc_prefsrc,
2790 };
2791 struct fib6_table *table;
2792 struct rt6_info *rt;
8c14586f 2793
f4797b33 2794 table = fib6_get_table(net, tbid);
8c14586f
DA
2795 if (!table)
2796 return NULL;
2797
2798 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2799 flags |= RT6_LOOKUP_F_HAS_SADDR;
2800
f4797b33 2801 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2802 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2803
2804 /* if table lookup failed, fall back to full lookup */
2805 if (rt == net->ipv6.ip6_null_entry) {
2806 ip6_rt_put(rt);
2807 rt = NULL;
2808 }
2809
2810 return rt;
2811}
2812
fc1e64e1
DA
2813static int ip6_route_check_nh_onlink(struct net *net,
2814 struct fib6_config *cfg,
9fbb704c 2815 const struct net_device *dev,
fc1e64e1
DA
2816 struct netlink_ext_ack *extack)
2817{
44750f84 2818 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2819 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2820 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2821 struct fib6_info *from;
fc1e64e1
DA
2822 struct rt6_info *grt;
2823 int err;
2824
2825 err = 0;
2826 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2827 if (grt) {
bf1dc8ba
PA
2828 rcu_read_lock();
2829 from = rcu_dereference(grt->from);
58e354c0 2830 if (!grt->dst.error &&
4ed591c8 2831 /* ignore match if it is the default route */
bf1dc8ba 2832 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2833 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2834 NL_SET_ERR_MSG(extack,
2835 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2836 err = -EINVAL;
2837 }
bf1dc8ba 2838 rcu_read_unlock();
fc1e64e1
DA
2839
2840 ip6_rt_put(grt);
2841 }
2842
2843 return err;
2844}
2845
1edce99f
DA
2846static int ip6_route_check_nh(struct net *net,
2847 struct fib6_config *cfg,
2848 struct net_device **_dev,
2849 struct inet6_dev **idev)
2850{
2851 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2852 struct net_device *dev = _dev ? *_dev : NULL;
2853 struct rt6_info *grt = NULL;
2854 int err = -EHOSTUNREACH;
2855
2856 if (cfg->fc_table) {
f4797b33
DA
2857 int flags = RT6_LOOKUP_F_IFACE;
2858
2859 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2860 cfg->fc_table, flags);
1edce99f
DA
2861 if (grt) {
2862 if (grt->rt6i_flags & RTF_GATEWAY ||
2863 (dev && dev != grt->dst.dev)) {
2864 ip6_rt_put(grt);
2865 grt = NULL;
2866 }
2867 }
2868 }
2869
2870 if (!grt)
b75cc8f9 2871 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2872
2873 if (!grt)
2874 goto out;
2875
2876 if (dev) {
2877 if (dev != grt->dst.dev) {
2878 ip6_rt_put(grt);
2879 goto out;
2880 }
2881 } else {
2882 *_dev = dev = grt->dst.dev;
2883 *idev = grt->rt6i_idev;
2884 dev_hold(dev);
2885 in6_dev_hold(grt->rt6i_idev);
2886 }
2887
2888 if (!(grt->rt6i_flags & RTF_GATEWAY))
2889 err = 0;
2890
2891 ip6_rt_put(grt);
2892
2893out:
2894 return err;
2895}
2896
9fbb704c
DA
2897static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2898 struct net_device **_dev, struct inet6_dev **idev,
2899 struct netlink_ext_ack *extack)
2900{
2901 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2902 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2903 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2904 const struct net_device *dev = *_dev;
232378e8 2905 bool need_addr_check = !dev;
9fbb704c
DA
2906 int err = -EINVAL;
2907
2908 /* if gw_addr is local we will fail to detect this in case
2909 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2910 * will return already-added prefix route via interface that
2911 * prefix route was assigned to, which might be non-loopback.
2912 */
232378e8
DA
2913 if (dev &&
2914 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2915 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2916 goto out;
2917 }
2918
2919 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2920 /* IPv6 strictly inhibits using not link-local
2921 * addresses as nexthop address.
2922 * Otherwise, router will not able to send redirects.
2923 * It is very good, but in some (rare!) circumstances
2924 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2925 * some exceptions. --ANK
2926 * We allow IPv4-mapped nexthops to support RFC4798-type
2927 * addressing
2928 */
2929 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2930 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2931 goto out;
2932 }
2933
2934 if (cfg->fc_flags & RTNH_F_ONLINK)
2935 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2936 else
2937 err = ip6_route_check_nh(net, cfg, _dev, idev);
2938
2939 if (err)
2940 goto out;
2941 }
2942
2943 /* reload in case device was changed */
2944 dev = *_dev;
2945
2946 err = -EINVAL;
2947 if (!dev) {
2948 NL_SET_ERR_MSG(extack, "Egress device not specified");
2949 goto out;
2950 } else if (dev->flags & IFF_LOOPBACK) {
2951 NL_SET_ERR_MSG(extack,
2952 "Egress device can not be loopback device for this route");
2953 goto out;
2954 }
232378e8
DA
2955
2956 /* if we did not check gw_addr above, do so now that the
2957 * egress device has been resolved.
2958 */
2959 if (need_addr_check &&
2960 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2961 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2962 goto out;
2963 }
2964
9fbb704c
DA
2965 err = 0;
2966out:
2967 return err;
2968}
2969
83c44251
DA
2970static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2971{
2972 if ((flags & RTF_REJECT) ||
2973 (dev && (dev->flags & IFF_LOOPBACK) &&
2974 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2975 !(flags & RTF_LOCAL)))
2976 return true;
2977
2978 return false;
2979}
2980
2981int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2982 struct fib6_config *cfg, gfp_t gfp_flags,
2983 struct netlink_ext_ack *extack)
2984{
2985 struct net_device *dev = NULL;
2986 struct inet6_dev *idev = NULL;
2987 int addr_type;
2988 int err;
2989
f1741730
DA
2990 fib6_nh->fib_nh_family = AF_INET6;
2991
83c44251
DA
2992 err = -ENODEV;
2993 if (cfg->fc_ifindex) {
2994 dev = dev_get_by_index(net, cfg->fc_ifindex);
2995 if (!dev)
2996 goto out;
2997 idev = in6_dev_get(dev);
2998 if (!idev)
2999 goto out;
3000 }
3001
3002 if (cfg->fc_flags & RTNH_F_ONLINK) {
3003 if (!dev) {
3004 NL_SET_ERR_MSG(extack,
3005 "Nexthop device required for onlink");
3006 goto out;
3007 }
3008
3009 if (!(dev->flags & IFF_UP)) {
3010 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3011 err = -ENETDOWN;
3012 goto out;
3013 }
3014
ad1601ae 3015 fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
83c44251
DA
3016 }
3017
ad1601ae 3018 fib6_nh->fib_nh_weight = 1;
83c44251
DA
3019
3020 /* We cannot add true routes via loopback here,
3021 * they would result in kernel looping; promote them to reject routes
3022 */
3023 addr_type = ipv6_addr_type(&cfg->fc_dst);
3024 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3025 /* hold loopback dev/idev if we haven't done so. */
3026 if (dev != net->loopback_dev) {
3027 if (dev) {
3028 dev_put(dev);
3029 in6_dev_put(idev);
3030 }
3031 dev = net->loopback_dev;
3032 dev_hold(dev);
3033 idev = in6_dev_get(dev);
3034 if (!idev) {
3035 err = -ENODEV;
3036 goto out;
3037 }
3038 }
3039 goto set_dev;
3040 }
3041
3042 if (cfg->fc_flags & RTF_GATEWAY) {
3043 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3044 if (err)
3045 goto out;
3046
ad1601ae 3047 fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
bdf00467 3048 fib6_nh->fib_nh_gw_family = AF_INET6;
83c44251
DA
3049 }
3050
3051 err = -ENODEV;
3052 if (!dev)
3053 goto out;
3054
3055 if (idev->cnf.disable_ipv6) {
3056 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3057 err = -EACCES;
3058 goto out;
3059 }
3060
3061 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3062 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3063 err = -ENETDOWN;
3064 goto out;
3065 }
3066
3067 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3068 !netif_carrier_ok(dev))
ad1601ae 3069 fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
83c44251 3070
979e276e
DA
3071 err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3072 cfg->fc_encap_type, cfg, gfp_flags, extack);
3073 if (err)
3074 goto out;
83c44251 3075set_dev:
ad1601ae 3076 fib6_nh->fib_nh_dev = dev;
f1741730 3077 fib6_nh->fib_nh_oif = dev->ifindex;
83c44251
DA
3078 err = 0;
3079out:
3080 if (idev)
3081 in6_dev_put(idev);
3082
3083 if (err) {
ad1601ae
DA
3084 lwtstate_put(fib6_nh->fib_nh_lws);
3085 fib6_nh->fib_nh_lws = NULL;
83c44251
DA
3086 if (dev)
3087 dev_put(dev);
3088 }
3089
3090 return err;
3091}
3092
dac7d0f2
DA
3093void fib6_nh_release(struct fib6_nh *fib6_nh)
3094{
979e276e 3095 fib_nh_common_release(&fib6_nh->nh_common);
dac7d0f2
DA
3096}
3097
8d1c802b 3098static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3099 gfp_t gfp_flags,
333c4301 3100 struct netlink_ext_ack *extack)
1da177e4 3101{
5578689a 3102 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3103 struct fib6_info *rt = NULL;
c71099ac 3104 struct fib6_table *table;
8c5b83f0 3105 int err = -EINVAL;
83c44251 3106 int addr_type;
1da177e4 3107
557c44be 3108 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3109 if (cfg->fc_flags & RTF_PCPU) {
3110 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3111 goto out;
d5d531cb 3112 }
557c44be 3113
2ea2352e
WW
3114 /* RTF_CACHE is an internal flag; can not be set by userspace */
3115 if (cfg->fc_flags & RTF_CACHE) {
3116 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3117 goto out;
3118 }
3119
e8478e80
DA
3120 if (cfg->fc_type > RTN_MAX) {
3121 NL_SET_ERR_MSG(extack, "Invalid route type");
3122 goto out;
3123 }
3124
d5d531cb
DA
3125 if (cfg->fc_dst_len > 128) {
3126 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3127 goto out;
3128 }
3129 if (cfg->fc_src_len > 128) {
3130 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3131 goto out;
d5d531cb 3132 }
1da177e4 3133#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3134 if (cfg->fc_src_len) {
3135 NL_SET_ERR_MSG(extack,
3136 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3137 goto out;
d5d531cb 3138 }
1da177e4 3139#endif
fc1e64e1 3140
d71314b4 3141 err = -ENOBUFS;
38308473
DM
3142 if (cfg->fc_nlinfo.nlh &&
3143 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3144 table = fib6_get_table(net, cfg->fc_table);
38308473 3145 if (!table) {
f3213831 3146 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3147 table = fib6_new_table(net, cfg->fc_table);
3148 }
3149 } else {
3150 table = fib6_new_table(net, cfg->fc_table);
3151 }
38308473
DM
3152
3153 if (!table)
c71099ac 3154 goto out;
c71099ac 3155
93531c67
DA
3156 err = -ENOMEM;
3157 rt = fib6_info_alloc(gfp_flags);
3158 if (!rt)
1da177e4 3159 goto out;
93531c67 3160
d7e774f3
DA
3161 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3162 extack);
767a2217
DA
3163 if (IS_ERR(rt->fib6_metrics)) {
3164 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3165 /* Do not leave garbage there. */
3166 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3167 goto out;
3168 }
3169
93531c67
DA
3170 if (cfg->fc_flags & RTF_ADDRCONF)
3171 rt->dst_nocount = true;
1da177e4 3172
1716a961 3173 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3174 fib6_set_expires(rt, jiffies +
1716a961
G
3175 clock_t_to_jiffies(cfg->fc_expires));
3176 else
14895687 3177 fib6_clean_expires(rt);
1da177e4 3178
86872cb5
TG
3179 if (cfg->fc_protocol == RTPROT_UNSPEC)
3180 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3181 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3182
83c44251
DA
3183 rt->fib6_table = table;
3184 rt->fib6_metric = cfg->fc_metric;
3185 rt->fib6_type = cfg->fc_type;
2b2450ca 3186 rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
19e42e45 3187
93c2fb25
DA
3188 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3189 rt->fib6_dst.plen = cfg->fc_dst_len;
3190 if (rt->fib6_dst.plen == 128)
3b6761d1 3191 rt->dst_host = true;
e5fd387a 3192
1da177e4 3193#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3194 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3195 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3196#endif
83c44251
DA
3197 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3198 if (err)
3199 goto out;
1da177e4
LT
3200
3201 /* We cannot add true routes via loopback here,
83c44251 3202 * they would result in kernel looping; promote them to reject routes
1da177e4 3203 */
83c44251 3204 addr_type = ipv6_addr_type(&cfg->fc_dst);
ad1601ae 3205 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
83c44251 3206 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3207
c3968a85 3208 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3209 struct net_device *dev = fib6_info_nh_dev(rt);
3210
c3968a85 3211 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3212 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3213 err = -EINVAL;
3214 goto out;
3215 }
93c2fb25
DA
3216 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3217 rt->fib6_prefsrc.plen = 128;
c3968a85 3218 } else
93c2fb25 3219 rt->fib6_prefsrc.plen = 0;
c3968a85 3220
8c5b83f0 3221 return rt;
6b9ea5a6 3222out:
93531c67 3223 fib6_info_release(rt);
8c5b83f0 3224 return ERR_PTR(err);
6b9ea5a6
RP
3225}
3226
acb54e3c 3227int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3228 struct netlink_ext_ack *extack)
6b9ea5a6 3229{
8d1c802b 3230 struct fib6_info *rt;
6b9ea5a6
RP
3231 int err;
3232
acb54e3c 3233 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3234 if (IS_ERR(rt))
3235 return PTR_ERR(rt);
6b9ea5a6 3236
d4ead6b3 3237 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3238 fib6_info_release(rt);
6b9ea5a6 3239
1da177e4
LT
3240 return err;
3241}
3242
8d1c802b 3243static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3244{
afb1d4b5 3245 struct net *net = info->nl_net;
c71099ac 3246 struct fib6_table *table;
afb1d4b5 3247 int err;
1da177e4 3248
421842ed 3249 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3250 err = -ENOENT;
3251 goto out;
3252 }
6c813a72 3253
93c2fb25 3254 table = rt->fib6_table;
66f5d6ce 3255 spin_lock_bh(&table->tb6_lock);
86872cb5 3256 err = fib6_del(rt, info);
66f5d6ce 3257 spin_unlock_bh(&table->tb6_lock);
1da177e4 3258
6825a26c 3259out:
93531c67 3260 fib6_info_release(rt);
1da177e4
LT
3261 return err;
3262}
3263
8d1c802b 3264int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3265{
afb1d4b5
DA
3266 struct nl_info info = { .nl_net = net };
3267
528c4ceb 3268 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3269}
3270
8d1c802b 3271static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3272{
3273 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3274 struct net *net = info->nl_net;
16a16cd3 3275 struct sk_buff *skb = NULL;
0ae81335 3276 struct fib6_table *table;
e3330039 3277 int err = -ENOENT;
0ae81335 3278
421842ed 3279 if (rt == net->ipv6.fib6_null_entry)
e3330039 3280 goto out_put;
93c2fb25 3281 table = rt->fib6_table;
66f5d6ce 3282 spin_lock_bh(&table->tb6_lock);
0ae81335 3283
93c2fb25 3284 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3285 struct fib6_info *sibling, *next_sibling;
0ae81335 3286
16a16cd3
DA
3287 /* prefer to send a single notification with all hops */
3288 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3289 if (skb) {
3290 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3291
d4ead6b3 3292 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3293 NULL, NULL, 0, RTM_DELROUTE,
3294 info->portid, seq, 0) < 0) {
3295 kfree_skb(skb);
3296 skb = NULL;
3297 } else
3298 info->skip_notify = 1;
3299 }
3300
0ae81335 3301 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3302 &rt->fib6_siblings,
3303 fib6_siblings) {
0ae81335
DA
3304 err = fib6_del(sibling, info);
3305 if (err)
e3330039 3306 goto out_unlock;
0ae81335
DA
3307 }
3308 }
3309
3310 err = fib6_del(rt, info);
e3330039 3311out_unlock:
66f5d6ce 3312 spin_unlock_bh(&table->tb6_lock);
e3330039 3313out_put:
93531c67 3314 fib6_info_release(rt);
16a16cd3
DA
3315
3316 if (skb) {
e3330039 3317 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3318 info->nlh, gfp_any());
3319 }
0ae81335
DA
3320 return err;
3321}
3322
23fb93a4
DA
3323static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3324{
3325 int rc = -ESRCH;
3326
3327 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3328 goto out;
3329
3330 if (cfg->fc_flags & RTF_GATEWAY &&
3331 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3332 goto out;
761f6026
XL
3333
3334 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3335out:
3336 return rc;
3337}
3338
333c4301
DA
3339static int ip6_route_del(struct fib6_config *cfg,
3340 struct netlink_ext_ack *extack)
1da177e4 3341{
8d1c802b 3342 struct rt6_info *rt_cache;
c71099ac 3343 struct fib6_table *table;
8d1c802b 3344 struct fib6_info *rt;
1da177e4 3345 struct fib6_node *fn;
1da177e4
LT
3346 int err = -ESRCH;
3347
5578689a 3348 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3349 if (!table) {
3350 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3351 return err;
d5d531cb 3352 }
c71099ac 3353
66f5d6ce 3354 rcu_read_lock();
1da177e4 3355
c71099ac 3356 fn = fib6_locate(&table->tb6_root,
86872cb5 3357 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3358 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3359 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3360
1da177e4 3361 if (fn) {
66f5d6ce 3362 for_each_fib6_node_rt_rcu(fn) {
ad1601ae
DA
3363 struct fib6_nh *nh;
3364
2b760fcf 3365 if (cfg->fc_flags & RTF_CACHE) {
7e4b5128
DA
3366 struct fib6_result res = {
3367 .f6i = rt,
3368 };
23fb93a4
DA
3369 int rc;
3370
7e4b5128
DA
3371 rt_cache = rt6_find_cached_rt(&res,
3372 &cfg->fc_dst,
2b760fcf 3373 &cfg->fc_src);
23fb93a4
DA
3374 if (rt_cache) {
3375 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3376 if (rc != -ESRCH) {
3377 rcu_read_unlock();
23fb93a4 3378 return rc;
9e575010 3379 }
23fb93a4
DA
3380 }
3381 continue;
2b760fcf 3382 }
ad1601ae
DA
3383
3384 nh = &rt->fib6_nh;
86872cb5 3385 if (cfg->fc_ifindex &&
ad1601ae
DA
3386 (!nh->fib_nh_dev ||
3387 nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3388 continue;
86872cb5 3389 if (cfg->fc_flags & RTF_GATEWAY &&
ad1601ae 3390 !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
1da177e4 3391 continue;
93c2fb25 3392 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3393 continue;
93c2fb25 3394 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3395 continue;
e873e4b9
WW
3396 if (!fib6_info_hold_safe(rt))
3397 continue;
66f5d6ce 3398 rcu_read_unlock();
1da177e4 3399
0ae81335
DA
3400 /* if gateway was specified only delete the one hop */
3401 if (cfg->fc_flags & RTF_GATEWAY)
3402 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3403
3404 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3405 }
3406 }
66f5d6ce 3407 rcu_read_unlock();
1da177e4
LT
3408
3409 return err;
3410}
3411
6700c270 3412static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3413{
a6279458 3414 struct netevent_redirect netevent;
e8599ff4 3415 struct rt6_info *rt, *nrt = NULL;
85bd05de 3416 struct fib6_result res = {};
e8599ff4
DM
3417 struct ndisc_options ndopts;
3418 struct inet6_dev *in6_dev;
3419 struct neighbour *neigh;
71bcdba0 3420 struct rd_msg *msg;
6e157b6a
DM
3421 int optlen, on_link;
3422 u8 *lladdr;
e8599ff4 3423
29a3cad5 3424 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3425 optlen -= sizeof(*msg);
e8599ff4
DM
3426
3427 if (optlen < 0) {
6e157b6a 3428 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3429 return;
3430 }
3431
71bcdba0 3432 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3433
71bcdba0 3434 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3435 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3436 return;
3437 }
3438
6e157b6a 3439 on_link = 0;
71bcdba0 3440 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3441 on_link = 1;
71bcdba0 3442 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3443 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3444 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3445 return;
3446 }
3447
3448 in6_dev = __in6_dev_get(skb->dev);
3449 if (!in6_dev)
3450 return;
3451 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3452 return;
3453
3454 /* RFC2461 8.1:
3455 * The IP source address of the Redirect MUST be the same as the current
3456 * first-hop router for the specified ICMP Destination Address.
3457 */
3458
f997c55c 3459 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3460 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3461 return;
3462 }
6e157b6a
DM
3463
3464 lladdr = NULL;
e8599ff4
DM
3465 if (ndopts.nd_opts_tgt_lladdr) {
3466 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3467 skb->dev);
3468 if (!lladdr) {
3469 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3470 return;
3471 }
3472 }
3473
6e157b6a 3474 rt = (struct rt6_info *) dst;
ec13ad1d 3475 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3476 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3477 return;
6e157b6a 3478 }
e8599ff4 3479
6e157b6a
DM
3480 /* Redirect received -> path was valid.
3481 * Look, redirects are sent only in response to data packets,
3482 * so that this nexthop apparently is reachable. --ANK
3483 */
0dec879f 3484 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3485
71bcdba0 3486 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3487 if (!neigh)
3488 return;
a6279458 3489
1da177e4
LT
3490 /*
3491 * We have finally decided to accept it.
3492 */
3493
f997c55c 3494 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3495 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3496 NEIGH_UPDATE_F_OVERRIDE|
3497 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3498 NEIGH_UPDATE_F_ISROUTER)),
3499 NDISC_REDIRECT, &ndopts);
1da177e4 3500
4d85cd0c 3501 rcu_read_lock();
85bd05de 3502 res.f6i = rcu_dereference(rt->from);
ff24e498 3503 if (!res.f6i)
886b7a50 3504 goto out;
8a14e46f 3505
85bd05de 3506 res.nh = &res.f6i->fib6_nh;
7d21fec9
DA
3507 res.fib6_flags = res.f6i->fib6_flags;
3508 res.fib6_type = res.f6i->fib6_type;
85bd05de 3509 nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
38308473 3510 if (!nrt)
1da177e4
LT
3511 goto out;
3512
3513 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3514 if (on_link)
3515 nrt->rt6i_flags &= ~RTF_GATEWAY;
3516
4e3fd7a0 3517 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3518
886b7a50 3519 /* rt6_insert_exception() will take care of duplicated exceptions */
5012f0a5 3520 if (rt6_insert_exception(nrt, &res)) {
2b760fcf
WW
3521 dst_release_immediate(&nrt->dst);
3522 goto out;
3523 }
1da177e4 3524
d8d1f30b
CG
3525 netevent.old = &rt->dst;
3526 netevent.new = &nrt->dst;
71bcdba0 3527 netevent.daddr = &msg->dest;
60592833 3528 netevent.neigh = neigh;
8d71740c
TT
3529 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3530
1da177e4 3531out:
886b7a50 3532 rcu_read_unlock();
e8599ff4 3533 neigh_release(neigh);
6e157b6a
DM
3534}
3535
70ceb4f5 3536#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3537static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3538 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3539 const struct in6_addr *gwaddr,
3540 struct net_device *dev)
70ceb4f5 3541{
830218c1
DA
3542 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3543 int ifindex = dev->ifindex;
70ceb4f5 3544 struct fib6_node *fn;
8d1c802b 3545 struct fib6_info *rt = NULL;
c71099ac
TG
3546 struct fib6_table *table;
3547
830218c1 3548 table = fib6_get_table(net, tb_id);
38308473 3549 if (!table)
c71099ac 3550 return NULL;
70ceb4f5 3551
66f5d6ce 3552 rcu_read_lock();
38fbeeee 3553 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3554 if (!fn)
3555 goto out;
3556
66f5d6ce 3557 for_each_fib6_node_rt_rcu(fn) {
ad1601ae 3558 if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
70ceb4f5 3559 continue;
2b2450ca 3560 if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
bdf00467 3561 !rt->fib6_nh.fib_nh_gw_family)
70ceb4f5 3562 continue;
ad1601ae 3563 if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
70ceb4f5 3564 continue;
e873e4b9
WW
3565 if (!fib6_info_hold_safe(rt))
3566 continue;
70ceb4f5
YH
3567 break;
3568 }
3569out:
66f5d6ce 3570 rcu_read_unlock();
70ceb4f5
YH
3571 return rt;
3572}
3573
8d1c802b 3574static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3575 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3576 const struct in6_addr *gwaddr,
3577 struct net_device *dev,
95c96174 3578 unsigned int pref)
70ceb4f5 3579{
86872cb5 3580 struct fib6_config cfg = {
238fc7ea 3581 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3582 .fc_ifindex = dev->ifindex,
86872cb5
TG
3583 .fc_dst_len = prefixlen,
3584 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3585 RTF_UP | RTF_PREF(pref),
b91d5329 3586 .fc_protocol = RTPROT_RA,
e8478e80 3587 .fc_type = RTN_UNICAST,
15e47304 3588 .fc_nlinfo.portid = 0,
efa2cea0
DL
3589 .fc_nlinfo.nlh = NULL,
3590 .fc_nlinfo.nl_net = net,
86872cb5
TG
3591 };
3592
830218c1 3593 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3594 cfg.fc_dst = *prefix;
3595 cfg.fc_gateway = *gwaddr;
70ceb4f5 3596
e317da96
YH
3597 /* We should treat it as a default route if prefix length is 0. */
3598 if (!prefixlen)
86872cb5 3599 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3600
acb54e3c 3601 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3602
830218c1 3603 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3604}
3605#endif
3606
8d1c802b 3607struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3608 const struct in6_addr *addr,
3609 struct net_device *dev)
1ab1457c 3610{
830218c1 3611 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3612 struct fib6_info *rt;
c71099ac 3613 struct fib6_table *table;
1da177e4 3614
afb1d4b5 3615 table = fib6_get_table(net, tb_id);
38308473 3616 if (!table)
c71099ac 3617 return NULL;
1da177e4 3618
66f5d6ce
WW
3619 rcu_read_lock();
3620 for_each_fib6_node_rt_rcu(&table->tb6_root) {
ad1601ae
DA
3621 struct fib6_nh *nh = &rt->fib6_nh;
3622
3623 if (dev == nh->fib_nh_dev &&
93c2fb25 3624 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ad1601ae 3625 ipv6_addr_equal(&nh->fib_nh_gw6, addr))
1da177e4
LT
3626 break;
3627 }
e873e4b9
WW
3628 if (rt && !fib6_info_hold_safe(rt))
3629 rt = NULL;
66f5d6ce 3630 rcu_read_unlock();
1da177e4
LT
3631 return rt;
3632}
3633
8d1c802b 3634struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3635 const struct in6_addr *gwaddr,
ebacaaa0
YH
3636 struct net_device *dev,
3637 unsigned int pref)
1da177e4 3638{
86872cb5 3639 struct fib6_config cfg = {
ca254490 3640 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3641 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3642 .fc_ifindex = dev->ifindex,
3643 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3644 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3645 .fc_protocol = RTPROT_RA,
e8478e80 3646 .fc_type = RTN_UNICAST,
15e47304 3647 .fc_nlinfo.portid = 0,
5578689a 3648 .fc_nlinfo.nlh = NULL,
afb1d4b5 3649 .fc_nlinfo.nl_net = net,
86872cb5 3650 };
1da177e4 3651
4e3fd7a0 3652 cfg.fc_gateway = *gwaddr;
1da177e4 3653
acb54e3c 3654 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3655 struct fib6_table *table;
3656
3657 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3658 if (table)
3659 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3660 }
1da177e4 3661
afb1d4b5 3662 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3663}
3664
afb1d4b5
DA
3665static void __rt6_purge_dflt_routers(struct net *net,
3666 struct fib6_table *table)
1da177e4 3667{
8d1c802b 3668 struct fib6_info *rt;
1da177e4
LT
3669
3670restart:
66f5d6ce
WW
3671 rcu_read_lock();
3672 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3673 struct net_device *dev = fib6_info_nh_dev(rt);
3674 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3675
93c2fb25 3676 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3677 (!idev || idev->cnf.accept_ra != 2) &&
3678 fib6_info_hold_safe(rt)) {
93531c67
DA
3679 rcu_read_unlock();
3680 ip6_del_rt(net, rt);
1da177e4
LT
3681 goto restart;
3682 }
3683 }
66f5d6ce 3684 rcu_read_unlock();
830218c1
DA
3685
3686 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3687}
3688
3689void rt6_purge_dflt_routers(struct net *net)
3690{
3691 struct fib6_table *table;
3692 struct hlist_head *head;
3693 unsigned int h;
3694
3695 rcu_read_lock();
3696
3697 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3698 head = &net->ipv6.fib_table_hash[h];
3699 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3700 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3701 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3702 }
3703 }
3704
3705 rcu_read_unlock();
1da177e4
LT
3706}
3707
5578689a
DL
3708static void rtmsg_to_fib6_config(struct net *net,
3709 struct in6_rtmsg *rtmsg,
86872cb5
TG
3710 struct fib6_config *cfg)
3711{
8823a3ac
3712 *cfg = (struct fib6_config){
3713 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3714 : RT6_TABLE_MAIN,
3715 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3716 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3717 .fc_expires = rtmsg->rtmsg_info,
3718 .fc_dst_len = rtmsg->rtmsg_dst_len,
3719 .fc_src_len = rtmsg->rtmsg_src_len,
3720 .fc_flags = rtmsg->rtmsg_flags,
3721 .fc_type = rtmsg->rtmsg_type,
3722
3723 .fc_nlinfo.nl_net = net,
3724
3725 .fc_dst = rtmsg->rtmsg_dst,
3726 .fc_src = rtmsg->rtmsg_src,
3727 .fc_gateway = rtmsg->rtmsg_gateway,
3728 };
86872cb5
TG
3729}
3730
5578689a 3731int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3732{
86872cb5 3733 struct fib6_config cfg;
1da177e4
LT
3734 struct in6_rtmsg rtmsg;
3735 int err;
3736
67ba4152 3737 switch (cmd) {
1da177e4
LT
3738 case SIOCADDRT: /* Add a route */
3739 case SIOCDELRT: /* Delete a route */
af31f412 3740 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3741 return -EPERM;
3742 err = copy_from_user(&rtmsg, arg,
3743 sizeof(struct in6_rtmsg));
3744 if (err)
3745 return -EFAULT;
86872cb5 3746
5578689a 3747 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3748
1da177e4
LT
3749 rtnl_lock();
3750 switch (cmd) {
3751 case SIOCADDRT:
acb54e3c 3752 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3753 break;
3754 case SIOCDELRT:
333c4301 3755 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3756 break;
3757 default:
3758 err = -EINVAL;
3759 }
3760 rtnl_unlock();
3761
3762 return err;
3ff50b79 3763 }
1da177e4
LT
3764
3765 return -EINVAL;
3766}
3767
3768/*
3769 * Drop the packet on the floor
3770 */
3771
d5fdd6ba 3772static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3773{
adf30907 3774 struct dst_entry *dst = skb_dst(skb);
1d3fd8a1
SS
3775 struct net *net = dev_net(dst->dev);
3776 struct inet6_dev *idev;
3777 int type;
3778
3779 if (netif_is_l3_master(skb->dev) &&
3780 dst->dev == net->loopback_dev)
3781 idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
3782 else
3783 idev = ip6_dst_idev(dst);
3784
612f09e8
YH
3785 switch (ipstats_mib_noroutes) {
3786 case IPSTATS_MIB_INNOROUTES:
0660e03f 3787 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3788 if (type == IPV6_ADDR_ANY) {
1d3fd8a1 3789 IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3790 break;
3791 }
3792 /* FALLTHROUGH */
3793 case IPSTATS_MIB_OUTNOROUTES:
1d3fd8a1 3794 IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
612f09e8
YH
3795 break;
3796 }
1d3fd8a1
SS
3797
3798 /* Start over by dropping the dst for l3mdev case */
3799 if (netif_is_l3_master(skb->dev))
3800 skb_dst_drop(skb);
3801
3ffe533c 3802 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3803 kfree_skb(skb);
3804 return 0;
3805}
3806
9ce8ade0
TG
3807static int ip6_pkt_discard(struct sk_buff *skb)
3808{
612f09e8 3809 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3810}
3811
ede2059d 3812static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3813{
adf30907 3814 skb->dev = skb_dst(skb)->dev;
612f09e8 3815 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3816}
3817
9ce8ade0
TG
3818static int ip6_pkt_prohibit(struct sk_buff *skb)
3819{
612f09e8 3820 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3821}
3822
ede2059d 3823static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3824{
adf30907 3825 skb->dev = skb_dst(skb)->dev;
612f09e8 3826 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3827}
3828
1da177e4
LT
3829/*
3830 * Allocate a dst for local (unicast / anycast) address.
3831 */
3832
360a9887
DA
3833struct fib6_info *addrconf_f6i_alloc(struct net *net,
3834 struct inet6_dev *idev,
3835 const struct in6_addr *addr,
3836 bool anycast, gfp_t gfp_flags)
1da177e4 3837{
c7a1ce39
DA
3838 struct fib6_config cfg = {
3839 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3840 .fc_ifindex = idev->dev->ifindex,
3841 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3842 .fc_dst = *addr,
3843 .fc_dst_len = 128,
3844 .fc_protocol = RTPROT_KERNEL,
3845 .fc_nlinfo.nl_net = net,
3846 .fc_ignore_dev_down = true,
3847 };
1da177e4 3848
e8478e80 3849 if (anycast) {
c7a1ce39
DA
3850 cfg.fc_type = RTN_ANYCAST;
3851 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3852 } else {
c7a1ce39
DA
3853 cfg.fc_type = RTN_LOCAL;
3854 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3855 }
1da177e4 3856
c7a1ce39 3857 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3858}
3859
c3968a85
DW
/* remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* only routes on this device; NULL matches any */
	struct net *net;		/* namespace, used to skip its null entry */
	struct in6_addr *addr;		/* preferred source address being removed */
};
3866
8d1c802b 3867static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3868{
3869 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3870 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3871 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3872
ad1601ae 3873 if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
421842ed 3874 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3875 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3876 spin_lock_bh(&rt6_exception_lock);
c3968a85 3877 /* remove prefsrc entry */
93c2fb25 3878 rt->fib6_prefsrc.plen = 0;
60006a48 3879 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3880 }
3881 return 0;
3882}
3883
3884void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3885{
3886 struct net *net = dev_net(ifp->idev->dev);
3887 struct arg_dev_net_ip adni = {
3888 .dev = ifp->idev->dev,
3889 .net = net,
3890 .addr = &ifp->addr,
3891 };
0c3584d5 3892 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3893}
3894
2b2450ca 3895#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
be7a010d
DJ
3896
3897/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3898static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3899{
3900 struct in6_addr *gateway = (struct in6_addr *)arg;
3901
93c2fb25 3902 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
bdf00467 3903 rt->fib6_nh.fib_nh_gw_family &&
ad1601ae 3904 ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
be7a010d
DJ
3905 return -1;
3906 }
b16cb459
WW
3907
3908 /* Further clean up cached routes in exception table.
3909 * This is needed because cached route may have a different
3910 * gateway than its 'parent' in the case of an ip redirect.
3911 */
3912 rt6_exceptions_clean_tohost(rt, gateway);
3913
be7a010d
DJ
3914 return 0;
3915}
3916
3917void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3918{
3919 fib6_clean_all(net, fib6_clean_tohost, gateway);
3920}
3921
2127d95a
IS
/* Argument bundle for FIB walks triggered by a netdevice event.
 * NOTE(review): which union member is valid presumably depends on the
 * walker it is handed to - confirm at the call sites.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned char nh_flags;	/* nexthop flags */
		unsigned long event;	/* event code */
	};
};
3929
8d1c802b 3930static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3931{
8d1c802b 3932 struct fib6_info *iter;
d7dedee1
IS
3933 struct fib6_node *fn;
3934
93c2fb25
DA
3935 fn = rcu_dereference_protected(rt->fib6_node,
3936 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3937 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3938 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3939 while (iter) {
93c2fb25 3940 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3941 rt6_qualify_for_ecmp(iter))
d7dedee1 3942 return iter;
8fb11a9a 3943 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3944 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3945 }
3946
3947 return NULL;
3948}
3949
8d1c802b 3950static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3951{
ad1601ae
DA
3952 if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3953 (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3954 ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
d7dedee1
IS
3955 return true;
3956
3957 return false;
3958}
3959
8d1c802b 3960static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3961{
8d1c802b 3962 struct fib6_info *iter;
d7dedee1
IS
3963 int total = 0;
3964
3965 if (!rt6_is_dead(rt))
ad1601ae 3966 total += rt->fib6_nh.fib_nh_weight;
d7dedee1 3967
93c2fb25 3968 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3969 if (!rt6_is_dead(iter))
ad1601ae 3970 total += iter->fib6_nh.fib_nh_weight;
d7dedee1
IS
3971 }
3972
3973 return total;
3974}
3975
8d1c802b 3976static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3977{
3978 int upper_bound = -1;
3979
3980 if (!rt6_is_dead(rt)) {
ad1601ae 3981 *weight += rt->fib6_nh.fib_nh_weight;
d7dedee1
IS
3982 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3983 total) - 1;
3984 }
ad1601ae 3985 atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
d7dedee1
IS
3986}
3987
8d1c802b 3988static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3989{
8d1c802b 3990 struct fib6_info *iter;
d7dedee1
IS
3991 int weight = 0;
3992
3993 rt6_upper_bound_set(rt, &weight, total);
3994
93c2fb25 3995 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3996 rt6_upper_bound_set(iter, &weight, total);
3997}
3998
8d1c802b 3999void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 4000{
8d1c802b 4001 struct fib6_info *first;
d7dedee1
IS
4002 int total;
4003
4004 /* In case the entire multipath route was marked for flushing,
4005 * then there is no need to rebalance upon the removal of every
4006 * sibling route.
4007 */
93c2fb25 4008 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
4009 return;
4010
4011 /* During lookup routes are evaluated in order, so we need to
4012 * make sure upper bounds are assigned from the first sibling
4013 * onwards.
4014 */
4015 first = rt6_multipath_first_sibling(rt);
4016 if (WARN_ON_ONCE(!first))
4017 return;
4018
4019 total = rt6_multipath_total_weight(first);
4020 rt6_multipath_upper_bound_set(first, total);
4021}
4022
8d1c802b 4023static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
4024{
4025 const struct arg_netdev_event *arg = p_arg;
7aef6859 4026 struct net *net = dev_net(arg->dev);
2127d95a 4027
ad1601ae
DA
4028 if (rt != net->ipv6.fib6_null_entry &&
4029 rt->fib6_nh.fib_nh_dev == arg->dev) {
4030 rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
7aef6859 4031 fib6_update_sernum_upto_root(net, rt);
d7dedee1 4032 rt6_multipath_rebalance(rt);
1de178ed 4033 }
2127d95a
IS
4034
4035 return 0;
4036}
4037
ecc5663c 4038void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
2127d95a
IS
4039{
4040 struct arg_netdev_event arg = {
4041 .dev = dev,
6802f3ad
IS
4042 {
4043 .nh_flags = nh_flags,
4044 },
2127d95a
IS
4045 };
4046
4047 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4048 arg.nh_flags |= RTNH_F_LINKDOWN;
4049
4050 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4051}
4052
8d1c802b 4053static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
4054 const struct net_device *dev)
4055{
8d1c802b 4056 struct fib6_info *iter;
1de178ed 4057
ad1601ae 4058 if (rt->fib6_nh.fib_nh_dev == dev)
1de178ed 4059 return true;
93c2fb25 4060 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae 4061 if (iter->fib6_nh.fib_nh_dev == dev)
1de178ed
IS
4062 return true;
4063
4064 return false;
4065}
4066
8d1c802b 4067static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 4068{
8d1c802b 4069 struct fib6_info *iter;
1de178ed
IS
4070
4071 rt->should_flush = 1;
93c2fb25 4072 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
4073 iter->should_flush = 1;
4074}
4075
8d1c802b 4076static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
4077 const struct net_device *down_dev)
4078{
8d1c802b 4079 struct fib6_info *iter;
1de178ed
IS
4080 unsigned int dead = 0;
4081
ad1601ae
DA
4082 if (rt->fib6_nh.fib_nh_dev == down_dev ||
4083 rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed 4084 dead++;
93c2fb25 4085 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4086 if (iter->fib6_nh.fib_nh_dev == down_dev ||
4087 iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
1de178ed
IS
4088 dead++;
4089
4090 return dead;
4091}
4092
8d1c802b 4093static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed 4094 const struct net_device *dev,
ecc5663c 4095 unsigned char nh_flags)
1de178ed 4096{
8d1c802b 4097 struct fib6_info *iter;
1de178ed 4098
ad1601ae
DA
4099 if (rt->fib6_nh.fib_nh_dev == dev)
4100 rt->fib6_nh.fib_nh_flags |= nh_flags;
93c2fb25 4101 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
ad1601ae
DA
4102 if (iter->fib6_nh.fib_nh_dev == dev)
4103 iter->fib6_nh.fib_nh_flags |= nh_flags;
1de178ed
IS
4104}
4105
a1a22c12 4106/* called with write lock held for table with rt */
8d1c802b 4107static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4108{
4c981e28
IS
4109 const struct arg_netdev_event *arg = p_arg;
4110 const struct net_device *dev = arg->dev;
7aef6859 4111 struct net *net = dev_net(dev);
8ed67789 4112
421842ed 4113 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4114 return 0;
4115
4116 switch (arg->event) {
4117 case NETDEV_UNREGISTER:
ad1601ae 4118 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
27c6fa73 4119 case NETDEV_DOWN:
1de178ed 4120 if (rt->should_flush)
27c6fa73 4121 return -1;
93c2fb25 4122 if (!rt->fib6_nsiblings)
ad1601ae 4123 return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
1de178ed
IS
4124 if (rt6_multipath_uses_dev(rt, dev)) {
4125 unsigned int count;
4126
4127 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4128 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4129 rt6_multipath_flush(rt);
4130 return -1;
4131 }
4132 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4133 RTNH_F_LINKDOWN);
7aef6859 4134 fib6_update_sernum(net, rt);
d7dedee1 4135 rt6_multipath_rebalance(rt);
1de178ed
IS
4136 }
4137 return -2;
27c6fa73 4138 case NETDEV_CHANGE:
ad1601ae 4139 if (rt->fib6_nh.fib_nh_dev != dev ||
93c2fb25 4140 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4141 break;
ad1601ae 4142 rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4143 rt6_multipath_rebalance(rt);
27c6fa73 4144 break;
2b241361 4145 }
c159d30c 4146
1da177e4
LT
4147 return 0;
4148}
4149
27c6fa73 4150void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4151{
4c981e28 4152 struct arg_netdev_event arg = {
8ed67789 4153 .dev = dev,
6802f3ad
IS
4154 {
4155 .event = event,
4156 },
8ed67789 4157 };
7c6bb7d2 4158 struct net *net = dev_net(dev);
8ed67789 4159
7c6bb7d2
DA
4160 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4161 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4162 else
4163 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4164}
4165
4166void rt6_disable_ip(struct net_device *dev, unsigned long event)
4167{
4168 rt6_sync_down_dev(dev, event);
4169 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4170 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4171}
4172
/* Argument block for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	/* device whose MTU changed */
	struct net_device *dev;
	/* the new MTU */
	unsigned int mtu;
};
4177
8d1c802b 4178static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4179{
4180 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4181 struct inet6_dev *idev;
4182
4183 /* In IPv6 pmtu discovery is not optional,
4184 so that RTAX_MTU lock cannot disable it.
4185 We still use this lock to block changes
4186 caused by addrconf/ndisc.
4187 */
4188
4189 idev = __in6_dev_get(arg->dev);
38308473 4190 if (!idev)
1da177e4
LT
4191 return 0;
4192
4193 /* For administrative MTU increase, there is no way to discover
4194 IPv6 PMTU increase, so PMTU increase should be updated here.
4195 Since RFC 1981 doesn't include administrative MTU increase
4196 update PMTU increase is a MUST. (i.e. jumbo frame)
4197 */
ad1601ae 4198 if (rt->fib6_nh.fib_nh_dev == arg->dev &&
d4ead6b3
DA
4199 !fib6_metric_locked(rt, RTAX_MTU)) {
4200 u32 mtu = rt->fib6_pmtu;
4201
4202 if (mtu >= arg->mtu ||
4203 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4204 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4205
f5bbe7ee 4206 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4207 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4208 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4209 }
1da177e4
LT
4210 return 0;
4211}
4212
95c96174 4213void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4214{
c71099ac
TG
4215 struct rt6_mtu_change_arg arg = {
4216 .dev = dev,
4217 .mtu = mtu,
4218 };
1da177e4 4219
0c3584d5 4220 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4221}
4222
ef7c79ed 4223static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4224 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4225 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4226 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4227 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4228 [RTA_PRIORITY] = { .type = NLA_U32 },
4229 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4230 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4231 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4232 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4233 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4234 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4235 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4236 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4237 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4238 [RTA_IP_PROTO] = { .type = NLA_U8 },
4239 [RTA_SPORT] = { .type = NLA_U16 },
4240 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4241};
4242
4243static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4244 struct fib6_config *cfg,
4245 struct netlink_ext_ack *extack)
1da177e4 4246{
86872cb5
TG
4247 struct rtmsg *rtm;
4248 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4249 unsigned int pref;
86872cb5 4250 int err;
1da177e4 4251
8cb08174
JB
4252 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4253 rtm_ipv6_policy, extack);
86872cb5
TG
4254 if (err < 0)
4255 goto errout;
1da177e4 4256
86872cb5
TG
4257 err = -EINVAL;
4258 rtm = nlmsg_data(nlh);
86872cb5 4259
84db8407
4260 *cfg = (struct fib6_config){
4261 .fc_table = rtm->rtm_table,
4262 .fc_dst_len = rtm->rtm_dst_len,
4263 .fc_src_len = rtm->rtm_src_len,
4264 .fc_flags = RTF_UP,
4265 .fc_protocol = rtm->rtm_protocol,
4266 .fc_type = rtm->rtm_type,
4267
4268 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4269 .fc_nlinfo.nlh = nlh,
4270 .fc_nlinfo.nl_net = sock_net(skb->sk),
4271 };
86872cb5 4272
ef2c7d7b
ND
4273 if (rtm->rtm_type == RTN_UNREACHABLE ||
4274 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4275 rtm->rtm_type == RTN_PROHIBIT ||
4276 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4277 cfg->fc_flags |= RTF_REJECT;
4278
ab79ad14
4279 if (rtm->rtm_type == RTN_LOCAL)
4280 cfg->fc_flags |= RTF_LOCAL;
4281
1f56a01f
MKL
4282 if (rtm->rtm_flags & RTM_F_CLONED)
4283 cfg->fc_flags |= RTF_CACHE;
4284
fc1e64e1
DA
4285 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4286
86872cb5 4287 if (tb[RTA_GATEWAY]) {
67b61f6c 4288 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4289 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4290 }
e3818541
DA
4291 if (tb[RTA_VIA]) {
4292 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4293 goto errout;
4294 }
86872cb5
TG
4295
4296 if (tb[RTA_DST]) {
4297 int plen = (rtm->rtm_dst_len + 7) >> 3;
4298
4299 if (nla_len(tb[RTA_DST]) < plen)
4300 goto errout;
4301
4302 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4303 }
86872cb5
TG
4304
4305 if (tb[RTA_SRC]) {
4306 int plen = (rtm->rtm_src_len + 7) >> 3;
4307
4308 if (nla_len(tb[RTA_SRC]) < plen)
4309 goto errout;
4310
4311 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4312 }
86872cb5 4313
c3968a85 4314 if (tb[RTA_PREFSRC])
67b61f6c 4315 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4316
86872cb5
TG
4317 if (tb[RTA_OIF])
4318 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4319
4320 if (tb[RTA_PRIORITY])
4321 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4322
4323 if (tb[RTA_METRICS]) {
4324 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4325 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4326 }
86872cb5
TG
4327
4328 if (tb[RTA_TABLE])
4329 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4330
51ebd318
ND
4331 if (tb[RTA_MULTIPATH]) {
4332 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4333 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4334
4335 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4336 cfg->fc_mp_len, extack);
9ed59592
DA
4337 if (err < 0)
4338 goto errout;
51ebd318
ND
4339 }
4340
c78ba6d6
LR
4341 if (tb[RTA_PREF]) {
4342 pref = nla_get_u8(tb[RTA_PREF]);
4343 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4344 pref != ICMPV6_ROUTER_PREF_HIGH)
4345 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4346 cfg->fc_flags |= RTF_PREF(pref);
4347 }
4348
19e42e45
RP
4349 if (tb[RTA_ENCAP])
4350 cfg->fc_encap = tb[RTA_ENCAP];
4351
9ed59592 4352 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4353 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4354
c255bd68 4355 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4356 if (err < 0)
4357 goto errout;
4358 }
4359
32bc201e
XL
4360 if (tb[RTA_EXPIRES]) {
4361 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4362
4363 if (addrconf_finite_timeout(timeout)) {
4364 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4365 cfg->fc_flags |= RTF_EXPIRES;
4366 }
4367 }
4368
86872cb5
TG
4369 err = 0;
4370errout:
4371 return err;
1da177e4
LT
4372}
4373
6b9ea5a6 4374struct rt6_nh {
8d1c802b 4375 struct fib6_info *fib6_info;
6b9ea5a6 4376 struct fib6_config r_cfg;
6b9ea5a6
RP
4377 struct list_head next;
4378};
4379
d4ead6b3
DA
4380static int ip6_route_info_append(struct net *net,
4381 struct list_head *rt6_nh_list,
8d1c802b
DA
4382 struct fib6_info *rt,
4383 struct fib6_config *r_cfg)
6b9ea5a6
RP
4384{
4385 struct rt6_nh *nh;
6b9ea5a6
RP
4386 int err = -EEXIST;
4387
4388 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4389 /* check if fib6_info already exists */
4390 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4391 return err;
4392 }
4393
4394 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4395 if (!nh)
4396 return -ENOMEM;
8d1c802b 4397 nh->fib6_info = rt;
6b9ea5a6
RP
4398 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4399 list_add_tail(&nh->next, rt6_nh_list);
4400
4401 return 0;
4402}
4403
8d1c802b
DA
4404static void ip6_route_mpath_notify(struct fib6_info *rt,
4405 struct fib6_info *rt_last,
3b1137fe
DA
4406 struct nl_info *info,
4407 __u16 nlflags)
4408{
4409 /* if this is an APPEND route, then rt points to the first route
4410 * inserted and rt_last points to last route inserted. Userspace
4411 * wants a consistent dump of the route which starts at the first
4412 * nexthop. Since sibling routes are always added at the end of
4413 * the list, find the first sibling of the last route appended
4414 */
93c2fb25
DA
4415 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4416 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4417 struct fib6_info,
93c2fb25 4418 fib6_siblings);
3b1137fe
DA
4419 }
4420
4421 if (rt)
4422 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4423}
4424
333c4301
DA
4425static int ip6_route_multipath_add(struct fib6_config *cfg,
4426 struct netlink_ext_ack *extack)
51ebd318 4427{
8d1c802b 4428 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4429 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4430 struct fib6_config r_cfg;
4431 struct rtnexthop *rtnh;
8d1c802b 4432 struct fib6_info *rt;
6b9ea5a6
RP
4433 struct rt6_nh *err_nh;
4434 struct rt6_nh *nh, *nh_safe;
3b1137fe 4435 __u16 nlflags;
51ebd318
ND
4436 int remaining;
4437 int attrlen;
6b9ea5a6
RP
4438 int err = 1;
4439 int nhn = 0;
4440 int replace = (cfg->fc_nlinfo.nlh &&
4441 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4442 LIST_HEAD(rt6_nh_list);
51ebd318 4443
3b1137fe
DA
4444 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4445 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4446 nlflags |= NLM_F_APPEND;
4447
35f1b4e9 4448 remaining = cfg->fc_mp_len;
51ebd318 4449 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4450
6b9ea5a6 4451 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4452 * fib6_info structs per nexthop
6b9ea5a6 4453 */
51ebd318
ND
4454 while (rtnh_ok(rtnh, remaining)) {
4455 memcpy(&r_cfg, cfg, sizeof(*cfg));
4456 if (rtnh->rtnh_ifindex)
4457 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4458
4459 attrlen = rtnh_attrlen(rtnh);
4460 if (attrlen > 0) {
4461 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4462
4463 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4464 if (nla) {
67b61f6c 4465 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4466 r_cfg.fc_flags |= RTF_GATEWAY;
4467 }
19e42e45
RP
4468 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4469 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4470 if (nla)
4471 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4472 }
6b9ea5a6 4473
68e2ffde 4474 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4475 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4476 if (IS_ERR(rt)) {
4477 err = PTR_ERR(rt);
4478 rt = NULL;
6b9ea5a6 4479 goto cleanup;
8c5b83f0 4480 }
b5d2d75e
DA
4481 if (!rt6_qualify_for_ecmp(rt)) {
4482 err = -EINVAL;
4483 NL_SET_ERR_MSG(extack,
4484 "Device only routes can not be added for IPv6 using the multipath API.");
4485 fib6_info_release(rt);
4486 goto cleanup;
4487 }
6b9ea5a6 4488
ad1601ae 4489 rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
398958ae 4490
d4ead6b3
DA
4491 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4492 rt, &r_cfg);
51ebd318 4493 if (err) {
93531c67 4494 fib6_info_release(rt);
6b9ea5a6
RP
4495 goto cleanup;
4496 }
4497
4498 rtnh = rtnh_next(rtnh, &remaining);
4499 }
4500
3b1137fe
DA
4501 /* for add and replace send one notification with all nexthops.
4502 * Skip the notification in fib6_add_rt2node and send one with
4503 * the full route when done
4504 */
4505 info->skip_notify = 1;
4506
6b9ea5a6
RP
4507 err_nh = NULL;
4508 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4509 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4510 fib6_info_release(nh->fib6_info);
93531c67 4511
f7225172
DA
4512 if (!err) {
4513 /* save reference to last route successfully inserted */
4514 rt_last = nh->fib6_info;
4515
4516 /* save reference to first route for notification */
4517 if (!rt_notif)
4518 rt_notif = nh->fib6_info;
4519 }
3b1137fe 4520
8d1c802b
DA
4521 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4522 nh->fib6_info = NULL;
6b9ea5a6
RP
4523 if (err) {
4524 if (replace && nhn)
a5a82d84
JK
4525 NL_SET_ERR_MSG_MOD(extack,
4526 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4527 err_nh = nh;
4528 goto add_errout;
51ebd318 4529 }
6b9ea5a6 4530
1a72418b 4531 /* Because each route is added like a single route we remove
27596472
MK
4532 * these flags after the first nexthop: if there is a collision,
4533 * we have already failed to add the first nexthop:
4534 * fib6_add_rt2node() has rejected it; when replacing, old
4535 * nexthops have been replaced by first new, the rest should
4536 * be added to it.
1a72418b 4537 */
27596472
MK
4538 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4539 NLM_F_REPLACE);
6b9ea5a6
RP
4540 nhn++;
4541 }
4542
3b1137fe
DA
4543 /* success ... tell user about new route */
4544 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4545 goto cleanup;
4546
4547add_errout:
3b1137fe
DA
4548 /* send notification for routes that were added so that
4549 * the delete notifications sent by ip6_route_del are
4550 * coherent
4551 */
4552 if (rt_notif)
4553 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4554
6b9ea5a6
RP
4555 /* Delete routes that were already added */
4556 list_for_each_entry(nh, &rt6_nh_list, next) {
4557 if (err_nh == nh)
4558 break;
333c4301 4559 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4560 }
4561
4562cleanup:
4563 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4564 if (nh->fib6_info)
4565 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4566 list_del(&nh->next);
4567 kfree(nh);
4568 }
4569
4570 return err;
4571}
4572
333c4301
DA
4573static int ip6_route_multipath_del(struct fib6_config *cfg,
4574 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4575{
4576 struct fib6_config r_cfg;
4577 struct rtnexthop *rtnh;
4578 int remaining;
4579 int attrlen;
4580 int err = 1, last_err = 0;
4581
4582 remaining = cfg->fc_mp_len;
4583 rtnh = (struct rtnexthop *)cfg->fc_mp;
4584
4585 /* Parse a Multipath Entry */
4586 while (rtnh_ok(rtnh, remaining)) {
4587 memcpy(&r_cfg, cfg, sizeof(*cfg));
4588 if (rtnh->rtnh_ifindex)
4589 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4590
4591 attrlen = rtnh_attrlen(rtnh);
4592 if (attrlen > 0) {
4593 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4594
4595 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4596 if (nla) {
4597 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4598 r_cfg.fc_flags |= RTF_GATEWAY;
4599 }
4600 }
333c4301 4601 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4602 if (err)
4603 last_err = err;
4604
51ebd318
ND
4605 rtnh = rtnh_next(rtnh, &remaining);
4606 }
4607
4608 return last_err;
4609}
4610
c21ef3e3
DA
4611static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4612 struct netlink_ext_ack *extack)
1da177e4 4613{
86872cb5
TG
4614 struct fib6_config cfg;
4615 int err;
1da177e4 4616
333c4301 4617 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4618 if (err < 0)
4619 return err;
4620
51ebd318 4621 if (cfg.fc_mp)
333c4301 4622 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4623 else {
4624 cfg.fc_delete_all_nh = 1;
333c4301 4625 return ip6_route_del(&cfg, extack);
0ae81335 4626 }
1da177e4
LT
4627}
4628
c21ef3e3
DA
4629static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4630 struct netlink_ext_ack *extack)
1da177e4 4631{
86872cb5
TG
4632 struct fib6_config cfg;
4633 int err;
1da177e4 4634
333c4301 4635 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4636 if (err < 0)
4637 return err;
4638
67f69513
DA
4639 if (cfg.fc_metric == 0)
4640 cfg.fc_metric = IP6_RT_PRIO_USER;
4641
51ebd318 4642 if (cfg.fc_mp)
333c4301 4643 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4644 else
acb54e3c 4645 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4646}
4647
8d1c802b 4648static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4649{
beb1afac
DA
4650 int nexthop_len = 0;
4651
93c2fb25 4652 if (rt->fib6_nsiblings) {
beb1afac
DA
4653 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4654 + NLA_ALIGN(sizeof(struct rtnexthop))
4655 + nla_total_size(16) /* RTA_GATEWAY */
ad1601ae 4656 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
beb1afac 4657
93c2fb25 4658 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4659 }
4660
339bf98f
TG
4661 return NLMSG_ALIGN(sizeof(struct rtmsg))
4662 + nla_total_size(16) /* RTA_SRC */
4663 + nla_total_size(16) /* RTA_DST */
4664 + nla_total_size(16) /* RTA_GATEWAY */
4665 + nla_total_size(16) /* RTA_PREFSRC */
4666 + nla_total_size(4) /* RTA_TABLE */
4667 + nla_total_size(4) /* RTA_IIF */
4668 + nla_total_size(4) /* RTA_OIF */
4669 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4670 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4671 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4672 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4673 + nla_total_size(1) /* RTA_PREF */
ad1601ae 4674 + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
beb1afac
DA
4675 + nexthop_len;
4676}
4677
d4ead6b3 4678static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4679 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4680 struct in6_addr *dest, struct in6_addr *src,
15e47304 4681 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4682 unsigned int flags)
1da177e4 4683{
22d0bd82
XL
4684 struct rt6_info *rt6 = (struct rt6_info *)dst;
4685 struct rt6key *rt6_dst, *rt6_src;
4686 u32 *pmetrics, table, rt6_flags;
2d7202bf 4687 struct nlmsghdr *nlh;
22d0bd82 4688 struct rtmsg *rtm;
d4ead6b3 4689 long expires = 0;
1da177e4 4690
15e47304 4691 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4692 if (!nlh)
26932566 4693 return -EMSGSIZE;
2d7202bf 4694
22d0bd82
XL
4695 if (rt6) {
4696 rt6_dst = &rt6->rt6i_dst;
4697 rt6_src = &rt6->rt6i_src;
4698 rt6_flags = rt6->rt6i_flags;
4699 } else {
4700 rt6_dst = &rt->fib6_dst;
4701 rt6_src = &rt->fib6_src;
4702 rt6_flags = rt->fib6_flags;
4703 }
4704
2d7202bf 4705 rtm = nlmsg_data(nlh);
1da177e4 4706 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4707 rtm->rtm_dst_len = rt6_dst->plen;
4708 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4709 rtm->rtm_tos = 0;
93c2fb25
DA
4710 if (rt->fib6_table)
4711 table = rt->fib6_table->tb6_id;
c71099ac 4712 else
9e762a4a 4713 table = RT6_TABLE_UNSPEC;
97f0082a 4714 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4715 if (nla_put_u32(skb, RTA_TABLE, table))
4716 goto nla_put_failure;
e8478e80
DA
4717
4718 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4719 rtm->rtm_flags = 0;
4720 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4721 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4722
22d0bd82 4723 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4724 rtm->rtm_flags |= RTM_F_CLONED;
4725
d4ead6b3
DA
4726 if (dest) {
4727 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4728 goto nla_put_failure;
1ab1457c 4729 rtm->rtm_dst_len = 128;
1da177e4 4730 } else if (rtm->rtm_dst_len)
22d0bd82 4731 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4732 goto nla_put_failure;
1da177e4
LT
4733#ifdef CONFIG_IPV6_SUBTREES
4734 if (src) {
930345ea 4735 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4736 goto nla_put_failure;
1ab1457c 4737 rtm->rtm_src_len = 128;
c78679e8 4738 } else if (rtm->rtm_src_len &&
22d0bd82 4739 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4740 goto nla_put_failure;
1da177e4 4741#endif
7bc570c8
YH
4742 if (iif) {
4743#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4744 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4745 int err = ip6mr_get_route(net, skb, rtm, portid);
4746
4747 if (err == 0)
4748 return 0;
4749 if (err < 0)
4750 goto nla_put_failure;
7bc570c8
YH
4751 } else
4752#endif
c78679e8
DM
4753 if (nla_put_u32(skb, RTA_IIF, iif))
4754 goto nla_put_failure;
d4ead6b3 4755 } else if (dest) {
1da177e4 4756 struct in6_addr saddr_buf;
d4ead6b3 4757 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4758 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4759 goto nla_put_failure;
1da177e4 4760 }
2d7202bf 4761
93c2fb25 4762 if (rt->fib6_prefsrc.plen) {
c3968a85 4763 struct in6_addr saddr_buf;
93c2fb25 4764 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4765 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4766 goto nla_put_failure;
c3968a85
DW
4767 }
4768
d4ead6b3
DA
4769 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4770 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4771 goto nla_put_failure;
4772
93c2fb25 4773 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4774 goto nla_put_failure;
8253947e 4775
beb1afac
DA
4776 /* For multipath routes, walk the siblings list and add
4777 * each as a nexthop within RTA_MULTIPATH.
4778 */
22d0bd82
XL
4779 if (rt6) {
4780 if (rt6_flags & RTF_GATEWAY &&
4781 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4782 goto nla_put_failure;
4783
4784 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4785 goto nla_put_failure;
4786 } else if (rt->fib6_nsiblings) {
8d1c802b 4787 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4788 struct nlattr *mp;
4789
ae0be8de 4790 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
beb1afac
DA
4791 if (!mp)
4792 goto nla_put_failure;
4793
c0a72077
DA
4794 if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4795 rt->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4796 goto nla_put_failure;
4797
4798 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4799 &rt->fib6_siblings, fib6_siblings) {
c0a72077
DA
4800 if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4801 sibling->fib6_nh.fib_nh_weight) < 0)
beb1afac
DA
4802 goto nla_put_failure;
4803 }
4804
4805 nla_nest_end(skb, mp);
4806 } else {
ecc5663c
DA
4807 unsigned char nh_flags = 0;
4808
c0a72077 4809 if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
ecc5663c 4810 &nh_flags, false) < 0)
beb1afac 4811 goto nla_put_failure;
ecc5663c
DA
4812
4813 rtm->rtm_flags |= nh_flags;
beb1afac
DA
4814 }
4815
22d0bd82 4816 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4817 expires = dst ? dst->expires : rt->expires;
4818 expires -= jiffies;
4819 }
69cdf8f9 4820
d4ead6b3 4821 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4822 goto nla_put_failure;
2d7202bf 4823
22d0bd82 4824 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4825 goto nla_put_failure;
4826
19e42e45 4827
053c095a
JB
4828 nlmsg_end(skb, nlh);
4829 return 0;
2d7202bf
TG
4830
4831nla_put_failure:
26932566
PM
4832 nlmsg_cancel(skb, nlh);
4833 return -EMSGSIZE;
1da177e4
LT
4834}
4835
13e38901
DA
4836static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4837 const struct net_device *dev)
4838{
ad1601ae 4839 if (f6i->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4840 return true;
4841
4842 if (f6i->fib6_nsiblings) {
4843 struct fib6_info *sibling, *next_sibling;
4844
4845 list_for_each_entry_safe(sibling, next_sibling,
4846 &f6i->fib6_siblings, fib6_siblings) {
ad1601ae 4847 if (sibling->fib6_nh.fib_nh_dev == dev)
13e38901
DA
4848 return true;
4849 }
4850 }
4851
4852 return false;
4853}
4854
8d1c802b 4855int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4856{
4857 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4858 struct fib_dump_filter *filter = &arg->filter;
4859 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4860 struct net *net = arg->net;
4861
421842ed 4862 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4863 return 0;
1da177e4 4864
13e38901
DA
4865 if ((filter->flags & RTM_F_PREFIX) &&
4866 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4867 /* success since this is not a prefix route */
4868 return 1;
4869 }
4870 if (filter->filter_set) {
4871 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4872 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4873 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4874 return 1;
4875 }
13e38901 4876 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4877 }
1da177e4 4878
d4ead6b3
DA
4879 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4880 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4881 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4882}
4883
0eff0a27
JK
4884static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4885 const struct nlmsghdr *nlh,
4886 struct nlattr **tb,
4887 struct netlink_ext_ack *extack)
4888{
4889 struct rtmsg *rtm;
4890 int i, err;
4891
4892 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4893 NL_SET_ERR_MSG_MOD(extack,
4894 "Invalid header for get route request");
4895 return -EINVAL;
4896 }
4897
4898 if (!netlink_strict_get_check(skb))
8cb08174
JB
4899 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4900 rtm_ipv6_policy, extack);
0eff0a27
JK
4901
4902 rtm = nlmsg_data(nlh);
4903 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4904 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4905 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4906 rtm->rtm_type) {
4907 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4908 return -EINVAL;
4909 }
4910 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4911 NL_SET_ERR_MSG_MOD(extack,
4912 "Invalid flags for get route request");
4913 return -EINVAL;
4914 }
4915
8cb08174
JB
4916 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4917 rtm_ipv6_policy, extack);
0eff0a27
JK
4918 if (err)
4919 return err;
4920
4921 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4922 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4923 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4924 return -EINVAL;
4925 }
4926
4927 for (i = 0; i <= RTA_MAX; i++) {
4928 if (!tb[i])
4929 continue;
4930
4931 switch (i) {
4932 case RTA_SRC:
4933 case RTA_DST:
4934 case RTA_IIF:
4935 case RTA_OIF:
4936 case RTA_MARK:
4937 case RTA_UID:
4938 case RTA_SPORT:
4939 case RTA_DPORT:
4940 case RTA_IP_PROTO:
4941 break;
4942 default:
4943 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4944 return -EINVAL;
4945 }
4946 }
4947
4948 return 0;
4949}
4950
c21ef3e3
DA
4951static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4952 struct netlink_ext_ack *extack)
1da177e4 4953{
3b1e0a65 4954 struct net *net = sock_net(in_skb->sk);
ab364a6f 4955 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4956 int err, iif = 0, oif = 0;
a68886a6 4957 struct fib6_info *from;
18c3a61c 4958 struct dst_entry *dst;
ab364a6f 4959 struct rt6_info *rt;
1da177e4 4960 struct sk_buff *skb;
ab364a6f 4961 struct rtmsg *rtm;
744486d4 4962 struct flowi6 fl6 = {};
18c3a61c 4963 bool fibmatch;
1da177e4 4964
0eff0a27 4965 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4966 if (err < 0)
4967 goto errout;
1da177e4 4968
ab364a6f 4969 err = -EINVAL;
38b7097b
HFS
4970 rtm = nlmsg_data(nlh);
4971 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4972 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4973
ab364a6f
TG
4974 if (tb[RTA_SRC]) {
4975 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4976 goto errout;
4977
4e3fd7a0 4978 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4979 }
4980
4981 if (tb[RTA_DST]) {
4982 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4983 goto errout;
4984
4e3fd7a0 4985 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4986 }
4987
4988 if (tb[RTA_IIF])
4989 iif = nla_get_u32(tb[RTA_IIF]);
4990
4991 if (tb[RTA_OIF])
72331bc0 4992 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4993
2e47b291
LC
4994 if (tb[RTA_MARK])
4995 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4996
622ec2c9
LC
4997 if (tb[RTA_UID])
4998 fl6.flowi6_uid = make_kuid(current_user_ns(),
4999 nla_get_u32(tb[RTA_UID]));
5000 else
5001 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5002
eacb9384
RP
5003 if (tb[RTA_SPORT])
5004 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5005
5006 if (tb[RTA_DPORT])
5007 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5008
5009 if (tb[RTA_IP_PROTO]) {
5010 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
5011 &fl6.flowi6_proto, AF_INET6,
5012 extack);
eacb9384
RP
5013 if (err)
5014 goto errout;
5015 }
5016
1da177e4
LT
5017 if (iif) {
5018 struct net_device *dev;
72331bc0
SL
5019 int flags = 0;
5020
121622db
FW
5021 rcu_read_lock();
5022
5023 dev = dev_get_by_index_rcu(net, iif);
1da177e4 5024 if (!dev) {
121622db 5025 rcu_read_unlock();
1da177e4 5026 err = -ENODEV;
ab364a6f 5027 goto errout;
1da177e4 5028 }
72331bc0
SL
5029
5030 fl6.flowi6_iif = iif;
5031
5032 if (!ipv6_addr_any(&fl6.saddr))
5033 flags |= RT6_LOOKUP_F_HAS_SADDR;
5034
b75cc8f9 5035 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
5036
5037 rcu_read_unlock();
72331bc0
SL
5038 } else {
5039 fl6.flowi6_oif = oif;
5040
58acfd71 5041 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
5042 }
5043
18c3a61c
RP
5044
5045 rt = container_of(dst, struct rt6_info, dst);
5046 if (rt->dst.error) {
5047 err = rt->dst.error;
5048 ip6_rt_put(rt);
5049 goto errout;
1da177e4
LT
5050 }
5051
9d6acb3b
WC
5052 if (rt == net->ipv6.ip6_null_entry) {
5053 err = rt->dst.error;
5054 ip6_rt_put(rt);
5055 goto errout;
5056 }
5057
ab364a6f 5058 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 5059 if (!skb) {
94e187c0 5060 ip6_rt_put(rt);
ab364a6f
TG
5061 err = -ENOBUFS;
5062 goto errout;
5063 }
1da177e4 5064
d8d1f30b 5065 skb_dst_set(skb, &rt->dst);
a68886a6
DA
5066
5067 rcu_read_lock();
5068 from = rcu_dereference(rt->from);
886b7a50
MKL
5069 if (from) {
5070 if (fibmatch)
5071 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5072 iif, RTM_NEWROUTE,
5073 NETLINK_CB(in_skb).portid,
5074 nlh->nlmsg_seq, 0);
5075 else
5076 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5077 &fl6.saddr, iif, RTM_NEWROUTE,
5078 NETLINK_CB(in_skb).portid,
5079 nlh->nlmsg_seq, 0);
5080 } else {
5081 err = -ENETUNREACH;
5082 }
a68886a6
DA
5083 rcu_read_unlock();
5084
1da177e4 5085 if (err < 0) {
ab364a6f
TG
5086 kfree_skb(skb);
5087 goto errout;
1da177e4
LT
5088 }
5089
15e47304 5090 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 5091errout:
1da177e4 5092 return err;
1da177e4
LT
5093}
5094
8d1c802b 5095void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 5096 unsigned int nlm_flags)
1da177e4
LT
5097{
5098 struct sk_buff *skb;
5578689a 5099 struct net *net = info->nl_net;
528c4ceb
DL
5100 u32 seq;
5101 int err;
5102
5103 err = -ENOBUFS;
38308473 5104 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5105
19e42e45 5106 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5107 if (!skb)
21713ebc
TG
5108 goto errout;
5109
d4ead6b3
DA
5110 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5111 event, info->portid, seq, nlm_flags);
26932566
PM
5112 if (err < 0) {
5113 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5114 WARN_ON(err == -EMSGSIZE);
5115 kfree_skb(skb);
5116 goto errout;
5117 }
15e47304 5118 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5119 info->nlh, gfp_any());
5120 return;
21713ebc
TG
5121errout:
5122 if (err < 0)
5578689a 5123 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5124}
5125
8ed67789 5126static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5127 unsigned long event, void *ptr)
8ed67789 5128{
351638e7 5129 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5130 struct net *net = dev_net(dev);
8ed67789 5131
242d3a49
WC
5132 if (!(dev->flags & IFF_LOOPBACK))
5133 return NOTIFY_OK;
5134
5135 if (event == NETDEV_REGISTER) {
ad1601ae 5136 net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
d8d1f30b 5137 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5138 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5139#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5140 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5141 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5142 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5143 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5144#endif
76da0704
WC
5145 } else if (event == NETDEV_UNREGISTER &&
5146 dev->reg_state != NETREG_UNREGISTERED) {
5147 /* NETDEV_UNREGISTER could be fired for multiple times by
5148 * netdev_wait_allrefs(). Make sure we only call this once.
5149 */
12d94a80 5150 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5151#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5152 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5153 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5154#endif
5155 }
5156
5157 return NOTIFY_OK;
5158}
5159
1da177e4
LT
5160/*
5161 * /proc
5162 */
5163
5164#ifdef CONFIG_PROC_FS
1da177e4
LT
5165static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5166{
69ddb805 5167 struct net *net = (struct net *)seq->private;
1da177e4 5168 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5169 net->ipv6.rt6_stats->fib_nodes,
5170 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5171 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5172 net->ipv6.rt6_stats->fib_rt_entries,
5173 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5174 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5175 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5176
5177 return 0;
5178}
1da177e4
LT
5179#endif /* CONFIG_PROC_FS */
5180
5181#ifdef CONFIG_SYSCTL
5182
1da177e4 5183static
fe2c6338 5184int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5185 void __user *buffer, size_t *lenp, loff_t *ppos)
5186{
c486da34
LAG
5187 struct net *net;
5188 int delay;
f0fb9b28 5189 int ret;
c486da34 5190 if (!write)
1da177e4 5191 return -EINVAL;
c486da34
LAG
5192
5193 net = (struct net *)ctl->extra1;
5194 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5195 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5196 if (ret)
5197 return ret;
5198
2ac3ac8f 5199 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5200 return 0;
1da177e4
LT
5201}
5202
7c6bb7d2
DA
5203static int zero;
5204static int one = 1;
5205
ed792e28 5206static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5207 {
1da177e4 5208 .procname = "flush",
4990509f 5209 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5210 .maxlen = sizeof(int),
89c8b3a1 5211 .mode = 0200,
6d9f239a 5212 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5213 },
5214 {
1da177e4 5215 .procname = "gc_thresh",
9a7ec3a9 5216 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5217 .maxlen = sizeof(int),
5218 .mode = 0644,
6d9f239a 5219 .proc_handler = proc_dointvec,
1da177e4
LT
5220 },
5221 {
1da177e4 5222 .procname = "max_size",
4990509f 5223 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5224 .maxlen = sizeof(int),
5225 .mode = 0644,
6d9f239a 5226 .proc_handler = proc_dointvec,
1da177e4
LT
5227 },
5228 {
1da177e4 5229 .procname = "gc_min_interval",
4990509f 5230 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5231 .maxlen = sizeof(int),
5232 .mode = 0644,
6d9f239a 5233 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5234 },
5235 {
1da177e4 5236 .procname = "gc_timeout",
4990509f 5237 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5238 .maxlen = sizeof(int),
5239 .mode = 0644,
6d9f239a 5240 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5241 },
5242 {
1da177e4 5243 .procname = "gc_interval",
4990509f 5244 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5245 .maxlen = sizeof(int),
5246 .mode = 0644,
6d9f239a 5247 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5248 },
5249 {
1da177e4 5250 .procname = "gc_elasticity",
4990509f 5251 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5252 .maxlen = sizeof(int),
5253 .mode = 0644,
f3d3f616 5254 .proc_handler = proc_dointvec,
1da177e4
LT
5255 },
5256 {
1da177e4 5257 .procname = "mtu_expires",
4990509f 5258 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5259 .maxlen = sizeof(int),
5260 .mode = 0644,
6d9f239a 5261 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5262 },
5263 {
1da177e4 5264 .procname = "min_adv_mss",
4990509f 5265 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5266 .maxlen = sizeof(int),
5267 .mode = 0644,
f3d3f616 5268 .proc_handler = proc_dointvec,
1da177e4
LT
5269 },
5270 {
1da177e4 5271 .procname = "gc_min_interval_ms",
4990509f 5272 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5273 .maxlen = sizeof(int),
5274 .mode = 0644,
6d9f239a 5275 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5276 },
7c6bb7d2
DA
5277 {
5278 .procname = "skip_notify_on_dev_down",
5279 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5280 .maxlen = sizeof(int),
5281 .mode = 0644,
5282 .proc_handler = proc_dointvec,
5283 .extra1 = &zero,
5284 .extra2 = &one,
5285 },
f8572d8f 5286 { }
1da177e4
LT
5287};
5288
2c8c1e72 5289struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5290{
5291 struct ctl_table *table;
5292
5293 table = kmemdup(ipv6_route_table_template,
5294 sizeof(ipv6_route_table_template),
5295 GFP_KERNEL);
5ee09105
YH
5296
5297 if (table) {
5298 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5299 table[0].extra1 = net;
86393e52 5300 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5301 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5302 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5303 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5304 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5305 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5306 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5307 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5308 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5309 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5310
5311 /* Don't export sysctls to unprivileged users */
5312 if (net->user_ns != &init_user_ns)
5313 table[0].procname = NULL;
5ee09105
YH
5314 }
5315
760f2d01
DL
5316 return table;
5317}
1da177e4
LT
5318#endif
5319
2c8c1e72 5320static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5321{
633d424b 5322 int ret = -ENOMEM;
8ed67789 5323
86393e52
AD
5324 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5325 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5326
fc66f95c
ED
5327 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5328 goto out_ip6_dst_ops;
5329
421842ed
DA
5330 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5331 sizeof(*net->ipv6.fib6_null_entry),
5332 GFP_KERNEL);
5333 if (!net->ipv6.fib6_null_entry)
5334 goto out_ip6_dst_entries;
5335
8ed67789
DL
5336 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5337 sizeof(*net->ipv6.ip6_null_entry),
5338 GFP_KERNEL);
5339 if (!net->ipv6.ip6_null_entry)
421842ed 5340 goto out_fib6_null_entry;
d8d1f30b 5341 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5342 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5343 ip6_template_metrics, true);
8ed67789
DL
5344
5345#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5346 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5347 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5348 sizeof(*net->ipv6.ip6_prohibit_entry),
5349 GFP_KERNEL);
68fffc67
PZ
5350 if (!net->ipv6.ip6_prohibit_entry)
5351 goto out_ip6_null_entry;
d8d1f30b 5352 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5353 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5354 ip6_template_metrics, true);
8ed67789
DL
5355
5356 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5357 sizeof(*net->ipv6.ip6_blk_hole_entry),
5358 GFP_KERNEL);
68fffc67
PZ
5359 if (!net->ipv6.ip6_blk_hole_entry)
5360 goto out_ip6_prohibit_entry;
d8d1f30b 5361 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5362 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5363 ip6_template_metrics, true);
8ed67789
DL
5364#endif
5365
b339a47c
PZ
5366 net->ipv6.sysctl.flush_delay = 0;
5367 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5368 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5369 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5370 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5371 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5372 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5373 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5374 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5375
6891a346
BT
5376 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5377
8ed67789
DL
5378 ret = 0;
5379out:
5380 return ret;
f2fc6a54 5381
68fffc67
PZ
5382#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5383out_ip6_prohibit_entry:
5384 kfree(net->ipv6.ip6_prohibit_entry);
5385out_ip6_null_entry:
5386 kfree(net->ipv6.ip6_null_entry);
5387#endif
421842ed
DA
5388out_fib6_null_entry:
5389 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5390out_ip6_dst_entries:
5391 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5392out_ip6_dst_ops:
f2fc6a54 5393 goto out;
cdb18761
DL
5394}
5395
2c8c1e72 5396static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5397{
421842ed 5398 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5399 kfree(net->ipv6.ip6_null_entry);
5400#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5401 kfree(net->ipv6.ip6_prohibit_entry);
5402 kfree(net->ipv6.ip6_blk_hole_entry);
5403#endif
41bb78b4 5404 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5405}
5406
d189634e
TG
5407static int __net_init ip6_route_net_init_late(struct net *net)
5408{
5409#ifdef CONFIG_PROC_FS
c3506372
CH
5410 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5411 sizeof(struct ipv6_route_iter));
3617d949
CH
5412 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5413 rt6_stats_seq_show, NULL);
d189634e
TG
5414#endif
5415 return 0;
5416}
5417
5418static void __net_exit ip6_route_net_exit_late(struct net *net)
5419{
5420#ifdef CONFIG_PROC_FS
ece31ffd
G
5421 remove_proc_entry("ipv6_route", net->proc_net);
5422 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5423#endif
5424}
5425
cdb18761
DL
5426static struct pernet_operations ip6_route_net_ops = {
5427 .init = ip6_route_net_init,
5428 .exit = ip6_route_net_exit,
5429};
5430
c3426b47
DM
5431static int __net_init ipv6_inetpeer_init(struct net *net)
5432{
5433 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5434
5435 if (!bp)
5436 return -ENOMEM;
5437 inet_peer_base_init(bp);
5438 net->ipv6.peers = bp;
5439 return 0;
5440}
5441
5442static void __net_exit ipv6_inetpeer_exit(struct net *net)
5443{
5444 struct inet_peer_base *bp = net->ipv6.peers;
5445
5446 net->ipv6.peers = NULL;
56a6b248 5447 inetpeer_invalidate_tree(bp);
c3426b47
DM
5448 kfree(bp);
5449}
5450
2b823f72 5451static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5452 .init = ipv6_inetpeer_init,
5453 .exit = ipv6_inetpeer_exit,
5454};
5455
d189634e
TG
5456static struct pernet_operations ip6_route_net_late_ops = {
5457 .init = ip6_route_net_init_late,
5458 .exit = ip6_route_net_exit_late,
5459};
5460
8ed67789
DL
5461static struct notifier_block ip6_route_dev_notifier = {
5462 .notifier_call = ip6_route_dev_notify,
242d3a49 5463 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5464};
5465
2f460933
WC
5466void __init ip6_route_init_special_entries(void)
5467{
5468 /* Registering of the loopback is done before this portion of code,
5469 * the loopback reference in rt6_info will not be taken, do it
5470 * manually for init_net */
ad1601ae 5471 init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
2f460933
WC
5472 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5473 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5474 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5475 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5476 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5477 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5478 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5479 #endif
5480}
5481
433d49c3 5482int __init ip6_route_init(void)
1da177e4 5483{
433d49c3 5484 int ret;
8d0b94af 5485 int cpu;
433d49c3 5486
9a7ec3a9
DL
5487 ret = -ENOMEM;
5488 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5489 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5490 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5491 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5492 goto out;
14e50e57 5493
fc66f95c 5494 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5495 if (ret)
bdb3289f 5496 goto out_kmem_cache;
bdb3289f 5497
c3426b47
DM
5498 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5499 if (ret)
e8803b6c 5500 goto out_dst_entries;
2a0c451a 5501
7e52b33b
DM
5502 ret = register_pernet_subsys(&ip6_route_net_ops);
5503 if (ret)
5504 goto out_register_inetpeer;
c3426b47 5505
5dc121e9
AE
5506 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5507
e8803b6c 5508 ret = fib6_init();
433d49c3 5509 if (ret)
8ed67789 5510 goto out_register_subsys;
433d49c3 5511
433d49c3
DL
5512 ret = xfrm6_init();
5513 if (ret)
e8803b6c 5514 goto out_fib6_init;
c35b7e72 5515
433d49c3
DL
5516 ret = fib6_rules_init();
5517 if (ret)
5518 goto xfrm6_init;
7e5449c2 5519
d189634e
TG
5520 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5521 if (ret)
5522 goto fib6_rules_init;
5523
16feebcf
FW
5524 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5525 inet6_rtm_newroute, NULL, 0);
5526 if (ret < 0)
5527 goto out_register_late_subsys;
5528
5529 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5530 inet6_rtm_delroute, NULL, 0);
5531 if (ret < 0)
5532 goto out_register_late_subsys;
5533
5534 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5535 inet6_rtm_getroute, NULL,
5536 RTNL_FLAG_DOIT_UNLOCKED);
5537 if (ret < 0)
d189634e 5538 goto out_register_late_subsys;
c127ea2c 5539
8ed67789 5540 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5541 if (ret)
d189634e 5542 goto out_register_late_subsys;
8ed67789 5543
8d0b94af
MKL
5544 for_each_possible_cpu(cpu) {
5545 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5546
5547 INIT_LIST_HEAD(&ul->head);
5548 spin_lock_init(&ul->lock);
5549 }
5550
433d49c3
DL
5551out:
5552 return ret;
5553
d189634e 5554out_register_late_subsys:
16feebcf 5555 rtnl_unregister_all(PF_INET6);
d189634e 5556 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5557fib6_rules_init:
433d49c3
DL
5558 fib6_rules_cleanup();
5559xfrm6_init:
433d49c3 5560 xfrm6_fini();
2a0c451a
TG
5561out_fib6_init:
5562 fib6_gc_cleanup();
8ed67789
DL
5563out_register_subsys:
5564 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5565out_register_inetpeer:
5566 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5567out_dst_entries:
5568 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5569out_kmem_cache:
f2fc6a54 5570 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5571 goto out;
1da177e4
LT
5572}
5573
5574void ip6_route_cleanup(void)
5575{
8ed67789 5576 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5577 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5578 fib6_rules_cleanup();
1da177e4 5579 xfrm6_fini();
1da177e4 5580 fib6_gc_cleanup();
c3426b47 5581 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5582 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5583 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5584 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5585}
This page took 2.553364 seconds and 4 git commands to generate.