]> Git Repo - linux.git/blame - net/ipv6/route.c
ipv6: Create cleanup helper for fib6_nh
[linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/* Result of checking the neighbour entry behind a route's gateway.
 * Every failure code is negative so callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route must not be used */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in a failed state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* caller should round-robin */
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
7adf3246
SB
213
214 n = neigh_create(&nd_tbl, daddr, dev);
215 return IS_ERR(n) ? NULL : n;
f8a1b43b
DA
216}
217
218static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219 struct sk_buff *skb,
220 const void *daddr)
221{
222 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223
224 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
225}
226
63fca65d
JA
227static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
228{
229 struct net_device *dev = dst->dev;
230 struct rt6_info *rt = (struct rt6_info *)dst;
231
f8a1b43b 232 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
233 if (!daddr)
234 return;
235 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
236 return;
237 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
238 return;
239 __ipv6_confirm_neigh(dev, daddr);
240}
241
9a7ec3a9 242static struct dst_ops ip6_dst_ops_template = {
1da177e4 243 .family = AF_INET6,
1da177e4
LT
244 .gc = ip6_dst_gc,
245 .gc_thresh = 1024,
246 .check = ip6_dst_check,
0dbaee3b 247 .default_advmss = ip6_default_advmss,
ebb762f2 248 .mtu = ip6_mtu,
d4ead6b3 249 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
250 .destroy = ip6_dst_destroy,
251 .ifdown = ip6_dst_ifdown,
252 .negative_advice = ip6_negative_advice,
253 .link_failure = ip6_link_failure,
254 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 255 .redirect = rt6_do_redirect,
9f8955cc 256 .local_out = __ip6_local_out,
f8a1b43b 257 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 258 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
259};
260
ebb762f2 261static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 262{
618f9bc7
SK
263 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
264
265 return mtu ? : dst->dev->mtu;
ec831ea7
RD
266}
267
6700c270
DM
268static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
269 struct sk_buff *skb, u32 mtu)
14e50e57
DM
270{
271}
272
6700c270
DM
/* redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
277
14e50e57
DM
278static struct dst_ops ip6_dst_blackhole_ops = {
279 .family = AF_INET6,
14e50e57
DM
280 .destroy = ip6_dst_destroy,
281 .check = ip6_dst_check,
ebb762f2 282 .mtu = ip6_blackhole_mtu,
214f45c9 283 .default_advmss = ip6_default_advmss,
14e50e57 284 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 285 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 286 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 287 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
288};
289
62fa8a84 290static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 291 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
292};
293
8d1c802b 294static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
295 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
296 .fib6_protocol = RTPROT_KERNEL,
297 .fib6_metric = ~(u32)0,
298 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
299 .fib6_type = RTN_UNREACHABLE,
300 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
301};
302
fb0af4c7 303static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
304 .dst = {
305 .__refcnt = ATOMIC_INIT(1),
306 .__use = 1,
2c20cbd7 307 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 308 .error = -ENETUNREACH,
d8d1f30b
CG
309 .input = ip6_pkt_discard,
310 .output = ip6_pkt_discard_out,
1da177e4
LT
311 },
312 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
313};
314
101367c2
TG
315#ifdef CONFIG_IPV6_MULTIPLE_TABLES
316
fb0af4c7 317static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
318 .dst = {
319 .__refcnt = ATOMIC_INIT(1),
320 .__use = 1,
2c20cbd7 321 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 322 .error = -EACCES,
d8d1f30b
CG
323 .input = ip6_pkt_prohibit,
324 .output = ip6_pkt_prohibit_out,
101367c2
TG
325 },
326 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
327};
328
fb0af4c7 329static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
330 .dst = {
331 .__refcnt = ATOMIC_INIT(1),
332 .__use = 1,
2c20cbd7 333 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 334 .error = -EINVAL,
d8d1f30b 335 .input = dst_discard,
ede2059d 336 .output = dst_discard_out,
101367c2
TG
337 },
338 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
339};
340
341#endif
342
ebfa45f0
MKL
343static void rt6_info_init(struct rt6_info *rt)
344{
345 struct dst_entry *dst = &rt->dst;
346
347 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
348 INIT_LIST_HEAD(&rt->rt6i_uncached);
349}
350
1da177e4 351/* allocate dst with ip6_dst_ops */
93531c67
DA
352struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
353 int flags)
1da177e4 354{
97bab73f 355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 356 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 357
81eb8447 358 if (rt) {
ebfa45f0 359 rt6_info_init(rt);
81eb8447
WW
360 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
361 }
8104891b 362
cf911662 363 return rt;
1da177e4 364}
9ab179d8 365EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 366
1da177e4
LT
367static void ip6_dst_destroy(struct dst_entry *dst)
368{
369 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 370 struct fib6_info *from;
8d0b94af 371 struct inet6_dev *idev;
1da177e4 372
1620a336 373 ip_dst_metrics_put(dst);
8d0b94af
MKL
374 rt6_uncached_list_del(rt);
375
376 idev = rt->rt6i_idev;
38308473 377 if (idev) {
1da177e4
LT
378 rt->rt6i_idev = NULL;
379 in6_dev_put(idev);
1ab1457c 380 }
1716a961 381
a68886a6
DA
382 rcu_read_lock();
383 from = rcu_dereference(rt->from);
384 rcu_assign_pointer(rt->from, NULL);
93531c67 385 fib6_info_release(from);
a68886a6 386 rcu_read_unlock();
b3419363
DM
387}
388
1da177e4
LT
389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
390 int how)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
393 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 394 struct net_device *loopback_dev =
c346dca1 395 dev_net(dev)->loopback_dev;
1da177e4 396
e5645f51
WW
397 if (idev && idev->dev != loopback_dev) {
398 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
399 if (loopback_idev) {
400 rt->rt6i_idev = loopback_idev;
401 in6_dev_put(idev);
97cac082 402 }
1da177e4
LT
403 }
404}
405
5973fb1e
MKL
406static bool __rt6_check_expired(const struct rt6_info *rt)
407{
408 if (rt->rt6i_flags & RTF_EXPIRES)
409 return time_after(jiffies, rt->dst.expires);
410 else
411 return false;
412}
413
a50feda5 414static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 415{
a68886a6
DA
416 struct fib6_info *from;
417
418 from = rcu_dereference(rt->from);
419
1716a961
G
420 if (rt->rt6i_flags & RTF_EXPIRES) {
421 if (time_after(jiffies, rt->dst.expires))
a50feda5 422 return true;
a68886a6 423 } else if (from) {
1e2ea8ad 424 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 425 fib6_check_expired(from);
1716a961 426 }
a50feda5 427 return false;
1da177e4
LT
428}
429
3b290a31
DA
430struct fib6_info *fib6_multipath_select(const struct net *net,
431 struct fib6_info *match,
432 struct flowi6 *fl6, int oif,
433 const struct sk_buff *skb,
434 int strict)
51ebd318 435{
8d1c802b 436 struct fib6_info *sibling, *next_sibling;
51ebd318 437
b673d6cc
JS
438 /* We might have already computed the hash for ICMPv6 errors. In such
439 * case it will always be non-zero. Otherwise now is the time to do it.
440 */
441 if (!fl6->mp_hash)
b4bac172 442 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 443
5e670d84 444 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
445 return match;
446
93c2fb25
DA
447 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
448 fib6_siblings) {
5e670d84
DA
449 int nh_upper_bound;
450
451 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
452 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
453 continue;
454 if (rt6_score_route(sibling, oif, strict) < 0)
455 break;
456 match = sibling;
457 break;
458 }
459
51ebd318
ND
460 return match;
461}
462
1da177e4 463/*
66f5d6ce 464 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
465 */
466
8d1c802b
DA
467static inline struct fib6_info *rt6_device_match(struct net *net,
468 struct fib6_info *rt,
b71d1d42 469 const struct in6_addr *saddr,
1da177e4 470 int oif,
d420895e 471 int flags)
1da177e4 472{
8d1c802b 473 struct fib6_info *sprt;
1da177e4 474
5e670d84
DA
475 if (!oif && ipv6_addr_any(saddr) &&
476 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 477 return rt;
dd3abc4e 478
8fb11a9a 479 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 480 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 481
5e670d84 482 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
483 continue;
484
dd3abc4e 485 if (oif) {
1da177e4
LT
486 if (dev->ifindex == oif)
487 return sprt;
dd3abc4e
YH
488 } else {
489 if (ipv6_chk_addr(net, saddr, dev,
490 flags & RT6_LOOKUP_F_IFACE))
491 return sprt;
1da177e4 492 }
dd3abc4e 493 }
1da177e4 494
eea68cd3
DA
495 if (oif && flags & RT6_LOOKUP_F_IFACE)
496 return net->ipv6.fib6_null_entry;
8067bb8c 497
421842ed 498 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
499}
500
27097255 501#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
502struct __rt6_probe_work {
503 struct work_struct work;
504 struct in6_addr target;
505 struct net_device *dev;
506};
507
508static void rt6_probe_deferred(struct work_struct *w)
509{
510 struct in6_addr mcaddr;
511 struct __rt6_probe_work *work =
512 container_of(w, struct __rt6_probe_work, work);
513
514 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 515 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 516 dev_put(work->dev);
662f5533 517 kfree(work);
c2f17e82
HFS
518}
519
8d1c802b 520static void rt6_probe(struct fib6_info *rt)
27097255 521{
f547fac6 522 struct __rt6_probe_work *work = NULL;
5e670d84 523 const struct in6_addr *nh_gw;
f2c31e32 524 struct neighbour *neigh;
5e670d84 525 struct net_device *dev;
f547fac6 526 struct inet6_dev *idev;
5e670d84 527
27097255
YH
528 /*
529 * Okay, this does not seem to be appropriate
530 * for now, however, we need to check if it
531 * is really so; aka Router Reachability Probing.
532 *
533 * Router Reachability Probe MUST be rate-limited
534 * to no more than one per minute.
535 */
93c2fb25 536 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 537 return;
5e670d84
DA
538
539 nh_gw = &rt->fib6_nh.nh_gw;
540 dev = rt->fib6_nh.nh_dev;
2152caea 541 rcu_read_lock_bh();
f547fac6 542 idev = __in6_dev_get(dev);
5e670d84 543 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 544 if (neigh) {
8d6c31bf
MKL
545 if (neigh->nud_state & NUD_VALID)
546 goto out;
547
2152caea 548 write_lock(&neigh->lock);
990edb42
MKL
549 if (!(neigh->nud_state & NUD_VALID) &&
550 time_after(jiffies,
dcd1f572 551 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work)
554 __neigh_set_probe_once(neigh);
c2f17e82 555 }
2152caea 556 write_unlock(&neigh->lock);
f547fac6
SD
557 } else if (time_after(jiffies, rt->last_probe +
558 idev->cnf.rtr_probe_interval)) {
990edb42 559 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 560 }
990edb42
MKL
561
562 if (work) {
f547fac6 563 rt->last_probe = jiffies;
990edb42 564 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
565 work->target = *nh_gw;
566 dev_hold(dev);
567 work->dev = dev;
990edb42
MKL
568 schedule_work(&work->work);
569 }
570
8d6c31bf 571out:
2152caea 572 rcu_read_unlock_bh();
27097255
YH
573}
574#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* nothing to do */
}
578#endif
579
1da177e4 580/*
554cfb7e 581 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 582 */
8d1c802b 583static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 584{
5e670d84
DA
585 const struct net_device *dev = rt->fib6_nh.nh_dev;
586
161980f4 587 if (!oif || dev->ifindex == oif)
554cfb7e 588 return 2;
161980f4 589 return 0;
554cfb7e 590}
1da177e4 591
8d1c802b 592static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 593{
afc154e9 594 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 595 struct neighbour *neigh;
f2c31e32 596
93c2fb25
DA
597 if (rt->fib6_flags & RTF_NONEXTHOP ||
598 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 599 return RT6_NUD_SUCCEED;
145a3621
YH
600
601 rcu_read_lock_bh();
5e670d84
DA
602 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
603 &rt->fib6_nh.nh_gw);
145a3621
YH
604 if (neigh) {
605 read_lock(&neigh->lock);
554cfb7e 606 if (neigh->nud_state & NUD_VALID)
afc154e9 607 ret = RT6_NUD_SUCCEED;
398bcbeb 608#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 609 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 610 ret = RT6_NUD_SUCCEED;
7e980569
JB
611 else
612 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 613#endif
145a3621 614 read_unlock(&neigh->lock);
afc154e9
HFS
615 } else {
616 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 617 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 618 }
145a3621
YH
619 rcu_read_unlock_bh();
620
a5a81f0b 621 return ret;
1da177e4
LT
622}
623
8d1c802b 624static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 625{
a5a81f0b 626 int m;
1ab1457c 627
4d0c5911 628 m = rt6_check_dev(rt, oif);
77d16f45 629 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 630 return RT6_NUD_FAIL_HARD;
ebacaaa0 631#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 632 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 633#endif
afc154e9
HFS
634 if (strict & RT6_LOOKUP_F_REACHABLE) {
635 int n = rt6_check_neigh(rt);
636 if (n < 0)
637 return n;
638 }
554cfb7e
YH
639 return m;
640}
641
dcd1f572
DA
642/* called with rc_read_lock held */
643static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
644{
645 const struct net_device *dev = fib6_info_nh_dev(f6i);
646 bool rc = false;
647
648 if (dev) {
649 const struct inet6_dev *idev = __in6_dev_get(dev);
650
651 rc = !!idev->cnf.ignore_routes_with_linkdown;
652 }
653
654 return rc;
655}
656
8d1c802b
DA
657static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
658 int *mpri, struct fib6_info *match,
afc154e9 659 bool *do_rr)
554cfb7e 660{
f11e6659 661 int m;
afc154e9 662 bool match_do_rr = false;
35103d11 663
5e670d84 664 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
665 goto out;
666
dcd1f572 667 if (fib6_ignore_linkdown(rt) &&
5e670d84 668 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 669 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 670 goto out;
f11e6659 671
14895687 672 if (fib6_check_expired(rt))
f11e6659
DM
673 goto out;
674
675 m = rt6_score_route(rt, oif, strict);
7e980569 676 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
677 match_do_rr = true;
678 m = 0; /* lowest valid score */
7e980569 679 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 680 goto out;
afc154e9
HFS
681 }
682
683 if (strict & RT6_LOOKUP_F_REACHABLE)
684 rt6_probe(rt);
f11e6659 685
7e980569 686 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 687 if (m > *mpri) {
afc154e9 688 *do_rr = match_do_rr;
f11e6659
DM
689 *mpri = m;
690 match = rt;
f11e6659 691 }
f11e6659
DM
692out:
693 return match;
694}
695
8d1c802b
DA
696static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
697 struct fib6_info *leaf,
698 struct fib6_info *rr_head,
afc154e9
HFS
699 u32 metric, int oif, int strict,
700 bool *do_rr)
f11e6659 701{
8d1c802b 702 struct fib6_info *rt, *match, *cont;
554cfb7e 703 int mpri = -1;
1da177e4 704
f11e6659 705 match = NULL;
9fbdcfaf 706 cont = NULL;
8fb11a9a 707 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 708 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
709 cont = rt;
710 break;
711 }
712
713 match = find_match(rt, oif, strict, &mpri, match, do_rr);
714 }
715
66f5d6ce 716 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 717 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 718 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
719 cont = rt;
720 break;
721 }
722
afc154e9 723 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
724 }
725
726 if (match || !cont)
727 return match;
728
8fb11a9a 729 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 731
f11e6659
DM
732 return match;
733}
1da177e4 734
8d1c802b 735static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 736 int oif, int strict)
f11e6659 737{
8d1c802b
DA
738 struct fib6_info *leaf = rcu_dereference(fn->leaf);
739 struct fib6_info *match, *rt0;
afc154e9 740 bool do_rr = false;
17ecf590 741 int key_plen;
1da177e4 742
421842ed
DA
743 if (!leaf || leaf == net->ipv6.fib6_null_entry)
744 return net->ipv6.fib6_null_entry;
8d1040e8 745
66f5d6ce 746 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 747 if (!rt0)
66f5d6ce 748 rt0 = leaf;
1da177e4 749
17ecf590
WW
750 /* Double check to make sure fn is not an intermediate node
751 * and fn->leaf does not points to its child's leaf
752 * (This might happen if all routes under fn are deleted from
753 * the tree and fib6_repair_tree() is called on the node.)
754 */
93c2fb25 755 key_plen = rt0->fib6_dst.plen;
17ecf590 756#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
757 if (rt0->fib6_src.plen)
758 key_plen = rt0->fib6_src.plen;
17ecf590
WW
759#endif
760 if (fn->fn_bit != key_plen)
421842ed 761 return net->ipv6.fib6_null_entry;
17ecf590 762
93c2fb25 763 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 764 &do_rr);
1da177e4 765
afc154e9 766 if (do_rr) {
8fb11a9a 767 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 768
554cfb7e 769 /* no entries matched; do round-robin */
93c2fb25 770 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 771 next = leaf;
f11e6659 772
66f5d6ce 773 if (next != rt0) {
93c2fb25 774 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 775 /* make sure next is not being deleted from the tree */
93c2fb25 776 if (next->fib6_node)
66f5d6ce 777 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 778 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 779 }
1da177e4 780 }
1da177e4 781
421842ed 782 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
783}
784
8d1c802b 785static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 786{
93c2fb25 787 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
788}
789
70ceb4f5
YH
790#ifdef CONFIG_IPV6_ROUTE_INFO
791int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 792 const struct in6_addr *gwaddr)
70ceb4f5 793{
c346dca1 794 struct net *net = dev_net(dev);
70ceb4f5
YH
795 struct route_info *rinfo = (struct route_info *) opt;
796 struct in6_addr prefix_buf, *prefix;
797 unsigned int pref;
4bed72e4 798 unsigned long lifetime;
8d1c802b 799 struct fib6_info *rt;
70ceb4f5
YH
800
801 if (len < sizeof(struct route_info)) {
802 return -EINVAL;
803 }
804
805 /* Sanity check for prefix_len and length */
806 if (rinfo->length > 3) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 128) {
809 return -EINVAL;
810 } else if (rinfo->prefix_len > 64) {
811 if (rinfo->length < 2) {
812 return -EINVAL;
813 }
814 } else if (rinfo->prefix_len > 0) {
815 if (rinfo->length < 1) {
816 return -EINVAL;
817 }
818 }
819
820 pref = rinfo->route_pref;
821 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 822 return -EINVAL;
70ceb4f5 823
4bed72e4 824 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
825
826 if (rinfo->length == 3)
827 prefix = (struct in6_addr *)rinfo->prefix;
828 else {
829 /* this function is safe */
830 ipv6_addr_prefix(&prefix_buf,
831 (struct in6_addr *)rinfo->prefix,
832 rinfo->prefix_len);
833 prefix = &prefix_buf;
834 }
835
f104a567 836 if (rinfo->prefix_len == 0)
afb1d4b5 837 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
838 else
839 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 840 gwaddr, dev);
70ceb4f5
YH
841
842 if (rt && !lifetime) {
afb1d4b5 843 ip6_del_rt(net, rt);
70ceb4f5
YH
844 rt = NULL;
845 }
846
847 if (!rt && lifetime)
830218c1
DA
848 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
849 dev, pref);
70ceb4f5 850 else if (rt)
93c2fb25
DA
851 rt->fib6_flags = RTF_ROUTEINFO |
852 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
853
854 if (rt) {
1716a961 855 if (!addrconf_finite_timeout(lifetime))
14895687 856 fib6_clean_expires(rt);
1716a961 857 else
14895687 858 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 859
93531c67 860 fib6_info_release(rt);
70ceb4f5
YH
861 }
862 return 0;
863}
864#endif
865
ae90d867
DA
866/*
867 * Misc support functions
868 */
869
870/* called with rcu_lock held */
8d1c802b 871static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 872{
5e670d84 873 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 874
93c2fb25 875 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
876 /* for copies of local routes, dst->dev needs to be the
877 * device if it is a master device, the master device if
878 * device is enslaved, and the loopback as the default
879 */
880 if (netif_is_l3_slave(dev) &&
93c2fb25 881 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
882 dev = l3mdev_master_dev_rcu(dev);
883 else if (!netif_is_l3_master(dev))
884 dev = dev_net(dev)->loopback_dev;
885 /* last case is netif_is_l3_master(dev) is true in which
886 * case we want dev returned to be dev
887 */
888 }
889
890 return dev;
891}
892
6edb3c96
DA
893static const int fib6_prop[RTN_MAX + 1] = {
894 [RTN_UNSPEC] = 0,
895 [RTN_UNICAST] = 0,
896 [RTN_LOCAL] = 0,
897 [RTN_BROADCAST] = 0,
898 [RTN_ANYCAST] = 0,
899 [RTN_MULTICAST] = 0,
900 [RTN_BLACKHOLE] = -EINVAL,
901 [RTN_UNREACHABLE] = -EHOSTUNREACH,
902 [RTN_PROHIBIT] = -EACCES,
903 [RTN_THROW] = -EAGAIN,
904 [RTN_NAT] = -EINVAL,
905 [RTN_XRESOLVE] = -EINVAL,
906};
907
908static int ip6_rt_type_to_error(u8 fib6_type)
909{
910 return fib6_prop[fib6_type];
911}
912
8d1c802b 913static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
914{
915 unsigned short flags = 0;
916
917 if (rt->dst_nocount)
918 flags |= DST_NOCOUNT;
919 if (rt->dst_nopolicy)
920 flags |= DST_NOPOLICY;
921 if (rt->dst_host)
922 flags |= DST_HOST;
923
924 return flags;
925}
926
8d1c802b 927static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
928{
929 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
930
931 switch (ort->fib6_type) {
932 case RTN_BLACKHOLE:
933 rt->dst.output = dst_discard_out;
934 rt->dst.input = dst_discard;
935 break;
936 case RTN_PROHIBIT:
937 rt->dst.output = ip6_pkt_prohibit_out;
938 rt->dst.input = ip6_pkt_prohibit;
939 break;
940 case RTN_THROW:
941 case RTN_UNREACHABLE:
942 default:
943 rt->dst.output = ip6_pkt_discard_out;
944 rt->dst.input = ip6_pkt_discard;
945 break;
946 }
947}
948
8d1c802b 949static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 950{
93c2fb25 951 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
952 ip6_rt_init_dst_reject(rt, ort);
953 return;
954 }
955
956 rt->dst.error = 0;
957 rt->dst.output = ip6_output;
958
d23c4b63 959 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 960 rt->dst.input = ip6_input;
93c2fb25 961 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
962 rt->dst.input = ip6_mc_input;
963 } else {
964 rt->dst.input = ip6_forward;
965 }
966
967 if (ort->fib6_nh.nh_lwtstate) {
968 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
969 lwtunnel_set_redirect(&rt->dst);
970 }
971
972 rt->dst.lastuse = jiffies;
973}
974
e873e4b9 975/* Caller must already hold reference to @from */
8d1c802b 976static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 977{
ae90d867 978 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 979 rcu_assign_pointer(rt->from, from);
e1255ed4 980 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
981}
982
e873e4b9 983/* Caller must already hold reference to @ort */
8d1c802b 984static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 985{
dcd1f572
DA
986 struct net_device *dev = fib6_info_nh_dev(ort);
987
6edb3c96
DA
988 ip6_rt_init_dst(rt, ort);
989
93c2fb25 990 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 991 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 992 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 993 rt->rt6i_flags = ort->fib6_flags;
ae90d867 994 rt6_set_from(rt, ort);
ae90d867 995#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 996 rt->rt6i_src = ort->fib6_src;
ae90d867 997#endif
ae90d867
DA
998}
999
a3c00e46
MKL
1000static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1001 struct in6_addr *saddr)
1002{
66f5d6ce 1003 struct fib6_node *pn, *sn;
a3c00e46
MKL
1004 while (1) {
1005 if (fn->fn_flags & RTN_TL_ROOT)
1006 return NULL;
66f5d6ce
WW
1007 pn = rcu_dereference(fn->parent);
1008 sn = FIB6_SUBTREE(pn);
1009 if (sn && sn != fn)
6454743b 1010 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1011 else
1012 fn = pn;
1013 if (fn->fn_flags & RTN_RTINFO)
1014 return fn;
1015 }
1016}
c71099ac 1017
10585b43 1018static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
d3843fe5
WW
1019{
1020 struct rt6_info *rt = *prt;
1021
1022 if (dst_hold_safe(&rt->dst))
1023 return true;
10585b43 1024 if (net) {
d3843fe5
WW
1025 rt = net->ipv6.ip6_null_entry;
1026 dst_hold(&rt->dst);
1027 } else {
1028 rt = NULL;
1029 }
1030 *prt = rt;
1031 return false;
1032}
1033
dec9b0e2 1034/* called with rcu_lock held */
8d1c802b 1035static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1036{
3b6761d1 1037 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1038 struct net_device *dev = rt->fib6_nh.nh_dev;
1039 struct rt6_info *nrt;
1040
e873e4b9 1041 if (!fib6_info_hold_safe(rt))
1c87e79a 1042 goto fallback;
e873e4b9 1043
93531c67 1044 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1c87e79a 1045 if (!nrt) {
e873e4b9 1046 fib6_info_release(rt);
1c87e79a
XL
1047 goto fallback;
1048 }
dec9b0e2 1049
1c87e79a
XL
1050 ip6_rt_copy_init(nrt, rt);
1051 return nrt;
1052
1053fallback:
1054 nrt = dev_net(dev)->ipv6.ip6_null_entry;
1055 dst_hold(&nrt->dst);
dec9b0e2
DA
1056 return nrt;
1057}
1058
8ed67789
DL
1059static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1060 struct fib6_table *table,
b75cc8f9
DA
1061 struct flowi6 *fl6,
1062 const struct sk_buff *skb,
1063 int flags)
1da177e4 1064{
8d1c802b 1065 struct fib6_info *f6i;
1da177e4 1066 struct fib6_node *fn;
23fb93a4 1067 struct rt6_info *rt;
1da177e4 1068
b6cdbc85
DA
1069 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1070 flags &= ~RT6_LOOKUP_F_IFACE;
1071
66f5d6ce 1072 rcu_read_lock();
6454743b 1073 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1074restart:
23fb93a4
DA
1075 f6i = rcu_dereference(fn->leaf);
1076 if (!f6i) {
1077 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1078 } else {
23fb93a4 1079 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1080 fl6->flowi6_oif, flags);
93c2fb25 1081 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1082 f6i = fib6_multipath_select(net, f6i, fl6,
1083 fl6->flowi6_oif, skb,
1084 flags);
66f5d6ce 1085 }
23fb93a4 1086 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1087 fn = fib6_backtrack(fn, &fl6->saddr);
1088 if (fn)
1089 goto restart;
1090 }
2b760fcf 1091
d4bea421 1092 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1093
2b760fcf 1094 /* Search through exception table */
23fb93a4
DA
1095 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1096 if (rt) {
10585b43 1097 if (ip6_hold_safe(net, &rt))
dec9b0e2 1098 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1099 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1100 rt = net->ipv6.ip6_null_entry;
1101 dst_hold(&rt->dst);
23fb93a4
DA
1102 } else {
1103 rt = ip6_create_rt_rcu(f6i);
dec9b0e2 1104 }
b811580d 1105
66f5d6ce 1106 rcu_read_unlock();
b811580d 1107
c71099ac 1108 return rt;
c71099ac
TG
1109}
1110
67ba4152 1111struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1112 const struct sk_buff *skb, int flags)
ea6e574e 1113{
b75cc8f9 1114 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1115}
1116EXPORT_SYMBOL_GPL(ip6_route_lookup);
1117
9acd9f3a 1118struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1119 const struct in6_addr *saddr, int oif,
1120 const struct sk_buff *skb, int strict)
c71099ac 1121{
4c9483b2
DM
1122 struct flowi6 fl6 = {
1123 .flowi6_oif = oif,
1124 .daddr = *daddr,
c71099ac
TG
1125 };
1126 struct dst_entry *dst;
77d16f45 1127 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1128
adaa70bb 1129 if (saddr) {
4c9483b2 1130 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1131 flags |= RT6_LOOKUP_F_HAS_SADDR;
1132 }
1133
b75cc8f9 1134 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1135 if (dst->error == 0)
1136 return (struct rt6_info *) dst;
1137
1138 dst_release(dst);
1139
1da177e4
LT
1140 return NULL;
1141}
7159039a
YH
1142EXPORT_SYMBOL(rt6_lookup);
1143
c71099ac 1144/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1145 * It takes new route entry, the addition fails by any reason the
1146 * route is released.
1147 * Caller must hold dst before calling it.
1da177e4
LT
1148 */
1149
8d1c802b 1150static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1151 struct netlink_ext_ack *extack)
1da177e4
LT
1152{
1153 int err;
c71099ac 1154 struct fib6_table *table;
1da177e4 1155
93c2fb25 1156 table = rt->fib6_table;
66f5d6ce 1157 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1158 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1159 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1160
1161 return err;
1162}
1163
8d1c802b 1164int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1165{
afb1d4b5 1166 struct nl_info info = { .nl_net = net, };
e715b6d3 1167
d4ead6b3 1168 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1169}
1170
8d1c802b 1171static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1172 const struct in6_addr *daddr,
1173 const struct in6_addr *saddr)
1da177e4 1174{
4832c30d 1175 struct net_device *dev;
1da177e4
LT
1176 struct rt6_info *rt;
1177
1178 /*
1179 * Clone the route.
1180 */
1181
e873e4b9
WW
1182 if (!fib6_info_hold_safe(ort))
1183 return NULL;
1184
4832c30d 1185 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1186 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1187 if (!rt) {
1188 fib6_info_release(ort);
83a09abd 1189 return NULL;
e873e4b9 1190 }
83a09abd
MKL
1191
1192 ip6_rt_copy_init(rt, ort);
1193 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1194 rt->dst.flags |= DST_HOST;
1195 rt->rt6i_dst.addr = *daddr;
1196 rt->rt6i_dst.plen = 128;
1da177e4 1197
83a09abd 1198 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1199 if (ort->fib6_dst.plen != 128 &&
1200 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1201 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1202#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1203 if (rt->rt6i_src.plen && saddr) {
1204 rt->rt6i_src.addr = *saddr;
1205 rt->rt6i_src.plen = 128;
8b9df265 1206 }
83a09abd 1207#endif
95a9a5ba 1208 }
1da177e4 1209
95a9a5ba
YH
1210 return rt;
1211}
1da177e4 1212
8d1c802b 1213static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1214{
3b6761d1 1215 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1216 struct net_device *dev;
d52d3997
MKL
1217 struct rt6_info *pcpu_rt;
1218
e873e4b9
WW
1219 if (!fib6_info_hold_safe(rt))
1220 return NULL;
1221
4832c30d
DA
1222 rcu_read_lock();
1223 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1224 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1225 rcu_read_unlock();
e873e4b9
WW
1226 if (!pcpu_rt) {
1227 fib6_info_release(rt);
d52d3997 1228 return NULL;
e873e4b9 1229 }
d52d3997 1230 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1231 pcpu_rt->rt6i_flags |= RTF_PCPU;
1232 return pcpu_rt;
1233}
1234
66f5d6ce 1235/* It should be called with rcu_read_lock() acquired */
8d1c802b 1236static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1237{
a73e4195 1238 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1239
1240 p = this_cpu_ptr(rt->rt6i_pcpu);
1241 pcpu_rt = *p;
1242
d4ead6b3 1243 if (pcpu_rt)
10585b43 1244 ip6_hold_safe(NULL, &pcpu_rt);
d3843fe5 1245
a73e4195
MKL
1246 return pcpu_rt;
1247}
1248
afb1d4b5 1249static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1250 struct fib6_info *rt)
a73e4195
MKL
1251{
1252 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1253
1254 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1255 if (!pcpu_rt) {
9c7370a1
MKL
1256 dst_hold(&net->ipv6.ip6_null_entry->dst);
1257 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1258 }
1259
a94b9367
WW
1260 dst_hold(&pcpu_rt->dst);
1261 p = this_cpu_ptr(rt->rt6i_pcpu);
1262 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1263 BUG_ON(prev);
a94b9367 1264
d52d3997
MKL
1265 return pcpu_rt;
1266}
1267
35732d01
WW
/* Exception hash table implementation.
 *
 * rt6_exception_lock is the spinlock that the insert, remove, flush and
 * aging helpers below take (or require their caller to hold) while they
 * modify an exception table.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1271
1272/* Remove rt6_ex from hash table and free the memory
1273 * Caller must hold rt6_exception_lock
1274 */
1275static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1276 struct rt6_exception *rt6_ex)
1277{
f5b51fe8 1278 struct fib6_info *from;
b2427e67 1279 struct net *net;
81eb8447 1280
35732d01
WW
1281 if (!bucket || !rt6_ex)
1282 return;
b2427e67
CIK
1283
1284 net = dev_net(rt6_ex->rt6i->dst.dev);
f5b51fe8
PA
1285 net->ipv6.rt6_stats->fib_rt_cache--;
1286
1287 /* purge completely the exception to allow releasing the held resources:
1288 * some [sk] cache may keep the dst around for unlimited time
1289 */
1290 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1291 lockdep_is_held(&rt6_exception_lock));
1292 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1293 fib6_info_release(from);
1294 dst_dev_put(&rt6_ex->rt6i->dst);
1295
35732d01 1296 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1297 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1298 kfree_rcu(rt6_ex, rcu);
1299 WARN_ON_ONCE(!bucket->depth);
1300 bucket->depth--;
1301}
1302
1303/* Remove oldest rt6_ex in bucket and free the memory
1304 * Caller must hold rt6_exception_lock
1305 */
1306static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1307{
1308 struct rt6_exception *rt6_ex, *oldest = NULL;
1309
1310 if (!bucket)
1311 return;
1312
1313 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1314 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1315 oldest = rt6_ex;
1316 }
1317 rt6_remove_exception(bucket, oldest);
1318}
1319
1320static u32 rt6_exception_hash(const struct in6_addr *dst,
1321 const struct in6_addr *src)
1322{
1323 static u32 seed __read_mostly;
1324 u32 val;
1325
1326 net_get_random_once(&seed, sizeof(seed));
1327 val = jhash(dst, sizeof(*dst), seed);
1328
1329#ifdef CONFIG_IPV6_SUBTREES
1330 if (src)
1331 val = jhash(src, sizeof(*src), val);
1332#endif
1333 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1334}
1335
1336/* Helper function to find the cached rt in the hash table
1337 * and update bucket pointer to point to the bucket for this
1338 * (daddr, saddr) pair
1339 * Caller must hold rt6_exception_lock
1340 */
1341static struct rt6_exception *
1342__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1343 const struct in6_addr *daddr,
1344 const struct in6_addr *saddr)
1345{
1346 struct rt6_exception *rt6_ex;
1347 u32 hval;
1348
1349 if (!(*bucket) || !daddr)
1350 return NULL;
1351
1352 hval = rt6_exception_hash(daddr, saddr);
1353 *bucket += hval;
1354
1355 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1356 struct rt6_info *rt6 = rt6_ex->rt6i;
1357 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1358
1359#ifdef CONFIG_IPV6_SUBTREES
1360 if (matched && saddr)
1361 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1362#endif
1363 if (matched)
1364 return rt6_ex;
1365 }
1366 return NULL;
1367}
1368
1369/* Helper function to find the cached rt in the hash table
1370 * and update bucket pointer to point to the bucket for this
1371 * (daddr, saddr) pair
1372 * Caller must hold rcu_read_lock()
1373 */
1374static struct rt6_exception *
1375__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1376 const struct in6_addr *daddr,
1377 const struct in6_addr *saddr)
1378{
1379 struct rt6_exception *rt6_ex;
1380 u32 hval;
1381
1382 WARN_ON_ONCE(!rcu_read_lock_held());
1383
1384 if (!(*bucket) || !daddr)
1385 return NULL;
1386
1387 hval = rt6_exception_hash(daddr, saddr);
1388 *bucket += hval;
1389
1390 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1391 struct rt6_info *rt6 = rt6_ex->rt6i;
1392 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1393
1394#ifdef CONFIG_IPV6_SUBTREES
1395 if (matched && saddr)
1396 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1397#endif
1398 if (matched)
1399 return rt6_ex;
1400 }
1401 return NULL;
1402}
1403
8d1c802b 1404static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1405{
1406 unsigned int mtu;
1407
dcd1f572
DA
1408 if (rt->fib6_pmtu) {
1409 mtu = rt->fib6_pmtu;
1410 } else {
1411 struct net_device *dev = fib6_info_nh_dev(rt);
1412 struct inet6_dev *idev;
1413
1414 rcu_read_lock();
1415 idev = __in6_dev_get(dev);
1416 mtu = idev->cnf.mtu6;
1417 rcu_read_unlock();
1418 }
1419
d4ead6b3
DA
1420 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1421
1422 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1423}
1424
35732d01 1425static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1426 struct fib6_info *ort)
35732d01 1427{
5e670d84 1428 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1429 struct rt6_exception_bucket *bucket;
1430 struct in6_addr *src_key = NULL;
1431 struct rt6_exception *rt6_ex;
1432 int err = 0;
1433
35732d01
WW
1434 spin_lock_bh(&rt6_exception_lock);
1435
1436 if (ort->exception_bucket_flushed) {
1437 err = -EINVAL;
1438 goto out;
1439 }
1440
1441 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1442 lockdep_is_held(&rt6_exception_lock));
1443 if (!bucket) {
1444 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1445 GFP_ATOMIC);
1446 if (!bucket) {
1447 err = -ENOMEM;
1448 goto out;
1449 }
1450 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1451 }
1452
1453#ifdef CONFIG_IPV6_SUBTREES
1454 /* rt6i_src.plen != 0 indicates ort is in subtree
1455 * and exception table is indexed by a hash of
1456 * both rt6i_dst and rt6i_src.
1457 * Otherwise, the exception table is indexed by
1458 * a hash of only rt6i_dst.
1459 */
93c2fb25 1460 if (ort->fib6_src.plen)
35732d01
WW
1461 src_key = &nrt->rt6i_src.addr;
1462#endif
f5bbe7ee
WW
1463 /* rt6_mtu_change() might lower mtu on ort.
1464 * Only insert this exception route if its mtu
1465 * is less than ort's mtu value.
1466 */
d4ead6b3 1467 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1468 err = -EINVAL;
1469 goto out;
1470 }
60006a48 1471
35732d01
WW
1472 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1473 src_key);
1474 if (rt6_ex)
1475 rt6_remove_exception(bucket, rt6_ex);
1476
1477 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1478 if (!rt6_ex) {
1479 err = -ENOMEM;
1480 goto out;
1481 }
1482 rt6_ex->rt6i = nrt;
1483 rt6_ex->stamp = jiffies;
35732d01
WW
1484 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1485 bucket->depth++;
81eb8447 1486 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1487
1488 if (bucket->depth > FIB6_MAX_DEPTH)
1489 rt6_exception_remove_oldest(bucket);
1490
1491out:
1492 spin_unlock_bh(&rt6_exception_lock);
1493
1494 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1495 if (!err) {
93c2fb25 1496 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1497 fib6_update_sernum(net, ort);
93c2fb25 1498 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1499 fib6_force_start_gc(net);
1500 }
35732d01
WW
1501
1502 return err;
1503}
1504
8d1c802b 1505void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1506{
1507 struct rt6_exception_bucket *bucket;
1508 struct rt6_exception *rt6_ex;
1509 struct hlist_node *tmp;
1510 int i;
1511
1512 spin_lock_bh(&rt6_exception_lock);
1513 /* Prevent rt6_insert_exception() to recreate the bucket list */
1514 rt->exception_bucket_flushed = 1;
1515
1516 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1517 lockdep_is_held(&rt6_exception_lock));
1518 if (!bucket)
1519 goto out;
1520
1521 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1522 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1523 rt6_remove_exception(bucket, rt6_ex);
1524 WARN_ON_ONCE(bucket->depth);
1525 bucket++;
1526 }
1527
1528out:
1529 spin_unlock_bh(&rt6_exception_lock);
1530}
1531
1532/* Find cached rt in the hash table inside passed in rt
1533 * Caller has to hold rcu_read_lock()
1534 */
8d1c802b 1535static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1536 struct in6_addr *daddr,
1537 struct in6_addr *saddr)
1538{
1539 struct rt6_exception_bucket *bucket;
1540 struct in6_addr *src_key = NULL;
1541 struct rt6_exception *rt6_ex;
1542 struct rt6_info *res = NULL;
1543
1544 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1545
1546#ifdef CONFIG_IPV6_SUBTREES
1547 /* rt6i_src.plen != 0 indicates rt is in subtree
1548 * and exception table is indexed by a hash of
1549 * both rt6i_dst and rt6i_src.
1550 * Otherwise, the exception table is indexed by
1551 * a hash of only rt6i_dst.
1552 */
93c2fb25 1553 if (rt->fib6_src.plen)
35732d01
WW
1554 src_key = saddr;
1555#endif
1556 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1557
1558 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1559 res = rt6_ex->rt6i;
1560
1561 return res;
1562}
1563
1564/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1565static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1566{
35732d01
WW
1567 struct rt6_exception_bucket *bucket;
1568 struct in6_addr *src_key = NULL;
1569 struct rt6_exception *rt6_ex;
8a14e46f 1570 struct fib6_info *from;
35732d01
WW
1571 int err;
1572
091311de 1573 from = rcu_dereference(rt->from);
35732d01 1574 if (!from ||
442d713b 1575 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1576 return -EINVAL;
1577
1578 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1579 return -ENOENT;
1580
1581 spin_lock_bh(&rt6_exception_lock);
1582 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1583 lockdep_is_held(&rt6_exception_lock));
1584#ifdef CONFIG_IPV6_SUBTREES
1585 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1586 * and exception table is indexed by a hash of
1587 * both rt6i_dst and rt6i_src.
1588 * Otherwise, the exception table is indexed by
1589 * a hash of only rt6i_dst.
1590 */
93c2fb25 1591 if (from->fib6_src.plen)
35732d01
WW
1592 src_key = &rt->rt6i_src.addr;
1593#endif
1594 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1595 &rt->rt6i_dst.addr,
1596 src_key);
1597 if (rt6_ex) {
1598 rt6_remove_exception(bucket, rt6_ex);
1599 err = 0;
1600 } else {
1601 err = -ENOENT;
1602 }
1603
1604 spin_unlock_bh(&rt6_exception_lock);
1605 return err;
1606}
1607
1608/* Find rt6_ex which contains the passed in rt cache and
1609 * refresh its stamp
1610 */
1611static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1612{
35732d01
WW
1613 struct rt6_exception_bucket *bucket;
1614 struct in6_addr *src_key = NULL;
1615 struct rt6_exception *rt6_ex;
193f3685 1616 struct fib6_info *from;
35732d01
WW
1617
1618 rcu_read_lock();
193f3685
PA
1619 from = rcu_dereference(rt->from);
1620 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1621 goto unlock;
1622
35732d01
WW
1623 bucket = rcu_dereference(from->rt6i_exception_bucket);
1624
1625#ifdef CONFIG_IPV6_SUBTREES
1626 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1627 * and exception table is indexed by a hash of
1628 * both rt6i_dst and rt6i_src.
1629 * Otherwise, the exception table is indexed by
1630 * a hash of only rt6i_dst.
1631 */
93c2fb25 1632 if (from->fib6_src.plen)
35732d01
WW
1633 src_key = &rt->rt6i_src.addr;
1634#endif
1635 rt6_ex = __rt6_find_exception_rcu(&bucket,
1636 &rt->rt6i_dst.addr,
1637 src_key);
1638 if (rt6_ex)
1639 rt6_ex->stamp = jiffies;
1640
193f3685 1641unlock:
35732d01
WW
1642 rcu_read_unlock();
1643}
1644
e9fa1495
SB
1645static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1646 struct rt6_info *rt, int mtu)
1647{
1648 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1649 * lowest MTU in the path: always allow updating the route PMTU to
1650 * reflect PMTU decreases.
1651 *
1652 * If the new MTU is higher, and the route PMTU is equal to the local
1653 * MTU, this means the old MTU is the lowest in the path, so allow
1654 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1655 * handle this.
1656 */
1657
1658 if (dst_mtu(&rt->dst) >= mtu)
1659 return true;
1660
1661 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1662 return true;
1663
1664 return false;
1665}
1666
1667static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1668 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1669{
1670 struct rt6_exception_bucket *bucket;
1671 struct rt6_exception *rt6_ex;
1672 int i;
1673
1674 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1675 lockdep_is_held(&rt6_exception_lock));
1676
e9fa1495
SB
1677 if (!bucket)
1678 return;
1679
1680 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1681 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1682 struct rt6_info *entry = rt6_ex->rt6i;
1683
1684 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1685 * route), the metrics of its rt->from have already
e9fa1495
SB
1686 * been updated.
1687 */
d4ead6b3 1688 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1689 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1690 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1691 }
e9fa1495 1692 bucket++;
f5bbe7ee
WW
1693 }
1694}
1695
b16cb459
WW
1696#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1697
8d1c802b 1698static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1699 struct in6_addr *gateway)
1700{
1701 struct rt6_exception_bucket *bucket;
1702 struct rt6_exception *rt6_ex;
1703 struct hlist_node *tmp;
1704 int i;
1705
1706 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1707 return;
1708
1709 spin_lock_bh(&rt6_exception_lock);
1710 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1711 lockdep_is_held(&rt6_exception_lock));
1712
1713 if (bucket) {
1714 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1715 hlist_for_each_entry_safe(rt6_ex, tmp,
1716 &bucket->chain, hlist) {
1717 struct rt6_info *entry = rt6_ex->rt6i;
1718
1719 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1720 RTF_CACHE_GATEWAY &&
1721 ipv6_addr_equal(gateway,
1722 &entry->rt6i_gateway)) {
1723 rt6_remove_exception(bucket, rt6_ex);
1724 }
1725 }
1726 bucket++;
1727 }
1728 }
1729
1730 spin_unlock_bh(&rt6_exception_lock);
1731}
1732
c757faa8
WW
1733static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1734 struct rt6_exception *rt6_ex,
1735 struct fib6_gc_args *gc_args,
1736 unsigned long now)
1737{
1738 struct rt6_info *rt = rt6_ex->rt6i;
1739
1859bac0
PA
1740 /* we are pruning and obsoleting aged-out and non gateway exceptions
1741 * even if others have still references to them, so that on next
1742 * dst_check() such references can be dropped.
1743 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1744 * expired, independently from their aging, as per RFC 8201 section 4
1745 */
31afeb42
WW
1746 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1747 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1748 RT6_TRACE("aging clone %p\n", rt);
1749 rt6_remove_exception(bucket, rt6_ex);
1750 return;
1751 }
1752 } else if (time_after(jiffies, rt->dst.expires)) {
1753 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1754 rt6_remove_exception(bucket, rt6_ex);
1755 return;
31afeb42
WW
1756 }
1757
1758 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1759 struct neighbour *neigh;
1760 __u8 neigh_flags = 0;
1761
1bfa26ff
ED
1762 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1763 if (neigh)
c757faa8 1764 neigh_flags = neigh->flags;
1bfa26ff 1765
c757faa8
WW
1766 if (!(neigh_flags & NTF_ROUTER)) {
1767 RT6_TRACE("purging route %p via non-router but gateway\n",
1768 rt);
1769 rt6_remove_exception(bucket, rt6_ex);
1770 return;
1771 }
1772 }
31afeb42 1773
c757faa8
WW
1774 gc_args->more++;
1775}
1776
8d1c802b 1777void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1778 struct fib6_gc_args *gc_args,
1779 unsigned long now)
1780{
1781 struct rt6_exception_bucket *bucket;
1782 struct rt6_exception *rt6_ex;
1783 struct hlist_node *tmp;
1784 int i;
1785
1786 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1787 return;
1788
1bfa26ff
ED
1789 rcu_read_lock_bh();
1790 spin_lock(&rt6_exception_lock);
c757faa8
WW
1791 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1792 lockdep_is_held(&rt6_exception_lock));
1793
1794 if (bucket) {
1795 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1796 hlist_for_each_entry_safe(rt6_ex, tmp,
1797 &bucket->chain, hlist) {
1798 rt6_age_examine_exception(bucket, rt6_ex,
1799 gc_args, now);
1800 }
1801 bucket++;
1802 }
1803 }
1bfa26ff
ED
1804 spin_unlock(&rt6_exception_lock);
1805 rcu_read_unlock_bh();
c757faa8
WW
1806}
1807
1d053da9
DA
1808/* must be called with rcu lock held */
1809struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1810 int oif, struct flowi6 *fl6, int strict)
1da177e4 1811{
367efcb9 1812 struct fib6_node *fn, *saved_fn;
8d1c802b 1813 struct fib6_info *f6i;
1da177e4 1814
6454743b 1815 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1816 saved_fn = fn;
1da177e4 1817
ca254490
DA
1818 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1819 oif = 0;
1820
a3c00e46 1821redo_rt6_select:
23fb93a4 1822 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1823 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1824 fn = fib6_backtrack(fn, &fl6->saddr);
1825 if (fn)
1826 goto redo_rt6_select;
367efcb9
MKL
1827 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1828 /* also consider unreachable route */
1829 strict &= ~RT6_LOOKUP_F_REACHABLE;
1830 fn = saved_fn;
1831 goto redo_rt6_select;
367efcb9 1832 }
a3c00e46
MKL
1833 }
1834
d4bea421 1835 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1836
1d053da9
DA
1837 return f6i;
1838}
1839
1840struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1841 int oif, struct flowi6 *fl6,
1842 const struct sk_buff *skb, int flags)
1843{
1844 struct fib6_info *f6i;
1845 struct rt6_info *rt;
1846 int strict = 0;
1847
1848 strict |= flags & RT6_LOOKUP_F_IFACE;
1849 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1850 if (net->ipv6.devconf_all->forwarding == 0)
1851 strict |= RT6_LOOKUP_F_REACHABLE;
1852
1853 rcu_read_lock();
1854
1855 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1856 if (f6i->fib6_nsiblings)
1857 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1858
23fb93a4 1859 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1860 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1861 rcu_read_unlock();
d3843fe5 1862 dst_hold(&rt->dst);
d3843fe5 1863 return rt;
23fb93a4
DA
1864 }
1865
1866 /*Search through exception table */
1867 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1868 if (rt) {
10585b43 1869 if (ip6_hold_safe(net, &rt))
d3843fe5 1870 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1871
66f5d6ce 1872 rcu_read_unlock();
d52d3997 1873 return rt;
3da59bd9 1874 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1875 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1876 /* Create a RTF_CACHE clone which will not be
1877 * owned by the fib6 tree. It is for the special case where
1878 * the daddr in the skb during the neighbor look-up is different
1879 * from the fl6->daddr used to look-up route here.
1880 */
3da59bd9
MKL
1881 struct rt6_info *uncached_rt;
1882
23fb93a4 1883 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1884
4d85cd0c 1885 rcu_read_unlock();
c71099ac 1886
1cfb71ee
WW
1887 if (uncached_rt) {
1888 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1889 * No need for another dst_hold()
1890 */
8d0b94af 1891 rt6_uncached_list_add(uncached_rt);
81eb8447 1892 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1893 } else {
3da59bd9 1894 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1895 dst_hold(&uncached_rt->dst);
1896 }
b811580d 1897
3da59bd9 1898 return uncached_rt;
d52d3997
MKL
1899 } else {
1900 /* Get a percpu copy */
1901
1902 struct rt6_info *pcpu_rt;
1903
951f788a 1904 local_bh_disable();
23fb93a4 1905 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1906
93531c67
DA
1907 if (!pcpu_rt)
1908 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1909
951f788a
ED
1910 local_bh_enable();
1911 rcu_read_unlock();
d4bea421 1912
d52d3997
MKL
1913 return pcpu_rt;
1914 }
1da177e4 1915}
9ff74384 1916EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1917
b75cc8f9
DA
1918static struct rt6_info *ip6_pol_route_input(struct net *net,
1919 struct fib6_table *table,
1920 struct flowi6 *fl6,
1921 const struct sk_buff *skb,
1922 int flags)
4acad72d 1923{
b75cc8f9 1924 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1925}
1926
d409b847
MB
1927struct dst_entry *ip6_route_input_lookup(struct net *net,
1928 struct net_device *dev,
b75cc8f9
DA
1929 struct flowi6 *fl6,
1930 const struct sk_buff *skb,
1931 int flags)
72331bc0
SL
1932{
1933 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1934 flags |= RT6_LOOKUP_F_IFACE;
1935
b75cc8f9 1936 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1937}
d409b847 1938EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1939
23aebdac 1940static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1941 struct flow_keys *keys,
1942 struct flow_keys *flkeys)
23aebdac
JS
1943{
1944 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1945 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1946 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1947 const struct ipv6hdr *inner_iph;
1948 const struct icmp6hdr *icmph;
1949 struct ipv6hdr _inner_iph;
cea67a2d 1950 struct icmp6hdr _icmph;
23aebdac
JS
1951
1952 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1953 goto out;
1954
cea67a2d
ED
1955 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1956 sizeof(_icmph), &_icmph);
1957 if (!icmph)
1958 goto out;
1959
23aebdac
JS
1960 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1961 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1962 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1963 icmph->icmp6_type != ICMPV6_PARAMPROB)
1964 goto out;
1965
1966 inner_iph = skb_header_pointer(skb,
1967 skb_transport_offset(skb) + sizeof(*icmph),
1968 sizeof(_inner_iph), &_inner_iph);
1969 if (!inner_iph)
1970 goto out;
1971
1972 key_iph = inner_iph;
5e5d6fed 1973 _flkeys = NULL;
23aebdac 1974out:
5e5d6fed
RP
1975 if (_flkeys) {
1976 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1977 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1978 keys->tags.flow_label = _flkeys->tags.flow_label;
1979 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1980 } else {
1981 keys->addrs.v6addrs.src = key_iph->saddr;
1982 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1983 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1984 keys->basic.ip_proto = key_iph->nexthdr;
1985 }
23aebdac
JS
1986}
1987
1988/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1989u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1990 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1991{
1992 struct flow_keys hash_keys;
9a2a537a 1993 u32 mhash;
23aebdac 1994
bbfa047a 1995 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1996 case 0:
1997 memset(&hash_keys, 0, sizeof(hash_keys));
1998 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1999 if (skb) {
2000 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2001 } else {
2002 hash_keys.addrs.v6addrs.src = fl6->saddr;
2003 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2004 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2005 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2006 }
2007 break;
2008 case 1:
2009 if (skb) {
2010 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2011 struct flow_keys keys;
2012
2013 /* short-circuit if we already have L4 hash present */
2014 if (skb->l4_hash)
2015 return skb_get_hash_raw(skb) >> 1;
2016
2017 memset(&hash_keys, 0, sizeof(hash_keys));
2018
2019 if (!flkeys) {
2020 skb_flow_dissect_flow_keys(skb, &keys, flag);
2021 flkeys = &keys;
2022 }
2023 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2024 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2025 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2026 hash_keys.ports.src = flkeys->ports.src;
2027 hash_keys.ports.dst = flkeys->ports.dst;
2028 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2029 } else {
2030 memset(&hash_keys, 0, sizeof(hash_keys));
2031 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2032 hash_keys.addrs.v6addrs.src = fl6->saddr;
2033 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2034 hash_keys.ports.src = fl6->fl6_sport;
2035 hash_keys.ports.dst = fl6->fl6_dport;
2036 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2037 }
2038 break;
23aebdac 2039 }
9a2a537a 2040 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2041
9a2a537a 2042 return mhash >> 1;
23aebdac
JS
2043}
2044
c71099ac
TG
2045void ip6_route_input(struct sk_buff *skb)
2046{
b71d1d42 2047 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2048 struct net *net = dev_net(skb->dev);
adaa70bb 2049 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2050 struct ip_tunnel_info *tun_info;
4c9483b2 2051 struct flowi6 fl6 = {
e0d56fdd 2052 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2053 .daddr = iph->daddr,
2054 .saddr = iph->saddr,
6502ca52 2055 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2056 .flowi6_mark = skb->mark,
2057 .flowi6_proto = iph->nexthdr,
c71099ac 2058 };
5e5d6fed 2059 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2060
904af04d 2061 tun_info = skb_tunnel_info(skb);
46fa062a 2062 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2063 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2064
2065 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2066 flkeys = &_flkeys;
2067
23aebdac 2068 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2069 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2070 skb_dst_drop(skb);
b75cc8f9
DA
2071 skb_dst_set(skb,
2072 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2073}
2074
b75cc8f9
DA
2075static struct rt6_info *ip6_pol_route_output(struct net *net,
2076 struct fib6_table *table,
2077 struct flowi6 *fl6,
2078 const struct sk_buff *skb,
2079 int flags)
1da177e4 2080{
b75cc8f9 2081 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2082}
2083
6f21c96a
PA
2084struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2085 struct flowi6 *fl6, int flags)
c71099ac 2086{
d46a9d67 2087 bool any_src;
c71099ac 2088
3ede0bbc
RS
2089 if (ipv6_addr_type(&fl6->daddr) &
2090 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2091 struct dst_entry *dst;
2092
2093 dst = l3mdev_link_scope_lookup(net, fl6);
2094 if (dst)
2095 return dst;
2096 }
ca254490 2097
1fb9489b 2098 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2099
d46a9d67 2100 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2101 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2102 (fl6->flowi6_oif && any_src))
77d16f45 2103 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2104
d46a9d67 2105 if (!any_src)
adaa70bb 2106 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2107 else if (sk)
2108 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2109
b75cc8f9 2110 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2111}
6f21c96a 2112EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2113
2774c131 2114struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2115{
5c1e6aa3 2116 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2117 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2118 struct dst_entry *new = NULL;
2119
1dbe3252 2120 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2121 DST_OBSOLETE_DEAD, 0);
14e50e57 2122 if (rt) {
0a1f5962 2123 rt6_info_init(rt);
81eb8447 2124 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2125
0a1f5962 2126 new = &rt->dst;
14e50e57 2127 new->__use = 1;
352e512c 2128 new->input = dst_discard;
ede2059d 2129 new->output = dst_discard_out;
14e50e57 2130
0a1f5962 2131 dst_copy_metrics(new, &ort->dst);
14e50e57 2132
1dbe3252 2133 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2134 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2135 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2136
2137 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2138#ifdef CONFIG_IPV6_SUBTREES
2139 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2140#endif
14e50e57
DM
2141 }
2142
69ead7af
DM
2143 dst_release(dst_orig);
2144 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2145}
14e50e57 2146
1da177e4
LT
2147/*
2148 * Destination cache support functions
2149 */
2150
8d1c802b 2151static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2152{
93531c67
DA
2153 u32 rt_cookie = 0;
2154
8ae86971 2155 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2156 return false;
2157
2158 if (fib6_check_expired(f6i))
2159 return false;
2160
2161 return true;
4b32b5ad
MKL
2162}
2163
a68886a6
DA
2164static struct dst_entry *rt6_check(struct rt6_info *rt,
2165 struct fib6_info *from,
2166 u32 cookie)
3da59bd9 2167{
36143645 2168 u32 rt_cookie = 0;
c5cff856 2169
a68886a6 2170 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2171 rt_cookie != cookie)
3da59bd9
MKL
2172 return NULL;
2173
2174 if (rt6_check_expired(rt))
2175 return NULL;
2176
2177 return &rt->dst;
2178}
2179
a68886a6
DA
2180static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2181 struct fib6_info *from,
2182 u32 cookie)
3da59bd9 2183{
5973fb1e
MKL
2184 if (!__rt6_check_expired(rt) &&
2185 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2186 fib6_check(from, cookie))
3da59bd9
MKL
2187 return &rt->dst;
2188 else
2189 return NULL;
2190}
2191
1da177e4
LT
2192static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2193{
a87b7dc9 2194 struct dst_entry *dst_ret;
a68886a6 2195 struct fib6_info *from;
1da177e4
LT
2196 struct rt6_info *rt;
2197
a87b7dc9
DA
2198 rt = container_of(dst, struct rt6_info, dst);
2199
2200 rcu_read_lock();
1da177e4 2201
6f3118b5
ND
2202 /* All IPV6 dsts are created with ->obsolete set to the value
2203 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2204 * into this function always.
2205 */
e3bc10bd 2206
a68886a6 2207 from = rcu_dereference(rt->from);
4b32b5ad 2208
a68886a6
DA
2209 if (from && (rt->rt6i_flags & RTF_PCPU ||
2210 unlikely(!list_empty(&rt->rt6i_uncached))))
2211 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2212 else
a68886a6 2213 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2214
2215 rcu_read_unlock();
2216
2217 return dst_ret;
1da177e4
LT
2218}
2219
2220static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2221{
2222 struct rt6_info *rt = (struct rt6_info *) dst;
2223
2224 if (rt) {
54c1a859 2225 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2226 rcu_read_lock();
54c1a859 2227 if (rt6_check_expired(rt)) {
93531c67 2228 rt6_remove_exception_rt(rt);
54c1a859
YH
2229 dst = NULL;
2230 }
c3c14da0 2231 rcu_read_unlock();
54c1a859 2232 } else {
1da177e4 2233 dst_release(dst);
54c1a859
YH
2234 dst = NULL;
2235 }
1da177e4 2236 }
54c1a859 2237 return dst;
1da177e4
LT
2238}
2239
2240static void ip6_link_failure(struct sk_buff *skb)
2241{
2242 struct rt6_info *rt;
2243
3ffe533c 2244 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2245
adf30907 2246 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2247 if (rt) {
8a14e46f 2248 rcu_read_lock();
1eb4f758 2249 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2250 rt6_remove_exception_rt(rt);
c5cff856 2251 } else {
a68886a6 2252 struct fib6_info *from;
c5cff856
WW
2253 struct fib6_node *fn;
2254
a68886a6
DA
2255 from = rcu_dereference(rt->from);
2256 if (from) {
2257 fn = rcu_dereference(from->fib6_node);
2258 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2259 fn->fn_sernum = -1;
2260 }
1eb4f758 2261 }
8a14e46f 2262 rcu_read_unlock();
1da177e4
LT
2263 }
2264}
2265
6a3e030f
DA
2266static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2267{
a68886a6
DA
2268 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2269 struct fib6_info *from;
2270
2271 rcu_read_lock();
2272 from = rcu_dereference(rt0->from);
2273 if (from)
2274 rt0->dst.expires = from->expires;
2275 rcu_read_unlock();
2276 }
6a3e030f
DA
2277
2278 dst_set_expires(&rt0->dst, timeout);
2279 rt0->rt6i_flags |= RTF_EXPIRES;
2280}
2281
45e4fd26
MKL
2282static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2283{
2284 struct net *net = dev_net(rt->dst.dev);
2285
d4ead6b3 2286 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2287 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2288 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2289}
2290
0d3f6d29
MKL
2291static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2292{
2293 return !(rt->rt6i_flags & RTF_CACHE) &&
1490ed2a 2294 (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
0d3f6d29
MKL
2295}
2296
45e4fd26
MKL
2297static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2298 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2299{
0dec879f 2300 const struct in6_addr *daddr, *saddr;
67ba4152 2301 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2302
19bda36c
XL
2303 if (dst_metric_locked(dst, RTAX_MTU))
2304 return;
2305
0dec879f
JA
2306 if (iph) {
2307 daddr = &iph->daddr;
2308 saddr = &iph->saddr;
2309 } else if (sk) {
2310 daddr = &sk->sk_v6_daddr;
2311 saddr = &inet6_sk(sk)->saddr;
2312 } else {
2313 daddr = NULL;
2314 saddr = NULL;
2315 }
2316 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2317 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2318 if (mtu >= dst_mtu(dst))
2319 return;
9d289715 2320
0d3f6d29 2321 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2322 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2323 /* update rt6_ex->stamp for cache */
2324 if (rt6->rt6i_flags & RTF_CACHE)
2325 rt6_update_exception_stamp_rt(rt6);
0dec879f 2326 } else if (daddr) {
a68886a6 2327 struct fib6_info *from;
45e4fd26
MKL
2328 struct rt6_info *nrt6;
2329
4d85cd0c 2330 rcu_read_lock();
a68886a6
DA
2331 from = rcu_dereference(rt6->from);
2332 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2333 if (nrt6) {
2334 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2335 if (rt6_insert_exception(nrt6, from))
2b760fcf 2336 dst_release_immediate(&nrt6->dst);
45e4fd26 2337 }
a68886a6 2338 rcu_read_unlock();
1da177e4
LT
2339 }
2340}
2341
45e4fd26
MKL
2342static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2343 struct sk_buff *skb, u32 mtu)
2344{
2345 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2346}
2347
42ae66c8 2348void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2349 int oif, u32 mark, kuid_t uid)
81aded24
DM
2350{
2351 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2352 struct dst_entry *dst;
dc92095d
2353 struct flowi6 fl6 = {
2354 .flowi6_oif = oif,
2355 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2356 .daddr = iph->daddr,
2357 .saddr = iph->saddr,
2358 .flowlabel = ip6_flowinfo(iph),
2359 .flowi6_uid = uid,
2360 };
81aded24
DM
2361
2362 dst = ip6_route_output(net, NULL, &fl6);
2363 if (!dst->error)
45e4fd26 2364 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2365 dst_release(dst);
2366}
2367EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2368
2369void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2370{
7ddacfa5 2371 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2372 struct dst_entry *dst;
2373
7ddacfa5
DA
2374 if (!oif && skb->dev)
2375 oif = l3mdev_master_ifindex(skb->dev);
2376
2377 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2378
2379 dst = __sk_dst_get(sk);
2380 if (!dst || !dst->obsolete ||
2381 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2382 return;
2383
2384 bh_lock_sock(sk);
2385 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2386 ip6_datagram_dst_update(sk, false);
2387 bh_unlock_sock(sk);
81aded24
DM
2388}
2389EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2390
7d6850f7
AK
2391void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2392 const struct flowi6 *fl6)
2393{
2394#ifdef CONFIG_IPV6_SUBTREES
2395 struct ipv6_pinfo *np = inet6_sk(sk);
2396#endif
2397
2398 ip6_dst_store(sk, dst,
2399 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2400 &sk->sk_v6_daddr : NULL,
2401#ifdef CONFIG_IPV6_SUBTREES
2402 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2403 &np->saddr :
2404#endif
2405 NULL);
2406}
2407
b55b76b2
DJ
2408/* Handle redirects */
2409struct ip6rd_flowi {
2410 struct flowi6 fl6;
2411 struct in6_addr gateway;
2412};
2413
2414static struct rt6_info *__ip6_route_redirect(struct net *net,
2415 struct fib6_table *table,
2416 struct flowi6 *fl6,
b75cc8f9 2417 const struct sk_buff *skb,
b55b76b2
DJ
2418 int flags)
2419{
2420 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2421 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2422 struct fib6_info *rt;
b55b76b2
DJ
2423 struct fib6_node *fn;
2424
2425 /* Get the "current" route for this destination and
67c408cf 2426 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2427 *
2428 * RFC 4861 specifies that redirects should only be
2429 * accepted if they come from the nexthop to the target.
2430 * Due to the way the routes are chosen, this notion
2431 * is a bit fuzzy and one might need to check all possible
2432 * routes.
2433 */
2434
66f5d6ce 2435 rcu_read_lock();
6454743b 2436 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2437restart:
66f5d6ce 2438 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2439 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2440 continue;
14895687 2441 if (fib6_check_expired(rt))
b55b76b2 2442 continue;
93c2fb25 2443 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2444 break;
93c2fb25 2445 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2446 continue;
5e670d84 2447 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2448 continue;
2b760fcf
WW
2449 /* rt_cache's gateway might be different from its 'parent'
2450 * in the case of an ip redirect.
2451 * So we keep searching in the exception table if the gateway
2452 * is different.
2453 */
5e670d84 2454 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2455 rt_cache = rt6_find_cached_rt(rt,
2456 &fl6->daddr,
2457 &fl6->saddr);
2458 if (rt_cache &&
2459 ipv6_addr_equal(&rdfl->gateway,
2460 &rt_cache->rt6i_gateway)) {
23fb93a4 2461 ret = rt_cache;
2b760fcf
WW
2462 break;
2463 }
b55b76b2 2464 continue;
2b760fcf 2465 }
b55b76b2
DJ
2466 break;
2467 }
2468
2469 if (!rt)
421842ed 2470 rt = net->ipv6.fib6_null_entry;
93c2fb25 2471 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2472 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2473 goto out;
2474 }
2475
421842ed 2476 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2477 fn = fib6_backtrack(fn, &fl6->saddr);
2478 if (fn)
2479 goto restart;
b55b76b2 2480 }
a3c00e46 2481
b0a1ba59 2482out:
23fb93a4 2483 if (ret)
10585b43 2484 ip6_hold_safe(net, &ret);
23fb93a4
DA
2485 else
2486 ret = ip6_create_rt_rcu(rt);
b55b76b2 2487
66f5d6ce 2488 rcu_read_unlock();
b55b76b2 2489
b65f164d 2490 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2491 return ret;
b55b76b2
DJ
2492};
2493
2494static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2495 const struct flowi6 *fl6,
2496 const struct sk_buff *skb,
2497 const struct in6_addr *gateway)
b55b76b2
DJ
2498{
2499 int flags = RT6_LOOKUP_F_HAS_SADDR;
2500 struct ip6rd_flowi rdfl;
2501
2502 rdfl.fl6 = *fl6;
2503 rdfl.gateway = *gateway;
2504
b75cc8f9 2505 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2506 flags, __ip6_route_redirect);
2507}
2508
e2d118a1
LC
2509void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2510 kuid_t uid)
3a5ad2ee
DM
2511{
2512 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2513 struct dst_entry *dst;
1f7f10ac
2514 struct flowi6 fl6 = {
2515 .flowi6_iif = LOOPBACK_IFINDEX,
2516 .flowi6_oif = oif,
2517 .flowi6_mark = mark,
2518 .daddr = iph->daddr,
2519 .saddr = iph->saddr,
2520 .flowlabel = ip6_flowinfo(iph),
2521 .flowi6_uid = uid,
2522 };
3a5ad2ee 2523
b75cc8f9 2524 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2525 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2526 dst_release(dst);
2527}
2528EXPORT_SYMBOL_GPL(ip6_redirect);
2529
d456336d 2530void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2531{
2532 const struct ipv6hdr *iph = ipv6_hdr(skb);
2533 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2534 struct dst_entry *dst;
0b26fb17
2535 struct flowi6 fl6 = {
2536 .flowi6_iif = LOOPBACK_IFINDEX,
2537 .flowi6_oif = oif,
0b26fb17
2538 .daddr = msg->dest,
2539 .saddr = iph->daddr,
2540 .flowi6_uid = sock_net_uid(net, NULL),
2541 };
c92a59ec 2542
b75cc8f9 2543 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2544 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2545 dst_release(dst);
2546}
2547
3a5ad2ee
DM
2548void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2549{
e2d118a1
LC
2550 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2551 sk->sk_uid);
3a5ad2ee
DM
2552}
2553EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2554
0dbaee3b 2555static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2556{
0dbaee3b
DM
2557 struct net_device *dev = dst->dev;
2558 unsigned int mtu = dst_mtu(dst);
2559 struct net *net = dev_net(dev);
2560
1da177e4
LT
2561 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2562
5578689a
DL
2563 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2564 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2565
2566 /*
1ab1457c
YH
2567 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2568 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2569 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2570 * rely only on pmtu discovery"
2571 */
2572 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2573 mtu = IPV6_MAXPLEN;
2574 return mtu;
2575}
2576
ebb762f2 2577static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2578{
d33e4553 2579 struct inet6_dev *idev;
d4ead6b3 2580 unsigned int mtu;
4b32b5ad
MKL
2581
2582 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2583 if (mtu)
30f78d8e 2584 goto out;
618f9bc7
SK
2585
2586 mtu = IPV6_MIN_MTU;
d33e4553
DM
2587
2588 rcu_read_lock();
2589 idev = __in6_dev_get(dst->dev);
2590 if (idev)
2591 mtu = idev->cnf.mtu6;
2592 rcu_read_unlock();
2593
30f78d8e 2594out:
14972cbd
RP
2595 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2596
2597 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2598}
2599
901731b8
DA
2600/* MTU selection:
2601 * 1. mtu on route is locked - use it
2602 * 2. mtu from nexthop exception
2603 * 3. mtu from egress device
2604 *
2605 * based on ip6_dst_mtu_forward and exception logic of
2606 * rt6_find_cached_rt; called with rcu_read_lock
2607 */
2608u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2609 struct in6_addr *saddr)
2610{
2611 struct rt6_exception_bucket *bucket;
2612 struct rt6_exception *rt6_ex;
2613 struct in6_addr *src_key;
2614 struct inet6_dev *idev;
2615 u32 mtu = 0;
2616
2617 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2618 mtu = f6i->fib6_pmtu;
2619 if (mtu)
2620 goto out;
2621 }
2622
2623 src_key = NULL;
2624#ifdef CONFIG_IPV6_SUBTREES
2625 if (f6i->fib6_src.plen)
2626 src_key = saddr;
2627#endif
2628
2629 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2630 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2631 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2632 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2633
2634 if (likely(!mtu)) {
2635 struct net_device *dev = fib6_info_nh_dev(f6i);
2636
2637 mtu = IPV6_MIN_MTU;
2638 idev = __in6_dev_get(dev);
2639 if (idev && idev->cnf.mtu6 > mtu)
2640 mtu = idev->cnf.mtu6;
2641 }
2642
2643 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2644out:
2645 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2646}
2647
3b00944c 2648struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2649 struct flowi6 *fl6)
1da177e4 2650{
87a11578 2651 struct dst_entry *dst;
1da177e4
LT
2652 struct rt6_info *rt;
2653 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2654 struct net *net = dev_net(dev);
1da177e4 2655
38308473 2656 if (unlikely(!idev))
122bdf67 2657 return ERR_PTR(-ENODEV);
1da177e4 2658
ad706862 2659 rt = ip6_dst_alloc(net, dev, 0);
38308473 2660 if (unlikely(!rt)) {
1da177e4 2661 in6_dev_put(idev);
87a11578 2662 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2663 goto out;
2664 }
2665
8e2ec639 2666 rt->dst.flags |= DST_HOST;
588753f1 2667 rt->dst.input = ip6_input;
8e2ec639 2668 rt->dst.output = ip6_output;
550bab42 2669 rt->rt6i_gateway = fl6->daddr;
87a11578 2670 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2671 rt->rt6i_dst.plen = 128;
2672 rt->rt6i_idev = idev;
14edd87d 2673 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2674
4c981e28 2675 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2676 * do proper release of the net_device
2677 */
2678 rt6_uncached_list_add(rt);
81eb8447 2679 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2680
87a11578
DM
2681 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2682
1da177e4 2683out:
87a11578 2684 return dst;
1da177e4
LT
2685}
2686
569d3645 2687static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2688{
86393e52 2689 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2690 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2691 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2692 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2693 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2694 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2695 int entries;
7019b78e 2696
fc66f95c 2697 entries = dst_entries_get_fast(ops);
49a18d86 2698 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2699 entries <= rt_max_size)
1da177e4
LT
2700 goto out;
2701
6891a346 2702 net->ipv6.ip6_rt_gc_expire++;
14956643 2703 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2704 entries = dst_entries_get_slow(ops);
2705 if (entries < ops->gc_thresh)
7019b78e 2706 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2707out:
7019b78e 2708 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2709 return entries > rt_max_size;
1da177e4
LT
2710}
2711
8c14586f
DA
2712static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2713 struct fib6_config *cfg,
f4797b33
DA
2714 const struct in6_addr *gw_addr,
2715 u32 tbid, int flags)
8c14586f
DA
2716{
2717 struct flowi6 fl6 = {
2718 .flowi6_oif = cfg->fc_ifindex,
2719 .daddr = *gw_addr,
2720 .saddr = cfg->fc_prefsrc,
2721 };
2722 struct fib6_table *table;
2723 struct rt6_info *rt;
8c14586f 2724
f4797b33 2725 table = fib6_get_table(net, tbid);
8c14586f
DA
2726 if (!table)
2727 return NULL;
2728
2729 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2730 flags |= RT6_LOOKUP_F_HAS_SADDR;
2731
f4797b33 2732 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2733 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2734
2735 /* if table lookup failed, fall back to full lookup */
2736 if (rt == net->ipv6.ip6_null_entry) {
2737 ip6_rt_put(rt);
2738 rt = NULL;
2739 }
2740
2741 return rt;
2742}
2743
fc1e64e1
DA
2744static int ip6_route_check_nh_onlink(struct net *net,
2745 struct fib6_config *cfg,
9fbb704c 2746 const struct net_device *dev,
fc1e64e1
DA
2747 struct netlink_ext_ack *extack)
2748{
44750f84 2749 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2750 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2751 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
bf1dc8ba 2752 struct fib6_info *from;
fc1e64e1
DA
2753 struct rt6_info *grt;
2754 int err;
2755
2756 err = 0;
2757 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2758 if (grt) {
bf1dc8ba
PA
2759 rcu_read_lock();
2760 from = rcu_dereference(grt->from);
58e354c0 2761 if (!grt->dst.error &&
4ed591c8 2762 /* ignore match if it is the default route */
bf1dc8ba 2763 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2764 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2765 NL_SET_ERR_MSG(extack,
2766 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2767 err = -EINVAL;
2768 }
bf1dc8ba 2769 rcu_read_unlock();
fc1e64e1
DA
2770
2771 ip6_rt_put(grt);
2772 }
2773
2774 return err;
2775}
2776
1edce99f
DA
2777static int ip6_route_check_nh(struct net *net,
2778 struct fib6_config *cfg,
2779 struct net_device **_dev,
2780 struct inet6_dev **idev)
2781{
2782 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2783 struct net_device *dev = _dev ? *_dev : NULL;
2784 struct rt6_info *grt = NULL;
2785 int err = -EHOSTUNREACH;
2786
2787 if (cfg->fc_table) {
f4797b33
DA
2788 int flags = RT6_LOOKUP_F_IFACE;
2789
2790 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2791 cfg->fc_table, flags);
1edce99f
DA
2792 if (grt) {
2793 if (grt->rt6i_flags & RTF_GATEWAY ||
2794 (dev && dev != grt->dst.dev)) {
2795 ip6_rt_put(grt);
2796 grt = NULL;
2797 }
2798 }
2799 }
2800
2801 if (!grt)
b75cc8f9 2802 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2803
2804 if (!grt)
2805 goto out;
2806
2807 if (dev) {
2808 if (dev != grt->dst.dev) {
2809 ip6_rt_put(grt);
2810 goto out;
2811 }
2812 } else {
2813 *_dev = dev = grt->dst.dev;
2814 *idev = grt->rt6i_idev;
2815 dev_hold(dev);
2816 in6_dev_hold(grt->rt6i_idev);
2817 }
2818
2819 if (!(grt->rt6i_flags & RTF_GATEWAY))
2820 err = 0;
2821
2822 ip6_rt_put(grt);
2823
2824out:
2825 return err;
2826}
2827
9fbb704c
DA
2828static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2829 struct net_device **_dev, struct inet6_dev **idev,
2830 struct netlink_ext_ack *extack)
2831{
2832 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2833 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2834 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2835 const struct net_device *dev = *_dev;
232378e8 2836 bool need_addr_check = !dev;
9fbb704c
DA
2837 int err = -EINVAL;
2838
2839 /* if gw_addr is local we will fail to detect this in case
2840 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2841 * will return already-added prefix route via interface that
2842 * prefix route was assigned to, which might be non-loopback.
2843 */
232378e8
DA
2844 if (dev &&
2845 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2846 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2847 goto out;
2848 }
2849
2850 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2851 /* IPv6 strictly inhibits using not link-local
2852 * addresses as nexthop address.
2853 * Otherwise, router will not able to send redirects.
2854 * It is very good, but in some (rare!) circumstances
2855 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2856 * some exceptions. --ANK
2857 * We allow IPv4-mapped nexthops to support RFC4798-type
2858 * addressing
2859 */
2860 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2861 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2862 goto out;
2863 }
2864
2865 if (cfg->fc_flags & RTNH_F_ONLINK)
2866 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2867 else
2868 err = ip6_route_check_nh(net, cfg, _dev, idev);
2869
2870 if (err)
2871 goto out;
2872 }
2873
2874 /* reload in case device was changed */
2875 dev = *_dev;
2876
2877 err = -EINVAL;
2878 if (!dev) {
2879 NL_SET_ERR_MSG(extack, "Egress device not specified");
2880 goto out;
2881 } else if (dev->flags & IFF_LOOPBACK) {
2882 NL_SET_ERR_MSG(extack,
2883 "Egress device can not be loopback device for this route");
2884 goto out;
2885 }
232378e8
DA
2886
2887 /* if we did not check gw_addr above, do so now that the
2888 * egress device has been resolved.
2889 */
2890 if (need_addr_check &&
2891 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2892 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2893 goto out;
2894 }
2895
9fbb704c
DA
2896 err = 0;
2897out:
2898 return err;
2899}
2900
83c44251
DA
2901static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2902{
2903 if ((flags & RTF_REJECT) ||
2904 (dev && (dev->flags & IFF_LOOPBACK) &&
2905 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2906 !(flags & RTF_LOCAL)))
2907 return true;
2908
2909 return false;
2910}
2911
2912int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2913 struct fib6_config *cfg, gfp_t gfp_flags,
2914 struct netlink_ext_ack *extack)
2915{
2916 struct net_device *dev = NULL;
2917 struct inet6_dev *idev = NULL;
2918 int addr_type;
2919 int err;
2920
2921 err = -ENODEV;
2922 if (cfg->fc_ifindex) {
2923 dev = dev_get_by_index(net, cfg->fc_ifindex);
2924 if (!dev)
2925 goto out;
2926 idev = in6_dev_get(dev);
2927 if (!idev)
2928 goto out;
2929 }
2930
2931 if (cfg->fc_flags & RTNH_F_ONLINK) {
2932 if (!dev) {
2933 NL_SET_ERR_MSG(extack,
2934 "Nexthop device required for onlink");
2935 goto out;
2936 }
2937
2938 if (!(dev->flags & IFF_UP)) {
2939 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2940 err = -ENETDOWN;
2941 goto out;
2942 }
2943
2944 fib6_nh->nh_flags |= RTNH_F_ONLINK;
2945 }
2946
2947 if (cfg->fc_encap) {
2948 struct lwtunnel_state *lwtstate;
2949
2950 err = lwtunnel_build_state(cfg->fc_encap_type,
2951 cfg->fc_encap, AF_INET6, cfg,
2952 &lwtstate, extack);
2953 if (err)
2954 goto out;
2955
2956 fib6_nh->nh_lwtstate = lwtstate_get(lwtstate);
2957 }
2958
2959 fib6_nh->nh_weight = 1;
2960
2961 /* We cannot add true routes via loopback here,
2962 * they would result in kernel looping; promote them to reject routes
2963 */
2964 addr_type = ipv6_addr_type(&cfg->fc_dst);
2965 if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
2966 /* hold loopback dev/idev if we haven't done so. */
2967 if (dev != net->loopback_dev) {
2968 if (dev) {
2969 dev_put(dev);
2970 in6_dev_put(idev);
2971 }
2972 dev = net->loopback_dev;
2973 dev_hold(dev);
2974 idev = in6_dev_get(dev);
2975 if (!idev) {
2976 err = -ENODEV;
2977 goto out;
2978 }
2979 }
2980 goto set_dev;
2981 }
2982
2983 if (cfg->fc_flags & RTF_GATEWAY) {
2984 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2985 if (err)
2986 goto out;
2987
2988 fib6_nh->nh_gw = cfg->fc_gateway;
2989 }
2990
2991 err = -ENODEV;
2992 if (!dev)
2993 goto out;
2994
2995 if (idev->cnf.disable_ipv6) {
2996 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2997 err = -EACCES;
2998 goto out;
2999 }
3000
3001 if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3002 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3003 err = -ENETDOWN;
3004 goto out;
3005 }
3006
3007 if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3008 !netif_carrier_ok(dev))
3009 fib6_nh->nh_flags |= RTNH_F_LINKDOWN;
3010
3011set_dev:
3012 fib6_nh->nh_dev = dev;
3013 err = 0;
3014out:
3015 if (idev)
3016 in6_dev_put(idev);
3017
3018 if (err) {
3019 lwtstate_put(fib6_nh->nh_lwtstate);
3020 fib6_nh->nh_lwtstate = NULL;
3021 if (dev)
3022 dev_put(dev);
3023 }
3024
3025 return err;
3026}
3027
dac7d0f2
DA
3028void fib6_nh_release(struct fib6_nh *fib6_nh)
3029{
3030 lwtstate_put(fib6_nh->nh_lwtstate);
3031
3032 if (fib6_nh->nh_dev)
3033 dev_put(fib6_nh->nh_dev);
3034}
3035
8d1c802b 3036static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 3037 gfp_t gfp_flags,
333c4301 3038 struct netlink_ext_ack *extack)
1da177e4 3039{
5578689a 3040 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 3041 struct fib6_info *rt = NULL;
c71099ac 3042 struct fib6_table *table;
8c5b83f0 3043 int err = -EINVAL;
83c44251 3044 int addr_type;
1da177e4 3045
557c44be 3046 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
3047 if (cfg->fc_flags & RTF_PCPU) {
3048 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 3049 goto out;
d5d531cb 3050 }
557c44be 3051
2ea2352e
WW
3052 /* RTF_CACHE is an internal flag; can not be set by userspace */
3053 if (cfg->fc_flags & RTF_CACHE) {
3054 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3055 goto out;
3056 }
3057
e8478e80
DA
3058 if (cfg->fc_type > RTN_MAX) {
3059 NL_SET_ERR_MSG(extack, "Invalid route type");
3060 goto out;
3061 }
3062
d5d531cb
DA
3063 if (cfg->fc_dst_len > 128) {
3064 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3065 goto out;
3066 }
3067 if (cfg->fc_src_len > 128) {
3068 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 3069 goto out;
d5d531cb 3070 }
1da177e4 3071#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
3072 if (cfg->fc_src_len) {
3073 NL_SET_ERR_MSG(extack,
3074 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 3075 goto out;
d5d531cb 3076 }
1da177e4 3077#endif
fc1e64e1 3078
d71314b4 3079 err = -ENOBUFS;
38308473
DM
3080 if (cfg->fc_nlinfo.nlh &&
3081 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3082 table = fib6_get_table(net, cfg->fc_table);
38308473 3083 if (!table) {
f3213831 3084 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3085 table = fib6_new_table(net, cfg->fc_table);
3086 }
3087 } else {
3088 table = fib6_new_table(net, cfg->fc_table);
3089 }
38308473
DM
3090
3091 if (!table)
c71099ac 3092 goto out;
c71099ac 3093
93531c67
DA
3094 err = -ENOMEM;
3095 rt = fib6_info_alloc(gfp_flags);
3096 if (!rt)
1da177e4 3097 goto out;
93531c67 3098
d7e774f3
DA
3099 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3100 extack);
767a2217
DA
3101 if (IS_ERR(rt->fib6_metrics)) {
3102 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
3103 /* Do not leave garbage there. */
3104 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3105 goto out;
3106 }
3107
93531c67
DA
3108 if (cfg->fc_flags & RTF_ADDRCONF)
3109 rt->dst_nocount = true;
1da177e4 3110
1716a961 3111 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3112 fib6_set_expires(rt, jiffies +
1716a961
G
3113 clock_t_to_jiffies(cfg->fc_expires));
3114 else
14895687 3115 fib6_clean_expires(rt);
1da177e4 3116
86872cb5
TG
3117 if (cfg->fc_protocol == RTPROT_UNSPEC)
3118 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3119 rt->fib6_protocol = cfg->fc_protocol;
86872cb5 3120
83c44251
DA
3121 rt->fib6_table = table;
3122 rt->fib6_metric = cfg->fc_metric;
3123 rt->fib6_type = cfg->fc_type;
3124 rt->fib6_flags = cfg->fc_flags;
19e42e45 3125
93c2fb25
DA
3126 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3127 rt->fib6_dst.plen = cfg->fc_dst_len;
3128 if (rt->fib6_dst.plen == 128)
3b6761d1 3129 rt->dst_host = true;
e5fd387a 3130
1da177e4 3131#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3132 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3133 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4 3134#endif
83c44251
DA
3135 err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3136 if (err)
3137 goto out;
1da177e4
LT
3138
3139 /* We cannot add true routes via loopback here,
83c44251 3140 * they would result in kernel looping; promote them to reject routes
1da177e4 3141 */
83c44251
DA
3142 addr_type = ipv6_addr_type(&cfg->fc_dst);
3143 if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.nh_dev, addr_type))
3144 rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
955ec4cb 3145
c3968a85 3146 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
83c44251
DA
3147 struct net_device *dev = fib6_info_nh_dev(rt);
3148
c3968a85 3149 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3150 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3151 err = -EINVAL;
3152 goto out;
3153 }
93c2fb25
DA
3154 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3155 rt->fib6_prefsrc.plen = 128;
c3968a85 3156 } else
93c2fb25 3157 rt->fib6_prefsrc.plen = 0;
c3968a85 3158
8c5b83f0 3159 return rt;
6b9ea5a6 3160out:
93531c67 3161 fib6_info_release(rt);
8c5b83f0 3162 return ERR_PTR(err);
6b9ea5a6
RP
3163}
3164
acb54e3c 3165int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3166 struct netlink_ext_ack *extack)
6b9ea5a6 3167{
8d1c802b 3168 struct fib6_info *rt;
6b9ea5a6
RP
3169 int err;
3170
acb54e3c 3171 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3172 if (IS_ERR(rt))
3173 return PTR_ERR(rt);
6b9ea5a6 3174
d4ead6b3 3175 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3176 fib6_info_release(rt);
6b9ea5a6 3177
1da177e4
LT
3178 return err;
3179}
3180
8d1c802b 3181static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3182{
afb1d4b5 3183 struct net *net = info->nl_net;
c71099ac 3184 struct fib6_table *table;
afb1d4b5 3185 int err;
1da177e4 3186
421842ed 3187 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3188 err = -ENOENT;
3189 goto out;
3190 }
6c813a72 3191
93c2fb25 3192 table = rt->fib6_table;
66f5d6ce 3193 spin_lock_bh(&table->tb6_lock);
86872cb5 3194 err = fib6_del(rt, info);
66f5d6ce 3195 spin_unlock_bh(&table->tb6_lock);
1da177e4 3196
6825a26c 3197out:
93531c67 3198 fib6_info_release(rt);
1da177e4
LT
3199 return err;
3200}
3201
8d1c802b 3202int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3203{
afb1d4b5
DA
3204 struct nl_info info = { .nl_net = net };
3205
528c4ceb 3206 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3207}
3208
8d1c802b 3209static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3210{
3211 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3212 struct net *net = info->nl_net;
16a16cd3 3213 struct sk_buff *skb = NULL;
0ae81335 3214 struct fib6_table *table;
e3330039 3215 int err = -ENOENT;
0ae81335 3216
421842ed 3217 if (rt == net->ipv6.fib6_null_entry)
e3330039 3218 goto out_put;
93c2fb25 3219 table = rt->fib6_table;
66f5d6ce 3220 spin_lock_bh(&table->tb6_lock);
0ae81335 3221
93c2fb25 3222 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3223 struct fib6_info *sibling, *next_sibling;
0ae81335 3224
16a16cd3
DA
3225 /* prefer to send a single notification with all hops */
3226 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3227 if (skb) {
3228 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3229
d4ead6b3 3230 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3231 NULL, NULL, 0, RTM_DELROUTE,
3232 info->portid, seq, 0) < 0) {
3233 kfree_skb(skb);
3234 skb = NULL;
3235 } else
3236 info->skip_notify = 1;
3237 }
3238
0ae81335 3239 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3240 &rt->fib6_siblings,
3241 fib6_siblings) {
0ae81335
DA
3242 err = fib6_del(sibling, info);
3243 if (err)
e3330039 3244 goto out_unlock;
0ae81335
DA
3245 }
3246 }
3247
3248 err = fib6_del(rt, info);
e3330039 3249out_unlock:
66f5d6ce 3250 spin_unlock_bh(&table->tb6_lock);
e3330039 3251out_put:
93531c67 3252 fib6_info_release(rt);
16a16cd3
DA
3253
3254 if (skb) {
e3330039 3255 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3256 info->nlh, gfp_any());
3257 }
0ae81335
DA
3258 return err;
3259}
3260
23fb93a4
DA
3261static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3262{
3263 int rc = -ESRCH;
3264
3265 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3266 goto out;
3267
3268 if (cfg->fc_flags & RTF_GATEWAY &&
3269 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3270 goto out;
761f6026
XL
3271
3272 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3273out:
3274 return rc;
3275}
3276
333c4301
DA
3277static int ip6_route_del(struct fib6_config *cfg,
3278 struct netlink_ext_ack *extack)
1da177e4 3279{
8d1c802b 3280 struct rt6_info *rt_cache;
c71099ac 3281 struct fib6_table *table;
8d1c802b 3282 struct fib6_info *rt;
1da177e4 3283 struct fib6_node *fn;
1da177e4
LT
3284 int err = -ESRCH;
3285
5578689a 3286 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3287 if (!table) {
3288 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3289 return err;
d5d531cb 3290 }
c71099ac 3291
66f5d6ce 3292 rcu_read_lock();
1da177e4 3293
c71099ac 3294 fn = fib6_locate(&table->tb6_root,
86872cb5 3295 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3296 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3297 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3298
1da177e4 3299 if (fn) {
66f5d6ce 3300 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3301 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3302 int rc;
3303
2b760fcf
WW
3304 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3305 &cfg->fc_src);
23fb93a4
DA
3306 if (rt_cache) {
3307 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3308 if (rc != -ESRCH) {
3309 rcu_read_unlock();
23fb93a4 3310 return rc;
9e575010 3311 }
23fb93a4
DA
3312 }
3313 continue;
2b760fcf 3314 }
86872cb5 3315 if (cfg->fc_ifindex &&
5e670d84
DA
3316 (!rt->fib6_nh.nh_dev ||
3317 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3318 continue;
86872cb5 3319 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3320 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3321 continue;
93c2fb25 3322 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3323 continue;
93c2fb25 3324 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3325 continue;
e873e4b9
WW
3326 if (!fib6_info_hold_safe(rt))
3327 continue;
66f5d6ce 3328 rcu_read_unlock();
1da177e4 3329
0ae81335
DA
3330 /* if gateway was specified only delete the one hop */
3331 if (cfg->fc_flags & RTF_GATEWAY)
3332 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3333
3334 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3335 }
3336 }
66f5d6ce 3337 rcu_read_unlock();
1da177e4
LT
3338
3339 return err;
3340}
3341
6700c270 3342static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3343{
a6279458 3344 struct netevent_redirect netevent;
e8599ff4 3345 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3346 struct ndisc_options ndopts;
3347 struct inet6_dev *in6_dev;
3348 struct neighbour *neigh;
a68886a6 3349 struct fib6_info *from;
71bcdba0 3350 struct rd_msg *msg;
6e157b6a
DM
3351 int optlen, on_link;
3352 u8 *lladdr;
e8599ff4 3353
29a3cad5 3354 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3355 optlen -= sizeof(*msg);
e8599ff4
DM
3356
3357 if (optlen < 0) {
6e157b6a 3358 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3359 return;
3360 }
3361
71bcdba0 3362 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3363
71bcdba0 3364 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3365 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3366 return;
3367 }
3368
6e157b6a 3369 on_link = 0;
71bcdba0 3370 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3371 on_link = 1;
71bcdba0 3372 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3373 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3374 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3375 return;
3376 }
3377
3378 in6_dev = __in6_dev_get(skb->dev);
3379 if (!in6_dev)
3380 return;
3381 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3382 return;
3383
3384 /* RFC2461 8.1:
3385 * The IP source address of the Redirect MUST be the same as the current
3386 * first-hop router for the specified ICMP Destination Address.
3387 */
3388
f997c55c 3389 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3390 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3391 return;
3392 }
6e157b6a
DM
3393
3394 lladdr = NULL;
e8599ff4
DM
3395 if (ndopts.nd_opts_tgt_lladdr) {
3396 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3397 skb->dev);
3398 if (!lladdr) {
3399 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3400 return;
3401 }
3402 }
3403
6e157b6a 3404 rt = (struct rt6_info *) dst;
ec13ad1d 3405 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3406 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3407 return;
6e157b6a 3408 }
e8599ff4 3409
6e157b6a
DM
3410 /* Redirect received -> path was valid.
3411 * Look, redirects are sent only in response to data packets,
3412 * so that this nexthop apparently is reachable. --ANK
3413 */
0dec879f 3414 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3415
71bcdba0 3416 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3417 if (!neigh)
3418 return;
a6279458 3419
1da177e4
LT
3420 /*
3421 * We have finally decided to accept it.
3422 */
3423
f997c55c 3424 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3425 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3426 NEIGH_UPDATE_F_OVERRIDE|
3427 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3428 NEIGH_UPDATE_F_ISROUTER)),
3429 NDISC_REDIRECT, &ndopts);
1da177e4 3430
4d85cd0c 3431 rcu_read_lock();
a68886a6 3432 from = rcu_dereference(rt->from);
e873e4b9
WW
3433 /* This fib6_info_hold() is safe here because we hold reference to rt
3434 * and rt already holds reference to fib6_info.
3435 */
8a14e46f 3436 fib6_info_hold(from);
4d85cd0c 3437 rcu_read_unlock();
8a14e46f
DA
3438
3439 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3440 if (!nrt)
1da177e4
LT
3441 goto out;
3442
3443 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3444 if (on_link)
3445 nrt->rt6i_flags &= ~RTF_GATEWAY;
3446
4e3fd7a0 3447 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3448
2b760fcf
WW
3449 /* No need to remove rt from the exception table if rt is
3450 * a cached route because rt6_insert_exception() will
3451 * takes care of it
3452 */
8a14e46f 3453 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3454 dst_release_immediate(&nrt->dst);
3455 goto out;
3456 }
1da177e4 3457
d8d1f30b
CG
3458 netevent.old = &rt->dst;
3459 netevent.new = &nrt->dst;
71bcdba0 3460 netevent.daddr = &msg->dest;
60592833 3461 netevent.neigh = neigh;
8d71740c
TT
3462 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3463
1da177e4 3464out:
8a14e46f 3465 fib6_info_release(from);
e8599ff4 3466 neigh_release(neigh);
6e157b6a
DM
3467}
3468
70ceb4f5 3469#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3470static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3471 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3472 const struct in6_addr *gwaddr,
3473 struct net_device *dev)
70ceb4f5 3474{
830218c1
DA
3475 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3476 int ifindex = dev->ifindex;
70ceb4f5 3477 struct fib6_node *fn;
8d1c802b 3478 struct fib6_info *rt = NULL;
c71099ac
TG
3479 struct fib6_table *table;
3480
830218c1 3481 table = fib6_get_table(net, tb_id);
38308473 3482 if (!table)
c71099ac 3483 return NULL;
70ceb4f5 3484
66f5d6ce 3485 rcu_read_lock();
38fbeeee 3486 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3487 if (!fn)
3488 goto out;
3489
66f5d6ce 3490 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3491 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3492 continue;
93c2fb25 3493 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3494 continue;
5e670d84 3495 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3496 continue;
e873e4b9
WW
3497 if (!fib6_info_hold_safe(rt))
3498 continue;
70ceb4f5
YH
3499 break;
3500 }
3501out:
66f5d6ce 3502 rcu_read_unlock();
70ceb4f5
YH
3503 return rt;
3504}
3505
8d1c802b 3506static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3507 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3508 const struct in6_addr *gwaddr,
3509 struct net_device *dev,
95c96174 3510 unsigned int pref)
70ceb4f5 3511{
86872cb5 3512 struct fib6_config cfg = {
238fc7ea 3513 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3514 .fc_ifindex = dev->ifindex,
86872cb5
TG
3515 .fc_dst_len = prefixlen,
3516 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3517 RTF_UP | RTF_PREF(pref),
b91d5329 3518 .fc_protocol = RTPROT_RA,
e8478e80 3519 .fc_type = RTN_UNICAST,
15e47304 3520 .fc_nlinfo.portid = 0,
efa2cea0
DL
3521 .fc_nlinfo.nlh = NULL,
3522 .fc_nlinfo.nl_net = net,
86872cb5
TG
3523 };
3524
830218c1 3525 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3526 cfg.fc_dst = *prefix;
3527 cfg.fc_gateway = *gwaddr;
70ceb4f5 3528
e317da96
YH
3529 /* We should treat it as a default route if prefix length is 0. */
3530 if (!prefixlen)
86872cb5 3531 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3532
acb54e3c 3533 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3534
830218c1 3535 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3536}
3537#endif
3538
8d1c802b 3539struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3540 const struct in6_addr *addr,
3541 struct net_device *dev)
1ab1457c 3542{
830218c1 3543 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3544 struct fib6_info *rt;
c71099ac 3545 struct fib6_table *table;
1da177e4 3546
afb1d4b5 3547 table = fib6_get_table(net, tb_id);
38308473 3548 if (!table)
c71099ac 3549 return NULL;
1da177e4 3550
66f5d6ce
WW
3551 rcu_read_lock();
3552 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3553 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3554 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3555 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3556 break;
3557 }
e873e4b9
WW
3558 if (rt && !fib6_info_hold_safe(rt))
3559 rt = NULL;
66f5d6ce 3560 rcu_read_unlock();
1da177e4
LT
3561 return rt;
3562}
3563
8d1c802b 3564struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3565 const struct in6_addr *gwaddr,
ebacaaa0
YH
3566 struct net_device *dev,
3567 unsigned int pref)
1da177e4 3568{
86872cb5 3569 struct fib6_config cfg = {
ca254490 3570 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3571 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3572 .fc_ifindex = dev->ifindex,
3573 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3574 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3575 .fc_protocol = RTPROT_RA,
e8478e80 3576 .fc_type = RTN_UNICAST,
15e47304 3577 .fc_nlinfo.portid = 0,
5578689a 3578 .fc_nlinfo.nlh = NULL,
afb1d4b5 3579 .fc_nlinfo.nl_net = net,
86872cb5 3580 };
1da177e4 3581
4e3fd7a0 3582 cfg.fc_gateway = *gwaddr;
1da177e4 3583
acb54e3c 3584 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3585 struct fib6_table *table;
3586
3587 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3588 if (table)
3589 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3590 }
1da177e4 3591
afb1d4b5 3592 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3593}
3594
afb1d4b5
DA
3595static void __rt6_purge_dflt_routers(struct net *net,
3596 struct fib6_table *table)
1da177e4 3597{
8d1c802b 3598 struct fib6_info *rt;
1da177e4
LT
3599
3600restart:
66f5d6ce
WW
3601 rcu_read_lock();
3602 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3603 struct net_device *dev = fib6_info_nh_dev(rt);
3604 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3605
93c2fb25 3606 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3607 (!idev || idev->cnf.accept_ra != 2) &&
3608 fib6_info_hold_safe(rt)) {
93531c67
DA
3609 rcu_read_unlock();
3610 ip6_del_rt(net, rt);
1da177e4
LT
3611 goto restart;
3612 }
3613 }
66f5d6ce 3614 rcu_read_unlock();
830218c1
DA
3615
3616 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3617}
3618
3619void rt6_purge_dflt_routers(struct net *net)
3620{
3621 struct fib6_table *table;
3622 struct hlist_head *head;
3623 unsigned int h;
3624
3625 rcu_read_lock();
3626
3627 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3628 head = &net->ipv6.fib_table_hash[h];
3629 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3630 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3631 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3632 }
3633 }
3634
3635 rcu_read_unlock();
1da177e4
LT
3636}
3637
5578689a
DL
3638static void rtmsg_to_fib6_config(struct net *net,
3639 struct in6_rtmsg *rtmsg,
86872cb5
TG
3640 struct fib6_config *cfg)
3641{
8823a3ac
3642 *cfg = (struct fib6_config){
3643 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3644 : RT6_TABLE_MAIN,
3645 .fc_ifindex = rtmsg->rtmsg_ifindex,
67f69513 3646 .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
8823a3ac
3647 .fc_expires = rtmsg->rtmsg_info,
3648 .fc_dst_len = rtmsg->rtmsg_dst_len,
3649 .fc_src_len = rtmsg->rtmsg_src_len,
3650 .fc_flags = rtmsg->rtmsg_flags,
3651 .fc_type = rtmsg->rtmsg_type,
3652
3653 .fc_nlinfo.nl_net = net,
3654
3655 .fc_dst = rtmsg->rtmsg_dst,
3656 .fc_src = rtmsg->rtmsg_src,
3657 .fc_gateway = rtmsg->rtmsg_gateway,
3658 };
86872cb5
TG
3659}
3660
5578689a 3661int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3662{
86872cb5 3663 struct fib6_config cfg;
1da177e4
LT
3664 struct in6_rtmsg rtmsg;
3665 int err;
3666
67ba4152 3667 switch (cmd) {
1da177e4
LT
3668 case SIOCADDRT: /* Add a route */
3669 case SIOCDELRT: /* Delete a route */
af31f412 3670 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3671 return -EPERM;
3672 err = copy_from_user(&rtmsg, arg,
3673 sizeof(struct in6_rtmsg));
3674 if (err)
3675 return -EFAULT;
86872cb5 3676
5578689a 3677 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3678
1da177e4
LT
3679 rtnl_lock();
3680 switch (cmd) {
3681 case SIOCADDRT:
acb54e3c 3682 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3683 break;
3684 case SIOCDELRT:
333c4301 3685 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3686 break;
3687 default:
3688 err = -EINVAL;
3689 }
3690 rtnl_unlock();
3691
3692 return err;
3ff50b79 3693 }
1da177e4
LT
3694
3695 return -EINVAL;
3696}
3697
3698/*
3699 * Drop the packet on the floor
3700 */
3701
d5fdd6ba 3702static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3703{
612f09e8 3704 int type;
adf30907 3705 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3706 switch (ipstats_mib_noroutes) {
3707 case IPSTATS_MIB_INNOROUTES:
0660e03f 3708 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3709 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3710 IP6_INC_STATS(dev_net(dst->dev),
3711 __in6_dev_get_safely(skb->dev),
3bd653c8 3712 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3713 break;
3714 }
3715 /* FALLTHROUGH */
3716 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3717 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3718 ipstats_mib_noroutes);
612f09e8
YH
3719 break;
3720 }
3ffe533c 3721 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3722 kfree_skb(skb);
3723 return 0;
3724}
3725
9ce8ade0
TG
3726static int ip6_pkt_discard(struct sk_buff *skb)
3727{
612f09e8 3728 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3729}
3730
ede2059d 3731static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3732{
adf30907 3733 skb->dev = skb_dst(skb)->dev;
612f09e8 3734 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3735}
3736
9ce8ade0
TG
3737static int ip6_pkt_prohibit(struct sk_buff *skb)
3738{
612f09e8 3739 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3740}
3741
ede2059d 3742static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3743{
adf30907 3744 skb->dev = skb_dst(skb)->dev;
612f09e8 3745 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3746}
3747
1da177e4
LT
3748/*
3749 * Allocate a dst for local (unicast / anycast) address.
3750 */
3751
360a9887
DA
3752struct fib6_info *addrconf_f6i_alloc(struct net *net,
3753 struct inet6_dev *idev,
3754 const struct in6_addr *addr,
3755 bool anycast, gfp_t gfp_flags)
1da177e4 3756{
c7a1ce39
DA
3757 struct fib6_config cfg = {
3758 .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3759 .fc_ifindex = idev->dev->ifindex,
3760 .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3761 .fc_dst = *addr,
3762 .fc_dst_len = 128,
3763 .fc_protocol = RTPROT_KERNEL,
3764 .fc_nlinfo.nl_net = net,
3765 .fc_ignore_dev_down = true,
3766 };
1da177e4 3767
e8478e80 3768 if (anycast) {
c7a1ce39
DA
3769 cfg.fc_type = RTN_ANYCAST;
3770 cfg.fc_flags |= RTF_ANYCAST;
e8478e80 3771 } else {
c7a1ce39
DA
3772 cfg.fc_type = RTN_LOCAL;
3773 cfg.fc_flags |= RTF_LOCAL;
e8478e80 3774 }
1da177e4 3775
c7a1ce39 3776 return ip6_route_info_create(&cfg, gfp_flags, NULL);
1da177e4
LT
3777}
3778
c3968a85
DW
3779/* remove deleted ip from prefsrc entries */
3780struct arg_dev_net_ip {
3781 struct net_device *dev;
3782 struct net *net;
3783 struct in6_addr *addr;
3784};
3785
8d1c802b 3786static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3787{
3788 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3789 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3790 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3791
5e670d84 3792 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3793 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3794 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3795 spin_lock_bh(&rt6_exception_lock);
c3968a85 3796 /* remove prefsrc entry */
93c2fb25 3797 rt->fib6_prefsrc.plen = 0;
60006a48 3798 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3799 }
3800 return 0;
3801}
3802
3803void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3804{
3805 struct net *net = dev_net(ifp->idev->dev);
3806 struct arg_dev_net_ip adni = {
3807 .dev = ifp->idev->dev,
3808 .net = net,
3809 .addr = &ifp->addr,
3810 };
0c3584d5 3811 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3812}
3813
be7a010d 3814#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3815
3816/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3817static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3818{
3819 struct in6_addr *gateway = (struct in6_addr *)arg;
3820
93c2fb25 3821 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3822 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3823 return -1;
3824 }
b16cb459
WW
3825
3826 /* Further clean up cached routes in exception table.
3827 * This is needed because cached route may have a different
3828 * gateway than its 'parent' in the case of an ip redirect.
3829 */
3830 rt6_exceptions_clean_tohost(rt, gateway);
3831
be7a010d
DJ
3832 return 0;
3833}
3834
3835void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3836{
3837 fib6_clean_all(net, fib6_clean_tohost, gateway);
3838}
3839
2127d95a
IS
/* Argument passed to the route walkers that react to device state
 * changes.  Exactly one member of the union is meaningful per walker.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the change concerns */
	union {
		unsigned int nh_flags;	/* nexthop flags, read by fib6_ifup() */
		unsigned long event;	/* NETDEV_* event, read by fib6_ifdown() */
	};
};
3847
8d1c802b 3848static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3849{
8d1c802b 3850 struct fib6_info *iter;
d7dedee1
IS
3851 struct fib6_node *fn;
3852
93c2fb25
DA
3853 fn = rcu_dereference_protected(rt->fib6_node,
3854 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3855 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3856 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3857 while (iter) {
93c2fb25 3858 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3859 rt6_qualify_for_ecmp(iter))
d7dedee1 3860 return iter;
8fb11a9a 3861 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3862 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3863 }
3864
3865 return NULL;
3866}
3867
8d1c802b 3868static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3869{
5e670d84
DA
3870 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3871 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3872 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3873 return true;
3874
3875 return false;
3876}
3877
8d1c802b 3878static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3879{
8d1c802b 3880 struct fib6_info *iter;
d7dedee1
IS
3881 int total = 0;
3882
3883 if (!rt6_is_dead(rt))
5e670d84 3884 total += rt->fib6_nh.nh_weight;
d7dedee1 3885
93c2fb25 3886 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3887 if (!rt6_is_dead(iter))
5e670d84 3888 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3889 }
3890
3891 return total;
3892}
3893
8d1c802b 3894static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3895{
3896 int upper_bound = -1;
3897
3898 if (!rt6_is_dead(rt)) {
5e670d84 3899 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3900 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3901 total) - 1;
3902 }
5e670d84 3903 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3904}
3905
8d1c802b 3906static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3907{
8d1c802b 3908 struct fib6_info *iter;
d7dedee1
IS
3909 int weight = 0;
3910
3911 rt6_upper_bound_set(rt, &weight, total);
3912
93c2fb25 3913 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3914 rt6_upper_bound_set(iter, &weight, total);
3915}
3916
8d1c802b 3917void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3918{
8d1c802b 3919 struct fib6_info *first;
d7dedee1
IS
3920 int total;
3921
3922 /* In case the entire multipath route was marked for flushing,
3923 * then there is no need to rebalance upon the removal of every
3924 * sibling route.
3925 */
93c2fb25 3926 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3927 return;
3928
3929 /* During lookup routes are evaluated in order, so we need to
3930 * make sure upper bounds are assigned from the first sibling
3931 * onwards.
3932 */
3933 first = rt6_multipath_first_sibling(rt);
3934 if (WARN_ON_ONCE(!first))
3935 return;
3936
3937 total = rt6_multipath_total_weight(first);
3938 rt6_multipath_upper_bound_set(first, total);
3939}
3940
8d1c802b 3941static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3942{
3943 const struct arg_netdev_event *arg = p_arg;
7aef6859 3944 struct net *net = dev_net(arg->dev);
2127d95a 3945
421842ed 3946 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3947 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3948 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3949 rt6_multipath_rebalance(rt);
1de178ed 3950 }
2127d95a
IS
3951
3952 return 0;
3953}
3954
3955void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3956{
3957 struct arg_netdev_event arg = {
3958 .dev = dev,
6802f3ad
IS
3959 {
3960 .nh_flags = nh_flags,
3961 },
2127d95a
IS
3962 };
3963
3964 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3965 arg.nh_flags |= RTNH_F_LINKDOWN;
3966
3967 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3968}
3969
8d1c802b 3970static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3971 const struct net_device *dev)
3972{
8d1c802b 3973 struct fib6_info *iter;
1de178ed 3974
5e670d84 3975 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3976 return true;
93c2fb25 3977 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3978 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3979 return true;
3980
3981 return false;
3982}
3983
8d1c802b 3984static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3985{
8d1c802b 3986 struct fib6_info *iter;
1de178ed
IS
3987
3988 rt->should_flush = 1;
93c2fb25 3989 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3990 iter->should_flush = 1;
3991}
3992
8d1c802b 3993static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3994 const struct net_device *down_dev)
3995{
8d1c802b 3996 struct fib6_info *iter;
1de178ed
IS
3997 unsigned int dead = 0;
3998
5e670d84
DA
3999 if (rt->fib6_nh.nh_dev == down_dev ||
4000 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 4001 dead++;
93c2fb25 4002 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4003 if (iter->fib6_nh.nh_dev == down_dev ||
4004 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
4005 dead++;
4006
4007 return dead;
4008}
4009
8d1c802b 4010static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4011 const struct net_device *dev,
4012 unsigned int nh_flags)
4013{
8d1c802b 4014 struct fib6_info *iter;
1de178ed 4015
5e670d84
DA
4016 if (rt->fib6_nh.nh_dev == dev)
4017 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 4018 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4019 if (iter->fib6_nh.nh_dev == dev)
4020 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
4021}
4022
a1a22c12 4023/* called with write lock held for table with rt */
8d1c802b 4024static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4025{
4c981e28
IS
4026 const struct arg_netdev_event *arg = p_arg;
4027 const struct net_device *dev = arg->dev;
7aef6859 4028 struct net *net = dev_net(dev);
8ed67789 4029
421842ed 4030 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4031 return 0;
4032
4033 switch (arg->event) {
4034 case NETDEV_UNREGISTER:
5e670d84 4035 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4036 case NETDEV_DOWN:
1de178ed 4037 if (rt->should_flush)
27c6fa73 4038 return -1;
93c2fb25 4039 if (!rt->fib6_nsiblings)
5e670d84 4040 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4041 if (rt6_multipath_uses_dev(rt, dev)) {
4042 unsigned int count;
4043
4044 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4045 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4046 rt6_multipath_flush(rt);
4047 return -1;
4048 }
4049 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4050 RTNH_F_LINKDOWN);
7aef6859 4051 fib6_update_sernum(net, rt);
d7dedee1 4052 rt6_multipath_rebalance(rt);
1de178ed
IS
4053 }
4054 return -2;
27c6fa73 4055 case NETDEV_CHANGE:
5e670d84 4056 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4057 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4058 break;
5e670d84 4059 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4060 rt6_multipath_rebalance(rt);
27c6fa73 4061 break;
2b241361 4062 }
c159d30c 4063
1da177e4
LT
4064 return 0;
4065}
4066
27c6fa73 4067void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4068{
4c981e28 4069 struct arg_netdev_event arg = {
8ed67789 4070 .dev = dev,
6802f3ad
IS
4071 {
4072 .event = event,
4073 },
8ed67789 4074 };
7c6bb7d2 4075 struct net *net = dev_net(dev);
8ed67789 4076
7c6bb7d2
DA
4077 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4078 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4079 else
4080 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4081}
4082
4083void rt6_disable_ip(struct net_device *dev, unsigned long event)
4084{
4085 rt6_sync_down_dev(dev, event);
4086 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4087 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4088}
4089
/* Argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4094
8d1c802b 4095static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4096{
4097 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4098 struct inet6_dev *idev;
4099
4100 /* In IPv6 pmtu discovery is not optional,
4101 so that RTAX_MTU lock cannot disable it.
4102 We still use this lock to block changes
4103 caused by addrconf/ndisc.
4104 */
4105
4106 idev = __in6_dev_get(arg->dev);
38308473 4107 if (!idev)
1da177e4
LT
4108 return 0;
4109
4110 /* For administrative MTU increase, there is no way to discover
4111 IPv6 PMTU increase, so PMTU increase should be updated here.
4112 Since RFC 1981 doesn't include administrative MTU increase
4113 update PMTU increase is a MUST. (i.e. jumbo frame)
4114 */
5e670d84 4115 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4116 !fib6_metric_locked(rt, RTAX_MTU)) {
4117 u32 mtu = rt->fib6_pmtu;
4118
4119 if (mtu >= arg->mtu ||
4120 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4121 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4122
f5bbe7ee 4123 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4124 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4125 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4126 }
1da177e4
LT
4127 return 0;
4128}
4129
95c96174 4130void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4131{
c71099ac
TG
4132 struct rt6_mtu_change_arg arg = {
4133 .dev = dev,
4134 .mtu = mtu,
4135 };
1da177e4 4136
0c3584d5 4137 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4138}
4139
ef7c79ed 4140static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4141 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4142 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4143 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4144 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4145 [RTA_PRIORITY] = { .type = NLA_U32 },
4146 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4147 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4148 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4149 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4150 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4151 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4152 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4153 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4154 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4155 [RTA_IP_PROTO] = { .type = NLA_U8 },
4156 [RTA_SPORT] = { .type = NLA_U16 },
4157 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4158};
4159
4160static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4161 struct fib6_config *cfg,
4162 struct netlink_ext_ack *extack)
1da177e4 4163{
86872cb5
TG
4164 struct rtmsg *rtm;
4165 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4166 unsigned int pref;
86872cb5 4167 int err;
1da177e4 4168
fceb6435 4169 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4170 extack);
86872cb5
TG
4171 if (err < 0)
4172 goto errout;
1da177e4 4173
86872cb5
TG
4174 err = -EINVAL;
4175 rtm = nlmsg_data(nlh);
86872cb5 4176
84db8407
4177 *cfg = (struct fib6_config){
4178 .fc_table = rtm->rtm_table,
4179 .fc_dst_len = rtm->rtm_dst_len,
4180 .fc_src_len = rtm->rtm_src_len,
4181 .fc_flags = RTF_UP,
4182 .fc_protocol = rtm->rtm_protocol,
4183 .fc_type = rtm->rtm_type,
4184
4185 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4186 .fc_nlinfo.nlh = nlh,
4187 .fc_nlinfo.nl_net = sock_net(skb->sk),
4188 };
86872cb5 4189
ef2c7d7b
ND
4190 if (rtm->rtm_type == RTN_UNREACHABLE ||
4191 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4192 rtm->rtm_type == RTN_PROHIBIT ||
4193 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4194 cfg->fc_flags |= RTF_REJECT;
4195
ab79ad14
4196 if (rtm->rtm_type == RTN_LOCAL)
4197 cfg->fc_flags |= RTF_LOCAL;
4198
1f56a01f
MKL
4199 if (rtm->rtm_flags & RTM_F_CLONED)
4200 cfg->fc_flags |= RTF_CACHE;
4201
fc1e64e1
DA
4202 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4203
86872cb5 4204 if (tb[RTA_GATEWAY]) {
67b61f6c 4205 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4206 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4207 }
e3818541
DA
4208 if (tb[RTA_VIA]) {
4209 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4210 goto errout;
4211 }
86872cb5
TG
4212
4213 if (tb[RTA_DST]) {
4214 int plen = (rtm->rtm_dst_len + 7) >> 3;
4215
4216 if (nla_len(tb[RTA_DST]) < plen)
4217 goto errout;
4218
4219 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4220 }
86872cb5
TG
4221
4222 if (tb[RTA_SRC]) {
4223 int plen = (rtm->rtm_src_len + 7) >> 3;
4224
4225 if (nla_len(tb[RTA_SRC]) < plen)
4226 goto errout;
4227
4228 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4229 }
86872cb5 4230
c3968a85 4231 if (tb[RTA_PREFSRC])
67b61f6c 4232 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4233
86872cb5
TG
4234 if (tb[RTA_OIF])
4235 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4236
4237 if (tb[RTA_PRIORITY])
4238 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4239
4240 if (tb[RTA_METRICS]) {
4241 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4242 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4243 }
86872cb5
TG
4244
4245 if (tb[RTA_TABLE])
4246 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4247
51ebd318
ND
4248 if (tb[RTA_MULTIPATH]) {
4249 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4250 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4251
4252 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4253 cfg->fc_mp_len, extack);
9ed59592
DA
4254 if (err < 0)
4255 goto errout;
51ebd318
ND
4256 }
4257
c78ba6d6
LR
4258 if (tb[RTA_PREF]) {
4259 pref = nla_get_u8(tb[RTA_PREF]);
4260 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4261 pref != ICMPV6_ROUTER_PREF_HIGH)
4262 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4263 cfg->fc_flags |= RTF_PREF(pref);
4264 }
4265
19e42e45
RP
4266 if (tb[RTA_ENCAP])
4267 cfg->fc_encap = tb[RTA_ENCAP];
4268
9ed59592 4269 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4270 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4271
c255bd68 4272 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4273 if (err < 0)
4274 goto errout;
4275 }
4276
32bc201e
XL
4277 if (tb[RTA_EXPIRES]) {
4278 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4279
4280 if (addrconf_finite_timeout(timeout)) {
4281 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4282 cfg->fc_flags |= RTF_EXPIRES;
4283 }
4284 }
4285
86872cb5
TG
4286 err = 0;
4287errout:
4288 return err;
1da177e4
LT
4289}
4290
6b9ea5a6 4291struct rt6_nh {
8d1c802b 4292 struct fib6_info *fib6_info;
6b9ea5a6 4293 struct fib6_config r_cfg;
6b9ea5a6
RP
4294 struct list_head next;
4295};
4296
d4ead6b3
DA
4297static int ip6_route_info_append(struct net *net,
4298 struct list_head *rt6_nh_list,
8d1c802b
DA
4299 struct fib6_info *rt,
4300 struct fib6_config *r_cfg)
6b9ea5a6
RP
4301{
4302 struct rt6_nh *nh;
6b9ea5a6
RP
4303 int err = -EEXIST;
4304
4305 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4306 /* check if fib6_info already exists */
4307 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4308 return err;
4309 }
4310
4311 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4312 if (!nh)
4313 return -ENOMEM;
8d1c802b 4314 nh->fib6_info = rt;
6b9ea5a6
RP
4315 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4316 list_add_tail(&nh->next, rt6_nh_list);
4317
4318 return 0;
4319}
4320
8d1c802b
DA
4321static void ip6_route_mpath_notify(struct fib6_info *rt,
4322 struct fib6_info *rt_last,
3b1137fe
DA
4323 struct nl_info *info,
4324 __u16 nlflags)
4325{
4326 /* if this is an APPEND route, then rt points to the first route
4327 * inserted and rt_last points to last route inserted. Userspace
4328 * wants a consistent dump of the route which starts at the first
4329 * nexthop. Since sibling routes are always added at the end of
4330 * the list, find the first sibling of the last route appended
4331 */
93c2fb25
DA
4332 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4333 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4334 struct fib6_info,
93c2fb25 4335 fib6_siblings);
3b1137fe
DA
4336 }
4337
4338 if (rt)
4339 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4340}
4341
333c4301
DA
4342static int ip6_route_multipath_add(struct fib6_config *cfg,
4343 struct netlink_ext_ack *extack)
51ebd318 4344{
8d1c802b 4345 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4346 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4347 struct fib6_config r_cfg;
4348 struct rtnexthop *rtnh;
8d1c802b 4349 struct fib6_info *rt;
6b9ea5a6
RP
4350 struct rt6_nh *err_nh;
4351 struct rt6_nh *nh, *nh_safe;
3b1137fe 4352 __u16 nlflags;
51ebd318
ND
4353 int remaining;
4354 int attrlen;
6b9ea5a6
RP
4355 int err = 1;
4356 int nhn = 0;
4357 int replace = (cfg->fc_nlinfo.nlh &&
4358 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4359 LIST_HEAD(rt6_nh_list);
51ebd318 4360
3b1137fe
DA
4361 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4362 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4363 nlflags |= NLM_F_APPEND;
4364
35f1b4e9 4365 remaining = cfg->fc_mp_len;
51ebd318 4366 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4367
6b9ea5a6 4368 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4369 * fib6_info structs per nexthop
6b9ea5a6 4370 */
51ebd318
ND
4371 while (rtnh_ok(rtnh, remaining)) {
4372 memcpy(&r_cfg, cfg, sizeof(*cfg));
4373 if (rtnh->rtnh_ifindex)
4374 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4375
4376 attrlen = rtnh_attrlen(rtnh);
4377 if (attrlen > 0) {
4378 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4379
4380 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4381 if (nla) {
67b61f6c 4382 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4383 r_cfg.fc_flags |= RTF_GATEWAY;
4384 }
19e42e45
RP
4385 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4386 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4387 if (nla)
4388 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4389 }
6b9ea5a6 4390
68e2ffde 4391 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4392 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4393 if (IS_ERR(rt)) {
4394 err = PTR_ERR(rt);
4395 rt = NULL;
6b9ea5a6 4396 goto cleanup;
8c5b83f0 4397 }
b5d2d75e
DA
4398 if (!rt6_qualify_for_ecmp(rt)) {
4399 err = -EINVAL;
4400 NL_SET_ERR_MSG(extack,
4401 "Device only routes can not be added for IPv6 using the multipath API.");
4402 fib6_info_release(rt);
4403 goto cleanup;
4404 }
6b9ea5a6 4405
5e670d84 4406 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4407
d4ead6b3
DA
4408 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4409 rt, &r_cfg);
51ebd318 4410 if (err) {
93531c67 4411 fib6_info_release(rt);
6b9ea5a6
RP
4412 goto cleanup;
4413 }
4414
4415 rtnh = rtnh_next(rtnh, &remaining);
4416 }
4417
3b1137fe
DA
4418 /* for add and replace send one notification with all nexthops.
4419 * Skip the notification in fib6_add_rt2node and send one with
4420 * the full route when done
4421 */
4422 info->skip_notify = 1;
4423
6b9ea5a6
RP
4424 err_nh = NULL;
4425 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4426 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4427 fib6_info_release(nh->fib6_info);
93531c67 4428
f7225172
DA
4429 if (!err) {
4430 /* save reference to last route successfully inserted */
4431 rt_last = nh->fib6_info;
4432
4433 /* save reference to first route for notification */
4434 if (!rt_notif)
4435 rt_notif = nh->fib6_info;
4436 }
3b1137fe 4437
8d1c802b
DA
4438 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4439 nh->fib6_info = NULL;
6b9ea5a6
RP
4440 if (err) {
4441 if (replace && nhn)
a5a82d84
JK
4442 NL_SET_ERR_MSG_MOD(extack,
4443 "multipath route replace failed (check consistency of installed routes)");
6b9ea5a6
RP
4444 err_nh = nh;
4445 goto add_errout;
51ebd318 4446 }
6b9ea5a6 4447
1a72418b 4448 /* Because each route is added like a single route we remove
27596472
MK
4449 * these flags after the first nexthop: if there is a collision,
4450 * we have already failed to add the first nexthop:
4451 * fib6_add_rt2node() has rejected it; when replacing, old
4452 * nexthops have been replaced by first new, the rest should
4453 * be added to it.
1a72418b 4454 */
27596472
MK
4455 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4456 NLM_F_REPLACE);
6b9ea5a6
RP
4457 nhn++;
4458 }
4459
3b1137fe
DA
4460 /* success ... tell user about new route */
4461 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4462 goto cleanup;
4463
4464add_errout:
3b1137fe
DA
4465 /* send notification for routes that were added so that
4466 * the delete notifications sent by ip6_route_del are
4467 * coherent
4468 */
4469 if (rt_notif)
4470 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4471
6b9ea5a6
RP
4472 /* Delete routes that were already added */
4473 list_for_each_entry(nh, &rt6_nh_list, next) {
4474 if (err_nh == nh)
4475 break;
333c4301 4476 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4477 }
4478
4479cleanup:
4480 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4481 if (nh->fib6_info)
4482 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4483 list_del(&nh->next);
4484 kfree(nh);
4485 }
4486
4487 return err;
4488}
4489
333c4301
DA
4490static int ip6_route_multipath_del(struct fib6_config *cfg,
4491 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4492{
4493 struct fib6_config r_cfg;
4494 struct rtnexthop *rtnh;
4495 int remaining;
4496 int attrlen;
4497 int err = 1, last_err = 0;
4498
4499 remaining = cfg->fc_mp_len;
4500 rtnh = (struct rtnexthop *)cfg->fc_mp;
4501
4502 /* Parse a Multipath Entry */
4503 while (rtnh_ok(rtnh, remaining)) {
4504 memcpy(&r_cfg, cfg, sizeof(*cfg));
4505 if (rtnh->rtnh_ifindex)
4506 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4507
4508 attrlen = rtnh_attrlen(rtnh);
4509 if (attrlen > 0) {
4510 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4511
4512 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4513 if (nla) {
4514 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4515 r_cfg.fc_flags |= RTF_GATEWAY;
4516 }
4517 }
333c4301 4518 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4519 if (err)
4520 last_err = err;
4521
51ebd318
ND
4522 rtnh = rtnh_next(rtnh, &remaining);
4523 }
4524
4525 return last_err;
4526}
4527
c21ef3e3
DA
4528static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4529 struct netlink_ext_ack *extack)
1da177e4 4530{
86872cb5
TG
4531 struct fib6_config cfg;
4532 int err;
1da177e4 4533
333c4301 4534 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4535 if (err < 0)
4536 return err;
4537
51ebd318 4538 if (cfg.fc_mp)
333c4301 4539 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4540 else {
4541 cfg.fc_delete_all_nh = 1;
333c4301 4542 return ip6_route_del(&cfg, extack);
0ae81335 4543 }
1da177e4
LT
4544}
4545
c21ef3e3
DA
4546static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4547 struct netlink_ext_ack *extack)
1da177e4 4548{
86872cb5
TG
4549 struct fib6_config cfg;
4550 int err;
1da177e4 4551
333c4301 4552 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4553 if (err < 0)
4554 return err;
4555
67f69513
DA
4556 if (cfg.fc_metric == 0)
4557 cfg.fc_metric = IP6_RT_PRIO_USER;
4558
51ebd318 4559 if (cfg.fc_mp)
333c4301 4560 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4561 else
acb54e3c 4562 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4563}
4564
8d1c802b 4565static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4566{
beb1afac
DA
4567 int nexthop_len = 0;
4568
93c2fb25 4569 if (rt->fib6_nsiblings) {
beb1afac
DA
4570 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4571 + NLA_ALIGN(sizeof(struct rtnexthop))
4572 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4573 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4574
93c2fb25 4575 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4576 }
4577
339bf98f
TG
4578 return NLMSG_ALIGN(sizeof(struct rtmsg))
4579 + nla_total_size(16) /* RTA_SRC */
4580 + nla_total_size(16) /* RTA_DST */
4581 + nla_total_size(16) /* RTA_GATEWAY */
4582 + nla_total_size(16) /* RTA_PREFSRC */
4583 + nla_total_size(4) /* RTA_TABLE */
4584 + nla_total_size(4) /* RTA_IIF */
4585 + nla_total_size(4) /* RTA_OIF */
4586 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4587 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4588 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4589 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4590 + nla_total_size(1) /* RTA_PREF */
5e670d84 4591 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4592 + nexthop_len;
4593}
4594
8d1c802b 4595static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4596 unsigned int *flags, bool skip_oif)
beb1afac 4597{
5e670d84 4598 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4599 *flags |= RTNH_F_DEAD;
4600
5e670d84 4601 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4602 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4603
4604 rcu_read_lock();
4605 if (fib6_ignore_linkdown(rt))
beb1afac 4606 *flags |= RTNH_F_DEAD;
dcd1f572 4607 rcu_read_unlock();
beb1afac
DA
4608 }
4609
93c2fb25 4610 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4611 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4612 goto nla_put_failure;
4613 }
4614
5e670d84
DA
4615 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4616 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4617 *flags |= RTNH_F_OFFLOAD;
4618
5be083ce 4619 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4620 if (!skip_oif && rt->fib6_nh.nh_dev &&
4621 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4622 goto nla_put_failure;
4623
5e670d84
DA
4624 if (rt->fib6_nh.nh_lwtstate &&
4625 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4626 goto nla_put_failure;
4627
4628 return 0;
4629
4630nla_put_failure:
4631 return -EMSGSIZE;
4632}
4633
5be083ce 4634/* add multipath next hop */
8d1c802b 4635static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4636{
5e670d84 4637 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4638 struct rtnexthop *rtnh;
4639 unsigned int flags = 0;
4640
4641 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4642 if (!rtnh)
4643 goto nla_put_failure;
4644
5e670d84
DA
4645 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4646 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4647
5be083ce 4648 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4649 goto nla_put_failure;
4650
4651 rtnh->rtnh_flags = flags;
4652
4653 /* length of rtnetlink header + attributes */
4654 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4655
4656 return 0;
4657
4658nla_put_failure:
4659 return -EMSGSIZE;
339bf98f
TG
4660}
4661
d4ead6b3 4662static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4663 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4664 struct in6_addr *dest, struct in6_addr *src,
15e47304 4665 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4666 unsigned int flags)
1da177e4 4667{
22d0bd82
XL
4668 struct rt6_info *rt6 = (struct rt6_info *)dst;
4669 struct rt6key *rt6_dst, *rt6_src;
4670 u32 *pmetrics, table, rt6_flags;
2d7202bf 4671 struct nlmsghdr *nlh;
22d0bd82 4672 struct rtmsg *rtm;
d4ead6b3 4673 long expires = 0;
1da177e4 4674
15e47304 4675 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4676 if (!nlh)
26932566 4677 return -EMSGSIZE;
2d7202bf 4678
22d0bd82
XL
4679 if (rt6) {
4680 rt6_dst = &rt6->rt6i_dst;
4681 rt6_src = &rt6->rt6i_src;
4682 rt6_flags = rt6->rt6i_flags;
4683 } else {
4684 rt6_dst = &rt->fib6_dst;
4685 rt6_src = &rt->fib6_src;
4686 rt6_flags = rt->fib6_flags;
4687 }
4688
2d7202bf 4689 rtm = nlmsg_data(nlh);
1da177e4 4690 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4691 rtm->rtm_dst_len = rt6_dst->plen;
4692 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4693 rtm->rtm_tos = 0;
93c2fb25
DA
4694 if (rt->fib6_table)
4695 table = rt->fib6_table->tb6_id;
c71099ac 4696 else
9e762a4a 4697 table = RT6_TABLE_UNSPEC;
97f0082a 4698 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4699 if (nla_put_u32(skb, RTA_TABLE, table))
4700 goto nla_put_failure;
e8478e80
DA
4701
4702 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4703 rtm->rtm_flags = 0;
4704 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4705 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4706
22d0bd82 4707 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4708 rtm->rtm_flags |= RTM_F_CLONED;
4709
d4ead6b3
DA
4710 if (dest) {
4711 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4712 goto nla_put_failure;
1ab1457c 4713 rtm->rtm_dst_len = 128;
1da177e4 4714 } else if (rtm->rtm_dst_len)
22d0bd82 4715 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4716 goto nla_put_failure;
1da177e4
LT
4717#ifdef CONFIG_IPV6_SUBTREES
4718 if (src) {
930345ea 4719 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4720 goto nla_put_failure;
1ab1457c 4721 rtm->rtm_src_len = 128;
c78679e8 4722 } else if (rtm->rtm_src_len &&
22d0bd82 4723 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4724 goto nla_put_failure;
1da177e4 4725#endif
7bc570c8
YH
4726 if (iif) {
4727#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4728 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4729 int err = ip6mr_get_route(net, skb, rtm, portid);
4730
4731 if (err == 0)
4732 return 0;
4733 if (err < 0)
4734 goto nla_put_failure;
7bc570c8
YH
4735 } else
4736#endif
c78679e8
DM
4737 if (nla_put_u32(skb, RTA_IIF, iif))
4738 goto nla_put_failure;
d4ead6b3 4739 } else if (dest) {
1da177e4 4740 struct in6_addr saddr_buf;
d4ead6b3 4741 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4742 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4743 goto nla_put_failure;
1da177e4 4744 }
2d7202bf 4745
93c2fb25 4746 if (rt->fib6_prefsrc.plen) {
c3968a85 4747 struct in6_addr saddr_buf;
93c2fb25 4748 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4749 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4750 goto nla_put_failure;
c3968a85
DW
4751 }
4752
d4ead6b3
DA
4753 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4754 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4755 goto nla_put_failure;
4756
93c2fb25 4757 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4758 goto nla_put_failure;
8253947e 4759
beb1afac
DA
4760 /* For multipath routes, walk the siblings list and add
4761 * each as a nexthop within RTA_MULTIPATH.
4762 */
22d0bd82
XL
4763 if (rt6) {
4764 if (rt6_flags & RTF_GATEWAY &&
4765 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4766 goto nla_put_failure;
4767
4768 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4769 goto nla_put_failure;
4770 } else if (rt->fib6_nsiblings) {
8d1c802b 4771 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4772 struct nlattr *mp;
4773
4774 mp = nla_nest_start(skb, RTA_MULTIPATH);
4775 if (!mp)
4776 goto nla_put_failure;
4777
4778 if (rt6_add_nexthop(skb, rt) < 0)
4779 goto nla_put_failure;
4780
4781 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4782 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4783 if (rt6_add_nexthop(skb, sibling) < 0)
4784 goto nla_put_failure;
4785 }
4786
4787 nla_nest_end(skb, mp);
4788 } else {
5be083ce 4789 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4790 goto nla_put_failure;
4791 }
4792
22d0bd82 4793 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4794 expires = dst ? dst->expires : rt->expires;
4795 expires -= jiffies;
4796 }
69cdf8f9 4797
d4ead6b3 4798 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4799 goto nla_put_failure;
2d7202bf 4800
22d0bd82 4801 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4802 goto nla_put_failure;
4803
19e42e45 4804
053c095a
JB
4805 nlmsg_end(skb, nlh);
4806 return 0;
2d7202bf
TG
4807
4808nla_put_failure:
26932566
PM
4809 nlmsg_cancel(skb, nlh);
4810 return -EMSGSIZE;
1da177e4
LT
4811}
4812
13e38901
DA
4813static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4814 const struct net_device *dev)
4815{
4816 if (f6i->fib6_nh.nh_dev == dev)
4817 return true;
4818
4819 if (f6i->fib6_nsiblings) {
4820 struct fib6_info *sibling, *next_sibling;
4821
4822 list_for_each_entry_safe(sibling, next_sibling,
4823 &f6i->fib6_siblings, fib6_siblings) {
4824 if (sibling->fib6_nh.nh_dev == dev)
4825 return true;
4826 }
4827 }
4828
4829 return false;
4830}
4831
8d1c802b 4832int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4833{
4834 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4835 struct fib_dump_filter *filter = &arg->filter;
4836 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4837 struct net *net = arg->net;
4838
421842ed 4839 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4840 return 0;
1da177e4 4841
13e38901
DA
4842 if ((filter->flags & RTM_F_PREFIX) &&
4843 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4844 /* success since this is not a prefix route */
4845 return 1;
4846 }
4847 if (filter->filter_set) {
4848 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4849 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4850 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4851 return 1;
4852 }
13e38901 4853 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4854 }
1da177e4 4855
d4ead6b3
DA
4856 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4857 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4858 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4859}
4860
0eff0a27
JK
4861static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4862 const struct nlmsghdr *nlh,
4863 struct nlattr **tb,
4864 struct netlink_ext_ack *extack)
4865{
4866 struct rtmsg *rtm;
4867 int i, err;
4868
4869 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4870 NL_SET_ERR_MSG_MOD(extack,
4871 "Invalid header for get route request");
4872 return -EINVAL;
4873 }
4874
4875 if (!netlink_strict_get_check(skb))
4876 return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
4877 rtm_ipv6_policy, extack);
4878
4879 rtm = nlmsg_data(nlh);
4880 if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4881 (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4882 rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4883 rtm->rtm_type) {
4884 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4885 return -EINVAL;
4886 }
4887 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4888 NL_SET_ERR_MSG_MOD(extack,
4889 "Invalid flags for get route request");
4890 return -EINVAL;
4891 }
4892
4893 err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4894 rtm_ipv6_policy, extack);
4895 if (err)
4896 return err;
4897
4898 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4899 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4900 NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4901 return -EINVAL;
4902 }
4903
4904 for (i = 0; i <= RTA_MAX; i++) {
4905 if (!tb[i])
4906 continue;
4907
4908 switch (i) {
4909 case RTA_SRC:
4910 case RTA_DST:
4911 case RTA_IIF:
4912 case RTA_OIF:
4913 case RTA_MARK:
4914 case RTA_UID:
4915 case RTA_SPORT:
4916 case RTA_DPORT:
4917 case RTA_IP_PROTO:
4918 break;
4919 default:
4920 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4921 return -EINVAL;
4922 }
4923 }
4924
4925 return 0;
4926}
4927
c21ef3e3
DA
4928static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4929 struct netlink_ext_ack *extack)
1da177e4 4930{
3b1e0a65 4931 struct net *net = sock_net(in_skb->sk);
ab364a6f 4932 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4933 int err, iif = 0, oif = 0;
a68886a6 4934 struct fib6_info *from;
18c3a61c 4935 struct dst_entry *dst;
ab364a6f 4936 struct rt6_info *rt;
1da177e4 4937 struct sk_buff *skb;
ab364a6f 4938 struct rtmsg *rtm;
744486d4 4939 struct flowi6 fl6 = {};
18c3a61c 4940 bool fibmatch;
1da177e4 4941
0eff0a27 4942 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
ab364a6f
TG
4943 if (err < 0)
4944 goto errout;
1da177e4 4945
ab364a6f 4946 err = -EINVAL;
38b7097b
HFS
4947 rtm = nlmsg_data(nlh);
4948 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4949 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4950
ab364a6f
TG
4951 if (tb[RTA_SRC]) {
4952 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4953 goto errout;
4954
4e3fd7a0 4955 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4956 }
4957
4958 if (tb[RTA_DST]) {
4959 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4960 goto errout;
4961
4e3fd7a0 4962 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4963 }
4964
4965 if (tb[RTA_IIF])
4966 iif = nla_get_u32(tb[RTA_IIF]);
4967
4968 if (tb[RTA_OIF])
72331bc0 4969 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4970
2e47b291
LC
4971 if (tb[RTA_MARK])
4972 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4973
622ec2c9
LC
4974 if (tb[RTA_UID])
4975 fl6.flowi6_uid = make_kuid(current_user_ns(),
4976 nla_get_u32(tb[RTA_UID]));
4977 else
4978 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4979
eacb9384
RP
4980 if (tb[RTA_SPORT])
4981 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4982
4983 if (tb[RTA_DPORT])
4984 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4985
4986 if (tb[RTA_IP_PROTO]) {
4987 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5e1a99ea
HL
4988 &fl6.flowi6_proto, AF_INET6,
4989 extack);
eacb9384
RP
4990 if (err)
4991 goto errout;
4992 }
4993
1da177e4
LT
4994 if (iif) {
4995 struct net_device *dev;
72331bc0
SL
4996 int flags = 0;
4997
121622db
FW
4998 rcu_read_lock();
4999
5000 dev = dev_get_by_index_rcu(net, iif);
1da177e4 5001 if (!dev) {
121622db 5002 rcu_read_unlock();
1da177e4 5003 err = -ENODEV;
ab364a6f 5004 goto errout;
1da177e4 5005 }
72331bc0
SL
5006
5007 fl6.flowi6_iif = iif;
5008
5009 if (!ipv6_addr_any(&fl6.saddr))
5010 flags |= RT6_LOOKUP_F_HAS_SADDR;
5011
b75cc8f9 5012 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
5013
5014 rcu_read_unlock();
72331bc0
SL
5015 } else {
5016 fl6.flowi6_oif = oif;
5017
58acfd71 5018 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
5019 }
5020
18c3a61c
RP
5021
5022 rt = container_of(dst, struct rt6_info, dst);
5023 if (rt->dst.error) {
5024 err = rt->dst.error;
5025 ip6_rt_put(rt);
5026 goto errout;
1da177e4
LT
5027 }
5028
9d6acb3b
WC
5029 if (rt == net->ipv6.ip6_null_entry) {
5030 err = rt->dst.error;
5031 ip6_rt_put(rt);
5032 goto errout;
5033 }
5034
ab364a6f 5035 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 5036 if (!skb) {
94e187c0 5037 ip6_rt_put(rt);
ab364a6f
TG
5038 err = -ENOBUFS;
5039 goto errout;
5040 }
1da177e4 5041
d8d1f30b 5042 skb_dst_set(skb, &rt->dst);
a68886a6
DA
5043
5044 rcu_read_lock();
5045 from = rcu_dereference(rt->from);
5046
18c3a61c 5047 if (fibmatch)
a68886a6 5048 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
5049 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
5050 nlh->nlmsg_seq, 0);
5051 else
a68886a6
DA
5052 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5053 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
5054 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5055 0);
a68886a6
DA
5056 rcu_read_unlock();
5057
1da177e4 5058 if (err < 0) {
ab364a6f
TG
5059 kfree_skb(skb);
5060 goto errout;
1da177e4
LT
5061 }
5062
15e47304 5063 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 5064errout:
1da177e4 5065 return err;
1da177e4
LT
5066}
5067
8d1c802b 5068void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 5069 unsigned int nlm_flags)
1da177e4
LT
5070{
5071 struct sk_buff *skb;
5578689a 5072 struct net *net = info->nl_net;
528c4ceb
DL
5073 u32 seq;
5074 int err;
5075
5076 err = -ENOBUFS;
38308473 5077 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 5078
19e42e45 5079 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 5080 if (!skb)
21713ebc
TG
5081 goto errout;
5082
d4ead6b3
DA
5083 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5084 event, info->portid, seq, nlm_flags);
26932566
PM
5085 if (err < 0) {
5086 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5087 WARN_ON(err == -EMSGSIZE);
5088 kfree_skb(skb);
5089 goto errout;
5090 }
15e47304 5091 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5092 info->nlh, gfp_any());
5093 return;
21713ebc
TG
5094errout:
5095 if (err < 0)
5578689a 5096 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5097}
5098
8ed67789 5099static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5100 unsigned long event, void *ptr)
8ed67789 5101{
351638e7 5102 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5103 struct net *net = dev_net(dev);
8ed67789 5104
242d3a49
WC
5105 if (!(dev->flags & IFF_LOOPBACK))
5106 return NOTIFY_OK;
5107
5108 if (event == NETDEV_REGISTER) {
421842ed 5109 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5110 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5111 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5112#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5113 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5114 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5115 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5116 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5117#endif
76da0704
WC
5118 } else if (event == NETDEV_UNREGISTER &&
5119 dev->reg_state != NETREG_UNREGISTERED) {
5120 /* NETDEV_UNREGISTER could be fired for multiple times by
5121 * netdev_wait_allrefs(). Make sure we only call this once.
5122 */
12d94a80 5123 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5124#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5125 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5126 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5127#endif
5128 }
5129
5130 return NOTIFY_OK;
5131}
5132
1da177e4
LT
5133/*
5134 * /proc
5135 */
5136
5137#ifdef CONFIG_PROC_FS
1da177e4
LT
5138static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5139{
69ddb805 5140 struct net *net = (struct net *)seq->private;
1da177e4 5141 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5142 net->ipv6.rt6_stats->fib_nodes,
5143 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5144 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5145 net->ipv6.rt6_stats->fib_rt_entries,
5146 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5147 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5148 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5149
5150 return 0;
5151}
1da177e4
LT
5152#endif /* CONFIG_PROC_FS */
5153
5154#ifdef CONFIG_SYSCTL
5155
1da177e4 5156static
fe2c6338 5157int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5158 void __user *buffer, size_t *lenp, loff_t *ppos)
5159{
c486da34
LAG
5160 struct net *net;
5161 int delay;
f0fb9b28 5162 int ret;
c486da34 5163 if (!write)
1da177e4 5164 return -EINVAL;
c486da34
LAG
5165
5166 net = (struct net *)ctl->extra1;
5167 delay = net->ipv6.sysctl.flush_delay;
f0fb9b28
AP
5168 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5169 if (ret)
5170 return ret;
5171
2ac3ac8f 5172 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5173 return 0;
1da177e4
LT
5174}
5175
7c6bb7d2
DA
5176static int zero;
5177static int one = 1;
5178
ed792e28 5179static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5180 {
1da177e4 5181 .procname = "flush",
4990509f 5182 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5183 .maxlen = sizeof(int),
89c8b3a1 5184 .mode = 0200,
6d9f239a 5185 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5186 },
5187 {
1da177e4 5188 .procname = "gc_thresh",
9a7ec3a9 5189 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5190 .maxlen = sizeof(int),
5191 .mode = 0644,
6d9f239a 5192 .proc_handler = proc_dointvec,
1da177e4
LT
5193 },
5194 {
1da177e4 5195 .procname = "max_size",
4990509f 5196 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5197 .maxlen = sizeof(int),
5198 .mode = 0644,
6d9f239a 5199 .proc_handler = proc_dointvec,
1da177e4
LT
5200 },
5201 {
1da177e4 5202 .procname = "gc_min_interval",
4990509f 5203 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5204 .maxlen = sizeof(int),
5205 .mode = 0644,
6d9f239a 5206 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5207 },
5208 {
1da177e4 5209 .procname = "gc_timeout",
4990509f 5210 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5211 .maxlen = sizeof(int),
5212 .mode = 0644,
6d9f239a 5213 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5214 },
5215 {
1da177e4 5216 .procname = "gc_interval",
4990509f 5217 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5218 .maxlen = sizeof(int),
5219 .mode = 0644,
6d9f239a 5220 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5221 },
5222 {
1da177e4 5223 .procname = "gc_elasticity",
4990509f 5224 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5225 .maxlen = sizeof(int),
5226 .mode = 0644,
f3d3f616 5227 .proc_handler = proc_dointvec,
1da177e4
LT
5228 },
5229 {
1da177e4 5230 .procname = "mtu_expires",
4990509f 5231 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5232 .maxlen = sizeof(int),
5233 .mode = 0644,
6d9f239a 5234 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5235 },
5236 {
1da177e4 5237 .procname = "min_adv_mss",
4990509f 5238 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5239 .maxlen = sizeof(int),
5240 .mode = 0644,
f3d3f616 5241 .proc_handler = proc_dointvec,
1da177e4
LT
5242 },
5243 {
1da177e4 5244 .procname = "gc_min_interval_ms",
4990509f 5245 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5246 .maxlen = sizeof(int),
5247 .mode = 0644,
6d9f239a 5248 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5249 },
7c6bb7d2
DA
5250 {
5251 .procname = "skip_notify_on_dev_down",
5252 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5253 .maxlen = sizeof(int),
5254 .mode = 0644,
5255 .proc_handler = proc_dointvec,
5256 .extra1 = &zero,
5257 .extra2 = &one,
5258 },
f8572d8f 5259 { }
1da177e4
LT
5260};
5261
2c8c1e72 5262struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5263{
5264 struct ctl_table *table;
5265
5266 table = kmemdup(ipv6_route_table_template,
5267 sizeof(ipv6_route_table_template),
5268 GFP_KERNEL);
5ee09105
YH
5269
5270 if (table) {
5271 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5272 table[0].extra1 = net;
86393e52 5273 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5274 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5275 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5276 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5277 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5278 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5279 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5280 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5281 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5282 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5283
5284 /* Don't export sysctls to unprivileged users */
5285 if (net->user_ns != &init_user_ns)
5286 table[0].procname = NULL;
5ee09105
YH
5287 }
5288
760f2d01
DL
5289 return table;
5290}
1da177e4
LT
5291#endif
5292
2c8c1e72 5293static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5294{
633d424b 5295 int ret = -ENOMEM;
8ed67789 5296
86393e52
AD
5297 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5298 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5299
fc66f95c
ED
5300 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5301 goto out_ip6_dst_ops;
5302
421842ed
DA
5303 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5304 sizeof(*net->ipv6.fib6_null_entry),
5305 GFP_KERNEL);
5306 if (!net->ipv6.fib6_null_entry)
5307 goto out_ip6_dst_entries;
5308
8ed67789
DL
5309 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5310 sizeof(*net->ipv6.ip6_null_entry),
5311 GFP_KERNEL);
5312 if (!net->ipv6.ip6_null_entry)
421842ed 5313 goto out_fib6_null_entry;
d8d1f30b 5314 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5315 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5316 ip6_template_metrics, true);
8ed67789
DL
5317
5318#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5319 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5320 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5321 sizeof(*net->ipv6.ip6_prohibit_entry),
5322 GFP_KERNEL);
68fffc67
PZ
5323 if (!net->ipv6.ip6_prohibit_entry)
5324 goto out_ip6_null_entry;
d8d1f30b 5325 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5326 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5327 ip6_template_metrics, true);
8ed67789
DL
5328
5329 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5330 sizeof(*net->ipv6.ip6_blk_hole_entry),
5331 GFP_KERNEL);
68fffc67
PZ
5332 if (!net->ipv6.ip6_blk_hole_entry)
5333 goto out_ip6_prohibit_entry;
d8d1f30b 5334 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5335 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5336 ip6_template_metrics, true);
8ed67789
DL
5337#endif
5338
b339a47c
PZ
5339 net->ipv6.sysctl.flush_delay = 0;
5340 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5341 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5342 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5343 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5344 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5345 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5346 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5347 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5348
6891a346
BT
5349 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5350
8ed67789
DL
5351 ret = 0;
5352out:
5353 return ret;
f2fc6a54 5354
68fffc67
PZ
5355#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5356out_ip6_prohibit_entry:
5357 kfree(net->ipv6.ip6_prohibit_entry);
5358out_ip6_null_entry:
5359 kfree(net->ipv6.ip6_null_entry);
5360#endif
421842ed
DA
5361out_fib6_null_entry:
5362 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5363out_ip6_dst_entries:
5364 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5365out_ip6_dst_ops:
f2fc6a54 5366 goto out;
cdb18761
DL
5367}
5368
2c8c1e72 5369static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5370{
421842ed 5371 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5372 kfree(net->ipv6.ip6_null_entry);
5373#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5374 kfree(net->ipv6.ip6_prohibit_entry);
5375 kfree(net->ipv6.ip6_blk_hole_entry);
5376#endif
41bb78b4 5377 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5378}
5379
d189634e
TG
5380static int __net_init ip6_route_net_init_late(struct net *net)
5381{
5382#ifdef CONFIG_PROC_FS
c3506372
CH
5383 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5384 sizeof(struct ipv6_route_iter));
3617d949
CH
5385 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5386 rt6_stats_seq_show, NULL);
d189634e
TG
5387#endif
5388 return 0;
5389}
5390
5391static void __net_exit ip6_route_net_exit_late(struct net *net)
5392{
5393#ifdef CONFIG_PROC_FS
ece31ffd
G
5394 remove_proc_entry("ipv6_route", net->proc_net);
5395 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5396#endif
5397}
5398
cdb18761
DL
5399static struct pernet_operations ip6_route_net_ops = {
5400 .init = ip6_route_net_init,
5401 .exit = ip6_route_net_exit,
5402};
5403
c3426b47
DM
5404static int __net_init ipv6_inetpeer_init(struct net *net)
5405{
5406 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5407
5408 if (!bp)
5409 return -ENOMEM;
5410 inet_peer_base_init(bp);
5411 net->ipv6.peers = bp;
5412 return 0;
5413}
5414
5415static void __net_exit ipv6_inetpeer_exit(struct net *net)
5416{
5417 struct inet_peer_base *bp = net->ipv6.peers;
5418
5419 net->ipv6.peers = NULL;
56a6b248 5420 inetpeer_invalidate_tree(bp);
c3426b47
DM
5421 kfree(bp);
5422}
5423
2b823f72 5424static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5425 .init = ipv6_inetpeer_init,
5426 .exit = ipv6_inetpeer_exit,
5427};
5428
d189634e
TG
5429static struct pernet_operations ip6_route_net_late_ops = {
5430 .init = ip6_route_net_init_late,
5431 .exit = ip6_route_net_exit_late,
5432};
5433
8ed67789
DL
5434static struct notifier_block ip6_route_dev_notifier = {
5435 .notifier_call = ip6_route_dev_notify,
242d3a49 5436 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5437};
5438
2f460933
WC
5439void __init ip6_route_init_special_entries(void)
5440{
5441 /* Registering of the loopback is done before this portion of code,
5442 * the loopback reference in rt6_info will not be taken, do it
5443 * manually for init_net */
421842ed 5444 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5445 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5446 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5447 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5448 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5449 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5450 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5451 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5452 #endif
5453}
5454
433d49c3 5455int __init ip6_route_init(void)
1da177e4 5456{
433d49c3 5457 int ret;
8d0b94af 5458 int cpu;
433d49c3 5459
9a7ec3a9
DL
5460 ret = -ENOMEM;
5461 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5462 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5463 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5464 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5465 goto out;
14e50e57 5466
fc66f95c 5467 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5468 if (ret)
bdb3289f 5469 goto out_kmem_cache;
bdb3289f 5470
c3426b47
DM
5471 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5472 if (ret)
e8803b6c 5473 goto out_dst_entries;
2a0c451a 5474
7e52b33b
DM
5475 ret = register_pernet_subsys(&ip6_route_net_ops);
5476 if (ret)
5477 goto out_register_inetpeer;
c3426b47 5478
5dc121e9
AE
5479 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5480
e8803b6c 5481 ret = fib6_init();
433d49c3 5482 if (ret)
8ed67789 5483 goto out_register_subsys;
433d49c3 5484
433d49c3
DL
5485 ret = xfrm6_init();
5486 if (ret)
e8803b6c 5487 goto out_fib6_init;
c35b7e72 5488
433d49c3
DL
5489 ret = fib6_rules_init();
5490 if (ret)
5491 goto xfrm6_init;
7e5449c2 5492
d189634e
TG
5493 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5494 if (ret)
5495 goto fib6_rules_init;
5496
16feebcf
FW
5497 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5498 inet6_rtm_newroute, NULL, 0);
5499 if (ret < 0)
5500 goto out_register_late_subsys;
5501
5502 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5503 inet6_rtm_delroute, NULL, 0);
5504 if (ret < 0)
5505 goto out_register_late_subsys;
5506
5507 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5508 inet6_rtm_getroute, NULL,
5509 RTNL_FLAG_DOIT_UNLOCKED);
5510 if (ret < 0)
d189634e 5511 goto out_register_late_subsys;
c127ea2c 5512
8ed67789 5513 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5514 if (ret)
d189634e 5515 goto out_register_late_subsys;
8ed67789 5516
8d0b94af
MKL
5517 for_each_possible_cpu(cpu) {
5518 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5519
5520 INIT_LIST_HEAD(&ul->head);
5521 spin_lock_init(&ul->lock);
5522 }
5523
433d49c3
DL
5524out:
5525 return ret;
5526
d189634e 5527out_register_late_subsys:
16feebcf 5528 rtnl_unregister_all(PF_INET6);
d189634e 5529 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5530fib6_rules_init:
433d49c3
DL
5531 fib6_rules_cleanup();
5532xfrm6_init:
433d49c3 5533 xfrm6_fini();
2a0c451a
TG
5534out_fib6_init:
5535 fib6_gc_cleanup();
8ed67789
DL
5536out_register_subsys:
5537 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5538out_register_inetpeer:
5539 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5540out_dst_entries:
5541 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5542out_kmem_cache:
f2fc6a54 5543 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5544 goto out;
1da177e4
LT
5545}
5546
5547void ip6_route_cleanup(void)
5548{
8ed67789 5549 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5550 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5551 fib6_rules_cleanup();
1da177e4 5552 xfrm6_fini();
1da177e4 5553 fib6_gc_cleanup();
c3426b47 5554 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5555 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5556 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5557 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5558}
This page took 2.613265 seconds and 4 git commands to generate.