]> Git Repo - linux.git/blame - net/ipv6/ip6_fib.c
net: Plumb support for filtering ipv4 and ipv6 multicast route dumps
[linux.git] / net / ipv6 / ip6_fib.c
CommitLineData
1da177e4 1/*
1ab1457c 2 * Linux INET6 implementation
1da177e4
LT
3 * Forwarding Information Database
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
8db46f1d
WY
12 *
13 * Changes:
14 * Yuji SEKIYA @USAGI: Support default route on router node;
15 * remove ip6_null_entry from the top of
16 * routing table.
17 * Ville Nuorvala: Fixed routing subtrees.
1da177e4 18 */
f3213831
JP
19
20#define pr_fmt(fmt) "IPv6: " fmt
21
1da177e4
LT
22#include <linux/errno.h>
23#include <linux/types.h>
24#include <linux/net.h>
25#include <linux/route.h>
26#include <linux/netdevice.h>
27#include <linux/in6.h>
28#include <linux/init.h>
c71099ac 29#include <linux/list.h>
5a0e3ad6 30#include <linux/slab.h>
1da177e4 31
cc5f0eb2 32#include <net/ip.h>
1da177e4
LT
33#include <net/ipv6.h>
34#include <net/ndisc.h>
35#include <net/addrconf.h>
19e42e45 36#include <net/lwtunnel.h>
df77fe4d 37#include <net/fib_notifier.h>
1da177e4
LT
38
39#include <net/ip6_fib.h>
40#include <net/ip6_route.h>
41
/* Slab cache that struct fib6_node objects are allocated from (see node_alloc()). */
437de07c 42static struct kmem_cache *fib6_node_kmem __read_mostly;
1da177e4 43
94b2cfe0
HFS
/*
 * Bundles a tree walker with a per-route callback and the opaque argument
 * handed to that callback.
 * NOTE(review): the code that fills in and consumes this struct is not in
 * this part of the file; confirm the meaning of sernum/skip_notify there.
 */
44struct fib6_cleaner {
45 struct fib6_walker w;
ec7d43c2 46 struct net *net;
8d1c802b 47 int (*func)(struct fib6_info *, void *arg);
327571cb 48 int sernum;
1da177e4 49 void *arg;
7c6bb7d2 50 bool skip_notify;
1da177e4
LT
51};
52
1da177e4
LT
/*
 * FWS_INIT is the state a walker is reset to when a walk has to start over
 * from the root (see fib6_dump_table()). Which walker state that is depends
 * on whether source-address subtrees are compiled in.
 */
53#ifdef CONFIG_IPV6_SUBTREES
54#define FWS_INIT FWS_S
1da177e4
LT
55#else
56#define FWS_INIT FWS_L
1da177e4
LT
57#endif
58
8d1c802b 59static struct fib6_info *fib6_find_prefix(struct net *net,
66f5d6ce
WW
60 struct fib6_table *table,
61 struct fib6_node *fn);
62static struct fib6_node *fib6_repair_tree(struct net *net,
63 struct fib6_table *table,
64 struct fib6_node *fn);
9a03cd8f 65static int fib6_walk(struct net *net, struct fib6_walker *w);
94b2cfe0 66static int fib6_walk_continue(struct fib6_walker *w);
1da177e4
LT
67
68/*
69 * A routing update causes an increase of the serial number on the
70 * affected subtree. This allows for cached routes to be asynchronously
71 * tested when modifications are made to the destination cache as a
72 * result of redirects, path MTU changes, etc.
73 */
74
86cb30ec 75static void fib6_gc_timer_cb(struct timer_list *t);
5b7c931d 76
9a03cd8f
MK
/* Iterate @w over every walker linked on @net's fib6_walkers list. */
77#define FOR_WALKERS(net, w) \
78 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
1da177e4 79
9a03cd8f 80static void fib6_walker_link(struct net *net, struct fib6_walker *w)
90d41122 81{
9a03cd8f
MK
82 write_lock_bh(&net->ipv6.fib6_walker_lock);
83 list_add(&w->lh, &net->ipv6.fib6_walkers);
84 write_unlock_bh(&net->ipv6.fib6_walker_lock);
90d41122
AB
85}
86
9a03cd8f 87static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
90d41122 88{
9a03cd8f 89 write_lock_bh(&net->ipv6.fib6_walker_lock);
bbef49da 90 list_del(&w->lh);
9a03cd8f 91 write_unlock_bh(&net->ipv6.fib6_walker_lock);
90d41122 92}
94b2cfe0 93
812918c4 94static int fib6_new_sernum(struct net *net)
1da177e4 95{
42b18706
HFS
96 int new, old;
97
98 do {
812918c4 99 old = atomic_read(&net->ipv6.fib6_sernum);
42b18706 100 new = old < INT_MAX ? old + 1 : 1;
812918c4
HFS
101 } while (atomic_cmpxchg(&net->ipv6.fib6_sernum,
102 old, new) != old);
42b18706 103 return new;
1da177e4
LT
104}
105
327571cb
HFS
/*
 * Reserved serial-number value. fib6_new_sernum() never returns 0, so this
 * cannot collide with a real serial number.
 * NOTE(review): the users of this constant are outside this excerpt.
 */
106enum {
107 FIB6_NO_SERNUM_CHANGE = 0,
108};
109
93c2fb25 110void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
180ca444 111{
180ca444
WW
112 struct fib6_node *fn;
113
93c2fb25
DA
114 fn = rcu_dereference_protected(f6i->fib6_node,
115 lockdep_is_held(&f6i->fib6_table->tb6_lock));
180ca444
WW
116 if (fn)
117 fn->fn_sernum = fib6_new_sernum(net);
180ca444
WW
118}
119
1da177e4
LT
120/*
121 * Auxiliary address test functions for the radix tree.
122 *
1ab1457c 123 * These assume a 32bit processor (although it will work on
1da177e4
LT
124 * 64bit processors)
125 */
126
127/*
128 * test bit
129 */
02cdce53
YH
/*
 * XOR mask applied to the bit index in addr_bit_set(). On little-endian
 * hosts it flips bits 3 and 4 of the index (the byte-select bits within a
 * 32-bit word) so the resulting mask is already in big-endian layout and no
 * htonl() is needed; on big-endian hosts it is a no-op.
 */
130#if defined(__LITTLE_ENDIAN)
131# define BITOP_BE32_SWIZZLE (0x1F & ~7)
132#else
133# define BITOP_BE32_SWIZZLE 0
134#endif
1da177e4 135
94b2cfe0 136static __be32 addr_bit_set(const void *token, int fn_bit)
1da177e4 137{
b71d1d42 138 const __be32 *addr = token;
02cdce53
YH
139 /*
140 * Here,
8db46f1d 141 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
02cdce53
YH
142 * is optimized version of
143 * htonl(1 << ((~fn_bit)&0x1F))
144 * See include/asm-generic/bitops/le.h.
145 */
0eae88f3
ED
146 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
147 addr[fn_bit >> 5];
1da177e4
LT
148}
149
8d1c802b 150struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
a64efe14 151{
8d1c802b 152 struct fib6_info *f6i;
a64efe14
DA
153
154 f6i = kzalloc(sizeof(*f6i), gfp_flags);
155 if (!f6i)
156 return NULL;
157
158 f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
159 if (!f6i->rt6i_pcpu) {
160 kfree(f6i);
161 return NULL;
162 }
163
93c2fb25 164 INIT_LIST_HEAD(&f6i->fib6_siblings);
93c2fb25 165 atomic_inc(&f6i->fib6_ref);
a64efe14
DA
166
167 return f6i;
168}
169
9b0a8da8 170void fib6_info_destroy_rcu(struct rcu_head *head)
a64efe14 171{
9b0a8da8 172 struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
a64efe14
DA
173 struct rt6_exception_bucket *bucket;
174
93c2fb25 175 WARN_ON(f6i->fib6_node);
a64efe14
DA
176
177 bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
178 if (bucket) {
179 f6i->rt6i_exception_bucket = NULL;
180 kfree(bucket);
181 }
182
183 if (f6i->rt6i_pcpu) {
184 int cpu;
185
186 for_each_possible_cpu(cpu) {
187 struct rt6_info **ppcpu_rt;
188 struct rt6_info *pcpu_rt;
189
190 ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
191 pcpu_rt = *ppcpu_rt;
192 if (pcpu_rt) {
193 dst_dev_put(&pcpu_rt->dst);
194 dst_release(&pcpu_rt->dst);
195 *ppcpu_rt = NULL;
196 }
197 }
7abab7b9
MR
198
199 free_percpu(f6i->rt6i_pcpu);
a64efe14
DA
200 }
201
80f1a0f4
DA
202 lwtstate_put(f6i->fib6_nh.nh_lwtstate);
203
a64efe14
DA
204 if (f6i->fib6_nh.nh_dev)
205 dev_put(f6i->fib6_nh.nh_dev);
206
cc5f0eb2 207 ip_fib_metrics_put(f6i->fib6_metrics);
93531c67 208
a64efe14
DA
209 kfree(f6i);
210}
9b0a8da8 211EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
a64efe14 212
81eb8447 213static struct fib6_node *node_alloc(struct net *net)
1da177e4
LT
214{
215 struct fib6_node *fn;
216
c3762229 217 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
81eb8447
WW
218 if (fn)
219 net->ipv6.rt6_stats->fib_nodes++;
1da177e4
LT
220
221 return fn;
222}
223
81eb8447 224static void node_free_immediate(struct net *net, struct fib6_node *fn)
c5cff856
WW
225{
226 kmem_cache_free(fib6_node_kmem, fn);
81eb8447 227 net->ipv6.rt6_stats->fib_nodes--;
c5cff856
WW
228}
229
230static void node_free_rcu(struct rcu_head *head)
1da177e4 231{
c5cff856
WW
232 struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
233
1da177e4
LT
234 kmem_cache_free(fib6_node_kmem, fn);
235}
236
81eb8447 237static void node_free(struct net *net, struct fib6_node *fn)
c5cff856
WW
238{
239 call_rcu(&fn->rcu, node_free_rcu);
81eb8447 240 net->ipv6.rt6_stats->fib_nodes--;
c5cff856
WW
241}
242
ba1cc08d
SD
243static void fib6_free_table(struct fib6_table *table)
244{
245 inetpeer_invalidate_tree(&table->tb6_peers);
246 kfree(table);
247}
248
58f09b78 249static void fib6_link_table(struct net *net, struct fib6_table *tb)
1b43af54
PM
250{
251 unsigned int h;
252
375216ad
TG
253 /*
254 * Initialize table lock at a single place to give lockdep a key,
255 * tables aren't visible prior to being linked to the list.
256 */
66f5d6ce 257 spin_lock_init(&tb->tb6_lock);
a33bc5c1 258 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
1b43af54
PM
259
260 /*
261 * No protection necessary, this is the only list mutatation
262 * operation, tables never disappear once they exist.
263 */
58f09b78 264 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
1b43af54 265}
c71099ac 266
1b43af54 267#ifdef CONFIG_IPV6_MULTIPLE_TABLES
e0b85590 268
8ed67789 269static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
c71099ac
TG
270{
271 struct fib6_table *table;
272
273 table = kzalloc(sizeof(*table), GFP_ATOMIC);
507c9b1e 274 if (table) {
c71099ac 275 table->tb6_id = id;
66f5d6ce 276 rcu_assign_pointer(table->tb6_root.leaf,
421842ed 277 net->ipv6.fib6_null_entry);
c71099ac 278 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
8e773277 279 inet_peer_base_init(&table->tb6_peers);
c71099ac
TG
280 }
281
282 return table;
283}
284
58f09b78 285struct fib6_table *fib6_new_table(struct net *net, u32 id)
c71099ac
TG
286{
287 struct fib6_table *tb;
288
289 if (id == 0)
290 id = RT6_TABLE_MAIN;
58f09b78 291 tb = fib6_get_table(net, id);
c71099ac
TG
292 if (tb)
293 return tb;
294
8ed67789 295 tb = fib6_alloc_table(net, id);
507c9b1e 296 if (tb)
58f09b78 297 fib6_link_table(net, tb);
c71099ac
TG
298
299 return tb;
300}
b3b4663c 301EXPORT_SYMBOL_GPL(fib6_new_table);
c71099ac 302
58f09b78 303struct fib6_table *fib6_get_table(struct net *net, u32 id)
c71099ac
TG
304{
305 struct fib6_table *tb;
58f09b78 306 struct hlist_head *head;
c71099ac
TG
307 unsigned int h;
308
309 if (id == 0)
310 id = RT6_TABLE_MAIN;
a33bc5c1 311 h = id & (FIB6_TABLE_HASHSZ - 1);
c71099ac 312 rcu_read_lock();
58f09b78 313 head = &net->ipv6.fib_table_hash[h];
b67bfe0d 314 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
c71099ac
TG
315 if (tb->tb6_id == id) {
316 rcu_read_unlock();
317 return tb;
318 }
319 }
320 rcu_read_unlock();
321
322 return NULL;
323}
c4850687 324EXPORT_SYMBOL_GPL(fib6_get_table);
c71099ac 325
2c8c1e72 326static void __net_init fib6_tables_init(struct net *net)
c71099ac 327{
58f09b78
DL
328 fib6_link_table(net, net->ipv6.fib6_main_tbl);
329 fib6_link_table(net, net->ipv6.fib6_local_tbl);
c71099ac 330}
c71099ac
TG
331#else
332
58f09b78 333struct fib6_table *fib6_new_table(struct net *net, u32 id)
c71099ac 334{
58f09b78 335 return fib6_get_table(net, id);
c71099ac
TG
336}
337
58f09b78 338struct fib6_table *fib6_get_table(struct net *net, u32 id)
c71099ac 339{
58f09b78 340 return net->ipv6.fib6_main_tbl;
c71099ac
TG
341}
342
4c9483b2 343struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 344 const struct sk_buff *skb,
58f09b78 345 int flags, pol_lookup_t lookup)
c71099ac 346{
ab997ad4 347 struct rt6_info *rt;
348
b75cc8f9 349 rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
07f61557 350 if (rt->dst.error == -EAGAIN) {
ab997ad4 351 ip6_rt_put(rt);
352 rt = net->ipv6.ip6_null_entry;
353 dst_hold(&rt->dst);
354 }
355
356 return &rt->dst;
c71099ac
TG
357}
358
138118ec
DA
359/* called with rcu lock held; no reference taken on fib6_info */
360struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
361 int flags)
362{
363 return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
364}
365
2c8c1e72 366static void __net_init fib6_tables_init(struct net *net)
c71099ac 367{
58f09b78 368 fib6_link_table(net, net->ipv6.fib6_main_tbl);
c71099ac
TG
369}
370
371#endif
372
e1ee0a5b
IS
373unsigned int fib6_tables_seq_read(struct net *net)
374{
375 unsigned int h, fib_seq = 0;
376
377 rcu_read_lock();
378 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
379 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
380 struct fib6_table *tb;
381
66f5d6ce 382 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
e1ee0a5b 383 fib_seq += tb->fib_seq;
e1ee0a5b
IS
384 }
385 rcu_read_unlock();
386
387 return fib_seq;
388}
389
390static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
391 enum fib_event_type event_type,
8d1c802b 392 struct fib6_info *rt)
e1ee0a5b
IS
393{
394 struct fib6_entry_notifier_info info = {
395 .rt = rt,
396 };
397
398 return call_fib6_notifier(nb, net, event_type, &info.info);
399}
400
df77fe4d
IS
401static int call_fib6_entry_notifiers(struct net *net,
402 enum fib_event_type event_type,
8d1c802b 403 struct fib6_info *rt,
6c31e5a9 404 struct netlink_ext_ack *extack)
df77fe4d
IS
405{
406 struct fib6_entry_notifier_info info = {
6c31e5a9 407 .info.extack = extack,
df77fe4d
IS
408 .rt = rt,
409 };
410
93c2fb25 411 rt->fib6_table->fib_seq++;
df77fe4d
IS
412 return call_fib6_notifiers(net, event_type, &info.info);
413}
414
e1ee0a5b
IS
/*
 * Argument carried in fib6_walker::args by fib6_tables_dump(): the network
 * namespace being dumped and the notifier block the routes are replayed to
 * (consumed by fib6_rt_dump()).
 */
415struct fib6_dump_arg {
416 struct net *net;
417 struct notifier_block *nb;
418};
419
8d1c802b 420static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
e1ee0a5b 421{
421842ed 422 if (rt == arg->net->ipv6.fib6_null_entry)
e1ee0a5b
IS
423 return;
424 call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
425}
426
427static int fib6_node_dump(struct fib6_walker *w)
428{
8d1c802b 429 struct fib6_info *rt;
e1ee0a5b 430
66f5d6ce 431 for_each_fib6_walker_rt(w)
e1ee0a5b
IS
432 fib6_rt_dump(rt, w->args);
433 w->leaf = NULL;
434 return 0;
435}
436
437static void fib6_table_dump(struct net *net, struct fib6_table *tb,
438 struct fib6_walker *w)
439{
440 w->root = &tb->tb6_root;
66f5d6ce 441 spin_lock_bh(&tb->tb6_lock);
e1ee0a5b 442 fib6_walk(net, w);
66f5d6ce 443 spin_unlock_bh(&tb->tb6_lock);
e1ee0a5b
IS
444}
445
446/* Called with rcu_read_lock() */
447int fib6_tables_dump(struct net *net, struct notifier_block *nb)
448{
449 struct fib6_dump_arg arg;
450 struct fib6_walker *w;
451 unsigned int h;
452
453 w = kzalloc(sizeof(*w), GFP_ATOMIC);
454 if (!w)
455 return -ENOMEM;
456
457 w->func = fib6_node_dump;
458 arg.net = net;
459 arg.nb = nb;
460 w->args = &arg;
461
462 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
463 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
464 struct fib6_table *tb;
465
466 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
467 fib6_table_dump(net, tb, w);
468 }
469
470 kfree(w);
471
472 return 0;
473}
474
94b2cfe0 475static int fib6_dump_node(struct fib6_walker *w)
1b43af54
PM
476{
477 int res;
8d1c802b 478 struct fib6_info *rt;
1b43af54 479
66f5d6ce 480 for_each_fib6_walker_rt(w) {
1b43af54
PM
481 res = rt6_dump_route(rt, w->args);
482 if (res < 0) {
483 /* Frame is full, suspend walking */
484 w->leaf = rt;
485 return 1;
486 }
beb1afac
DA
487
488 /* Multipath routes are dumped in one route with the
489 * RTA_MULTIPATH attribute. Jump 'rt' to point to the
490 * last sibling of this route (no need to dump the
491 * sibling routes again)
492 */
93c2fb25
DA
493 if (rt->fib6_nsiblings)
494 rt = list_last_entry(&rt->fib6_siblings,
8d1c802b 495 struct fib6_info,
93c2fb25 496 fib6_siblings);
1b43af54
PM
497 }
498 w->leaf = NULL;
499 return 0;
500}
501
502static void fib6_dump_end(struct netlink_callback *cb)
503{
9a03cd8f 504 struct net *net = sock_net(cb->skb->sk);
94b2cfe0 505 struct fib6_walker *w = (void *)cb->args[2];
1b43af54
PM
506
507 if (w) {
7891cc81
HX
508 if (cb->args[4]) {
509 cb->args[4] = 0;
9a03cd8f 510 fib6_walker_unlink(net, w);
7891cc81 511 }
1b43af54
PM
512 cb->args[2] = 0;
513 kfree(w);
514 }
437de07c 515 cb->done = (void *)cb->args[3];
1b43af54
PM
516 cb->args[1] = 3;
517}
518
519static int fib6_dump_done(struct netlink_callback *cb)
520{
521 fib6_dump_end(cb);
522 return cb->done ? cb->done(cb) : 0;
523}
524
525static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
526 struct netlink_callback *cb)
527{
9a03cd8f 528 struct net *net = sock_net(skb->sk);
94b2cfe0 529 struct fib6_walker *w;
1b43af54
PM
530 int res;
531
532 w = (void *)cb->args[2];
533 w->root = &table->tb6_root;
534
535 if (cb->args[4] == 0) {
2bec5a36
PM
536 w->count = 0;
537 w->skip = 0;
538
66f5d6ce 539 spin_lock_bh(&table->tb6_lock);
9a03cd8f 540 res = fib6_walk(net, w);
66f5d6ce 541 spin_unlock_bh(&table->tb6_lock);
2bec5a36 542 if (res > 0) {
1b43af54 543 cb->args[4] = 1;
2bec5a36
PM
544 cb->args[5] = w->root->fn_sernum;
545 }
1b43af54 546 } else {
2bec5a36
PM
547 if (cb->args[5] != w->root->fn_sernum) {
548 /* Begin at the root if the tree changed */
549 cb->args[5] = w->root->fn_sernum;
550 w->state = FWS_INIT;
551 w->node = w->root;
552 w->skip = w->count;
553 } else
554 w->skip = 0;
555
66f5d6ce 556 spin_lock_bh(&table->tb6_lock);
1b43af54 557 res = fib6_walk_continue(w);
66f5d6ce 558 spin_unlock_bh(&table->tb6_lock);
7891cc81 559 if (res <= 0) {
9a03cd8f 560 fib6_walker_unlink(net, w);
7891cc81 561 cb->args[4] = 0;
1b43af54 562 }
1b43af54 563 }
7891cc81 564
1b43af54
PM
565 return res;
566}
567
c127ea2c 568static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1b43af54 569{
e8ba330a 570 const struct nlmsghdr *nlh = cb->nlh;
3b1e0a65 571 struct net *net = sock_net(skb->sk);
4724676d 572 struct rt6_rtnl_dump_arg arg = {};
1b43af54
PM
573 unsigned int h, s_h;
574 unsigned int e = 0, s_e;
94b2cfe0 575 struct fib6_walker *w;
1b43af54 576 struct fib6_table *tb;
58f09b78 577 struct hlist_head *head;
1b43af54
PM
578 int res = 0;
579
e8ba330a 580 if (cb->strict_check) {
4724676d 581 int err;
e8ba330a 582
4724676d 583 err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb->extack);
e8ba330a
DA
584 if (err < 0)
585 return err;
13e38901
DA
586 } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
587 struct rtmsg *rtm = nlmsg_data(nlh);
e8ba330a 588
13e38901
DA
589 if (rtm->rtm_flags & RTM_F_PREFIX)
590 arg.filter.flags = RTM_F_PREFIX;
591 }
1b43af54
PM
592
593 w = (void *)cb->args[2];
507c9b1e 594 if (!w) {
1b43af54
PM
595 /* New dump:
596 *
597 * 1. hook callback destructor.
598 */
599 cb->args[3] = (long)cb->done;
600 cb->done = fib6_dump_done;
601
602 /*
603 * 2. allocate and initialize walker.
604 */
605 w = kzalloc(sizeof(*w), GFP_ATOMIC);
507c9b1e 606 if (!w)
1b43af54
PM
607 return -ENOMEM;
608 w->func = fib6_dump_node;
609 cb->args[2] = (long)w;
610 }
611
612 arg.skb = skb;
613 arg.cb = cb;
191cd582 614 arg.net = net;
1b43af54
PM
615 w->args = &arg;
616
13e38901
DA
617 if (arg.filter.table_id) {
618 tb = fib6_get_table(net, arg.filter.table_id);
619 if (!tb) {
620 NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
621 return -ENOENT;
622 }
623
624 res = fib6_dump_table(tb, skb, cb);
625 goto out;
626 }
627
628 s_h = cb->args[0];
629 s_e = cb->args[1];
630
e67f88dd 631 rcu_read_lock();
a33bc5c1 632 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
1b43af54 633 e = 0;
58f09b78 634 head = &net->ipv6.fib_table_hash[h];
b67bfe0d 635 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
1b43af54
PM
636 if (e < s_e)
637 goto next;
638 res = fib6_dump_table(tb, skb, cb);
639 if (res != 0)
13e38901 640 goto out_unlock;
1b43af54
PM
641next:
642 e++;
643 }
644 }
13e38901 645out_unlock:
e67f88dd 646 rcu_read_unlock();
1b43af54
PM
647 cb->args[1] = e;
648 cb->args[0] = h;
13e38901 649out:
1b43af54
PM
650 res = res < 0 ? res : skb->len;
651 if (res <= 0)
652 fib6_dump_end(cb);
653 return res;
654}
1da177e4 655
8d1c802b 656void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
d4ead6b3
DA
657{
658 if (!f6i)
659 return;
660
661 if (f6i->fib6_metrics == &dst_default_metrics) {
662 struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
663
664 if (!p)
665 return;
666
667 refcount_set(&p->refcnt, 1);
668 f6i->fib6_metrics = p;
669 }
670
671 f6i->fib6_metrics->metrics[metric - 1] = val;
672}
673
1da177e4
LT
674/*
675 * Routing Table
676 *
677 * return the appropriate node for a routing tree "add" operation
678 * by either creating and inserting or by returning an existing
679 * node.
680 */
681
81eb8447
WW
/*
 * fib6_add_1 - find or create the tree node for prefix @addr/@plen.
 *
 * @net:              namespace; supplies the node statistics updated by
 *                    node_alloc() and the fib6_null_entry placeholder
 * @table:            table owning the tree; its tb6_lock is what the lockdep
 *                    annotations below expect to be held
 * @root:             node at which the descent starts
 * @addr, @plen:      prefix to locate or insert
 * @offset:           byte offset, inside struct fib6_info, of the rt6key
 *                    that each node's leaf is compared by
 * @allow_create:     if zero and a new node is needed, a warning is logged;
 *                    the node is still created unless @replace_required
 * @replace_required: if set together with !@allow_create, a missing node is
 *                    an error
 * @extack:           receives the "no match found" message in that case
 *
 * Returns the existing node on an exact match, a newly linked node
 * otherwise, ERR_PTR(-ENOENT) when a replace found no match, or
 * ERR_PTR(-ENOMEM) when node allocation fails.
 */
682static struct fib6_node *fib6_add_1(struct net *net,
683 struct fib6_table *table,
66f5d6ce
WW
684 struct fib6_node *root,
685 struct in6_addr *addr, int plen,
686 int offset, int allow_create,
687 int replace_required,
688 struct netlink_ext_ack *extack)
1da177e4
LT
689{
690 struct fib6_node *fn, *in, *ln;
691 struct fib6_node *pn = NULL;
692 struct rt6key *key;
693 int bit;
1ab1457c 694 __be32 dir = 0;
1da177e4
LT
695
696 RT6_TRACE("fib6_add_1\n");
697
698 /* insert node in tree */
699
700 fn = root;
701
702 do {
/* the key to compare against lives @offset bytes into this node's leaf */
8d1c802b 703 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
66f5d6ce
WW
704 lockdep_is_held(&table->tb6_lock));
705 key = (struct rt6key *)((u8 *)leaf + offset);
1da177e4
LT
706
707 /*
708 * Prefix match
709 */
710 if (plen < fn->fn_bit ||
4a287eba 711 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
14df015b
MV
712 if (!allow_create) {
713 if (replace_required) {
d5d531cb
DA
714 NL_SET_ERR_MSG(extack,
715 "Can not replace route - no match found");
f3213831 716 pr_warn("Can't replace route, no match found\n");
14df015b
MV
717 return ERR_PTR(-ENOENT);
718 }
f3213831 719 pr_warn("NLM_F_CREATE should be set when creating new route\n");
14df015b 720 }
1da177e4 721 goto insert_above;
4a287eba 722 }
1ab1457c 723
1da177e4
LT
724 /*
725 * Exact match ?
726 */
1ab1457c 727
1da177e4
LT
728 if (plen == fn->fn_bit) {
729 /* clean up an intermediate node */
507c9b1e 730 if (!(fn->fn_flags & RTN_RTINFO)) {
66f5d6ce 731 RCU_INIT_POINTER(fn->leaf, NULL);
93531c67 732 fib6_info_release(leaf);
4512c43e
WW
733 /* remove null_entry in the root node */
734 } else if (fn->fn_flags & RTN_TL_ROOT &&
735 rcu_access_pointer(fn->leaf) ==
421842ed 736 net->ipv6.fib6_null_entry) {
4512c43e 737 RCU_INIT_POINTER(fn->leaf, NULL);
1da177e4 738 }
1ab1457c 739
1da177e4
LT
740 return fn;
741 }
742
743 /*
744 * We have more bits to go
745 */
1ab1457c 746
1da177e4 747 /* Try to walk down on tree. */
1da177e4
LT
748 dir = addr_bit_set(addr, fn->fn_bit);
749 pn = fn;
66f5d6ce
WW
750 fn = dir ?
751 rcu_dereference_protected(fn->right,
752 lockdep_is_held(&table->tb6_lock)) :
753 rcu_dereference_protected(fn->left,
754 lockdep_is_held(&table->tb6_lock));
1da177e4
LT
755 } while (fn);
756
14df015b 757 if (!allow_create) {
4a287eba
MV
758 /* We should not create new node because
759 * NLM_F_REPLACE was specified without NLM_F_CREATE
760 * I assume it is safe to require NLM_F_CREATE when
761 * REPLACE flag is used! Later we may want to remove the
762 * check for replace_required, because according
763 * to netlink specification, NLM_F_CREATE
764 * MUST be specified if new route is created.
765 * That would keep IPv6 consistent with IPv4
766 */
14df015b 767 if (replace_required) {
d5d531cb
DA
768 NL_SET_ERR_MSG(extack,
769 "Can not replace route - no match found");
f3213831 770 pr_warn("Can't replace route, no match found\n");
14df015b
MV
771 return ERR_PTR(-ENOENT);
772 }
f3213831 773 pr_warn("NLM_F_CREATE should be set when creating new route\n");
4a287eba 774 }
1da177e4
LT
775 /*
776 * We walked to the bottom of tree.
777 * Create new leaf node without children.
778 */
779
81eb8447 780 ln = node_alloc(net);
1da177e4 781
507c9b1e 782 if (!ln)
188c517a 783 return ERR_PTR(-ENOMEM);
1da177e4 784 ln->fn_bit = plen;
66f5d6ce 785 RCU_INIT_POINTER(ln->parent, pn);
1da177e4
LT
786
787 if (dir)
66f5d6ce 788 rcu_assign_pointer(pn->right, ln);
1da177e4 789 else
66f5d6ce 790 rcu_assign_pointer(pn->left, ln);
1da177e4
LT
791
792 return ln;
793
794
795insert_above:
796 /*
1ab1457c 797 * split since we don't have a common prefix anymore or
1da177e4
LT
798 * we have a less significant route.
799 * we've to insert an intermediate node on the list
800 * this new node will point to the one we need to create
801 * and the current
802 */
803
/* dir still records which branch was taken when descending into fn */
66f5d6ce
WW
804 pn = rcu_dereference_protected(fn->parent,
805 lockdep_is_held(&table->tb6_lock));
1da177e4
LT
806
807 /* find 1st bit in difference between the 2 addrs.
808
971f359d 809 See comment in __ipv6_addr_diff: bit may be an invalid value,
1da177e4
LT
810 but if it is >= plen, the value is ignored in any case.
811 */
1ab1457c 812
9225b230 813 bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
1da177e4 814
1ab1457c
YH
815 /*
816 * (intermediate)[in]
1da177e4
LT
817 * / \
818 * (new leaf node)[ln] (old node)[fn]
819 */
820 if (plen > bit) {
81eb8447
WW
821 in = node_alloc(net);
822 ln = node_alloc(net);
1ab1457c 823
507c9b1e 824 if (!in || !ln) {
1da177e4 825 if (in)
81eb8447 826 node_free_immediate(net, in);
1da177e4 827 if (ln)
81eb8447 828 node_free_immediate(net, ln);
188c517a 829 return ERR_PTR(-ENOMEM);
1da177e4
LT
830 }
831
1ab1457c
YH
832 /*
833 * new intermediate node.
1da177e4
LT
834 * RTN_RTINFO will
835 * be off since that an address that chooses one of
836 * the branches would not match less specific routes
837 * in the other branch
838 */
839
840 in->fn_bit = bit;
841
66f5d6ce 842 RCU_INIT_POINTER(in->parent, pn);
/* the intermediate node borrows fn's leaf and takes its own reference on it */
1da177e4 843 in->leaf = fn->leaf;
66f5d6ce 844 atomic_inc(&rcu_dereference_protected(in->leaf,
93c2fb25 845 lockdep_is_held(&table->tb6_lock))->fib6_ref);
1da177e4 846
1da177e4
LT
847 /* update parent pointer */
848 if (dir)
66f5d6ce 849 rcu_assign_pointer(pn->right, in);
1da177e4 850 else
66f5d6ce 851 rcu_assign_pointer(pn->left, in);
1da177e4
LT
852
853 ln->fn_bit = plen;
854
66f5d6ce
WW
855 RCU_INIT_POINTER(ln->parent, in);
856 rcu_assign_pointer(fn->parent, in);
1da177e4 857
1da177e4 858 if (addr_bit_set(addr, bit)) {
66f5d6ce
WW
859 rcu_assign_pointer(in->right, ln);
860 rcu_assign_pointer(in->left, fn);
1da177e4 861 } else {
66f5d6ce
WW
862 rcu_assign_pointer(in->left, ln);
863 rcu_assign_pointer(in->right, fn);
1da177e4
LT
864 }
865 } else { /* plen <= bit */
866
1ab1457c 867 /*
1da177e4
LT
868 * (new leaf node)[ln]
869 * / \
870 * (old node)[fn] NULL
871 */
872
81eb8447 873 ln = node_alloc(net);
1da177e4 874
507c9b1e 875 if (!ln)
188c517a 876 return ERR_PTR(-ENOMEM);
1da177e4
LT
877
878 ln->fn_bit = plen;
879
66f5d6ce 880 RCU_INIT_POINTER(ln->parent, pn);
1da177e4
LT
881
882 if (addr_bit_set(&key->addr, plen))
66f5d6ce 883 RCU_INIT_POINTER(ln->right, fn);
1da177e4 884 else
66f5d6ce
WW
885 RCU_INIT_POINTER(ln->left, fn);
886
887 rcu_assign_pointer(fn->parent, ln);
1da177e4 888
66f5d6ce
WW
889 if (dir)
890 rcu_assign_pointer(pn->right, ln);
891 else
892 rcu_assign_pointer(pn->left, ln);
1da177e4
LT
893 }
894 return ln;
895}
896
5bcaa41b
DA
897static void fib6_drop_pcpu_from(struct fib6_info *f6i,
898 const struct fib6_table *table)
e715b6d3 899{
5bcaa41b 900 int cpu;
e715b6d3 901
5bcaa41b
DA
902 /* release the reference to this fib entry from
903 * all of its cached pcpu routes
904 */
905 for_each_possible_cpu(cpu) {
906 struct rt6_info **ppcpu_rt;
907 struct rt6_info *pcpu_rt;
e715b6d3 908
5bcaa41b
DA
909 ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
910 pcpu_rt = *ppcpu_rt;
911 if (pcpu_rt) {
a68886a6 912 struct fib6_info *from;
e715b6d3 913
a68886a6
DA
914 from = rcu_dereference_protected(pcpu_rt->from,
915 lockdep_is_held(&table->tb6_lock));
916 rcu_assign_pointer(pcpu_rt->from, NULL);
917 fib6_info_release(from);
5bcaa41b 918 }
e5fd387a 919 }
e5fd387a
MK
920}
921
8d1c802b 922static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
6e9e16e6
HFS
923 struct net *net)
924{
93c2fb25 925 struct fib6_table *table = rt->fib6_table;
66f5d6ce 926
93c2fb25 927 if (atomic_read(&rt->fib6_ref) != 1) {
6e9e16e6
HFS
928 /* This route is used as dummy address holder in some split
929 * nodes. It is not leaked, but it still holds other resources,
930 * which must be released in time. So, scan ascendant nodes
931 * and replace dummy references to this route with references
932 * to still alive ones.
933 */
934 while (fn) {
8d1c802b 935 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
66f5d6ce 936 lockdep_is_held(&table->tb6_lock));
8d1c802b 937 struct fib6_info *new_leaf;
66f5d6ce
WW
938 if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
939 new_leaf = fib6_find_prefix(net, table, fn);
93c2fb25 940 atomic_inc(&new_leaf->fib6_ref);
93531c67 941
66f5d6ce 942 rcu_assign_pointer(fn->leaf, new_leaf);
93531c67 943 fib6_info_release(rt);
6e9e16e6 944 }
66f5d6ce
WW
945 fn = rcu_dereference_protected(fn->parent,
946 lockdep_is_held(&table->tb6_lock));
6e9e16e6 947 }
93531c67 948
5bcaa41b
DA
949 if (rt->rt6i_pcpu)
950 fib6_drop_pcpu_from(rt, table);
6e9e16e6
HFS
951 }
952}
953
1da177e4
LT
954/*
955 * Insert routing information in a node.
956 */
957
8d1c802b 958static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
d4ead6b3 959 struct nl_info *info,
6c31e5a9 960 struct netlink_ext_ack *extack)
1da177e4 961{
8d1c802b 962 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
93c2fb25 963 lockdep_is_held(&rt->fib6_table->tb6_lock));
33bd5ac5 964 struct fib6_info *iter = NULL;
8d1c802b 965 struct fib6_info __rcu **ins;
33bd5ac5 966 struct fib6_info __rcu **fallback_ins = NULL;
507c9b1e
DM
967 int replace = (info->nlh &&
968 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
969 int add = (!info->nlh ||
970 (info->nlh->nlmsg_flags & NLM_F_CREATE));
4a287eba 971 int found = 0;
33bd5ac5 972 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
73483c12 973 u16 nlflags = NLM_F_EXCL;
e5fd387a 974 int err;
1da177e4 975
33bd5ac5 976 if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
1f5e29ce
DA
977 nlflags |= NLM_F_APPEND;
978
1da177e4
LT
979 ins = &fn->leaf;
980
66f5d6ce 981 for (iter = leaf; iter;
8fb11a9a 982 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 983 lockdep_is_held(&rt->fib6_table->tb6_lock))) {
1da177e4
LT
984 /*
985 * Search for duplicates
986 */
987
93c2fb25 988 if (iter->fib6_metric == rt->fib6_metric) {
1da177e4
LT
989 /*
990 * Same priority level
991 */
507c9b1e
DM
992 if (info->nlh &&
993 (info->nlh->nlmsg_flags & NLM_F_EXCL))
4a287eba 994 return -EEXIST;
73483c12
GN
995
996 nlflags &= ~NLM_F_EXCL;
4a287eba 997 if (replace) {
33bd5ac5
DA
998 if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
999 found++;
1000 break;
1001 }
1002 if (rt_can_ecmp)
1003 fallback_ins = fallback_ins ?: ins;
1004 goto next_iter;
4a287eba 1005 }
1da177e4 1006
f06b7549 1007 if (rt6_duplicate_nexthop(iter, rt)) {
93c2fb25
DA
1008 if (rt->fib6_nsiblings)
1009 rt->fib6_nsiblings = 0;
1010 if (!(iter->fib6_flags & RTF_EXPIRES))
1da177e4 1011 return -EEXIST;
93c2fb25 1012 if (!(rt->fib6_flags & RTF_EXPIRES))
14895687 1013 fib6_clean_expires(iter);
1716a961 1014 else
14895687 1015 fib6_set_expires(iter, rt->expires);
15a81b41
DA
1016
1017 if (rt->fib6_pmtu)
1018 fib6_metric_set(iter, RTAX_MTU,
1019 rt->fib6_pmtu);
1da177e4
LT
1020 return -EEXIST;
1021 }
33bd5ac5
DA
1022 /* If we have the same destination and the same metric,
1023 * but not the same gateway, then the route we try to
1024 * add is sibling to this route, increment our counter
1025 * of siblings, and later we will add our route to the
1026 * list.
1027 * Only static routes (which don't have flag
1028 * RTF_EXPIRES) are used for ECMPv6.
1029 *
1030 * To avoid long list, we only had siblings if the
1031 * route have a gateway.
1032 */
1033 if (rt_can_ecmp &&
1034 rt6_qualify_for_ecmp(iter))
1035 rt->fib6_nsiblings++;
1da177e4
LT
1036 }
1037
93c2fb25 1038 if (iter->fib6_metric > rt->fib6_metric)
1da177e4
LT
1039 break;
1040
33bd5ac5 1041next_iter:
8fb11a9a 1042 ins = &iter->fib6_next;
27596472
MK
1043 }
1044
33bd5ac5
DA
1045 if (fallback_ins && !found) {
1046 /* No ECMP-able route found, replace first non-ECMP one */
1047 ins = fallback_ins;
1048 iter = rcu_dereference_protected(*ins,
1049 lockdep_is_held(&rt->fib6_table->tb6_lock));
1050 found++;
1051 }
1052
f11e6659
DM
1053 /* Reset round-robin state, if necessary */
1054 if (ins == &fn->leaf)
1055 fn->rr_ptr = NULL;
1056
51ebd318 1057 /* Link this route to others same route. */
33bd5ac5
DA
1058 if (rt->fib6_nsiblings) {
1059 unsigned int fib6_nsiblings;
8d1c802b 1060 struct fib6_info *sibling, *temp_sibling;
51ebd318 1061
33bd5ac5
DA
1062 /* Find the first route that have the same metric */
1063 sibling = leaf;
1064 while (sibling) {
1065 if (sibling->fib6_metric == rt->fib6_metric &&
1066 rt6_qualify_for_ecmp(sibling)) {
1067 list_add_tail(&rt->fib6_siblings,
1068 &sibling->fib6_siblings);
1069 break;
1070 }
1071 sibling = rcu_dereference_protected(sibling->fib6_next,
1072 lockdep_is_held(&rt->fib6_table->tb6_lock));
51ebd318
ND
1073 }
1074 /* For each sibling in the list, increment the counter of
1075 * siblings. BUG() if counters does not match, list of siblings
1076 * is broken!
1077 */
33bd5ac5 1078 fib6_nsiblings = 0;
51ebd318 1079 list_for_each_entry_safe(sibling, temp_sibling,
33bd5ac5 1080 &rt->fib6_siblings, fib6_siblings) {
93c2fb25 1081 sibling->fib6_nsiblings++;
33bd5ac5
DA
1082 BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
1083 fib6_nsiblings++;
51ebd318 1084 }
33bd5ac5
DA
1085 BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
1086 rt6_multipath_rebalance(temp_sibling);
51ebd318
ND
1087 }
1088
1da177e4
LT
1089 /*
1090 * insert node
1091 */
4a287eba
MV
1092 if (!replace) {
1093 if (!add)
f3213831 1094 pr_warn("NLM_F_CREATE should be set when creating new route\n");
4a287eba
MV
1095
1096add:
73483c12 1097 nlflags |= NLM_F_CREATE;
e715b6d3 1098
33bd5ac5
DA
1099 err = call_fib6_entry_notifiers(info->nl_net,
1100 FIB_EVENT_ENTRY_ADD,
1101 rt, extack);
2233000c
DA
1102 if (err)
1103 return err;
1104
8fb11a9a 1105 rcu_assign_pointer(rt->fib6_next, iter);
93c2fb25
DA
1106 atomic_inc(&rt->fib6_ref);
1107 rcu_assign_pointer(rt->fib6_node, fn);
66f5d6ce 1108 rcu_assign_pointer(*ins, rt);
3b1137fe
DA
1109 if (!info->skip_notify)
1110 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4a287eba
MV
1111 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
1112
507c9b1e 1113 if (!(fn->fn_flags & RTN_RTINFO)) {
4a287eba
MV
1114 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1115 fn->fn_flags |= RTN_RTINFO;
1116 }
1da177e4 1117
4a287eba 1118 } else {
33bd5ac5 1119 int nsiblings;
27596472 1120
4a287eba
MV
1121 if (!found) {
1122 if (add)
1123 goto add;
f3213831 1124 pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
4a287eba
MV
1125 return -ENOENT;
1126 }
e715b6d3 1127
2233000c
DA
1128 err = call_fib6_entry_notifiers(info->nl_net,
1129 FIB_EVENT_ENTRY_REPLACE,
1130 rt, extack);
1131 if (err)
1132 return err;
1133
93c2fb25
DA
1134 atomic_inc(&rt->fib6_ref);
1135 rcu_assign_pointer(rt->fib6_node, fn);
33bd5ac5 1136 rt->fib6_next = iter->fib6_next;
66f5d6ce 1137 rcu_assign_pointer(*ins, rt);
3b1137fe
DA
1138 if (!info->skip_notify)
1139 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
507c9b1e 1140 if (!(fn->fn_flags & RTN_RTINFO)) {
4a287eba
MV
1141 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1142 fn->fn_flags |= RTN_RTINFO;
1143 }
33bd5ac5
DA
1144 nsiblings = iter->fib6_nsiblings;
1145 iter->fib6_node = NULL;
1146 fib6_purge_rt(iter, fn, info->nl_net);
1147 if (rcu_access_pointer(fn->rr_ptr) == iter)
1148 fn->rr_ptr = NULL;
1149 fib6_info_release(iter);
27596472 1150
33bd5ac5 1151 if (nsiblings) {
27596472 1152 /* Replacing an ECMP route, remove all siblings */
33bd5ac5
DA
1153 ins = &rt->fib6_next;
1154 iter = rcu_dereference_protected(*ins,
1155 lockdep_is_held(&rt->fib6_table->tb6_lock));
1156 while (iter) {
1157 if (iter->fib6_metric > rt->fib6_metric)
1158 break;
1159 if (rt6_qualify_for_ecmp(iter)) {
1160 *ins = iter->fib6_next;
1161 iter->fib6_node = NULL;
1162 fib6_purge_rt(iter, fn, info->nl_net);
1163 if (rcu_access_pointer(fn->rr_ptr) == iter)
1164 fn->rr_ptr = NULL;
1165 fib6_info_release(iter);
1166 nsiblings--;
1167 info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1168 } else {
1169 ins = &iter->fib6_next;
1170 }
1171 iter = rcu_dereference_protected(*ins,
1172 lockdep_is_held(&rt->fib6_table->tb6_lock));
27596472 1173 }
33bd5ac5 1174 WARN_ON(nsiblings != 0);
27596472 1175 }
1da177e4
LT
1176 }
1177
1178 return 0;
1179}
1180
8d1c802b 1181static void fib6_start_gc(struct net *net, struct fib6_info *rt)
1da177e4 1182{
417f28bb 1183 if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
93c2fb25 1184 (rt->fib6_flags & RTF_EXPIRES))
417f28bb 1185 mod_timer(&net->ipv6.ip6_fib_timer,
847499ce 1186 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1da177e4
LT
1187}
1188
63152fc0 1189void fib6_force_start_gc(struct net *net)
1da177e4 1190{
417f28bb
SH
1191 if (!timer_pending(&net->ipv6.ip6_fib_timer))
1192 mod_timer(&net->ipv6.ip6_fib_timer,
847499ce 1193 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1da177e4
LT
1194}
1195
8d1c802b 1196static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
4a8e56ee 1197 int sernum)
bbd63f06 1198{
93c2fb25
DA
1199 struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1200 lockdep_is_held(&rt->fib6_table->tb6_lock));
bbd63f06
WW
1201
1202 /* paired with smp_rmb() in rt6_get_cookie_safe() */
1203 smp_wmb();
1204 while (fn) {
1205 fn->fn_sernum = sernum;
66f5d6ce 1206 fn = rcu_dereference_protected(fn->parent,
93c2fb25 1207 lockdep_is_held(&rt->fib6_table->tb6_lock));
bbd63f06
WW
1208 }
1209}
1210
/*
 * Obtain a fresh serial number for @net and stamp it on @rt's node and
 * all of that node's ancestors.
 */
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
{
	int sernum = fib6_new_sernum(net);

	__fib6_update_sernum_upto_root(rt, sernum);
}
1215
1da177e4
LT
1216/*
1217 * Add routing information to the routing tree.
1218 * <destination addr>/<source addr>
1219 * with source addr info in sub-trees
66f5d6ce 1220 * Need to own table->tb6_lock
1da177e4
LT
1221 */
1222
/*
 * fib6_add - insert route rt into the tree rooted at root.
 *
 * The node for rt's destination prefix is located or created with
 * fib6_add_1(). With CONFIG_IPV6_SUBTREES and a non-zero source prefix
 * length, a node in that node's source subtree is located or created as
 * well (creating the subtree root first if needed). The route is then
 * linked in by fib6_add_rt2node().
 *
 * Returns 0 on success or a negative errno. On failure, a node that may
 * have been left without routes is handed to fib6_repair_tree().
 */
8d1c802b 1223int fib6_add(struct fib6_node *root, struct fib6_info *rt,
d4ead6b3 1224 struct nl_info *info, struct netlink_ext_ack *extack)
1da177e4 1225{
93c2fb25 1226 struct fib6_table *table = rt->fib6_table;
66729e18 1227 struct fib6_node *fn, *pn = NULL;
1da177e4 1228 int err = -ENOMEM;
4a287eba
MV
1229 int allow_create = 1;
1230 int replace_required = 0;
812918c4 1231 int sernum = fib6_new_sernum(info->nl_net);
507c9b1e
DM
1232
/* Without a netlink header the defaults stand: creation allowed,
 * replace not required. Otherwise the request flags decide.
 */
1233 if (info->nlh) {
1234 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
4a287eba 1235 allow_create = 0;
507c9b1e 1236 if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
4a287eba
MV
1237 replace_required = 1;
1238 }
1239 if (!allow_create && !replace_required)
f3213831 1240 pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
1da177e4 1241
/* Locate or create the main-tree node keyed on the destination prefix. */
81eb8447 1242 fn = fib6_add_1(info->nl_net, table, root,
93c2fb25
DA
1243 &rt->fib6_dst.addr, rt->fib6_dst.plen,
1244 offsetof(struct fib6_info, fib6_dst), allow_create,
bbd63f06 1245 replace_required, extack);
4a287eba
MV
1246 if (IS_ERR(fn)) {
1247 err = PTR_ERR(fn);
ae7b4e1f 1248 fn = NULL;
1da177e4 1249 goto out;
188c517a 1250 }
1da177e4 1251
66729e18
YH
/* pn keeps the main-tree node; fn may be redirected to a subtree node below. */
1252 pn = fn;
1253
1da177e4 1254#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 1255 if (rt->fib6_src.plen) {
1da177e4
LT
1256 struct fib6_node *sn;
1257
66f5d6ce 1258 if (!rcu_access_pointer(fn->subtree)) {
1da177e4
LT
1259 struct fib6_node *sfn;
1260
1261 /*
1262 * Create subtree.
1263 *
1264 * fn[main tree]
1265 * |
1266 * sfn[subtree root]
1267 * \
1268 * sn[new leaf node]
1269 */
1270
1271 /* Create subtree root node */
81eb8447 1272 sfn = node_alloc(info->nl_net);
507c9b1e 1273 if (!sfn)
348a4002 1274 goto failure;
1da177e4 1275
/* The new subtree root starts out with fib6_null_entry as its leaf,
 * holding a reference on it.
 */
93c2fb25 1276 atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
66f5d6ce 1277 rcu_assign_pointer(sfn->leaf,
421842ed 1278 info->nl_net->ipv6.fib6_null_entry);
1da177e4 1279 sfn->fn_flags = RTN_ROOT;
1da177e4
LT
1280
1281 /* Now add the first leaf node to new subtree */
1282
81eb8447 1283 sn = fib6_add_1(info->nl_net, table, sfn,
93c2fb25
DA
1284 &rt->fib6_src.addr, rt->fib6_src.plen,
1285 offsetof(struct fib6_info, fib6_src),
bbd63f06 1286 allow_create, replace_required, extack);
1da177e4 1287
f950c0ec 1288 if (IS_ERR(sn)) {
1da177e4 1289 /* If it is failed, discard just allocated
348a4002 1290 root, and then (in failure) stale node
1da177e4
LT
1291 in main tree.
1292 */
81eb8447 1293 node_free_immediate(info->nl_net, sfn);
188c517a 1294 err = PTR_ERR(sn);
348a4002 1295 goto failure;
1da177e4
LT
1296 }
1297
1298 /* Now link new subtree to main tree */
66f5d6ce
WW
1299 rcu_assign_pointer(sfn->parent, fn);
1300 rcu_assign_pointer(fn->subtree, sfn);
1da177e4 1301 } else {
/* A subtree already exists: locate or create the source-prefix node in it. */
81eb8447 1302 sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
93c2fb25
DA
1303 &rt->fib6_src.addr, rt->fib6_src.plen,
1304 offsetof(struct fib6_info, fib6_src),
bbd63f06 1305 allow_create, replace_required, extack);
1da177e4 1306
4a287eba
MV
1307 if (IS_ERR(sn)) {
1308 err = PTR_ERR(sn);
348a4002 1309 goto failure;
188c517a 1310 }
1da177e4
LT
1311 }
1312
/* The main-tree node has no leaf at this point: install one
 * (fib6_null_entry for the table root, otherwise rt with an extra
 * reference).
 */
66f5d6ce 1313 if (!rcu_access_pointer(fn->leaf)) {
591ff9ea
WW
1314 if (fn->fn_flags & RTN_TL_ROOT) {
1315 /* put back null_entry for root node */
1316 rcu_assign_pointer(fn->leaf,
421842ed 1317 info->nl_net->ipv6.fib6_null_entry);
591ff9ea 1318 } else {
93c2fb25 1319 atomic_inc(&rt->fib6_ref);
591ff9ea
WW
1320 rcu_assign_pointer(fn->leaf, rt);
1321 }
66729e18 1322 }
1da177e4
LT
1323 fn = sn;
1324 }
1325#endif
1326
/* Link the route into fn. On success stamp the serial number obtained
 * at entry on fn and its ancestors, and arm the GC timer (fib6_start_gc()
 * only does so for routes with RTF_EXPIRES).
 */
d4ead6b3 1327 err = fib6_add_rt2node(fn, rt, info, extack);
bbd63f06 1328 if (!err) {
4a8e56ee 1329 __fib6_update_sernum_upto_root(rt, sernum);
63152fc0 1330 fib6_start_gc(info->nl_net, rt);
bbd63f06 1331 }
1da177e4
LT
1332
1333out:
66729e18
YH
1334 if (err) {
1335#ifdef CONFIG_IPV6_SUBTREES
1336 /*
1337 * If fib6_add_1 has cleared the old leaf pointer in the
1338 * super-tree leaf node we have to find a new one for it.
1339 */
7bbfe00e 1340 if (pn != fn) {
8d1c802b 1341 struct fib6_info *pn_leaf =
7bbfe00e
WW
1342 rcu_dereference_protected(pn->leaf,
1343 lockdep_is_held(&table->tb6_lock));
/* pn's leaf is the route that failed to be added: clear it and
 * release the reference on rt.
 */
1344 if (pn_leaf == rt) {
1345 pn_leaf = NULL;
1346 RCU_INIT_POINTER(pn->leaf, NULL);
93531c67 1347 fib6_info_release(rt);
66729e18 1348 }
7bbfe00e
WW
1349 if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1350 pn_leaf = fib6_find_prefix(info->nl_net, table,
1351 pn);
1352#if RT6_DEBUG >= 2
1353 if (!pn_leaf) {
1354 WARN_ON(!pn_leaf);
1355 pn_leaf =
421842ed 1356 info->nl_net->ipv6.fib6_null_entry;
7bbfe00e 1357 }
66729e18 1358#endif
93531c67 1359 fib6_info_hold(pn_leaf);
7bbfe00e
WW
1360 rcu_assign_pointer(pn->leaf, pn_leaf);
1361 }
66729e18
YH
1362 }
1363#endif
348a4002 1364 goto failure;
66729e18 1365 }
1da177e4
LT
1366 return err;
1367
348a4002 1368failure:
4512c43e
WW
1369 /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
1370 * 1. fn is an intermediate node and we failed to add the new
1371 * route to it in both subtree creation failure and fib6_add_rt2node()
1372 * failure case.
1373 * 2. fn is the root node in the table and we fail to add the first
1374 * default route to it.
1da177e4 1375 */
4512c43e
WW
1376 if (fn &&
1377 (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
1378 (fn->fn_flags & RTN_TL_ROOT &&
1379 !rcu_access_pointer(fn->leaf))))
66f5d6ce 1380 fib6_repair_tree(info->nl_net, table, fn);
1da177e4 1381 return err;
1da177e4
LT
1382}
1383
1384/*
1385 * Routing tree lookup
1386 *
1387 */
1388
/*
 * One level of a tree lookup: which rt6key inside struct fib6_info to
 * compare against, and the address to match it with. fib6_node_lookup()
 * passes an array of these terminated by an entry whose offset is 0.
 */
struct lookup_args {
	int			offset;	/* byte offset of the key within fib6_info */
	const struct in6_addr	*addr;	/* address being looked up */
};
1393
6454743b
DA
1394static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
1395 struct lookup_args *args)
1da177e4
LT
1396{
1397 struct fib6_node *fn;
e69a4adc 1398 __be32 dir;
1da177e4 1399
825e288e
YH
1400 if (unlikely(args->offset == 0))
1401 return NULL;
1402
1da177e4
LT
1403 /*
1404 * Descend on a tree
1405 */
1406
1407 fn = root;
1408
1409 for (;;) {
1410 struct fib6_node *next;
1411
1412 dir = addr_bit_set(args->addr, fn->fn_bit);
1413
66f5d6ce
WW
1414 next = dir ? rcu_dereference(fn->right) :
1415 rcu_dereference(fn->left);
1da177e4
LT
1416
1417 if (next) {
1418 fn = next;
1419 continue;
1420 }
1da177e4
LT
1421 break;
1422 }
1423
507c9b1e 1424 while (fn) {
66f5d6ce
WW
1425 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1426
1427 if (subtree || fn->fn_flags & RTN_RTINFO) {
8d1c802b 1428 struct fib6_info *leaf = rcu_dereference(fn->leaf);
1da177e4
LT
1429 struct rt6key *key;
1430
8d1040e8
WW
1431 if (!leaf)
1432 goto backtrack;
1433
1434 key = (struct rt6key *) ((u8 *)leaf + args->offset);
1da177e4 1435
3fc5e044
YH
1436 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1437#ifdef CONFIG_IPV6_SUBTREES
66f5d6ce 1438 if (subtree) {
3e3be275 1439 struct fib6_node *sfn;
6454743b
DA
1440 sfn = fib6_node_lookup_1(subtree,
1441 args + 1);
3e3be275
HFS
1442 if (!sfn)
1443 goto backtrack;
1444 fn = sfn;
1445 }
3fc5e044 1446#endif
3e3be275 1447 if (fn->fn_flags & RTN_RTINFO)
3fc5e044
YH
1448 return fn;
1449 }
1da177e4 1450 }
3e3be275 1451backtrack:
3fc5e044
YH
1452 if (fn->fn_flags & RTN_ROOT)
1453 break;
1454
66f5d6ce 1455 fn = rcu_dereference(fn->parent);
1da177e4
LT
1456 }
1457
1458 return NULL;
1459}
1460
66f5d6ce
WW
1461/* called with rcu_read_lock() held
1462 */
6454743b
DA
1463struct fib6_node *fib6_node_lookup(struct fib6_node *root,
1464 const struct in6_addr *daddr,
1465 const struct in6_addr *saddr)
1da177e4 1466{
1da177e4 1467 struct fib6_node *fn;
825e288e
YH
1468 struct lookup_args args[] = {
1469 {
93c2fb25 1470 .offset = offsetof(struct fib6_info, fib6_dst),
825e288e
YH
1471 .addr = daddr,
1472 },
1da177e4 1473#ifdef CONFIG_IPV6_SUBTREES
825e288e 1474 {
93c2fb25 1475 .offset = offsetof(struct fib6_info, fib6_src),
825e288e
YH
1476 .addr = saddr,
1477 },
1da177e4 1478#endif
825e288e
YH
1479 {
1480 .offset = 0, /* sentinel */
1481 }
1482 };
1da177e4 1483
6454743b 1484 fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
507c9b1e 1485 if (!fn || fn->fn_flags & RTN_TL_ROOT)
1da177e4
LT
1486 fn = root;
1487
1488 return fn;
1489}
1490
1491/*
1492 * Get node with specified destination prefix (and source prefix,
1493 * if subtrees are used)
38fbeeee
WW
1494 * exact_match == true means we try to find fn with exact match of
1495 * the passed in prefix addr
1496 * exact_match == false means we try to find fn with longest prefix
1497 * match of the passed in prefix addr. This is useful for finding fn
1498 * for cached route as it will be stored in the exception table under
1499 * the node with longest prefix length.
1da177e4
LT
1500 */
1501
1502
437de07c
WY
1503static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1504 const struct in6_addr *addr,
38fbeeee
WW
1505 int plen, int offset,
1506 bool exact_match)
1da177e4 1507{
38fbeeee 1508 struct fib6_node *fn, *prev = NULL;
1da177e4
LT
1509
1510 for (fn = root; fn ; ) {
8d1c802b 1511 struct fib6_info *leaf = rcu_dereference(fn->leaf);
8d1040e8
WW
1512 struct rt6key *key;
1513
1514 /* This node is being deleted */
1515 if (!leaf) {
1516 if (plen <= fn->fn_bit)
1517 goto out;
1518 else
1519 goto next;
1520 }
1521
1522 key = (struct rt6key *)((u8 *)leaf + offset);
1da177e4
LT
1523
1524 /*
1525 * Prefix match
1526 */
1527 if (plen < fn->fn_bit ||
1528 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
38fbeeee 1529 goto out;
1da177e4
LT
1530
1531 if (plen == fn->fn_bit)
1532 return fn;
1533
38fbeeee
WW
1534 prev = fn;
1535
8d1040e8 1536next:
1da177e4
LT
1537 /*
1538 * We have more bits to go
1539 */
1540 if (addr_bit_set(addr, fn->fn_bit))
66f5d6ce 1541 fn = rcu_dereference(fn->right);
1da177e4 1542 else
66f5d6ce 1543 fn = rcu_dereference(fn->left);
1da177e4 1544 }
38fbeeee
WW
1545out:
1546 if (exact_match)
1547 return NULL;
1548 else
1549 return prev;
1da177e4
LT
1550}
1551
437de07c
WY
1552struct fib6_node *fib6_locate(struct fib6_node *root,
1553 const struct in6_addr *daddr, int dst_len,
38fbeeee
WW
1554 const struct in6_addr *saddr, int src_len,
1555 bool exact_match)
1da177e4
LT
1556{
1557 struct fib6_node *fn;
1558
1559 fn = fib6_locate_1(root, daddr, dst_len,
93c2fb25 1560 offsetof(struct fib6_info, fib6_dst),
38fbeeee 1561 exact_match);
1da177e4
LT
1562
1563#ifdef CONFIG_IPV6_SUBTREES
1564 if (src_len) {
547b792c 1565 WARN_ON(saddr == NULL);
0e80193b
WW
1566 if (fn) {
1567 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1568
1569 if (subtree) {
1570 fn = fib6_locate_1(subtree, saddr, src_len,
93c2fb25 1571 offsetof(struct fib6_info, fib6_src),
38fbeeee 1572 exact_match);
0e80193b
WW
1573 }
1574 }
1da177e4
LT
1575 }
1576#endif
1577
507c9b1e 1578 if (fn && fn->fn_flags & RTN_RTINFO)
1da177e4
LT
1579 return fn;
1580
1581 return NULL;
1582}
1583
1584
1585/*
1586 * Deletion
1587 *
1588 */
1589
8d1c802b 1590static struct fib6_info *fib6_find_prefix(struct net *net,
66f5d6ce
WW
1591 struct fib6_table *table,
1592 struct fib6_node *fn)
1da177e4 1593{
66f5d6ce
WW
1594 struct fib6_node *child_left, *child_right;
1595
507c9b1e 1596 if (fn->fn_flags & RTN_ROOT)
421842ed 1597 return net->ipv6.fib6_null_entry;
1da177e4 1598
507c9b1e 1599 while (fn) {
66f5d6ce
WW
1600 child_left = rcu_dereference_protected(fn->left,
1601 lockdep_is_held(&table->tb6_lock));
1602 child_right = rcu_dereference_protected(fn->right,
1603 lockdep_is_held(&table->tb6_lock));
1604 if (child_left)
1605 return rcu_dereference_protected(child_left->leaf,
1606 lockdep_is_held(&table->tb6_lock));
1607 if (child_right)
1608 return rcu_dereference_protected(child_right->leaf,
1609 lockdep_is_held(&table->tb6_lock));
1da177e4 1610
7fc33165 1611 fn = FIB6_SUBTREE(fn);
1da177e4
LT
1612 }
1613 return NULL;
1614}
1615
1616/*
1617 * Called to trim the tree of intermediate nodes when possible. "fn"
1618 * is the node we want to try and remove.
66f5d6ce 1619 * Need to own table->tb6_lock
1da177e4
LT
1620 */
1621
/*
 * fib6_repair_tree - fix up the tree around node fn, which no longer
 * holds routes.
 *
 * For the table's top-level root the leaf is simply reset to
 * fib6_null_entry. Otherwise fn is either kept as an intermediate node
 * (it is given a leaf found by fib6_find_prefix()) or unlinked from its
 * parent and freed; in the latter case the parent is examined in turn.
 * Walkers positioned on a freed node are moved to a surviving one.
 *
 * Returns fn for the top-level root, otherwise the parent of the last
 * node examined.
 */
8ed67789 1622static struct fib6_node *fib6_repair_tree(struct net *net,
66f5d6ce
WW
1623 struct fib6_table *table,
1624 struct fib6_node *fn)
1da177e4
LT
1625{
1626 int children;
1627 int nstate;
66f5d6ce 1628 struct fib6_node *child;
94b2cfe0 1629 struct fib6_walker *w;
1da177e4
LT
1630 int iter = 0;
1631
4512c43e
WW
1632 /* Set fn->leaf to null_entry for root node. */
1633 if (fn->fn_flags & RTN_TL_ROOT) {
421842ed 1634 rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
4512c43e
WW
1635 return fn;
1636 }
1637
1da177e4 1638 for (;;) {
66f5d6ce
WW
1639 struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1640 lockdep_is_held(&table->tb6_lock));
1641 struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
1642 lockdep_is_held(&table->tb6_lock));
1643 struct fib6_node *pn = rcu_dereference_protected(fn->parent,
1644 lockdep_is_held(&table->tb6_lock));
1645 struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
1646 lockdep_is_held(&table->tb6_lock));
1647 struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
1648 lockdep_is_held(&table->tb6_lock));
8d1c802b 1649 struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
66f5d6ce 1650 lockdep_is_held(&table->tb6_lock));
8d1c802b 1651 struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
66f5d6ce 1652 lockdep_is_held(&table->tb6_lock));
8d1c802b 1653 struct fib6_info *new_fn_leaf;
66f5d6ce 1654
1da177e4
LT
1655 RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1656 iter++;
1657
547b792c
IJ
/* Sanity checks: fn is expected to be a route-less, leaf-less,
 * non-top-level node here.
 */
1658 WARN_ON(fn->fn_flags & RTN_RTINFO);
1659 WARN_ON(fn->fn_flags & RTN_TL_ROOT);
66f5d6ce 1660 WARN_ON(fn_leaf);
1da177e4
LT
1661
/* children is a bitmask: 1 = right child present, 2 = left child
 * present. child ends up as the left child if there is one, else the
 * right one, else NULL.
 */
1662 children = 0;
1663 child = NULL;
66f5d6ce
WW
1664 if (fn_r)
1665 child = fn_r, children |= 1;
1666 if (fn_l)
1667 child = fn_l, children |= 2;
1da177e4 1668
/* fn has to stay in the tree: give it a leaf (with a reference
 * held) and stop here.
 */
7fc33165 1669 if (children == 3 || FIB6_SUBTREE(fn)
1da177e4
LT
1670#ifdef CONFIG_IPV6_SUBTREES
1671 /* Subtree root (i.e. fn) may have one child */
507c9b1e 1672 || (children && fn->fn_flags & RTN_ROOT)
1da177e4
LT
1673#endif
1674 ) {
66f5d6ce 1675 new_fn_leaf = fib6_find_prefix(net, table, fn);
1da177e4 1676#if RT6_DEBUG >= 2
66f5d6ce
WW
1677 if (!new_fn_leaf) {
1678 WARN_ON(!new_fn_leaf);
421842ed 1679 new_fn_leaf = net->ipv6.fib6_null_entry;
1da177e4
LT
1680 }
1681#endif
93531c67 1682 fib6_info_hold(new_fn_leaf);
66f5d6ce
WW
1683 rcu_assign_pointer(fn->leaf, new_fn_leaf);
1684 return pn;
1da177e4
LT
1685 }
1686
/* Unlink fn from its parent: either detach it as pn's subtree root,
 * or put its only child (possibly NULL) into the slot fn occupied.
 * nstate is the state given to walkers that get moved up to pn.
 */
1da177e4 1687#ifdef CONFIG_IPV6_SUBTREES
7fc33165 1688 if (FIB6_SUBTREE(pn) == fn) {
547b792c 1689 WARN_ON(!(fn->fn_flags & RTN_ROOT));
66f5d6ce 1690 RCU_INIT_POINTER(pn->subtree, NULL);
1da177e4
LT
1691 nstate = FWS_L;
1692 } else {
547b792c 1693 WARN_ON(fn->fn_flags & RTN_ROOT);
1da177e4 1694#endif
66f5d6ce
WW
1695 if (pn_r == fn)
1696 rcu_assign_pointer(pn->right, child);
1697 else if (pn_l == fn)
1698 rcu_assign_pointer(pn->left, child);
1da177e4 1699#if RT6_DEBUG >= 2
547b792c
IJ
1700 else
1701 WARN_ON(1);
1da177e4
LT
1702#endif
1703 if (child)
66f5d6ce 1704 rcu_assign_pointer(child->parent, pn);
1da177e4
LT
1705 nstate = FWS_R;
1706#ifdef CONFIG_IPV6_SUBTREES
1707 }
1708#endif
1709
9a03cd8f
MK
/* Move every walker currently positioned on fn: up to pn when fn had
 * no child, otherwise down to the child that replaced it.
 */
1710 read_lock(&net->ipv6.fib6_walker_lock);
1711 FOR_WALKERS(net, w) {
507c9b1e 1712 if (!child) {
2b760fcf 1713 if (w->node == fn) {
1da177e4
LT
1714 RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1715 w->node = pn;
1716 w->state = nstate;
1717 }
1718 } else {
1da177e4
LT
1719 if (w->node == fn) {
1720 w->node = child;
1721 if (children&2) {
1722 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
8db46f1d 1723 w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
1da177e4
LT
1724 } else {
1725 RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
8db46f1d 1726 w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
1da177e4
LT
1727 }
1728 }
1729 }
1730 }
9a03cd8f 1731 read_unlock(&net->ipv6.fib6_walker_lock);
1da177e4 1732
/* fn is gone. If the parent carries routes or a subtree we are done;
 * otherwise drop the parent's leaf reference and examine the parent.
 */
81eb8447 1733 node_free(net, fn);
507c9b1e 1734 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1da177e4
LT
1735 return pn;
1736
66f5d6ce 1737 RCU_INIT_POINTER(pn->leaf, NULL);
93531c67 1738 fib6_info_release(pn_leaf);
1da177e4
LT
1739 fn = pn;
1740 }
1741}
1742
66f5d6ce 1743static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
8d1c802b 1744 struct fib6_info __rcu **rtp, struct nl_info *info)
1da177e4 1745{
94b2cfe0 1746 struct fib6_walker *w;
8d1c802b 1747 struct fib6_info *rt = rcu_dereference_protected(*rtp,
66f5d6ce 1748 lockdep_is_held(&table->tb6_lock));
c572872f 1749 struct net *net = info->nl_net;
1da177e4
LT
1750
1751 RT6_TRACE("fib6_del_route\n");
1752
1753 /* Unlink it */
8fb11a9a 1754 *rtp = rt->fib6_next;
93c2fb25 1755 rt->fib6_node = NULL;
c572872f
BT
1756 net->ipv6.rt6_stats->fib_rt_entries--;
1757 net->ipv6.rt6_stats->fib_discarded_routes++;
1da177e4 1758
2b760fcf
WW
1759 /* Flush all cached dst in exception table */
1760 rt6_flush_exceptions(rt);
1761
f11e6659 1762 /* Reset round-robin state, if necessary */
66f5d6ce 1763 if (rcu_access_pointer(fn->rr_ptr) == rt)
f11e6659
DM
1764 fn->rr_ptr = NULL;
1765
51ebd318 1766 /* Remove this entry from other siblings */
93c2fb25 1767 if (rt->fib6_nsiblings) {
8d1c802b 1768 struct fib6_info *sibling, *next_sibling;
51ebd318
ND
1769
1770 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
1771 &rt->fib6_siblings, fib6_siblings)
1772 sibling->fib6_nsiblings--;
1773 rt->fib6_nsiblings = 0;
1774 list_del_init(&rt->fib6_siblings);
d7dedee1 1775 rt6_multipath_rebalance(next_sibling);
51ebd318
ND
1776 }
1777
1da177e4 1778 /* Adjust walkers */
9a03cd8f
MK
1779 read_lock(&net->ipv6.fib6_walker_lock);
1780 FOR_WALKERS(net, w) {
1da177e4
LT
1781 if (w->state == FWS_C && w->leaf == rt) {
1782 RT6_TRACE("walker %p adjusted by delroute\n", w);
8fb11a9a 1783 w->leaf = rcu_dereference_protected(rt->fib6_next,
66f5d6ce 1784 lockdep_is_held(&table->tb6_lock));
507c9b1e 1785 if (!w->leaf)
1da177e4
LT
1786 w->state = FWS_U;
1787 }
1788 }
9a03cd8f 1789 read_unlock(&net->ipv6.fib6_walker_lock);
1da177e4 1790
4512c43e
WW
1791 /* If it was last route, call fib6_repair_tree() to:
1792 * 1. For root node, put back null_entry as how the table was created.
1793 * 2. For other nodes, expunge its radix tree node.
1794 */
66f5d6ce 1795 if (!rcu_access_pointer(fn->leaf)) {
4512c43e
WW
1796 if (!(fn->fn_flags & RTN_TL_ROOT)) {
1797 fn->fn_flags &= ~RTN_RTINFO;
1798 net->ipv6.rt6_stats->fib_route_nodes--;
1799 }
66f5d6ce 1800 fn = fib6_repair_tree(net, table, fn);
1da177e4
LT
1801 }
1802
6e9e16e6 1803 fib6_purge_rt(rt, fn, net);
1da177e4 1804
6c31e5a9 1805 call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
16a16cd3
DA
1806 if (!info->skip_notify)
1807 inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
93531c67 1808 fib6_info_release(rt);
1da177e4
LT
1809}
1810
66f5d6ce 1811/* Need to own table->tb6_lock */
8d1c802b 1812int fib6_del(struct fib6_info *rt, struct nl_info *info)
1da177e4 1813{
93c2fb25
DA
1814 struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1815 lockdep_is_held(&rt->fib6_table->tb6_lock));
1816 struct fib6_table *table = rt->fib6_table;
8ed67789 1817 struct net *net = info->nl_net;
8d1c802b
DA
1818 struct fib6_info __rcu **rtp;
1819 struct fib6_info __rcu **rtp_next;
1da177e4 1820
421842ed 1821 if (!fn || rt == net->ipv6.fib6_null_entry)
1da177e4
LT
1822 return -ENOENT;
1823
547b792c 1824 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
1da177e4 1825
1da177e4
LT
1826 /*
1827 * Walk the leaf entries looking for ourself
1828 */
1829
66f5d6ce 1830 for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
8d1c802b 1831 struct fib6_info *cur = rcu_dereference_protected(*rtp,
66f5d6ce
WW
1832 lockdep_is_held(&table->tb6_lock));
1833 if (rt == cur) {
1834 fib6_del_route(table, fn, rtp, info);
1da177e4
LT
1835 return 0;
1836 }
8fb11a9a 1837 rtp_next = &cur->fib6_next;
1da177e4
LT
1838 }
1839 return -ENOENT;
1840}
1841
1842/*
1843 * Tree traversal function.
1844 *
1845 * Certainly, it is not interrupt safe.
1846 * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
1847 * It means, that we can modify tree during walking
1848 * and use this function for garbage collection, clone pruning,
1ab1457c 1849 * cleaning tree when a device goes down etc. etc.
1da177e4
LT
1850 *
1851 * It guarantees that every node will be traversed,
1852 * and that it will be traversed only once.
1853 *
1854 * Callback function w->func may return:
1855 * 0 -> continue walking.
1856 * positive value -> walking is suspended (used by tree dumps,
1857 * and probably by gc, if it will be split to several slices)
1858 * negative value -> terminate walking.
1859 *
1860 * The function itself returns:
1861 * 0 -> walk is complete.
1862 * >0 -> walk is incomplete (i.e. suspended)
1863 * <0 -> walk is terminated by an error.
66f5d6ce
WW
1864 *
1865 * This function is called with tb6_lock held.
1da177e4
LT
1866 */
1867
/*
 * fib6_walk_continue - run the walker state machine from w->node/w->state.
 *
 * Each pass of the loop handles one state for the current node:
 * FWS_S (enter subtree, CONFIG_IPV6_SUBTREES only), FWS_L (go left),
 * FWS_R (go right), FWS_C (visit the node's routes through w->func) and
 * FWS_U (go back up to the parent). States fall through to the next one
 * when there is nothing to descend into.
 *
 * Returns 0 when the walk reaches w->root in FWS_U or w->node is NULL,
 * or the non-zero value returned by w->func.
 */
94b2cfe0 1868static int fib6_walk_continue(struct fib6_walker *w)
1da177e4 1869{
66f5d6ce 1870 struct fib6_node *fn, *pn, *left, *right;
1da177e4 1871
2b760fcf
WW
1872 /* w->root should always be table->tb6_root */
1873 WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
1874
1da177e4
LT
1875 for (;;) {
1876 fn = w->node;
507c9b1e 1877 if (!fn)
1da177e4
LT
1878 return 0;
1879
1da177e4
LT
1880 switch (w->state) {
1881#ifdef CONFIG_IPV6_SUBTREES
/* Enter the node's subtree first, if it has one. */
1882 case FWS_S:
7fc33165
YH
1883 if (FIB6_SUBTREE(fn)) {
1884 w->node = FIB6_SUBTREE(fn);
1da177e4
LT
1885 continue;
1886 }
1887 w->state = FWS_L;
1ab1457c 1888#endif
275757e6 1889 /* fall through */
/* Descend into the left child, restarting the state machine there. */
1da177e4 1890 case FWS_L:
66f5d6ce
WW
1891 left = rcu_dereference_protected(fn->left, 1);
1892 if (left) {
1893 w->node = left;
1da177e4
LT
1894 w->state = FWS_INIT;
1895 continue;
1896 }
1897 w->state = FWS_R;
275757e6 1898 /* fall through */
/* Descend into the right child; with none, get ready to visit this
 * node's own routes.
 */
1da177e4 1899 case FWS_R:
66f5d6ce
WW
1900 right = rcu_dereference_protected(fn->right, 1);
1901 if (right) {
1902 w->node = right;
1da177e4
LT
1903 w->state = FWS_INIT;
1904 continue;
1905 }
1906 w->state = FWS_C;
66f5d6ce 1907 w->leaf = rcu_dereference_protected(fn->leaf, 1);
275757e6 1908 /* fall through */
/* Visit the node: call w->func unless w->skip says to skip it. After
 * a 0 return the state stays FWS_C, so this case runs again for the
 * same node until w->leaf is NULL; presumably w->func advances or
 * clears w->leaf (fib6_clean_node() does).
 */
1da177e4 1909 case FWS_C:
507c9b1e 1910 if (w->leaf && fn->fn_flags & RTN_RTINFO) {
2bec5a36
PM
1911 int err;
1912
fa809e2f
ED
1913 if (w->skip) {
1914 w->skip--;
1c265854 1915 goto skip;
2bec5a36
PM
1916 }
1917
1918 err = w->func(w);
1da177e4
LT
1919 if (err)
1920 return err;
2bec5a36
PM
1921
1922 w->count++;
1da177e4
LT
1923 continue;
1924 }
1c265854 1925skip:
1da177e4 1926 w->state = FWS_U;
275757e6 1927 /* fall through */
/* Go up to the parent; the next state depends on which link of the
 * parent led to fn (subtree, left or right).
 */
1da177e4
LT
1928 case FWS_U:
1929 if (fn == w->root)
1930 return 0;
66f5d6ce
WW
1931 pn = rcu_dereference_protected(fn->parent, 1);
1932 left = rcu_dereference_protected(pn->left, 1);
1933 right = rcu_dereference_protected(pn->right, 1);
1da177e4
LT
1934 w->node = pn;
1935#ifdef CONFIG_IPV6_SUBTREES
7fc33165 1936 if (FIB6_SUBTREE(pn) == fn) {
547b792c 1937 WARN_ON(!(fn->fn_flags & RTN_ROOT));
1da177e4
LT
1938 w->state = FWS_L;
1939 continue;
1940 }
1941#endif
66f5d6ce 1942 if (left == fn) {
1da177e4
LT
1943 w->state = FWS_R;
1944 continue;
1945 }
66f5d6ce 1946 if (right == fn) {
1da177e4 1947 w->state = FWS_C;
66f5d6ce 1948 w->leaf = rcu_dereference_protected(w->node->leaf, 1);
1da177e4
LT
1949 continue;
1950 }
1951#if RT6_DEBUG >= 2
547b792c 1952 WARN_ON(1);
1da177e4
LT
1953#endif
1954 }
1955 }
1956}
1957
9a03cd8f 1958static int fib6_walk(struct net *net, struct fib6_walker *w)
1da177e4
LT
1959{
1960 int res;
1961
1962 w->state = FWS_INIT;
1963 w->node = w->root;
1964
9a03cd8f 1965 fib6_walker_link(net, w);
1da177e4
LT
1966 res = fib6_walk_continue(w);
1967 if (res <= 0)
9a03cd8f 1968 fib6_walker_unlink(net, w);
1da177e4
LT
1969 return res;
1970}
1971
94b2cfe0 1972static int fib6_clean_node(struct fib6_walker *w)
1da177e4
LT
1973{
1974 int res;
8d1c802b 1975 struct fib6_info *rt;
94b2cfe0 1976 struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
ec7d43c2
BT
1977 struct nl_info info = {
1978 .nl_net = c->net,
7c6bb7d2 1979 .skip_notify = c->skip_notify,
ec7d43c2 1980 };
1da177e4 1981
327571cb
HFS
1982 if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
1983 w->node->fn_sernum != c->sernum)
1984 w->node->fn_sernum = c->sernum;
1985
1986 if (!c->func) {
1987 WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
1988 w->leaf = NULL;
1989 return 0;
1990 }
1991
66f5d6ce 1992 for_each_fib6_walker_rt(w) {
1da177e4 1993 res = c->func(rt, c->arg);
b5cb5a75 1994 if (res == -1) {
1da177e4 1995 w->leaf = rt;
528c4ceb 1996 res = fib6_del(rt, &info);
1da177e4
LT
1997 if (res) {
1998#if RT6_DEBUG >= 2
91df42be 1999 pr_debug("%s: del failed: rt=%p@%p err=%d\n",
4e587ea7 2000 __func__, rt,
93c2fb25 2001 rcu_access_pointer(rt->fib6_node),
4e587ea7 2002 res);
1da177e4
LT
2003#endif
2004 continue;
2005 }
2006 return 0;
b5cb5a75 2007 } else if (res == -2) {
93c2fb25 2008 if (WARN_ON(!rt->fib6_nsiblings))
b5cb5a75 2009 continue;
93c2fb25
DA
2010 rt = list_last_entry(&rt->fib6_siblings,
2011 struct fib6_info, fib6_siblings);
b5cb5a75 2012 continue;
1da177e4 2013 }
547b792c 2014 WARN_ON(res != 0);
1da177e4
LT
2015 }
2016 w->leaf = rt;
2017 return 0;
2018}
2019
2020/*
2021 * Convenient frontend to tree walker.
1ab1457c 2022 *
1da177e4 2023 * func is called on each route.
b5cb5a75
IS
2024 * It may return -2 -> skip multipath route.
2025 * -1 -> delete this route.
1da177e4 2026 * 0 -> continue walking
1da177e4
LT
2027 */
2028
ec7d43c2 2029static void fib6_clean_tree(struct net *net, struct fib6_node *root,
8d1c802b 2030 int (*func)(struct fib6_info *, void *arg),
7c6bb7d2 2031 int sernum, void *arg, bool skip_notify)
1da177e4 2032{
94b2cfe0 2033 struct fib6_cleaner c;
1da177e4
LT
2034
2035 c.w.root = root;
2036 c.w.func = fib6_clean_node;
2bec5a36
PM
2037 c.w.count = 0;
2038 c.w.skip = 0;
1da177e4 2039 c.func = func;
327571cb 2040 c.sernum = sernum;
1da177e4 2041 c.arg = arg;
ec7d43c2 2042 c.net = net;
7c6bb7d2 2043 c.skip_notify = skip_notify;
1da177e4 2044
9a03cd8f 2045 fib6_walk(net, &c.w);
1da177e4
LT
2046}
2047
327571cb 2048static void __fib6_clean_all(struct net *net,
8d1c802b 2049 int (*func)(struct fib6_info *, void *),
7c6bb7d2 2050 int sernum, void *arg, bool skip_notify)
c71099ac 2051{
c71099ac 2052 struct fib6_table *table;
58f09b78 2053 struct hlist_head *head;
1b43af54 2054 unsigned int h;
c71099ac 2055
1b43af54 2056 rcu_read_lock();
a33bc5c1 2057 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
f3db4851 2058 head = &net->ipv6.fib_table_hash[h];
b67bfe0d 2059 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
66f5d6ce 2060 spin_lock_bh(&table->tb6_lock);
ec7d43c2 2061 fib6_clean_tree(net, &table->tb6_root,
7c6bb7d2 2062 func, sernum, arg, skip_notify);
66f5d6ce 2063 spin_unlock_bh(&table->tb6_lock);
c71099ac
TG
2064 }
2065 }
1b43af54 2066 rcu_read_unlock();
c71099ac
TG
2067}
2068
8d1c802b 2069void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
327571cb
HFS
2070 void *arg)
2071{
7c6bb7d2
DA
2072 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
2073}
2074
2075void fib6_clean_all_skip_notify(struct net *net,
2076 int (*func)(struct fib6_info *, void *),
2077 void *arg)
2078{
2079 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
327571cb
HFS
2080}
2081
705f1c86
HFS
2082static void fib6_flush_trees(struct net *net)
2083{
812918c4 2084 int new_sernum = fib6_new_sernum(net);
705f1c86 2085
7c6bb7d2 2086 __fib6_clean_all(net, NULL, new_sernum, NULL, false);
705f1c86
HFS
2087}
2088
1da177e4
LT
2089/*
2090 * Garbage collection
2091 */
2092
8d1c802b 2093static int fib6_age(struct fib6_info *rt, void *arg)
1da177e4 2094{
3570df91 2095 struct fib6_gc_args *gc_args = arg;
1da177e4
LT
2096 unsigned long now = jiffies;
2097
2098 /*
2099 * check addrconf expiration here.
2100 * Routes are expired even if they are in use.
1da177e4
LT
2101 */
2102
93c2fb25 2103 if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
14895687 2104 if (time_after(now, rt->expires)) {
1da177e4 2105 RT6_TRACE("expiring %p\n", rt);
1da177e4
LT
2106 return -1;
2107 }
3570df91 2108 gc_args->more++;
1da177e4
LT
2109 }
2110
c757faa8
WW
2111 /* Also age clones in the exception table.
2112 * Note, that clones are aged out
2113 * only if they are not in use now.
2114 */
2115 rt6_age_exceptions(rt, gc_args, now);
2116
1da177e4
LT
2117 return 0;
2118}
2119
2ac3ac8f 2120void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1da177e4 2121{
3570df91 2122 struct fib6_gc_args gc_args;
49a18d86
MK
2123 unsigned long now;
2124
2ac3ac8f 2125 if (force) {
3dc94f93
MK
2126 spin_lock_bh(&net->ipv6.fib6_gc_lock);
2127 } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
2ac3ac8f
MK
2128 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
2129 return;
1da177e4 2130 }
2ac3ac8f
MK
2131 gc_args.timeout = expires ? (int)expires :
2132 net->ipv6.sysctl.ip6_rt_gc_interval;
db916649 2133 gc_args.more = 0;
f3db4851 2134
3570df91 2135 fib6_clean_all(net, fib6_age, &gc_args);
49a18d86
MK
2136 now = jiffies;
2137 net->ipv6.ip6_rt_last_gc = now;
1da177e4
LT
2138
2139 if (gc_args.more)
c8a45222 2140 mod_timer(&net->ipv6.ip6_fib_timer,
49a18d86 2141 round_jiffies(now
c8a45222 2142 + net->ipv6.sysctl.ip6_rt_gc_interval));
417f28bb
SH
2143 else
2144 del_timer(&net->ipv6.ip6_fib_timer);
3dc94f93 2145 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
1da177e4
LT
2146}
2147
86cb30ec 2148static void fib6_gc_timer_cb(struct timer_list *t)
5b7c931d 2149{
86cb30ec
KC
2150 struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2151
2152 fib6_run_gc(0, arg, true);
5b7c931d
DL
2153}
2154
2c8c1e72 2155static int __net_init fib6_net_init(struct net *net)
1da177e4 2156{
10da66f7 2157 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
16ab6d7d
IS
2158 int err;
2159
2160 err = fib6_notifier_init(net);
2161 if (err)
2162 return err;
10da66f7 2163
3dc94f93 2164 spin_lock_init(&net->ipv6.fib6_gc_lock);
9a03cd8f
MK
2165 rwlock_init(&net->ipv6.fib6_walker_lock);
2166 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
86cb30ec 2167 timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
63152fc0 2168
c572872f
BT
2169 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
2170 if (!net->ipv6.rt6_stats)
2171 goto out_timer;
2172
10da66f7
ED
2173 /* Avoid false sharing : Use at least a full cache line */
2174 size = max_t(size_t, size, L1_CACHE_BYTES);
2175
2176 net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
58f09b78 2177 if (!net->ipv6.fib_table_hash)
c572872f 2178 goto out_rt6_stats;
e0b85590 2179
58f09b78
DL
2180 net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
2181 GFP_KERNEL);
2182 if (!net->ipv6.fib6_main_tbl)
e0b85590
DL
2183 goto out_fib_table_hash;
2184
58f09b78 2185 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
66f5d6ce 2186 rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
421842ed 2187 net->ipv6.fib6_null_entry);
58f09b78
DL
2188 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
2189 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
8e773277 2190 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
e0b85590
DL
2191
2192#ifdef CONFIG_IPV6_MULTIPLE_TABLES
58f09b78
DL
2193 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
2194 GFP_KERNEL);
2195 if (!net->ipv6.fib6_local_tbl)
e0b85590 2196 goto out_fib6_main_tbl;
58f09b78 2197 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
66f5d6ce 2198 rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
421842ed 2199 net->ipv6.fib6_null_entry);
58f09b78
DL
2200 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2201 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
8e773277 2202 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
e0b85590 2203#endif
58f09b78 2204 fib6_tables_init(net);
f845ab6b 2205
417f28bb 2206 return 0;
d63bddbe 2207
e0b85590 2208#ifdef CONFIG_IPV6_MULTIPLE_TABLES
e0b85590 2209out_fib6_main_tbl:
58f09b78 2210 kfree(net->ipv6.fib6_main_tbl);
e0b85590 2211#endif
e0b85590 2212out_fib_table_hash:
58f09b78 2213 kfree(net->ipv6.fib_table_hash);
c572872f
BT
2214out_rt6_stats:
2215 kfree(net->ipv6.rt6_stats);
63152fc0 2216out_timer:
16ab6d7d 2217 fib6_notifier_exit(net);
417f28bb 2218 return -ENOMEM;
8db46f1d 2219}
58f09b78
DL
2220
2221static void fib6_net_exit(struct net *net)
2222{
ba1cc08d
SD
2223 unsigned int i;
2224
417f28bb
SH
2225 del_timer_sync(&net->ipv6.ip6_fib_timer);
2226
32a805ba 2227 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
ba1cc08d
SD
2228 struct hlist_head *head = &net->ipv6.fib_table_hash[i];
2229 struct hlist_node *tmp;
2230 struct fib6_table *tb;
2231
2232 hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
2233 hlist_del(&tb->tb6_hlist);
2234 fib6_free_table(tb);
2235 }
2236 }
2237
58f09b78 2238 kfree(net->ipv6.fib_table_hash);
c572872f 2239 kfree(net->ipv6.rt6_stats);
16ab6d7d 2240 fib6_notifier_exit(net);
58f09b78
DL
2241}
2242
2243static struct pernet_operations fib6_net_ops = {
2244 .init = fib6_net_init,
2245 .exit = fib6_net_exit,
2246};
2247
2248int __init fib6_init(void)
2249{
2250 int ret = -ENOMEM;
63152fc0 2251
58f09b78
DL
2252 fib6_node_kmem = kmem_cache_create("fib6_nodes",
2253 sizeof(struct fib6_node),
2254 0, SLAB_HWCACHE_ALIGN,
2255 NULL);
2256 if (!fib6_node_kmem)
2257 goto out;
2258
2259 ret = register_pernet_subsys(&fib6_net_ops);
2260 if (ret)
c572872f 2261 goto out_kmem_cache_create;
e8803b6c 2262
16feebcf
FW
2263 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
2264 inet6_dump_fib, 0);
e8803b6c
DM
2265 if (ret)
2266 goto out_unregister_subsys;
705f1c86
HFS
2267
2268 __fib6_flush_trees = fib6_flush_trees;
58f09b78
DL
2269out:
2270 return ret;
2271
e8803b6c
DM
2272out_unregister_subsys:
2273 unregister_pernet_subsys(&fib6_net_ops);
d63bddbe
DL
2274out_kmem_cache_create:
2275 kmem_cache_destroy(fib6_node_kmem);
2276 goto out;
1da177e4
LT
2277}
2278
2279void fib6_gc_cleanup(void)
2280{
58f09b78 2281 unregister_pernet_subsys(&fib6_net_ops);
1da177e4
LT
2282 kmem_cache_destroy(fib6_node_kmem);
2283}
8d2ca1d7
HFS
2284
2285#ifdef CONFIG_PROC_FS
8d2ca1d7
HFS
2286static int ipv6_route_seq_show(struct seq_file *seq, void *v)
2287{
8d1c802b 2288 struct fib6_info *rt = v;
8d2ca1d7 2289 struct ipv6_route_iter *iter = seq->private;
5e670d84 2290 const struct net_device *dev;
8d2ca1d7 2291
93c2fb25 2292 seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
8d2ca1d7
HFS
2293
2294#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 2295 seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
8d2ca1d7
HFS
2296#else
2297 seq_puts(seq, "00000000000000000000000000000000 00 ");
2298#endif
93c2fb25 2299 if (rt->fib6_flags & RTF_GATEWAY)
5e670d84 2300 seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
8d2ca1d7
HFS
2301 else
2302 seq_puts(seq, "00000000000000000000000000000000");
2303
5e670d84 2304 dev = rt->fib6_nh.nh_dev;
8d2ca1d7 2305 seq_printf(seq, " %08x %08x %08x %08x %8s\n",
93c2fb25
DA
2306 rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
2307 rt->fib6_flags, dev ? dev->name : "");
8d2ca1d7
HFS
2308 iter->w.leaf = NULL;
2309 return 0;
2310}
2311
94b2cfe0 2312static int ipv6_route_yield(struct fib6_walker *w)
8d2ca1d7
HFS
2313{
2314 struct ipv6_route_iter *iter = w->args;
2315
2316 if (!iter->skip)
2317 return 1;
2318
2319 do {
66f5d6ce 2320 iter->w.leaf = rcu_dereference_protected(
8fb11a9a 2321 iter->w.leaf->fib6_next,
66f5d6ce 2322 lockdep_is_held(&iter->tbl->tb6_lock));
8d2ca1d7
HFS
2323 iter->skip--;
2324 if (!iter->skip && iter->w.leaf)
2325 return 1;
2326 } while (iter->w.leaf);
2327
2328 return 0;
2329}
2330
9a03cd8f
MK
2331static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
2332 struct net *net)
8d2ca1d7
HFS
2333{
2334 memset(&iter->w, 0, sizeof(iter->w));
2335 iter->w.func = ipv6_route_yield;
2336 iter->w.root = &iter->tbl->tb6_root;
2337 iter->w.state = FWS_INIT;
2338 iter->w.node = iter->w.root;
2339 iter->w.args = iter;
0a67d3ef 2340 iter->sernum = iter->w.root->fn_sernum;
8d2ca1d7 2341 INIT_LIST_HEAD(&iter->w.lh);
9a03cd8f 2342 fib6_walker_link(net, &iter->w);
8d2ca1d7
HFS
2343}
2344
2345static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
2346 struct net *net)
2347{
2348 unsigned int h;
2349 struct hlist_node *node;
2350
2351 if (tbl) {
2352 h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
2353 node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
2354 } else {
2355 h = 0;
2356 node = NULL;
2357 }
2358
2359 while (!node && h < FIB6_TABLE_HASHSZ) {
2360 node = rcu_dereference_bh(
2361 hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
2362 }
2363 return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
2364}
2365
0a67d3ef
HFS
2366static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
2367{
2368 if (iter->sernum != iter->w.root->fn_sernum) {
2369 iter->sernum = iter->w.root->fn_sernum;
2370 iter->w.state = FWS_INIT;
2371 iter->w.node = iter->w.root;
2372 WARN_ON(iter->w.skip);
2373 iter->w.skip = iter->w.count;
2374 }
2375}
2376
8d2ca1d7
HFS
2377static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2378{
2379 int r;
8d1c802b 2380 struct fib6_info *n;
8d2ca1d7
HFS
2381 struct net *net = seq_file_net(seq);
2382 struct ipv6_route_iter *iter = seq->private;
2383
2384 if (!v)
2385 goto iter_table;
2386
8fb11a9a 2387 n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
8d2ca1d7
HFS
2388 if (n) {
2389 ++*pos;
2390 return n;
2391 }
2392
2393iter_table:
0a67d3ef 2394 ipv6_route_check_sernum(iter);
66f5d6ce 2395 spin_lock_bh(&iter->tbl->tb6_lock);
8d2ca1d7 2396 r = fib6_walk_continue(&iter->w);
66f5d6ce 2397 spin_unlock_bh(&iter->tbl->tb6_lock);
8d2ca1d7
HFS
2398 if (r > 0) {
2399 if (v)
2400 ++*pos;
2401 return iter->w.leaf;
2402 } else if (r < 0) {
9a03cd8f 2403 fib6_walker_unlink(net, &iter->w);
8d2ca1d7
HFS
2404 return NULL;
2405 }
9a03cd8f 2406 fib6_walker_unlink(net, &iter->w);
8d2ca1d7
HFS
2407
2408 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
2409 if (!iter->tbl)
2410 return NULL;
2411
9a03cd8f 2412 ipv6_route_seq_setup_walk(iter, net);
8d2ca1d7
HFS
2413 goto iter_table;
2414}
2415
2416static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
2417 __acquires(RCU_BH)
2418{
2419 struct net *net = seq_file_net(seq);
2420 struct ipv6_route_iter *iter = seq->private;
2421
2422 rcu_read_lock_bh();
2423 iter->tbl = ipv6_route_seq_next_table(NULL, net);
2424 iter->skip = *pos;
2425
2426 if (iter->tbl) {
9a03cd8f 2427 ipv6_route_seq_setup_walk(iter, net);
8d2ca1d7
HFS
2428 return ipv6_route_seq_next(seq, NULL, pos);
2429 } else {
2430 return NULL;
2431 }
2432}
2433
2434static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2435{
94b2cfe0 2436 struct fib6_walker *w = &iter->w;
8d2ca1d7
HFS
2437 return w->node && !(w->state == FWS_U && w->node == w->root);
2438}
2439
2440static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2441 __releases(RCU_BH)
2442{
9a03cd8f 2443 struct net *net = seq_file_net(seq);
8d2ca1d7
HFS
2444 struct ipv6_route_iter *iter = seq->private;
2445
2446 if (ipv6_route_iter_active(iter))
9a03cd8f 2447 fib6_walker_unlink(net, &iter->w);
8d2ca1d7
HFS
2448
2449 rcu_read_unlock_bh();
2450}
2451
c3506372 2452const struct seq_operations ipv6_route_seq_ops = {
8d2ca1d7
HFS
2453 .start = ipv6_route_seq_start,
2454 .next = ipv6_route_seq_next,
2455 .stop = ipv6_route_seq_stop,
2456 .show = ipv6_route_seq_show
2457};
8d2ca1d7 2458#endif /* CONFIG_PROC_FS */
This page took 1.747861 seconds and 4 git commands to generate.