/*
 *	Generic address resolution entity
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/sysctl.h>
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static const struct file_operations neigh_stat_seq_fops;
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     It will result in deadlocks if a backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock;
   the most complicated procedure that we allow is dev->hard_header.
   It is supposed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
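
/*
 * Editor's illustrative sketch (not part of the original source): the
 * hold-then-drop pattern the rules above prescribe. The lookup and work
 * helpers below are hypothetical placeholders, not functions in this file.
 *
 *	write_lock_bh(&tbl->lock);
 *	n = find_entry_somehow(tbl);		(hypothetical lookup)
 *	if (n)
 *		neigh_hold(n);			(take a ref under the lock)
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		do_nontrivial_work(n);		(hypothetical; e.g. transmit)
 *		neigh_release(n);
 *	}
 */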
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
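
/*
 * Worked example (editor's addition): with base = 30 * HZ the result is
 * (prandom_u32() % (30 * HZ)) + (15 * HZ), i.e. uniform over
 * [15 * HZ, 45 * HZ), which is exactly the (1/2)*base ... (3/2)*base
 * interval described above. A base of 0 short-circuits to 0.
 */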
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
					lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
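
/*
 * Editor's illustrative sketch (not part of the original source): a
 * protocol typically resolves an address with neigh_lookup() and falls
 * back to creation, much as the __neigh_lookup() helpers in
 * <net/neighbour.h> do. Roughly:
 *
 *	n = neigh_lookup(tbl, pkey, dev);
 *	if (!n)
 *		n = __neigh_create(tbl, pkey, dev, true);
 *	if (IS_ERR(n))
 *		return PTR_ERR(n);
 *	... use n, then neigh_release(n) ...
 */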
static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
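
/*
 * Worked example (editor's addition): for the IPv4 key 192.168.1.1 the
 * last four bytes read as hash_val = 0xc0a80101. The folds then give
 * 0xc0a80101 ^ 0x0000c0a8 = 0xc0a8c1a9, ^ 0x00c0a8c1 = 0xc0686968,
 * ^ 0x0c068696 = 0xcc6eeffe, and masking with PNEIGH_HASHMASK keeps the
 * low nibble, 0xe, so the entry lands in bucket 14.
 */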
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey, int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
					   NEIGH_VAR(p, MCAST_PROBES));
}
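
/*
 * Worked example (editor's addition): with the usual ARP defaults of
 * UCAST_PROBES = 3, APP_PROBES = 0 and MCAST_PROBES = 3, an entry that
 * is not in NUD_PROBE may send up to 3 + 0 + 3 = 6 probes; once it is
 * in NUD_PROBE the multicast term is MCAST_REPROBES instead, so with
 * its default of 0 probing stops after the unicast budget.
 */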
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very thin place. report_unreachable() is a very
	   complicated routine. Particularly, it can hit the same
	   neighbour entry!

	   So we try to be accurate and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different. It also allows
				retaining the current state if lladdr
				is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				as a router.

   Caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED)))
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
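
/*
 * Editor's illustrative sketch (not from the original source): this is
 * roughly how a protocol confirms a neighbour from a received reply;
 * ARP reply processing, for example, does something like:
 *
 *	neigh = neigh_lookup(&arp_tbl, &sip, dev);
 *	if (neigh) {
 *		neigh_update(neigh, sha, NUD_REACHABLE,
 *			     NEIGH_UPDATE_F_OVERRIDE);
 *		neigh_release(neigh);
 *	}
 */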
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN);
	neigh_release(neigh);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
	neigh_release(neigh);

out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME)) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME)) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME)))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev->ifindex;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (dev_net(n->dev) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH,
					     NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh))
			goto out_kfree_skb;
		err = neigh->output(neigh, skb);
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
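
/*
 * Editor's illustrative sketch (not from the original source): a caller
 * that already knows the next hop's L3 address can hand a packet off in
 * one call, e.g.:
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &next_hop_v4, skb);
 *
 * where next_hop_v4 is a __be32 IPv4 address (hypothetical variable).
 * NEIGH_LINK_TABLE skips resolution entirely and emits straight to the
 * device.
 */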
2429 #ifdef CONFIG_PROC_FS
2431 static struct neighbour *neigh_get_first(struct seq_file *seq)
2433 struct neigh_seq_state *state = seq->private;
2434 struct net *net = seq_file_net(seq);
2435 struct neigh_hash_table *nht = state->nht;
2436 struct neighbour *n = NULL;
2437 int bucket = state->bucket;
2439 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2440 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2441 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2444 if (!net_eq(dev_net(n->dev), net))
2446 if (state->neigh_sub_iter) {
2450 v = state->neigh_sub_iter(state, n, &fakep);
2454 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2456 if (n->nud_state & ~NUD_NOARP)
2459 n = rcu_dereference_bh(n->next);
2465 state->bucket = bucket;
2470 static struct neighbour *neigh_get_next(struct seq_file *seq,
2471 struct neighbour *n,
2474 struct neigh_seq_state *state = seq->private;
2475 struct net *net = seq_file_net(seq);
2476 struct neigh_hash_table *nht = state->nht;
2478 if (state->neigh_sub_iter) {
2479 void *v = state->neigh_sub_iter(state, n, pos);
2483 n = rcu_dereference_bh(n->next);
2487 if (!net_eq(dev_net(n->dev), net))
2489 if (state->neigh_sub_iter) {
2490 void *v = state->neigh_sub_iter(state, n, pos);
2495 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2498 if (n->nud_state & ~NUD_NOARP)
2501 n = rcu_dereference_bh(n->next);
2507 if (++state->bucket >= (1 << nht->hash_shift))
2510 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2518 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2520 struct neighbour *n = neigh_get_first(seq);
2525 n = neigh_get_next(seq, n, pos);
2530 return *pos ? NULL : n;
2533 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2535 struct neigh_seq_state *state = seq->private;
2536 struct net *net = seq_file_net(seq);
2537 struct neigh_table *tbl = state->tbl;
2538 struct pneigh_entry *pn = NULL;
2539 int bucket = state->bucket;
2541 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2542 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2543 pn = tbl->phash_buckets[bucket];
2544 while (pn && !net_eq(pneigh_net(pn), net))
2549 state->bucket = bucket;
2554 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2555 struct pneigh_entry *pn,
2558 struct neigh_seq_state *state = seq->private;
2559 struct net *net = seq_file_net(seq);
2560 struct neigh_table *tbl = state->tbl;
2564 } while (pn && !net_eq(pneigh_net(pn), net));
2567 if (++state->bucket > PNEIGH_HASHMASK)
2569 pn = tbl->phash_buckets[state->bucket];
2570 while (pn && !net_eq(pneigh_net(pn), net))
2582 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2584 struct pneigh_entry *pn = pneigh_get_first(seq);
2589 pn = pneigh_get_next(seq, pn, pos);
2594 return *pos ? NULL : pn;
2597 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2599 struct neigh_seq_state *state = seq->private;
2601 loff_t idxpos = *pos;
2603 rc = neigh_get_idx(seq, &idxpos);
2604 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2605 rc = pneigh_get_idx(seq, &idxpos);
2610 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2613 struct neigh_seq_state *state = seq->private;
2617 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2620 state->nht = rcu_dereference_bh(tbl->nht);
2622 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2624 EXPORT_SYMBOL(neigh_seq_start);
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
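
/*
 * Illustrative sketch (not part of the original file): a protocol wires
 * the generic iterator into its own seq_operations roughly like this,
 * ARP being the canonical user ("arp_tbl" is IPv4's neigh_table and
 * arp_seq_show its own formatter).  The seq_file must carry a struct
 * neigh_seq_state as its private data.
 */
#if 0
static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* Hide NUD_NOARP entries so only real ARP neighbours are listed */
	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}

static const struct seq_operations arp_seq_ops = {
	.start	= arp_seq_start,
	.next	= neigh_seq_next,
	.stop	= neigh_seq_stop,
	.show	= arp_seq_show,
};
#endif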
/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),
		   st->allocs,
		   st->destroys,
		   st->hash_grows,
		   st->lookups,
		   st->hits,
		   st->res_failed,
		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,
		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards);

	return 0;
}
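
/*
 * Added note: the resulting per-table /proc file (e.g.
 * /proc/net/stat/arp_cache) is the header line above followed by one
 * row per possible CPU; every field after "entries" is a per-CPU
 * counter printed in hex (%08lx), while "entries" repeats the same
 * table-wide total on each row.
 */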
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
		sf->private = PDE_DATA(inode);
	}
	return ret;
}
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
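
/*
 * Added note: this is an upper bound for one RTM_*NEIGH message.  As a
 * rough worked example (values depend on the build, so treat them as
 * illustrative): NLMSG_ALIGN(sizeof(struct ndmsg)) = 12,
 * nla_total_size(MAX_ADDR_LEN = 32) = 36 for each of NDA_DST and
 * NDA_LLADDR, nla_total_size(16) = 20 for the cacheinfo and
 * nla_total_size(4) = 8 for NDA_PROBES, i.e. 112 bytes in total,
 * excluding the netlink header that nlmsg_new() accounts for separately.
 */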
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
static int zero;
static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
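
/*
 * Added note: "unres_qlen" is exposed to userspace in packets but stored
 * in bytes ("unres_qlen_bytes"), converted with SKB_TRUESIZE(ETH_FRAME_LEN):
 * ETH_FRAME_LEN (1514) plus the aligned sk_buff and skb_shared_info
 * overhead, so the exact per-packet factor is architecture dependent.
 * Writing e.g. 101 stores 101 * SKB_TRUESIZE(1514) into the byte limit,
 * and a later read rounds back down via the integer division above.
 */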
static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = &zero;
	tmp.extra2 = &int_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}
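
/*
 * Added usage note: with this handler installed, for example
 *
 *	echo 30000 > /proc/sys/net/ipv4/neigh/eth0/base_reachable_time_ms
 *
 * re-randomizes p->reachable_time right away, instead of the new base
 * value only taking hold at the next neigh_periodic_work() pass.
 */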
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
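
/*
 * Added note: as an example of what these wrappers generate,
 * NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit") expands to
 *
 *	[NEIGH_VAR_APP_PROBES] = {
 *		.procname	= "app_solicit",
 *		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_APP_PROBES),
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= neigh_proc_dointvec_zero_intmax,
 *	}
 *
 * .data starts out as a bare offset into neigh_parms->data[] and is
 * rebased to a real pointer by "data += (long) p" in
 * neigh_sysctl_register() below.
 */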
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
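
/*
 * Illustrative sketch (not from the original file, details hedged): a
 * protocol registers per-device parameters when the device appears.
 * IPv4's inetdev code does roughly the following, where "in_dev" is its
 * per-device state and a NULL handler selects the default proc handlers
 * installed above:
 */
#if 0
	err = neigh_sysctl_register(in_dev->dev, in_dev->arp_parms, NULL);
	if (err)
		goto out;	/* hypothetical error path */
#endif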
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif	/* CONFIG_SYSCTL */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);