1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic address resolution entity
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/slab.h>
17 #include <linux/kmemleak.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
42 #include <trace/events/neigh.h>
45 #define neigh_dbg(level, fmt, ...) \
47 if (level <= NEIGH_DEBUG) \
48 pr_debug(fmt, ##__VA_ARGS__); \
51 #define PNEIGH_HASHMASK 0xF
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
58 struct net_device *dev);
61 static const struct seq_operations neigh_stat_seq_ops;
65 Neighbour hash table buckets are protected with rwlock tbl->lock.
67 - All the scans/updates to hash buckets MUST be made under this lock.
68 - NOTHING clever should be done under this lock: no callbacks
69 into protocol backends, no attempts to send anything to the network.
70 Doing so will deadlock if the backend/driver wants to use the neighbour cache.
72 - If an entry requires some non-trivial action, increase
73 its reference count and release the table lock.
75 Neighbour entries are protected:
76 - with reference count.
77 - with rwlock neigh->lock
79 Reference count prevents destruction.
81 neigh->lock mainly serializes ll address data and its validity state.
82 However, the same lock is used to protect other entry fields:
86 Again, nothing clever should be done under neigh->lock;
87 the most complicated procedure we allow is dev->hard_header.
88 dev->hard_header is assumed to be simple and does
89 not make callbacks into the neighbour tables. */
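/* For illustration, the usual pattern for doing non-trivial work on an
 * entry found while scanning a bucket under tbl->lock is to pin the entry
 * and drop the table lock before calling out anywhere:
 *
 *	write_lock_bh(&tbl->lock);
 *	// ... locate struct neighbour *n in its hash bucket ...
 *	neigh_hold(n);			// take a reference
 *	write_unlock_bh(&tbl->lock);	// never call out with the lock held
 *	// ... talk to the protocol backend or driver ...
 *	neigh_release(n);
 */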
92 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
98 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 trace_neigh_cleanup_and_release(neigh, 0);
101 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
102 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
103 neigh_release(neigh);
107 * Returns a value uniformly distributed in the interval (1/2)*base...(3/2)*base.
108 * It corresponds to the default IPv6 settings and is not overridable,
109 * because it is a really reasonable choice.
112 unsigned long neigh_rand_reach_time(unsigned long base)
114 return base ? get_random_u32_below(base) + (base >> 1) : 0;
116 EXPORT_SYMBOL(neigh_rand_reach_time);
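/* For illustration: with base = 30 * HZ, get_random_u32_below(base) returns
 * a value in [0, 30*HZ) and adding base >> 1 shifts it to [15*HZ, 45*HZ),
 * i.e. uniformly distributed between (1/2)*base and (3/2)*base.
 */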
118 static void neigh_mark_dead(struct neighbour *n)
121 if (!list_empty(&n->gc_list)) {
122 list_del_init(&n->gc_list);
123 atomic_dec(&n->tbl->gc_entries);
125 if (!list_empty(&n->managed_list))
126 list_del_init(&n->managed_list);
129 static void neigh_update_gc_list(struct neighbour *n)
131 bool on_gc_list, exempt_from_gc;
133 write_lock_bh(&n->tbl->lock);
134 write_lock(&n->lock);
138 /* remove from the gc list if new state is permanent or if neighbor
139 * is externally learned; otherwise entry should be on the gc list
141 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
142 n->flags & NTF_EXT_LEARNED;
143 on_gc_list = !list_empty(&n->gc_list);
145 if (exempt_from_gc && on_gc_list) {
146 list_del_init(&n->gc_list);
147 atomic_dec(&n->tbl->gc_entries);
148 } else if (!exempt_from_gc && !on_gc_list) {
149 /* add entries to the tail; cleaning removes from the front */
150 list_add_tail(&n->gc_list, &n->tbl->gc_list);
151 atomic_inc(&n->tbl->gc_entries);
154 write_unlock(&n->lock);
155 write_unlock_bh(&n->tbl->lock);
158 static void neigh_update_managed_list(struct neighbour *n)
160 bool on_managed_list, add_to_managed;
162 write_lock_bh(&n->tbl->lock);
163 write_lock(&n->lock);
167 add_to_managed = n->flags & NTF_MANAGED;
168 on_managed_list = !list_empty(&n->managed_list);
170 if (!add_to_managed && on_managed_list)
171 list_del_init(&n->managed_list);
172 else if (add_to_managed && !on_managed_list)
173 list_add_tail(&n->managed_list, &n->tbl->managed_list);
175 write_unlock(&n->lock);
176 write_unlock_bh(&n->tbl->lock);
179 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
180 bool *gc_update, bool *managed_update)
182 u32 ndm_flags, old_flags = neigh->flags;
184 if (!(flags & NEIGH_UPDATE_F_ADMIN))
187 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
188 ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
190 if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
191 if (ndm_flags & NTF_EXT_LEARNED)
192 neigh->flags |= NTF_EXT_LEARNED;
194 neigh->flags &= ~NTF_EXT_LEARNED;
198 if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
199 if (ndm_flags & NTF_MANAGED)
200 neigh->flags |= NTF_MANAGED;
202 neigh->flags &= ~NTF_MANAGED;
204 *managed_update = true;
208 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
209 struct neigh_table *tbl)
213 write_lock(&n->lock);
214 if (refcount_read(&n->refcnt) == 1) {
215 struct neighbour *neigh;
217 neigh = rcu_dereference_protected(n->next,
218 lockdep_is_held(&tbl->lock));
219 rcu_assign_pointer(*np, neigh);
223 write_unlock(&n->lock);
225 neigh_cleanup_and_release(n);
229 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
231 struct neigh_hash_table *nht;
232 void *pkey = ndel->primary_key;
235 struct neighbour __rcu **np;
237 nht = rcu_dereference_protected(tbl->nht,
238 lockdep_is_held(&tbl->lock));
239 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
240 hash_val = hash_val >> (32 - nht->hash_shift);
242 np = &nht->hash_buckets[hash_val];
243 while ((n = rcu_dereference_protected(*np,
244 lockdep_is_held(&tbl->lock)))) {
246 return neigh_del(n, np, tbl);
252 static int neigh_forced_gc(struct neigh_table *tbl)
254 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
255 unsigned long tref = jiffies - 5 * HZ;
256 struct neighbour *n, *tmp;
259 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
261 write_lock_bh(&tbl->lock);
263 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
264 if (refcount_read(&n->refcnt) == 1) {
267 write_lock(&n->lock);
268 if ((n->nud_state == NUD_FAILED) ||
269 (n->nud_state == NUD_NOARP) ||
270 (tbl->is_multicast &&
271 tbl->is_multicast(n->primary_key)) ||
272 time_after(tref, n->updated))
274 write_unlock(&n->lock);
276 if (remove && neigh_remove_one(n, tbl))
278 if (shrunk >= max_clean)
283 tbl->last_flush = jiffies;
285 write_unlock_bh(&tbl->lock);
290 static void neigh_add_timer(struct neighbour *n, unsigned long when)
293 if (unlikely(mod_timer(&n->timer, when))) {
294 printk("NEIGH: BUG, double timer add, state is %x\n",
300 static int neigh_del_timer(struct neighbour *n)
302 if ((n->nud_state & NUD_IN_TIMER) &&
303 del_timer(&n->timer)) {
310 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
315 return __in_dev_arp_parms_get_rcu(dev);
317 return __in6_dev_nd_parms_get_rcu(dev);
322 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
324 struct neigh_parms *p;
327 p = neigh_get_dev_parms_rcu(dev, family);
333 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
336 struct sk_buff_head tmp;
340 skb_queue_head_init(&tmp);
341 spin_lock_irqsave(&list->lock, flags);
342 skb = skb_peek(list);
343 while (skb != NULL) {
344 struct sk_buff *skb_next = skb_peek_next(skb, list);
345 struct net_device *dev = skb->dev;
347 if (net == NULL || net_eq(dev_net(dev), net)) {
348 neigh_parms_qlen_dec(dev, family);
349 __skb_unlink(skb, list);
350 __skb_queue_tail(&tmp, skb);
354 spin_unlock_irqrestore(&list->lock, flags);
356 while ((skb = __skb_dequeue(&tmp))) {
362 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
366 struct neigh_hash_table *nht;
368 nht = rcu_dereference_protected(tbl->nht,
369 lockdep_is_held(&tbl->lock));
371 for (i = 0; i < (1 << nht->hash_shift); i++) {
373 struct neighbour __rcu **np = &nht->hash_buckets[i];
375 while ((n = rcu_dereference_protected(*np,
376 lockdep_is_held(&tbl->lock))) != NULL) {
377 if (dev && n->dev != dev) {
381 if (skip_perm && n->nud_state & NUD_PERMANENT) {
385 rcu_assign_pointer(*np,
386 rcu_dereference_protected(n->next,
387 lockdep_is_held(&tbl->lock)));
388 write_lock(&n->lock);
391 if (refcount_read(&n->refcnt) != 1) {
392 /* The most unpleasant situation:
393 we must destroy the neighbour entry,
394 but someone still uses it.
396 The destruction will be delayed until
397 the last user releases it, but
398 we must kill timers etc. and move it into the dead state. */
401 __skb_queue_purge(&n->arp_queue);
402 n->arp_queue_len_bytes = 0;
403 n->output = neigh_blackhole;
404 if (n->nud_state & NUD_VALID)
405 n->nud_state = NUD_NOARP;
407 n->nud_state = NUD_NONE;
408 neigh_dbg(2, "neigh %p is stray\n", n);
410 write_unlock(&n->lock);
411 neigh_cleanup_and_release(n);
416 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
418 write_lock_bh(&tbl->lock);
419 neigh_flush_dev(tbl, dev, false);
420 write_unlock_bh(&tbl->lock);
422 EXPORT_SYMBOL(neigh_changeaddr);
424 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
427 write_lock_bh(&tbl->lock);
428 neigh_flush_dev(tbl, dev, skip_perm);
429 pneigh_ifdown_and_unlock(tbl, dev);
430 pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
432 if (skb_queue_empty_lockless(&tbl->proxy_queue))
433 del_timer_sync(&tbl->proxy_timer);
437 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
439 __neigh_ifdown(tbl, dev, true);
442 EXPORT_SYMBOL(neigh_carrier_down);
444 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
446 __neigh_ifdown(tbl, dev, false);
449 EXPORT_SYMBOL(neigh_ifdown);
451 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
452 struct net_device *dev,
453 u32 flags, bool exempt_from_gc)
455 struct neighbour *n = NULL;
456 unsigned long now = jiffies;
462 entries = atomic_inc_return(&tbl->gc_entries) - 1;
463 if (entries >= tbl->gc_thresh3 ||
464 (entries >= tbl->gc_thresh2 &&
465 time_after(now, tbl->last_flush + 5 * HZ))) {
466 if (!neigh_forced_gc(tbl) &&
467 entries >= tbl->gc_thresh3) {
468 net_info_ratelimited("%s: neighbor table overflow!\n",
470 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
476 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
480 __skb_queue_head_init(&n->arp_queue);
481 rwlock_init(&n->lock);
482 seqlock_init(&n->ha_lock);
483 n->updated = n->used = now;
484 n->nud_state = NUD_NONE;
485 n->output = neigh_blackhole;
487 seqlock_init(&n->hh.hh_lock);
488 n->parms = neigh_parms_clone(&tbl->parms);
489 timer_setup(&n->timer, neigh_timer_handler, 0);
491 NEIGH_CACHE_STAT_INC(tbl, allocs);
493 refcount_set(&n->refcnt, 1);
495 INIT_LIST_HEAD(&n->gc_list);
496 INIT_LIST_HEAD(&n->managed_list);
498 atomic_inc(&tbl->entries);
504 atomic_dec(&tbl->gc_entries);
508 static void neigh_get_hash_rnd(u32 *x)
510 *x = get_random_u32() | 1;
513 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
515 size_t size = (1 << shift) * sizeof(struct neighbour *);
516 struct neigh_hash_table *ret;
517 struct neighbour __rcu **buckets;
520 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
523 if (size <= PAGE_SIZE) {
524 buckets = kzalloc(size, GFP_ATOMIC);
526 buckets = (struct neighbour __rcu **)
527 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
529 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
535 ret->hash_buckets = buckets;
536 ret->hash_shift = shift;
537 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
538 neigh_get_hash_rnd(&ret->hash_rnd[i]);
542 static void neigh_hash_free_rcu(struct rcu_head *head)
544 struct neigh_hash_table *nht = container_of(head,
545 struct neigh_hash_table,
547 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
548 struct neighbour __rcu **buckets = nht->hash_buckets;
550 if (size <= PAGE_SIZE) {
553 kmemleak_free(buckets);
554 free_pages((unsigned long)buckets, get_order(size));
559 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
560 unsigned long new_shift)
562 unsigned int i, hash;
563 struct neigh_hash_table *new_nht, *old_nht;
565 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
567 old_nht = rcu_dereference_protected(tbl->nht,
568 lockdep_is_held(&tbl->lock));
569 new_nht = neigh_hash_alloc(new_shift);
573 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
574 struct neighbour *n, *next;
576 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
577 lockdep_is_held(&tbl->lock));
580 hash = tbl->hash(n->primary_key, n->dev,
583 hash >>= (32 - new_nht->hash_shift);
584 next = rcu_dereference_protected(n->next,
585 lockdep_is_held(&tbl->lock));
587 rcu_assign_pointer(n->next,
588 rcu_dereference_protected(
589 new_nht->hash_buckets[hash],
590 lockdep_is_held(&tbl->lock)));
591 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
595 rcu_assign_pointer(tbl->nht, new_nht);
596 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
600 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
601 struct net_device *dev)
605 NEIGH_CACHE_STAT_INC(tbl, lookups);
608 n = __neigh_lookup_noref(tbl, pkey, dev);
610 if (!refcount_inc_not_zero(&n->refcnt))
612 NEIGH_CACHE_STAT_INC(tbl, hits);
615 rcu_read_unlock_bh();
618 EXPORT_SYMBOL(neigh_lookup);
620 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
624 unsigned int key_len = tbl->key_len;
626 struct neigh_hash_table *nht;
628 NEIGH_CACHE_STAT_INC(tbl, lookups);
631 nht = rcu_dereference_bh(tbl->nht);
632 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
634 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
636 n = rcu_dereference_bh(n->next)) {
637 if (!memcmp(n->primary_key, pkey, key_len) &&
638 net_eq(dev_net(n->dev), net)) {
639 if (!refcount_inc_not_zero(&n->refcnt))
641 NEIGH_CACHE_STAT_INC(tbl, hits);
646 rcu_read_unlock_bh();
649 EXPORT_SYMBOL(neigh_lookup_nodev);
651 static struct neighbour *
652 ___neigh_create(struct neigh_table *tbl, const void *pkey,
653 struct net_device *dev, u32 flags,
654 bool exempt_from_gc, bool want_ref)
656 u32 hash_val, key_len = tbl->key_len;
657 struct neighbour *n1, *rc, *n;
658 struct neigh_hash_table *nht;
661 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
662 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
664 rc = ERR_PTR(-ENOBUFS);
668 memcpy(n->primary_key, pkey, key_len);
670 netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
672 /* Protocol specific setup. */
673 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
675 goto out_neigh_release;
678 if (dev->netdev_ops->ndo_neigh_construct) {
679 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
682 goto out_neigh_release;
686 /* Device specific setup. */
687 if (n->parms->neigh_setup &&
688 (error = n->parms->neigh_setup(n)) < 0) {
690 goto out_neigh_release;
693 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
695 write_lock_bh(&tbl->lock);
696 nht = rcu_dereference_protected(tbl->nht,
697 lockdep_is_held(&tbl->lock));
699 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
700 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
702 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
704 if (n->parms->dead) {
705 rc = ERR_PTR(-EINVAL);
709 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
710 lockdep_is_held(&tbl->lock));
712 n1 = rcu_dereference_protected(n1->next,
713 lockdep_is_held(&tbl->lock))) {
714 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
724 list_add_tail(&n->gc_list, &n->tbl->gc_list);
725 if (n->flags & NTF_MANAGED)
726 list_add_tail(&n->managed_list, &n->tbl->managed_list);
729 rcu_assign_pointer(n->next,
730 rcu_dereference_protected(nht->hash_buckets[hash_val],
731 lockdep_is_held(&tbl->lock)));
732 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
733 write_unlock_bh(&tbl->lock);
734 neigh_dbg(2, "neigh %p is created\n", n);
739 write_unlock_bh(&tbl->lock);
742 atomic_dec(&tbl->gc_entries);
747 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
748 struct net_device *dev, bool want_ref)
750 return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
752 EXPORT_SYMBOL(__neigh_create);
754 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
756 u32 hash_val = *(u32 *)(pkey + key_len - 4);
757 hash_val ^= (hash_val >> 16);
758 hash_val ^= hash_val >> 8;
759 hash_val ^= hash_val >> 4;
760 hash_val &= PNEIGH_HASHMASK;
764 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
767 unsigned int key_len,
768 struct net_device *dev)
771 if (!memcmp(n->key, pkey, key_len) &&
772 net_eq(pneigh_net(n), net) &&
773 (n->dev == dev || !n->dev))
780 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
781 struct net *net, const void *pkey, struct net_device *dev)
783 unsigned int key_len = tbl->key_len;
784 u32 hash_val = pneigh_hash(pkey, key_len);
786 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
787 net, pkey, key_len, dev);
789 EXPORT_SYMBOL_GPL(__pneigh_lookup);
791 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
792 struct net *net, const void *pkey,
793 struct net_device *dev, int creat)
795 struct pneigh_entry *n;
796 unsigned int key_len = tbl->key_len;
797 u32 hash_val = pneigh_hash(pkey, key_len);
799 read_lock_bh(&tbl->lock);
800 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
801 net, pkey, key_len, dev);
802 read_unlock_bh(&tbl->lock);
809 n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
813 write_pnet(&n->net, net);
814 memcpy(n->key, pkey, key_len);
816 netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
818 if (tbl->pconstructor && tbl->pconstructor(n)) {
819 netdev_put(dev, &n->dev_tracker);
825 write_lock_bh(&tbl->lock);
826 n->next = tbl->phash_buckets[hash_val];
827 tbl->phash_buckets[hash_val] = n;
828 write_unlock_bh(&tbl->lock);
832 EXPORT_SYMBOL(pneigh_lookup);
835 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
836 struct net_device *dev)
838 struct pneigh_entry *n, **np;
839 unsigned int key_len = tbl->key_len;
840 u32 hash_val = pneigh_hash(pkey, key_len);
842 write_lock_bh(&tbl->lock);
843 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
845 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
846 net_eq(pneigh_net(n), net)) {
848 write_unlock_bh(&tbl->lock);
849 if (tbl->pdestructor)
851 netdev_put(n->dev, &n->dev_tracker);
856 write_unlock_bh(&tbl->lock);
860 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
861 struct net_device *dev)
863 struct pneigh_entry *n, **np, *freelist = NULL;
866 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
867 np = &tbl->phash_buckets[h];
868 while ((n = *np) != NULL) {
869 if (!dev || n->dev == dev) {
878 write_unlock_bh(&tbl->lock);
879 while ((n = freelist)) {
882 if (tbl->pdestructor)
884 netdev_put(n->dev, &n->dev_tracker);
890 static void neigh_parms_destroy(struct neigh_parms *parms);
892 static inline void neigh_parms_put(struct neigh_parms *parms)
894 if (refcount_dec_and_test(&parms->refcnt))
895 neigh_parms_destroy(parms);
899 * neighbour must already be out of the table;
902 void neigh_destroy(struct neighbour *neigh)
904 struct net_device *dev = neigh->dev;
906 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
909 pr_warn("Destroying alive neighbour %p\n", neigh);
914 if (neigh_del_timer(neigh))
915 pr_warn("Impossible event\n");
917 write_lock_bh(&neigh->lock);
918 __skb_queue_purge(&neigh->arp_queue);
919 write_unlock_bh(&neigh->lock);
920 neigh->arp_queue_len_bytes = 0;
922 if (dev->netdev_ops->ndo_neigh_destroy)
923 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
925 netdev_put(dev, &neigh->dev_tracker);
926 neigh_parms_put(neigh->parms);
928 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
930 atomic_dec(&neigh->tbl->entries);
931 kfree_rcu(neigh, rcu);
933 EXPORT_SYMBOL(neigh_destroy);
935 /* Neighbour state is suspicious;
938 Called with write_locked neigh.
940 static void neigh_suspect(struct neighbour *neigh)
942 neigh_dbg(2, "neigh %p is suspected\n", neigh);
944 neigh->output = neigh->ops->output;
947 /* Neighbour state is OK;
950 Called with write_locked neigh.
952 static void neigh_connect(struct neighbour *neigh)
954 neigh_dbg(2, "neigh %p is connected\n", neigh);
956 neigh->output = neigh->ops->connected_output;
959 static void neigh_periodic_work(struct work_struct *work)
961 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
963 struct neighbour __rcu **np;
965 struct neigh_hash_table *nht;
967 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
969 write_lock_bh(&tbl->lock);
970 nht = rcu_dereference_protected(tbl->nht,
971 lockdep_is_held(&tbl->lock));
974 * periodically recompute ReachableTime from random function
977 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
978 struct neigh_parms *p;
979 tbl->last_rand = jiffies;
980 list_for_each_entry(p, &tbl->parms_list, list)
982 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
985 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
988 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
989 np = &nht->hash_buckets[i];
991 while ((n = rcu_dereference_protected(*np,
992 lockdep_is_held(&tbl->lock))) != NULL) {
995 write_lock(&n->lock);
997 state = n->nud_state;
998 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
999 (n->flags & NTF_EXT_LEARNED)) {
1000 write_unlock(&n->lock);
1004 if (time_before(n->used, n->confirmed))
1005 n->used = n->confirmed;
1007 if (refcount_read(&n->refcnt) == 1 &&
1008 (state == NUD_FAILED ||
1009 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
1012 write_unlock(&n->lock);
1013 neigh_cleanup_and_release(n);
1016 write_unlock(&n->lock);
1022 * It's fine to release lock here, even if hash table
1023 * grows while we are preempted.
1025 write_unlock_bh(&tbl->lock);
1027 write_lock_bh(&tbl->lock);
1028 nht = rcu_dereference_protected(tbl->nht,
1029 lockdep_is_held(&tbl->lock));
1032 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
1033 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
1034 * BASE_REACHABLE_TIME.
1036 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1037 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
1038 write_unlock_bh(&tbl->lock);
1041 static __inline__ int neigh_max_probes(struct neighbour *n)
1043 struct neigh_parms *p = n->parms;
1044 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1045 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1046 NEIGH_VAR(p, MCAST_PROBES));
1049 static void neigh_invalidate(struct neighbour *neigh)
1050 __releases(neigh->lock)
1051 __acquires(neigh->lock)
1053 struct sk_buff *skb;
1055 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1056 neigh_dbg(2, "neigh %p is failed\n", neigh);
1057 neigh->updated = jiffies;
1059 /* This is a very delicate place. report_unreachable is a very complicated
1060 routine. In particular, it can hit the same neighbour entry!
1062 So we try to be careful here and avoid an endless loop. --ANK */
1064 while (neigh->nud_state == NUD_FAILED &&
1065 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1066 write_unlock(&neigh->lock);
1067 neigh->ops->error_report(neigh, skb);
1068 write_lock(&neigh->lock);
1070 __skb_queue_purge(&neigh->arp_queue);
1071 neigh->arp_queue_len_bytes = 0;
1074 static void neigh_probe(struct neighbour *neigh)
1075 __releases(neigh->lock)
1077 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1078 /* keep skb alive even if arp_queue overflows */
1080 skb = skb_clone(skb, GFP_ATOMIC);
1081 write_unlock(&neigh->lock);
1082 if (neigh->ops->solicit)
1083 neigh->ops->solicit(neigh, skb);
1084 atomic_inc(&neigh->probes);
1088 /* Called when a timer expires for a neighbour entry. */
1090 static void neigh_timer_handler(struct timer_list *t)
1092 unsigned long now, next;
1093 struct neighbour *neigh = from_timer(neigh, t, timer);
1097 write_lock(&neigh->lock);
1099 state = neigh->nud_state;
1103 if (!(state & NUD_IN_TIMER))
1106 if (state & NUD_REACHABLE) {
1107 if (time_before_eq(now,
1108 neigh->confirmed + neigh->parms->reachable_time)) {
1109 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1110 next = neigh->confirmed + neigh->parms->reachable_time;
1111 } else if (time_before_eq(now,
1113 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1114 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1115 neigh->nud_state = NUD_DELAY;
1116 neigh->updated = jiffies;
1117 neigh_suspect(neigh);
1118 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1120 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1121 neigh->nud_state = NUD_STALE;
1122 neigh->updated = jiffies;
1123 neigh_suspect(neigh);
1126 } else if (state & NUD_DELAY) {
1127 if (time_before_eq(now,
1129 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1130 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1131 neigh->nud_state = NUD_REACHABLE;
1132 neigh->updated = jiffies;
1133 neigh_connect(neigh);
1135 next = neigh->confirmed + neigh->parms->reachable_time;
1137 neigh_dbg(2, "neigh %p is probed\n", neigh);
1138 neigh->nud_state = NUD_PROBE;
1139 neigh->updated = jiffies;
1140 atomic_set(&neigh->probes, 0);
1142 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1146 /* NUD_PROBE|NUD_INCOMPLETE */
1147 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1150 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1151 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1152 neigh->nud_state = NUD_FAILED;
1154 neigh_invalidate(neigh);
1158 if (neigh->nud_state & NUD_IN_TIMER) {
1159 if (time_before(next, jiffies + HZ/100))
1160 next = jiffies + HZ/100;
1161 if (!mod_timer(&neigh->timer, next))
1164 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1168 write_unlock(&neigh->lock);
1172 neigh_update_notify(neigh, 0);
1174 trace_neigh_timer_handler(neigh, 0);
1176 neigh_release(neigh);
1179 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1180 const bool immediate_ok)
1183 bool immediate_probe = false;
1185 write_lock_bh(&neigh->lock);
1188 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1193 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1194 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1195 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1196 unsigned long next, now = jiffies;
1198 atomic_set(&neigh->probes,
1199 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1200 neigh_del_timer(neigh);
1201 neigh->nud_state = NUD_INCOMPLETE;
1202 neigh->updated = now;
1203 if (!immediate_ok) {
1206 immediate_probe = true;
1207 next = now + max(NEIGH_VAR(neigh->parms,
1211 neigh_add_timer(neigh, next);
1213 neigh->nud_state = NUD_FAILED;
1214 neigh->updated = jiffies;
1215 write_unlock_bh(&neigh->lock);
1217 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1220 } else if (neigh->nud_state & NUD_STALE) {
1221 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1222 neigh_del_timer(neigh);
1223 neigh->nud_state = NUD_DELAY;
1224 neigh->updated = jiffies;
1225 neigh_add_timer(neigh, jiffies +
1226 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1229 if (neigh->nud_state == NUD_INCOMPLETE) {
1231 while (neigh->arp_queue_len_bytes + skb->truesize >
1232 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1233 struct sk_buff *buff;
1235 buff = __skb_dequeue(&neigh->arp_queue);
1238 neigh->arp_queue_len_bytes -= buff->truesize;
1239 kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1240 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1243 __skb_queue_tail(&neigh->arp_queue, skb);
1244 neigh->arp_queue_len_bytes += skb->truesize;
1249 if (immediate_probe)
1252 write_unlock(&neigh->lock);
1254 trace_neigh_event_send_done(neigh, rc);
1258 if (neigh->nud_state & NUD_STALE)
1260 write_unlock_bh(&neigh->lock);
1261 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1262 trace_neigh_event_send_dead(neigh, 1);
1265 EXPORT_SYMBOL(__neigh_event_send);
1267 static void neigh_update_hhs(struct neighbour *neigh)
1269 struct hh_cache *hh;
1270 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1273 if (neigh->dev->header_ops)
1274 update = neigh->dev->header_ops->cache_update;
1278 if (READ_ONCE(hh->hh_len)) {
1279 write_seqlock_bh(&hh->hh_lock);
1280 update(hh, neigh->dev, neigh->ha);
1281 write_sequnlock_bh(&hh->hh_lock);
1286 /* Generic update routine.
1287 -- lladdr is new lladdr or NULL, if it is not supplied.
1288 -- new is new state.
1290 NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr if it differs.
1292 NEIGH_UPDATE_F_WEAK_OVERRIDE marks an existing "connected"
1293 lladdr as suspect instead of overriding it if it differs.
1295 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1296 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1297 NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
1298 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing NTF_ROUTER flag.
1300 NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known to be a router.
1303 Caller MUST hold a reference count on the entry. */
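/* For illustration (not a call made in this file): a resolver that has just
 * confirmed a known link-layer address might do
 *
 *	neigh_update(n, lladdr, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_WEAK_OVERRIDE, 0);
 *
 * whereas an administrative change coming from netlink would also pass
 * NEIGH_UPDATE_F_ADMIN and the requesting nlmsg_pid instead of 0.
 */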
1305 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1306 u8 new, u32 flags, u32 nlmsg_pid,
1307 struct netlink_ext_ack *extack)
1309 bool gc_update = false, managed_update = false;
1310 int update_isrouter = 0;
1311 struct net_device *dev;
1312 int err, notify = 0;
1315 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1317 write_lock_bh(&neigh->lock);
1320 old = neigh->nud_state;
1324 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1328 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1329 (old & (NUD_NOARP | NUD_PERMANENT)))
1332 neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1333 if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1334 new = old & ~NUD_PERMANENT;
1335 neigh->nud_state = new;
1340 if (!(new & NUD_VALID)) {
1341 neigh_del_timer(neigh);
1342 if (old & NUD_CONNECTED)
1343 neigh_suspect(neigh);
1344 neigh->nud_state = new;
1346 notify = old & NUD_VALID;
1347 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1348 (new & NUD_FAILED)) {
1349 neigh_invalidate(neigh);
1355 /* Compare new lladdr with cached one */
1356 if (!dev->addr_len) {
1357 /* First case: device needs no address. */
1359 } else if (lladdr) {
1360 /* The second case: if something is already cached
1361 and a new address is proposed:
1363 - if they are different, check override flag
1365 if ((old & NUD_VALID) &&
1366 !memcmp(lladdr, neigh->ha, dev->addr_len))
1369 /* No address is supplied; if we know something,
1370 use it, otherwise discard the request.
1373 if (!(old & NUD_VALID)) {
1374 NL_SET_ERR_MSG(extack, "No link layer address given");
1380 /* Update the confirmed timestamp for the neighbour entry after we
1381 * have received an ARP packet, even if it doesn't change the IP-to-MAC binding. */
1383 if (new & NUD_CONNECTED)
1384 neigh->confirmed = jiffies;
1386 /* If the entry was valid and the address has not changed,
1387 do not change the entry state if the new one is STALE. */
1390 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1391 if (old & NUD_VALID) {
1392 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1393 update_isrouter = 0;
1394 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1395 (old & NUD_CONNECTED)) {
1401 if (lladdr == neigh->ha && new == NUD_STALE &&
1402 !(flags & NEIGH_UPDATE_F_ADMIN))
1407 /* Update the timestamp only once we know we will make a change to the
1408 * neighbour entry. Otherwise we risk moving the locktime window with
1409 * no-op updates and ignoring relevant ARP updates. */
1411 if (new != old || lladdr != neigh->ha)
1412 neigh->updated = jiffies;
1415 neigh_del_timer(neigh);
1416 if (new & NUD_PROBE)
1417 atomic_set(&neigh->probes, 0);
1418 if (new & NUD_IN_TIMER)
1419 neigh_add_timer(neigh, (jiffies +
1420 ((new & NUD_REACHABLE) ?
1421 neigh->parms->reachable_time :
1423 neigh->nud_state = new;
1427 if (lladdr != neigh->ha) {
1428 write_seqlock(&neigh->ha_lock);
1429 memcpy(&neigh->ha, lladdr, dev->addr_len);
1430 write_sequnlock(&neigh->ha_lock);
1431 neigh_update_hhs(neigh);
1432 if (!(new & NUD_CONNECTED))
1433 neigh->confirmed = jiffies -
1434 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1439 if (new & NUD_CONNECTED)
1440 neigh_connect(neigh);
1442 neigh_suspect(neigh);
1443 if (!(old & NUD_VALID)) {
1444 struct sk_buff *skb;
1446 /* Again: avoid dead loop if something went wrong */
1448 while (neigh->nud_state & NUD_VALID &&
1449 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1450 struct dst_entry *dst = skb_dst(skb);
1451 struct neighbour *n2, *n1 = neigh;
1452 write_unlock_bh(&neigh->lock);
1456 /* Why not just use 'neigh' as-is? The problem is that
1457 * things such as shaper, eql, and sch_teql can end up
1458 * using alternative, different, neigh objects to output
1459 * the packet in the output path. So what we need to do
1460 * here is re-lookup the top-level neigh in the path so
1461 * we can reinject the packet there.
1464 if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1465 n2 = dst_neigh_lookup_skb(dst, skb);
1469 n1->output(n1, skb);
1474 write_lock_bh(&neigh->lock);
1476 __skb_queue_purge(&neigh->arp_queue);
1477 neigh->arp_queue_len_bytes = 0;
1480 if (update_isrouter)
1481 neigh_update_is_router(neigh, flags, &notify);
1482 write_unlock_bh(&neigh->lock);
1483 if (((new ^ old) & NUD_PERMANENT) || gc_update)
1484 neigh_update_gc_list(neigh);
1486 neigh_update_managed_list(neigh);
1488 neigh_update_notify(neigh, nlmsg_pid);
1489 trace_neigh_update_done(neigh, err);
1493 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1494 u32 flags, u32 nlmsg_pid)
1496 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1498 EXPORT_SYMBOL(neigh_update);
1500 /* Update the neigh to listen temporarily for probe responses, even if it is
1501 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. */
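/* For illustration, a caller already holding the lock for writing could arm
 * a one-shot probe like this:
 *
 *	write_lock_bh(&neigh->lock);
 *	__neigh_set_probe_once(neigh);
 *	write_unlock_bh(&neigh->lock);
 */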
1503 void __neigh_set_probe_once(struct neighbour *neigh)
1507 neigh->updated = jiffies;
1508 if (!(neigh->nud_state & NUD_FAILED))
1510 neigh->nud_state = NUD_INCOMPLETE;
1511 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1512 neigh_add_timer(neigh,
1513 jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1516 EXPORT_SYMBOL(__neigh_set_probe_once);
1518 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1519 u8 *lladdr, void *saddr,
1520 struct net_device *dev)
1522 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1523 lladdr || !dev->addr_len);
1525 neigh_update(neigh, lladdr, NUD_STALE,
1526 NEIGH_UPDATE_F_OVERRIDE, 0);
1529 EXPORT_SYMBOL(neigh_event_ns);
1531 /* called with read_lock_bh(&n->lock); */
1532 static void neigh_hh_init(struct neighbour *n)
1534 struct net_device *dev = n->dev;
1535 __be16 prot = n->tbl->protocol;
1536 struct hh_cache *hh = &n->hh;
1538 write_lock_bh(&n->lock);
1540 /* Only one thread can come in here and initialize the
1544 dev->header_ops->cache(n, hh, prot);
1546 write_unlock_bh(&n->lock);
1549 /* Slow and careful. */
1551 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1555 if (!neigh_event_send(neigh, skb)) {
1557 struct net_device *dev = neigh->dev;
1560 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1561 neigh_hh_init(neigh);
1564 __skb_pull(skb, skb_network_offset(skb));
1565 seq = read_seqbegin(&neigh->ha_lock);
1566 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1567 neigh->ha, NULL, skb->len);
1568 } while (read_seqretry(&neigh->ha_lock, seq));
1571 rc = dev_queue_xmit(skb);
1582 EXPORT_SYMBOL(neigh_resolve_output);
1584 /* As fast as possible without hh cache */
1586 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1588 struct net_device *dev = neigh->dev;
1593 __skb_pull(skb, skb_network_offset(skb));
1594 seq = read_seqbegin(&neigh->ha_lock);
1595 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1596 neigh->ha, NULL, skb->len);
1597 } while (read_seqretry(&neigh->ha_lock, seq));
1600 err = dev_queue_xmit(skb);
1607 EXPORT_SYMBOL(neigh_connected_output);
1609 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1611 return dev_queue_xmit(skb);
1613 EXPORT_SYMBOL(neigh_direct_output);
1615 static void neigh_managed_work(struct work_struct *work)
1617 struct neigh_table *tbl = container_of(work, struct neigh_table,
1619 struct neighbour *neigh;
1621 write_lock_bh(&tbl->lock);
1622 list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1623 neigh_event_send_probe(neigh, NULL, false);
1624 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1625 NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1626 write_unlock_bh(&tbl->lock);
1629 static void neigh_proxy_process(struct timer_list *t)
1631 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1632 long sched_next = 0;
1633 unsigned long now = jiffies;
1634 struct sk_buff *skb, *n;
1636 spin_lock(&tbl->proxy_queue.lock);
1638 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1639 long tdif = NEIGH_CB(skb)->sched_next - now;
1642 struct net_device *dev = skb->dev;
1644 neigh_parms_qlen_dec(dev, tbl->family);
1645 __skb_unlink(skb, &tbl->proxy_queue);
1647 if (tbl->proxy_redo && netif_running(dev)) {
1649 tbl->proxy_redo(skb);
1656 } else if (!sched_next || tdif < sched_next)
1659 del_timer(&tbl->proxy_timer);
1661 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1662 spin_unlock(&tbl->proxy_queue.lock);
1665 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1666 struct sk_buff *skb)
1668 unsigned long sched_next = jiffies +
1669 get_random_u32_below(NEIGH_VAR(p, PROXY_DELAY));
1671 if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1676 NEIGH_CB(skb)->sched_next = sched_next;
1677 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1679 spin_lock(&tbl->proxy_queue.lock);
1680 if (del_timer(&tbl->proxy_timer)) {
1681 if (time_before(tbl->proxy_timer.expires, sched_next))
1682 sched_next = tbl->proxy_timer.expires;
1686 __skb_queue_tail(&tbl->proxy_queue, skb);
1688 mod_timer(&tbl->proxy_timer, sched_next);
1689 spin_unlock(&tbl->proxy_queue.lock);
1691 EXPORT_SYMBOL(pneigh_enqueue);
1693 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1694 struct net *net, int ifindex)
1696 struct neigh_parms *p;
1698 list_for_each_entry(p, &tbl->parms_list, list) {
1699 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1700 (!p->dev && !ifindex && net_eq(net, &init_net)))
1707 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1708 struct neigh_table *tbl)
1710 struct neigh_parms *p;
1711 struct net *net = dev_net(dev);
1712 const struct net_device_ops *ops = dev->netdev_ops;
1714 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1717 refcount_set(&p->refcnt, 1);
1719 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1721 netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1723 write_pnet(&p->net, net);
1724 p->sysctl_table = NULL;
1726 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1727 netdev_put(dev, &p->dev_tracker);
1732 write_lock_bh(&tbl->lock);
1733 list_add(&p->list, &tbl->parms.list);
1734 write_unlock_bh(&tbl->lock);
1736 neigh_parms_data_state_cleanall(p);
1740 EXPORT_SYMBOL(neigh_parms_alloc);
1742 static void neigh_rcu_free_parms(struct rcu_head *head)
1744 struct neigh_parms *parms =
1745 container_of(head, struct neigh_parms, rcu_head);
1747 neigh_parms_put(parms);
1750 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1752 if (!parms || parms == &tbl->parms)
1754 write_lock_bh(&tbl->lock);
1755 list_del(&parms->list);
1757 write_unlock_bh(&tbl->lock);
1758 netdev_put(parms->dev, &parms->dev_tracker);
1759 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1761 EXPORT_SYMBOL(neigh_parms_release);
1763 static void neigh_parms_destroy(struct neigh_parms *parms)
1768 static struct lock_class_key neigh_table_proxy_queue_class;
1770 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1772 void neigh_table_init(int index, struct neigh_table *tbl)
1774 unsigned long now = jiffies;
1775 unsigned long phsize;
1777 INIT_LIST_HEAD(&tbl->parms_list);
1778 INIT_LIST_HEAD(&tbl->gc_list);
1779 INIT_LIST_HEAD(&tbl->managed_list);
1781 list_add(&tbl->parms.list, &tbl->parms_list);
1782 write_pnet(&tbl->parms.net, &init_net);
1783 refcount_set(&tbl->parms.refcnt, 1);
1784 tbl->parms.reachable_time =
1785 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1786 tbl->parms.qlen = 0;
1788 tbl->stats = alloc_percpu(struct neigh_statistics);
1790 panic("cannot create neighbour cache statistics");
1792 #ifdef CONFIG_PROC_FS
1793 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1794 &neigh_stat_seq_ops, tbl))
1795 panic("cannot create neighbour proc dir entry");
1798 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1800 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1801 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1803 if (!tbl->nht || !tbl->phash_buckets)
1804 panic("cannot allocate neighbour cache hashes");
1806 if (!tbl->entry_size)
1807 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1808 tbl->key_len, NEIGH_PRIV_ALIGN);
1810 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1812 rwlock_init(&tbl->lock);
1814 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1815 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1816 tbl->parms.reachable_time);
1817 INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1818 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1820 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1821 skb_queue_head_init_class(&tbl->proxy_queue,
1822 &neigh_table_proxy_queue_class);
1824 tbl->last_flush = now;
1825 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1827 neigh_tables[index] = tbl;
1829 EXPORT_SYMBOL(neigh_table_init);
1831 int neigh_table_clear(int index, struct neigh_table *tbl)
1833 neigh_tables[index] = NULL;
1834 /* This is not clean... fix it so the IPv6 module can be unloaded safely */
1835 cancel_delayed_work_sync(&tbl->managed_work);
1836 cancel_delayed_work_sync(&tbl->gc_work);
1837 del_timer_sync(&tbl->proxy_timer);
1838 pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1839 neigh_ifdown(tbl, NULL);
1840 if (atomic_read(&tbl->entries))
1841 pr_crit("neighbour leakage\n");
1843 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1844 neigh_hash_free_rcu);
1847 kfree(tbl->phash_buckets);
1848 tbl->phash_buckets = NULL;
1850 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1852 free_percpu(tbl->stats);
1857 EXPORT_SYMBOL(neigh_table_clear);
1859 static struct neigh_table *neigh_find_table(int family)
1861 struct neigh_table *tbl = NULL;
1865 tbl = neigh_tables[NEIGH_ARP_TABLE];
1868 tbl = neigh_tables[NEIGH_ND_TABLE];
1875 const struct nla_policy nda_policy[NDA_MAX+1] = {
1876 [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
1877 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1878 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1879 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1880 [NDA_PROBES] = { .type = NLA_U32 },
1881 [NDA_VLAN] = { .type = NLA_U16 },
1882 [NDA_PORT] = { .type = NLA_U16 },
1883 [NDA_VNI] = { .type = NLA_U32 },
1884 [NDA_IFINDEX] = { .type = NLA_U32 },
1885 [NDA_MASTER] = { .type = NLA_U32 },
1886 [NDA_PROTOCOL] = { .type = NLA_U8 },
1887 [NDA_NH_ID] = { .type = NLA_U32 },
1888 [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1889 [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
1892 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1893 struct netlink_ext_ack *extack)
1895 struct net *net = sock_net(skb->sk);
1897 struct nlattr *dst_attr;
1898 struct neigh_table *tbl;
1899 struct neighbour *neigh;
1900 struct net_device *dev = NULL;
1904 if (nlmsg_len(nlh) < sizeof(*ndm))
1907 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1909 NL_SET_ERR_MSG(extack, "Network address not specified");
1913 ndm = nlmsg_data(nlh);
1914 if (ndm->ndm_ifindex) {
1915 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1922 tbl = neigh_find_table(ndm->ndm_family);
1924 return -EAFNOSUPPORT;
1926 if (nla_len(dst_attr) < (int)tbl->key_len) {
1927 NL_SET_ERR_MSG(extack, "Invalid network address");
1931 if (ndm->ndm_flags & NTF_PROXY) {
1932 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1939 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1940 if (neigh == NULL) {
1945 err = __neigh_update(neigh, NULL, NUD_FAILED,
1946 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1947 NETLINK_CB(skb).portid, extack);
1948 write_lock_bh(&tbl->lock);
1949 neigh_release(neigh);
1950 neigh_remove_one(neigh, tbl);
1951 write_unlock_bh(&tbl->lock);
1957 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1958 struct netlink_ext_ack *extack)
1960 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1961 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1962 struct net *net = sock_net(skb->sk);
1964 struct nlattr *tb[NDA_MAX+1];
1965 struct neigh_table *tbl;
1966 struct net_device *dev = NULL;
1967 struct neighbour *neigh;
1974 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1975 nda_policy, extack);
1981 NL_SET_ERR_MSG(extack, "Network address not specified");
1985 ndm = nlmsg_data(nlh);
1986 ndm_flags = ndm->ndm_flags;
1987 if (tb[NDA_FLAGS_EXT]) {
1988 u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
1990 BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
1991 (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
1992 hweight32(NTF_EXT_MASK)));
1993 ndm_flags |= (ext << NTF_EXT_SHIFT);
1995 if (ndm->ndm_ifindex) {
1996 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2002 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
2003 NL_SET_ERR_MSG(extack, "Invalid link address");
2008 tbl = neigh_find_table(ndm->ndm_family);
2010 return -EAFNOSUPPORT;
2012 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
2013 NL_SET_ERR_MSG(extack, "Invalid network address");
2017 dst = nla_data(tb[NDA_DST]);
2018 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2020 if (tb[NDA_PROTOCOL])
2021 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
2022 if (ndm_flags & NTF_PROXY) {
2023 struct pneigh_entry *pn;
2025 if (ndm_flags & NTF_MANAGED) {
2026 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2031 pn = pneigh_lookup(tbl, net, dst, dev, 1);
2033 pn->flags = ndm_flags;
2035 pn->protocol = protocol;
2042 NL_SET_ERR_MSG(extack, "Device not specified");
2046 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2051 neigh = neigh_lookup(tbl, dst, dev);
2052 if (neigh == NULL) {
2053 bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
2054 bool exempt_from_gc = ndm_permanent ||
2055 ndm_flags & NTF_EXT_LEARNED;
2057 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2061 if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2062 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2067 neigh = ___neigh_create(tbl, dst, dev,
2069 (NTF_EXT_LEARNED | NTF_MANAGED),
2070 exempt_from_gc, true);
2071 if (IS_ERR(neigh)) {
2072 err = PTR_ERR(neigh);
2076 if (nlh->nlmsg_flags & NLM_F_EXCL) {
2078 neigh_release(neigh);
2082 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2083 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2084 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2088 neigh->protocol = protocol;
2089 if (ndm_flags & NTF_EXT_LEARNED)
2090 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2091 if (ndm_flags & NTF_ROUTER)
2092 flags |= NEIGH_UPDATE_F_ISROUTER;
2093 if (ndm_flags & NTF_MANAGED)
2094 flags |= NEIGH_UPDATE_F_MANAGED;
2095 if (ndm_flags & NTF_USE)
2096 flags |= NEIGH_UPDATE_F_USE;
2098 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2099 NETLINK_CB(skb).portid, extack);
2100 if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
2101 neigh_event_send(neigh, NULL);
2104 neigh_release(neigh);
2109 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2111 struct nlattr *nest;
2113 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2118 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2119 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2120 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2121 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2122 /* approximate value for the deprecated QUEUE_LEN (in packets) */
2123 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2124 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2125 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2126 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2127 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2128 NEIGH_VAR(parms, UCAST_PROBES)) ||
2129 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2130 NEIGH_VAR(parms, MCAST_PROBES)) ||
2131 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2132 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2133 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2135 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2136 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2137 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2138 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2139 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2140 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2141 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2142 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2143 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2144 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2145 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2146 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2147 nla_put_msecs(skb, NDTPA_LOCKTIME,
2148 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2149 nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2150 NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2151 goto nla_put_failure;
2152 return nla_nest_end(skb, nest);
2155 nla_nest_cancel(skb, nest);
2159 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2160 u32 pid, u32 seq, int type, int flags)
2162 struct nlmsghdr *nlh;
2163 struct ndtmsg *ndtmsg;
2165 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2169 ndtmsg = nlmsg_data(nlh);
2171 read_lock_bh(&tbl->lock);
2172 ndtmsg->ndtm_family = tbl->family;
2173 ndtmsg->ndtm_pad1 = 0;
2174 ndtmsg->ndtm_pad2 = 0;
2176 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2177 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2178 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2179 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2180 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2181 goto nla_put_failure;
2183 unsigned long now = jiffies;
2184 long flush_delta = now - tbl->last_flush;
2185 long rand_delta = now - tbl->last_rand;
2186 struct neigh_hash_table *nht;
2187 struct ndt_config ndc = {
2188 .ndtc_key_len = tbl->key_len,
2189 .ndtc_entry_size = tbl->entry_size,
2190 .ndtc_entries = atomic_read(&tbl->entries),
2191 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2192 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2193 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
2197 nht = rcu_dereference_bh(tbl->nht);
2198 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2199 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2200 rcu_read_unlock_bh();
2202 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2203 goto nla_put_failure;
2208 struct ndt_stats ndst;
2210 memset(&ndst, 0, sizeof(ndst));
2212 for_each_possible_cpu(cpu) {
2213 struct neigh_statistics *st;
2215 st = per_cpu_ptr(tbl->stats, cpu);
2216 ndst.ndts_allocs += st->allocs;
2217 ndst.ndts_destroys += st->destroys;
2218 ndst.ndts_hash_grows += st->hash_grows;
2219 ndst.ndts_res_failed += st->res_failed;
2220 ndst.ndts_lookups += st->lookups;
2221 ndst.ndts_hits += st->hits;
2222 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2223 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2224 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2225 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2226 ndst.ndts_table_fulls += st->table_fulls;
2229 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2231 goto nla_put_failure;
2234 BUG_ON(tbl->parms.dev);
2235 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2236 goto nla_put_failure;
2238 read_unlock_bh(&tbl->lock);
2239 nlmsg_end(skb, nlh);
2243 read_unlock_bh(&tbl->lock);
2244 nlmsg_cancel(skb, nlh);
2248 static int neightbl_fill_param_info(struct sk_buff *skb,
2249 struct neigh_table *tbl,
2250 struct neigh_parms *parms,
2251 u32 pid, u32 seq, int type,
2254 struct ndtmsg *ndtmsg;
2255 struct nlmsghdr *nlh;
2257 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2261 ndtmsg = nlmsg_data(nlh);
2263 read_lock_bh(&tbl->lock);
2264 ndtmsg->ndtm_family = tbl->family;
2265 ndtmsg->ndtm_pad1 = 0;
2266 ndtmsg->ndtm_pad2 = 0;
2268 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2269 neightbl_fill_parms(skb, parms) < 0)
2272 read_unlock_bh(&tbl->lock);
2273 nlmsg_end(skb, nlh);
2276 read_unlock_bh(&tbl->lock);
2277 nlmsg_cancel(skb, nlh);
2281 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2282 [NDTA_NAME] = { .type = NLA_STRING },
2283 [NDTA_THRESH1] = { .type = NLA_U32 },
2284 [NDTA_THRESH2] = { .type = NLA_U32 },
2285 [NDTA_THRESH3] = { .type = NLA_U32 },
2286 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2287 [NDTA_PARMS] = { .type = NLA_NESTED },
2290 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2291 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2292 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2293 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2294 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2295 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2296 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2297 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2298 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2299 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2300 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2301 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2302 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2303 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2304 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
2305 [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
2308 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2309 struct netlink_ext_ack *extack)
2311 struct net *net = sock_net(skb->sk);
2312 struct neigh_table *tbl;
2313 struct ndtmsg *ndtmsg;
2314 struct nlattr *tb[NDTA_MAX+1];
2318 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2319 nl_neightbl_policy, extack);
2323 if (tb[NDTA_NAME] == NULL) {
2328 ndtmsg = nlmsg_data(nlh);
2330 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2331 tbl = neigh_tables[tidx];
2334 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2336 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2346 * We acquire tbl->lock to be nice to the periodic timers and
2347 * make sure they always see a consistent set of values.
2349 write_lock_bh(&tbl->lock);
2351 if (tb[NDTA_PARMS]) {
2352 struct nlattr *tbp[NDTPA_MAX+1];
2353 struct neigh_parms *p;
2356 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2358 nl_ntbl_parm_policy, extack);
2360 goto errout_tbl_lock;
2362 if (tbp[NDTPA_IFINDEX])
2363 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2365 p = lookup_neigh_parms(tbl, net, ifindex);
2368 goto errout_tbl_lock;
2371 for (i = 1; i <= NDTPA_MAX; i++) {
2376 case NDTPA_QUEUE_LEN:
2377 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2378 nla_get_u32(tbp[i]) *
2379 SKB_TRUESIZE(ETH_FRAME_LEN));
2381 case NDTPA_QUEUE_LENBYTES:
2382 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2383 nla_get_u32(tbp[i]));
2385 case NDTPA_PROXY_QLEN:
2386 NEIGH_VAR_SET(p, PROXY_QLEN,
2387 nla_get_u32(tbp[i]));
2389 case NDTPA_APP_PROBES:
2390 NEIGH_VAR_SET(p, APP_PROBES,
2391 nla_get_u32(tbp[i]));
2393 case NDTPA_UCAST_PROBES:
2394 NEIGH_VAR_SET(p, UCAST_PROBES,
2395 nla_get_u32(tbp[i]));
2397 case NDTPA_MCAST_PROBES:
2398 NEIGH_VAR_SET(p, MCAST_PROBES,
2399 nla_get_u32(tbp[i]));
2401 case NDTPA_MCAST_REPROBES:
2402 NEIGH_VAR_SET(p, MCAST_REPROBES,
2403 nla_get_u32(tbp[i]));
2405 case NDTPA_BASE_REACHABLE_TIME:
2406 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2407 nla_get_msecs(tbp[i]));
2408 /* update reachable_time as well, otherwise, the change will
2409 * only be effective after the next time neigh_periodic_work
2410 * decides to recompute it (can be multiple minutes)
2413 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2415 case NDTPA_GC_STALETIME:
2416 NEIGH_VAR_SET(p, GC_STALETIME,
2417 nla_get_msecs(tbp[i]));
2419 case NDTPA_DELAY_PROBE_TIME:
2420 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2421 nla_get_msecs(tbp[i]));
2422 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2424 case NDTPA_INTERVAL_PROBE_TIME_MS:
2425 NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2426 nla_get_msecs(tbp[i]));
2428 case NDTPA_RETRANS_TIME:
2429 NEIGH_VAR_SET(p, RETRANS_TIME,
2430 nla_get_msecs(tbp[i]));
2432 case NDTPA_ANYCAST_DELAY:
2433 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2434 nla_get_msecs(tbp[i]));
2436 case NDTPA_PROXY_DELAY:
2437 NEIGH_VAR_SET(p, PROXY_DELAY,
2438 nla_get_msecs(tbp[i]));
2440 case NDTPA_LOCKTIME:
2441 NEIGH_VAR_SET(p, LOCKTIME,
2442 nla_get_msecs(tbp[i]));
2449 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2450 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2451 !net_eq(net, &init_net))
2452 goto errout_tbl_lock;
2454 if (tb[NDTA_THRESH1])
2455 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2457 if (tb[NDTA_THRESH2])
2458 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2460 if (tb[NDTA_THRESH3])
2461 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2463 if (tb[NDTA_GC_INTERVAL])
2464 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2469 write_unlock_bh(&tbl->lock);
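/*
 * A note on the queue-length attributes handled above: NDTPA_QUEUE_LEN is
 * accepted in packets for backward compatibility and stored as bytes,
 * scaled by the true size of a full Ethernet frame, while
 * NDTPA_QUEUE_LENBYTES sets the byte value directly.  Minimal sketch of
 * that conversion (variable names are assumptions):
 *
 *	u32 pkts  = nla_get_u32(tbp[NDTPA_QUEUE_LEN]);
 *	u32 bytes = pkts * SKB_TRUESIZE(ETH_FRAME_LEN);
 *	NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, bytes);
 */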
2474 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2475 struct netlink_ext_ack *extack)
2477 struct ndtmsg *ndtm;
2479 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2480 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2484 ndtm = nlmsg_data(nlh);
2485 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2486 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2490 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2491 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2498 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2500 const struct nlmsghdr *nlh = cb->nlh;
2501 struct net *net = sock_net(skb->sk);
2502 int family, tidx, nidx = 0;
2503 int tbl_skip = cb->args[0];
2504 int neigh_skip = cb->args[1];
2505 struct neigh_table *tbl;
2507 if (cb->strict_check) {
2508 int err = neightbl_valid_dump_info(nlh, cb->extack);
2514 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2516 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2517 struct neigh_parms *p;
2519 tbl = neigh_tables[tidx];
2523 if (tidx < tbl_skip || (family && tbl->family != family))
2526 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2527 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2532 p = list_next_entry(&tbl->parms, list);
2533 list_for_each_entry_from(p, &tbl->parms_list, list) {
2534 if (!net_eq(neigh_parms_net(p), net))
2537 if (nidx < neigh_skip)
2540 if (neightbl_fill_param_info(skb, tbl, p,
2541 NETLINK_CB(cb->skb).portid,
2559 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2560 u32 pid, u32 seq, int type, unsigned int flags)
2562 u32 neigh_flags, neigh_flags_ext;
2563 unsigned long now = jiffies;
2564 struct nda_cacheinfo ci;
2565 struct nlmsghdr *nlh;
2568 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2572 neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2573 neigh_flags = neigh->flags & NTF_OLD_MASK;
2575 ndm = nlmsg_data(nlh);
2576 ndm->ndm_family = neigh->ops->family;
2579 ndm->ndm_flags = neigh_flags;
2580 ndm->ndm_type = neigh->type;
2581 ndm->ndm_ifindex = neigh->dev->ifindex;
2583 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2584 goto nla_put_failure;
2586 read_lock_bh(&neigh->lock);
2587 ndm->ndm_state = neigh->nud_state;
2588 if (neigh->nud_state & NUD_VALID) {
2589 char haddr[MAX_ADDR_LEN];
2591 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2592 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2593 read_unlock_bh(&neigh->lock);
2594 goto nla_put_failure;
2598 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2599 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2600 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2601 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2602 read_unlock_bh(&neigh->lock);
2604 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2605 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2606 goto nla_put_failure;
2608 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2609 goto nla_put_failure;
2610 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2611 goto nla_put_failure;
2613 nlmsg_end(skb, nlh);
2617 nlmsg_cancel(skb, nlh);
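/*
 * Sketch of the RTM_NEWNEIGH payload assembled by neigh_fill_info() above
 * (attribute order as emitted; the optional ones depend on entry state):
 *
 *	struct ndmsg       family, ifindex, state, flags, type
 *	NDA_DST            tbl->key_len bytes of protocol address
 *	NDA_LLADDR         dev->addr_len bytes, only while NUD_VALID
 *	NDA_CACHEINFO      struct nda_cacheinfo (used/confirmed/updated/refcnt)
 *	NDA_PROBES         u32
 *	NDA_PROTOCOL       u8, only if neigh->protocol is set
 *	NDA_FLAGS_EXT      u32, only if flags above NTF_EXT_SHIFT are set
 */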
2621 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2622 u32 pid, u32 seq, int type, unsigned int flags,
2623 struct neigh_table *tbl)
2625 u32 neigh_flags, neigh_flags_ext;
2626 struct nlmsghdr *nlh;
2629 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2633 neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2634 neigh_flags = pn->flags & NTF_OLD_MASK;
2636 ndm = nlmsg_data(nlh);
2637 ndm->ndm_family = tbl->family;
2640 ndm->ndm_flags = neigh_flags | NTF_PROXY;
2641 ndm->ndm_type = RTN_UNICAST;
2642 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2643 ndm->ndm_state = NUD_NONE;
2645 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2646 goto nla_put_failure;
2648 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2649 goto nla_put_failure;
2650 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2651 goto nla_put_failure;
2653 nlmsg_end(skb, nlh);
2657 nlmsg_cancel(skb, nlh);
2661 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2663 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2664 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2667 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2669 struct net_device *master;
2674 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2676 /* 0 is already used to denote that NDA_MASTER wasn't passed, so we need
2677 * another invalid ifindex value to denote "no master".
2679 if (master_idx == -1)
2682 if (!master || master->ifindex != master_idx)
2688 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2690 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2696 struct neigh_dump_filter {
2701 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2702 struct netlink_callback *cb,
2703 struct neigh_dump_filter *filter)
2705 struct net *net = sock_net(skb->sk);
2706 struct neighbour *n;
2707 int rc, h, s_h = cb->args[1];
2708 int idx, s_idx = idx = cb->args[2];
2709 struct neigh_hash_table *nht;
2710 unsigned int flags = NLM_F_MULTI;
2712 if (filter->dev_idx || filter->master_idx)
2713 flags |= NLM_F_DUMP_FILTERED;
2716 nht = rcu_dereference_bh(tbl->nht);
2718 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2721 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2723 n = rcu_dereference_bh(n->next)) {
2724 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2726 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2727 neigh_master_filtered(n->dev, filter->master_idx))
2729 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2742 rcu_read_unlock_bh();
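/*
 * Dump resume state lives in the netlink callback args (args[0] is owned
 * by neigh_dump_info() below):
 *
 *	cb->args[0]	index of the neigh_table being dumped
 *	cb->args[1]	hash bucket to resume from (s_h above)
 *	cb->args[2]	entry index within that bucket (s_idx above)
 *	cb->args[3,4]	the equivalent pair used by pneigh_dump_table()
 */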
2748 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2749 struct netlink_callback *cb,
2750 struct neigh_dump_filter *filter)
2752 struct pneigh_entry *n;
2753 struct net *net = sock_net(skb->sk);
2754 int rc, h, s_h = cb->args[3];
2755 int idx, s_idx = idx = cb->args[4];
2756 unsigned int flags = NLM_F_MULTI;
2758 if (filter->dev_idx || filter->master_idx)
2759 flags |= NLM_F_DUMP_FILTERED;
2761 read_lock_bh(&tbl->lock);
2763 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2766 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2767 if (idx < s_idx || pneigh_net(n) != net)
2769 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2770 neigh_master_filtered(n->dev, filter->master_idx))
2772 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2774 RTM_NEWNEIGH, flags, tbl) < 0) {
2775 read_unlock_bh(&tbl->lock);
2784 read_unlock_bh(&tbl->lock);
2793 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2795 struct neigh_dump_filter *filter,
2796 struct netlink_ext_ack *extack)
2798 struct nlattr *tb[NDA_MAX + 1];
2804 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2805 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2809 ndm = nlmsg_data(nlh);
2810 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2811 ndm->ndm_state || ndm->ndm_type) {
2812 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2816 if (ndm->ndm_flags & ~NTF_PROXY) {
2817 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2821 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2822 tb, NDA_MAX, nda_policy,
2825 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2826 NDA_MAX, nda_policy, extack);
2831 for (i = 0; i <= NDA_MAX; ++i) {
2835 /* all new attributes should require strict_check */
2838 filter->dev_idx = nla_get_u32(tb[i]);
2841 filter->master_idx = nla_get_u32(tb[i]);
2845 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2854 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2856 const struct nlmsghdr *nlh = cb->nlh;
2857 struct neigh_dump_filter filter = {};
2858 struct neigh_table *tbl;
2863 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2865 /* check for the presence of a full ndmsg structure; the family member
2866 * sits at the same offset in both ndmsg and rtgenmsg
2868 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2869 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2872 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2873 if (err < 0 && cb->strict_check)
2878 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2879 tbl = neigh_tables[t];
2883 if (t < s_t || (family && tbl->family != family))
2886 memset(&cb->args[1], 0, sizeof(cb->args) -
2887 sizeof(cb->args[0]));
2889 err = pneigh_dump_table(tbl, skb, cb, &filter);
2891 err = neigh_dump_table(tbl, skb, cb, &filter);
2900 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2901 struct neigh_table **tbl,
2902 void **dst, int *dev_idx, u8 *ndm_flags,
2903 struct netlink_ext_ack *extack)
2905 struct nlattr *tb[NDA_MAX + 1];
2909 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2910 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2914 ndm = nlmsg_data(nlh);
2915 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2917 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2921 if (ndm->ndm_flags & ~NTF_PROXY) {
2922 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2926 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2927 NDA_MAX, nda_policy, extack);
2931 *ndm_flags = ndm->ndm_flags;
2932 *dev_idx = ndm->ndm_ifindex;
2933 *tbl = neigh_find_table(ndm->ndm_family);
2935 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2936 return -EAFNOSUPPORT;
2939 for (i = 0; i <= NDA_MAX; ++i) {
2945 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2946 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2949 *dst = nla_data(tb[i]);
2952 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2960 static inline size_t neigh_nlmsg_size(void)
2962 return NLMSG_ALIGN(sizeof(struct ndmsg))
2963 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2964 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2965 + nla_total_size(sizeof(struct nda_cacheinfo))
2966 + nla_total_size(4) /* NDA_PROBES */
2967 + nla_total_size(4) /* NDA_FLAGS_EXT */
2968 + nla_total_size(1); /* NDA_PROTOCOL */
2971 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2974 struct sk_buff *skb;
2977 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2981 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2987 err = rtnl_unicast(skb, net, pid);
2992 static inline size_t pneigh_nlmsg_size(void)
2994 return NLMSG_ALIGN(sizeof(struct ndmsg))
2995 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2996 + nla_total_size(4) /* NDA_FLAGS_EXT */
2997 + nla_total_size(1); /* NDA_PROTOCOL */
3000 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
3001 u32 pid, u32 seq, struct neigh_table *tbl)
3003 struct sk_buff *skb;
3006 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3010 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
3016 err = rtnl_unicast(skb, net, pid);
3021 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3022 struct netlink_ext_ack *extack)
3024 struct net *net = sock_net(in_skb->sk);
3025 struct net_device *dev = NULL;
3026 struct neigh_table *tbl = NULL;
3027 struct neighbour *neigh;
3033 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
3039 dev = __dev_get_by_index(net, dev_idx);
3041 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3047 NL_SET_ERR_MSG(extack, "Network address not specified");
3051 if (ndm_flags & NTF_PROXY) {
3052 struct pneigh_entry *pn;
3054 pn = pneigh_lookup(tbl, net, dst, dev, 0);
3056 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3059 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3060 nlh->nlmsg_seq, tbl);
3064 NL_SET_ERR_MSG(extack, "No device specified");
3068 neigh = neigh_lookup(tbl, dst, dev);
3070 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3074 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3077 neigh_release(neigh);
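/*
 * neigh_get() above is the RTM_GETNEIGH doit handler, i.e. the kernel side
 * of an iproute2 query such as "ip neigh get 192.0.2.1 dev eth0", or with
 * "proxy" for the NTF_PROXY branch.  Address and device name here are
 * illustrative only.
 */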
3082 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3085 struct neigh_hash_table *nht;
3088 nht = rcu_dereference_bh(tbl->nht);
3090 read_lock(&tbl->lock); /* avoid resizes */
3091 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3092 struct neighbour *n;
3094 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
3096 n = rcu_dereference_bh(n->next))
3099 read_unlock(&tbl->lock);
3100 rcu_read_unlock_bh();
3102 EXPORT_SYMBOL(neigh_for_each);
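/*
 * A minimal sketch of a hypothetical neigh_for_each() user; everything
 * except the exported API and arp_tbl is an assumption:
 *
 *	static void count_reachable(struct neighbour *n, void *cookie)
 *	{
 *		if (n->nud_state & NUD_REACHABLE)
 *			(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int nr = 0;
 *	neigh_for_each(&arp_tbl, count_reachable, &nr);
 */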
3104 /* The tbl->lock must be held as a writer and BH disabled. */
3105 void __neigh_for_each_release(struct neigh_table *tbl,
3106 int (*cb)(struct neighbour *))
3109 struct neigh_hash_table *nht;
3111 nht = rcu_dereference_protected(tbl->nht,
3112 lockdep_is_held(&tbl->lock));
3113 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3114 struct neighbour *n;
3115 struct neighbour __rcu **np;
3117 np = &nht->hash_buckets[chain];
3118 while ((n = rcu_dereference_protected(*np,
3119 lockdep_is_held(&tbl->lock))) != NULL) {
3122 write_lock(&n->lock);
3125 rcu_assign_pointer(*np,
3126 rcu_dereference_protected(n->next,
3127 lockdep_is_held(&tbl->lock)));
3131 write_unlock(&n->lock);
3133 neigh_cleanup_and_release(n);
3137 EXPORT_SYMBOL(__neigh_for_each_release);
3139 int neigh_xmit(int index, struct net_device *dev,
3140 const void *addr, struct sk_buff *skb)
3142 int err = -EAFNOSUPPORT;
3143 if (likely(index < NEIGH_NR_TABLES)) {
3144 struct neigh_table *tbl;
3145 struct neighbour *neigh;
3147 tbl = neigh_tables[index];
3151 if (index == NEIGH_ARP_TABLE) {
3152 u32 key = *((u32 *)addr);
3154 neigh = __ipv4_neigh_lookup_noref(dev, key);
3156 neigh = __neigh_lookup_noref(tbl, addr, dev);
3159 neigh = __neigh_create(tbl, addr, dev, false);
3160 err = PTR_ERR(neigh);
3161 if (IS_ERR(neigh)) {
3162 rcu_read_unlock_bh();
3165 err = neigh->output(neigh, skb);
3166 rcu_read_unlock_bh();
3168 else if (index == NEIGH_LINK_TABLE) {
3169 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3170 addr, NULL, skb->len);
3173 err = dev_queue_xmit(skb);
3181 EXPORT_SYMBOL(neigh_xmit);
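/*
 * A minimal sketch of a hypothetical neigh_xmit() caller (loosely modelled
 * on tunnel/MPLS style transmit paths); "via" and "out_dev" are assumptions,
 * not variables in this file.  The skb is consumed on both success and
 * failure.
 *
 *	__be32 via = next_hop_ipv4_address;
 *	err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &via, skb);
 */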
3183 #ifdef CONFIG_PROC_FS
3185 static struct neighbour *neigh_get_first(struct seq_file *seq)
3187 struct neigh_seq_state *state = seq->private;
3188 struct net *net = seq_file_net(seq);
3189 struct neigh_hash_table *nht = state->nht;
3190 struct neighbour *n = NULL;
3193 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3194 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3195 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3198 if (!net_eq(dev_net(n->dev), net))
3200 if (state->neigh_sub_iter) {
3204 v = state->neigh_sub_iter(state, n, &fakep);
3208 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3210 if (n->nud_state & ~NUD_NOARP)
3213 n = rcu_dereference_bh(n->next);
3219 state->bucket = bucket;
3224 static struct neighbour *neigh_get_next(struct seq_file *seq,
3225 struct neighbour *n,
3228 struct neigh_seq_state *state = seq->private;
3229 struct net *net = seq_file_net(seq);
3230 struct neigh_hash_table *nht = state->nht;
3232 if (state->neigh_sub_iter) {
3233 void *v = state->neigh_sub_iter(state, n, pos);
3237 n = rcu_dereference_bh(n->next);
3241 if (!net_eq(dev_net(n->dev), net))
3243 if (state->neigh_sub_iter) {
3244 void *v = state->neigh_sub_iter(state, n, pos);
3249 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3252 if (n->nud_state & ~NUD_NOARP)
3255 n = rcu_dereference_bh(n->next);
3261 if (++state->bucket >= (1 << nht->hash_shift))
3264 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
3272 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3274 struct neighbour *n = neigh_get_first(seq);
3279 n = neigh_get_next(seq, n, pos);
3284 return *pos ? NULL : n;
3287 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3289 struct neigh_seq_state *state = seq->private;
3290 struct net *net = seq_file_net(seq);
3291 struct neigh_table *tbl = state->tbl;
3292 struct pneigh_entry *pn = NULL;
3295 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3296 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3297 pn = tbl->phash_buckets[bucket];
3298 while (pn && !net_eq(pneigh_net(pn), net))
3303 state->bucket = bucket;
3308 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3309 struct pneigh_entry *pn,
3312 struct neigh_seq_state *state = seq->private;
3313 struct net *net = seq_file_net(seq);
3314 struct neigh_table *tbl = state->tbl;
3318 } while (pn && !net_eq(pneigh_net(pn), net));
3321 if (++state->bucket > PNEIGH_HASHMASK)
3323 pn = tbl->phash_buckets[state->bucket];
3324 while (pn && !net_eq(pneigh_net(pn), net))
3336 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3338 struct pneigh_entry *pn = pneigh_get_first(seq);
3343 pn = pneigh_get_next(seq, pn, pos);
3348 return *pos ? NULL : pn;
3351 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3353 struct neigh_seq_state *state = seq->private;
3355 loff_t idxpos = *pos;
3357 rc = neigh_get_idx(seq, &idxpos);
3358 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3359 rc = pneigh_get_idx(seq, &idxpos);
3364 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3365 __acquires(tbl->lock)
3368 struct neigh_seq_state *state = seq->private;
3372 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3375 state->nht = rcu_dereference_bh(tbl->nht);
3376 read_lock(&tbl->lock);
3378 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3380 EXPORT_SYMBOL(neigh_seq_start);
3382 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3384 struct neigh_seq_state *state;
3387 if (v == SEQ_START_TOKEN) {
3388 rc = neigh_get_first(seq);
3392 state = seq->private;
3393 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3394 rc = neigh_get_next(seq, v, NULL);
3397 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3398 rc = pneigh_get_first(seq);
3400 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3401 rc = pneigh_get_next(seq, v, NULL);
3407 EXPORT_SYMBOL(neigh_seq_next);
3409 void neigh_seq_stop(struct seq_file *seq, void *v)
3410 __releases(tbl->lock)
3413 struct neigh_seq_state *state = seq->private;
3414 struct neigh_table *tbl = state->tbl;
3416 read_unlock(&tbl->lock);
3417 rcu_read_unlock_bh();
3419 EXPORT_SYMBOL(neigh_seq_stop);
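/*
 * Sketch of how a protocol wires these iterators into its own /proc
 * seq_file (modelled on ARP; the function name is an assumption).
 * neigh_seq_start() takes rcu_read_lock_bh() and tbl->lock, which
 * neigh_seq_stop() releases.
 *
 *	static void *arp_like_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 */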
3421 /* statistics via seq_file */
3423 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3425 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3429 return SEQ_START_TOKEN;
3431 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3432 if (!cpu_possible(cpu))
3435 return per_cpu_ptr(tbl->stats, cpu);
3440 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3442 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3445 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3446 if (!cpu_possible(cpu))
3449 return per_cpu_ptr(tbl->stats, cpu);
3455 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3460 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3462 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3463 struct neigh_statistics *st = v;
3465 if (v == SEQ_START_TOKEN) {
3466 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3470 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3471 "%08lx %08lx %08lx "
3472 "%08lx %08lx %08lx\n",
3473 atomic_read(&tbl->entries),
3484 st->rcv_probes_mcast,
3485 st->rcv_probes_ucast,
3487 st->periodic_gc_runs,
3496 static const struct seq_operations neigh_stat_seq_ops = {
3497 .start = neigh_stat_seq_start,
3498 .next = neigh_stat_seq_next,
3499 .stop = neigh_stat_seq_stop,
3500 .show = neigh_stat_seq_show,
3502 #endif /* CONFIG_PROC_FS */
3504 static void __neigh_notify(struct neighbour *n, int type, int flags,
3507 struct net *net = dev_net(n->dev);
3508 struct sk_buff *skb;
3511 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3515 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3517 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3518 WARN_ON(err == -EMSGSIZE);
3522 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3526 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3529 void neigh_app_ns(struct neighbour *n)
3531 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3533 EXPORT_SYMBOL(neigh_app_ns);
3535 #ifdef CONFIG_SYSCTL
3536 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3538 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3539 void *buffer, size_t *lenp, loff_t *ppos)
3542 struct ctl_table tmp = *ctl;
3544 tmp.extra1 = SYSCTL_ZERO;
3545 tmp.extra2 = &unres_qlen_max;
3548 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3549 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3552 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
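/*
 * The legacy "unres_qlen" sysctl is expressed in packets while the backing
 * NEIGH_VAR is QUEUE_LEN_BYTES, so reads divide and writes multiply by
 * SKB_TRUESIZE(ETH_FRAME_LEN).  Rough worked example (the exact truesize
 * depends on the kernel configuration):
 *
 *	write unres_qlen = 100
 *	  -> QUEUE_LEN_BYTES = 100 * SKB_TRUESIZE(ETH_FRAME_LEN)
 *	read unres_qlen
 *	  -> QUEUE_LEN_BYTES / SKB_TRUESIZE(ETH_FRAME_LEN), rounded down
 */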
3556 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3559 struct net_device *dev;
3560 int family = neigh_parms_family(p);
3563 for_each_netdev_rcu(net, dev) {
3564 struct neigh_parms *dst_p =
3565 neigh_get_dev_parms_rcu(dev, family);
3567 if (dst_p && !test_bit(index, dst_p->data_state))
3568 dst_p->data[index] = p->data[index];
3573 static void neigh_proc_update(struct ctl_table *ctl, int write)
3575 struct net_device *dev = ctl->extra1;
3576 struct neigh_parms *p = ctl->extra2;
3577 struct net *net = neigh_parms_net(p);
3578 int index = (int *) ctl->data - p->data;
3583 set_bit(index, p->data_state);
3584 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3585 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3586 if (!dev) /* NULL dev means this is default value */
3587 neigh_copy_dflt_parms(net, p, index);
3590 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3591 void *buffer, size_t *lenp,
3594 struct ctl_table tmp = *ctl;
3597 tmp.extra1 = SYSCTL_ZERO;
3598 tmp.extra2 = SYSCTL_INT_MAX;
3600 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3601 neigh_proc_update(ctl, write);
3605 static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
3606 void *buffer, size_t *lenp, loff_t *ppos)
3608 struct ctl_table tmp = *ctl;
3611 int min = msecs_to_jiffies(1);
3616 ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3617 neigh_proc_update(ctl, write);
3621 int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
3622 size_t *lenp, loff_t *ppos)
3624 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3626 neigh_proc_update(ctl, write);
3629 EXPORT_SYMBOL(neigh_proc_dointvec);
3631 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
3632 size_t *lenp, loff_t *ppos)
3634 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3636 neigh_proc_update(ctl, write);
3639 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3641 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3642 void *buffer, size_t *lenp,
3645 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3647 neigh_proc_update(ctl, write);
3651 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3652 void *buffer, size_t *lenp, loff_t *ppos)
3654 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3656 neigh_proc_update(ctl, write);
3659 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3661 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3662 void *buffer, size_t *lenp,
3665 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3667 neigh_proc_update(ctl, write);
3671 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3672 void *buffer, size_t *lenp,
3675 struct neigh_parms *p = ctl->extra2;
3678 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3679 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3680 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3681 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3685 if (write && ret == 0) {
3686 /* update reachable_time as well, otherwise, the change will
3687 * only be effective after the next time neigh_periodic_work
3688 * decides to recompute it
3689 */
3690 p->reachable_time =
3691 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3696 #define NEIGH_PARMS_DATA_OFFSET(index) \
3697 (&((struct neigh_parms *) 0)->data[index])
3699 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3700 [NEIGH_VAR_ ## attr] = { \
3702 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3703 .maxlen = sizeof(int), \
3705 .proc_handler = proc, \
3708 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3709 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3711 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3712 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3714 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3715 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3717 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3718 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3720 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3721 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3723 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3724 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3726 static struct neigh_sysctl_table {
3727 struct ctl_table_header *sysctl_header;
3728 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3729 } neigh_sysctl_template __read_mostly = {
3731 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3732 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3733 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3734 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3735 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3736 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3737 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3738 NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3739 "interval_probe_time_ms"),
3740 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3741 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3742 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3743 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3744 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3745 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3746 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3747 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3748 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3749 [NEIGH_VAR_GC_INTERVAL] = {
3750 .procname = "gc_interval",
3751 .maxlen = sizeof(int),
3753 .proc_handler = proc_dointvec_jiffies,
3755 [NEIGH_VAR_GC_THRESH1] = {
3756 .procname = "gc_thresh1",
3757 .maxlen = sizeof(int),
3759 .extra1 = SYSCTL_ZERO,
3760 .extra2 = SYSCTL_INT_MAX,
3761 .proc_handler = proc_dointvec_minmax,
3763 [NEIGH_VAR_GC_THRESH2] = {
3764 .procname = "gc_thresh2",
3765 .maxlen = sizeof(int),
3767 .extra1 = SYSCTL_ZERO,
3768 .extra2 = SYSCTL_INT_MAX,
3769 .proc_handler = proc_dointvec_minmax,
3771 [NEIGH_VAR_GC_THRESH3] = {
3772 .procname = "gc_thresh3",
3773 .maxlen = sizeof(int),
3775 .extra1 = SYSCTL_ZERO,
3776 .extra2 = SYSCTL_INT_MAX,
3777 .proc_handler = proc_dointvec_minmax,
3783 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3784 proc_handler *handler)
3787 struct neigh_sysctl_table *t;
3788 const char *dev_name_source;
3789 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3792 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3796 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3797 t->neigh_vars[i].data += (long) p;
3798 t->neigh_vars[i].extra1 = dev;
3799 t->neigh_vars[i].extra2 = p;
3803 dev_name_source = dev->name;
3804 /* Terminate the table early */
3805 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3806 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3808 struct neigh_table *tbl = p->tbl;
3809 dev_name_source = "default";
3810 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3811 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3812 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3813 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3818 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3820 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3821 /* RetransTime (in milliseconds) */
3822 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3823 /* ReachableTime (in milliseconds) */
3824 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3826 /* Those handlers will update p->reachable_time after
3827 * base_reachable_time(_ms) is set to ensure the new timer starts being
3828 * applied after the next neighbour update instead of waiting for
3829 * neigh_periodic_work to update its value (can be multiple minutes).
3830 * So any handler that replaces them should do this as well.
3833 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3834 neigh_proc_base_reachable_time;
3835 /* ReachableTime (in milliseconds) */
3836 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3837 neigh_proc_base_reachable_time;
3840 switch (neigh_parms_family(p)) {
3851 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3852 p_name, dev_name_source);
3854 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3855 if (!t->sysctl_header)
3858 p->sysctl_table = t;
3866 EXPORT_SYMBOL(neigh_sysctl_register);
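/*
 * The resulting sysctl hierarchy is net.<family>.neigh.<dev|default>.<name>,
 * e.g. for IPv4 (interface name illustrative):
 *
 *	net.ipv4.neigh.default.gc_thresh1
 *	net.ipv4.neigh.eth0.base_reachable_time_ms
 *
 * Per-device tables are terminated before NEIGH_VAR_GC_INTERVAL above, so
 * the gc_* knobs only appear under "default".
 */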
3868 void neigh_sysctl_unregister(struct neigh_parms *p)
3870 if (p->sysctl_table) {
3871 struct neigh_sysctl_table *t = p->sysctl_table;
3872 p->sysctl_table = NULL;
3873 unregister_net_sysctl_table(t->sysctl_header);
3877 EXPORT_SYMBOL(neigh_sysctl_unregister);
3879 #endif /* CONFIG_SYSCTL */
3881 static int __init neigh_init(void)
3883 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3884 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3885 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3887 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3889 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3894 subsys_initcall(neigh_init);