2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
// Initial contents of the file-scope ipv4_devconf instance. Each visible
// designator is an IPV4_DEVCONF_ constant minus one: the three redirect
// knobs and SHARED_MEDIA start at 1, and the IGMPv2 and IGMPv3 unsolicited
// report intervals start at 10000 ms and 1000 ms.
// NOTE(review): this listing drops some lines of the initializer.
70 static struct ipv4_devconf ipv4_devconf = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
// Second file-scope ipv4_devconf instance holding default settings. Its
// visible entries enable the redirect knobs, SHARED_MEDIA and, additionally,
// ACCEPT_SOURCE_ROUTE; the IGMP unsolicited report intervals are in ms.
// NOTE(review): this listing drops some lines of the initializer.
81 static struct ipv4_devconf ipv4_devconf_dflt = {
83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
// Reads attribute attr from the default devconf that net->ipv4.devconf_dflt
// points to, by forwarding to IPV4_DEVCONF().
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
// Netlink attribute policy for IPv4 address messages, indexed by IFA_
// attribute id and passed to nlmsg_parse() by the RTM handlers in this file.
// Addresses and flags are NLA_U32, the label is a string of at most
// IFNAMSIZ - 1 bytes, and IFA_CACHEINFO uses sizeof(struct ifa_cacheinfo)
// as its len.
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 [IFA_LOCAL] = { .type = NLA_U32 },
98 [IFA_ADDRESS] = { .type = NLA_U32 },
99 [IFA_BROADCAST] = { .type = NLA_U32 },
100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
102 [IFA_FLAGS] = { .type = NLA_U32 },
// Global hash table of configured IPv4 addresses: 1 << 8 = 256 buckets,
// indexed by the value inet_addr_hash() returns.
105 #define IN4_ADDR_HSIZE_SHIFT 8
106 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
// Bucket index for addr in namespace net: the raw address bits are XORed
// with net_hash_mix(net) and reduced to IN4_ADDR_HSIZE_SHIFT bits by hash_32().
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
112 u32 val = (__force u32) addr ^ net_hash_mix(net);
114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
// Adds ifa to the head of the inet_addr_lst bucket chosen by hashing its
// ifa_local address within net, using the RCU list-add primitive.
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 u32 hash = inet_addr_hash(net, ifa->ifa_local);
122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
// Unlinks ifa from the global address hash via hlist_del_init_rcu().
125 static void inet_hash_remove(struct in_ifaddr *ifa)
128 hlist_del_init_rcu(&ifa->hash);
// Summary of the visible logic: the bucket selected by inet_addr_hash(net,
// addr) is walked under the RCU list iterator, comparing ifa_local with addr
// and the owning device's namespace with net. The later lines look addr up
// in the RT_TABLE_LOCAL FIB table and take FIB_RES_DEV(res) as the result
// when the lookup succeeds with type RTN_LOCAL.
// NOTE(review): several body lines are absent from this listing, so the
// exact branch structure and the devref handling cannot be confirmed here.
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141 u32 hash = inet_addr_hash(net, addr);
142 struct net_device *result = NULL;
143 struct in_ifaddr *ifa;
146 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 if (ifa->ifa_local == addr) {
148 struct net_device *dev = ifa->ifa_dev->dev;
// The hash table is shared by all namespaces, hence the net_eq() filter.
150 if (!net_eq(dev_net(dev), net))
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets work.
164 local = fib_get_table(net, RT_TABLE_LOCAL);
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
170 if (result && devref)
175 EXPORT_SYMBOL(__ip_dev_find);
// Forward declaration of the netlink address announcer, the blocking
// notifier chain used for address events, and a forward declaration of
// inet_del_ifa() (its parameter list continues on a line not shown here).
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
// Per-device sysctl registration hooks: two prototypes followed by two
// definitions with the same signatures.
// NOTE(review): the preprocessor conditionals and the function bodies are
// not visible in this listing; presumably the prototypes and the definitions
// sit in opposite branches of a config conditional - TODO confirm.
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
186 static int devinet_sysctl_register(struct in_device *idev)
190 static void devinet_sysctl_unregister(struct in_device *idev)
195 /* Locks all the inet devices. */
// Allocates a zero-filled struct in_ifaddr with GFP_KERNEL; returns the
// kzalloc() result unchanged, so NULL on allocation failure.
197 static struct in_ifaddr *inet_alloc_ifa(void)
199 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
// RCU callback: recovers the in_ifaddr that embeds head and drops the
// reference it holds on its in_device.
// NOTE(review): the release of the ifa memory itself is not visible in this
// listing.
202 static void inet_rcu_free_ifa(struct rcu_head *head)
204 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206 in_dev_put(ifa->ifa_dev);
// Defers the release of ifa: inet_rcu_free_ifa() is queued through
// call_rcu() rather than being called directly.
210 static void inet_free_ifa(struct in_ifaddr *ifa)
212 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
// Final teardown of an in_device. Warns if address or multicast lists are
// still populated, frees the mc_hash table, and emits a debug line when
// NET_REFCNT_DEBUG is defined.
// NOTE(review): the condition guarding the "Freeing alive" error and the
// remaining release steps are on lines not shown in this listing.
215 void in_dev_finish_destroy(struct in_device *idev)
217 struct net_device *dev = idev->dev;
219 WARN_ON(idev->ifa_list);
220 WARN_ON(idev->mc_list);
221 kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
227 pr_err("Freeing alive in_device %p\n", idev);
231 EXPORT_SYMBOL(in_dev_finish_destroy);
// Creates the IPv4 state for dev. Visible steps: allocate a zeroed
// in_device, copy the namespace default devconf into it and clear its sysctl
// pointer, allocate ARP neighbour parameters, disable LRO when FORWARDING is
// set, register sysctls, initialise multicast state, and finally publish the
// object through dev->ip_ptr.
// Returns in_dev, or ERR_PTR(err) when in_dev is NULL at the return.
// NOTE(review): error-path lines and reference-count calls are absent from
// this listing.
233 static struct in_device *inetdev_init(struct net_device *dev)
235 struct in_device *in_dev;
240 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
243 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 sizeof(in_dev->cnf));
245 in_dev->cnf.sysctl = NULL;
247 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 if (!in_dev->arp_parms)
250 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 dev_disable_lro(dev);
252 /* Reference in_dev->dev */
254 /* Account for reference dev->ip_ptr (below) */
257 err = devinet_sysctl_register(in_dev);
264 ip_mc_init_dev(in_dev);
265 if (dev->flags & IFF_UP)
268 /* we can receive as soon as ip_ptr is set -- do this last */
269 rcu_assign_pointer(dev->ip_ptr, in_dev);
271 return in_dev ?: ERR_PTR(err);
// RCU callback queued by inetdev_destroy(): recovers the in_device that
// embeds head.
// NOTE(review): the action taken on idev is on a line not shown; the name
// suggests a reference drop - TODO confirm.
278 static void in_dev_rcu_put(struct rcu_head *head)
280 struct in_device *idev = container_of(head, struct in_device, rcu_head);
// Tears down the IPv4 state of a device. Visible steps: destroy multicast
// state, delete addresses from the head of ifa_list until it is empty,
// clear dev->ip_ptr, unregister sysctls, release the ARP parameters, and
// queue in_dev_rcu_put() through call_rcu().
// NOTE(review): some statements of the body are absent from this listing.
284 static void inetdev_destroy(struct in_device *in_dev)
286 struct in_ifaddr *ifa;
287 struct net_device *dev;
295 ip_mc_destroy_dev(in_dev);
// Always deletes the current list head; the loop ends once the list is empty.
297 while ((ifa = in_dev->ifa_list) != NULL) {
298 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
302 RCU_INIT_POINTER(dev->ip_ptr, NULL);
304 devinet_sysctl_unregister(in_dev);
305 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
308 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
// Walks the primary addresses of in_dev looking for one whose subnet
// contains a and, when b is non-zero, also contains b.
// NOTE(review): the return statements are not visible in this listing.
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
314 for_primary_ifa(in_dev) {
315 if (inet_ifa_match(a, ifa)) {
316 if (!b || inet_ifa_match(b, ifa)) {
321 } endfor_ifa(in_dev);
// Removes the address *ifap from in_dev's list and from the global hash,
// then announces it: rtmsg_ifa(RTM_DELADDR) first, the inetaddr_chain
// notifier with NETDEV_DOWN second.
// When the removed address is primary (IFA_F_SECONDARY clear), the entries
// after it are scanned: secondaries with the same mask and subnet are
// unhashed, unlinked and announced as deleted, or one is kept in promote.
// A promoted address is relinked after last_prim, loses IFA_F_SECONDARY and
// is announced with RTM_NEWADDR.
// nlh and portid are forwarded to rtmsg_ifa().
// NOTE(review): many control-flow lines are absent from this listing, so
// the conditions tying do_promote and destroy to these steps cannot be
// confirmed here.
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327 int destroy, struct nlmsghdr *nlh, u32 portid)
329 struct in_ifaddr *promote = NULL;
330 struct in_ifaddr *ifa, *ifa1 = *ifap;
331 struct in_ifaddr *last_prim = in_dev->ifa_list;
332 struct in_ifaddr *prev_prom = NULL;
333 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
337 /* 1. Deleting primary ifaddr forces deletion all secondaries
338 * unless alias promotion is set
341 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
344 while ((ifa = *ifap1) != NULL) {
345 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346 ifa1->ifa_scope <= ifa->ifa_scope)
349 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350 ifa1->ifa_mask != ifa->ifa_mask ||
351 !inet_ifa_match(ifa1->ifa_address, ifa)) {
352 ifap1 = &ifa->ifa_next;
358 inet_hash_remove(ifa);
359 *ifap1 = ifa->ifa_next;
361 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362 blocking_notifier_call_chain(&inetaddr_chain,
372 /* On promotion all secondaries from subnet are changing
373 * the primary IP, we must remove all their routes silently
374 * and later to add them back with new prefsrc. Do this
375 * while all addresses are on the device list.
377 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378 if (ifa1->ifa_mask == ifa->ifa_mask &&
379 inet_ifa_match(ifa1->ifa_address, ifa))
380 fib_del_ifaddr(ifa, ifa1);
385 *ifap = ifa1->ifa_next;
386 inet_hash_remove(ifa1);
388 /* 3. Announce address deletion */
390 /* Send message first, then call notifier.
391 At first sight, FIB update triggered by notifier
392 will refer to already deleted ifaddr, that could confuse
393 netlink listeners. It is not true: look, gated sees
394 that route deleted and if it still thinks that ifaddr
395 is valid, it will try to restore deleted routes... Grr.
396 So that, this order is correct.
398 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
402 struct in_ifaddr *next_sec = promote->ifa_next;
405 prev_prom->ifa_next = promote->ifa_next;
406 promote->ifa_next = last_prim->ifa_next;
407 last_prim->ifa_next = promote;
410 promote->ifa_flags &= ~IFA_F_SECONDARY;
411 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412 blocking_notifier_call_chain(&inetaddr_chain,
414 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415 if (ifa1->ifa_mask != ifa->ifa_mask ||
416 !inet_ifa_match(ifa1->ifa_address, ifa))
// Convenience wrapper: deletes an address through __inet_del_ifa() with no
// originating netlink header (NULL) and portid 0.
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
429 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
// Forward declaration of the lifetime checker and the delayed work item
// that runs it.
432 static void check_lifetime(struct work_struct *work);
434 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
// Inserts ifa into the address list of ifa->ifa_dev. The list walk tracks
// the slot after the last primary whose scope is not narrower than ifa's,
// and compares each entry that shares ifa's mask and subnet by local address
// and by scope; ifa may be marked IFA_F_SECONDARY. A primary address is fed
// to prandom_seed(). After linking, ifa is added to the global hash, the
// lifetime work is rescheduled to run immediately, and the address is
// announced with RTM_NEWADDR followed by the NETDEV_UP notifier.
// NOTE(review): the error returns and several branch lines are absent from
// this listing.
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
439 struct in_device *in_dev = ifa->ifa_dev;
440 struct in_ifaddr *ifa1, **ifap, **last_primary;
444 if (!ifa->ifa_local) {
449 ifa->ifa_flags &= ~IFA_F_SECONDARY;
450 last_primary = &in_dev->ifa_list;
452 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453 ifap = &ifa1->ifa_next) {
454 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455 ifa->ifa_scope <= ifa1->ifa_scope)
456 last_primary = &ifa1->ifa_next;
457 if (ifa1->ifa_mask == ifa->ifa_mask &&
458 inet_ifa_match(ifa1->ifa_address, ifa)) {
459 if (ifa1->ifa_local == ifa->ifa_local) {
463 if (ifa1->ifa_scope != ifa->ifa_scope) {
467 ifa->ifa_flags |= IFA_F_SECONDARY;
471 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472 prandom_seed((__force u32) ifa->ifa_local);
476 ifa->ifa_next = *ifap;
479 inet_hash_insert(dev_net(in_dev->dev), ifa);
481 cancel_delayed_work(&check_lifetime_work);
482 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
484 /* Send message first, then call notifier.
485 Notifier will trigger FIB update, so that
486 listeners of netlink will know about new ifaddr */
487 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
// Convenience wrapper: inserts ifa through __inet_insert_ifa() with no
// originating netlink header (NULL) and portid 0, returning its result.
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
495 return __inet_insert_ifa(ifa, NULL, 0);
// Attaches ifa to the in_device of dev and inserts it. Marks all devconf
// and ARP parameter entries as explicitly set, rebinds ifa->ifa_dev when it
// differs (warning if it was already bound elsewhere), forces host scope for
// loopback addresses, and returns the result of inet_insert_ifa().
// NOTE(review): some lines of the body are absent from this listing.
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
500 struct in_device *in_dev = __in_dev_get_rtnl(dev);
508 ipv4_devconf_setall(in_dev);
509 neigh_parms_data_state_setall(in_dev->arp_parms);
510 if (ifa->ifa_dev != in_dev) {
511 WARN_ON(ifa->ifa_dev);
513 ifa->ifa_dev = in_dev;
515 if (ipv4_is_loopback(ifa->ifa_local))
516 ifa->ifa_scope = RT_SCOPE_HOST;
517 return inet_insert_ifa(ifa);
// Resolves ifindex to a net_device with dev_get_by_index_rcu() and reads
// its ip_ptr with rcu_dereference_rtnl(); in_dev starts as NULL.
// NOTE(review): the NULL check on dev and the return statement are on lines
// not shown in this listing.
520 /* Caller must hold RCU or RTNL :
521 * We dont take a reference on found in_device
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
525 struct net_device *dev;
526 struct in_device *in_dev = NULL;
529 dev = dev_get_by_index_rcu(net, ifindex);
531 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
535 EXPORT_SYMBOL(inetdev_by_index);
// Searches the primary addresses of in_dev for one whose mask equals mask
// and whose subnet contains prefix.
// NOTE(review): the return statements are not visible in this listing.
537 /* Called only from RTNL semaphored context. No locks. */
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
544 for_primary_ifa(in_dev) {
545 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
547 } endfor_ifa(in_dev);
// Builds an ip_mreqn from ifa (group = ifa_address, interface = the index
// of ifa's device) and calls ip_mc_join_group() or ip_mc_leave_group() on sk.
// NOTE(review): the branch on join and the return are on lines not shown.
551 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
553 struct ip_mreqn mreq = {
554 .imr_multiaddr.s_addr = ifa->ifa_address,
555 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
563 ret = ip_mc_join_group(sk, &mreq);
565 ret = ip_mc_leave_group(sk, &mreq);
// Netlink handler for address deletion. Parses the request with
// ifa_ipv4_policy, finds the in_device by ifm->ifa_index, then scans its
// address list for an entry matching the supplied IFA_LOCAL, IFA_LABEL and
// IFA_ADDRESS plus prefix length (each test applies only when the attribute
// is present, as far as the visible lines show). A multicast match first
// leaves the group on the autojoin socket; the entry is then removed with
// __inet_del_ifa(). -EADDRNOTAVAIL is assigned after the loop.
// NOTE(review): error checks and return statements are absent from this
// listing.
571 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
573 struct net *net = sock_net(skb->sk);
574 struct nlattr *tb[IFA_MAX+1];
575 struct in_device *in_dev;
576 struct ifaddrmsg *ifm;
577 struct in_ifaddr *ifa, **ifap;
582 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586 ifm = nlmsg_data(nlh);
587 in_dev = inetdev_by_index(net, ifm->ifa_index);
593 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
594 ifap = &ifa->ifa_next) {
596 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
599 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
602 if (tb[IFA_ADDRESS] &&
603 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
604 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
607 if (ipv4_is_multicast(ifa->ifa_address))
608 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
609 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
613 err = -EADDRNOTAVAIL;
// Sentinel lifetime value: the lifetime checks in this file skip expiry
// handling for a lifetime equal to this constant.
618 #define INFINITY_LIFE_TIME 0xFFFFFFFF
// Delayed-work handler that ages addresses. For every bucket of
// inet_addr_lst it makes two passes. The first, using the RCU iterator,
// computes each address's age in seconds, notes whether a change is needed
// (valid lifetime expired, or preferred lifetime expired without
// IFA_F_DEPRECATED) and pulls the next wake-up time forward. The second,
// using the _safe iterator, deletes expired addresses through inet_del_ifa()
// or sets IFA_F_DEPRECATED and announces the address with RTM_NEWADDR.
// Finally the work is requeued on system_power_efficient_wq, no earlier
// than now + ADDRCONF_TIMER_FUZZ_MAX.
// NOTE(review): locking calls, the action taken for IFA_F_PERMANENT
// entries and several branch lines are absent from this listing.
620 static void check_lifetime(struct work_struct *work)
622 unsigned long now, next, next_sec, next_sched;
623 struct in_ifaddr *ifa;
624 struct hlist_node *n;
628 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
630 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
631 bool change_needed = false;
634 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
637 if (ifa->ifa_flags & IFA_F_PERMANENT)
640 /* We try to batch several events at once. */
641 age = (now - ifa->ifa_tstamp +
642 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
645 age >= ifa->ifa_valid_lft) {
646 change_needed = true;
647 } else if (ifa->ifa_preferred_lft ==
648 INFINITY_LIFE_TIME) {
650 } else if (age >= ifa->ifa_preferred_lft) {
651 if (time_before(ifa->ifa_tstamp +
652 ifa->ifa_valid_lft * HZ, next))
653 next = ifa->ifa_tstamp +
654 ifa->ifa_valid_lft * HZ;
656 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
657 change_needed = true;
658 } else if (time_before(ifa->ifa_tstamp +
659 ifa->ifa_preferred_lft * HZ,
661 next = ifa->ifa_tstamp +
662 ifa->ifa_preferred_lft * HZ;
// Second pass: the _safe iterator is used because entries may be removed.
669 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
672 if (ifa->ifa_flags & IFA_F_PERMANENT)
675 /* We try to batch several events at once. */
676 age = (now - ifa->ifa_tstamp +
677 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
679 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
680 age >= ifa->ifa_valid_lft) {
681 struct in_ifaddr **ifap;
683 for (ifap = &ifa->ifa_dev->ifa_list;
684 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
686 inet_del_ifa(ifa->ifa_dev,
691 } else if (ifa->ifa_preferred_lft !=
692 INFINITY_LIFE_TIME &&
693 age >= ifa->ifa_preferred_lft &&
694 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
695 ifa->ifa_flags |= IFA_F_DEPRECATED;
696 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
702 next_sec = round_jiffies_up(next);
705 /* If rounded timeout is accurate enough, accept it. */
706 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
707 next_sched = next_sec;
710 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
711 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
712 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
714 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
// Applies valid and preferred lifetimes to ifa. Clears IFA_F_PERMANENT and
// IFA_F_DEPRECATED first, then stores each lifetime after passing it
// through addrconf_timeout_fixup() when addrconf_finite_timeout() accepts
// it. The timestamp is set to jiffies, and the creation stamp is set on
// first use (when it is still zero).
// NOTE(review): the else line before the IFA_F_PERMANENT assignment and the
// condition before the IFA_F_DEPRECATED assignment are not visible here.
718 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
721 unsigned long timeout;
723 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
725 timeout = addrconf_timeout_fixup(valid_lft, HZ);
726 if (addrconf_finite_timeout(timeout))
727 ifa->ifa_valid_lft = timeout;
729 ifa->ifa_flags |= IFA_F_PERMANENT;
731 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
732 if (addrconf_finite_timeout(timeout)) {
734 ifa->ifa_flags |= IFA_F_DEPRECATED;
735 ifa->ifa_preferred_lft = timeout;
737 ifa->ifa_tstamp = jiffies;
738 if (!ifa->ifa_cstamp)
739 ifa->ifa_cstamp = ifa->ifa_tstamp;
// Converts a netlink address message into a newly allocated in_ifaddr.
// Parses the attributes with ifa_ipv4_policy, tests for a prefix length
// above 32 or a missing IFA_LOCAL, resolves the device and its in_device,
// then fills the new ifa from the message. IFA_ADDRESS defaults to IFA_LOCAL;
// the label comes from IFA_LABEL or from the device name. When
// IFA_CACHEINFO is present its lifetimes are tested (ifa_valid zero, or
// ifa_prefered above ifa_valid) and written through pvalid_lft and
// pprefered_lft.
// NOTE(review): the error paths and the return statements are absent from
// this listing.
742 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
743 __u32 *pvalid_lft, __u32 *pprefered_lft)
745 struct nlattr *tb[IFA_MAX+1];
746 struct in_ifaddr *ifa;
747 struct ifaddrmsg *ifm;
748 struct net_device *dev;
749 struct in_device *in_dev;
752 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
756 ifm = nlmsg_data(nlh);
758 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
761 dev = __dev_get_by_index(net, ifm->ifa_index);
766 in_dev = __in_dev_get_rtnl(dev);
771 ifa = inet_alloc_ifa();
774 * A potential indev allocation can be left alive, it stays
775 * assigned to its device and is destroy with it.
779 ipv4_devconf_setall(in_dev);
780 neigh_parms_data_state_setall(in_dev->arp_parms);
783 if (!tb[IFA_ADDRESS])
784 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
786 INIT_HLIST_NODE(&ifa->hash);
787 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
788 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
789 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
791 ifa->ifa_scope = ifm->ifa_scope;
792 ifa->ifa_dev = in_dev;
794 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
795 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
797 if (tb[IFA_BROADCAST])
798 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
801 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
803 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
805 if (tb[IFA_CACHEINFO]) {
806 struct ifa_cacheinfo *ci;
808 ci = nla_data(tb[IFA_CACHEINFO]);
809 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
813 *pvalid_lft = ci->ifa_valid;
814 *pprefered_lft = ci->ifa_prefered;
// Scans the address list of ifa->ifa_dev for an entry with the same mask,
// the same subnet and the same local address as ifa.
// NOTE(review): the return statements are not visible in this listing.
825 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
827 struct in_device *in_dev = ifa->ifa_dev;
828 struct in_ifaddr *ifa1, **ifap;
833 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
834 ifap = &ifa1->ifa_next) {
835 if (ifa1->ifa_mask == ifa->ifa_mask &&
836 inet_ifa_match(ifa1->ifa_address, ifa) &&
837 ifa1->ifa_local == ifa->ifa_local)
// Netlink handler for address creation or replacement. Builds an ifa with
// rtm_to_ifaddr() (lifetimes default to INFINITY_LIFE_TIME) and looks for an
// existing match. One path sets the lifetimes, optionally configures
// multicast on the autojoin socket when IFA_F_MCAUTOJOIN is set, and returns
// the result of __inet_insert_ifa(). The other path tests NLM_F_EXCL and
// NLM_F_REPLACE, updates the lifetimes, reschedules the lifetime work to run
// immediately and announces the address (RTM_NEWADDR, then NETDEV_UP).
// NOTE(review): the lines that select between the two paths and the error
// returns are absent from this listing.
843 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
845 struct net *net = sock_net(skb->sk);
846 struct in_ifaddr *ifa;
847 struct in_ifaddr *ifa_existing;
848 __u32 valid_lft = INFINITY_LIFE_TIME;
849 __u32 prefered_lft = INFINITY_LIFE_TIME;
853 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
857 ifa_existing = find_matching_ifa(ifa);
859 /* It would be best to check for !NLM_F_CREATE here but
860 * userspace already relies on not having to provide this.
862 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
863 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
864 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
872 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
876 if (nlh->nlmsg_flags & NLM_F_EXCL ||
877 !(nlh->nlmsg_flags & NLM_F_REPLACE))
880 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
881 cancel_delayed_work(&check_lifetime_work);
882 queue_delayed_work(system_power_efficient_wq,
883 &check_lifetime_work, 0);
884 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
885 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
// Classful default prefix length for addr. rc starts at -1, the value kept
// for addresses that fall in none of the tested classes; the address is
// checked for the zero network and then, in host byte order, for class A, B
// and C.
// NOTE(review): the lengths assigned in each branch are on lines not shown.
891 * Determine a default network mask, based on the IP address.
894 static int inet_abc_len(__be32 addr)
896 int rc = -1; /* Something else, probably a multicast. */
898 if (ipv4_is_zeronet(addr))
901 __u32 haddr = ntohl(addr);
903 if (IN_CLASSA(haddr))
905 else if (IN_CLASSB(haddr))
907 else if (IN_CLASSC(haddr))
// ioctl entry point for IPv4 interface address requests.
// Flow visible in this listing:
//  1. Copy the ifreq from user space, force NUL termination of the name,
//     keep a copy of the caller's sockaddr, and look for a colon in the name.
//  2. Per command: the four "get" commands clear sin and set AF_INET; the
//     "set" commands test CAP_NET_ADMIN in the namespace's user_ns and that
//     the family is AF_INET.
//  3. Resolve the device by name, then find the matching address: first by
//     label plus original address, then by label alone.
//  4. Dispatch: getters copy a field of ifa into sin; setters unlink the
//     address with inet_del_ifa(..., 0), change one field and reinsert it.
//     SIOCSIFADDR builds a new address with a classful prefix (or a prefix
//     of 32 on the other side of the IFF_POINTOPOINT test).
//  5. The ifreq is copied back to user space on a visible exit line.
// NOTE(review): locking, labels, goto targets and many branch lines are
// absent from this listing, so the exact control flow cannot be confirmed.
915 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
918 struct sockaddr_in sin_orig;
919 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
920 struct in_device *in_dev;
921 struct in_ifaddr **ifap = NULL;
922 struct in_ifaddr *ifa = NULL;
923 struct net_device *dev;
926 int tryaddrmatch = 0;
929 * Fetch the caller's info block into kernel space
932 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
934 ifr.ifr_name[IFNAMSIZ - 1] = 0;
936 /* save original address for comparison */
937 memcpy(&sin_orig, sin, sizeof(*sin));
939 colon = strchr(ifr.ifr_name, ':');
943 dev_load(net, ifr.ifr_name);
946 case SIOCGIFADDR: /* Get interface address */
947 case SIOCGIFBRDADDR: /* Get the broadcast address */
948 case SIOCGIFDSTADDR: /* Get the destination address */
949 case SIOCGIFNETMASK: /* Get the netmask for the interface */
950 /* Note that these ioctls will not sleep,
951 so that we do not impose a lock.
952 One day we will be forced to put shlock here (I mean SMP)
954 tryaddrmatch = (sin_orig.sin_family == AF_INET);
955 memset(sin, 0, sizeof(*sin));
956 sin->sin_family = AF_INET;
961 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
964 case SIOCSIFADDR: /* Set interface address (and family) */
965 case SIOCSIFBRDADDR: /* Set the broadcast address */
966 case SIOCSIFDSTADDR: /* Set the destination address */
967 case SIOCSIFNETMASK: /* Set the netmask for the interface */
969 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
972 if (sin->sin_family != AF_INET)
983 dev = __dev_get_by_name(net, ifr.ifr_name);
990 in_dev = __in_dev_get_rtnl(dev);
993 /* Matthias Andree */
994 /* compare label and address (4.4BSD style) */
995 /* note: we only do this for a limited set of ioctls
996 and only if the original address family was AF_INET.
997 This is checked above. */
998 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
999 ifap = &ifa->ifa_next) {
1000 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1001 sin_orig.sin_addr.s_addr ==
1007 /* we didn't get a match, maybe the application is
1008 4.3BSD-style and passed in junk so we fall back to
1009 comparing just the label */
1011 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1012 ifap = &ifa->ifa_next)
1013 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1018 ret = -EADDRNOTAVAIL;
// Only SIOCSIFADDR and SIOCSIFFLAGS are exempt from needing an existing address.
1019 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1023 case SIOCGIFADDR: /* Get interface address */
1024 sin->sin_addr.s_addr = ifa->ifa_local;
1027 case SIOCGIFBRDADDR: /* Get the broadcast address */
1028 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1031 case SIOCGIFDSTADDR: /* Get the destination address */
1032 sin->sin_addr.s_addr = ifa->ifa_address;
1035 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1036 sin->sin_addr.s_addr = ifa->ifa_mask;
1041 ret = -EADDRNOTAVAIL;
1045 if (!(ifr.ifr_flags & IFF_UP))
1046 inet_del_ifa(in_dev, ifap, 1);
1049 ret = dev_change_flags(dev, ifr.ifr_flags);
1052 case SIOCSIFADDR: /* Set interface address (and family) */
1054 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1059 ifa = inet_alloc_ifa();
1062 INIT_HLIST_NODE(&ifa->hash);
1064 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1066 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1069 if (ifa->ifa_local == sin->sin_addr.s_addr)
1071 inet_del_ifa(in_dev, ifap, 0);
1072 ifa->ifa_broadcast = 0;
1076 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1078 if (!(dev->flags & IFF_POINTOPOINT)) {
1079 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1080 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1081 if ((dev->flags & IFF_BROADCAST) &&
1082 ifa->ifa_prefixlen < 31)
1083 ifa->ifa_broadcast = ifa->ifa_address |
1086 ifa->ifa_prefixlen = 32;
1087 ifa->ifa_mask = inet_make_mask(32);
1089 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1090 ret = inet_set_ifa(dev, ifa);
1093 case SIOCSIFBRDADDR: /* Set the broadcast address */
1095 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1096 inet_del_ifa(in_dev, ifap, 0);
1097 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1098 inet_insert_ifa(ifa);
1102 case SIOCSIFDSTADDR: /* Set the destination address */
1104 if (ifa->ifa_address == sin->sin_addr.s_addr)
1107 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110 inet_del_ifa(in_dev, ifap, 0);
1111 ifa->ifa_address = sin->sin_addr.s_addr;
1112 inet_insert_ifa(ifa);
1115 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1118 * The mask we set must be legal.
1121 if (bad_mask(sin->sin_addr.s_addr, 0))
1124 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1125 __be32 old_mask = ifa->ifa_mask;
1126 inet_del_ifa(in_dev, ifap, 0);
1127 ifa->ifa_mask = sin->sin_addr.s_addr;
1128 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1130 /* See if current broadcast address matches
1131 * with current netmask, then recalculate
1132 * the broadcast address. Otherwise it's a
1133 * funny address, so don't touch it since
1134 * the user seems to know what (s)he's doing...
1136 if ((dev->flags & IFF_BROADCAST) &&
1137 (ifa->ifa_prefixlen < 31) &&
1138 (ifa->ifa_broadcast ==
1139 (ifa->ifa_local|~old_mask))) {
1140 ifa->ifa_broadcast = (ifa->ifa_local |
1141 ~sin->sin_addr.s_addr);
1143 inet_insert_ifa(ifa);
1153 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
// Emits one struct ifreq per address of dev into the user buffer buf.
// For each address: stop when fewer than sizeof(ifr) bytes remain, build a
// zeroed ifreq carrying the label and an AF_INET sockaddr, copy it to user
// space, then advance buf, len and the running byte count done.
// NOTE(review): one visible line adds sizeof(ifr) to done without copying;
// its guarding condition, the error handling and the return are on lines
// not shown.
1157 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1159 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1160 struct in_ifaddr *ifa;
1167 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1169 done += sizeof(ifr);
1172 if (len < (int) sizeof(ifr))
1174 memset(&ifr, 0, sizeof(struct ifreq));
1175 strcpy(ifr.ifr_name, ifa->ifa_label);
1177 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1178 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1181 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1185 buf += sizeof(struct ifreq);
1186 len -= sizeof(struct ifreq);
1187 done += sizeof(struct ifreq);
// Chooses a local source address. On dev, primary addresses are tested
// against scope, and ifa_local is taken from an entry that satisfies
// (dst == 0 or dst is in its subnet) and from a second visible assignment
// whose guard is not shown. A second loop walks every device of the
// namespace under RCU and takes a primary address that is not link scope
// and whose scope is within the requested scope.
// NOTE(review): locking calls, loop exits and the return are absent from
// this listing.
1193 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1196 struct in_device *in_dev;
1197 struct net *net = dev_net(dev);
1200 in_dev = __in_dev_get_rcu(dev);
1204 for_primary_ifa(in_dev) {
1205 if (ifa->ifa_scope > scope)
1207 if (!dst || inet_ifa_match(dst, ifa)) {
1208 addr = ifa->ifa_local;
1212 addr = ifa->ifa_local;
1213 } endfor_ifa(in_dev);
1219 /* Not loopback addresses on loopback should be preferred
1220 in this case. It is important that lo is the first interface
1223 for_each_netdev_rcu(net, dev) {
1224 in_dev = __in_dev_get_rcu(dev);
1228 for_primary_ifa(in_dev) {
1229 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1230 ifa->ifa_scope <= scope) {
1231 addr = ifa->ifa_local;
1234 } endfor_ifa(in_dev);
1240 EXPORT_SYMBOL(inet_select_addr);
// Per-device helper for inet_confirm_addr(). Walks the addresses of in_dev,
// picking a candidate addr whose scope is within scope and which equals
// local (or any when local is 0), and tracking in same whether local and
// dst fall in an address's subnet. Returns addr when same is set, else 0.
// NOTE(review): the loop header and several conditions are absent from
// this listing.
1242 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1243 __be32 local, int scope)
1250 (local == ifa->ifa_local || !local) &&
1251 ifa->ifa_scope <= scope) {
1252 addr = ifa->ifa_local;
1257 same = (!local || inet_ifa_match(local, ifa)) &&
1258 (!dst || inet_ifa_match(dst, ifa));
1262 /* Is the selected addr into dst subnet? */
1263 if (inet_ifa_match(addr, ifa))
1265 /* No, then can we use new local src? */
1266 if (ifa->ifa_scope <= scope) {
1267 addr = ifa->ifa_local;
1270 /* search for large dst subnet for addr */
1274 } endfor_ifa(in_dev);
1276 return same ? addr : 0;
// Two visible paths: a direct return of confirm_addr_indev() for the given
// in_dev, and a loop over every device of net under RCU that calls
// confirm_addr_indev() on each device's in_device.
// NOTE(review): the condition selecting the direct path, the loop exit and
// the final return are on lines not shown.
1280 * Confirm that local IP address exists using wildcards:
1281 * - net: netns to check, cannot be NULL
1282 * - in_dev: only on this interface, NULL=any interface
1283 * - dst: only in the same subnet as dst, 0=any dst
1284 * - local: address, 0=autoselect the local address
1285 * - scope: maximum allowed scope value for the local address
1287 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1288 __be32 dst, __be32 local, int scope)
1291 struct net_device *dev;
1294 return confirm_addr_indev(in_dev, dst, local, scope);
1297 for_each_netdev_rcu(net, dev) {
1298 in_dev = __in_dev_get_rcu(dev);
1300 addr = confirm_addr_indev(in_dev, dst, local, scope);
1309 EXPORT_SYMBOL(inet_confirm_addr);
// Adds nb to the inetaddr_chain blocking notifier chain and returns the
// registration result.
1315 int register_inetaddr_notifier(struct notifier_block *nb)
1317 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1319 EXPORT_SYMBOL(register_inetaddr_notifier);
// Removes nb from the inetaddr_chain blocking notifier chain and returns
// the unregistration result.
1321 int unregister_inetaddr_notifier(struct notifier_block *nb)
1323 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1325 EXPORT_SYMBOL(unregister_inetaddr_notifier);
// For each address of in_dev: save the old label, overwrite the label with
// the new device name, then re-attach the alias suffix that started at the
// colon in the old label. The suffix is appended when it fits within
// IFNAMSIZ; otherwise it overwrites the tail of the new label. Each renamed
// address is announced with RTM_NEWADDR.
// NOTE(review): the lines handling a label without a colon (around the
// sprintf of ":%d") are only partly visible in this listing.
1327 /* Rename ifa_labels for a device name change. Make some effort to preserve
1328 * existing alias numbering and to create unique labels if possible.
1330 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1332 struct in_ifaddr *ifa;
1335 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1336 char old[IFNAMSIZ], *dot;
1338 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1339 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1342 dot = strchr(old, ':');
1344 sprintf(old, ":%d", named);
1347 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1348 strcat(ifa->ifa_label, dot);
1350 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1352 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
// Predicate on a device MTU. inetdev_event() uses it to decide whether to
// create IPv4 state for a device ("Re-enabling IP") and as the test ahead
// of its "disable IP when MTU is not enough" path.
// NOTE(review): the body, and therefore the actual threshold, is not
// visible in this listing.
1356 static bool inetdev_valid_mtu(unsigned int mtu)
// Sends one ARP request per address of in_dev in which ifa_local is passed
// as both IP address arguments of arp_send(), using the device's own
// hardware address as the source hardware address.
1361 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1362 struct in_device *in_dev)
1365 struct in_ifaddr *ifa;
1367 for (ifa = in_dev->ifa_list; ifa;
1368 ifa = ifa->ifa_next) {
1369 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1370 ifa->ifa_local, dev,
1371 ifa->ifa_local, NULL,
1372 dev->dev_addr, NULL);
// Netdevice notifier callback that keeps IPv4 per-device state in step with
// device events. Visible behaviour:
//  - NETDEV_REGISTER: create the in_device; on failure return the errno via
//    notifier_from_errno(); loopback devices get NOXFRM and NOPOLICY set.
//  - NETDEV_CHANGEMTU: create the in_device when the MTU is valid.
//  - In the switch: a loopback device is given an address with
//    INADDR_LOOPBACK, prefix length 8, host scope and infinite lifetimes;
//    CHANGEADDR tests IN_DEV_ARP_NOTIFY and NOTIFY_PEERS sends gratuitous
//    ARP; type changes unmap and remap multicast; the visible comment
//    "disable IP when MTU is not enough" sits just before the
//    NETDEV_UNREGISTER case that destroys the in_device; CHANGENAME
//    relabels addresses and re-registers sysctls.
// NOTE(review): some case labels, break statements and the early-exit
// conditions are absent from this listing.
1376 /* Called only under RTNL semaphore */
1378 static int inetdev_event(struct notifier_block *this, unsigned long event,
1381 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1382 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1387 if (event == NETDEV_REGISTER) {
1388 in_dev = inetdev_init(dev);
1390 return notifier_from_errno(PTR_ERR(in_dev));
1391 if (dev->flags & IFF_LOOPBACK) {
1392 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1393 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1395 } else if (event == NETDEV_CHANGEMTU) {
1396 /* Re-enabling IP */
1397 if (inetdev_valid_mtu(dev->mtu))
1398 in_dev = inetdev_init(dev);
1404 case NETDEV_REGISTER:
1405 pr_debug("%s: bug\n", __func__);
1406 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1409 if (!inetdev_valid_mtu(dev->mtu))
1411 if (dev->flags & IFF_LOOPBACK) {
1412 struct in_ifaddr *ifa = inet_alloc_ifa();
1415 INIT_HLIST_NODE(&ifa->hash);
1417 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1418 ifa->ifa_prefixlen = 8;
1419 ifa->ifa_mask = inet_make_mask(8);
1420 in_dev_hold(in_dev);
1421 ifa->ifa_dev = in_dev;
1422 ifa->ifa_scope = RT_SCOPE_HOST;
1423 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1424 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1425 INFINITY_LIFE_TIME);
1426 ipv4_devconf_setall(in_dev);
1427 neigh_parms_data_state_setall(in_dev->arp_parms);
1428 inet_insert_ifa(ifa);
1433 case NETDEV_CHANGEADDR:
1434 if (!IN_DEV_ARP_NOTIFY(in_dev))
1437 case NETDEV_NOTIFY_PEERS:
1438 /* Send gratuitous ARP to notify of link change */
1439 inetdev_send_gratuitous_arp(dev, in_dev);
1444 case NETDEV_PRE_TYPE_CHANGE:
1445 ip_mc_unmap(in_dev);
1447 case NETDEV_POST_TYPE_CHANGE:
1448 ip_mc_remap(in_dev);
1450 case NETDEV_CHANGEMTU:
1451 if (inetdev_valid_mtu(dev->mtu))
1453 /* disable IP when MTU is not enough */
1454 case NETDEV_UNREGISTER:
1455 inetdev_destroy(in_dev);
1457 case NETDEV_CHANGENAME:
1458 /* Do not notify about label change, this event is
1459 * not interesting to applications using netlink.
1461 inetdev_changename(dev, in_dev);
1463 devinet_sysctl_unregister(in_dev);
1464 devinet_sysctl_register(in_dev);
// Notifier block whose callback is inetdev_event().
1471 static struct notifier_block ip_netdev_notifier = {
1472 .notifier_call = inetdev_event,
// Size estimate for one address netlink message: the aligned ifaddrmsg
// header plus the total attribute size of each attribute that
// inet_fill_ifaddr() may emit (three 4-byte addresses, an IFNAMSIZ label,
// 4-byte flags and the cacheinfo structure).
1475 static size_t inet_nlmsg_size(void)
1477 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1478 + nla_total_size(4) /* IFA_ADDRESS */
1479 + nla_total_size(4) /* IFA_LOCAL */
1480 + nla_total_size(4) /* IFA_BROADCAST */
1481 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1482 + nla_total_size(4) /* IFA_FLAGS */
1483 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
// Converts a jiffies stamp into hundredths of a second elapsed since
// INITIAL_JIFFIES (delta * 100 / HZ), truncated to u32 by the return type.
1486 static inline u32 cstamp_delta(unsigned long cstamp)
1488 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
// Appends an IFA_CACHEINFO attribute to skb. Both stamps are converted with
// cstamp_delta(); the lifetimes are stored as given. Returns the nla_put()
// result.
1491 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1492 unsigned long tstamp, u32 preferred, u32 valid)
1494 struct ifa_cacheinfo ci;
1496 ci.cstamp = cstamp_delta(cstamp);
1497 ci.tstamp = cstamp_delta(tstamp);
1498 ci.ifa_prefered = preferred;
1499 ci.ifa_valid = valid;
1501 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
/*
 * Fill @skb with one netlink address message describing @ifa.
 *
 * The header is created with nlmsg_put() from @portid, @seq, @event and
 * @flags.  The ifaddrmsg payload is populated from @ifa: family AF_INET,
 * prefix length, flags, scope and the ifindex of the owning device.
 * Address attributes are then appended, followed by IFA_FLAGS and an
 * IFA_CACHEINFO attribute built by put_cacheinfo().
 *
 * A failing attribute put jumps to nla_put_failure; the nlmsg_cancel()
 * call near the end is presumably that path (the label line is not
 * visible here).
 *
 * NOTE(review): the embedded line numbers skip in several places (e.g.
 * 1512-1514, 1529-1531, 1533-1539), so some statements of this function
 * are not visible in this extract -- confirm against the full file.
 */
1504 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1505 u32 portid, u32 seq, int event, unsigned int flags)
1507 struct ifaddrmsg *ifm;
1508 struct nlmsghdr *nlh;
1509 u32 preferred, valid;
1511 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1515 ifm = nlmsg_data(nlh);
1516 ifm->ifa_family = AF_INET;
1517 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1518 ifm->ifa_flags = ifa->ifa_flags;
1519 ifm->ifa_scope = ifa->ifa_scope;
1520 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
/*
 * Address without IFA_F_PERMANENT: start from the stored lifetimes;
 * tval is the time since ifa_tstamp in units of jiffies / HZ.
 */
1522 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1523 preferred = ifa->ifa_preferred_lft;
1524 valid = ifa->ifa_valid_lft;
1525 if (preferred != INFINITY_LIFE_TIME) {
1526 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1528 if (preferred > tval)
1532 if (valid != INFINITY_LIFE_TIME) {
/*
 * NOTE(review): presumably the branch for permanent addresses (the
 * `else` line is not visible): both lifetimes reported as infinite.
 */
1540 preferred = INFINITY_LIFE_TIME;
1541 valid = INFINITY_LIFE_TIME;
/*
 * IFA_ADDRESS, IFA_BROADCAST and IFA_LABEL are only emitted when the
 * corresponding field is non-zero / non-empty; IFA_FLAGS and the cache
 * info are emitted unconditionally.
 */
1543 if ((ifa->ifa_address &&
1544 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1546 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1547 (ifa->ifa_broadcast &&
1548 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1549 (ifa->ifa_label[0] &&
1550 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1551 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1552 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1554 goto nla_put_failure;
1556 nlmsg_end(skb, nlh);
1560 nlmsg_cancel(skb, nlh);
/*
 * Netlink dump callback for IPv4 addresses.
 *
 * Walks the net->dev_index_head[] hash buckets of the requesting
 * socket's namespace and, for each device that has an in_device, walks
 * its ifa_list, emitting one RTM_NEWADDR message flagged NLM_F_MULTI per
 * address through inet_fill_ifaddr().
 *
 * The resume position is carried in cb->args[]: args[1] is the device
 * index and args[2] the address index (read at the top, args[2] written
 * back at the end).  cb->seq is derived from net->ipv4.dev_addr_genid
 * and nl_dump_check_consistent() is called on the messages produced.
 *
 * NOTE(review): the embedded line numbers skip throughout this function
 * (declarations of h/s_h/idx/s_idx, the `continue`/`goto` statements and
 * the remaining cb->args[] stores are not visible) -- confirm against
 * the full file.
 */
1564 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1566 struct net *net = sock_net(skb->sk);
1569 int ip_idx, s_ip_idx;
1570 struct net_device *dev;
1571 struct in_device *in_dev;
1572 struct in_ifaddr *ifa;
1573 struct hlist_head *head;
1576 s_idx = idx = cb->args[1];
1577 s_ip_idx = ip_idx = cb->args[2];
/* s_idx is reset to 0 after the first bucket has been processed. */
1579 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1581 head = &net->dev_index_head[h];
1583 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1585 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1588 if (h > s_h || idx > s_idx)
1590 in_dev = __in_dev_get_rcu(dev);
1594 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1595 ifa = ifa->ifa_next, ip_idx++) {
1596 if (ip_idx < s_ip_idx)
1598 if (inet_fill_ifaddr(skb, ifa,
1599 NETLINK_CB(cb->skb).portid,
1601 RTM_NEWADDR, NLM_F_MULTI) < 0) {
1605 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1616 cb->args[2] = ip_idx;
/*
 * Send a notification about @ifa to the RTNLGRP_IPV4_IFADDR group.
 *
 * An skb sized by inet_nlmsg_size() is allocated with GFP_KERNEL and
 * filled by inet_fill_ifaddr() with @event; the sequence number is
 * taken from @nlh when one is supplied, otherwise 0.  The message is
 * handed to rtnl_notify().  rtnl_set_sk_err() is called with err for
 * the same group -- presumably on the failure path (the label and the
 * checks leading to it are not visible in this extract).
 */
1621 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1624 struct sk_buff *skb;
1625 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1629 net = dev_net(ifa->ifa_dev->dev);
1630 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1634 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1636 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1637 WARN_ON(err == -EMSGSIZE);
1641 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1645 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
/*
 * Size of the AF_INET link attribute payload for @dev: one
 * IFLA_INET_CONF attribute holding IPV4_DEVCONF_MAX 4-byte values.
 *
 * NOTE(review): in_dev is read from dev->ip_ptr but the lines that use
 * it (embedded numbers 1651-1654) are not visible here.
 */
1648 static size_t inet_get_link_af_size(const struct net_device *dev)
1650 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1655 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
/*
 * Emit the AF_INET link attributes for @dev into @skb: reserve an
 * IFLA_INET_CONF attribute of IPV4_DEVCONF_MAX * 4 bytes and copy every
 * in_dev->cnf.data[] entry into it as a u32.
 *
 * NOTE(review): the NULL checks and return statements are not visible
 * in this extract.
 */
1658 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1660 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1667 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1671 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1672 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
/*
 * Attribute policy used by inet_validate_link_af() when parsing the
 * nested AF_INET link attributes: IFLA_INET_CONF must itself be nested.
 */
1677 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1678 [IFLA_INET_CONF] = { .type = NLA_NESTED },
/*
 * Validate the AF_INET link attributes in @nla for @dev.
 *
 * Returns -EAFNOSUPPORT when @dev is given but has no in_device.  The
 * attributes are parsed against inet_af_policy; if IFLA_INET_CONF is
 * present, each nested attribute's type is taken as a devconf id and
 * checked against the range 1..IPV4_DEVCONF_MAX.
 *
 * NOTE(review): the statements executed when a check fails, and the
 * final return, are not visible in this extract.
 */
1681 static int inet_validate_link_af(const struct net_device *dev,
1682 const struct nlattr *nla)
1684 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1687 if (dev && !__in_dev_get_rtnl(dev))
1688 return -EAFNOSUPPORT;
1690 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1694 if (tb[IFLA_INET_CONF]) {
1695 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1696 int cfgid = nla_type(a);
1701 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
/*
 * Apply the AF_INET link attributes in @nla to @dev.
 *
 * For every attribute nested inside IFLA_INET_CONF, the attribute type
 * is used as the devconf id and its u32 payload as the new value, both
 * passed to ipv4_devconf_set().  Parsing here uses a NULL policy.
 *
 * NOTE(review): the condition guarding `return -EAFNOSUPPORT` and the
 * handling of a parse failure are not visible in this extract.
 */
1709 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1711 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1712 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1716 return -EAFNOSUPPORT;
1718 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1721 if (tb[IFLA_INET_CONF]) {
1722 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1723 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
/*
 * Compute the size of a netconf message for @type.
 *
 * The base is the aligned struct netconfmsg plus a 4-byte
 * NETCONFA_IFINDEX attribute.  One further 4-byte attribute is added for
 * each of FORWARDING, RP_FILTER, MC_FORWARDING and PROXY_NEIGH that
 * matches @type; @type == -1 selects all of them.
 */
1729 static int inet_netconf_msgsize_devconf(int type)
1731 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1732 + nla_total_size(4); /* NETCONFA_IFINDEX */
1734 /* type -1 is used for ALL */
1735 if (type == -1 || type == NETCONFA_FORWARDING)
1736 size += nla_total_size(4);
1737 if (type == -1 || type == NETCONFA_RP_FILTER)
1738 size += nla_total_size(4);
1739 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1740 size += nla_total_size(4);
1741 if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1742 size += nla_total_size(4);
/*
 * Fill @skb with one netconf message for @ifindex taken from @devconf.
 *
 * The header is created with nlmsg_put() from @portid, @seq, @event and
 * @flags, and the netconfmsg family is set to AF_INET.
 * NETCONFA_IFINDEX is always added; the FORWARDING, RP_FILTER,
 * MC_FORWARDING and PROXY_NEIGH attributes are added when `type`
 * selects them (-1 selects all).  Any failing put jumps to
 * nla_put_failure; the nlmsg_cancel() call near the end is presumably
 * that path (the label line is not visible).
 *
 * NOTE(review): the last parameter line (declaring `type`), the
 * nlmsg_put() failure check and the return statements are not visible
 * in this extract.
 */
1747 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1748 struct ipv4_devconf *devconf, u32 portid,
1749 u32 seq, int event, unsigned int flags,
1752 struct nlmsghdr *nlh;
1753 struct netconfmsg *ncm;
1755 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1760 ncm = nlmsg_data(nlh);
1761 ncm->ncm_family = AF_INET;
1763 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1764 goto nla_put_failure;
1766 /* type -1 is used for ALL */
1767 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1768 nla_put_s32(skb, NETCONFA_FORWARDING,
1769 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1770 goto nla_put_failure;
1771 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1772 nla_put_s32(skb, NETCONFA_RP_FILTER,
1773 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1774 goto nla_put_failure;
1775 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1776 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1777 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1778 goto nla_put_failure;
/* The PROXY_NEIGH attribute carries the PROXY_ARP devconf value. */
1779 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1780 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1781 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1782 goto nla_put_failure;
1784 nlmsg_end(skb, nlh);
1788 nlmsg_cancel(skb, nlh);
/*
 * Send an RTM_NEWNETCONF notification for @type / @ifindex, with values
 * read from @devconf, to the RTNLGRP_IPV4_NETCONF group of @net.
 *
 * The skb is sized by inet_netconf_msgsize_devconf(@type) and allocated
 * with GFP_ATOMIC; it is filled with portid 0 and sequence 0 and handed
 * to rtnl_notify().  rtnl_set_sk_err() is called with err for the same
 * group -- presumably on the failure path (the label and the checks
 * leading to it are not visible in this extract).
 */
1792 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1793 struct ipv4_devconf *devconf)
1795 struct sk_buff *skb;
1798 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1802 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1803 RTM_NEWNETCONF, 0, type);
1805 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1806 WARN_ON(err == -EMSGSIZE);
1810 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1814 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
/*
 * Attribute policy used by inet_netconf_get_devconf() when parsing a
 * netconf request: each listed attribute has .len = sizeof(int).
 */
1817 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1818 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1819 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1820 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1821 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
/*
 * Handle a netconf "get" request (registered for RTM_GETNETCONF).
 *
 * The request is parsed against devconf_ipv4_policy and must carry
 * NETCONFA_IFINDEX.  The ifindex selects which configuration is
 * reported:
 *   NETCONFA_IFINDEX_ALL     -> net->ipv4.devconf_all
 *   NETCONFA_IFINDEX_DEFAULT -> net->ipv4.devconf_dflt
 *   anything else            -> the cnf of the in_device of the device
 *                               found by __dev_get_by_index()
 * A reply sized for all attribute types (-1) is built as RTM_NEWNETCONF
 * using the requester's portid and sequence number, and sent back with
 * rtnl_unicast().
 *
 * NOTE(review): the error checks, the `switch`/`default` lines and the
 * return statements are not visible in this extract.
 */
1824 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1825 struct nlmsghdr *nlh)
1827 struct net *net = sock_net(in_skb->sk);
1828 struct nlattr *tb[NETCONFA_MAX+1];
1829 struct netconfmsg *ncm;
1830 struct sk_buff *skb;
1831 struct ipv4_devconf *devconf;
1832 struct in_device *in_dev;
1833 struct net_device *dev;
1837 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1838 devconf_ipv4_policy);
1843 if (!tb[NETCONFA_IFINDEX])
1846 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1848 case NETCONFA_IFINDEX_ALL:
1849 devconf = net->ipv4.devconf_all;
1851 case NETCONFA_IFINDEX_DEFAULT:
1852 devconf = net->ipv4.devconf_dflt;
1855 dev = __dev_get_by_index(net, ifindex);
1858 in_dev = __in_dev_get_rtnl(dev);
1861 devconf = &in_dev->cnf;
1866 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1870 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1871 NETLINK_CB(in_skb).portid,
1872 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1875 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1876 WARN_ON(err == -EMSGSIZE);
1880 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
/*
 * Netlink dump callback for netconf (registered for RTM_GETNETCONF).
 *
 * Walks the net->dev_index_head[] hash buckets and emits one netconf
 * message per device that has an in_device.  After the buckets, two
 * extra positions of h are used: h == NETDEV_HASHENTRIES emits the
 * entry for NETCONFA_IFINDEX_ALL (net->ipv4.devconf_all) and
 * h == NETDEV_HASHENTRIES + 1 the entry for NETCONFA_IFINDEX_DEFAULT
 * (net->ipv4.devconf_dflt), both as RTM_NEWNETCONF with NLM_F_MULTI.
 *
 * NOTE(review): declarations of h/s_h/idx/s_idx, the skip/advance
 * statements and the cb->args[] stores are not visible in this extract.
 */
1885 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1886 struct netlink_callback *cb)
1888 struct net *net = sock_net(skb->sk);
1891 struct net_device *dev;
1892 struct in_device *in_dev;
1893 struct hlist_head *head;
1896 s_idx = idx = cb->args[1];
1898 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1900 head = &net->dev_index_head[h];
1902 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1904 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1907 in_dev = __in_dev_get_rcu(dev);
1911 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1913 NETLINK_CB(cb->skb).portid,
1921 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1927 if (h == NETDEV_HASHENTRIES) {
1928 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1929 net->ipv4.devconf_all,
1930 NETLINK_CB(cb->skb).portid,
1932 RTM_NEWNETCONF, NLM_F_MULTI,
1938 if (h == NETDEV_HASHENTRIES + 1) {
1939 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1940 net->ipv4.devconf_dflt,
1941 NETLINK_CB(cb->skb).portid,
1943 RTM_NEWNETCONF, NLM_F_MULTI,
1956 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry @i of the namespace's default configuration to every
 * device in @net whose in_device does not have bit @i set in cnf.state.
 * (devinet_conf_proc() sets that bit for an entry it handles.)
 */
1958 static void devinet_copy_dflt_conf(struct net *net, int i)
1960 struct net_device *dev;
1963 for_each_netdev_rcu(net, dev) {
1964 struct in_device *in_dev;
1966 in_dev = __in_dev_get_rcu(dev);
1967 if (in_dev && !test_bit(i, in_dev->cnf.state))
1968 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Apply the current "all" FORWARDING setting of @net everywhere:
 *  - ACCEPT_REDIRECTS in the "all" conf is set to the inverse of it,
 *  - FORWARDING in the "default" conf is set to it,
 *  - netconf notifications are sent for both "all" and "default",
 *  - each device's FORWARDING is set to it via IN_DEV_CONF_SET() and a
 *    per-device notification is sent.
 *
 * NOTE(review): the lines around dev_disable_lro() and the in_dev NULL
 * check (embedded numbers 1990, 1992, 1994) are not visible here, so
 * the exact conditions under which they run cannot be confirmed.
 */
1973 /* called with RTNL locked */
1974 static void inet_forward_change(struct net *net)
1976 struct net_device *dev;
1977 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1979 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1980 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1981 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1982 NETCONFA_IFINDEX_ALL,
1983 net->ipv4.devconf_all);
1984 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1985 NETCONFA_IFINDEX_DEFAULT,
1986 net->ipv4.devconf_dflt);
1988 for_each_netdev(net, dev) {
1989 struct in_device *in_dev;
1991 dev_disable_lro(dev);
1993 in_dev = __in_dev_get_rcu(dev);
1995 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1996 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1997 dev->ifindex, &in_dev->cnf);
/*
 * Map a devconf pointer to the ifindex used in netconf messages:
 * NETCONFA_IFINDEX_DEFAULT for the namespace default conf,
 * NETCONFA_IFINDEX_ALL for the "all" conf, otherwise the ifindex of the
 * device whose in_device embeds @cnf (recovered with container_of()).
 */
2003 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2005 if (cnf == net->ipv4.devconf_dflt)
2006 return NETCONFA_IFINDEX_DEFAULT;
2007 else if (cnf == net->ipv4.devconf_all)
2008 return NETCONFA_IFINDEX_ALL;
2010 struct in_device *idev
2011 = container_of(cnf, struct in_device, cnf);
2012 return idev->dev->ifindex;
/*
 * sysctl handler for generic devconf integer entries.
 *
 * The value is read/written through proc_dointvec(); the value before
 * and after the call is captured so changes can be detected.  The entry
 * index i is derived from the offset of ctl->data within cnf->data.
 * Follow-up actions visible here:
 *  - bit i is set in cnf->state,
 *  - if cnf is the namespace default conf, the new value is propagated
 *    with devinet_copy_dflt_conf(),
 *  - for ACCEPT_LOCAL / ROUTE_LOCALNET, a change from non-zero to zero
 *    flushes the route cache,
 *  - for RP_FILTER / PROXY_ARP, a changed value triggers a netconf
 *    notification (NETCONFA_RP_FILTER / NETCONFA_PROXY_NEIGH).
 *
 * NOTE(review): the condition guarding these actions (embedded line
 * 2024, presumably a write check), the trailing arguments of the
 * notify calls and the return statement are not visible in this extract.
 */
2016 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2017 void __user *buffer,
2018 size_t *lenp, loff_t *ppos)
2020 int old_value = *(int *)ctl->data;
2021 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2022 int new_value = *(int *)ctl->data;
2025 struct ipv4_devconf *cnf = ctl->extra1;
2026 struct net *net = ctl->extra2;
2027 int i = (int *)ctl->data - cnf->data;
2030 set_bit(i, cnf->state);
2032 if (cnf == net->ipv4.devconf_dflt)
2033 devinet_copy_dflt_conf(net, i);
2034 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2035 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2036 if ((new_value == 0) && (old_value != 0))
2037 rt_cache_flush(net);
2039 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2040 new_value != old_value) {
2041 ifindex = devinet_conf_ifindex(net, cnf);
2042 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2045 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2046 new_value != old_value) {
2047 ifindex = devinet_conf_ifindex(net, cnf);
2048 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
/*
 * sysctl handler for the "forwarding" entries.
 *
 * After proc_dointvec(), a write that changed the value is acted upon:
 *  - If the entry is not the namespace default's FORWARDING value, the
 *    RTNL lock is taken with rtnl_trylock(); on failure the syscall is
 *    restarted via restart_syscall() after restoring the original
 *    values.  For the "all" entry inet_forward_change() is called;
 *    otherwise the entry belongs to a single device, which gets
 *    dev_disable_lro() and a NETCONFA_FORWARDING notification.  The
 *    route cache is then flushed.
 *  - The notification with NETCONFA_IFINDEX_DEFAULT near the end is
 *    presumably the branch for the default entry (the `else` line is not
 *    visible).
 *
 * NOTE(review): the declarations of val/pos, the restore statements,
 * the condition on dev_disable_lro(), rtnl_unlock() and the return are
 * not visible in this extract.
 */
2056 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2057 void __user *buffer,
2058 size_t *lenp, loff_t *ppos)
2060 int *valp = ctl->data;
2063 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065 if (write && *valp != val) {
2066 struct net *net = ctl->extra2;
2068 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2069 if (!rtnl_trylock()) {
2070 /* Restore the original values before restarting */
2073 return restart_syscall();
2075 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2076 inet_forward_change(net);
2078 struct ipv4_devconf *cnf = ctl->extra1;
2079 struct in_device *idev =
2080 container_of(cnf, struct in_device, cnf);
2082 dev_disable_lro(idev->dev);
2083 inet_netconf_notify_devconf(net,
2084 NETCONFA_FORWARDING,
2089 rt_cache_flush(net);
2091 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2092 NETCONFA_IFINDEX_DEFAULT,
2093 net->ipv4.devconf_dflt);
/*
 * sysctl handler that reads/writes an integer through proc_dointvec()
 * and flushes the route cache of the owning namespace (ctl->extra2)
 * when a write changed the value.
 *
 * NOTE(review): the declaration of `val` (embedded line 2104,
 * presumably the value before the write) and the return statement are
 * not visible in this extract.
 */
2099 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2100 void __user *buffer,
2101 size_t *lenp, loff_t *ppos)
2103 int *valp = ctl->data;
2105 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2106 struct net *net = ctl->extra2;
2108 if (write && *valp != val)
2109 rt_cache_flush(net);
/*
 * Helpers for building the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) describes one ctl_table
 * entry whose .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf, with .maxlen sizeof(int), handler `proc` and .extra1
 * pointing at ipv4_devconf.  The wrappers fix some of the arguments:
 *   RW_ENTRY        - 0644, devinet_conf_proc
 *   RO_ENTRY        - 0444, devinet_conf_proc
 *   COMPLEX_ENTRY   - 0644, caller-supplied handler
 *   FLUSHING_ENTRY  - COMPLEX_ENTRY with ipv4_doint_and_flush
 *
 * NOTE(review): some lines of DEVINET_SYSCTL_ENTRY (embedded numbers
 * 2115-2116, 2120, 2123) are not visible in this extract.
 */
2114 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2117 .data = ipv4_devconf.data + \
2118 IPV4_DEVCONF_ ## attr - 1, \
2119 .maxlen = sizeof(int), \
2121 .proc_handler = proc, \
2122 .extra1 = &ipv4_devconf, \
2125 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2126 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2128 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2129 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2131 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2132 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2134 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2135 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template for the per-configuration sysctl table.
 *
 * __devinet_sysctl_register() duplicates this structure with kmemdup()
 * and rebases each entry onto the target ipv4_devconf.  The entries
 * here all reference the global ipv4_devconf (see DEVINET_SYSCTL_ENTRY).
 * "forwarding" uses devinet_sysctl_forward, "mc_forwarding" is
 * read-only, the last group uses the route-cache-flushing handler, and
 * everything else is a plain read/write entry.
 */
2137 static struct devinet_sysctl_table {
2138 struct ctl_table_header *sysctl_header;
2139 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2140 } devinet_sysctl = {
2142 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2143 devinet_sysctl_forward),
2144 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2146 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2147 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2148 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2149 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2150 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2151 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2152 "accept_source_route"),
2153 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2154 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2155 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2156 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2157 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2158 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2159 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2160 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2161 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2162 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2163 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2164 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2165 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2166 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2167 "force_igmp_version"),
2168 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2169 "igmpv2_unsolicited_report_interval"),
2170 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2171 "igmpv3_unsolicited_report_interval"),
2173 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2174 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2175 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2176 "promote_secondaries"),
2177 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
/*
 * Register the devconf sysctl directory "net/ipv4/conf/<dev_name>" in
 * @net for the configuration @p.
 *
 * The devinet_sysctl template is duplicated with kmemdup().  Every
 * entry except the last array slot then has its .data pointer shifted
 * by the byte distance between @p and the global ipv4_devconf, so it
 * addresses the same slot inside @p.  .extra1 is set to @p and .extra2
 * to @net, which is what the handlers read back.
 *
 * NOTE(review): the allocation-failure handling, the store of the table
 * pointer into @p, the error unwinding and the return statements are
 * not visible in this extract.
 */
2182 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2183 struct ipv4_devconf *p)
2186 struct devinet_sysctl_table *t;
2187 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2189 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2193 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2194 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2195 t->devinet_vars[i].extra1 = p;
2196 t->devinet_vars[i].extra2 = net;
2199 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2201 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2202 if (!t->sysctl_header)
/*
 * Unregister the sysctl table attached to @cnf (cnf->sysctl) by passing
 * its header to unregister_net_sysctl_table().
 *
 * NOTE(review): embedded lines 2217-2221 and 2223 are not visible here;
 * any NULL check, pointer reset or freeing of the table cannot be
 * confirmed from this extract.
 */
2214 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2216 struct devinet_sysctl_table *t = cnf->sysctl;
2222 unregister_net_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries for one in_device: first the neighbour
 * parameters via neigh_sysctl_register(), then the devconf table via
 * __devinet_sysctl_register() under the device's name.  The device name
 * is checked with sysctl_dev_name_is_allowed() first.  The
 * neigh_sysctl_unregister() call at the end presumably undoes the first
 * step when the second fails (the guarding condition is not visible).
 *
 * NOTE(review): return statements and error checks are not visible in
 * this extract.
 */
2226 static int devinet_sysctl_register(struct in_device *idev)
2230 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2233 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2236 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2239 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Remove the sysctl entries of one in_device: the devconf table first,
 * then the neighbour parameter entries.
 */
2243 static void devinet_sysctl_unregister(struct in_device *idev)
2245 __devinet_sysctl_unregister(&idev->cnf);
2246 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Template for the "ip_forward" sysctl entry.  As defined here it is
 * bound to the global ipv4_devconf's FORWARDING slot and to init_net,
 * and is handled by devinet_sysctl_forward.  devinet_init_net()
 * duplicates and rebinds it for other namespaces.
 */
2249 static struct ctl_table ctl_forward_entry[] = {
2251 .procname = "ip_forward",
2252 .data = &ipv4_devconf.data[
2253 IPV4_DEVCONF_FORWARDING - 1],
2254 .maxlen = sizeof(int),
2256 .proc_handler = devinet_sysctl_forward,
2257 .extra1 = &ipv4_devconf,
2258 .extra2 = &init_net,
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the global ipv4_devconf / ipv4_devconf_dflt and the
 * static ctl_forward_entry directly.  Any other namespace gets
 * kmemdup() copies of all three, with the copied forward entry rebound
 * to the namespace's own "all" conf and to @net.
 *
 * With CONFIG_SYSCTL, the "all" and "default" conf directories and the
 * "net/ipv4" forward table are registered, and the forward header is
 * stored in net->ipv4.forw_hdr.  Finally the conf pointers are stored
 * in net->ipv4.devconf_all / devconf_dflt.
 *
 * The tail of the function is the error unwinding: it unregisters the
 * sysctl tables and handles only objects that differ from the static
 * ones, i.e. those duplicated above.
 *
 * NOTE(review): the failure checks, goto targets/labels, the kfree()
 * calls and the return statements are not visible in this extract.
 */
2264 static __net_init int devinet_init_net(struct net *net)
2267 struct ipv4_devconf *all, *dflt;
2268 #ifdef CONFIG_SYSCTL
2269 struct ctl_table *tbl = ctl_forward_entry;
2270 struct ctl_table_header *forw_hdr;
2274 all = &ipv4_devconf;
2275 dflt = &ipv4_devconf_dflt;
2277 if (!net_eq(net, &init_net)) {
2278 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2282 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2284 goto err_alloc_dflt;
2286 #ifdef CONFIG_SYSCTL
2287 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2291 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2292 tbl[0].extra1 = all;
2293 tbl[0].extra2 = net;
2297 #ifdef CONFIG_SYSCTL
2298 err = __devinet_sysctl_register(net, "all", all);
2302 err = __devinet_sysctl_register(net, "default", dflt);
2307 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2310 net->ipv4.forw_hdr = forw_hdr;
2313 net->ipv4.devconf_all = all;
2314 net->ipv4.devconf_dflt = dflt;
2317 #ifdef CONFIG_SYSCTL
2319 __devinet_sysctl_unregister(dflt);
2321 __devinet_sysctl_unregister(all);
2323 if (tbl != ctl_forward_entry)
2327 if (dflt != &ipv4_devconf_dflt)
2330 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown.  With CONFIG_SYSCTL, the forward table pointer
 * is fetched from the registered header before the header is
 * unregistered, then the "default" and "all" conf sysctl tables are
 * unregistered.  Finally the namespace's devconf_dflt and devconf_all
 * are freed with kfree().
 *
 * NOTE(review): what is done with `tbl` afterwards (embedded lines
 * 2345-2346) is not visible in this extract.
 */
2336 static __net_exit void devinet_exit_net(struct net *net)
2338 #ifdef CONFIG_SYSCTL
2339 struct ctl_table *tbl;
2341 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2342 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2343 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2344 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2347 kfree(net->ipv4.devconf_dflt);
2348 kfree(net->ipv4.devconf_all);
/*
 * Per-network-namespace hooks: devinet_init_net() on creation and
 * devinet_exit_net() on teardown.  Registered from devinet_init().
 */
2351 static __net_initdata struct pernet_operations devinet_ops = {
2352 .init = devinet_init_net,
2353 .exit = devinet_exit_net,
/*
 * Address-family specific rtnetlink link operations; ties together the
 * fill/size/validate/set callbacks defined in this file.  Registered
 * from devinet_init() with rtnl_af_register().
 *
 * NOTE(review): the line at embedded number 2357 (presumably the
 * family field) is not visible in this extract.
 */
2356 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2358 .fill_link_af = inet_fill_link_af,
2359 .get_link_af_size = inet_get_link_af_size,
2360 .validate_link_af = inet_validate_link_af,
2361 .set_link_af = inet_set_link_af,
/*
 * Boot-time initialisation of IPv4 device support, in this order:
 *  - initialise every bucket of the inet_addr_lst hash,
 *  - register the per-namespace operations (devinet_ops),
 *  - register the PF_INET gifconf handler and the netdevice notifier,
 *  - queue check_lifetime_work on system_power_efficient_wq with a
 *    delay of 0,
 *  - register the AF-specific link ops and the rtnetlink handlers for
 *    RTM_NEWADDR, RTM_DELADDR, RTM_GETADDR (dump only) and
 *    RTM_GETNETCONF (get and dump).
 */
2364 void __init devinet_init(void)
2368 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2369 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2371 register_pernet_subsys(&devinet_ops);
2373 register_gifconf(PF_INET, inet_gifconf);
2374 register_netdevice_notifier(&ip_netdev_notifier);
2376 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2378 rtnl_af_register(&inet_af_ops);
2380 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2381 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2382 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2383 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2384 inet_netconf_dump_devconf, NULL);