2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
56 #include <linux/sysctl.h>
58 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
/*
 * Devconf instance whose accept/send/secure redirect and shared_media
 * entries are preset to 1.  devinet_init_net() uses it as the "all"
 * configuration and kmemdup()s it for any namespace other than init_net.
 */
67 static struct ipv4_devconf ipv4_devconf = {
69 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
/*
 * Template for the "default" configuration.  Same presets as ipv4_devconf
 * plus accept_source_route = 1.  devinet_init_net() installs it (or a
 * kmemdup() copy) as net->ipv4.devconf_dflt, and inetdev_init() memcpy()s
 * that into every new in_device.
 */
76 static struct ipv4_devconf ipv4_devconf_dflt = {
78 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * Access attribute @attr in the default devconf of namespace @net.
 *
 * @net is parenthesized so that any pointer-valued expression (for
 * example "nets + 1" or "&some_net") can be passed; without the
 * parentheses "*net->ipv4..." would bind "->" to only the last operand
 * of such an expression.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy passed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr(): the four address attributes are NLA_U32, the label
 * is a string limited to IFNAMSIZ - 1 bytes.
 */
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 [IFA_LOCAL] = { .type = NLA_U32 },
91 [IFA_ADDRESS] = { .type = NLA_U32 },
92 [IFA_BROADCAST] = { .type = NLA_U32 },
93 [IFA_ANYCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined later in this file, plus the
 * notifier chain (inetaddr_chain) that __inet_insert_ifa()/__inet_del_ifa()
 * call with NETDEV_UP / NETDEV_DOWN.
 *
 * devinet_sysctl_register()/devinet_sysctl_unregister() appear both as
 * prototypes and as "static inline" definitions.
 * NOTE(review): presumably these are alternatives selected by a
 * preprocessor conditional that is not visible here -- confirm.
 */
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
103 static void devinet_sysctl_register(struct in_device *idev);
104 static void devinet_sysctl_unregister(struct in_device *idev);
106 static inline void devinet_sysctl_register(struct in_device *idev)
109 static inline void devinet_sysctl_unregister(struct in_device *idev)
114 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled in_ifaddr with kzalloc(GFP_KERNEL) and initialise
 * its embedded RCU head.  Callers in this file test the result against
 * NULL.
 */
116 static struct in_ifaddr *inet_alloc_ifa(void)
118 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
121 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa().  Recovers the in_ifaddr from its
 * rcu_head and drops the reference on ifa->ifa_dev with in_dev_put().
 * NOTE(review): the release of the ifa memory itself is not visible in
 * this listing.
 */
127 static void inet_rcu_free_ifa(struct rcu_head *head)
129 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
131 in_dev_put(ifa->ifa_dev);
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
137 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Last-stage teardown of an in_device.  BUG_TRAP()s that both the address
 * list and the multicast list are already empty; with NET_REFCNT_DEBUG it
 * also logs the in_device pointer and its device name ("NIL" when
 * idev->dev is NULL).  One path prints "Freeing alive in_device"; the
 * condition selecting that path is not visible in this listing.
 */
140 void in_dev_finish_destroy(struct in_device *idev)
142 struct net_device *dev = idev->dev;
144 BUG_TRAP(!idev->ifa_list);
145 BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148 idev, dev ? dev->name : "NIL");
152 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 per-device state for @dev.
 *
 * Visible steps: allocate a zeroed in_device, initialise its RCU head,
 * copy the namespace's default devconf into in_dev->cnf and clear
 * cnf.sysctl, allocate ARP neighbour parameters, register the sysctl
 * entries, initialise multicast state, and -- deliberately last, because
 * packets can be received as soon as it is set -- publish the structure
 * through rcu_assign_pointer(dev->ip_ptr, in_dev).
 *
 * Returns the new in_device.  NOTE(review): the failure paths are not
 * visible here; inetdev_event() maps a failed call to -ENOMEM.
 */
158 static struct in_device *inetdev_init(struct net_device *dev)
160 struct in_device *in_dev;
164 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
167 INIT_RCU_HEAD(&in_dev->rcu_head);
168 memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169 sizeof(in_dev->cnf));
170 in_dev->cnf.sysctl = NULL;
172 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
174 /* Reference in_dev->dev */
176 /* Account for reference dev->ip_ptr (below) */
179 devinet_sysctl_register(in_dev);
180 ip_mc_init_dev(in_dev);
181 if (dev->flags & IFF_UP)
184 /* we can receive as soon as ip_ptr is set -- do this last */
185 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(); recovers the in_device from
 * its embedded rcu_head.
 * NOTE(review): the action taken on idev is not visible in this listing;
 * the name suggests a reference is dropped -- confirm.
 */
194 static void in_dev_rcu_put(struct rcu_head *head)
196 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device.
 *
 * Visible steps: destroy the multicast state, remove addresses one by one
 * (always deleting the current head of ifa_list via inet_del_ifa() with
 * destroy == 0) until the list is empty, unregister the sysctl entries,
 * release the ARP neighbour parameters, and defer in_dev_rcu_put() through
 * call_rcu().
 */
200 static void inetdev_destroy(struct in_device *in_dev)
202 struct in_ifaddr *ifa;
203 struct net_device *dev;
211 ip_mc_destroy_dev(in_dev);
213 while ((ifa = in_dev->ifa_list) != NULL) {
214 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
220 devinet_sysctl_unregister(in_dev);
221 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
224 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of @in_dev looking for one that
 * inet_ifa_match()es @a and, when @b is non-zero, also matches @b.
 * NOTE(review): the values returned on hit/miss are not visible in this
 * listing.
 */
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
230 for_primary_ifa(in_dev) {
231 if (inet_ifa_match(a, ifa)) {
232 if (!b || inet_ifa_match(b, ifa)) {
237 } endfor_ifa(in_dev);
/*
 * Remove the address *@ifap from @in_dev's list and announce the change.
 * @nlh and @pid are forwarded to rtmsg_ifa() for every notification.
 *
 * Visible logic:
 *  - If the address is primary (IFA_F_SECONDARY clear), the entries after
 *    it are scanned.  Secondaries with the same mask that match its
 *    subnet are unlinked and announced (RTM_DELADDR, then the notifier
 *    chain); everything else is stepped over.  do_promote is read from
 *    IN_DEV_PROMOTE_SECONDARIES(); how it selects "promote" is not shown.
 *  - The address itself is unlinked (*ifap = ifa1->ifa_next) and announced
 *    with RTM_DELADDR first and the NETDEV_DOWN notifier second.
 *  - A promoted secondary is re-linked right after last_prim, loses
 *    IFA_F_SECONDARY and is announced with RTM_NEWADDR; the following
 *    entries in the same subnet are then visited.
 *
 * NOTE(review): the use of @destroy is not visible in this listing.
 */
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243 int destroy, struct nlmsghdr *nlh, u32 pid)
245 struct in_ifaddr *promote = NULL;
246 struct in_ifaddr *ifa, *ifa1 = *ifap;
247 struct in_ifaddr *last_prim = in_dev->ifa_list;
248 struct in_ifaddr *prev_prom = NULL;
249 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
253 /* 1. Deleting primary ifaddr forces deletion of all secondaries
254 * unless alias promotion is set
257 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
260 while ((ifa = *ifap1) != NULL) {
261 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262 ifa1->ifa_scope <= ifa->ifa_scope)
265 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266 ifa1->ifa_mask != ifa->ifa_mask ||
267 !inet_ifa_match(ifa1->ifa_address, ifa)) {
268 ifap1 = &ifa->ifa_next;
274 *ifap1 = ifa->ifa_next;
276 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277 blocking_notifier_call_chain(&inetaddr_chain,
289 *ifap = ifa1->ifa_next;
291 /* 3. Announce address deletion */
293 /* Send message first, then call notifier.
294 At first sight, FIB update triggered by notifier
295 will refer to already deleted ifaddr, that could confuse
296 netlink listeners. It is not true: look, gated sees
297 that route deleted and if it still thinks that ifaddr
298 is valid, it will try to restore deleted routes... Grr.
299 So that, this order is correct.
301 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
307 prev_prom->ifa_next = promote->ifa_next;
308 promote->ifa_next = last_prim->ifa_next;
309 last_prim->ifa_next = promote;
312 promote->ifa_flags &= ~IFA_F_SECONDARY;
313 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314 blocking_notifier_call_chain(&inetaddr_chain,
316 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317 if (ifa1->ifa_mask != ifa->ifa_mask ||
318 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request: passes nlh = NULL and pid = 0.
 */
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
331 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 * @nlh and @pid are forwarded to rtmsg_ifa().
 *
 * Visible logic:
 *  - An address with ifa_local == 0 is special-cased (handling not shown).
 *  - IFA_F_SECONDARY is cleared, then the list is walked.  last_primary
 *    tracks the slot after the last primary whose scope is >= the new
 *    address's scope.  If an existing entry has the same mask and covers
 *    ifa_address, the new address is checked for an identical ifa_local
 *    and for a differing scope (outcomes not shown) and is otherwise
 *    marked IFA_F_SECONDARY.
 *  - For a primary address net_srandom() is called with ifa_local.
 *  - The entry is linked in front of *ifap, then RTM_NEWADDR is sent
 *    before the NETDEV_UP notifier runs so netlink listeners learn about
 *    the address before the FIB update it triggers.
 *
 * NOTE(review): return values are not visible in this listing.
 */
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
337 struct in_device *in_dev = ifa->ifa_dev;
338 struct in_ifaddr *ifa1, **ifap, **last_primary;
342 if (!ifa->ifa_local) {
347 ifa->ifa_flags &= ~IFA_F_SECONDARY;
348 last_primary = &in_dev->ifa_list;
350 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351 ifap = &ifa1->ifa_next) {
352 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353 ifa->ifa_scope <= ifa1->ifa_scope)
354 last_primary = &ifa1->ifa_next;
355 if (ifa1->ifa_mask == ifa->ifa_mask &&
356 inet_ifa_match(ifa1->ifa_address, ifa)) {
357 if (ifa1->ifa_local == ifa->ifa_local) {
361 if (ifa1->ifa_scope != ifa->ifa_scope) {
365 ifa->ifa_flags |= IFA_F_SECONDARY;
369 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370 net_srandom(ifa->ifa_local);
374 ifa->ifa_next = *ifap;
377 /* Send message first, then call notifier.
378 Notifier will trigger FIB update, so that
379 listeners of netlink will know about new ifaddr */
380 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
388 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach @ifa to @dev and insert it.
 *
 * Visible steps: fetch the device's in_device with __in_dev_get_rtnl(),
 * call ipv4_devconf_setall() on it, bind ifa->ifa_dev to it (BUG_TRAP if
 * the ifa was already bound to a different in_device), force
 * RT_SCOPE_HOST for loopback local addresses, and return the result of
 * inet_insert_ifa().
 * NOTE(review): handling of a missing in_device is not visible here.
 */
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
393 struct in_device *in_dev = __in_dev_get_rtnl(dev);
401 ipv4_devconf_setall(in_dev);
402 if (ifa->ifa_dev != in_dev) {
403 BUG_TRAP(!ifa->ifa_dev);
405 ifa->ifa_dev = in_dev;
407 if (ipv4_is_loopback(ifa->ifa_local))
408 ifa->ifa_scope = RT_SCOPE_HOST;
409 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index @ifindex in @net.
 * The device lookup and the in_dev_get() call both happen with
 * dev_base_lock read-held; in_dev starts out NULL.
 * NOTE(review): the return statement is not visible; callers in this
 * file test the result against NULL and later drop a reference with
 * __in_dev_put().
 */
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
414 struct net_device *dev;
415 struct in_device *in_dev = NULL;
416 read_lock(&dev_base_lock);
417 dev = __dev_get_by_index(net, ifindex);
419 in_dev = in_dev_get(dev);
420 read_unlock(&dev_base_lock);
424 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the primary addresses of @in_dev for one whose mask equals the
 * requested mask and which inet_ifa_match()es @prefix.
 * NOTE(review): the second parameter line and the return statements are
 * not visible in this listing.
 */
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
431 for_primary_ifa(in_dev) {
432 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
434 } endfor_ifa(in_dev);
/*
 * RTM_DELADDR handler (registered in devinet_init()).
 *
 * Parses the request against ifa_ipv4_policy, resolves the in_device from
 * ifm->ifa_index and immediately drops the reference taken by
 * inetdev_by_index().  It then walks the address list and deletes, via
 * __inet_del_ifa() with destroy == 1, an entry that passes every filter
 * the request supplies:
 *   IFA_LOCAL   - ifa_local must be equal,
 *   IFA_LABEL   - label must compare equal (nla_strcmp() == 0),
 *   IFA_ADDRESS - prefix length must be equal and the address must match.
 * err is set to -EADDRNOTAVAIL on a path that is presumably the
 * "nothing matched" case -- the surrounding control flow is not shown.
 */
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
440 struct net *net = skb->sk->sk_net;
441 struct nlattr *tb[IFA_MAX+1];
442 struct in_device *in_dev;
443 struct ifaddrmsg *ifm;
444 struct in_ifaddr *ifa, **ifap;
449 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 ifm = nlmsg_data(nlh);
454 in_dev = inetdev_by_index(net, ifm->ifa_index);
455 if (in_dev == NULL) {
460 __in_dev_put(in_dev);
462 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463 ifap = &ifa->ifa_next) {
465 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
468 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
471 if (tb[IFA_ADDRESS] &&
472 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
476 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from an RTM_NEWADDR request.
 *
 * Visible steps: parse the attributes against ifa_ipv4_policy; test for a
 * prefix length above 32 or a missing IFA_LOCAL; look up the device by
 * ifm->ifa_index and its in_device; allocate the ifa; call
 * ipv4_devconf_setall(); default IFA_ADDRESS to IFA_LOCAL when absent;
 * copy prefix length, mask, flags, scope, local/peer address and the
 * optional broadcast/anycast values; take the label from IFA_LABEL or
 * from dev->name.
 * NOTE(review): error returns are not visible in this listing.
 */
485 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
487 struct nlattr *tb[IFA_MAX+1];
488 struct in_ifaddr *ifa;
489 struct ifaddrmsg *ifm;
490 struct net_device *dev;
491 struct in_device *in_dev;
494 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 ifm = nlmsg_data(nlh);
500 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
503 dev = __dev_get_by_index(net, ifm->ifa_index);
508 in_dev = __in_dev_get_rtnl(dev);
513 ifa = inet_alloc_ifa();
516 * A potential indev allocation can be left alive, it stays
517 * assigned to its device and is destroy with it.
521 ipv4_devconf_setall(in_dev);
524 if (tb[IFA_ADDRESS] == NULL)
525 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
527 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
528 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
529 ifa->ifa_flags = ifm->ifa_flags;
530 ifa->ifa_scope = ifm->ifa_scope;
531 ifa->ifa_dev = in_dev;
533 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
534 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
536 if (tb[IFA_BROADCAST])
537 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
540 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
543 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
545 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * RTM_NEWADDR handler (registered in devinet_init()): converts the request
 * into an in_ifaddr with rtm_to_ifaddr() and inserts it with
 * __inet_insert_ifa(), passing the request header and the sender's
 * netlink pid so the resulting notification can be attributed.
 * NOTE(review): the check of rtm_to_ifaddr()'s result is not visible.
 */
553 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
555 struct net *net = skb->sk->sk_net;
556 struct in_ifaddr *ifa;
560 ifa = rtm_to_ifaddr(net, nlh);
564 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
568 * Determine a default network mask, based on the IP address.
/*
 * Classful prefix length for @addr (network byte order).
 *
 * rc starts at -1, which is what is returned for addresses that fall in
 * none of the tested classes.  The zero network is special-cased;
 * otherwise the host-order address is tested with IN_CLASSA/B/C.
 * NOTE(review): the per-class results are not visible in this listing
 * (presumably 8/16/24 -- confirm).  devinet_ioctl() rejects addresses for
 * which this returns a negative value and uses the result as
 * ifa_prefixlen.
 */
571 static __inline__ int inet_abc_len(__be32 addr)
573 int rc = -1; /* Something else, probably a multicast. */
575 if (ipv4_is_zeronet(addr))
578 __u32 haddr = ntohl(addr);
580 if (IN_CLASSA(haddr))
582 else if (IN_CLASSB(haddr))
584 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls for namespace @net.
 * @arg points to a user-space struct ifreq.
 *
 * Visible flow:
 *  1. Copy the ifreq in, force NUL termination of ifr_name, keep a copy of
 *     the caller's sockaddr (sin_orig) and look for a ':' alias separator
 *     in the name before calling dev_load().
 *  2. Per-command preparation: the SIOCGIF* getters remember whether the
 *     caller supplied an AF_INET address (tryaddrmatch) and reset *sin;
 *     the SIOCSIF* setters require CAP_NET_ADMIN and test sin_family
 *     against AF_INET.
 *  3. Find the device by name and, if it has an in_device, select the
 *     address entry: first by label plus the caller's original address
 *     (4.4BSD style), then by label alone.
 *  4. Without a matching address only SIOCSIFADDR and SIOCSIFFLAGS go on;
 *     -EADDRNOTAVAIL is prepared for the others.
 *  5. Dispatch: getters copy the local, broadcast, peer or mask value into
 *     *sin.  The flags path deletes the matched address when IFF_UP is
 *     being cleared and otherwise calls dev_change_flags().  The setters
 *     take the address off the list with inet_del_ifa(destroy == 0),
 *     modify it, and re-insert it (inet_set_ifa() for SIOCSIFADDR,
 *     inet_insert_ifa() for the others).  SIOCSIFADDR derives prefix
 *     length and mask from inet_abc_len() on non point-to-point devices
 *     and uses /32 otherwise.  SIOCSIFNETMASK recomputes the broadcast
 *     address only if it still equalled local | ~old_mask.
 *  6. One path copies the ifreq back to user space, yielding -EFAULT on
 *     failure.
 *
 * NOTE(review): locking, the goto targets and several conditions are not
 * visible in this listing.
 */
592 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
595 struct sockaddr_in sin_orig;
596 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
597 struct in_device *in_dev;
598 struct in_ifaddr **ifap = NULL;
599 struct in_ifaddr *ifa = NULL;
600 struct net_device *dev;
603 int tryaddrmatch = 0;
606 * Fetch the caller's info block into kernel space
609 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
611 ifr.ifr_name[IFNAMSIZ - 1] = 0;
613 /* save original address for comparison */
614 memcpy(&sin_orig, sin, sizeof(*sin));
616 colon = strchr(ifr.ifr_name, ':');
621 dev_load(net, ifr.ifr_name);
625 case SIOCGIFADDR: /* Get interface address */
626 case SIOCGIFBRDADDR: /* Get the broadcast address */
627 case SIOCGIFDSTADDR: /* Get the destination address */
628 case SIOCGIFNETMASK: /* Get the netmask for the interface */
629 /* Note that these ioctls will not sleep,
630 so that we do not impose a lock.
631 One day we will be forced to put shlock here (I mean SMP)
633 tryaddrmatch = (sin_orig.sin_family == AF_INET);
634 memset(sin, 0, sizeof(*sin));
635 sin->sin_family = AF_INET;
640 if (!capable(CAP_NET_ADMIN))
643 case SIOCSIFADDR: /* Set interface address (and family) */
644 case SIOCSIFBRDADDR: /* Set the broadcast address */
645 case SIOCSIFDSTADDR: /* Set the destination address */
646 case SIOCSIFNETMASK: /* Set the netmask for the interface */
648 if (!capable(CAP_NET_ADMIN))
651 if (sin->sin_family != AF_INET)
662 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
668 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
670 /* Matthias Andree */
671 /* compare label and address (4.4BSD style) */
672 /* note: we only do this for a limited set of ioctls
673 and only if the original address family was AF_INET.
674 This is checked above. */
675 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
676 ifap = &ifa->ifa_next) {
677 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
678 sin_orig.sin_addr.s_addr ==
684 /* we didn't get a match, maybe the application is
685 4.3BSD-style and passed in junk so we fall back to
686 comparing just the label */
688 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
689 ifap = &ifa->ifa_next)
690 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
695 ret = -EADDRNOTAVAIL;
696 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
700 case SIOCGIFADDR: /* Get interface address */
701 sin->sin_addr.s_addr = ifa->ifa_local;
704 case SIOCGIFBRDADDR: /* Get the broadcast address */
705 sin->sin_addr.s_addr = ifa->ifa_broadcast;
708 case SIOCGIFDSTADDR: /* Get the destination address */
709 sin->sin_addr.s_addr = ifa->ifa_address;
712 case SIOCGIFNETMASK: /* Get the netmask for the interface */
713 sin->sin_addr.s_addr = ifa->ifa_mask;
718 ret = -EADDRNOTAVAIL;
722 if (!(ifr.ifr_flags & IFF_UP))
723 inet_del_ifa(in_dev, ifap, 1);
726 ret = dev_change_flags(dev, ifr.ifr_flags);
729 case SIOCSIFADDR: /* Set interface address (and family) */
731 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
736 if ((ifa = inet_alloc_ifa()) == NULL)
739 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
741 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
744 if (ifa->ifa_local == sin->sin_addr.s_addr)
746 inet_del_ifa(in_dev, ifap, 0);
747 ifa->ifa_broadcast = 0;
748 ifa->ifa_anycast = 0;
752 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
754 if (!(dev->flags & IFF_POINTOPOINT)) {
755 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
756 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
757 if ((dev->flags & IFF_BROADCAST) &&
758 ifa->ifa_prefixlen < 31)
759 ifa->ifa_broadcast = ifa->ifa_address |
762 ifa->ifa_prefixlen = 32;
763 ifa->ifa_mask = inet_make_mask(32);
765 ret = inet_set_ifa(dev, ifa);
768 case SIOCSIFBRDADDR: /* Set the broadcast address */
770 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
771 inet_del_ifa(in_dev, ifap, 0);
772 ifa->ifa_broadcast = sin->sin_addr.s_addr;
773 inet_insert_ifa(ifa);
777 case SIOCSIFDSTADDR: /* Set the destination address */
779 if (ifa->ifa_address == sin->sin_addr.s_addr)
782 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
785 inet_del_ifa(in_dev, ifap, 0);
786 ifa->ifa_address = sin->sin_addr.s_addr;
787 inet_insert_ifa(ifa);
790 case SIOCSIFNETMASK: /* Set the netmask for the interface */
793 * The mask we set must be legal.
796 if (bad_mask(sin->sin_addr.s_addr, 0))
799 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
800 __be32 old_mask = ifa->ifa_mask;
801 inet_del_ifa(in_dev, ifap, 0);
802 ifa->ifa_mask = sin->sin_addr.s_addr;
803 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
805 /* See if current broadcast address matches
806 * with current netmask, then recalculate
807 * the broadcast address. Otherwise it's a
808 * funny address, so don't touch it since
809 * the user seems to know what (s)he's doing...
811 if ((dev->flags & IFF_BROADCAST) &&
812 (ifa->ifa_prefixlen < 31) &&
813 (ifa->ifa_broadcast ==
814 (ifa->ifa_local|~old_mask))) {
815 ifa->ifa_broadcast = (ifa->ifa_local |
816 ~sin->sin_addr.s_addr);
818 inet_insert_ifa(ifa);
828 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * gifconf callback for PF_INET (registered in devinet_init()).
 *
 * For each address of @dev a zeroed struct ifreq is filled with a name
 * (the address label or the device name), family AF_INET and an address,
 * then copied to the user buffer.  @buf advances, @len shrinks and "done"
 * grows by sizeof(struct ifreq) per entry.  A remaining length smaller
 * than one ifreq is tested before each copy.
 * NOTE(review): the early-exit and return statements are not visible in
 * this listing.
 */
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
834 struct in_device *in_dev = __in_dev_get_rtnl(dev);
835 struct in_ifaddr *ifa;
839 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
842 for (; ifa; ifa = ifa->ifa_next) {
847 if (len < (int) sizeof(ifr))
849 memset(&ifr, 0, sizeof(struct ifreq));
851 strcpy(ifr.ifr_name, ifa->ifa_label);
853 strcpy(ifr.ifr_name, dev->name);
855 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
859 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
863 buf += sizeof(struct ifreq);
864 len -= sizeof(struct ifreq);
865 done += sizeof(struct ifreq);
/*
 * Choose a local address on behalf of @dev for destination @dst, limited
 * by @scope.
 *
 * Visible logic:
 *  - First walk: primary addresses of @dev's in_device (obtained with
 *    __in_dev_get_rcu()).  Each entry's scope is compared against @scope;
 *    an entry that matches @dst -- or any entry when @dst is 0 -- supplies
 *    addr = ifa_local.  A further assignment of ifa_local to addr is
 *    presumably a "first usable address" fallback (its guard is not
 *    shown).
 *  - Second walk: with dev_base_lock read-held, every device on the list
 *    is visited and the first primary address that is not RT_SCOPE_LINK
 *    and whose scope is <= @scope is taken.
 *
 * NOTE(review): the second walk iterates &init_net, while
 * inet_confirm_addr() in this file walks in_dev->dev->nd_net.  For a
 * device living in another namespace this looks like it could pick an
 * address from the initial namespace -- confirm whether the device's own
 * namespace (dev->nd_net) was intended.
 */
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
874 struct in_device *in_dev;
877 in_dev = __in_dev_get_rcu(dev);
881 for_primary_ifa(in_dev) {
882 if (ifa->ifa_scope > scope)
884 if (!dst || inet_ifa_match(dst, ifa)) {
885 addr = ifa->ifa_local;
889 addr = ifa->ifa_local;
890 } endfor_ifa(in_dev);
897 /* Not loopback addresses on loopback should be preferred
898 in this case. It is important that lo is the first interface
901 read_lock(&dev_base_lock);
903 for_each_netdev(&init_net, dev) {
904 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
907 for_primary_ifa(in_dev) {
908 if (ifa->ifa_scope != RT_SCOPE_LINK &&
909 ifa->ifa_scope <= scope) {
910 addr = ifa->ifa_local;
911 goto out_unlock_both;
913 } endfor_ifa(in_dev);
916 read_unlock(&dev_base_lock);
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Scans the addresses of @in_dev.  A candidate is recorded in addr when
 * its local address equals @local (or @local is 0) and its scope is
 * <= @scope.  "same" records whether an entry's subnet covers both @local
 * and @dst, each test being skipped when the value is 0.  Once a
 * candidate exists, later entries are checked for covering it and may
 * replace it if their scope permits.
 * Returns addr when "same" is set and 0 otherwise.
 * NOTE(review): several conditions and loop exits are not visible in this
 * listing, so the exact interplay is only partly documented.
 */
922 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
923 __be32 local, int scope)
930 (local == ifa->ifa_local || !local) &&
931 ifa->ifa_scope <= scope) {
932 addr = ifa->ifa_local;
937 same = (!local || inet_ifa_match(local, ifa)) &&
938 (!dst || inet_ifa_match(dst, ifa));
942 /* Is the selected addr into dst subnet? */
943 if (inet_ifa_match(addr, ifa))
945 /* No, then can we use new local src? */
946 if (ifa->ifa_scope <= scope) {
947 addr = ifa->ifa_local;
950 /* search for large dst subnet for addr */
954 } endfor_ifa(in_dev);
956 return same? addr : 0;
960 * Confirm that local IP address exists using wildcards:
961 * - in_dev: only on this interface, 0=any interface
962 * - dst: only in the same subnet as dst, 0=any dst
963 * - local: address, 0=autoselect the local address
964 * - scope: maximum allowed scope value for the local address
/*
 * Entry point for address confirmation.
 *
 * For any scope other than RT_SCOPE_LINK the check is confined to
 * @in_dev.  For RT_SCOPE_LINK every device of @in_dev's namespace is
 * visited with dev_base_lock read-held and confirm_addr_indev() is run on
 * each one that has an in_device.
 *
 * NOTE(review): the comment preceding this function says in_dev may be 0
 * ("any interface"), but the RT_SCOPE_LINK path dereferences in_dev->dev
 * unconditionally -- confirm that callers never pass NULL.
 */
966 __be32 inet_confirm_addr(struct in_device *in_dev,
967 __be32 dst, __be32 local, int scope)
970 struct net_device *dev;
973 if (scope != RT_SCOPE_LINK)
974 return confirm_addr_indev(in_dev, dst, local, scope);
976 net = in_dev->dev->nd_net;
977 read_lock(&dev_base_lock);
979 for_each_netdev(net, dev) {
980 if ((in_dev = __in_dev_get_rcu(dev))) {
981 addr = confirm_addr_indev(in_dev, dst, local, scope);
987 read_unlock(&dev_base_lock);
996 int register_inetaddr_notifier(struct notifier_block *nb)
998 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1001 int unregister_inetaddr_notifier(struct notifier_block *nb)
1003 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1006 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1007 * alias numbering and to create unique labels if possible.
/*
 * Called from inetdev_event() on NETDEV_CHANGENAME.
 *
 * For every address the old label is saved and the label is overwritten
 * with the new device name.  The alias suffix is the part of the old
 * label starting at ':'; a ":<named>" suffix is generated with sprintf()
 * on a path whose guard is not shown (presumably when no ':' was found).
 * If name plus suffix fit in IFNAMSIZ the suffix is appended; otherwise
 * it is written over the tail of the label so the result still ends
 * within IFNAMSIZ bytes.
 */
1009 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1011 struct in_ifaddr *ifa;
1014 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1015 char old[IFNAMSIZ], *dot;
1017 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1018 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1021 dot = strchr(old, ':');
1023 sprintf(old, ":%d", named);
1026 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1027 strcat(ifa->ifa_label, dot);
1029 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1034 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * Visible handling:
 *  - NETDEV_REGISTER (checked before the switch): create the in_device
 *    with inetdev_init(), reporting -ENOMEM through notifier_from_errno()
 *    on failure; loopback devices get NOXFRM and NOPOLICY set.
 *  - NETDEV_REGISTER inside the switch is treated as a bug and logged.
 *  - On a path taken for loopback devices, a 127.0.0.1/8 host-scope
 *    address labelled with the device name is allocated and inserted,
 *    taking an extra in_device reference for ifa->ifa_dev.
 *  - NETDEV_CHANGEMTU / NETDEV_UNREGISTER: inetdev_destroy() is called
 *    (the MTU condition that leads there is not shown).
 *  - NETDEV_CHANGENAME: labels are renamed and the sysctl entries are
 *    re-registered so they appear under the new name.
 */
1036 static int inetdev_event(struct notifier_block *this, unsigned long event,
1039 struct net_device *dev = ptr;
1040 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1045 if (event == NETDEV_REGISTER) {
1046 in_dev = inetdev_init(dev);
1048 return notifier_from_errno(-ENOMEM);
1049 if (dev->flags & IFF_LOOPBACK) {
1050 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1051 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1058 case NETDEV_REGISTER:
1059 printk(KERN_DEBUG "inetdev_event: bug\n");
1065 if (dev->flags & IFF_LOOPBACK) {
1066 struct in_ifaddr *ifa;
1067 if ((ifa = inet_alloc_ifa()) != NULL) {
1069 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1070 ifa->ifa_prefixlen = 8;
1071 ifa->ifa_mask = inet_make_mask(8);
1072 in_dev_hold(in_dev);
1073 ifa->ifa_dev = in_dev;
1074 ifa->ifa_scope = RT_SCOPE_HOST;
1075 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1076 inet_insert_ifa(ifa);
1084 case NETDEV_CHANGEMTU:
1087 /* MTU fell below 68, disable IP */
1088 case NETDEV_UNREGISTER:
1089 inetdev_destroy(in_dev);
1091 case NETDEV_CHANGENAME:
1092 /* Do not notify about label change, this event is
1093 * not interesting to applications using netlink.
1095 inetdev_changename(dev, in_dev);
1097 devinet_sysctl_unregister(in_dev);
1098 devinet_sysctl_register(in_dev);
/* Notifier block that routes netdevice events to inetdev_event(); registered in devinet_init(). */
1105 static struct notifier_block ip_netdev_notifier = {
1106 .notifier_call =inetdev_event,
1109 static inline size_t inet_nlmsg_size(void)
1111 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1112 + nla_total_size(4) /* IFA_ADDRESS */
1113 + nla_total_size(4) /* IFA_LOCAL */
1114 + nla_total_size(4) /* IFA_BROADCAST */
1115 + nla_total_size(4) /* IFA_ANYCAST */
1116 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise @ifa into @skb as a netlink message of type @event.
 *
 * Fills the ifaddrmsg header (family AF_INET, prefix length, scope and
 * interface index; the flags are always OR-ed with IFA_F_PERMANENT) and
 * then adds IFA_ADDRESS, IFA_LOCAL, IFA_BROADCAST, IFA_ANYCAST and
 * IFA_LABEL.  The visible guards skip address, broadcast, anycast and
 * label when they are zero/empty.
 * Returns the result of nlmsg_end() on success; one path cancels the
 * partially built message with nlmsg_cancel() (its return value is not
 * visible in this listing).
 */
1119 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1120 u32 pid, u32 seq, int event, unsigned int flags)
1122 struct ifaddrmsg *ifm;
1123 struct nlmsghdr *nlh;
1125 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1129 ifm = nlmsg_data(nlh);
1130 ifm->ifa_family = AF_INET;
1131 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1132 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1133 ifm->ifa_scope = ifa->ifa_scope;
1134 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1136 if (ifa->ifa_address)
1137 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1140 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1142 if (ifa->ifa_broadcast)
1143 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1145 if (ifa->ifa_anycast)
1146 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1148 if (ifa->ifa_label[0])
1149 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151 return nlmsg_end(skb, nlh);
1154 nlmsg_cancel(skb, nlh);
/*
 * RTM_GETADDR dump callback (registered in devinet_init()).
 *
 * Resumable walk over all devices of the socket's namespace and, per
 * device, over its address list.  cb->args[0] and cb->args[1] hold the
 * device index and address index at which a previous call stopped;
 * addresses below s_ip_idx are skipped.  Each address is emitted as an
 * RTM_NEWADDR message flagged NLM_F_MULTI, and a non-positive result from
 * inet_fill_ifaddr() is tested for.  The current address index is written
 * back to cb->args[1].
 * NOTE(review): the device-index bookkeeping and the return value are not
 * visible in this listing.
 */
1158 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1160 struct net *net = skb->sk->sk_net;
1162 struct net_device *dev;
1163 struct in_device *in_dev;
1164 struct in_ifaddr *ifa;
1165 int s_ip_idx, s_idx = cb->args[0];
1167 s_ip_idx = ip_idx = cb->args[1];
1169 for_each_netdev(net, dev) {
1174 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1177 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1178 ifa = ifa->ifa_next, ip_idx++) {
1179 if (ip_idx < s_ip_idx)
1181 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1183 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1192 cb->args[1] = ip_idx;
/*
 * Broadcast an address change (@event is RTM_NEWADDR or RTM_DELADDR in
 * this file) to the RTNLGRP_IPV4_IFADDR netlink group of the address's
 * namespace.
 *
 * The sequence number comes from @nlh when one is supplied, else 0.  An
 * skb sized by inet_nlmsg_size() is allocated and filled by
 * inet_fill_ifaddr(); -EMSGSIZE from the fill would mean the size
 * estimate is wrong, hence the WARN_ON.  The message is sent with
 * rtnl_notify(), and rtnl_set_sk_err() records an error on the group's
 * listeners on the failure path.
 */
1197 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1200 struct sk_buff *skb;
1201 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1205 net = ifa->ifa_dev->dev->nd_net;
1206 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1210 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1212 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1213 WARN_ON(err == -EMSGSIZE);
1217 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1220 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1223 #ifdef CONFIG_SYSCTL
/*
 * Propagate entry @i of the namespace's default devconf to every device
 * in @net whose in_device does not have bit @i set in cnf.state.  That
 * bit is set by the sysctl handlers in this file once the value has been
 * written for that configuration.  The device walk holds dev_base_lock
 * for reading.
 */
1225 static void devinet_copy_dflt_conf(struct net *net, int i)
1227 struct net_device *dev;
1229 read_lock(&dev_base_lock);
1230 for_each_netdev(net, dev) {
1231 struct in_device *in_dev;
1233 in_dev = __in_dev_get_rcu(dev);
1234 if (in_dev && !test_bit(i, in_dev->cnf.state))
1235 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1238 read_unlock(&dev_base_lock);
/*
 * Apply a change of the namespace-wide "all" forwarding switch.
 *
 * Reads the new value, sets the "all" accept_redirects entry to its
 * logical inverse, copies the value into the "default" forwarding entry
 * and then, with dev_base_lock read-held, sets FORWARDING on the
 * in_device of each device in @net.
 */
1241 static void inet_forward_change(struct net *net)
1243 struct net_device *dev;
1244 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1246 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1247 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1249 read_lock(&dev_base_lock);
1250 for_each_netdev(net, dev) {
1251 struct in_device *in_dev;
1253 in_dev = __in_dev_get_rcu(dev);
1255 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1258 read_unlock(&dev_base_lock);
/*
 * proc handler for plain devconf integers.
 *
 * Delegates the read/write to proc_dointvec().  It then derives the entry
 * index from the distance between ctl->data and cnf->data, marks the
 * entry in cnf->state and, when the table belongs to the namespace's
 * default configuration, pushes the value to the devices with
 * devinet_copy_dflt_conf().
 * NOTE(review): the guard around the bookkeeping (presumably "on write")
 * is not visible in this listing.
 */
1263 static int devinet_conf_proc(ctl_table *ctl, int write,
1264 struct file* filp, void __user *buffer,
1265 size_t *lenp, loff_t *ppos)
1267 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1270 struct ipv4_devconf *cnf = ctl->extra1;
1271 struct net *net = ctl->extra2;
1272 int i = (int *)ctl->data - cnf->data;
1274 set_bit(i, cnf->state);
1276 if (cnf == net->ipv4.devconf_dflt)
1277 devinet_copy_dflt_conf(net, i);
/*
 * sysctl(2) strategy routine for devconf integers.
 *
 * Visible steps: test for a missing new value and for a length other than
 * sizeof(int); fetch the new value from user space; if the caller asked
 * for the old value, copy out at most table->maxlen bytes and store the
 * copied length back; finally mark the entry in cnf->state and, for the
 * namespace's default configuration, propagate it with
 * devinet_copy_dflt_conf().
 * NOTE(review): error returns and the store of the new value are not
 * visible in this listing.
 */
1283 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1284 void __user *oldval, size_t __user *oldlenp,
1285 void __user *newval, size_t newlen)
1287 struct ipv4_devconf *cnf;
1289 int *valp = table->data;
1293 if (!newval || !newlen)
1296 if (newlen != sizeof(int))
1299 if (get_user(new, (int __user *)newval))
1305 if (oldval && oldlenp) {
1308 if (get_user(len, oldlenp))
1312 if (len > table->maxlen)
1313 len = table->maxlen;
1314 if (copy_to_user(oldval, valp, len))
1316 if (put_user(len, oldlenp))
1323 cnf = table->extra1;
1324 net = table->extra2;
1325 i = (int *)table->data - cnf->data;
1327 set_bit(i, cnf->state);
1329 if (cnf == net->ipv4.devconf_dflt)
1330 devinet_copy_dflt_conf(net, i);
/*
 * proc handler for the forwarding entries.
 *
 * After proc_dointvec(), a write that actually changed the value is
 * dispatched on which variable was written: the namespace's "all"
 * forwarding entry triggers inet_forward_change(); any entry other than
 * the "default" one takes a further action that is not visible in this
 * listing.
 */
1335 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1336 struct file* filp, void __user *buffer,
1337 size_t *lenp, loff_t *ppos)
1339 int *valp = ctl->data;
1341 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1343 if (write && *valp != val) {
1344 struct net *net = ctl->extra2;
1346 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1347 inet_forward_change(net);
1348 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
/*
 * proc handler: runs proc_dointvec() and then tests whether this was a
 * write that changed the stored value.
 * NOTE(review): the action taken in that case is not visible in this
 * listing; the name suggests a flush -- confirm.
 */
1355 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1356 struct file* filp, void __user *buffer,
1357 size_t *lenp, loff_t *ppos)
1359 int *valp = ctl->data;
1361 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1363 if (write && *valp != val)
/*
 * sysctl(2) strategy counterpart of ipv4_doint_and_flush(): starts by
 * delegating to devinet_conf_sysctl().
 * NOTE(review): what happens with the result is not visible in this
 * listing.
 */
1369 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1370 void __user *oldval, size_t __user *oldlenp,
1371 void __user *newval, size_t newlen)
1373 int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
/*
 * Helpers that build the ctl_table entries of the devinet_sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY is the generic form: .data points into the static
 * ipv4_devconf at the slot NET_IPV4_CONF_<attr> - 1 and .extra1 at
 * ipv4_devconf itself.  __devinet_sysctl_register() rebases both for the
 * configuration actually being registered.
 *   RW_ENTRY       - mode 0644, devinet_conf_proc / devinet_conf_sysctl
 *   RO_ENTRY       - mode 0444, same handlers
 *   COMPLEX_ENTRY  - mode 0644 with caller-supplied handlers
 *   FLUSHING_ENTRY - COMPLEX_ENTRY using the ipv4_doint_and_flush pair
 */
1383 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1385 .ctl_name = NET_IPV4_CONF_ ## attr, \
1387 .data = ipv4_devconf.data + \
1388 NET_IPV4_CONF_ ## attr - 1, \
1389 .maxlen = sizeof(int), \
1391 .proc_handler = proc, \
1392 .strategy = sysctl, \
1393 .extra1 = &ipv4_devconf, \
1396 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1397 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1398 devinet_conf_sysctl)
1400 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1401 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1402 devinet_conf_sysctl)
1404 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1405 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1407 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1408 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1409 ipv4_doint_and_flush_strategy)
/*
 * Template sysctl table for one devconf instance.
 *
 * __devinet_sysctl_register() kmemdup()s this object for every
 * configuration it registers ("all", "default" and each device) and
 * patches the per-entry pointers.  The struct also carries the sysctl
 * header returned at registration; __devinet_sysctl_register() further
 * stores a private copy of the directory name in a dev_name member whose
 * declaration is not visible in this listing.
 * "forwarding" uses its own proc handler and "mc_forwarding" is
 * read-only; the last four entries use the flushing handlers.
 */
1411 static struct devinet_sysctl_table {
1412 struct ctl_table_header *sysctl_header;
1413 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1415 } devinet_sysctl = {
1417 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1418 devinet_sysctl_forward,
1419 devinet_conf_sysctl),
1420 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1422 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1423 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1424 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1425 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1426 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1427 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1428 "accept_source_route"),
1429 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1430 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1431 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1432 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1433 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1434 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1435 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1436 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1437 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1439 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1440 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1441 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1442 "force_igmp_version"),
1443 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1444 "promote_secondaries"),
/*
 * Register the net/ipv4/conf/<dev_name> sysctl directory for devconf @p
 * in namespace @net; @ctl_name is the binary sysctl id of that directory.
 *
 * Visible steps: duplicate the devinet_sysctl template; for every entry
 * except the last array slot, shift .data by the byte distance between
 * @p and the static ipv4_devconf and point extra1/extra2 at @p and @net;
 * duplicate @dev_name (sysctl treats .procname as constant and the device
 * may be renamed later); fill in the fourth path component
 * (DEVINET_CTL_PATH_DEV) and register the table.
 * NOTE(review): the failure paths, the return value and the place where
 * the table is remembered (presumably p->sysctl, which
 * __devinet_sysctl_unregister() reads) are not visible in this listing.
 */
1448 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1449 int ctl_name, struct ipv4_devconf *p)
1452 struct devinet_sysctl_table *t;
1454 #define DEVINET_CTL_PATH_DEV 3
1456 struct ctl_path devinet_ctl_path[] = {
1457 { .procname = "net", .ctl_name = CTL_NET, },
1458 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1459 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1460 { /* to be set */ },
1464 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1468 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1469 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1470 t->devinet_vars[i].extra1 = p;
1471 t->devinet_vars[i].extra2 = net;
1475 * Make a copy of dev_name, because '.procname' is regarded as const
1476 * by sysctl and we wouldn't want anyone to change it under our feet
1477 * (see SIOCSIFNAME).
1479 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1483 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1484 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1486 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1488 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for devconf @cnf: fetches the table
 * from cnf->sysctl and unregisters its sysctl header.
 * NOTE(review): the NULL check and the release of the table memory are
 * not visible in this listing.
 */
1502 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1504 struct devinet_sysctl_table *t = cnf->sysctl;
1510 unregister_sysctl_table(t->sysctl_header);
1515 static void devinet_sysctl_register(struct in_device *idev)
1517 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1518 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1519 __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1520 idev->dev->ifindex, &idev->cnf);
1523 static void devinet_sysctl_unregister(struct in_device *idev)
1525 __devinet_sysctl_unregister(&idev->cnf);
1526 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Table for net/ipv4/ip_forward.  It is wired to the FORWARDING slot of
 * the static ipv4_devconf and to init_net; devinet_init_net() duplicates
 * it and repoints .data/.extra1/.extra2 for other namespaces.
 */
1529 static struct ctl_table ctl_forward_entry[] = {
1531 .ctl_name = NET_IPV4_FORWARD,
1532 .procname = "ip_forward",
1533 .data = &ipv4_devconf.data[
1534 NET_IPV4_CONF_FORWARDING - 1],
1535 .maxlen = sizeof(int),
1537 .proc_handler = devinet_sysctl_forward,
1538 .strategy = devinet_conf_sysctl,
1539 .extra1 = &ipv4_devconf,
1540 .extra2 = &init_net,
/* sysctl path "net/ipv4" under which devinet_init_net() registers the ip_forward table. */
1545 static __net_initdata struct ctl_path net_ipv4_path[] = {
1546 { .procname = "net", .ctl_name = CTL_NET, },
1547 { .procname = "ipv4", .ctl_name = NET_IPV4, },
/*
 * Per-namespace initialisation.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects and
 * the static ctl_forward_entry table directly.  Any other namespace gets
 * kmemdup() copies of all three, with the copied ip_forward entry
 * repointed at the namespace's own "all" configuration.
 * With CONFIG_SYSCTL the "all" and "default" conf directories and the
 * ip_forward table are registered and the latter's header is kept in
 * net->ipv4.forw_hdr.  Finally the two configurations are published in
 * net->ipv4.devconf_all / devconf_dflt.
 * The tail of the function is the unwind sequence: sysctl tables are
 * unregistered in reverse order, and the checks against the static
 * objects presumably guard freeing of the duplicated copies (the frees
 * themselves are not visible in this listing).
 */
1552 static __net_init int devinet_init_net(struct net *net)
1555 struct ipv4_devconf *all, *dflt;
1556 #ifdef CONFIG_SYSCTL
1557 struct ctl_table *tbl = ctl_forward_entry;
1558 struct ctl_table_header *forw_hdr;
1562 all = &ipv4_devconf;
1563 dflt = &ipv4_devconf_dflt;
1565 if (net != &init_net) {
1566 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1570 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1572 goto err_alloc_dflt;
1574 #ifdef CONFIG_SYSCTL
1575 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1579 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1580 tbl[0].extra1 = all;
1581 tbl[0].extra2 = net;
1585 #ifdef CONFIG_SYSCTL
1586 err = __devinet_sysctl_register(net, "all",
1587 NET_PROTO_CONF_ALL, all);
1591 err = __devinet_sysctl_register(net, "default",
1592 NET_PROTO_CONF_DEFAULT, dflt);
1597 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1598 if (forw_hdr == NULL)
1600 net->ipv4.forw_hdr = forw_hdr;
1603 net->ipv4.devconf_all = all;
1604 net->ipv4.devconf_dflt = dflt;
1607 #ifdef CONFIG_SYSCTL
1609 __devinet_sysctl_unregister(dflt);
1611 __devinet_sysctl_unregister(all);
1613 if (tbl != ctl_forward_entry)
1617 if (dflt != &ipv4_devconf_dflt)
1620 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown, mirroring devinet_init_net().
 *
 * With CONFIG_SYSCTL it remembers the ip_forward table behind
 * net->ipv4.forw_hdr, then unregisters that table and the "default" and
 * "all" conf directories.  Afterwards the namespace's devconf_dflt and
 * devconf_all objects are kfree()d.
 * NOTE(review): what is done with the remembered tbl pointer is not
 * visible in this listing.
 */
1626 static __net_exit void devinet_exit_net(struct net *net)
1628 #ifdef CONFIG_SYSCTL
1629 struct ctl_table *tbl;
1631 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1632 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1633 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1634 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1637 kfree(net->ipv4.devconf_dflt);
1638 kfree(net->ipv4.devconf_all);
/* Per-namespace init/exit hooks, registered by devinet_init() through register_pernet_subsys(). */
1641 static __net_initdata struct pernet_operations devinet_ops = {
1642 .init = devinet_init_net,
1643 .exit = devinet_exit_net,
/*
 * Boot-time initialisation of the IPv4 device layer, in this order:
 * register the per-namespace operations, the PF_INET gifconf callback and
 * the netdevice notifier, then hook up the rtnetlink handlers --
 * RTM_NEWADDR and RTM_DELADDR as "doit" handlers, RTM_GETADDR as a dump
 * handler.
 */
1646 void __init devinet_init(void)
1648 register_pernet_subsys(&devinet_ops);
1650 register_gifconf(PF_INET, inet_gifconf);
1651 register_netdevice_notifier(&ip_netdev_notifier);
1653 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1654 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1655 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
/* Functions from this file that are exported with EXPORT_SYMBOL(). */
1658 EXPORT_SYMBOL(in_dev_finish_destroy);
1659 EXPORT_SYMBOL(inet_select_addr);
1660 EXPORT_SYMBOL(inetdev_by_index);
1661 EXPORT_SYMBOL(register_inetaddr_notifier);
1662 EXPORT_SYMBOL(unregister_inetaddr_notifier);