2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
55 #include <linux/sysctl.h>
57 #include <linux/kmod.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
/*
 * File-local configuration block named ipv4_devconf.  The visible
 * initializers set the ACCEPT_REDIRECTS, SEND_REDIRECTS, SECURE_REDIRECTS
 * and SHARED_MEDIA slots to 1; each slot is addressed as
 * IPV4_DEVCONF_<name> - 1.
 * NOTE(review): the line that opens the inner array initializer and the
 * closing braces are not present in this listing.
 */
66 static struct ipv4_devconf ipv4_devconf = {
68 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
/*
 * File-local configuration block named ipv4_devconf_dflt.  It sets the
 * same four slots to 1 as ipv4_devconf and additionally sets
 * ACCEPT_SOURCE_ROUTE to 1.
 * NOTE(review): the line that opens the inner array initializer and the
 * closing braces are not present in this listing.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
77 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/*
 * Expands to IPV4_DEVCONF applied to the configuration that
 * net->ipv4.devconf_dflt points to, selecting attribute attr.
 * Note that the net argument is used unparenthesized in the expansion.
 */
85 #define IPV4_DEVCONF_DFLT(net, attr) \
86 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
/*
 * Netlink attribute policy, sized IFA_MAX + 1.  IFA_LOCAL, IFA_ADDRESS
 * and IFA_BROADCAST are declared as NLA_U32; IFA_LABEL is an NLA_STRING
 * with .len set to IFNAMSIZ - 1.  It is passed to nlmsg_parse() by
 * inet_rtm_deladdr() and rtm_to_ifaddr() in this file.
 */
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 [IFA_LOCAL] = { .type = NLA_U32 },
90 [IFA_ADDRESS] = { .type = NLA_U32 },
91 [IFA_BROADCAST] = { .type = NLA_U32 },
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
/*
 * Forward declarations for helpers defined further down, plus the
 * blocking notifier chain head inetaddr_chain.
 * NOTE(review): the inet_del_ifa prototype continues on a line that is
 * not present in this listing.  The devinet_sysctl_register/unregister
 * names appear both as plain prototypes and as static inline
 * definitions; presumably a preprocessor conditional that is not
 * visible here selects between them - confirm against the full file.
 */
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
104 static inline void devinet_sysctl_register(struct in_device *idev)
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
112 /* Locks all the inet devices. */
/*
 * Allocate one struct in_ifaddr with kzalloc() using GFP_KERNEL and
 * return the pointer kzalloc() produced.  No NULL check is done here;
 * callers receive the raw result.
 */
114 static struct in_ifaddr *inet_alloc_ifa(void)
116 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
/*
 * Callback handed to call_rcu() by inet_free_ifa().  It recovers the
 * in_ifaddr that embeds head (container_of on rcu_head) and drops the
 * reference on ifa->ifa_dev with in_dev_put().
 * NOTE(review): any statements after the in_dev_put() call are not
 * present in this listing.
 */
119 static void inet_rcu_free_ifa(struct rcu_head *head)
121 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
123 in_dev_put(ifa->ifa_dev);
/*
 * Queue ifa for deferred release: registers inet_rcu_free_ifa() as the
 * call_rcu() callback on ifa->rcu_head.
 */
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
129 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Exported final-teardown routine for an in_device.  The visible lines
 * warn (WARN_ON) if the address list or the multicast list is still
 * non-empty, optionally print a debug line under NET_REFCNT_DEBUG, and
 * contain a pr_err() for the "Freeing alive in_device" case.
 * NOTE(review): the condition guarding the pr_err(), the #endif and the
 * remaining statements are not present in this listing.
 */
132 void in_dev_finish_destroy(struct in_device *idev)
134 struct net_device *dev = idev->dev;
136 WARN_ON(idev->ifa_list);
137 WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 idev, dev ? dev->name : "NIL");
144 pr_err("Freeing alive in_device %p\n", idev);
148 EXPORT_SYMBOL(in_dev_finish_destroy);
/*
 * Create the IPv4 state (struct in_device) for dev.
 *
 * Visible steps: allocate a zeroed in_device, copy the namespace default
 * configuration (dev_net(dev)->ipv4.devconf_dflt) into in_dev->cnf and
 * clear cnf.sysctl, allocate ARP neighbour parameters, disable LRO when
 * forwarding is set in the copied configuration, register the sysctl
 * entries, initialise multicast state, and finally publish the result
 * through rcu_assign_pointer(dev->ip_ptr, in_dev).
 * NOTE(review): the allocation-failure handling, the reference-taking
 * statements announced by the two short comments, the IFF_UP branch
 * body and the return statements are not present in this listing.
 */
150 static struct in_device *inetdev_init(struct net_device *dev)
152 struct in_device *in_dev;
156 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
159 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 sizeof(in_dev->cnf));
161 in_dev->cnf.sysctl = NULL;
163 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 if (!in_dev->arp_parms)
166 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 dev_disable_lro(dev);
168 /* Reference in_dev->dev */
170 /* Account for reference dev->ip_ptr (below) */
173 devinet_sysctl_register(in_dev);
174 ip_mc_init_dev(in_dev);
175 if (dev->flags & IFF_UP)
178 /* we can receive as soon as ip_ptr is set -- do this last */
179 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * Callback handed to call_rcu() by inetdev_destroy().  It recovers the
 * in_device that embeds head via container_of on rcu_head.
 * NOTE(review): what is then done with idev is not present in this
 * listing.
 */
188 static void in_dev_rcu_put(struct rcu_head *head)
190 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device.
 *
 * Visible steps: destroy multicast state, delete addresses from the head
 * of in_dev->ifa_list until the list is empty (inet_del_ifa with
 * destroy == 0), unregister sysctl entries, release the ARP neighbour
 * parameters, and defer the final put to in_dev_rcu_put() via call_rcu().
 * NOTE(review): the statements between the declarations and
 * ip_mc_destroy_dev(), the rest of the loop body and the lines between
 * neigh_parms_release() and call_rcu() are not present in this listing.
 */
194 static void inetdev_destroy(struct in_device *in_dev)
196 struct in_ifaddr *ifa;
197 struct net_device *dev;
205 ip_mc_destroy_dev(in_dev);
207 while ((ifa = in_dev->ifa_list) != NULL) {
208 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
218 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of in_dev (for_primary_ifa) looking for one
 * whose subnet matches a and, when b is non-zero, also matches b.
 * NOTE(review): the statements executed on a match and the return
 * values are not present in this listing.
 */
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
224 for_primary_ifa(in_dev) {
225 if (inet_ifa_match(a, ifa)) {
226 if (!b || inet_ifa_match(b, ifa)) {
231 } endfor_ifa(in_dev);
/*
 * Unlink the address *ifap from in_dev and announce the removal.
 *
 * in_dev:  device state owning the list
 * ifap:    link pointing at the address to delete
 * destroy: see NOTE below
 * nlh/pid: forwarded to rtmsg_ifa() for the netlink notifications
 *
 * Visible behaviour: when the address is primary (IFA_F_SECONDARY not
 * set) the following entries are scanned; entries that are secondary,
 * share ifa_mask with the deleted address and match its prefix are
 * unlinked and reported with RTM_DELADDR plus a notifier call.  The
 * address itself is unlinked (*ifap = ifa1->ifa_next) and reported with
 * RTM_DELADDR followed by a NETDEV_DOWN notifier call.  A candidate held
 * in promote is relinked after last_prim, has IFA_F_SECONDARY cleared
 * and is reported with RTM_NEWADDR.
 * NOTE(review): the conditions that choose promote/prev_prom/last_prim,
 * the use of do_promote and destroy, and the body of the final loop are
 * not present in this listing.
 */
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237 int destroy, struct nlmsghdr *nlh, u32 pid)
239 struct in_ifaddr *promote = NULL;
240 struct in_ifaddr *ifa, *ifa1 = *ifap;
241 struct in_ifaddr *last_prim = in_dev->ifa_list;
242 struct in_ifaddr *prev_prom = NULL;
243 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
247 /* 1. Deleting primary ifaddr forces deletion of all secondaries
248 * unless alias promotion is set
251 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
254 while ((ifa = *ifap1) != NULL) {
255 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256 ifa1->ifa_scope <= ifa->ifa_scope)
259 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260 ifa1->ifa_mask != ifa->ifa_mask ||
261 !inet_ifa_match(ifa1->ifa_address, ifa)) {
262 ifap1 = &ifa->ifa_next;
268 *ifap1 = ifa->ifa_next;
270 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271 blocking_notifier_call_chain(&inetaddr_chain,
283 *ifap = ifa1->ifa_next;
285 /* 3. Announce address deletion */
287 /* Send message first, then call notifier.
288 At first sight, FIB update triggered by notifier
289 will refer to already deleted ifaddr, that could confuse
290 netlink listeners. It is not true: look, gated sees
291 that route deleted and if it still thinks that ifaddr
292 is valid, it will try to restore deleted routes... Grr.
293 So that, this order is correct.
295 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
301 prev_prom->ifa_next = promote->ifa_next;
302 promote->ifa_next = last_prim->ifa_next;
303 last_prim->ifa_next = promote;
306 promote->ifa_flags &= ~IFA_F_SECONDARY;
307 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308 blocking_notifier_call_chain(&inetaddr_chain,
310 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311 if (ifa1->ifa_mask != ifa->ifa_mask ||
312 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper: calls __inet_del_ifa() with no netlink header
 * (NULL) and pid 0.
 * NOTE(review): the rest of the parameter list is on a line that is not
 * present in this listing; the call shows a destroy argument.
 */
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
325 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Insert ifa into the address list of ifa->ifa_dev and announce it.
 *
 * Visible behaviour: the IFA_F_SECONDARY flag is cleared first; the list
 * is walked while tracking last_primary (the link after the last primary
 * whose scope is not lower than the new address).  An existing entry
 * with the same mask and a matching prefix is compared on ifa_local and
 * ifa_scope, and the new address is marked IFA_F_SECONDARY.  For an
 * address that stays primary net_srandom() is seeded with ifa_local.
 * The address is linked in (ifa->ifa_next = *ifap), then RTM_NEWADDR is
 * sent and the NETDEV_UP notifier chain is called, in that order.
 * NOTE(review): the outcomes of the !ifa_local, equal-local and
 * scope-mismatch checks, the rest of the linking code and the return
 * values are not present in this listing.
 */
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
331 struct in_device *in_dev = ifa->ifa_dev;
332 struct in_ifaddr *ifa1, **ifap, **last_primary;
336 if (!ifa->ifa_local) {
341 ifa->ifa_flags &= ~IFA_F_SECONDARY;
342 last_primary = &in_dev->ifa_list;
344 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345 ifap = &ifa1->ifa_next) {
346 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347 ifa->ifa_scope <= ifa1->ifa_scope)
348 last_primary = &ifa1->ifa_next;
349 if (ifa1->ifa_mask == ifa->ifa_mask &&
350 inet_ifa_match(ifa1->ifa_address, ifa)) {
351 if (ifa1->ifa_local == ifa->ifa_local) {
355 if (ifa1->ifa_scope != ifa->ifa_scope) {
359 ifa->ifa_flags |= IFA_F_SECONDARY;
363 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364 net_srandom(ifa->ifa_local);
368 ifa->ifa_next = *ifap;
371 /* Send message first, then call notifier.
372 Notifier will trigger FIB update, so that
373 listeners of netlink will know about new ifaddr */
374 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper: returns __inet_insert_ifa() called with no
 * netlink header (NULL) and pid 0.
 */
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
382 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Attach ifa to dev and insert it.
 *
 * Visible behaviour: fetches the in_device with __in_dev_get_rtnl(),
 * calls ipv4_devconf_setall(), binds ifa->ifa_dev to that in_device
 * (warning if it was already bound to a different one), forces
 * RT_SCOPE_HOST for loopback addresses, and returns the result of
 * inet_insert_ifa().
 * NOTE(review): the handling of a missing in_device and the statement
 * between the WARN_ON and the ifa_dev assignment are not present in
 * this listing.
 */
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
387 struct in_device *in_dev = __in_dev_get_rtnl(dev);
395 ipv4_devconf_setall(in_dev);
396 if (ifa->ifa_dev != in_dev) {
397 WARN_ON(ifa->ifa_dev);
399 ifa->ifa_dev = in_dev;
401 if (ipv4_is_loopback(ifa->ifa_local))
402 ifa->ifa_scope = RT_SCOPE_HOST;
403 return inet_insert_ifa(ifa);
/*
 * Exported lookup: find the net_device with index ifindex in net using
 * dev_get_by_index_rcu() and obtain its in_device through in_dev_get().
 * in_dev starts out as NULL.
 * NOTE(review): the RCU lock/unlock calls, the NULL check on dev and the
 * return statement are not present in this listing.
 */
406 struct in_device *inetdev_by_index(struct net *net, int ifindex)
408 struct net_device *dev;
409 struct in_device *in_dev = NULL;
412 dev = dev_get_by_index_rcu(net, ifindex);
414 in_dev = in_dev_get(dev);
418 EXPORT_SYMBOL(inetdev_by_index);
/*
 * Search the primary addresses of in_dev for one whose ifa_mask equals
 * mask and whose subnet matches prefix.
 * NOTE(review): the tail of the parameter list and the return
 * statements are not present in this listing.
 */
420 /* Called only from RTNL semaphored context. No locks. */
422 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427 for_primary_ifa(in_dev) {
428 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
430 } endfor_ifa(in_dev);
/*
 * rtnetlink handler registered for RTM_DELADDR (see devinet_init()).
 *
 * Visible behaviour: parses the message against ifa_ipv4_policy, looks
 * up the in_device for ifm->ifa_index, then walks its address list.  An
 * entry is rejected when IFA_LOCAL differs from ifa_local, when
 * IFA_LABEL is present and differs from ifa_label, or when IFA_ADDRESS
 * is present and either the prefix length differs or the address does
 * not match the entry's subnet.  The first surviving entry is removed
 * with __inet_del_ifa(..., destroy = 1, nlh, sender pid).  err is set
 * to -EADDRNOTAVAIL on the last visible line.
 * NOTE(review): the error branches, the first half of the IFA_LOCAL
 * test and the return statements are not present in this listing.
 */
434 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
436 struct net *net = sock_net(skb->sk);
437 struct nlattr *tb[IFA_MAX+1];
438 struct in_device *in_dev;
439 struct ifaddrmsg *ifm;
440 struct in_ifaddr *ifa, **ifap;
445 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449 ifm = nlmsg_data(nlh);
450 in_dev = inetdev_by_index(net, ifm->ifa_index);
451 if (in_dev == NULL) {
456 __in_dev_put(in_dev);
458 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459 ifap = &ifa->ifa_next) {
461 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
464 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
467 if (tb[IFA_ADDRESS] &&
468 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
469 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
472 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from an RTM_NEWADDR-style netlink message.
 *
 * Visible behaviour: parses the message against ifa_ipv4_policy, checks
 * that the prefix length is at most 32 and that IFA_LOCAL is present,
 * resolves the device by ifm->ifa_index, fetches its in_device,
 * allocates the address, and fills prefix length, mask, flags, scope,
 * owning device, local address and peer address.  When IFA_ADDRESS is
 * absent it falls back to IFA_LOCAL.  The broadcast address is set only
 * when IFA_BROADCAST is present.  The label is copied from IFA_LABEL
 * with nla_strlcpy(), otherwise from dev->name.
 * NOTE(review): the error branches, the condition selecting between the
 * two label copies and the return statements are not present in this
 * listing.
 */
481 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 struct nlattr *tb[IFA_MAX+1];
484 struct in_ifaddr *ifa;
485 struct ifaddrmsg *ifm;
486 struct net_device *dev;
487 struct in_device *in_dev;
490 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494 ifm = nlmsg_data(nlh);
496 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
499 dev = __dev_get_by_index(net, ifm->ifa_index);
504 in_dev = __in_dev_get_rtnl(dev);
509 ifa = inet_alloc_ifa();
512 * A potential indev allocation can be left alive, it stays
513 * assigned to its device and is destroy with it.
517 ipv4_devconf_setall(in_dev);
520 if (tb[IFA_ADDRESS] == NULL)
521 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
524 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
525 ifa->ifa_flags = ifm->ifa_flags;
526 ifa->ifa_scope = ifm->ifa_scope;
527 ifa->ifa_dev = in_dev;
529 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
530 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532 if (tb[IFA_BROADCAST])
533 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
536 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink handler registered for RTM_NEWADDR (see devinet_init()).
 * Converts the message with rtm_to_ifaddr() and returns the result of
 * __inet_insert_ifa() called with the request header and sender pid.
 * NOTE(review): the check on the rtm_to_ifaddr() result is not present
 * in this listing.
 */
546 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 struct net *net = sock_net(skb->sk);
549 struct in_ifaddr *ifa;
553 ifa = rtm_to_ifaddr(net, nlh);
557 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
/*
 * Classify addr (network byte order) by address class.  rc starts at -1,
 * which the inline comment describes as "something else, probably a
 * multicast".  The zero network is tested first; otherwise the address
 * is converted with ntohl() and tested with IN_CLASSA, IN_CLASSB and
 * IN_CLASSC in turn.
 * NOTE(review): the value assigned in each branch and the return
 * statement are not present in this listing.
 */
561 * Determine a default network mask, based on the IP address.
564 static inline int inet_abc_len(__be32 addr)
566 int rc = -1; /* Something else, probably a multicast. */
568 if (ipv4_is_zeronet(addr))
571 __u32 haddr = ntohl(addr);
573 if (IN_CLASSA(haddr))
575 else if (IN_CLASSB(haddr))
577 else if (IN_CLASSC(haddr))
/*
 * Handle the IPv4 interface-address ioctls for namespace net.
 *
 * net: network namespace used for device loading and lookup
 * cmd: ioctl number (SIOCGIF... / SIOCSIF... cases are visible)
 * arg: user-space pointer to a struct ifreq
 *
 * Visible flow:
 *  - copy the ifreq in from user space and force NUL termination of
 *    ifr_name; keep a copy of the caller-supplied sockaddr_in in
 *    sin_orig; look for ':' in the name; call dev_load().
 *  - first switch: the four "get" commands record whether the caller
 *    passed an AF_INET address (tryaddrmatch) and reset *sin to an
 *    empty AF_INET sockaddr; the "set" commands check CAP_NET_ADMIN
 *    and test that sin_family is AF_INET.
 *  - resolve the device by name and its in_device, then search the
 *    address list first by label plus address and then by label only.
 *  - second switch: the "get" commands copy ifa_local, ifa_broadcast,
 *    ifa_address or ifa_mask into the reply; the "set" commands delete
 *    the address from the list, modify it and re-insert it
 *    (SIOCSIFADDR goes through inet_set_ifa()).
 *  - the last visible line copies the ifreq back to user space and maps
 *    a copy failure to -EFAULT.
 * NOTE(review): the declaration of ifr, most case labels' break/goto
 * statements, the locking calls and the return statements are not
 * present in this listing, so the exact control flow cannot be confirmed
 * from here.
 */
585 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
588 struct sockaddr_in sin_orig;
589 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
590 struct in_device *in_dev;
591 struct in_ifaddr **ifap = NULL;
592 struct in_ifaddr *ifa = NULL;
593 struct net_device *dev;
596 int tryaddrmatch = 0;
599 * Fetch the caller's info block into kernel space
602 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604 ifr.ifr_name[IFNAMSIZ - 1] = 0;
606 /* save original address for comparison */
607 memcpy(&sin_orig, sin, sizeof(*sin));
609 colon = strchr(ifr.ifr_name, ':');
613 dev_load(net, ifr.ifr_name);
616 case SIOCGIFADDR: /* Get interface address */
617 case SIOCGIFBRDADDR: /* Get the broadcast address */
618 case SIOCGIFDSTADDR: /* Get the destination address */
619 case SIOCGIFNETMASK: /* Get the netmask for the interface */
620 /* Note that these ioctls will not sleep,
621 so that we do not impose a lock.
622 One day we will be forced to put shlock here (I mean SMP)
624 tryaddrmatch = (sin_orig.sin_family == AF_INET);
625 memset(sin, 0, sizeof(*sin));
626 sin->sin_family = AF_INET;
631 if (!capable(CAP_NET_ADMIN))
634 case SIOCSIFADDR: /* Set interface address (and family) */
635 case SIOCSIFBRDADDR: /* Set the broadcast address */
636 case SIOCSIFDSTADDR: /* Set the destination address */
637 case SIOCSIFNETMASK: /* Set the netmask for the interface */
639 if (!capable(CAP_NET_ADMIN))
642 if (sin->sin_family != AF_INET)
653 dev = __dev_get_by_name(net, ifr.ifr_name);
660 in_dev = __in_dev_get_rtnl(dev);
663 /* Matthias Andree */
664 /* compare label and address (4.4BSD style) */
665 /* note: we only do this for a limited set of ioctls
666 and only if the original address family was AF_INET.
667 This is checked above. */
668 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
669 ifap = &ifa->ifa_next) {
670 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
671 sin_orig.sin_addr.s_addr ==
677 /* we didn't get a match, maybe the application is
678 4.3BSD-style and passed in junk so we fall back to
679 comparing just the label */
681 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682 ifap = &ifa->ifa_next)
683 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
688 ret = -EADDRNOTAVAIL;
689 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
693 case SIOCGIFADDR: /* Get interface address */
694 sin->sin_addr.s_addr = ifa->ifa_local;
697 case SIOCGIFBRDADDR: /* Get the broadcast address */
698 sin->sin_addr.s_addr = ifa->ifa_broadcast;
701 case SIOCGIFDSTADDR: /* Get the destination address */
702 sin->sin_addr.s_addr = ifa->ifa_address;
705 case SIOCGIFNETMASK: /* Get the netmask for the interface */
706 sin->sin_addr.s_addr = ifa->ifa_mask;
711 ret = -EADDRNOTAVAIL;
715 if (!(ifr.ifr_flags & IFF_UP))
716 inet_del_ifa(in_dev, ifap, 1);
719 ret = dev_change_flags(dev, ifr.ifr_flags);
722 case SIOCSIFADDR: /* Set interface address (and family) */
724 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
729 ifa = inet_alloc_ifa();
733 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738 if (ifa->ifa_local == sin->sin_addr.s_addr)
740 inet_del_ifa(in_dev, ifap, 0);
741 ifa->ifa_broadcast = 0;
745 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
747 if (!(dev->flags & IFF_POINTOPOINT)) {
748 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750 if ((dev->flags & IFF_BROADCAST) &&
751 ifa->ifa_prefixlen < 31)
752 ifa->ifa_broadcast = ifa->ifa_address |
755 ifa->ifa_prefixlen = 32;
756 ifa->ifa_mask = inet_make_mask(32);
758 ret = inet_set_ifa(dev, ifa);
761 case SIOCSIFBRDADDR: /* Set the broadcast address */
763 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764 inet_del_ifa(in_dev, ifap, 0);
765 ifa->ifa_broadcast = sin->sin_addr.s_addr;
766 inet_insert_ifa(ifa);
770 case SIOCSIFDSTADDR: /* Set the destination address */
772 if (ifa->ifa_address == sin->sin_addr.s_addr)
775 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778 inet_del_ifa(in_dev, ifap, 0);
779 ifa->ifa_address = sin->sin_addr.s_addr;
780 inet_insert_ifa(ifa);
783 case SIOCSIFNETMASK: /* Set the netmask for the interface */
786 * The mask we set must be legal.
789 if (bad_mask(sin->sin_addr.s_addr, 0))
792 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793 __be32 old_mask = ifa->ifa_mask;
794 inet_del_ifa(in_dev, ifap, 0);
795 ifa->ifa_mask = sin->sin_addr.s_addr;
796 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798 /* See if current broadcast address matches
799 * with current netmask, then recalculate
800 * the broadcast address. Otherwise it's a
801 * funny address, so don't touch it since
802 * the user seems to know what (s)he's doing...
804 if ((dev->flags & IFF_BROADCAST) &&
805 (ifa->ifa_prefixlen < 31) &&
806 (ifa->ifa_broadcast ==
807 (ifa->ifa_local|~old_mask))) {
808 ifa->ifa_broadcast = (ifa->ifa_local |
809 ~sin->sin_addr.s_addr);
811 inet_insert_ifa(ifa);
821 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * Callback registered for PF_INET with register_gifconf() (see
 * devinet_init()).
 *
 * Visible behaviour: for each address on dev, stop filling when fewer
 * than sizeof(struct ifreq) bytes remain in len, otherwise zero an
 * ifreq, set its name from ifa_label or dev->name, mark the address
 * family AF_INET, copy the record to buf with copy_to_user(), and
 * advance buf/len/done by sizeof(struct ifreq).
 * NOTE(review): the declarations of ifr and done, the condition that
 * selects between the two strcpy() calls, the address value stored, the
 * behaviour for a NULL buf and the return statement are not present in
 * this listing.
 */
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827 struct in_device *in_dev = __in_dev_get_rtnl(dev);
828 struct in_ifaddr *ifa;
835 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
840 if (len < (int) sizeof(ifr))
842 memset(&ifr, 0, sizeof(struct ifreq));
844 strcpy(ifr.ifr_name, ifa->ifa_label);
846 strcpy(ifr.ifr_name, dev->name);
848 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
856 buf += sizeof(struct ifreq);
857 len -= sizeof(struct ifreq);
858 done += sizeof(struct ifreq);
/*
 * Exported source-address selection.
 *
 * dev:   device to try first
 * dst:   destination address, or 0 for no subnet preference
 * scope: upper bound on the scope value of an acceptable address
 *
 * Visible behaviour: on dev's primary addresses, entries whose scope
 * exceeds scope are skipped; an entry matching dst (or any entry when
 * dst is 0) supplies ifa_local, and ifa_local is also recorded on a
 * further line as a candidate.  A second pass walks every device in the
 * namespace and takes the first primary address whose scope is not
 * RT_SCOPE_LINK and does not exceed scope.
 * NOTE(review): the RCU locking, the conditions that end each pass and
 * the return statement are not present in this listing.
 */
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 struct in_device *in_dev;
868 struct net *net = dev_net(dev);
871 in_dev = __in_dev_get_rcu(dev);
875 for_primary_ifa(in_dev) {
876 if (ifa->ifa_scope > scope)
878 if (!dst || inet_ifa_match(dst, ifa)) {
879 addr = ifa->ifa_local;
883 addr = ifa->ifa_local;
884 } endfor_ifa(in_dev);
890 /* Not loopback addresses on loopback should be preferred
891 in this case. It is importnat that lo is the first interface
894 for_each_netdev_rcu(net, dev) {
895 in_dev = __in_dev_get_rcu(dev);
899 for_primary_ifa(in_dev) {
900 if (ifa->ifa_scope != RT_SCOPE_LINK &&
901 ifa->ifa_scope <= scope) {
902 addr = ifa->ifa_local;
905 } endfor_ifa(in_dev);
911 EXPORT_SYMBOL(inet_select_addr);
/*
 * Per-device helper for inet_confirm_addr().
 *
 * Visible behaviour: while walking the addresses of in_dev, a candidate
 * addr is taken from an entry whose ifa_local equals local (or any entry
 * when local is 0) and whose scope does not exceed scope; same records
 * whether local and dst (each treated as a wildcard when 0) fall in the
 * entry's subnet.  Later entries may replace addr when their scope is
 * acceptable.  The function returns addr when same is set, otherwise 0.
 * NOTE(review): the declarations, the first half of the initial
 * condition and the loop-control statements are not present in this
 * listing.
 */
913 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
914 __be32 local, int scope)
921 (local == ifa->ifa_local || !local) &&
922 ifa->ifa_scope <= scope) {
923 addr = ifa->ifa_local;
928 same = (!local || inet_ifa_match(local, ifa)) &&
929 (!dst || inet_ifa_match(dst, ifa));
933 /* Is the selected addr into dst subnet? */
934 if (inet_ifa_match(addr, ifa))
936 /* No, then can we use new local src? */
937 if (ifa->ifa_scope <= scope) {
938 addr = ifa->ifa_local;
941 /* search for large dst subnet for addr */
945 } endfor_ifa(in_dev);
947 return same ? addr : 0;
/*
 * Visible behaviour: when scope is not RT_SCOPE_LINK the answer comes
 * straight from confirm_addr_indev() on the given in_dev.  Otherwise
 * the namespace is taken from in_dev->dev and every device in it is
 * queried through confirm_addr_indev().
 * NOTE(review): the RCU locking, the loop exit condition and the return
 * statement of the second path are not present in this listing.
 */
951 * Confirm that local IP address exists using wildcards:
952 * - in_dev: only on this interface, 0=any interface
953 * - dst: only in the same subnet as dst, 0=any dst
954 * - local: address, 0=autoselect the local address
955 * - scope: maximum allowed scope value for the local address
957 __be32 inet_confirm_addr(struct in_device *in_dev,
958 __be32 dst, __be32 local, int scope)
961 struct net_device *dev;
964 if (scope != RT_SCOPE_LINK)
965 return confirm_addr_indev(in_dev, dst, local, scope);
967 net = dev_net(in_dev->dev);
969 for_each_netdev_rcu(net, dev) {
970 in_dev = __in_dev_get_rcu(dev);
972 addr = confirm_addr_indev(in_dev, dst, local, scope);
/*
 * Exported: add nb to the inetaddr_chain blocking notifier chain and
 * return the result of blocking_notifier_chain_register().
 */
986 int register_inetaddr_notifier(struct notifier_block *nb)
988 return blocking_notifier_chain_register(&inetaddr_chain, nb);
990 EXPORT_SYMBOL(register_inetaddr_notifier);
/*
 * Exported: remove nb from the inetaddr_chain blocking notifier chain
 * and return the result of blocking_notifier_chain_unregister().
 */
992 int unregister_inetaddr_notifier(struct notifier_block *nb)
994 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
996 EXPORT_SYMBOL(unregister_inetaddr_notifier);
/*
 * Visible behaviour: for every address on in_dev the old label is saved,
 * the label is overwritten with dev->name, and the ":suffix" part found
 * in the old label (or a generated ":<n>" built with sprintf) is
 * re-attached.  The suffix is appended with strcat() when the combined
 * length stays below IFNAMSIZ; otherwise it is written so that it ends
 * at the last byte of the label.  Each renamed address is announced
 * with RTM_NEWADDR.
 * NOTE(review): the declaration and update of named and the conditions
 * around the sprintf() and the final strcpy() are not present in this
 * listing.
 */
998 /* Rename ifa_labels for a device name change. Make some effort to preserve
999 * existing alias numbering and to create unique labels if possible.
1001 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003 struct in_ifaddr *ifa;
1006 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1007 char old[IFNAMSIZ], *dot;
1009 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1010 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1013 dot = strchr(old, ':');
1015 sprintf(old, ":%d", named);
1018 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1019 strcat(ifa->ifa_label, dot);
1021 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/*
 * Boolean predicate on an MTU value; inetdev_event() uses it to decide
 * whether IPv4 can be enabled on a device.
 * NOTE(review): the function body (and therefore the threshold it
 * applies) is not present in this listing.
 */
1027 static inline bool inetdev_valid_mtu(unsigned mtu)
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * Visible behaviour:
 *  - NETDEV_REGISTER creates the in_device with inetdev_init(), reports
 *    -ENOMEM through notifier_from_errno(), and sets NOXFRM and NOPOLICY
 *    to 1 on loopback devices.
 *  - NETDEV_CHANGEMTU creates the in_device when the MTU is valid.
 *  - In the later switch: a loopback device gets an address built from
 *    INADDR_LOOPBACK with prefix length 8 and host scope;
 *    NETDEV_NOTIFY_PEERS / NETDEV_CHANGEADDR send an ARP request for the
 *    first address when ARP_NOTIFY is enabled; the TYPE_CHANGE events
 *    unmap/remap multicast state; NETDEV_CHANGEMTU falls through to
 *    NETDEV_UNREGISTER, which destroys the in_device;
 *    NETDEV_CHANGENAME renames labels and re-registers the sysctls.
 * NOTE(review): the condition that guards the first if/else chain, the
 * case label owning the loopback block, break statements and the return
 * value are not present in this listing.
 */
1032 /* Called only under RTNL semaphore */
1034 static int inetdev_event(struct notifier_block *this, unsigned long event,
1037 struct net_device *dev = ptr;
1038 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1043 if (event == NETDEV_REGISTER) {
1044 in_dev = inetdev_init(dev);
1046 return notifier_from_errno(-ENOMEM);
1047 if (dev->flags & IFF_LOOPBACK) {
1048 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1051 } else if (event == NETDEV_CHANGEMTU) {
1052 /* Re-enabling IP */
1053 if (inetdev_valid_mtu(dev->mtu))
1054 in_dev = inetdev_init(dev);
1060 case NETDEV_REGISTER:
1061 printk(KERN_DEBUG "inetdev_event: bug\n");
1065 if (!inetdev_valid_mtu(dev->mtu))
1067 if (dev->flags & IFF_LOOPBACK) {
1068 struct in_ifaddr *ifa = inet_alloc_ifa();
1072 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1073 ifa->ifa_prefixlen = 8;
1074 ifa->ifa_mask = inet_make_mask(8);
1075 in_dev_hold(in_dev);
1076 ifa->ifa_dev = in_dev;
1077 ifa->ifa_scope = RT_SCOPE_HOST;
1078 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1079 inet_insert_ifa(ifa);
1084 case NETDEV_NOTIFY_PEERS:
1085 case NETDEV_CHANGEADDR:
1086 /* Send gratuitous ARP to notify of link change */
1087 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1088 struct in_ifaddr *ifa = in_dev->ifa_list;
1091 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1092 ifa->ifa_address, dev,
1093 ifa->ifa_address, NULL,
1094 dev->dev_addr, NULL);
1100 case NETDEV_PRE_TYPE_CHANGE:
1101 ip_mc_unmap(in_dev);
1103 case NETDEV_POST_TYPE_CHANGE:
1104 ip_mc_remap(in_dev);
1106 case NETDEV_CHANGEMTU:
1107 if (inetdev_valid_mtu(dev->mtu))
1109 /* disable IP when MTU is not enough */
1110 case NETDEV_UNREGISTER:
1111 inetdev_destroy(in_dev);
1113 case NETDEV_CHANGENAME:
1114 /* Do not notify about label change, this event is
1115 * not interesting to applications using netlink.
1117 inetdev_changename(dev, in_dev);
1119 devinet_sysctl_unregister(in_dev);
1120 devinet_sysctl_register(in_dev);
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * registered in devinet_init() with register_netdevice_notifier().
 */
1127 static struct notifier_block ip_netdev_notifier = {
1128 .notifier_call = inetdev_event,
/*
 * Upper bound on the payload size of one address message: the aligned
 * ifaddrmsg header plus three 4-byte attributes (IFA_ADDRESS, IFA_LOCAL,
 * IFA_BROADCAST) and one IFNAMSIZ-byte attribute (IFA_LABEL).  It must
 * stay in step with the attributes emitted by inet_fill_ifaddr();
 * rtmsg_ifa() warns on -EMSGSIZE if it does not.
 */
1131 static inline size_t inet_nlmsg_size(void)
1133 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1134 + nla_total_size(4) /* IFA_ADDRESS */
1135 + nla_total_size(4) /* IFA_LOCAL */
1136 + nla_total_size(4) /* IFA_BROADCAST */
1137 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise ifa into skb as one netlink address message.
 *
 * skb:   buffer to append to
 * ifa:   address to describe
 * pid/seq/event/flags: passed straight to nlmsg_put()
 *
 * Visible behaviour: the ifaddrmsg header is filled with AF_INET, the
 * prefix length, the address flags with IFA_F_PERMANENT always OR-ed in,
 * the scope and the owning device's ifindex.  IFA_ADDRESS, IFA_BROADCAST
 * and IFA_LABEL are emitted only when the corresponding field is
 * non-zero / non-empty; IFA_LOCAL is also emitted.  On success the
 * function returns nlmsg_end(); the failure path cancels the message
 * with nlmsg_cancel().
 * NOTE(review): the nlmsg_put() failure check, the condition on
 * IFA_LOCAL, the nla_put_failure label and the error return value are
 * not present in this listing.
 */
1140 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1141 u32 pid, u32 seq, int event, unsigned int flags)
1143 struct ifaddrmsg *ifm;
1144 struct nlmsghdr *nlh;
1146 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1150 ifm = nlmsg_data(nlh);
1151 ifm->ifa_family = AF_INET;
1152 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1153 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1154 ifm->ifa_scope = ifa->ifa_scope;
1155 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1157 if (ifa->ifa_address)
1158 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1161 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1163 if (ifa->ifa_broadcast)
1164 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1166 if (ifa->ifa_label[0])
1167 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169 return nlmsg_end(skb, nlh);
1172 nlmsg_cancel(skb, nlh);
/*
 * rtnetlink dump callback registered for RTM_GETADDR (see
 * devinet_init()).
 *
 * Visible behaviour: resumes from positions saved in cb->args (args[1]
 * is the device index within a hash bucket, args[2] the address index
 * within a device), walks every bucket of net->dev_index_head under RCU
 * iteration, and for each device emits its addresses with
 * inet_fill_ifaddr(..., RTM_NEWADDR, NLM_F_MULTI), skipping addresses
 * below the saved address index.  A non-positive fill result ends the
 * pass.  cb->args[2] is updated with the address index reached.
 * NOTE(review): the declarations of h, s_h, idx and s_idx, the RCU
 * lock calls, the skip/continue statements, the remaining cb->args
 * stores and the return statement are not present in this listing.
 */
1176 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1178 struct net *net = sock_net(skb->sk);
1181 int ip_idx, s_ip_idx;
1182 struct net_device *dev;
1183 struct in_device *in_dev;
1184 struct in_ifaddr *ifa;
1185 struct hlist_head *head;
1186 struct hlist_node *node;
1189 s_idx = idx = cb->args[1];
1190 s_ip_idx = ip_idx = cb->args[2];
1192 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1194 head = &net->dev_index_head[h];
1196 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1199 if (h > s_h || idx > s_idx)
1201 in_dev = __in_dev_get_rcu(dev);
1205 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1206 ifa = ifa->ifa_next, ip_idx++) {
1207 if (ip_idx < s_ip_idx)
1209 if (inet_fill_ifaddr(skb, ifa,
1210 NETLINK_CB(cb->skb).pid,
1212 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1226 cb->args[2] = ip_idx;
/*
 * Broadcast an address event to the RTNLGRP_IPV4_IFADDR netlink group.
 *
 * event: message type handed to inet_fill_ifaddr()
 * ifa:   address being reported
 * nlh:   originating request, or NULL; supplies the sequence number
 *        (0 when NULL)
 *
 * Visible behaviour: allocates an skb sized by inet_nlmsg_size(), fills
 * it, warns if the fill reports -EMSGSIZE, and sends it with
 * rtnl_notify().  The error path records err on the group with
 * rtnl_set_sk_err().
 * NOTE(review): the pid parameter declaration, the allocation-failure
 * check and the branching between the success and error paths are not
 * present in this listing.
 */
1231 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1234 struct sk_buff *skb;
1235 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1239 net = dev_net(ifa->ifa_dev->dev);
1240 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1244 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1246 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1247 WARN_ON(err == -EMSGSIZE);
1251 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1255 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1258 #ifdef CONFIG_SYSCTL
/*
 * Propagate slot i of the namespace default configuration to every
 * device in net whose own copy has not been explicitly set: a device is
 * updated only when bit i of its cnf.state bitmap is clear.
 * NOTE(review): the RCU lock/unlock calls around the walk are not
 * present in this listing.
 */
1260 static void devinet_copy_dflt_conf(struct net *net, int i)
1262 struct net_device *dev;
1265 for_each_netdev_rcu(net, dev) {
1266 struct in_device *in_dev;
1268 in_dev = __in_dev_get_rcu(dev);
1269 if (in_dev && !test_bit(i, in_dev->cnf.state))
1270 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
/*
 * Apply the namespace-wide FORWARDING value to everything else.
 *
 * Visible behaviour: reads the "all" FORWARDING value into on, sets the
 * "all" ACCEPT_REDIRECTS to the logical negation of on, copies on into
 * the "default" FORWARDING, then for every device calls
 * dev_disable_lro() and sets the device's FORWARDING to on.
 * NOTE(review): the conditions that guard dev_disable_lro() and the
 * per-device update are not present in this listing.
 */
1275 /* called with RTNL locked */
1276 static void inet_forward_change(struct net *net)
1278 struct net_device *dev;
1279 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1281 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1282 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1284 for_each_netdev(net, dev) {
1285 struct in_device *in_dev;
1287 dev_disable_lro(dev);
1289 in_dev = __in_dev_get_rcu(dev);
1291 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
/*
 * sysctl handler used by the generic devconf entries.
 *
 * Visible behaviour: the value is read or written with proc_dointvec();
 * the slot index i is derived from the distance between ctl->data and
 * the start of cnf->data (cnf comes from ctl->extra1, net from
 * ctl->extra2); bit i is set in cnf->state; when cnf is the namespace
 * default configuration the new value is pushed to devices with
 * devinet_copy_dflt_conf().
 * NOTE(review): the condition guarding this block (presumably "write")
 * and the return statement are not present in this listing.
 */
1296 static int devinet_conf_proc(ctl_table *ctl, int write,
1297 void __user *buffer,
1298 size_t *lenp, loff_t *ppos)
1300 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1303 struct ipv4_devconf *cnf = ctl->extra1;
1304 struct net *net = ctl->extra2;
1305 int i = (int *)ctl->data - cnf->data;
1307 set_bit(i, cnf->state);
1309 if (cnf == net->ipv4.devconf_dflt)
1310 devinet_copy_dflt_conf(net, i);
/*
 * sysctl handler for the "forwarding" entries and for ip_forward.
 *
 * Visible behaviour: after proc_dointvec(), a write that changed the
 * value is post-processed.  Unless the entry is the namespace default,
 * RTNL is taken with rtnl_trylock(); if that fails the syscall is
 * restarted (the adjacent comment says the original values are restored
 * first).  A change to the "all" entry runs inet_forward_change(); in
 * the other visible branch the owning in_device is recovered with
 * container_of() and LRO is disabled on its device.  The route cache is
 * flushed with rt_cache_flush(net, 0).
 * NOTE(review): the declarations of val and the saved position, the
 * restore statements, the condition on the per-device branch, the
 * unlock and the return statement are not present in this listing.
 */
1316 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1317 void __user *buffer,
1318 size_t *lenp, loff_t *ppos)
1320 int *valp = ctl->data;
1323 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1325 if (write && *valp != val) {
1326 struct net *net = ctl->extra2;
1328 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1329 if (!rtnl_trylock()) {
1330 /* Restore the original values before restarting */
1333 return restart_syscall();
1335 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1336 inet_forward_change(net);
1338 struct ipv4_devconf *cnf = ctl->extra1;
1339 struct in_device *idev =
1340 container_of(cnf, struct in_device, cnf);
1341 dev_disable_lro(idev->dev);
1344 rt_cache_flush(net, 0);
/*
 * sysctl handler: perform the integer read/write with proc_dointvec()
 * and, when a write changed the stored value, flush the route cache of
 * the namespace held in ctl->extra2 with rt_cache_flush(net, 0).
 * NOTE(review): the declaration of val (the value before the write) and
 * the return statement are not present in this listing.
 */
1351 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1352 void __user *buffer,
1353 size_t *lenp, loff_t *ppos)
1355 int *valp = ctl->data;
1357 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1358 struct net *net = ctl->extra2;
1360 if (write && *valp != val)
1361 rt_cache_flush(net, 0);
/*
 * Helper macros that build the ctl_table entries of devinet_sysctl.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) points .data at slot
 * IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf, uses sizeof(int)
 * as .maxlen, proc as the handler and &ipv4_devconf as .extra1.
 * The wrappers fix some arguments:
 *   RW_ENTRY       - mode 0644, handler devinet_conf_proc
 *   RO_ENTRY       - mode 0444, handler devinet_conf_proc
 *   COMPLEX_ENTRY  - mode 0644, caller-supplied handler
 *   FLUSHING_ENTRY - COMPLEX_ENTRY with ipv4_doint_and_flush
 * NOTE(review): several continuation lines of DEVINET_SYSCTL_ENTRY
 * (including the ones that use name and mval) are not present in this
 * listing.
 */
1366 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1369 .data = ipv4_devconf.data + \
1370 IPV4_DEVCONF_ ## attr - 1, \
1371 .maxlen = sizeof(int), \
1373 .proc_handler = proc, \
1374 .extra1 = &ipv4_devconf, \
1377 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1378 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1380 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1381 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1383 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1384 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1386 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1387 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
/*
 * Template sysctl table for one configuration block.  It holds the
 * registration header and an array of __IPV4_DEVCONF_MAX ctl_table
 * entries; __devinet_sysctl_register() duplicates this template and
 * re-targets each entry.  "forwarding" uses devinet_sysctl_forward,
 * "mc_forwarding" is read-only, and the last four entries use the
 * flushing handler.
 * NOTE(review): the dev_name member referenced by
 * __devinet_sysctl_register() and the braces around the initializer are
 * not present in this listing.
 */
1389 static struct devinet_sysctl_table {
1390 struct ctl_table_header *sysctl_header;
1391 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1393 } devinet_sysctl = {
1395 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1396 devinet_sysctl_forward),
1397 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1399 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1400 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1401 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1402 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1403 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1404 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1405 "accept_source_route"),
1406 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1407 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1408 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1409 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1410 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1411 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1412 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1413 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1414 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1415 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1416 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1417 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1418 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1420 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1421 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1422 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1423 "force_igmp_version"),
1424 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1425 "promote_secondaries"),
/*
 * Register the net/ipv4/conf/<dev_name> sysctl directory for the
 * configuration block p in namespace net.
 *
 * Visible behaviour: duplicates the devinet_sysctl template with
 * kmemdup(); for every entry except the last array slot, shifts .data
 * by the byte offset between p and the global ipv4_devconf and stores p
 * in .extra1 and net in .extra2; duplicates dev_name with kstrdup() and
 * uses the copy as the final path component; registers the table with
 * register_net_sysctl_table().
 * NOTE(review): the allocation-failure checks, the table argument of
 * the registration call, the store into p->sysctl, the cleanup labels
 * and the return values are not present in this listing.
 */
1429 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1430 struct ipv4_devconf *p)
1433 struct devinet_sysctl_table *t;
1435 #define DEVINET_CTL_PATH_DEV 3
1437 struct ctl_path devinet_ctl_path[] = {
1438 { .procname = "net", },
1439 { .procname = "ipv4", },
1440 { .procname = "conf", },
1441 { /* to be set */ },
1445 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1449 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1450 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1451 t->devinet_vars[i].extra1 = p;
1452 t->devinet_vars[i].extra2 = net;
1456 * Make a copy of dev_name, because '.procname' is regarded as const
1457 * by sysctl and we wouldn't want anyone to change it under our feet
1458 * (see SIOCSIFNAME).
1460 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1464 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1466 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1468 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for cnf: takes the table stored in
 * cnf->sysctl and unregisters its header with
 * unregister_sysctl_table().
 * NOTE(review): the NULL check, the clearing of cnf->sysctl and the
 * freeing of the table are not present in this listing.
 */
1482 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1484 struct devinet_sysctl_table *t = cnf->sysctl;
1490 unregister_sysctl_table(t->sysctl_header);
/*
 * Register the sysctl entries of one in_device: first the neighbour
 * (ARP) parameters under "ipv4" through neigh_sysctl_register(), then
 * the devconf directory named after the device through
 * __devinet_sysctl_register().  Return values of both calls are
 * ignored.
 * NOTE(review): the last argument of the second call is on a line that
 * is not present in this listing.
 */
1495 static void devinet_sysctl_register(struct in_device *idev)
1497 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1498 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
/*
 * Remove the sysctl entries of one in_device, in the reverse order of
 * devinet_sysctl_register(): the devconf directory first, then the
 * neighbour parameters.
 */
1502 static void devinet_sysctl_unregister(struct in_device *idev)
1504 __devinet_sysctl_unregister(&idev->cnf);
1505 neigh_sysctl_unregister(idev->arp_parms);
/*
 * Template table for the "ip_forward" sysctl.  It is bound to the
 * FORWARDING slot of the global ipv4_devconf, handled by
 * devinet_sysctl_forward, with .extra1 = &ipv4_devconf and
 * .extra2 = &init_net.  devinet_init_net() duplicates and re-targets it
 * for namespaces other than init_net.
 * NOTE(review): the .mode line and the terminating entry are not
 * present in this listing.
 */
1508 static struct ctl_table ctl_forward_entry[] = {
1510 .procname = "ip_forward",
1511 .data = &ipv4_devconf.data[
1512 IPV4_DEVCONF_FORWARDING - 1],
1513 .maxlen = sizeof(int),
1515 .proc_handler = devinet_sysctl_forward,
1516 .extra1 = &ipv4_devconf,
1517 .extra2 = &init_net,
/*
 * sysctl path "net" / "ipv4", used by devinet_init_net() when it
 * registers the ip_forward table.
 * NOTE(review): the terminating entry is not present in this listing.
 */
1522 static __net_initdata struct ctl_path net_ipv4_path[] = {
1523 { .procname = "net", },
1524 { .procname = "ipv4", },
/*
 * Per-namespace initialisation (the .init hook of devinet_ops).
 *
 * Visible behaviour: starts from the global ipv4_devconf ("all") and
 * ipv4_devconf_dflt ("default") blocks.  For any namespace other than
 * init_net both blocks, and under CONFIG_SYSCTL the ip_forward table,
 * are duplicated with kmemdup() and the table copy is re-targeted at
 * the new "all" block and at net.  Under CONFIG_SYSCTL the "all" and
 * "default" directories and the ip_forward table are registered.  The
 * chosen blocks are stored in net->ipv4.devconf_all / devconf_dflt.
 * The tail is the error unwind: it unregisters in reverse order and
 * tests whether tbl, dflt and all are the duplicated copies rather than
 * the static objects.
 * NOTE(review): the failure checks, the goto labels, the kfree() calls
 * in the unwind path and the return statements are not present in this
 * listing.
 */
1529 static __net_init int devinet_init_net(struct net *net)
1532 struct ipv4_devconf *all, *dflt;
1533 #ifdef CONFIG_SYSCTL
1534 struct ctl_table *tbl = ctl_forward_entry;
1535 struct ctl_table_header *forw_hdr;
1539 all = &ipv4_devconf;
1540 dflt = &ipv4_devconf_dflt;
1542 if (!net_eq(net, &init_net)) {
1543 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1547 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1549 goto err_alloc_dflt;
1551 #ifdef CONFIG_SYSCTL
1552 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1556 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1557 tbl[0].extra1 = all;
1558 tbl[0].extra2 = net;
1562 #ifdef CONFIG_SYSCTL
1563 err = __devinet_sysctl_register(net, "all", all);
1567 err = __devinet_sysctl_register(net, "default", dflt);
1572 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1573 if (forw_hdr == NULL)
1575 net->ipv4.forw_hdr = forw_hdr;
1578 net->ipv4.devconf_all = all;
1579 net->ipv4.devconf_dflt = dflt;
1582 #ifdef CONFIG_SYSCTL
1584 __devinet_sysctl_unregister(dflt);
1586 __devinet_sysctl_unregister(all);
1588 if (tbl != ctl_forward_entry)
1592 if (dflt != &ipv4_devconf_dflt)
1595 if (all != &ipv4_devconf)
/*
 * Per-namespace teardown (the .exit hook of devinet_ops).
 *
 * Visible behaviour: under CONFIG_SYSCTL it remembers the ip_forward
 * table pointer from the registration header, unregisters that table,
 * then unregisters the "default" and "all" directories; afterwards it
 * frees the namespace's devconf_dflt and devconf_all blocks with
 * kfree().
 * NOTE(review): the kfree() of tbl and the #endif are not present in
 * this listing.
 */
1601 static __net_exit void devinet_exit_net(struct net *net)
1603 #ifdef CONFIG_SYSCTL
1604 struct ctl_table *tbl;
1606 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1607 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1608 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1609 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1612 kfree(net->ipv4.devconf_dflt);
1613 kfree(net->ipv4.devconf_all);
/*
 * Per-namespace operations: devinet_init_net() on namespace creation and
 * devinet_exit_net() on namespace teardown.  Registered from
 * devinet_init().
 */
1616 static __net_initdata struct pernet_operations devinet_ops = {
1617 .init = devinet_init_net,
1618 .exit = devinet_exit_net,
/*
 * Boot-time initialisation of this file: registers the per-namespace
 * operations, the PF_INET gifconf callback, the netdevice notifier, and
 * the three rtnetlink handlers (RTM_NEWADDR and RTM_DELADDR as doit
 * handlers, RTM_GETADDR as a dump handler).  The return values of the
 * registration calls are not checked on the visible lines.
 */
1621 void __init devinet_init(void)
1623 register_pernet_subsys(&devinet_ops);
1625 register_gifconf(PF_INET, inet_gifconf);
1626 register_netdevice_notifier(&ip_netdev_notifier);
1628 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1629 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1630 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);