/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
/*
 *	Problems & solutions
 *	--------------------
 *
 *	1. The most important issue is detecting local dead loops.
 *	They would cause complete host lockup in transmit, which
 *	would be "resolved" by stack overflow or, if queueing is enabled,
 *	with infinite looping in net_bh.
 *
 *	We cannot track such dead loops during route installation;
 *	it is an infeasible task. The most general solution would be
 *	to keep an skb->encapsulation counter (a sort of local ttl)
 *	and silently drop the packet when it expires. It is a good
 *	solution, but it requires maintaining a new variable in EVERY
 *	skb, even if no tunnelling is used.
 *
 *	Current solution: xmit_recursion breaks dead loops. It is a percpu
 *	counter; since CPU migration is forbidden once we enter the first
 *	ndo_start_xmit(), a per-CPU count of nested transmits is reliable.
 *	We force an exit if this counter reaches RECURSION_LIMIT.
 *	(A sketch of this idea follows this comment.)
 *
 *	2. Networking dead loops would not kill routers, but they would
 *	really kill the network. The IP hop limit plays the role of
 *	"t->recursion" in this case, if we copy it from the packet being
 *	encapsulated to the upper header. It is a very good solution, but
 *	it introduces two problems:
 *
 *	- Routing protocols that use packets with ttl=1 (OSPF, RIP2)
 *	  do not work over tunnels.
 *	- traceroute does not work. I planned to relay ICMP from the
 *	  tunnel, so that this problem would be solved and the traceroute
 *	  output would be even more informative. This idea appeared to be
 *	  wrong: only Linux complies with RFC 1812 now (yes, guys, Linux
 *	  is the only true router now :-)); all other routers (at least,
 *	  in my neighbourhood) return only 8 bytes of payload. It is the
 *	  end.
 *
 *	Hence, if we want OSPF to work, or traceroute to say something
 *	reasonable, we should search for another solution.
 *
 *	One of them is to parse the packet, trying to detect inner
 *	encapsulation made by our node. That is difficult or even
 *	impossible, especially taking fragmentation into account. To be
 *	short, ttl is not a solution at all.
 *
 *	Current solution: The solution was UNEXPECTEDLY SIMPLE.
 *	We force the DF flag on tunnels with a preconfigured hop limit,
 *	that is ALL. :-) Well, it does not remove the problem completely,
 *	but exponential growth of network traffic is changed to linear
 *	(branches that exceed the pmtu are pruned) and the tunnel mtu
 *	rapidly degrades to a value < 68, where looping stops.
 *	Yes, it is not good if there exists a router in the loop
 *	which does not force DF, even when the encapsulated packet has
 *	DF set. But it is not our problem! Nobody could accuse us; we did
 *	all that we could. Even if it was your gated that injected the
 *	fatal route into the network, even if it was you who configured
 *	the fatal static route: you are innocent. :-)
 */
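/*
 * Illustrative sketch of the xmit_recursion idea described above. This is
 * not part of this file's logic (the real guard lives in the core
 * dev_queue_xmit() path); the names xmit_depth, MY_RECURSION_LIMIT and
 * my_xmit()/do_transmit() are made up for illustration.
 */
#if 0
static DEFINE_PER_CPU(int, xmit_depth);
#define MY_RECURSION_LIMIT 4

static int my_xmit(struct sk_buff *skb)
{
        int ret;

        /* Nested tunnel transmits run on the same CPU, so a plain
         * per-CPU depth counter detects a local dead loop.
         */
        if (__this_cpu_read(xmit_depth) >= MY_RECURSION_LIMIT) {
                kfree_skb(skb);         /* dead loop: drop silently */
                return -ELOOP;
        }

        __this_cpu_inc(xmit_depth);
        ret = do_transmit(skb);         /* may re-enter my_xmit() via a tunnel */
        __this_cpu_dec(xmit_depth);

        return ret;
}
#endif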
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
                                __be32 id, u32 index,
                                bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
static int ipgre_err(struct sk_buff *skb, u32 info,
                     const struct tnl_ptk_info *tpi)
{
        /* All the routers (except for Linux) return only
         * 8 bytes of packet payload. It means that precise relaying of
         * ICMP in the real Internet is absolutely infeasible.
         *
         * Moreover, Cisco "wise men" put the GRE key into the third word
         * of the GRE header. That makes it impossible to maintain even
         * soft state for keyed GRE tunnels with checksums enabled. Tell
         * them "thank you".
         *
         * Well, I wonder: RFC 1812 was written by a Cisco employee; why
         * the hell do these idiots break the standards they themselves
         * established???
         */
        struct net *net = dev_net(skb->dev);
        struct ip_tunnel_net *itn;
        const struct iphdr *iph;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        unsigned int data_len = 0;
        struct ip_tunnel *t;

        if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else if (tpi->proto == htons(ETH_P_ERSPAN) ||
                 tpi->proto == htons(ETH_P_ERSPAN2))
                itn = net_generic(net, erspan_net_id);
        else
                itn = net_generic(net, ipgre_net_id);

        iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
        t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
                             iph->daddr, iph->saddr, tpi->key);
        if (!t)
                return -ENOENT;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return 0;
        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                         * rfc2003 contains "deep thoughts" about NET_UNREACH;
                         * I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                /* RFC 4884 4.1: the length field counts 32-bit words of
                 * the original datagram, hence the multiplication by 4.
                 */
                data_len = icmp_hdr(skb)->un.reserved[1] * 4;
                break;
        case ICMP_REDIRECT:
                break;
        }

#if IS_ENABLED(CONFIG_IPV6)
        if (tpi->proto == htons(ETH_P_IPV6) &&
            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
                                        type, data_len))
                return 0;
#endif

        if (t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
                return 0;

        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                return 0;

        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;

        return 0;
}
static void gre_err(struct sk_buff *skb, u32 info)
{
        /* See the comment at the top of ipgre_err() above: most routers
         * return only 8 bytes of ICMP payload, and keyed GRE with
         * checksums enabled cannot be matched precisely, so all of this
         * is best effort.
         */
        const struct iphdr *iph = (struct iphdr *)skb->data;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct tnl_ptk_info tpi;

        if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
                             iph->ihl * 4) < 0)
                return;

        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
                                 skb->dev->ifindex, IPPROTO_GRE);
                return;
        }
        if (type == ICMP_REDIRECT) {
                ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
                              IPPROTO_GRE);
                return;
        }

        ipgre_err(skb, info, &tpi);
}
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                      int gre_hdr_len)
{
        struct net *net = dev_net(skb->dev);
        struct metadata_dst *tun_dst = NULL;
        struct erspan_base_hdr *ershdr;
        struct erspan_metadata *pkt_md;
        struct ip_tunnel_net *itn;
        struct ip_tunnel *tunnel;
        const struct iphdr *iph;
        struct erspan_md2 *md2;
        int ver;
        int len;

        itn = net_generic(net, erspan_net_id);

        iph = ip_hdr(skb);
        ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
        ver = ershdr->ver;

        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
                                  tpi->flags | TUNNEL_KEY,
                                  iph->saddr, iph->daddr, tpi->key);
        if (tunnel) {
                len = gre_hdr_len + erspan_hdr_len(ver);
                if (unlikely(!pskb_may_pull(skb, len)))
                        return PACKET_REJECT;

                ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
                pkt_md = (struct erspan_metadata *)(ershdr + 1);

                if (__iptunnel_pull_header(skb,
                                           len,
                                           htons(ETH_P_TEB),
                                           false, false) < 0)
                        goto drop;

                if (tunnel->collect_md) {
                        struct ip_tunnel_info *info;
                        struct erspan_metadata *md;
                        __be64 tun_id;
                        __be16 flags;

                        tpi->flags |= TUNNEL_KEY;
                        flags = tpi->flags;
                        tun_id = key32_to_tunnel_id(tpi->key);

                        tun_dst = ip_tun_rx_dst(skb, flags,
                                                tun_id, sizeof(*md));
                        if (!tun_dst)
                                return PACKET_REJECT;

                        md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
                        md->version = ver;
                        md2 = &md->u.md2;
                        memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
                                                       ERSPAN_V2_MDSIZE);

                        info = &tun_dst->u.tun_info;
                        info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
                        info->options_len = sizeof(*md);
                }

                skb_reset_mac_header(skb);
                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
                return PACKET_RCVD;
        }
        return PACKET_REJECT;

drop:
        kfree_skb(skb);
        return PACKET_RCVD;
}
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
                       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
        struct metadata_dst *tun_dst = NULL;
        const struct iphdr *iph;
        struct ip_tunnel *tunnel;

        iph = ip_hdr(skb);
        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
                                  iph->saddr, iph->daddr, tpi->key);
        if (tunnel) {
                if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
                                           raw_proto, false) < 0)
                        goto drop;

                if (tunnel->dev->type != ARPHRD_NONE)
                        skb_pop_mac_header(skb);
                else
                        skb_reset_mac_header(skb);
                if (tunnel->collect_md) {
                        __be16 flags;
                        __be64 tun_id;

                        flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
                        tun_id = key32_to_tunnel_id(tpi->key);
                        tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
                        if (!tun_dst)
                                return PACKET_REJECT;
                }

                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
                return PACKET_RCVD;
        }
        return PACKET_NEXT;

drop:
        kfree_skb(skb);
        return PACKET_RCVD;
}
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
                     int hdr_len)
{
        struct net *net = dev_net(skb->dev);
        struct ip_tunnel_net *itn;
        int res;

        if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else
                itn = net_generic(net, ipgre_net_id);

        res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
        if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
                /* ipgre tunnels in collect-metadata mode should also
                 * receive ETH_P_TEB traffic.
                 */
                itn = net_generic(net, ipgre_net_id);
                res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
        }
        return res;
}
static int gre_rcv(struct sk_buff *skb)
{
        struct tnl_ptk_info tpi;
        bool csum_err = false;
        int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
                /* Looped-back packet, drop it! */
                if (rt_is_output_route(skb_rtable(skb)))
                        goto drop;
        }
#endif

        hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
        if (hdr_len < 0)
                goto drop;

        if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
                     tpi.proto == htons(ETH_P_ERSPAN2))) {
                if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
                        return 0;
                goto out;
        }

        if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
                return 0;
out:
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
        kfree_skb(skb);
        return 0;
}
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
                       const struct iphdr *tnl_params,
                       __be16 proto)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        if (tunnel->parms.o_flags & TUNNEL_SEQ)
                tunnel->o_seqno++;

        /* Push the GRE header. */
        gre_build_header(skb, tunnel->tun_hlen,
                         tunnel->parms.o_flags, proto, tunnel->parms.o_key,
                         htonl(tunnel->o_seqno));

        ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
        /* Request GSO for GRE, with or without the GRE checksum. */
        return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM :
                                                    SKB_GSO_GRE);
}
static struct rtable *gre_get_rt(struct sk_buff *skb,
                                 struct net_device *dev,
                                 struct flowi4 *fl,
                                 const struct ip_tunnel_key *key)
{
        struct net *net = dev_net(dev);

        memset(fl, 0, sizeof(*fl));
        fl->daddr = key->u.ipv4.dst;
        fl->saddr = key->u.ipv4.src;
        fl->flowi4_tos = RT_TOS(key->tos);
        fl->flowi4_mark = skb->mark;
        fl->flowi4_proto = IPPROTO_GRE;

        return ip_route_output_key(net, fl);
}
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
                                      struct net_device *dev,
                                      struct flowi4 *fl,
                                      int tunnel_hlen)
{
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct rtable *rt = NULL;
        int min_headroom;
        bool use_cache;
        int err;

        tun_info = skb_tunnel_info(skb);
        key = &tun_info->key;
        use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

        if (use_cache)
                rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
        if (!rt) {
                rt = gre_get_rt(skb, dev, fl, key);
                if (IS_ERR(rt))
                        goto err_free_skb;
                if (use_cache)
                        dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
                                          fl->saddr);
        }

        min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
                        + tunnel_hlen + sizeof(struct iphdr);
        if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
                int head_delta = SKB_DATA_ALIGN(min_headroom -
                                                skb_headroom(skb));

                err = pskb_expand_head(skb, max_t(int, head_delta, 0),
                                       0, GFP_ATOMIC);
                if (unlikely(err))
                        goto err_free_rt;
        }
        return rt;

err_free_rt:
        ip_rt_put(rt);
err_free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
        return NULL;
}
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                        __be16 proto)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct rtable *rt = NULL;
        struct flowi4 fl;
        int tunnel_hlen;
        __be16 df, flags;

        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto err_free_skb;

        key = &tun_info->key;
        tunnel_hlen = gre_calc_hlen(key->tun_flags);

        rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
        if (!rt)
                return;

        /* Push the tunnel header. */
        if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
                goto err_free_rt;

        flags = tun_info->key.tun_flags &
                (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
        gre_build_header(skb, tunnel_hlen, flags, proto,
                         tunnel_id_to_key32(tun_info->key.tun_id),
                         (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);

        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

        iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
                      key->tos, key->ttl, df, false);
        return;

err_free_rt:
        ip_rt_put(rt);
err_free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
}
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct erspan_metadata *md;
        struct rtable *rt = NULL;
        bool truncate = false;
        __be16 df, proto;
        struct flowi4 fl;
        int tunnel_hlen;
        int version;
        int nhoff;
        int thoff;

        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto err_free_skb;

        key = &tun_info->key;
        if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
                goto err_free_rt;
        md = ip_tunnel_info_opts(tun_info);
        if (!md)
                goto err_free_rt;

        /* ERSPAN has a fixed 8-byte GRE header. */
        version = md->version;
        tunnel_hlen = 8 + erspan_hdr_len(version);

        rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
        if (!rt)
                return;

        if (gre_handle_offloads(skb, false))
                goto err_free_rt;

        if (skb->len > dev->mtu + dev->hard_header_len) {
                pskb_trim(skb, dev->mtu + dev->hard_header_len);
                truncate = true;
        }

        nhoff = skb_network_header(skb) - skb_mac_header(skb);
        if (skb->protocol == htons(ETH_P_IP) &&
            (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
                truncate = true;

        thoff = skb_transport_header(skb) - skb_mac_header(skb);
        if (skb->protocol == htons(ETH_P_IPV6) &&
            (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
                truncate = true;

        if (version == 1) {
                erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
                                    ntohl(md->u.index), truncate, true);
                proto = htons(ETH_P_ERSPAN);
        } else if (version == 2) {
                erspan_build_header_v2(skb,
                                       ntohl(tunnel_id_to_key32(key->tun_id)),
                                       md->u.md2.dir,
                                       get_hwid(&md->u.md2),
                                       truncate, true);
                proto = htons(ETH_P_ERSPAN2);
        } else {
                goto err_free_rt;
        }

        gre_build_header(skb, 8, TUNNEL_SEQ,
                         proto, 0, htonl(tunnel->o_seqno++));

        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

        iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
                      key->tos, key->ttl, df, false);
        return;

err_free_rt:
        ip_rt_put(rt);
err_free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
}
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
        struct rtable *rt;
        struct flowi4 fl4;

        if (ip_tunnel_info_af(info) != AF_INET)
                return -EINVAL;

        rt = gre_get_rt(skb, dev, &fl4, &info->key);
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        ip_rt_put(rt);
        info->key.u.ipv4.src = fl4.saddr;
        return 0;
}
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
                              struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *tnl_params;

        if (!pskb_inet_may_pull(skb))
                goto free_skb;

        if (tunnel->collect_md) {
                gre_fb_xmit(skb, dev, skb->protocol);
                return NETDEV_TX_OK;
        }

        if (dev->header_ops) {
                /* Need space for the new headers. */
                if (skb_cow_head(skb, dev->needed_headroom -
                                      (tunnel->hlen + sizeof(struct iphdr))))
                        goto free_skb;

                tnl_params = (const struct iphdr *)skb->data;

                /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
                 * to the GRE header.
                 */
                skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
                skb_reset_mac_header(skb);
        } else {
                if (skb_cow_head(skb, dev->needed_headroom))
                        goto free_skb;

                tnl_params = &tunnel->parms.iph;
        }

        if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
                goto free_skb;

        __gre_xmit(skb, dev, tnl_params, skb->protocol);
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
}
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
                               struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        bool truncate = false;
        __be16 proto;

        if (!pskb_inet_may_pull(skb))
                goto free_skb;

        if (tunnel->collect_md) {
                erspan_fb_xmit(skb, dev);
                return NETDEV_TX_OK;
        }

        if (gre_handle_offloads(skb, false))
                goto free_skb;

        if (skb_cow_head(skb, dev->needed_headroom))
                goto free_skb;

        if (skb->len > dev->mtu + dev->hard_header_len) {
                pskb_trim(skb, dev->mtu + dev->hard_header_len);
                truncate = true;
        }

        /* Push the ERSPAN header. */
        if (tunnel->erspan_ver == 1) {
                erspan_build_header(skb, ntohl(tunnel->parms.o_key),
                                    tunnel->index,
                                    truncate, true);
                proto = htons(ETH_P_ERSPAN);
        } else if (tunnel->erspan_ver == 2) {
                erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
                                       tunnel->dir, tunnel->hwid,
                                       truncate, true);
                proto = htons(ETH_P_ERSPAN2);
        } else {
                goto free_skb;
        }

        tunnel->parms.o_flags &= ~TUNNEL_KEY;
        __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
}
static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
                                struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        if (!pskb_inet_may_pull(skb))
                goto free_skb;

        if (tunnel->collect_md) {
                gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
                return NETDEV_TX_OK;
        }

        if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
                goto free_skb;

        if (skb_cow_head(skb, dev->needed_headroom))
                goto free_skb;

        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
}
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int len;

        len = tunnel->tun_hlen;
        tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
        len = tunnel->tun_hlen - len;
        tunnel->hlen = tunnel->hlen + len;

        dev->needed_headroom = dev->needed_headroom + len;
        if (set_mtu)
                dev->mtu = max_t(int, dev->mtu - len, 68);

        if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
                if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
                    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
                        dev->features |= NETIF_F_GSO_SOFTWARE;
                        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
                } else {
                        dev->features &= ~NETIF_F_GSO_SOFTWARE;
                        dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
                }
                dev->features |= NETIF_F_LLTX;
        } else {
                dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
                dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
        }
}
static int ipgre_tunnel_ioctl(struct net_device *dev,
                              struct ifreq *ifr, int cmd)
{
        struct ip_tunnel_parm p;
        int err;

        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                return -EFAULT;

        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
                    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
                    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
                        return -EINVAL;
        }

        p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
        p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

        err = ip_tunnel_ioctl(dev, &p, cmd);
        if (err)
                return err;

        if (cmd == SIOCCHGTUNNEL) {
                struct ip_tunnel *t = netdev_priv(dev);

                t->parms.i_flags = p.i_flags;
                t->parms.o_flags = p.o_flags;

                if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
                        ipgre_link_update(dev, true);
        }

        p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
        p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                return -EFAULT;

        return 0;
}
/* Nice toy. Unfortunately, useless in real life :-)
 * It allows one to construct a virtual multiprotocol broadcast "LAN"
 * over the Internet, provided multicast routing is tuned.
 *
 * I have no idea whether this wheel was invented before me,
 * so I had to set ARPHRD_IPGRE to a random value.
 * I have the impression that Cisco could have made something similar,
 * but this feature is apparently missing in IOS <= 11.2(8).
 *
 * I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 * with broadcast 224.66.66.66. If you have access to the mbone,
 * play with me :-)
 *
 *	ping -t 255 224.66.66.66
 *
 * If nobody answers, the mbone does not work.
 *
 *	ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 *	ip addr add 10.66.66.<somewhat>/24 dev Universe
 *	ifconfig Universe up
 *	ifconfig Universe add fe80::<Your_real_addr>/10
 *	ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 *	ftp 10.66.66.66
 *	ftp fec0:6666:6666::193.233.7.65
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
                        unsigned short type,
                        const void *daddr, const void *saddr, unsigned int len)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct iphdr *iph;
        struct gre_base_hdr *greh;

        iph = skb_push(skb, t->hlen + sizeof(*iph));
        greh = (struct gre_base_hdr *)(iph + 1);
        greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
        greh->protocol = htons(type);

        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

        /* Set the source hardware address. */
        if (saddr)
                memcpy(&iph->saddr, saddr, 4);
        if (daddr)
                memcpy(&iph->daddr, daddr, 4);
        if (iph->daddr)
                return t->hlen + sizeof(*iph);

        return -(t->hlen + sizeof(*iph));
}
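/*
 * For reference, the 4-byte base header pushed above is (from
 * include/net/gre.h):
 *
 *	struct gre_base_hdr {
 *		__be16 flags;
 *		__be16 protocol;
 *	};
 *
 * Optional checksum, key and sequence words follow the base header,
 * which is why the total pushed length is t->hlen + sizeof(*iph)
 * rather than a constant.
 */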
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
        const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

        memcpy(haddr, &iph->saddr, 4);
        return 4;
}

static const struct header_ops ipgre_header_ops = {
        .create	= ipgre_header,
        .parse	= ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr)) {
                struct flowi4 fl4;
                struct rtable *rt;

                rt = ip_route_output_gre(t->net, &fl4,
                                         t->parms.iph.daddr,
                                         t->parms.iph.saddr,
                                         t->parms.o_key,
                                         RT_TOS(t->parms.iph.tos),
                                         t->parms.link);
                if (IS_ERR(rt))
                        return -EADDRNOTAVAIL;
                dev = rt->dst.dev;
                ip_rt_put(rt);
                if (!__in_dev_get_rtnl(dev))
                        return -EADDRNOTAVAIL;
                t->mlink = dev->ifindex;
                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
        }
        return 0;
}

static int ipgre_close(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
                struct in_device *in_dev;

                in_dev = inetdev_by_index(t->net, t->mlink);
                if (in_dev)
                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
        }
        return 0;
}
#endif
static const struct net_device_ops ipgre_netdev_ops = {
        .ndo_init		= ipgre_tunnel_init,
        .ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
        .ndo_open		= ipgre_open,
        .ndo_stop		= ipgre_close,
#endif
        .ndo_start_xmit		= ipgre_xmit,
        .ndo_do_ioctl		= ipgre_tunnel_ioctl,
        .ndo_change_mtu		= ip_tunnel_change_mtu,
        .ndo_get_stats64	= ip_tunnel_get_stats64,
        .ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)
static void ipgre_tunnel_setup(struct net_device *dev)
{
        dev->netdev_ops = &ipgre_netdev_ops;
        dev->type = ARPHRD_IPGRE;
        ip_tunnel_setup(dev, ipgre_net_id);
}
static void __gre_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel;

        tunnel = netdev_priv(dev);
        tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
        tunnel->parms.iph.protocol = IPPROTO_GRE;

        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

        dev->features		|= GRE_FEATURES;
        dev->hw_features	|= GRE_FEATURES;

        if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
                /* TCP offload with GRE SEQ is not supported, nor can we
                 * support two levels of outer headers requiring an update.
                 */
                if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
                    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
                        dev->features |= NETIF_F_GSO_SOFTWARE;
                        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
                }

                /* Can use a lockless transmit, unless we generate
                 * output sequences.
                 */
                dev->features |= NETIF_F_LLTX;
        }
}
static int ipgre_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;

        __gre_tunnel_init(dev);

        memcpy(dev->dev_addr, &iph->saddr, 4);
        memcpy(dev->broadcast, &iph->daddr, 4);

        dev->flags = IFF_NOARP;
        netif_keep_dst(dev);
        dev->addr_len = 4;

        if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        if (!iph->saddr)
                                return -EINVAL;
                        dev->flags = IFF_BROADCAST;
                        dev->header_ops = &ipgre_header_ops;
                }
#endif
        } else if (!tunnel->collect_md) {
                dev->header_ops = &ipgre_header_ops;
        }

        return ip_tunnel_init(dev);
}
static const struct gre_protocol ipgre_protocol = {
        .handler     = gre_rcv,
        .err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
        ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
        .init = ipgre_init_net,
        .exit_batch = ipgre_exit_batch_net,
        .id   = &ipgre_net_id,
        .size = sizeof(struct ip_tunnel_net),
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
                                 struct netlink_ext_ack *extack)
{
        __be16 flags;

        if (!data)
                return 0;

        flags = 0;
        if (data[IFLA_GRE_IFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
        if (data[IFLA_GRE_OFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
        if (flags & (GRE_VERSION | GRE_ROUTING))
                return -EINVAL;

        if (data[IFLA_GRE_COLLECT_METADATA] &&
            data[IFLA_GRE_ENCAP_TYPE] &&
            nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
                return -EINVAL;

        return 0;
}
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
                              struct netlink_ext_ack *extack)
{
        __be32 daddr;

        if (tb[IFLA_ADDRESS]) {
                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
                        return -EINVAL;
                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
                        return -EADDRNOTAVAIL;
        }

        if (!data)
                goto out;

        if (data[IFLA_GRE_REMOTE]) {
                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
                if (!daddr)
                        return -EINVAL;
        }

out:
        return ipgre_tunnel_validate(tb, data, extack);
}
static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
                           struct netlink_ext_ack *extack)
{
        __be16 flags = 0;
        int ret;

        if (!data)
                return 0;

        ret = ipgre_tap_validate(tb, data, extack);
        if (ret)
                return ret;

        /* ERSPAN should only have the GRE sequence and key flags. */
        if (data[IFLA_GRE_OFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
        if (data[IFLA_GRE_IFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
        if (!data[IFLA_GRE_COLLECT_METADATA] &&
            flags != (GRE_SEQ | GRE_KEY))
                return -EINVAL;

        /* The ERSPAN session ID is only 10 bits wide. Since we reuse the
         * 32-bit key field as the ID, check its range.
         */
        if (data[IFLA_GRE_IKEY] &&
            (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
                return -EINVAL;

        if (data[IFLA_GRE_OKEY] &&
            (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
                return -EINVAL;

        return 0;
}
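/*
 * For illustration (assuming the erspan header definitions, where ID_MASK
 * covers the low 10 bits): session IDs 0-1023 pass the checks above, and
 * anything wider is rejected.
 *
 *	ntohl(key) = 0x000003ff  ->  ok       (ID 1023)
 *	ntohl(key) = 0x00000400  ->  -EINVAL  (bit 10 set, key & ~ID_MASK != 0)
 */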
static int ipgre_netlink_parms(struct net_device *dev,
                               struct nlattr *data[],
                               struct nlattr *tb[],
                               struct ip_tunnel_parm *parms,
                               __u32 *fwmark)
{
        struct ip_tunnel *t = netdev_priv(dev);

        memset(parms, 0, sizeof(*parms));

        parms->iph.protocol = IPPROTO_GRE;

        if (!data)
                return 0;

        if (data[IFLA_GRE_LINK])
                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

        if (data[IFLA_GRE_IFLAGS])
                parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

        if (data[IFLA_GRE_OFLAGS])
                parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

        if (data[IFLA_GRE_IKEY])
                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

        if (data[IFLA_GRE_OKEY])
                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

        if (data[IFLA_GRE_LOCAL])
                parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

        if (data[IFLA_GRE_REMOTE])
                parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

        if (data[IFLA_GRE_TTL])
                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

        if (data[IFLA_GRE_TOS])
                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
                if (t->ignore_df)
                        return -EINVAL;
                parms->iph.frag_off = htons(IP_DF);
        }

        if (data[IFLA_GRE_COLLECT_METADATA]) {
                t->collect_md = true;
                if (dev->type == ARPHRD_IPGRE)
                        dev->type = ARPHRD_NONE;
        }

        if (data[IFLA_GRE_IGNORE_DF]) {
                if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
                    && (parms->iph.frag_off & htons(IP_DF)))
                        return -EINVAL;
                t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
        }

        if (data[IFLA_GRE_FWMARK])
                *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

        if (data[IFLA_GRE_ERSPAN_VER]) {
                t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

                if (t->erspan_ver != 1 && t->erspan_ver != 2)
                        return -EINVAL;
        }

        if (t->erspan_ver == 1) {
                if (data[IFLA_GRE_ERSPAN_INDEX]) {
                        t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
                        if (t->index & ~INDEX_MASK)
                                return -EINVAL;
                }
        } else if (t->erspan_ver == 2) {
                if (data[IFLA_GRE_ERSPAN_DIR]) {
                        t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
                        if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
                                return -EINVAL;
                }
                if (data[IFLA_GRE_ERSPAN_HWID]) {
                        t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
                        if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
                                return -EINVAL;
                }
        }

        return 0;
}
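/*
 * Example of the attributes parsed above, expressed in iproute2 syntax
 * (illustrative only; exact option spelling depends on the iproute2
 * version):
 *
 *	ip link add dev erspan1 type erspan seq key 100 \
 *		local 10.0.0.1 remote 10.0.0.2 \
 *		erspan_ver 1 erspan 123
 *
 * This maps to IFLA_GRE_{IFLAGS,OFLAGS} = GRE_SEQ | GRE_KEY,
 * IFLA_GRE_{IKEY,OKEY} = 100, IFLA_GRE_ERSPAN_VER = 1 and
 * IFLA_GRE_ERSPAN_INDEX = 123.
 */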
/* Returns true when ENCAP attributes are present in the netlink message. */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
                                      struct ip_tunnel_encap *ipencap)
{
        bool ret = false;

        memset(ipencap, 0, sizeof(*ipencap));

        if (!data)
                return ret;

        if (data[IFLA_GRE_ENCAP_TYPE]) {
                ret = true;
                ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
        }

        if (data[IFLA_GRE_ENCAP_FLAGS]) {
                ret = true;
                ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
        }

        if (data[IFLA_GRE_ENCAP_SPORT]) {
                ret = true;
                ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
        }

        if (data[IFLA_GRE_ENCAP_DPORT]) {
                ret = true;
                ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
        }

        return ret;
}
static int gre_tap_init(struct net_device *dev)
{
        __gre_tunnel_init(dev);
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        netif_keep_dst(dev);

        return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
        .ndo_init		= gre_tap_init,
        .ndo_uninit		= ip_tunnel_uninit,
        .ndo_start_xmit		= gre_tap_xmit,
        .ndo_set_mac_address	= eth_mac_addr,
        .ndo_validate_addr	= eth_validate_addr,
        .ndo_change_mtu		= ip_tunnel_change_mtu,
        .ndo_get_stats64	= ip_tunnel_get_stats64,
        .ndo_get_iflink		= ip_tunnel_get_iflink,
        .ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
static int erspan_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->tun_hlen = 8;
        tunnel->parms.iph.protocol = IPPROTO_GRE;
        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
                       erspan_hdr_len(tunnel->erspan_ver);

        dev->features		|= GRE_FEATURES;
        dev->hw_features	|= GRE_FEATURES;
        dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
        netif_keep_dst(dev);

        return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
        .ndo_init		= erspan_tunnel_init,
        .ndo_uninit		= ip_tunnel_uninit,
        .ndo_start_xmit		= erspan_xmit,
        .ndo_set_mac_address	= eth_mac_addr,
        .ndo_validate_addr	= eth_validate_addr,
        .ndo_change_mtu		= ip_tunnel_change_mtu,
        .ndo_get_stats64	= ip_tunnel_get_stats64,
        .ndo_get_iflink		= ip_tunnel_get_iflink,
        .ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
static void ipgre_tap_setup(struct net_device *dev)
{
        ether_setup(dev);
        dev->max_mtu = 0;
        dev->netdev_ops = &gre_tap_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        ip_tunnel_setup(dev, gre_tap_net_id);
}
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[],
                         struct netlink_ext_ack *extack)
{
        struct ip_tunnel_parm p;
        struct ip_tunnel_encap ipencap;
        __u32 fwmark = 0;
        int err;

        if (ipgre_netlink_encap_parms(data, &ipencap)) {
                struct ip_tunnel *t = netdev_priv(dev);

                err = ip_tunnel_encap_setup(t, &ipencap);
                if (err < 0)
                        return err;
        }

        err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
        if (err < 0)
                return err;
        return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
                            struct nlattr *data[],
                            struct netlink_ext_ack *extack)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_encap ipencap;
        __u32 fwmark = t->fwmark;
        struct ip_tunnel_parm p;
        int err;

        if (ipgre_netlink_encap_parms(data, &ipencap)) {
                err = ip_tunnel_encap_setup(t, &ipencap);
                if (err < 0)
                        return err;
        }

        err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
        if (err < 0)
                return err;

        err = ip_tunnel_changelink(dev, tb, &p, fwmark);
        if (err < 0)
                return err;

        t->parms.i_flags = p.i_flags;
        t->parms.o_flags = p.o_flags;

        if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
                ipgre_link_update(dev, !tb[IFLA_MTU]);

        return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
{
        return
                nla_total_size(4) +	/* IFLA_GRE_LINK */
                nla_total_size(2) +	/* IFLA_GRE_IFLAGS */
                nla_total_size(2) +	/* IFLA_GRE_OFLAGS */
                nla_total_size(4) +	/* IFLA_GRE_IKEY */
                nla_total_size(4) +	/* IFLA_GRE_OKEY */
                nla_total_size(4) +	/* IFLA_GRE_LOCAL */
                nla_total_size(4) +	/* IFLA_GRE_REMOTE */
                nla_total_size(1) +	/* IFLA_GRE_TTL */
                nla_total_size(1) +	/* IFLA_GRE_TOS */
                nla_total_size(1) +	/* IFLA_GRE_PMTUDISC */
                nla_total_size(2) +	/* IFLA_GRE_ENCAP_TYPE */
                nla_total_size(2) +	/* IFLA_GRE_ENCAP_FLAGS */
                nla_total_size(2) +	/* IFLA_GRE_ENCAP_SPORT */
                nla_total_size(2) +	/* IFLA_GRE_ENCAP_DPORT */
                nla_total_size(0) +	/* IFLA_GRE_COLLECT_METADATA */
                nla_total_size(1) +	/* IFLA_GRE_IGNORE_DF */
                nla_total_size(4) +	/* IFLA_GRE_FWMARK */
                nla_total_size(4) +	/* IFLA_GRE_ERSPAN_INDEX */
                nla_total_size(1) +	/* IFLA_GRE_ERSPAN_VER */
                nla_total_size(1) +	/* IFLA_GRE_ERSPAN_DIR */
                nla_total_size(2) +	/* IFLA_GRE_ERSPAN_HWID */
                0;
}
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;
        __be16 o_flags = p->o_flags;

        if (t->erspan_ver == 1 || t->erspan_ver == 2) {
                if (!t->collect_md)
                        o_flags |= TUNNEL_KEY;

                if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
                        goto nla_put_failure;

                if (t->erspan_ver == 1) {
                        if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
                                goto nla_put_failure;
                } else {
                        if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
                                goto nla_put_failure;
                        if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
                                goto nla_put_failure;
                }
        }

        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS,
                         gre_tnl_flags_to_gre_flags(p->i_flags)) ||
            nla_put_be16(skb, IFLA_GRE_OFLAGS,
                         gre_tnl_flags_to_gre_flags(o_flags)) ||
            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
            nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
            nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
            nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
            nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
            nla_put_u8(skb, IFLA_GRE_PMTUDISC,
                       !!(p->iph.frag_off & htons(IP_DF))) ||
            nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
                goto nla_put_failure;

        if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
                        t->encap.type) ||
            nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
                         t->encap.sport) ||
            nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
                         t->encap.dport) ||
            nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
                        t->encap.flags))
                goto nla_put_failure;

        if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
                goto nla_put_failure;

        if (t->collect_md) {
                if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
                        goto nla_put_failure;
        }

        return 0;

nla_put_failure:
        return -EMSGSIZE;
}
static void erspan_setup(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        ether_setup(dev);
        dev->max_mtu = 0;
        dev->netdev_ops = &erspan_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        ip_tunnel_setup(dev, erspan_net_id);
        t->erspan_ver = 1;
}
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
        [IFLA_GRE_LINK]		= { .type = NLA_U32 },
        [IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
        [IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
        [IFLA_GRE_IKEY]		= { .type = NLA_U32 },
        [IFLA_GRE_OKEY]		= { .type = NLA_U32 },
        [IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
        [IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
        [IFLA_GRE_TTL]		= { .type = NLA_U8 },
        [IFLA_GRE_TOS]		= { .type = NLA_U8 },
        [IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
        [IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
        [IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
        [IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
        [IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
        [IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
        [IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
        [IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
        [IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
        .kind		= "gre",
        .maxtype	= IFLA_GRE_MAX,
        .policy		= ipgre_policy,
        .priv_size	= sizeof(struct ip_tunnel),
        .setup		= ipgre_tunnel_setup,
        .validate	= ipgre_tunnel_validate,
        .newlink	= ipgre_newlink,
        .changelink	= ipgre_changelink,
        .dellink	= ip_tunnel_dellink,
        .get_size	= ipgre_get_size,
        .fill_info	= ipgre_fill_info,
        .get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
        .kind		= "gretap",
        .maxtype	= IFLA_GRE_MAX,
        .policy		= ipgre_policy,
        .priv_size	= sizeof(struct ip_tunnel),
        .setup		= ipgre_tap_setup,
        .validate	= ipgre_tap_validate,
        .newlink	= ipgre_newlink,
        .changelink	= ipgre_changelink,
        .dellink	= ip_tunnel_dellink,
        .get_size	= ipgre_get_size,
        .fill_info	= ipgre_fill_info,
        .get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
        .kind		= "erspan",
        .maxtype	= IFLA_GRE_MAX,
        .policy		= ipgre_policy,
        .priv_size	= sizeof(struct ip_tunnel),
        .setup		= erspan_setup,
        .validate	= erspan_validate,
        .newlink	= ipgre_newlink,
        .changelink	= ipgre_changelink,
        .dellink	= ip_tunnel_dellink,
        .get_size	= ipgre_get_size,
        .fill_info	= ipgre_fill_info,
        .get_link_net	= ip_tunnel_get_link_net,
};
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
                                        u8 name_assign_type)
{
        struct nlattr *tb[IFLA_MAX + 1];
        struct net_device *dev;
        LIST_HEAD(list_kill);
        struct ip_tunnel *t;
        int err;

        memset(&tb, 0, sizeof(tb));

        dev = rtnl_create_link(net, name, name_assign_type,
                               &ipgre_tap_ops, tb, NULL);
        if (IS_ERR(dev))
                return dev;

        /* Configure the flow-based GRE device. */
        t = netdev_priv(dev);
        t->collect_md = true;

        err = ipgre_newlink(net, dev, tb, NULL, NULL);
        if (err < 0) {
                free_netdev(dev);
                return ERR_PTR(err);
        }

        /* openvswitch users expect packet sizes to be unrestricted,
         * so set the largest MTU we can.
         */
        err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
        if (err)
                goto out;

        err = rtnl_configure_link(dev, NULL);
        if (err < 0)
                goto out;

        return dev;
out:
        ip_tunnel_dellink(dev, &list_kill);
        unregister_netdevice_many(&list_kill);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
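/*
 * Hypothetical caller sketch (the surrounding kernel plumbing is omitted;
 * "gretap-ovs" is a made-up name): openvswitch-style users create one
 * flow-based gretap device and then steer every packet with per-skb
 * tunnel metadata rather than per-device configuration.
 *
 *	struct net_device *dev;
 *
 *	dev = gretap_fb_dev_create(net, "gretap-ovs", NET_NAME_USER);
 *	if (IS_ERR(dev))
 *		return PTR_ERR(dev);
 */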
static int __net_init ipgre_tap_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
        ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
        .init = ipgre_tap_init_net,
        .exit_batch = ipgre_tap_exit_batch_net,
        .id   = &gre_tap_net_id,
        .size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, erspan_net_id,
                                  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
        ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
        .init = erspan_init_net,
        .exit_batch = erspan_exit_batch_net,
        .id   = &erspan_net_id,
        .size = sizeof(struct ip_tunnel_net),
};
static int __init ipgre_init(void)
{
        int err;

        pr_info("GRE over IPv4 tunneling driver\n");

        err = register_pernet_device(&ipgre_net_ops);
        if (err < 0)
                return err;

        err = register_pernet_device(&ipgre_tap_net_ops);
        if (err < 0)
                goto pnet_tap_failed;

        err = register_pernet_device(&erspan_net_ops);
        if (err < 0)
                goto pnet_erspan_failed;

        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
        if (err < 0) {
                pr_info("%s: can't add protocol\n", __func__);
                goto add_proto_failed;
        }

        err = rtnl_link_register(&ipgre_link_ops);
        if (err < 0)
                goto rtnl_link_failed;

        err = rtnl_link_register(&ipgre_tap_ops);
        if (err < 0)
                goto tap_ops_failed;

        err = rtnl_link_register(&erspan_link_ops);
        if (err < 0)
                goto erspan_link_failed;

        return 0;

erspan_link_failed:
        rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
        rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
        unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
        unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
        unregister_pernet_device(&ipgre_net_ops);
        return err;
}
static void __exit ipgre_fini(void)
{
        rtnl_link_unregister(&ipgre_tap_ops);
        rtnl_link_unregister(&ipgre_link_ops);
        rtnl_link_unregister(&erspan_link_ops);
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
        unregister_pernet_device(&ipgre_tap_net_ops);
        unregister_pernet_device(&ipgre_net_ops);
        unregister_pernet_device(&erspan_net_ops);
}
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");