]> Git Repo - linux.git/blame - net/ipv4/ip_tunnel.c
ipv4: Cache dst in tunnels
[linux.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
/* Replace the tunnel's cached output route with @dst (ownership of the
 * caller's reference is transferred to the cache).
 *
 * Routes flagged DST_NOCACHE must not outlive the packet that created
 * them, so they are never stored; the cache is cleared instead.  The
 * spinlock serializes writers while readers use RCU (tunnel_dst_get()),
 * and the old entry's reference is dropped only after the new pointer
 * has been published with rcu_assign_pointer().
 */
static inline void __tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	/* Uncacheable route: fall through to clearing the cache. */
	if (dst && (dst->flags & DST_NOCACHE))
		dst = NULL;

	spin_lock_bh(&t->dst_lock);
	old_dst = rcu_dereference_raw(t->dst_cache);
	rcu_assign_pointer(t->dst_cache, dst);
	dst_release(old_dst);
	spin_unlock_bh(&t->dst_lock);
}
84
/* Cache @dst as the tunnel's output route; takes over the caller's ref. */
static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(t, dst);
}
89
/* Invalidate the cached output route, e.g. after tunnel parameters change. */
static inline void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}
94
/* Return the cached output route with a reference held, or NULL when
 * nothing is cached.  The caller is responsible for releasing the ref.
 */
static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(t->dst_cache);
	if (dst)
		dst_hold(dst);	/* pin it before leaving the RCU section */
	rcu_read_unlock();
	return dst;
}
106
107struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
108{
109 struct dst_entry *dst = tunnel_dst_get(t);
110
111 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
112 tunnel_dst_reset(t);
113 return NULL;
114 }
115
116 return dst;
117}
118
c5441932
PS
119/* Often modified stats are per cpu, other are shared (netdev->stats) */
120struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
121 struct rtnl_link_stats64 *tot)
122{
123 int i;
124
125 for_each_possible_cpu(i) {
126 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
127 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
128 unsigned int start;
129
130 do {
131 start = u64_stats_fetch_begin_bh(&tstats->syncp);
132 rx_packets = tstats->rx_packets;
133 tx_packets = tstats->tx_packets;
134 rx_bytes = tstats->rx_bytes;
135 tx_bytes = tstats->tx_bytes;
136 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
137
138 tot->rx_packets += rx_packets;
139 tot->tx_packets += tx_packets;
140 tot->rx_bytes += rx_bytes;
141 tot->tx_bytes += tx_bytes;
142 }
143
144 tot->multicast = dev->stats.multicast;
145
146 tot->rx_crc_errors = dev->stats.rx_crc_errors;
147 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
148 tot->rx_length_errors = dev->stats.rx_length_errors;
149 tot->rx_frame_errors = dev->stats.rx_frame_errors;
150 tot->rx_errors = dev->stats.rx_errors;
151
152 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
153 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
154 tot->tx_dropped = dev->stats.tx_dropped;
155 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
156 tot->tx_errors = dev->stats.tx_errors;
157
158 tot->collisions = dev->stats.collisions;
159
160 return tot;
161}
162EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
163
164static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
165 __be16 flags, __be32 key)
166{
167 if (p->i_flags & TUNNEL_KEY) {
168 if (flags & TUNNEL_KEY)
169 return key == p->i_key;
170 else
171 /* key expected, none present */
172 return false;
173 } else
174 return !(flags & TUNNEL_KEY);
175}
176
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched to configured keyless tunnels,
   will match the fallback tunnel.
   Given src, dst and key, find appropriate for input tunnel.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact (saddr, daddr) match; an entry on the wrong
	 * underlay link is remembered as a candidate.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: daddr match only (tunnel with wildcard source). */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 3: wildcard-remote bucket — match on our local address,
	 * or on a multicast destination configured as t's daddr.
	 */
	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match in the wildcard bucket. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Nothing configured matched: hand it to the fallback device. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
273
274static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
275 struct ip_tunnel_parm *parms)
276{
277 unsigned int h;
278 __be32 remote;
279
280 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
281 remote = parms->iph.daddr;
282 else
283 remote = 0;
284
285 h = ip_tunnel_hash(itn, parms->i_key, remote);
286 return &itn->tunnels[h];
287}
288
/* Insert @t into its hash bucket.  Writers hold RTNL; readers use RCU. */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
295
/* Remove @t from the hash table (RCU-safe for concurrent lookups). */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
300
301static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
302 struct ip_tunnel_parm *parms,
303 int type)
304{
305 __be32 remote = parms->iph.daddr;
306 __be32 local = parms->iph.saddr;
307 __be32 key = parms->i_key;
308 int link = parms->link;
309 struct ip_tunnel *t = NULL;
310 struct hlist_head *head = ip_bucket(itn, parms);
311
312 hlist_for_each_entry_rcu(t, head, hash_node) {
313 if (local == t->parms.iph.saddr &&
314 remote == t->parms.iph.daddr &&
315 key == t->parms.i_key &&
316 link == t->parms.link &&
317 type == t->dev->type)
318 break;
319 }
320 return t;
321}
322
/* Allocate and register a tunnel netdevice named after @parms->name, or
 * "<kind>%d" when no name was given.  Returns the device or an
 * ERR_PTR() on failure.  Caller holds RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the "%d" template plus the NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
368
7d442fab
TH
/* Fill @fl4 for an outer-route lookup: zero every field, then set the
 * outer addresses, TOS, protocol, GRE key and output interface.
 */
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}
382
/* Bind the tunnel to its underlay: guess the output device, size
 * needed_headroom accordingly, pre-cache the route for connected
 * tunnels, and return a suitable MTU for the tunnel device.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Warm the dst cache for the connected case. */
			tunnel_dst_set(tunnel, dst_clone(&rt->dst));
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found but bound to an interface: use that device. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* Never go below the minimum IPv4 MTU (RFC 791). */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
430
431static struct ip_tunnel *ip_tunnel_create(struct net *net,
432 struct ip_tunnel_net *itn,
433 struct ip_tunnel_parm *parms)
434{
435 struct ip_tunnel *nt, *fbt;
436 struct net_device *dev;
437
438 BUG_ON(!itn->fb_tunnel_dev);
439 fbt = netdev_priv(itn->fb_tunnel_dev);
440 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
441 if (IS_ERR(dev))
442 return NULL;
443
444 dev->mtu = ip_tunnel_bind_dev(dev);
445
446 nt = netdev_priv(dev);
447 ip_tunnel_add(itn, nt);
448 return nt;
449}
450
/* Generic receive path for a decapsulated tunnel packet: validate
 * checksum/sequence flags against the tunnel's config, undo ECN
 * encapsulation, account per-cpu stats and hand the skb to GRO.
 * Consumes the skb; always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Packet and tunnel must agree on whether a checksum is present. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Sequenced tunnels drop missing or out-of-order sequence numbers;
	 * the s32 subtraction handles sequence-space wraparound.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* err > 1 means the outer/inner ECN combination is invalid. */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state when the packet crosses a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
520
23a3647b
PS
/* Enforce path MTU for a packet about to be tunnelled over @rt: update
 * the inner dst's PMTU and, when the packet cannot fit, emit the
 * appropriate ICMP/ICMPv6 "too big" error and return -E2BIG.  Returns 0
 * when transmission may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set, derive the usable MTU from the outer route minus
	 * all encapsulation overhead; otherwise use the inner dst/dev.
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Shrink the cached IPv6 route's MTU, but only for a
		 * unicast tunnel endpoint or a host route.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
568
/* Generic tunnel transmit: resolve (or reuse the cached) outer route,
 * enforce PMTU, derive the outer TOS/TTL/DF from @tnl_params and the
 * inner packet, then encapsulate with @protocol and send.  Consumes the
 * skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt = NULL;	/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	/* "connected" means the outer destination is fixed by config, so
	 * the per-tunnel dst cache may be used; any per-packet input
	 * (NBMA destination, inherited TOS) clears it below.
	 */
	bool connected = true;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination per packet. */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible v6 addresses carry an
			 * embedded IPv4 next hop we can use.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	/* Low bit of configured TOS means "inherit from inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (connected)
		rt = (struct rtable *)tunnel_dst_check(tunnel, 0);

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		/* Refresh the cache only for connected tunnels. */
		if (connected)
			tunnel_dst_set(tunnel, dst_clone(&rt->dst));
	}

	/* Routing back to this device would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Rate-limited link-failure feedback after ICMP errors. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 in the config means "inherit from inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
728
/* Apply new parameters @p to an existing tunnel @t.
 *
 * The tunnel is unhashed and re-hashed because changing the addresses
 * or input key changes its bucket.  Caller holds RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the
		 * device hardware and broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Endpoints or link may have changed: cached route is stale. */
	tunnel_dst_reset(t);
	netdev_state_change(dev);
}
761
/* Shared SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler for IPv4 tunnel
 * drivers.  @p is the decoded ip_tunnel_parm from userspace and is
 * written back for SIOCGETTUNNEL.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up by parameters; on a
		 * real tunnel, report that tunnel itself.
		 */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL forces DF so PMTU discovery still works. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Ignore keys the corresponding flag doesn't enable. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters would collide with another
				 * existing tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Can't flip between broadcast and p2p. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself can't be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
855
/* Validate and set a new MTU.  The lower bound 68 is the minimum IPv4
 * MTU (RFC 791); the upper bound keeps the encapsulated packet within
 * the 0xFFF8 maximum IP datagram payload after adding the link-layer
 * header and the tunnel+IP overhead.
 */
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
868
/* netdev destructor: release GRO cells and per-cpu stats, then the
 * device itself.  Paired with the allocations in ip_tunnel_init().
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
877
/* rtnl dellink helper: unhash and queue @dev for unregistration, unless
 * it is the per-netns fallback device, which only net-exit may remove.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
891
/* Per-netns init for a tunnel protocol: set up the hash table and,
 * when @ops is given, create the fallback ("fb") device named @devname.
 * Returns 0 or the fallback-device creation error.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* Protocol without rtnl ops has no fallback device. */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_RET(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
925
6c742e71
ND
/* Queue for unregistration every device owned by @ops in the fallback
 * device's netns, plus any tunnel hashed in @itn whose device lives in
 * a different netns.  Caller holds RTNL.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
950
/* net-exit helper: batch-unregister every tunnel tracked by @itn for
 * @ops under a single RTNL critical section.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
961
/* rtnl newlink helper shared by tunnel drivers: register @dev with
 * parameters @p, bind it to its underlay and hash it, unless an
 * identical tunnel already exists.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Ethernet-like tunnels get a random MAC unless one was given. */
	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	/* Only override the MTU when userspace didn't pin one. */
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
996
/* rtnl changelink helper: apply @p to @dev, refusing changes that would
 * collide with another tunnel or touch the fallback device.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		/* Another tunnel already uses these parameters. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* Broadcast/p2p mode is fixed at creation time. */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	/* Only recompute the MTU when userspace didn't pin one. */
	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1034
/* Device init (ndo_init): allocate per-cpu stats and GRO cells and seed
 * the generic tunnel state.  Cleanup happens in ip_tunnel_dev_free().
 */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int i, err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	/* Seqcounts must be initialized before the first stats update. */
	for_each_possible_cpu(i) {
		struct pcpu_tstats *ipt_stats;
		ipt_stats = per_cpu_ptr(dev->tstats, i);
		u64_stats_init(&ipt_stats->syncp);
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	/* The route cache starts empty; it is filled lazily on xmit. */
	tunnel->dst_cache = NULL;
	spin_lock_init(&tunnel->dst_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
1070
/* Device teardown (ndo_uninit): unhash the tunnel and drop the cached
 * route.  The per-netns fallback device stays hashed; it is
 * unregistered by the net-exit path instead.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	tunnel_dst_reset(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1085
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Remember which per-netns tunnel registry this device uses. */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1093
1094MODULE_LICENSE("GPL");
This page took 0.28443 seconds and 4 git commands to generate.