]> Git Repo - linux.git/blame - net/ipv6/ip6_output.c
udp: better wmem accounting on gso
[linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4 41
33b48679 42#include <linux/bpf-cgroup.h>
1da177e4
LT
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
ca254490 59#include <net/l3mdev.h>
14972cbd 60#include <net/lwtunnel.h>
1da177e4 61
7d8c6e39 62static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 63{
adf30907 64 struct dst_entry *dst = skb_dst(skb);
1da177e4 65 struct net_device *dev = dst->dev;
f6b72b62 66 struct neighbour *neigh;
6fd6ce20
YH
67 struct in6_addr *nexthop;
68 int ret;
1da177e4 69
0660e03f 70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 72
7026b1dd 73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
8571ab47 74 ((mroute6_is_socket(net, skb) &&
bd91b8bf 75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
82 */
83 if (newskb)
b2e0b385 84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
29a26a56 85 net, sk, newskb, NULL, newskb->dev,
95603e22 86 dev_loopback_xmit);
1da177e4 87
0660e03f 88 if (ipv6_hdr(skb)->hop_limit == 0) {
78126c41 89 IP6_INC_STATS(net, idev,
3bd653c8 90 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
91 kfree_skb(skb);
92 return 0;
93 }
94 }
95
78126c41 96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
dd408515
HFS
97
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
101 kfree_skb(skb);
102 return 0;
103 }
1da177e4
LT
104 }
105
14972cbd
RP
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
108
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
110 return res;
111 }
112
6fd6ce20 113 rcu_read_lock_bh();
2647a9b0 114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
6fd6ce20
YH
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
4ff06203 119 sock_confirm_neigh(skb, neigh);
c16ec185 120 ret = neigh_output(neigh, skb);
6fd6ce20
YH
121 rcu_read_unlock_bh();
122 return ret;
123 }
124 rcu_read_unlock_bh();
05e3aa09 125
78126c41 126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
9e508490
JE
127 kfree_skb(skb);
128 return -EINVAL;
1da177e4
LT
129}
130
0c4b51f0 131static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490 132{
33b48679
DM
133 int ret;
134
135 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
136 if (ret) {
137 kfree_skb(skb);
138 return ret;
139 }
140
09ee9dba
TB
141#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
142 /* Policy lookup after SNAT yielded a new policy */
143 if (skb_dst(skb)->xfrm) {
144 IPCB(skb)->flags |= IPSKB_REROUTED;
145 return dst_output(net, sk, skb);
146 }
147#endif
148
9e508490 149 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
150 dst_allfrag(skb_dst(skb)) ||
151 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 152 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 153 else
7d8c6e39 154 return ip6_finish_output2(net, sk, skb);
9e508490
JE
155}
156
ede2059d 157int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 158{
9e508490 159 struct net_device *dev = skb_dst(skb)->dev;
adf30907 160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
be10de0a 161
97a7a37a
CF
162 skb->protocol = htons(ETH_P_IPV6);
163 skb->dev = dev;
164
778d80be 165 if (unlikely(idev->cnf.disable_ipv6)) {
19a0644c 166 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
167 kfree_skb(skb);
168 return 0;
169 }
170
29a26a56
EB
171 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
172 net, sk, skb, NULL, dev,
9c6eb28a
JE
173 ip6_finish_output,
174 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
175}
176
e9191ffb 177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
178{
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
181 else
182 return np->autoflowlabel;
183}
184
1da177e4 185/*
1c1e9d2b
ED
186 * xmit an sk_buff (used by TCP, SCTP and DCCP)
187 * Note : socket lock is not held for SYNACK packets, but might be modified
188 * by calls to skb_set_owner_w() and ipv6_local_error(),
189 * which are using proper atomic operations or spinlocks.
1da177e4 190 */
1c1e9d2b 191int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
92e55f41 192 __u32 mark, struct ipv6_txoptions *opt, int tclass)
1da177e4 193{
3bd653c8 194 struct net *net = sock_net(sk);
1c1e9d2b 195 const struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 196 struct in6_addr *first_hop = &fl6->daddr;
adf30907 197 struct dst_entry *dst = skb_dst(skb);
1da177e4 198 struct ipv6hdr *hdr;
4c9483b2 199 u8 proto = fl6->flowi6_proto;
1da177e4 200 int seg_len = skb->len;
e651f03a 201 int hlimit = -1;
1da177e4
LT
202 u32 mtu;
203
204 if (opt) {
c2636b4d 205 unsigned int head_room;
1da177e4
LT
206
207 /* First: exthdrs may take lots of space (~8K for now)
208 MAX_HEADER is not enough.
209 */
210 head_room = opt->opt_nflen + opt->opt_flen;
211 seg_len += head_room;
212 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
213
214 if (skb_headroom(skb) < head_room) {
215 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
63159f29 216 if (!skb2) {
adf30907 217 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
218 IPSTATS_MIB_OUTDISCARDS);
219 kfree_skb(skb);
1da177e4
LT
220 return -ENOBUFS;
221 }
808db80a 222 consume_skb(skb);
a11d206d 223 skb = skb2;
1c1e9d2b
ED
224 /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
225 * it is safe to call in our context (socket lock not held)
226 */
227 skb_set_owner_w(skb, (struct sock *)sk);
1da177e4
LT
228 }
229 if (opt->opt_flen)
230 ipv6_push_frag_opts(skb, opt, &proto);
231 if (opt->opt_nflen)
613fa3ca
DL
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
233 &fl6->saddr);
1da177e4
LT
234 }
235
e2d1bca7
ACM
236 skb_push(skb, sizeof(struct ipv6hdr));
237 skb_reset_network_header(skb);
0660e03f 238 hdr = ipv6_hdr(skb);
1da177e4
LT
239
240 /*
241 * Fill in the IPv6 header
242 */
b903d324 243 if (np)
1da177e4
LT
244 hlimit = np->hop_limit;
245 if (hlimit < 0)
6b75d090 246 hlimit = ip6_dst_hoplimit(dst);
1da177e4 247
cb1ce2ef 248 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 249 ip6_autoflowlabel(net, np), fl6));
41a1f8ea 250
1da177e4
LT
251 hdr->payload_len = htons(seg_len);
252 hdr->nexthdr = proto;
253 hdr->hop_limit = hlimit;
254
4e3fd7a0
AD
255 hdr->saddr = fl6->saddr;
256 hdr->daddr = *first_hop;
1da177e4 257
9c9c9ad5 258 skb->protocol = htons(ETH_P_IPV6);
a2c2064f 259 skb->priority = sk->sk_priority;
92e55f41 260 skb->mark = mark;
a2c2064f 261
1da177e4 262 mtu = dst_mtu(dst);
60ff7467 263 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
adf30907 264 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 265 IPSTATS_MIB_OUT, skb->len);
a8e3e1a9
DA
266
267 /* if egress device is enslaved to an L3 master device pass the
268 * skb to its handler for processing
269 */
270 skb = l3mdev_ip6_out((struct sock *)sk, skb);
271 if (unlikely(!skb))
272 return 0;
273
1c1e9d2b
ED
274 /* hooks should never assume socket lock is held.
275 * we promote our socket to non const
276 */
29a26a56 277 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
1c1e9d2b 278 net, (struct sock *)sk, skb, NULL, dst->dev,
13206b6b 279 dst_output);
1da177e4
LT
280 }
281
1da177e4 282 skb->dev = dst->dev;
1c1e9d2b
ED
283 /* ipv6_local_error() does not require socket lock,
284 * we promote our socket to non const
285 */
286 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
287
adf30907 288 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
289 kfree_skb(skb);
290 return -EMSGSIZE;
291}
7159039a
YH
292EXPORT_SYMBOL(ip6_xmit);
293
1da177e4
LT
294static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
295{
296 struct ip6_ra_chain *ra;
297 struct sock *last = NULL;
298
299 read_lock(&ip6_ra_lock);
300 for (ra = ip6_ra_chain; ra; ra = ra->next) {
301 struct sock *sk = ra->sk;
0bd1b59b
AM
302 if (sk && ra->sel == sel &&
303 (!sk->sk_bound_dev_if ||
304 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
305 if (last) {
306 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
307 if (skb2)
308 rawv6_rcv(last, skb2);
309 }
310 last = sk;
311 }
312 }
313
314 if (last) {
315 rawv6_rcv(last, skb);
316 read_unlock(&ip6_ra_lock);
317 return 1;
318 }
319 read_unlock(&ip6_ra_lock);
320 return 0;
321}
322
e21e0b5f
VN
323static int ip6_forward_proxy_check(struct sk_buff *skb)
324{
0660e03f 325 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f 326 u8 nexthdr = hdr->nexthdr;
75f2811c 327 __be16 frag_off;
e21e0b5f
VN
328 int offset;
329
330 if (ipv6_ext_hdr(nexthdr)) {
75f2811c 331 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
e21e0b5f
VN
332 if (offset < 0)
333 return 0;
334 } else
335 offset = sizeof(struct ipv6hdr);
336
337 if (nexthdr == IPPROTO_ICMPV6) {
338 struct icmp6hdr *icmp6;
339
d56f90a7
ACM
340 if (!pskb_may_pull(skb, (skb_network_header(skb) +
341 offset + 1 - skb->data)))
e21e0b5f
VN
342 return 0;
343
d56f90a7 344 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
345
346 switch (icmp6->icmp6_type) {
347 case NDISC_ROUTER_SOLICITATION:
348 case NDISC_ROUTER_ADVERTISEMENT:
349 case NDISC_NEIGHBOUR_SOLICITATION:
350 case NDISC_NEIGHBOUR_ADVERTISEMENT:
351 case NDISC_REDIRECT:
352 /* For reaction involving unicast neighbor discovery
353 * message destined to the proxied address, pass it to
354 * input function.
355 */
356 return 1;
357 default:
358 break;
359 }
360 }
361
74553b09
VN
362 /*
363 * The proxying router can't forward traffic sent to a link-local
364 * address, so signal the sender and discard the packet. This
365 * behavior is clarified by the MIPv6 specification.
366 */
367 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
368 dst_link_failure(skb);
369 return -1;
370 }
371
e21e0b5f
VN
372 return 0;
373}
374
0c4b51f0
EB
375static inline int ip6_forward_finish(struct net *net, struct sock *sk,
376 struct sk_buff *skb)
1da177e4 377{
71a1c915
JB
378 struct dst_entry *dst = skb_dst(skb);
379
380 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
381 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
382
13206b6b 383 return dst_output(net, sk, skb);
1da177e4
LT
384}
385
09952107 386unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
0954cf9c
HFS
387{
388 unsigned int mtu;
389 struct inet6_dev *idev;
390
391 if (dst_metric_locked(dst, RTAX_MTU)) {
392 mtu = dst_metric_raw(dst, RTAX_MTU);
393 if (mtu)
394 return mtu;
395 }
396
397 mtu = IPV6_MIN_MTU;
398 rcu_read_lock();
399 idev = __in6_dev_get(dst->dev);
400 if (idev)
401 mtu = idev->cnf.mtu6;
402 rcu_read_unlock();
403
404 return mtu;
405}
09952107 406EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
0954cf9c 407
fe6cc55f
FW
408static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
409{
418a3156 410 if (skb->len <= mtu)
fe6cc55f
FW
411 return false;
412
60ff7467 413 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
414 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
415 return true;
416
60ff7467 417 if (skb->ignore_df)
418a3156
FW
418 return false;
419
779b7931 420 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
fe6cc55f
FW
421 return false;
422
423 return true;
424}
425
1da177e4
LT
426int ip6_forward(struct sk_buff *skb)
427{
bdb7cc64 428 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
adf30907 429 struct dst_entry *dst = skb_dst(skb);
0660e03f 430 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 431 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 432 struct net *net = dev_net(dst->dev);
14f3ad6f 433 u32 mtu;
1ab1457c 434
53b7997f 435 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
436 goto error;
437
090f1166
LR
438 if (skb->pkt_type != PACKET_HOST)
439 goto drop;
440
9ef2e965
HFS
441 if (unlikely(skb->sk))
442 goto drop;
443
4497b076
BH
444 if (skb_warn_if_lro(skb))
445 goto drop;
446
1da177e4 447 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
bdb7cc64 448 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
1da177e4
LT
449 goto drop;
450 }
451
35fc92a9 452 skb_forward_csum(skb);
1da177e4
LT
453
454 /*
455 * We DO NOT make any processing on
456 * RA packets, pushing them to user level AS IS
457 * without ane WARRANTY that application will be able
458 * to interpret them. The reason is that we
459 * cannot make anything clever here.
460 *
461 * We are not end-node, so that if packet contains
462 * AH/ESP, we cannot make anything.
463 * Defragmentation also would be mistake, RA packets
464 * cannot be fragmented, because there is no warranty
465 * that different fragments will go along one path. --ANK
466 */
ab4eb353
YH
467 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
468 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
1da177e4
LT
469 return 0;
470 }
471
472 /*
473 * check and decrement ttl
474 */
475 if (hdr->hop_limit <= 1) {
476 /* Force OUTPUT device used as source address */
477 skb->dev = dst->dev;
3ffe533c 478 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
bdb7cc64 479 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
480
481 kfree_skb(skb);
482 return -ETIMEDOUT;
483 }
484
fbea49e1 485 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 486 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 487 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
488 int proxied = ip6_forward_proxy_check(skb);
489 if (proxied > 0)
e21e0b5f 490 return ip6_input(skb);
74553b09 491 else if (proxied < 0) {
bdb7cc64 492 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
74553b09
VN
493 goto drop;
494 }
e21e0b5f
VN
495 }
496
1da177e4 497 if (!xfrm6_route_forward(skb)) {
bdb7cc64 498 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
1da177e4
LT
499 goto drop;
500 }
adf30907 501 dst = skb_dst(skb);
1da177e4
LT
502
503 /* IPv6 specs say nothing about it, but it is clear that we cannot
504 send redirects to source routed frames.
1e5dc146 505 We don't send redirects to frames decapsulated from IPsec.
1da177e4 506 */
c45a3dfb 507 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4 508 struct in6_addr *target = NULL;
fbfe95a4 509 struct inet_peer *peer;
1da177e4 510 struct rt6_info *rt;
1da177e4
LT
511
512 /*
513 * incoming and outgoing devices are the same
514 * send a redirect.
515 */
516
517 rt = (struct rt6_info *) dst;
c45a3dfb
DM
518 if (rt->rt6i_flags & RTF_GATEWAY)
519 target = &rt->rt6i_gateway;
1da177e4
LT
520 else
521 target = &hdr->daddr;
522
fd0273d7 523 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
92d86829 524
1da177e4
LT
525 /* Limit redirects both by destination (here)
526 and by source (inside ndisc_send_redirect)
527 */
fbfe95a4 528 if (inet_peer_xrlim_allow(peer, 1*HZ))
4991969a 529 ndisc_send_redirect(skb, target);
1d861aa4
DM
530 if (peer)
531 inet_putpeer(peer);
5bb1ab09
DS
532 } else {
533 int addrtype = ipv6_addr_type(&hdr->saddr);
534
1da177e4 535 /* This check is security critical. */
f81b2e7d
YH
536 if (addrtype == IPV6_ADDR_ANY ||
537 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
538 goto error;
539 if (addrtype & IPV6_ADDR_LINKLOCAL) {
540 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 541 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
542 goto error;
543 }
1da177e4
LT
544 }
545
0954cf9c 546 mtu = ip6_dst_mtu_forward(dst);
14f3ad6f
UW
547 if (mtu < IPV6_MIN_MTU)
548 mtu = IPV6_MIN_MTU;
549
fe6cc55f 550 if (ip6_pkt_too_big(skb, mtu)) {
1da177e4
LT
551 /* Again, force OUTPUT device used as source address */
552 skb->dev = dst->dev;
14f3ad6f 553 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
bdb7cc64 554 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
1d015503
ED
555 __IP6_INC_STATS(net, ip6_dst_idev(dst),
556 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
557 kfree_skb(skb);
558 return -EMSGSIZE;
559 }
560
561 if (skb_cow(skb, dst->dev->hard_header_len)) {
1d015503
ED
562 __IP6_INC_STATS(net, ip6_dst_idev(dst),
563 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
564 goto drop;
565 }
566
0660e03f 567 hdr = ipv6_hdr(skb);
1da177e4
LT
568
569 /* Mangling hops number delayed to point after skb COW */
1ab1457c 570
1da177e4
LT
571 hdr->hop_limit--;
572
29a26a56
EB
573 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
574 net, NULL, skb, skb->dev, dst->dev,
6e23ae2a 575 ip6_forward_finish);
1da177e4
LT
576
577error:
bdb7cc64 578 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
579drop:
580 kfree_skb(skb);
581 return -EINVAL;
582}
583
584static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
585{
586 to->pkt_type = from->pkt_type;
587 to->priority = from->priority;
588 to->protocol = from->protocol;
adf30907
ED
589 skb_dst_drop(to);
590 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 591 to->dev = from->dev;
82e91ffe 592 to->mark = from->mark;
1da177e4
LT
593
594#ifdef CONFIG_NET_SCHED
595 to->tc_index = from->tc_index;
596#endif
e7ac05f3 597 nf_copy(to, from);
984bc16c 598 skb_copy_secmark(to, from);
1da177e4
LT
599}
600
7d8c6e39
EB
601int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
602 int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4 603{
1da177e4 604 struct sk_buff *frag;
67ba4152 605 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
f60e5990 606 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
607 inet6_sk(skb->sk) : NULL;
1da177e4
LT
608 struct ipv6hdr *tmp_hdr;
609 struct frag_hdr *fh;
610 unsigned int mtu, hlen, left, len;
a7ae1992 611 int hroom, troom;
286c2349 612 __be32 frag_id;
67ba4152 613 int ptr, offset = 0, err = 0;
1da177e4
LT
614 u8 *prevhdr, nexthdr = 0;
615
7dd7eb95
DM
616 err = ip6_find_1stfragopt(skb, &prevhdr);
617 if (err < 0)
2423496a 618 goto fail;
7dd7eb95 619 hlen = err;
1da177e4
LT
620 nexthdr = *prevhdr;
621
628a5c56 622 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
623
624 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 625 * or if the skb it not generated by a local socket.
b881ef76 626 */
485fca66
FW
627 if (unlikely(!skb->ignore_df && skb->len > mtu))
628 goto fail_toobig;
a34a101e 629
485fca66
FW
630 if (IP6CB(skb)->frag_max_size) {
631 if (IP6CB(skb)->frag_max_size > mtu)
632 goto fail_toobig;
633
634 /* don't send fragments larger than what we received */
635 mtu = IP6CB(skb)->frag_max_size;
636 if (mtu < IPV6_MIN_MTU)
637 mtu = IPV6_MIN_MTU;
b881ef76
JH
638 }
639
d91675f9
YH
640 if (np && np->frag_size < mtu) {
641 if (np->frag_size)
642 mtu = np->frag_size;
643 }
89bc7848 644 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
b72a2b01 645 goto fail_toobig;
1e0d69a9 646 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 647
fd0273d7
MKL
648 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
649 &ipv6_hdr(skb)->saddr);
286c2349 650
405c92f7
HFS
651 if (skb->ip_summed == CHECKSUM_PARTIAL &&
652 (err = skb_checksum_help(skb)))
653 goto fail;
654
1d325d21 655 hroom = LL_RESERVED_SPACE(rt->dst.dev);
21dc3301 656 if (skb_has_frag_list(skb)) {
c72d8cda 657 unsigned int first_len = skb_pagelen(skb);
3d13008e 658 struct sk_buff *frag2;
1da177e4
LT
659
660 if (first_len - hlen > mtu ||
661 ((first_len - hlen) & 7) ||
1d325d21
FW
662 skb_cloned(skb) ||
663 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
1da177e4
LT
664 goto slow_path;
665
4d9092bb 666 skb_walk_frags(skb, frag) {
1da177e4
LT
667 /* Correct geometry. */
668 if (frag->len > mtu ||
669 ((frag->len & 7) && frag->next) ||
1d325d21 670 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
3d13008e 671 goto slow_path_clean;
1da177e4 672
1da177e4
LT
673 /* Partially cloned skb? */
674 if (skb_shared(frag))
3d13008e 675 goto slow_path_clean;
2fdba6b0
HX
676
677 BUG_ON(frag->sk);
678 if (skb->sk) {
2fdba6b0
HX
679 frag->sk = skb->sk;
680 frag->destructor = sock_wfree;
2fdba6b0 681 }
3d13008e 682 skb->truesize -= frag->truesize;
1da177e4
LT
683 }
684
685 err = 0;
686 offset = 0;
1da177e4
LT
687 /* BUILD HEADER */
688
9a217a1c 689 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 690 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 691 if (!tmp_hdr) {
1d325d21
FW
692 err = -ENOMEM;
693 goto fail;
1da177e4 694 }
1d325d21
FW
695 frag = skb_shinfo(skb)->frag_list;
696 skb_frag_list_init(skb);
1da177e4 697
1da177e4 698 __skb_pull(skb, hlen);
d58ff351 699 fh = __skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
700 __skb_push(skb, hlen);
701 skb_reset_network_header(skb);
d56f90a7 702 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 703
1da177e4
LT
704 fh->nexthdr = nexthdr;
705 fh->reserved = 0;
706 fh->frag_off = htons(IP6_MF);
286c2349 707 fh->identification = frag_id;
1da177e4
LT
708
709 first_len = skb_pagelen(skb);
710 skb->data_len = first_len - skb_headlen(skb);
711 skb->len = first_len;
0660e03f
ACM
712 ipv6_hdr(skb)->payload_len = htons(first_len -
713 sizeof(struct ipv6hdr));
a11d206d 714
1da177e4
LT
715 for (;;) {
716 /* Prepare header of the next frame,
717 * before previous one went down. */
718 if (frag) {
719 frag->ip_summed = CHECKSUM_NONE;
badff6d0 720 skb_reset_transport_header(frag);
d58ff351 721 fh = __skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
722 __skb_push(frag, hlen);
723 skb_reset_network_header(frag);
d56f90a7
ACM
724 memcpy(skb_network_header(frag), tmp_hdr,
725 hlen);
1da177e4
LT
726 offset += skb->len - hlen - sizeof(struct frag_hdr);
727 fh->nexthdr = nexthdr;
728 fh->reserved = 0;
729 fh->frag_off = htons(offset);
53b24b8f 730 if (frag->next)
1da177e4
LT
731 fh->frag_off |= htons(IP6_MF);
732 fh->identification = frag_id;
0660e03f
ACM
733 ipv6_hdr(frag)->payload_len =
734 htons(frag->len -
735 sizeof(struct ipv6hdr));
1da177e4
LT
736 ip6_copy_metadata(frag, skb);
737 }
1ab1457c 738
7d8c6e39 739 err = output(net, sk, skb);
67ba4152 740 if (!err)
d8d1f30b 741 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 742 IPSTATS_MIB_FRAGCREATES);
dafee490 743
1da177e4
LT
744 if (err || !frag)
745 break;
746
747 skb = frag;
748 frag = skb->next;
749 skb->next = NULL;
750 }
751
a51482bd 752 kfree(tmp_hdr);
1da177e4
LT
753
754 if (err == 0) {
d8d1f30b 755 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 756 IPSTATS_MIB_FRAGOKS);
1da177e4
LT
757 return 0;
758 }
759
46cfd725 760 kfree_skb_list(frag);
1da177e4 761
d8d1f30b 762 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 763 IPSTATS_MIB_FRAGFAILS);
1da177e4 764 return err;
3d13008e
ED
765
766slow_path_clean:
767 skb_walk_frags(skb, frag2) {
768 if (frag2 == frag)
769 break;
770 frag2->sk = NULL;
771 frag2->destructor = NULL;
772 skb->truesize += frag2->truesize;
773 }
1da177e4
LT
774 }
775
776slow_path:
777 left = skb->len - hlen; /* Space per frame */
778 ptr = hlen; /* Where to start from */
779
780 /*
781 * Fragment the datagram.
782 */
783
a7ae1992 784 troom = rt->dst.dev->needed_tailroom;
1da177e4
LT
785
786 /*
787 * Keep copying data until we run out.
788 */
67ba4152 789 while (left > 0) {
79e49503
FW
790 u8 *fragnexthdr_offset;
791
1da177e4
LT
792 len = left;
793 /* IF: it doesn't fit, use 'mtu' - the data space left */
794 if (len > mtu)
795 len = mtu;
25985edc 796 /* IF: we are not sending up to and including the packet end
1da177e4
LT
797 then align the next start on an eight byte boundary */
798 if (len < left) {
799 len &= ~7;
800 }
1da177e4 801
cbffccc9
JP
802 /* Allocate buffer */
803 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
804 hroom + troom, GFP_ATOMIC);
805 if (!frag) {
1da177e4
LT
806 err = -ENOMEM;
807 goto fail;
808 }
809
810 /*
811 * Set up data on packet
812 */
813
814 ip6_copy_metadata(frag, skb);
a7ae1992 815 skb_reserve(frag, hroom);
1da177e4 816 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 817 skb_reset_network_header(frag);
badff6d0 818 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
819 frag->transport_header = (frag->network_header + hlen +
820 sizeof(struct frag_hdr));
1da177e4
LT
821
822 /*
823 * Charge the memory for the fragment to any owner
824 * it might possess
825 */
826 if (skb->sk)
827 skb_set_owner_w(frag, skb->sk);
828
829 /*
830 * Copy the packet header into the new buffer.
831 */
d626f62b 832 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4 833
79e49503
FW
834 fragnexthdr_offset = skb_network_header(frag);
835 fragnexthdr_offset += prevhdr - skb_network_header(skb);
836 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
837
1da177e4
LT
838 /*
839 * Build fragment header.
840 */
841 fh->nexthdr = nexthdr;
842 fh->reserved = 0;
286c2349 843 fh->identification = frag_id;
1da177e4
LT
844
845 /*
846 * Copy a block of the IP datagram.
847 */
e3f0b86b
HS
848 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
849 len));
1da177e4
LT
850 left -= len;
851
852 fh->frag_off = htons(offset);
853 if (left > 0)
854 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
855 ipv6_hdr(frag)->payload_len = htons(frag->len -
856 sizeof(struct ipv6hdr));
1da177e4
LT
857
858 ptr += len;
859 offset += len;
860
861 /*
862 * Put this fragment into the sending queue.
863 */
7d8c6e39 864 err = output(net, sk, frag);
1da177e4
LT
865 if (err)
866 goto fail;
dafee490 867
adf30907 868 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 869 IPSTATS_MIB_FRAGCREATES);
1da177e4 870 }
adf30907 871 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 872 IPSTATS_MIB_FRAGOKS);
808db80a 873 consume_skb(skb);
1da177e4
LT
874 return err;
875
485fca66
FW
876fail_toobig:
877 if (skb->sk && dst_allfrag(skb_dst(skb)))
878 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
879
485fca66
FW
880 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
881 err = -EMSGSIZE;
882
1da177e4 883fail:
adf30907 884 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 885 IPSTATS_MIB_FRAGFAILS);
1ab1457c 886 kfree_skb(skb);
1da177e4
LT
887 return err;
888}
889
b71d1d42
ED
890static inline int ip6_rt_check(const struct rt6key *rt_key,
891 const struct in6_addr *fl_addr,
892 const struct in6_addr *addr_cache)
cf6b1982 893{
a02cec21 894 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 895 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
896}
897
497c615a
HX
898static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
899 struct dst_entry *dst,
b71d1d42 900 const struct flowi6 *fl6)
1da177e4 901{
497c615a 902 struct ipv6_pinfo *np = inet6_sk(sk);
a963a37d 903 struct rt6_info *rt;
1da177e4 904
497c615a
HX
905 if (!dst)
906 goto out;
907
a963a37d
ED
908 if (dst->ops->family != AF_INET6) {
909 dst_release(dst);
910 return NULL;
911 }
912
913 rt = (struct rt6_info *)dst;
497c615a
HX
914 /* Yes, checking route validity in not connected
915 * case is not very simple. Take into account,
916 * that we do not support routing by source, TOS,
67ba4152 917 * and MSG_DONTROUTE --ANK (980726)
497c615a 918 *
cf6b1982
YH
919 * 1. ip6_rt_check(): If route was host route,
920 * check that cached destination is current.
497c615a
HX
921 * If it is network route, we still may
922 * check its validity using saved pointer
923 * to the last used address: daddr_cache.
924 * We do not want to save whole address now,
925 * (because main consumer of this service
926 * is tcp, which has not this problem),
927 * so that the last trick works only on connected
928 * sockets.
929 * 2. oif also should be the same.
930 */
4c9483b2 931 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 932#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 933 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 934#endif
ca254490
DA
935 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
936 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
497c615a
HX
937 dst_release(dst);
938 dst = NULL;
1da177e4
LT
939 }
940
497c615a
HX
941out:
942 return dst;
943}
944
3aef934f 945static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 946 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 947{
69cce1d1
DM
948#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
949 struct neighbour *n;
97cac082 950 struct rt6_info *rt;
69cce1d1
DM
951#endif
952 int err;
6f21c96a 953 int flags = 0;
497c615a 954
e16e888b
MS
955 /* The correct way to handle this would be to do
956 * ip6_route_get_saddr, and then ip6_route_output; however,
957 * the route-specific preferred source forces the
958 * ip6_route_output call _before_ ip6_route_get_saddr.
959 *
960 * In source specific routing (no src=any default route),
961 * ip6_route_output will fail given src=any saddr, though, so
962 * that's why we try it again later.
963 */
964 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
a68886a6 965 struct fib6_info *from;
e16e888b
MS
966 struct rt6_info *rt;
967 bool had_dst = *dst != NULL;
1da177e4 968
e16e888b
MS
969 if (!had_dst)
970 *dst = ip6_route_output(net, sk, fl6);
971 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
a68886a6
DA
972
973 rcu_read_lock();
974 from = rt ? rcu_dereference(rt->from) : NULL;
975 err = ip6_route_get_saddr(net, from, &fl6->daddr,
c3968a85
DW
976 sk ? inet6_sk(sk)->srcprefs : 0,
977 &fl6->saddr);
a68886a6
DA
978 rcu_read_unlock();
979
44456d37 980 if (err)
1da177e4 981 goto out_err_release;
e16e888b
MS
982
983 /* If we had an erroneous initial result, pretend it
984 * never existed and let the SA-enabled version take
985 * over.
986 */
987 if (!had_dst && (*dst)->error) {
988 dst_release(*dst);
989 *dst = NULL;
990 }
6f21c96a
PA
991
992 if (fl6->flowi6_oif)
993 flags |= RT6_LOOKUP_F_IFACE;
1da177e4
LT
994 }
995
e16e888b 996 if (!*dst)
6f21c96a 997 *dst = ip6_route_output_flags(net, sk, fl6, flags);
e16e888b
MS
998
999 err = (*dst)->error;
1000 if (err)
1001 goto out_err_release;
1002
95c385b4 1003#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
1004 /*
1005 * Here if the dst entry we've looked up
1006 * has a neighbour entry that is in the INCOMPLETE
1007 * state and the src address from the flow is
1008 * marked as OPTIMISTIC, we release the found
1009 * dst entry and replace it instead with the
1010 * dst entry of the nexthop router
1011 */
c56bf6fe 1012 rt = (struct rt6_info *) *dst;
707be1ff 1013 rcu_read_lock_bh();
2647a9b0
MKL
1014 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1015 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
1016 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1017 rcu_read_unlock_bh();
1018
1019 if (err) {
e550dfb0 1020 struct inet6_ifaddr *ifp;
4c9483b2 1021 struct flowi6 fl_gw6;
e550dfb0
NH
1022 int redirect;
1023
4c9483b2 1024 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
1025 (*dst)->dev, 1);
1026
1027 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1028 if (ifp)
1029 in6_ifa_put(ifp);
1030
1031 if (redirect) {
1032 /*
1033 * We need to get the dst entry for the
1034 * default router instead
1035 */
1036 dst_release(*dst);
4c9483b2
DM
1037 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1038 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1039 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
1040 err = (*dst)->error;
1041 if (err)
e550dfb0 1042 goto out_err_release;
95c385b4 1043 }
e550dfb0 1044 }
95c385b4 1045#endif
ec5e3b0a 1046 if (ipv6_addr_v4mapped(&fl6->saddr) &&
00ea1cee
WB
1047 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1048 err = -EAFNOSUPPORT;
1049 goto out_err_release;
1050 }
95c385b4 1051
1da177e4
LT
1052 return 0;
1053
1054out_err_release:
1055 dst_release(*dst);
1056 *dst = NULL;
8a966fc0 1057
0d240e78
DA
1058 if (err == -ENETUNREACH)
1059 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1060 return err;
1061}
34a0b3cd 1062
497c615a
HX
1063/**
1064 * ip6_dst_lookup - perform route lookup on flow
1065 * @sk: socket which provides route info
1066 * @dst: pointer to dst_entry * for result
4c9483b2 1067 * @fl6: flow to lookup
497c615a
HX
1068 *
1069 * This function performs a route lookup on the given flow.
1070 *
1071 * It returns zero on success, or a standard errno code on error.
1072 */
343d60aa
RP
1073int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1074 struct flowi6 *fl6)
497c615a
HX
1075{
1076 *dst = NULL;
343d60aa 1077 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1078}
3cf3dc6c
ACM
1079EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1080
497c615a 1081/**
68d0c6d3
DM
1082 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1083 * @sk: socket which provides route info
4c9483b2 1084 * @fl6: flow to lookup
68d0c6d3 1085 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1086 *
1087 * This function performs a route lookup on the given flow.
1088 *
1089 * It returns a valid dst pointer on success, or a pointer encoded
1090 * error code.
1091 */
3aef934f 1092struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1093 const struct in6_addr *final_dst)
68d0c6d3
DM
1094{
1095 struct dst_entry *dst = NULL;
1096 int err;
1097
343d60aa 1098 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1099 if (err)
1100 return ERR_PTR(err);
1101 if (final_dst)
4e3fd7a0 1102 fl6->daddr = *final_dst;
2774c131 1103
f92ee619 1104 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1105}
1106EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1107
1108/**
1109 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1110 * @sk: socket which provides the dst cache and route info
4c9483b2 1111 * @fl6: flow to lookup
68d0c6d3 1112 * @final_dst: final destination address for ipsec lookup
96818159 1113 * @connected: whether @sk is connected or not
497c615a
HX
1114 *
1115 * This function performs a route lookup on the given flow with the
1116 * possibility of using the cached route in the socket if it is valid.
1117 * It will take the socket dst lock when operating on the dst cache.
1118 * As a result, this function can only be used in process context.
1119 *
96818159
AK
1120 * In addition, for a connected socket, cache the dst in the socket
1121 * if the current cache is not valid.
1122 *
68d0c6d3
DM
1123 * It returns a valid dst pointer on success, or a pointer encoded
1124 * error code.
497c615a 1125 */
4c9483b2 1126struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
96818159
AK
1127 const struct in6_addr *final_dst,
1128 bool connected)
497c615a 1129{
68d0c6d3 1130 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1131
4c9483b2 1132 dst = ip6_sk_dst_check(sk, dst, fl6);
96818159
AK
1133 if (dst)
1134 return dst;
1135
1136 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1137 if (connected && !IS_ERR(dst))
1138 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
68d0c6d3 1139
00bc0ef5 1140 return dst;
497c615a 1141}
68d0c6d3 1142EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1143
0178b695
HX
1144static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1145 gfp_t gfp)
1146{
1147 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1148}
1149
1150static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1151 gfp_t gfp)
1152{
1153 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1154}
1155
75a493e6 1156static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1157 int *maxfraglen,
1158 unsigned int fragheaderlen,
1159 struct sk_buff *skb,
75a493e6 1160 struct rt6_info *rt,
e367c2d0 1161 unsigned int orig_mtu)
0c183379
G
1162{
1163 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1164 if (!skb) {
0c183379 1165 /* first fragment, reserve header_len */
e367c2d0 1166 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1167
1168 } else {
1169 /*
1170 * this fragment is not first, the headers
1171 * space is regarded as data space.
1172 */
e367c2d0 1173 *mtu = orig_mtu;
0c183379
G
1174 }
1175 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1176 + fragheaderlen - sizeof(struct frag_hdr);
1177 }
1178}
1179
366e41d9 1180static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
26879da5 1181 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
366e41d9
VY
1182 struct rt6_info *rt, struct flowi6 *fl6)
1183{
1184 struct ipv6_pinfo *np = inet6_sk(sk);
1185 unsigned int mtu;
26879da5 1186 struct ipv6_txoptions *opt = ipc6->opt;
366e41d9
VY
1187
1188 /*
1189 * setup for corking
1190 */
1191 if (opt) {
1192 if (WARN_ON(v6_cork->opt))
1193 return -EINVAL;
1194
864e2a1f 1195 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
63159f29 1196 if (unlikely(!v6_cork->opt))
366e41d9
VY
1197 return -ENOBUFS;
1198
864e2a1f 1199 v6_cork->opt->tot_len = sizeof(*opt);
366e41d9
VY
1200 v6_cork->opt->opt_flen = opt->opt_flen;
1201 v6_cork->opt->opt_nflen = opt->opt_nflen;
1202
1203 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1204 sk->sk_allocation);
1205 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1206 return -ENOBUFS;
1207
1208 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1209 sk->sk_allocation);
1210 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1211 return -ENOBUFS;
1212
1213 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1214 sk->sk_allocation);
1215 if (opt->hopopt && !v6_cork->opt->hopopt)
1216 return -ENOBUFS;
1217
1218 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1219 sk->sk_allocation);
1220 if (opt->srcrt && !v6_cork->opt->srcrt)
1221 return -ENOBUFS;
1222
1223 /* need source address above miyazawa*/
1224 }
1225 dst_hold(&rt->dst);
1226 cork->base.dst = &rt->dst;
1227 cork->fl.u.ip6 = *fl6;
26879da5
WW
1228 v6_cork->hop_limit = ipc6->hlimit;
1229 v6_cork->tclass = ipc6->tclass;
366e41d9
VY
1230 if (rt->dst.flags & DST_XFRM_TUNNEL)
1231 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
749439bf 1232 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
366e41d9
VY
1233 else
1234 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
c02b3741 1235 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
366e41d9
VY
1236 if (np->frag_size < mtu) {
1237 if (np->frag_size)
1238 mtu = np->frag_size;
1239 }
749439bf
MM
1240 if (mtu < IPV6_MIN_MTU)
1241 return -EINVAL;
366e41d9 1242 cork->base.fragsize = mtu;
bec1f6f6
WB
1243 cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
1244
0f6c480f 1245 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
366e41d9
VY
1246 cork->base.flags |= IPCORK_ALLFRAG;
1247 cork->base.length = 0;
1248
1249 return 0;
1250}
1251
0bbe84a6
VY
/*
 * Append @length bytes, fetched through @getfrag from @from, to the skbs
 * pending on @queue, allocating additional skbs whenever the data does
 * not fit into the current tail skb.
 *
 * @cork and @v6_cork supply the corked route, fragment size, GSO size and
 * tx options. @transhdrlen is non-zero only for the skb that carries the
 * transport header (allocated with sock_alloc_send_skb()); it is cleared
 * after the first allocation. @pfrag is the page fragment used when the
 * device supports scatter/gather.
 *
 * Socket write-memory accounting for skbs created here is accumulated in
 * a local counter and added to sk->sk_wmem_alloc once, on both the
 * success and the error exit.
 *
 * Returns 0 on success or a negative errno: -EMSGSIZE (message too big,
 * or no free frag slot), -EINVAL, -ENOBUFS, -ENOMEM, -EFAULT (getfrag
 * failed), or the error reported by sock_alloc_send_skb().
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	/* truesize/bytes added here, charged to sk_wmem_alloc at exit */
	unsigned int wmem_alloc_delta = 0;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* empty queue: the first skb also carries the ext headers */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	/* with GSO the size limit is IP6_MAX_MTU, not the corked fragsize */
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/* dontfrag on UDP/RAW: report the path MTU and refuse the data */
	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		/* clamp at 0 in case headersize exceeds the mtu */
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			/* no scatter/gather + MSG_MORE: allocate a full mtu */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			/* bytes to fetch from the caller for this skb */
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/*
				 * Count what this call has queued but not
				 * yet charged against the 2 * sndbuf limit.
				 */
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/*
				 * Move the bytes beyond maxfraglen from the
				 * previous skb into this one, adjust both
				 * checksums, and trim the previous skb.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* only the first skb carries these headers */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				/* skb has no owner yet: attach it to sk */
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			/* no scatter/gather: copy into the linear area */
			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				/* start a new, initially empty, page frag */
				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		}
		offset += copy;
		length -= copy;
	}

	/* skip the atomic operation when nothing was added */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	/* take back the part of this request that was not appended */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	/* skbs already queued above stay queued, so still charge them */
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
0bbe84a6
VY
1567
1568int ip6_append_data(struct sock *sk,
1569 int getfrag(void *from, char *to, int offset, int len,
1570 int odd, struct sk_buff *skb),
26879da5
WW
1571 void *from, int length, int transhdrlen,
1572 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1573 struct rt6_info *rt, unsigned int flags,
c14ac945 1574 const struct sockcm_cookie *sockc)
0bbe84a6
VY
1575{
1576 struct inet_sock *inet = inet_sk(sk);
1577 struct ipv6_pinfo *np = inet6_sk(sk);
1578 int exthdrlen;
1579 int err;
1580
1581 if (flags&MSG_PROBE)
1582 return 0;
1583 if (skb_queue_empty(&sk->sk_write_queue)) {
1584 /*
1585 * setup for corking
1586 */
26879da5
WW
1587 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1588 ipc6, rt, fl6);
0bbe84a6
VY
1589 if (err)
1590 return err;
1591
26879da5 1592 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1593 length += exthdrlen;
1594 transhdrlen += exthdrlen;
1595 } else {
1596 fl6 = &inet->cork.fl.u.ip6;
1597 transhdrlen = 0;
1598 }
1599
1600 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1601 &np->cork, sk_page_frag(sk), getfrag,
26879da5 1602 from, length, transhdrlen, flags, ipc6, sockc);
0bbe84a6 1603}
a495f836 1604EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1605
366e41d9
VY
1606static void ip6_cork_release(struct inet_cork_full *cork,
1607 struct inet6_cork *v6_cork)
bf138862 1608{
366e41d9
VY
1609 if (v6_cork->opt) {
1610 kfree(v6_cork->opt->dst0opt);
1611 kfree(v6_cork->opt->dst1opt);
1612 kfree(v6_cork->opt->hopopt);
1613 kfree(v6_cork->opt->srcrt);
1614 kfree(v6_cork->opt);
1615 v6_cork->opt = NULL;
0178b695
HX
1616 }
1617
366e41d9
VY
1618 if (cork->base.dst) {
1619 dst_release(cork->base.dst);
1620 cork->base.dst = NULL;
1621 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1622 }
366e41d9 1623 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1624}
1625
6422398c
VY
1626struct sk_buff *__ip6_make_skb(struct sock *sk,
1627 struct sk_buff_head *queue,
1628 struct inet_cork_full *cork,
1629 struct inet6_cork *v6_cork)
1da177e4
LT
1630{
1631 struct sk_buff *skb, *tmp_skb;
1632 struct sk_buff **tail_skb;
1633 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1da177e4 1634 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1635 struct net *net = sock_net(sk);
1da177e4 1636 struct ipv6hdr *hdr;
6422398c
VY
1637 struct ipv6_txoptions *opt = v6_cork->opt;
1638 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1639 struct flowi6 *fl6 = &cork->fl.u.ip6;
4c9483b2 1640 unsigned char proto = fl6->flowi6_proto;
1da177e4 1641
6422398c 1642 skb = __skb_dequeue(queue);
63159f29 1643 if (!skb)
1da177e4
LT
1644 goto out;
1645 tail_skb = &(skb_shinfo(skb)->frag_list);
1646
1647 /* move skb->data to ip header from ext header */
d56f90a7 1648 if (skb->data < skb_network_header(skb))
bbe735e4 1649 __skb_pull(skb, skb_network_offset(skb));
6422398c 1650 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
cfe1fc77 1651 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1652 *tail_skb = tmp_skb;
1653 tail_skb = &(tmp_skb->next);
1654 skb->len += tmp_skb->len;
1655 skb->data_len += tmp_skb->len;
1da177e4 1656 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1657 tmp_skb->destructor = NULL;
1658 tmp_skb->sk = NULL;
1da177e4
LT
1659 }
1660
28a89453 1661 /* Allow local fragmentation. */
60ff7467 1662 skb->ignore_df = ip6_sk_ignore_df(sk);
28a89453 1663
4e3fd7a0 1664 *final_dst = fl6->daddr;
cfe1fc77 1665 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1666 if (opt && opt->opt_flen)
1667 ipv6_push_frag_opts(skb, opt, &proto);
1668 if (opt && opt->opt_nflen)
613fa3ca 1669 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1da177e4 1670
e2d1bca7
ACM
1671 skb_push(skb, sizeof(struct ipv6hdr));
1672 skb_reset_network_header(skb);
0660e03f 1673 hdr = ipv6_hdr(skb);
1ab1457c 1674
6422398c 1675 ip6_flow_hdr(hdr, v6_cork->tclass,
cb1ce2ef 1676 ip6_make_flowlabel(net, skb, fl6->flowlabel,
513674b5 1677 ip6_autoflowlabel(net, np), fl6));
6422398c 1678 hdr->hop_limit = v6_cork->hop_limit;
1da177e4 1679 hdr->nexthdr = proto;
4e3fd7a0
AD
1680 hdr->saddr = fl6->saddr;
1681 hdr->daddr = *final_dst;
1da177e4 1682
a2c2064f 1683 skb->priority = sk->sk_priority;
4a19ec58 1684 skb->mark = sk->sk_mark;
a2c2064f 1685
d8d1f30b 1686 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1687 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1688 if (proto == IPPROTO_ICMPV6) {
adf30907 1689 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1690
43a43b60
HFS
1691 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1692 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1693 }
1694
6422398c
VY
1695 ip6_cork_release(cork, v6_cork);
1696out:
1697 return skb;
1698}
1699
1700int ip6_send_skb(struct sk_buff *skb)
1701{
1702 struct net *net = sock_net(skb->sk);
1703 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1704 int err;
1705
33224b16 1706 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1707 if (err) {
1708 if (err > 0)
6ce9e7b5 1709 err = net_xmit_errno(err);
1da177e4 1710 if (err)
6422398c
VY
1711 IP6_INC_STATS(net, rt->rt6i_idev,
1712 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1713 }
1714
1da177e4 1715 return err;
6422398c
VY
1716}
1717
/*
 * Build one skb out of the socket's pending frames and send it.
 * Returns 0 when nothing was pending, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1729
0bbe84a6 1730static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1731 struct sk_buff_head *queue,
1732 struct inet_cork_full *cork,
1733 struct inet6_cork *v6_cork)
1da177e4 1734{
1da177e4
LT
1735 struct sk_buff *skb;
1736
0bbe84a6 1737 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1738 if (skb_dst(skb))
1739 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1740 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1741 kfree_skb(skb);
1742 }
1743
6422398c 1744 ip6_cork_release(cork, v6_cork);
1da177e4 1745}
0bbe84a6
VY
1746
1747void ip6_flush_pending_frames(struct sock *sk)
1748{
6422398c
VY
1749 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1750 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1751}
a495f836 1752EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1753
1754struct sk_buff *ip6_make_skb(struct sock *sk,
1755 int getfrag(void *from, char *to, int offset,
1756 int len, int odd, struct sk_buff *skb),
1757 void *from, int length, int transhdrlen,
26879da5 1758 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
6422398c 1759 struct rt6_info *rt, unsigned int flags,
1cd7884d 1760 struct inet_cork_full *cork,
26879da5 1761 const struct sockcm_cookie *sockc)
6422398c 1762{
6422398c
VY
1763 struct inet6_cork v6_cork;
1764 struct sk_buff_head queue;
26879da5 1765 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
6422398c
VY
1766 int err;
1767
1768 if (flags & MSG_PROBE)
1769 return NULL;
1770
1771 __skb_queue_head_init(&queue);
1772
1cd7884d
WB
1773 cork->base.flags = 0;
1774 cork->base.addr = 0;
1775 cork->base.opt = NULL;
1776 cork->base.dst = NULL;
6422398c 1777 v6_cork.opt = NULL;
1cd7884d 1778 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
862c03ee 1779 if (err) {
1cd7884d 1780 ip6_cork_release(cork, &v6_cork);
6422398c 1781 return ERR_PTR(err);
862c03ee 1782 }
26879da5
WW
1783 if (ipc6->dontfrag < 0)
1784 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
6422398c 1785
1cd7884d 1786 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
6422398c
VY
1787 &current->task_frag, getfrag, from,
1788 length + exthdrlen, transhdrlen + exthdrlen,
26879da5 1789 flags, ipc6, sockc);
6422398c 1790 if (err) {
1cd7884d 1791 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
6422398c
VY
1792 return ERR_PTR(err);
1793 }
1794
1cd7884d 1795 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
6422398c 1796}
This page took 1.489114 seconds and 4 git commands to generate.