]> Git Repo - linux.git/blame - net/ipv6/ip6_output.c
ipv4: Pass struct net through ip_fragment
[linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
 16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
/* Final transmit step: resolve the next-hop neighbour and hand the packet
 * to the device layer. For multicast destinations, optionally loop a clone
 * back to local listeners (and to the multicast router socket) first, and
 * drop node-local-scoped packets that would leave the box.
 */
static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct net *net = dev_net(dev);
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local receivers when the sender asked
		 * for multicast loopback, either because the mroute socket
		 * originated it (and it was not already forwarded) or a local
		 * socket has joined this group on this device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means "do not send on the wire" —
			 * only the looped-back copy (if any) survives.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local multicast must never leave the host. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Resolve (or create) the neighbour entry under RCU-bh and transmit. */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
123
/* Decide between direct transmission and fragmentation: fragment when the
 * packet exceeds the path MTU (and is not GSO), when the route demands
 * fragmentation of all packets (dst_allfrag), or when conntrack recorded a
 * smaller maximum fragment size on input.
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(sk, skb);
}
133
/* Standard dst_output entry point for locally-originated IPv6 packets:
 * drop if IPv6 is administratively disabled on the egress device, otherwise
 * traverse the POST_ROUTING netfilter hook (skipped for rerouted skbs)
 * before ip6_finish_output().
 */
int ip6_output(struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	struct net *net = dev_net(dev);

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
151
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Prepends extension headers (if @opt is given) and the IPv6 header, then
 * pushes the packet through the LOCAL_OUT netfilter hook. Returns 0 or a
 * negative errno; oversized non-GSO packets get -EMSGSIZE with a local
 * error queued on the socket.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* may rewrite first_hop for routing headers */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/* NOTE(review): np is NULL-checked above for hop_limit, but
	 * dereferenced unconditionally here for autoflowlabel — callers
	 * apparently guarantee a full IPv6 socket; confirm.
	 */
	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output_okfn);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
251
/* Deliver a Router Alert packet to every raw socket registered in the
 * ip6_ra_chain whose selector matches @sel (and whose device binding, if
 * any, matches the ingress device). All but the last matching socket get
 * a clone; the last one consumes @skb itself. Returns 1 if the packet was
 * delivered to at least one socket, 0 otherwise (caller keeps ownership).
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
280
/* Classify a packet destined to a proxied (NDP-proxy) address:
 * returns 1 if it is a unicast neighbour-discovery message that should be
 * handed to local input, -1 if it targets a link-local address the proxy
 * must not forward (link failure is signalled), 0 to forward normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Skip past any extension headers to find the transport header. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Ensure at least the ICMPv6 type byte is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
332
/* NF_INET_FORWARD okfn: clear the recorded sender CPU (the skb changes
 * queues on forward) and hand the packet to the route's output path.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(sk, skb);
}
339
0954cf9c
HFS
340static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
341{
342 unsigned int mtu;
343 struct inet6_dev *idev;
344
345 if (dst_metric_locked(dst, RTAX_MTU)) {
346 mtu = dst_metric_raw(dst, RTAX_MTU);
347 if (mtu)
348 return mtu;
349 }
350
351 mtu = IPV6_MIN_MTU;
352 rcu_read_lock();
353 idev = __in6_dev_get(dst->dev);
354 if (idev)
355 mtu = idev->cnf.mtu6;
356 rcu_read_unlock();
357
358 return mtu;
359}
360
fe6cc55f
FW
361static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
362{
418a3156 363 if (skb->len <= mtu)
fe6cc55f
FW
364 return false;
365
60ff7467 366 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
367 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
368 return true;
369
60ff7467 370 if (skb->ignore_df)
418a3156
FW
371 return false;
372
fe6cc55f
FW
373 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
374 return false;
375
376 return true;
377}
378
/* Forward a received IPv6 packet: policy checks, Router Alert delivery,
 * hop-limit handling, NDP-proxy interception, optional ICMPv6 redirect,
 * MTU enforcement, then the FORWARD netfilter hook. Consumes @skb on every
 * path; returns 0 or a negative errno.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have rerouted the skb — reload the dst. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
539
540static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
541{
542 to->pkt_type = from->pkt_type;
543 to->priority = from->priority;
544 to->protocol = from->protocol;
adf30907
ED
545 skb_dst_drop(to);
546 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 547 to->dev = from->dev;
82e91ffe 548 to->mark = from->mark;
1da177e4
LT
549
550#ifdef CONFIG_NET_SCHED
551 to->tc_index = from->tc_index;
552#endif
e7ac05f3 553 nf_copy(to, from);
984bc16c 554 skb_copy_secmark(to, from);
1da177e4
LT
555}
556
/* Split @skb into IPv6 fragments and pass each to @output. A fast path
 * reuses an existing, well-shaped frag_list; otherwise the slow path
 * allocates and copies each fragment. Consumes @skb; returns 0 or a
 * negative errno (-EMSGSIZE with a Packet-Too-Big sent when fragmentation
 * is not permitted).
 */
int ip6_fragment(struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only trust the socket's frag_size when not on a recursive
	 * (tunnel) xmit path.
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	/* hlen = length of the unfragmentable part; prevhdr points at the
	 * nexthdr byte that must become NEXTHDR_FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* From here on, mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		/* Fast path only if the head and every list member already
		 * have the right size, alignment, sharing and headroom.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Insert the fragment header between the unfragmentable
		 * part and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfers done so far in the walk. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
845
b71d1d42
ED
846static inline int ip6_rt_check(const struct rt6key *rt_key,
847 const struct in6_addr *fl_addr,
848 const struct in6_addr *addr_cache)
cf6b1982 849{
a02cec21 850 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 851 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
852}
853
/* Validate a socket's cached dst against flow @fl6. Returns the dst if it
 * is still usable, or NULL (releasing the reference) when it is absent,
 * not IPv6, or stale for this destination/source/oif.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A v4-mapped cached route cannot serve an IPv6 flow. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
899
/* Core of the dst lookup: fills *@dst for flow @fl6, picking a source
 * address when the flow has none, and (with optimistic DAD) retargeting
 * the lookup at the default router when the chosen source is optimistic
 * and the next hop is not yet a valid neighbour. On failure *@dst is
 * released and NULLed and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}
	}

	if (!*dst)
		*dst = ip6_route_output(net, sk, fl6);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
34a0b3cd 1001
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to look the route up in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1019
497c615a 1020/**
68d0c6d3
DM
1021 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1022 * @sk: socket which provides route info
4c9483b2 1023 * @fl6: flow to lookup
68d0c6d3 1024 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1025 *
1026 * This function performs a route lookup on the given flow.
1027 *
1028 * It returns a valid dst pointer on success, or a pointer encoded
1029 * error code.
1030 */
3aef934f 1031struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1032 const struct in6_addr *final_dst)
68d0c6d3
DM
1033{
1034 struct dst_entry *dst = NULL;
1035 int err;
1036
343d60aa 1037 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1038 if (err)
1039 return ERR_PTR(err);
1040 if (final_dst)
4e3fd7a0 1041 fl6->daddr = *final_dst;
a0a9f33b
PS
1042 if (!fl6->flowi6_oif)
1043 fl6->flowi6_oif = dst->dev->ifindex;
2774c131 1044
f92ee619 1045 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1046}
1047EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1048
1049/**
1050 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1051 * @sk: socket which provides the dst cache and route info
4c9483b2 1052 * @fl6: flow to lookup
68d0c6d3 1053 * @final_dst: final destination address for ipsec lookup
497c615a
HX
1054 *
1055 * This function performs a route lookup on the given flow with the
1056 * possibility of using the cached route in the socket if it is valid.
1057 * It will take the socket dst lock when operating on the dst cache.
1058 * As a result, this function can only be used in process context.
1059 *
68d0c6d3
DM
1060 * It returns a valid dst pointer on success, or a pointer encoded
1061 * error code.
497c615a 1062 */
4c9483b2 1063struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1064 const struct in6_addr *final_dst)
497c615a 1065{
68d0c6d3
DM
1066 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1067 int err;
497c615a 1068
4c9483b2 1069 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1070
343d60aa 1071 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1072 if (err)
1073 return ERR_PTR(err);
1074 if (final_dst)
4e3fd7a0 1075 fl6->daddr = *final_dst;
2774c131 1076
f92ee619 1077 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1078}
68d0c6d3 1079EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1080
/* Append datagram payload for UDP fragmentation offload (UFO).
 *
 * Instead of building the fragment chain in software, keep the whole
 * datagram in one (possibly multi-page) skb and let the device segment
 * it; gso_size carries the on-the-wire fragment payload size.
 *
 * Returns 0 on success or a negative errno from allocation/copy-in.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First call for this datagram: allocate the head skb.
		 * NOTE(review): the extra 20 bytes of slack look like
		 * historical headroom padding — confirm before changing.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already GSO-initialized: just append data. */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pre-select the fragment ID the device will stamp into every
	 * fragment header it emits for this datagram.
	 */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	/* Copy the user payload into page fragments of the single skb
	 * (transhdrlen bytes were already accounted in the linear area).
	 */
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1140
0178b695
HX
1141static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1142 gfp_t gfp)
1143{
1144 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1145}
1146
1147static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1148 gfp_t gfp)
1149{
1150 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1151}
1152
75a493e6 1153static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1154 int *maxfraglen,
1155 unsigned int fragheaderlen,
1156 struct sk_buff *skb,
75a493e6 1157 struct rt6_info *rt,
e367c2d0 1158 unsigned int orig_mtu)
0c183379
G
1159{
1160 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1161 if (!skb) {
0c183379 1162 /* first fragment, reserve header_len */
e367c2d0 1163 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1164
1165 } else {
1166 /*
1167 * this fragment is not first, the headers
1168 * space is regarded as data space.
1169 */
e367c2d0 1170 *mtu = orig_mtu;
0c183379
G
1171 }
1172 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1173 + fragheaderlen - sizeof(struct frag_hdr);
1174 }
1175}
1176
/* Initialize cork state for a new corked datagram: deep-copy the
 * transmit options, pin the route, and derive the effective MTU.
 *
 * Returns 0 on success, -EINVAL if options are already corked,
 * -ENOBUFS on allocation failure.
 *
 * NOTE(review): on a mid-way -ENOBUFS the already-duplicated option
 * blocks stay attached to v6_cork->opt; cleanup is delegated to the
 * caller via ip6_cork_release() — verify every caller's error path
 * actually releases the cork.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A previous cork must have been released first. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header; a NULL source dups to
		 * NULL, so failure is "source set but copy missing".
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* Pin the route for the lifetime of the cork; released in
	 * ip6_cork_release().
	 */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	/* Pick the MTU source: device MTU when probing PMTU, otherwise
	 * the (path) dst MTU; XFRM tunnels use the immediate dst.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A smaller per-socket frag_size overrides the route MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1244
/* Core transmit-append engine shared by ip6_append_data() and
 * ip6_make_skb(): copies user data (via getfrag) into a queue of skbs,
 * splitting at fragment boundaries so the final ip6_(make|push) step can
 * emit ready-to-send packets.
 *
 * Size bookkeeping: mtu/maxfraglen bound each fragment; cork->length
 * accumulates the corked datagram length across calls; exthdrlen and
 * dst_exthdrlen reserve space for options/ipsec on the first skb only.
 *
 * Returns 0 on success or a negative errno (-EMSGSIZE, -ENOBUFS,
 * -EFAULT, -EINVAL, -EMSGSIZE, -ENOMEM).
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	/* Header space (options + ipsec trailer room) is only reserved
	 * on the very first skb of the datagram.
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/* Per-fragment header cost: IPv6 header + nf-added headers +
	 * non-fragmentable options; maxfraglen rounds the payload down
	 * to a multiple of 8 as required by the fragment header.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			/* Report the usable MTU to the socket rather than
			 * fragmenting (IPV6_DONTFRAG semantics).
			 */
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	/* Capture tx timestamp flags/key once per datagram. */
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 * Use transhdrlen, same as IPv4, because partial
	 * sums only work when transhdrlen is set.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Large UDP datagrams on UFO-capable devices take the GSO path:
	 * one big skb instead of a software-built fragment chain.
	 */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Corked follow-up data may exceed the send
				 * buffer up to a 2x slack before we refuse.
				 */
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the 8-byte-alignment overhang from the
				 * previous skb into this one, fixing both
				 * checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* First-fragment-only reservations are consumed. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area,
			 * trimming back on fault.
			 */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: land the bytes in the socket's
			 * page_frag and attach/extend a page fragment.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Undo the optimistic length accounting for the unwritten rest. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1559
1560int ip6_append_data(struct sock *sk,
1561 int getfrag(void *from, char *to, int offset, int len,
1562 int odd, struct sk_buff *skb),
1563 void *from, int length, int transhdrlen, int hlimit,
1564 int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1565 struct rt6_info *rt, unsigned int flags, int dontfrag)
1566{
1567 struct inet_sock *inet = inet_sk(sk);
1568 struct ipv6_pinfo *np = inet6_sk(sk);
1569 int exthdrlen;
1570 int err;
1571
1572 if (flags&MSG_PROBE)
1573 return 0;
1574 if (skb_queue_empty(&sk->sk_write_queue)) {
1575 /*
1576 * setup for corking
1577 */
1578 err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
1579 tclass, opt, rt, fl6);
1580 if (err)
1581 return err;
1582
1583 exthdrlen = (opt ? opt->opt_flen : 0);
1584 length += exthdrlen;
1585 transhdrlen += exthdrlen;
1586 } else {
1587 fl6 = &inet->cork.fl.u.ip6;
1588 transhdrlen = 0;
1589 }
1590
1591 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1592 &np->cork, sk_page_frag(sk), getfrag,
1593 from, length, transhdrlen, flags, dontfrag);
1594}
a495f836 1595EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1596
366e41d9
VY
1597static void ip6_cork_release(struct inet_cork_full *cork,
1598 struct inet6_cork *v6_cork)
bf138862 1599{
366e41d9
VY
1600 if (v6_cork->opt) {
1601 kfree(v6_cork->opt->dst0opt);
1602 kfree(v6_cork->opt->dst1opt);
1603 kfree(v6_cork->opt->hopopt);
1604 kfree(v6_cork->opt->srcrt);
1605 kfree(v6_cork->opt);
1606 v6_cork->opt = NULL;
0178b695
HX
1607 }
1608
366e41d9
VY
1609 if (cork->base.dst) {
1610 dst_release(cork->base.dst);
1611 cork->base.dst = NULL;
1612 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1613 }
366e41d9 1614 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1615}
1616
/* Turn the queued fragments of a corked datagram into one sk_buff:
 * chain trailing skbs onto frag_list, prepend extension headers and the
 * IPv6 header, stamp route/priority/stats, then release the cork.
 *
 * Returns the assembled skb, or NULL if the queue was empty (in which
 * case the cork is intentionally left alone).
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Link all remaining queue members onto the head skb's frag_list,
	 * transferring their byte counts and detaching them from the
	 * socket's accounting (head skb now owns the truesize).
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* A routing header may rewrite the destination; keep the flow's
	 * daddr and let ipv6_push_nfrag_opts() substitute the first hop.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Per-message-type ICMPv6 output statistics. */
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1690
1691int ip6_send_skb(struct sk_buff *skb)
1692{
1693 struct net *net = sock_net(skb->sk);
1694 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1695 int err;
1696
ef76bc23 1697 err = ip6_local_out(skb);
1da177e4
LT
1698 if (err) {
1699 if (err > 0)
6ce9e7b5 1700 err = net_xmit_errno(err);
1da177e4 1701 if (err)
6422398c
VY
1702 IP6_INC_STATS(net, rt->rt6i_idev,
1703 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1704 }
1705
1da177e4 1706 return err;
6422398c
VY
1707}
1708
/* Assemble the corked fragments on sk's write queue into one datagram
 * and transmit it. An empty queue is a successful no-op.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1720
0bbe84a6 1721static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1722 struct sk_buff_head *queue,
1723 struct inet_cork_full *cork,
1724 struct inet6_cork *v6_cork)
1da177e4 1725{
1da177e4
LT
1726 struct sk_buff *skb;
1727
0bbe84a6 1728 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1729 if (skb_dst(skb))
1730 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1731 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1732 kfree_skb(skb);
1733 }
1734
6422398c 1735 ip6_cork_release(cork, v6_cork);
1da177e4 1736}
0bbe84a6
VY
1737
1738void ip6_flush_pending_frames(struct sock *sk)
1739{
6422398c
VY
1740 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1741 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1742}
a495f836 1743EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1744
1745struct sk_buff *ip6_make_skb(struct sock *sk,
1746 int getfrag(void *from, char *to, int offset,
1747 int len, int odd, struct sk_buff *skb),
1748 void *from, int length, int transhdrlen,
1749 int hlimit, int tclass,
1750 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1751 struct rt6_info *rt, unsigned int flags,
1752 int dontfrag)
1753{
1754 struct inet_cork_full cork;
1755 struct inet6_cork v6_cork;
1756 struct sk_buff_head queue;
1757 int exthdrlen = (opt ? opt->opt_flen : 0);
1758 int err;
1759
1760 if (flags & MSG_PROBE)
1761 return NULL;
1762
1763 __skb_queue_head_init(&queue);
1764
1765 cork.base.flags = 0;
1766 cork.base.addr = 0;
1767 cork.base.opt = NULL;
1768 v6_cork.opt = NULL;
1769 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1770 if (err)
1771 return ERR_PTR(err);
1772
1773 if (dontfrag < 0)
1774 dontfrag = inet6_sk(sk)->dontfrag;
1775
1776 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1777 &current->task_frag, getfrag, from,
1778 length + exthdrlen, transhdrlen + exthdrlen,
1779 flags, dontfrag);
1780 if (err) {
1781 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1782 return ERR_PTR(err);
1783 }
1784
1785 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1786}
This page took 1.331542 seconds and 4 git commands to generate.