]> Git Repo - linux.git/blame - net/ipv6/ip6_output.c
ipv4: add defensive check for CHECKSUM_PARTIAL skbs in ip_fragment
[linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
ca254490 58#include <net/l3mdev.h>
1da177e4 59
/* Final transmit step: resolve the IPv6 next hop to a neighbour entry and
 * hand the skb to the neighbour output path.  Also implements multicast
 * loopback (delivering a clone of locally-generated multicast back to the
 * local stack) and scope filtering for node-local multicast.
 * Consumes @skb on every path; returns 0 or a negative errno.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to the local stack when the sending socket
		 * requested multicast loopback and either a multicast router
		 * socket exists (and the skb was not already forwarded) or a
		 * local listener has joined the destination group.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means "do not transmit on the wire";
			 * only the looped-back clone (if any) survives.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local-scope multicast must never leave the host. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Neighbour lookup is lockless under rcu_read_lock_bh(); create an
	 * entry on demand if none exists yet.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
123
0c4b51f0 124static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
9e508490
JE
125{
126 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
9037c357
JP
127 dst_allfrag(skb_dst(skb)) ||
128 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
7d8c6e39 129 return ip6_fragment(net, sk, skb, ip6_finish_output2);
9e508490 130 else
7d8c6e39 131 return ip6_finish_output2(net, sk, skb);
9e508490
JE
132}
133
/* Standard dst->output handler for IPv6: drop everything if IPv6 is
 * administratively disabled on the egress device, otherwise traverse the
 * POST_ROUTING netfilter hook and continue in ip6_finish_output().
 * The hook is skipped when IP6SKB_REROUTED is set (packet already passed
 * through it before being rerouted).
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
150
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Builds the IPv6 header (pushing any extension headers from @opt first),
 * then sends the packet through the LOCAL_OUT netfilter hook, or reports
 * EMSGSIZE via ipv6_local_error() if the packet exceeds the path MTU and
 * may not be fragmented.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		/* Fragmentable options go after, non-fragmentable before the
		 * fragment header; both update @proto to chain next headers.
		 */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/* NOTE(review): np is NULL-checked above but dereferenced
	 * unconditionally here — callers presumably always pass a full
	 * IPv6 socket; confirm before relying on np == NULL.
	 */
	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     np->autoflowlabel, fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
250
1da177e4
LT
/* Deliver a Router Alert packet to every raw socket registered for the
 * given RA value @sel.  Each matching socket except the last receives a
 * clone; the last one consumes @skb itself, which lets us avoid one clone.
 * Returns 1 if the packet was delivered to at least one socket (caller
 * must not free it), 0 otherwise.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* Honour SO_BINDTODEVICE on the raw socket. */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
279
e21e0b5f
VN
/* Decide how a proxying router should treat a packet destined to a
 * proxied address.  Returns 1 if the packet is a unicast neighbour
 * discovery message that must be handed to local input, -1 if it must
 * be discarded (link-local destination), 0 to continue forwarding.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Skip any extension headers to find the transport header. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
331
0c4b51f0
EB
/* Continuation after the FORWARD netfilter hook: clear the sender CPU
 * hint (the skb changes its transmit path here) and hand the packet to
 * the route's output function.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	skb_sender_cpu_clear(skb);
	return dst_output(net, sk, skb);
}
338
0954cf9c
HFS
339static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
340{
341 unsigned int mtu;
342 struct inet6_dev *idev;
343
344 if (dst_metric_locked(dst, RTAX_MTU)) {
345 mtu = dst_metric_raw(dst, RTAX_MTU);
346 if (mtu)
347 return mtu;
348 }
349
350 mtu = IPV6_MIN_MTU;
351 rcu_read_lock();
352 idev = __in6_dev_get(dst->dev);
353 if (idev)
354 mtu = idev->cnf.mtu6;
355 rcu_read_unlock();
356
357 return mtu;
358}
359
fe6cc55f
FW
360static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
361{
418a3156 362 if (skb->len <= mtu)
fe6cc55f
FW
363 return false;
364
60ff7467 365 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
fe6cc55f
FW
366 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
367 return true;
368
60ff7467 369 if (skb->ignore_df)
418a3156
FW
370 return false;
371
fe6cc55f
FW
372 if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
373 return false;
374
375 return true;
376}
377
1da177e4
LT
/* Forward an IPv6 packet received on one interface out another.
 * Performs the full forwarding pipeline: policy/sanity checks, Router
 * Alert delivery, hop limit handling, NDP proxying, ICMPv6 redirect
 * generation, MTU enforcement, and finally the FORWARD netfilter hook.
 * Consumes @skb on every path; returns 0 or a negative errno.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only forward packets actually addressed to us at L2. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A socket-owned skb here would indicate a local delivery mixup. */
	if (unlikely(skb->sk))
		goto drop;

	/* LRO-merged packets must never be forwarded (they may exceed MTU
	 * and have rewritten headers); warn and drop.
	 */
	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have replaced the dst; re-read it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header; re-read it. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
541
/* Copy per-packet metadata from @from to a freshly built fragment @to:
 * packet type, priority, protocol, route (dst reference is cloned),
 * device, mark, traffic-control index, netfilter and LSM state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop whatever dst @to carried before attaching the clone. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
558
7d8c6e39
EB
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 *
 * Two strategies:
 *  - fast path: if the skb already carries a suitable frag_list (each
 *    chunk correctly sized and aligned, enough headroom, not shared),
 *    the existing buffers are converted into fragments in place;
 *  - slow path: otherwise fresh skbs are allocated and payload is copied
 *    into them block by block.
 *
 * Consumes @skb (and the frag list) on every path; returns 0 on success
 * or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only trust skb->sk's IPv6 state when not on a recursive xmit. */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	/* hlen = length of the per-fragment (unfragmentable) header part;
	 * prevhdr points at the nexthdr byte we must rewrite to FRAGMENT.
	 */
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* Honour a smaller user-requested fragment size (IPV6_MTU). */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the payload space per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			/* Transfer socket ownership to each fragment so
			 * wmem accounting stays correct.
			 */
			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Insert the fragment header between the unfragmentable
		 * part and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		/* Free any fragments not yet handed to @output. */
		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done for the fragments we
		 * already walked before falling back to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/* Checksum offload cannot survive fragmentation; resolve it now. */
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
848
b71d1d42
ED
849static inline int ip6_rt_check(const struct rt6key *rt_key,
850 const struct in6_addr *fl_addr,
851 const struct in6_addr *addr_cache)
cf6b1982 852{
a02cec21 853 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 854 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
855}
856
497c615a
HX
/* Validate a socket-cached dst against flow @fl6.  Returns the dst if it
 * is still usable for this flow, or NULL (releasing the reference) when
 * it must be looked up again.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached IPv4 dst (mapped-address case) is never reusable here. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
903
3aef934f 904static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 905 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 906{
69cce1d1
DM
907#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
908 struct neighbour *n;
97cac082 909 struct rt6_info *rt;
69cce1d1
DM
910#endif
911 int err;
497c615a 912
e16e888b
MS
913 /* The correct way to handle this would be to do
914 * ip6_route_get_saddr, and then ip6_route_output; however,
915 * the route-specific preferred source forces the
916 * ip6_route_output call _before_ ip6_route_get_saddr.
917 *
918 * In source specific routing (no src=any default route),
919 * ip6_route_output will fail given src=any saddr, though, so
920 * that's why we try it again later.
921 */
922 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
923 struct rt6_info *rt;
924 bool had_dst = *dst != NULL;
1da177e4 925
e16e888b
MS
926 if (!had_dst)
927 *dst = ip6_route_output(net, sk, fl6);
928 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
929 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
930 sk ? inet6_sk(sk)->srcprefs : 0,
931 &fl6->saddr);
44456d37 932 if (err)
1da177e4 933 goto out_err_release;
e16e888b
MS
934
935 /* If we had an erroneous initial result, pretend it
936 * never existed and let the SA-enabled version take
937 * over.
938 */
939 if (!had_dst && (*dst)->error) {
940 dst_release(*dst);
941 *dst = NULL;
942 }
1da177e4
LT
943 }
944
e16e888b
MS
945 if (!*dst)
946 *dst = ip6_route_output(net, sk, fl6);
947
948 err = (*dst)->error;
949 if (err)
950 goto out_err_release;
951
95c385b4 952#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
953 /*
954 * Here if the dst entry we've looked up
955 * has a neighbour entry that is in the INCOMPLETE
956 * state and the src address from the flow is
957 * marked as OPTIMISTIC, we release the found
958 * dst entry and replace it instead with the
959 * dst entry of the nexthop router
960 */
c56bf6fe 961 rt = (struct rt6_info *) *dst;
707be1ff 962 rcu_read_lock_bh();
2647a9b0
MKL
963 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
964 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
965 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
966 rcu_read_unlock_bh();
967
968 if (err) {
e550dfb0 969 struct inet6_ifaddr *ifp;
4c9483b2 970 struct flowi6 fl_gw6;
e550dfb0
NH
971 int redirect;
972
4c9483b2 973 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
974 (*dst)->dev, 1);
975
976 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
977 if (ifp)
978 in6_ifa_put(ifp);
979
980 if (redirect) {
981 /*
982 * We need to get the dst entry for the
983 * default router instead
984 */
985 dst_release(*dst);
4c9483b2
DM
986 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
987 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
988 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
989 err = (*dst)->error;
990 if (err)
e550dfb0 991 goto out_err_release;
95c385b4 992 }
e550dfb0 993 }
95c385b4
NH
994#endif
995
1da177e4
LT
996 return 0;
997
998out_err_release:
ca46f9c8 999 if (err == -ENETUNREACH)
5ac68e7c 1000 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1001 dst_release(*dst);
1002 *dst = NULL;
1003 return err;
1004}
34a0b3cd 1005
497c615a
HX
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to look the route up in
 *	@sk: socket which provides route info (may be NULL)
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1023
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		/* The route was looked up with an intermediate destination;
		 * restore the final one before the xfrm lookup below sees
		 * the flow.
		 */
		fl6->daddr = *final_dst;
	if (!fl6->flowi6_oif)
		/* NOTE(review): presumably lets an L3 master device (VRF)
		 * pin the output interface -- confirm l3mdev_fib_oif()
		 * semantics.
		 */
		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1052
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	/* Fetch the socket's cached dst, validated against its cookie... */
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	/* ...and discard it if it no longer matches this flow. */
	dst = ip6_sk_dst_check(sk, dst, fl6);

	/* Performs a fresh lookup only when no usable dst survived above. */
	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		/* Restore the final destination before the xfrm lookup. */
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1084
/* Build (or extend) a single oversized UDP skb for devices with UDP
 * fragmentation offload (NETIF_F_UFO): instead of queueing one skb per
 * fragment, all payload is appended to one GSO skb and the device splits
 * it on transmit.  Returns 0 on success or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			const struct flowi6 *fl6)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First call for this datagram: allocate the head skb with
		 * room for the link-layer, IPv6 and transport headers.
		 * NOTE(review): the extra 20 bytes look like historical
		 * slack -- confirm.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		/* Tail skb is already set up for GSO: just append payload. */
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	/* Pick the fragment ID now so every device-built fragment shares it. */
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1da177e4 1144
0178b695
HX
1145static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1146 gfp_t gfp)
1147{
1148 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1149}
1150
1151static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1152 gfp_t gfp)
1153{
1154 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1155}
1156
75a493e6 1157static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1158 int *maxfraglen,
1159 unsigned int fragheaderlen,
1160 struct sk_buff *skb,
75a493e6 1161 struct rt6_info *rt,
e367c2d0 1162 unsigned int orig_mtu)
0c183379
G
1163{
1164 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1165 if (!skb) {
0c183379 1166 /* first fragment, reserve header_len */
e367c2d0 1167 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1168
1169 } else {
1170 /*
1171 * this fragment is not first, the headers
1172 * space is regarded as data space.
1173 */
e367c2d0 1174 *mtu = orig_mtu;
0c183379
G
1175 }
1176 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1177 + fragheaderlen - sizeof(struct frag_hdr);
1178 }
1179}
1180
366e41d9
VY
/* Initialize cork state for a corked send: duplicate the tx options into
 * @v6_cork, take a reference on the route, and compute the fragment size
 * honouring pmtudisc mode and the socket's frag_size override.
 * Returns 0 or a negative errno.
 *
 * NOTE(review): on -ENOBUFS after a partial option dup, the already
 * duplicated headers stay attached to v6_cork->opt; callers are expected
 * to free them via ip6_cork_release() -- confirm every caller does.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* A second setup without an intervening release is a bug. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = opt->tot_len;
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header the caller supplied. */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork owns a route reference until ip6_cork_release(). */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	/* In PROBE mode use the device mtu; otherwise the path mtu. */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
	/* A smaller, non-zero IPV6_MTU socket option wins. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1248
0bbe84a6
VY
/* Core of ip6_append_data()/ip6_make_skb(): copy @length bytes obtained
 * through @getfrag onto @queue, extending the tail skb or allocating new
 * fragment-sized skbs as dictated by the corked mtu.  May instead build a
 * single UFO skb when the device supports it.  On error the cork length
 * is rolled back and OUTDISCARDS is bumped; already-queued skbs are left
 * for the caller to flush.  Returns 0 or a negative errno.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First chunk of this datagram: account for destination
		 * options and any ipsec header space in the route.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Per-fragment payload must be a multiple of 8 octets. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		unsigned int maxnonfragsize, headersize;

		headersize = sizeof(struct ipv6hdr) +
			     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
			     (dst_allfrag(&rt->dst) ?
			      sizeof(struct frag_hdr) : 0) +
			     rt->rt6i_nfheader_len;

		if (ip6_sk_ignore_df(sk))
			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
		else
			maxnonfragsize = mtu;

		/* dontfrag active */
		if ((cork->length + length > mtu - headersize) && dontfrag &&
		    (sk->sk_protocol == IPPROTO_UDP ||
		     sk->sk_protocol == IPPROTO_RAW)) {
			/* Report the path mtu to the application instead of
			 * fragmenting (IPV6_DONTFRAG).
			 */
			ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
						   sizeof(struct ipv6hdr));
			goto emsgsize;
		}

		if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
			ipv6_local_error(sk, EMSGSIZE, fl6,
					 mtu - headersize +
					 sizeof(struct ipv6hdr));
			return -EMSGSIZE;
		}
	}

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/* If this is the first and only packet and device
	 * supports checksum offloading, let's use it.
	 * Use transhdrlen, same as IPv4, because partial
	 * sums only work when transhdrlen is set.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    length + fragheaderlen < mtu &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;
	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	/* Large UDP datagram on a UFO-capable device: hand off to the
	 * single-skb GSO path instead of fragmenting here.
	 */
	if (((length > mtu) ||
	     (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			/* fraggap = bytes beyond the 8-byte-aligned boundary
			 * in the previous skb that must move into this one.
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: do not block, and respect
				 * a soft cap of twice the send buffer.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang from the previous skb and
				 * keep its software checksum consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Header bookkeeping applies to the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* Non-SG device: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* SG device: append into (possibly shared) page frags. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Roll back the bytes we failed to append. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
0bbe84a6
VY
1563
/* Append data to the socket's pending write queue, corking it on first
 * use.  On the first call the cork (route reference, duplicated options,
 * fragment size) is set up; subsequent calls reuse the corked flow.
 * Returns 0 or a negative errno.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		/* The first chunk also carries the destination options. */
		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Already corked: the stored flow wins over the caller's. */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1600
366e41d9
VY
/* Release per-cork state: free the duplicated tx options and drop the
 * held route reference.  Safe when nothing (or only part) was allocated;
 * leaves the cork ready for reuse.
 */
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		/* Sub-options were duplicated individually in
		 * ip6_setup_cork(); kfree(NULL) is a no-op for any that
		 * were never set.
		 */
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}
1620
6422398c
VY
/* Coalesce all queued fragment skbs into one skb (frag_list chained),
 * push the extension headers and the IPv6 header, update MIB counters
 * and release the cork.  Returns the finished skb, or NULL if the queue
 * was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain every remaining skb onto the head's frag_list, transferring
	 * their length accounting and ownership to the head skb.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		/* May rewrite final_dst when a routing header is present. */
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					np->autoflowlabel, fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1694
1695int ip6_send_skb(struct sk_buff *skb)
1696{
1697 struct net *net = sock_net(skb->sk);
1698 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1699 int err;
1700
33224b16 1701 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1702 if (err) {
1703 if (err > 0)
6ce9e7b5 1704 err = net_xmit_errno(err);
1da177e4 1705 if (err)
6422398c
VY
1706 IP6_INC_STATS(net, rt->rt6i_idev,
1707 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1708 }
1709
1da177e4 1710 return err;
6422398c
VY
1711}
1712
/* Finalize the corked data on @sk's write queue and transmit it.
 * An empty queue is not an error.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1724
/* Drop every queued fragment skb and release the cork state.  Used when
 * a corked send is aborted, either on error or by an explicit flush.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		/* Count a discard only for skbs that already hold a route. */
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
0bbe84a6
VY
1741
1742void ip6_flush_pending_frames(struct sock *sk)
1743{
6422398c
VY
1744 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1745 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1746}
a495f836 1747EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
6422398c
VY
1748
1749struct sk_buff *ip6_make_skb(struct sock *sk,
1750 int getfrag(void *from, char *to, int offset,
1751 int len, int odd, struct sk_buff *skb),
1752 void *from, int length, int transhdrlen,
1753 int hlimit, int tclass,
1754 struct ipv6_txoptions *opt, struct flowi6 *fl6,
1755 struct rt6_info *rt, unsigned int flags,
1756 int dontfrag)
1757{
1758 struct inet_cork_full cork;
1759 struct inet6_cork v6_cork;
1760 struct sk_buff_head queue;
1761 int exthdrlen = (opt ? opt->opt_flen : 0);
1762 int err;
1763
1764 if (flags & MSG_PROBE)
1765 return NULL;
1766
1767 __skb_queue_head_init(&queue);
1768
1769 cork.base.flags = 0;
1770 cork.base.addr = 0;
1771 cork.base.opt = NULL;
1772 v6_cork.opt = NULL;
1773 err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
1774 if (err)
1775 return ERR_PTR(err);
1776
1777 if (dontfrag < 0)
1778 dontfrag = inet6_sk(sk)->dontfrag;
1779
1780 err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
1781 &current->task_frag, getfrag, from,
1782 length + exthdrlen, transhdrlen + exthdrlen,
1783 flags, dontfrag);
1784 if (err) {
1785 __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
1786 return ERR_PTR(err);
1787 }
1788
1789 return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
1790}
This page took 1.29048 seconds and 4 git commands to generate.