]> Git Repo - linux.git/blame - net/ipv6/ip6_output.c
ipv4: Remove all uses of LL_ALLOCATED_SPACE
[linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4
LT
41
42#include <linux/netfilter.h>
43#include <linux/netfilter_ipv6.h>
44
45#include <net/sock.h>
46#include <net/snmp.h>
47
48#include <net/ipv6.h>
49#include <net/ndisc.h>
50#include <net/protocol.h>
51#include <net/ip6_route.h>
52#include <net/addrconf.h>
53#include <net/rawv6.h>
54#include <net/icmp.h>
55#include <net/xfrm.h>
56#include <net/checksum.h>
7bc570c8 57#include <linux/mroute6.h>
1da177e4 58
ad0081e4 59int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
1da177e4 60
ef76bc23
HX
61int __ip6_local_out(struct sk_buff *skb)
62{
63 int len;
64
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
69
b2e0b385
JE
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
ef76bc23
HX
72}
73
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, hand the packet to dst_output().  Any other value returned
 * by __ip6_local_out() is passed back to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85
1da177e4
LT
86/* dev_loopback_xmit for use with netfilter. */
87static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
88{
459a98ed 89 skb_reset_mac_header(newskb);
bbe735e4 90 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 93 WARN_ON(!skb_dst(newskb));
1da177e4 94
e30b38c2 95 netif_rx_ni(newskb);
1da177e4
LT
96 return 0;
97}
98
9e508490 99static int ip6_finish_output2(struct sk_buff *skb)
1da177e4 100{
adf30907 101 struct dst_entry *dst = skb_dst(skb);
1da177e4 102 struct net_device *dev = dst->dev;
f6b72b62 103 struct neighbour *neigh;
1da177e4
LT
104
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
107
0660e03f 108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 110
7ad6848c 111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
d1db275d 112 ((mroute6_socket(dev_net(dev), skb) &&
bd91b8bf 113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
120 */
121 if (newskb)
b2e0b385
JE
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
1da177e4
LT
124 ip6_dev_loopback_xmit);
125
0660e03f 126 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
129 kfree_skb(skb);
130 return 0;
131 }
132 }
133
edf391ff
NH
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
1da177e4
LT
136 }
137
f2c31e32 138 rcu_read_lock();
69cce1d1 139 neigh = dst_get_neighbour(dst);
f2c31e32
ED
140 if (neigh) {
141 int res = neigh_output(neigh, skb);
05e3aa09 142
f2c31e32
ED
143 rcu_read_unlock();
144 return res;
145 }
146 rcu_read_unlock();
9e508490
JE
147 IP6_INC_STATS_BH(dev_net(dst->dev),
148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
149 kfree_skb(skb);
150 return -EINVAL;
1da177e4
LT
151}
152
9e508490
JE
153static int ip6_finish_output(struct sk_buff *skb)
154{
155 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
156 dst_allfrag(skb_dst(skb)))
157 return ip6_fragment(skb, ip6_finish_output2);
158 else
159 return ip6_finish_output2(skb);
160}
161
1da177e4
LT
162int ip6_output(struct sk_buff *skb)
163{
9e508490 164 struct net_device *dev = skb_dst(skb)->dev;
adf30907 165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 166 if (unlikely(idev->cnf.disable_ipv6)) {
9e508490 167 IP6_INC_STATS(dev_net(dev), idev,
3bd653c8 168 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
169 kfree_skb(skb);
170 return 0;
171 }
172
9c6eb28a
JE
173 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
174 ip6_finish_output,
175 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1da177e4
LT
176}
177
1da177e4 178/*
b5d43998 179 * xmit an sk_buff (used by TCP, SCTP and DCCP)
1da177e4
LT
180 */
181
4c9483b2 182int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
b903d324 183 struct ipv6_txoptions *opt, int tclass)
1da177e4 184{
3bd653c8 185 struct net *net = sock_net(sk);
b30bd282 186 struct ipv6_pinfo *np = inet6_sk(sk);
4c9483b2 187 struct in6_addr *first_hop = &fl6->daddr;
adf30907 188 struct dst_entry *dst = skb_dst(skb);
1da177e4 189 struct ipv6hdr *hdr;
4c9483b2 190 u8 proto = fl6->flowi6_proto;
1da177e4 191 int seg_len = skb->len;
e651f03a 192 int hlimit = -1;
1da177e4
LT
193 u32 mtu;
194
195 if (opt) {
c2636b4d 196 unsigned int head_room;
1da177e4
LT
197
198 /* First: exthdrs may take lots of space (~8K for now)
199 MAX_HEADER is not enough.
200 */
201 head_room = opt->opt_nflen + opt->opt_flen;
202 seg_len += head_room;
203 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
204
205 if (skb_headroom(skb) < head_room) {
206 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 207 if (skb2 == NULL) {
adf30907 208 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
209 IPSTATS_MIB_OUTDISCARDS);
210 kfree_skb(skb);
1da177e4
LT
211 return -ENOBUFS;
212 }
a11d206d
YH
213 kfree_skb(skb);
214 skb = skb2;
83d7eb29 215 skb_set_owner_w(skb, sk);
1da177e4
LT
216 }
217 if (opt->opt_flen)
218 ipv6_push_frag_opts(skb, opt, &proto);
219 if (opt->opt_nflen)
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
221 }
222
e2d1bca7
ACM
223 skb_push(skb, sizeof(struct ipv6hdr));
224 skb_reset_network_header(skb);
0660e03f 225 hdr = ipv6_hdr(skb);
1da177e4
LT
226
227 /*
228 * Fill in the IPv6 header
229 */
b903d324 230 if (np)
1da177e4
LT
231 hlimit = np->hop_limit;
232 if (hlimit < 0)
6b75d090 233 hlimit = ip6_dst_hoplimit(dst);
1da177e4 234
4c9483b2 235 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
41a1f8ea 236
1da177e4
LT
237 hdr->payload_len = htons(seg_len);
238 hdr->nexthdr = proto;
239 hdr->hop_limit = hlimit;
240
4c9483b2 241 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1da177e4
LT
242 ipv6_addr_copy(&hdr->daddr, first_hop);
243
a2c2064f 244 skb->priority = sk->sk_priority;
4a19ec58 245 skb->mark = sk->sk_mark;
a2c2064f 246
1da177e4 247 mtu = dst_mtu(dst);
283d07ac 248 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 249 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 250 IPSTATS_MIB_OUT, skb->len);
b2e0b385
JE
251 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
252 dst->dev, dst_output);
1da177e4
LT
253 }
254
255 if (net_ratelimit())
256 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
257 skb->dev = dst->dev;
3ffe533c 258 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 259 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
260 kfree_skb(skb);
261 return -EMSGSIZE;
262}
263
7159039a
YH
264EXPORT_SYMBOL(ip6_xmit);
265
1da177e4
LT
266/*
267 * To avoid extra problems ND packets are send through this
268 * routine. It's code duplication but I really want to avoid
269 * extra checks since ipv6_build_header is used by TCP (which
270 * is for us performance critical)
271 */
272
273int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 274 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
275 int proto, int len)
276{
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *hdr;
1da177e4
LT
279
280 skb->protocol = htons(ETH_P_IPV6);
281 skb->dev = dev;
282
55f79cc0
ACM
283 skb_reset_network_header(skb);
284 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 285 hdr = ipv6_hdr(skb);
1da177e4 286
ae08e1f0 287 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
288
289 hdr->payload_len = htons(len);
290 hdr->nexthdr = proto;
291 hdr->hop_limit = np->hop_limit;
292
293 ipv6_addr_copy(&hdr->saddr, saddr);
294 ipv6_addr_copy(&hdr->daddr, daddr);
295
296 return 0;
297}
298
299static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
300{
301 struct ip6_ra_chain *ra;
302 struct sock *last = NULL;
303
304 read_lock(&ip6_ra_lock);
305 for (ra = ip6_ra_chain; ra; ra = ra->next) {
306 struct sock *sk = ra->sk;
0bd1b59b
AM
307 if (sk && ra->sel == sel &&
308 (!sk->sk_bound_dev_if ||
309 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
310 if (last) {
311 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
312 if (skb2)
313 rawv6_rcv(last, skb2);
314 }
315 last = sk;
316 }
317 }
318
319 if (last) {
320 rawv6_rcv(last, skb);
321 read_unlock(&ip6_ra_lock);
322 return 1;
323 }
324 read_unlock(&ip6_ra_lock);
325 return 0;
326}
327
e21e0b5f
VN
328static int ip6_forward_proxy_check(struct sk_buff *skb)
329{
0660e03f 330 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
331 u8 nexthdr = hdr->nexthdr;
332 int offset;
333
334 if (ipv6_ext_hdr(nexthdr)) {
335 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
336 if (offset < 0)
337 return 0;
338 } else
339 offset = sizeof(struct ipv6hdr);
340
341 if (nexthdr == IPPROTO_ICMPV6) {
342 struct icmp6hdr *icmp6;
343
d56f90a7
ACM
344 if (!pskb_may_pull(skb, (skb_network_header(skb) +
345 offset + 1 - skb->data)))
e21e0b5f
VN
346 return 0;
347
d56f90a7 348 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
349
350 switch (icmp6->icmp6_type) {
351 case NDISC_ROUTER_SOLICITATION:
352 case NDISC_ROUTER_ADVERTISEMENT:
353 case NDISC_NEIGHBOUR_SOLICITATION:
354 case NDISC_NEIGHBOUR_ADVERTISEMENT:
355 case NDISC_REDIRECT:
356 /* For reaction involving unicast neighbor discovery
357 * message destined to the proxied address, pass it to
358 * input function.
359 */
360 return 1;
361 default:
362 break;
363 }
364 }
365
74553b09
VN
366 /*
367 * The proxying router can't forward traffic sent to a link-local
368 * address, so signal the sender and discard the packet. This
369 * behavior is clarified by the MIPv6 specification.
370 */
371 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
372 dst_link_failure(skb);
373 return -1;
374 }
375
e21e0b5f
VN
376 return 0;
377}
378
1da177e4
LT
/* Continuation of the NF_INET_FORWARD hook: send the packet out via its route. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
383
384int ip6_forward(struct sk_buff *skb)
385{
adf30907 386 struct dst_entry *dst = skb_dst(skb);
0660e03f 387 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 388 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 389 struct net *net = dev_net(dst->dev);
69cce1d1 390 struct neighbour *n;
14f3ad6f 391 u32 mtu;
1ab1457c 392
53b7997f 393 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
394 goto error;
395
4497b076
BH
396 if (skb_warn_if_lro(skb))
397 goto drop;
398
1da177e4 399 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 400 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
401 goto drop;
402 }
403
72b43d08
AK
404 if (skb->pkt_type != PACKET_HOST)
405 goto drop;
406
35fc92a9 407 skb_forward_csum(skb);
1da177e4
LT
408
409 /*
410 * We DO NOT make any processing on
411 * RA packets, pushing them to user level AS IS
412 * without ane WARRANTY that application will be able
413 * to interpret them. The reason is that we
414 * cannot make anything clever here.
415 *
416 * We are not end-node, so that if packet contains
417 * AH/ESP, we cannot make anything.
418 * Defragmentation also would be mistake, RA packets
419 * cannot be fragmented, because there is no warranty
420 * that different fragments will go along one path. --ANK
421 */
422 if (opt->ra) {
d56f90a7 423 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
424 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
425 return 0;
426 }
427
428 /*
429 * check and decrement ttl
430 */
431 if (hdr->hop_limit <= 1) {
432 /* Force OUTPUT device used as source address */
433 skb->dev = dst->dev;
3ffe533c 434 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
483a47d2
DL
435 IP6_INC_STATS_BH(net,
436 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
437
438 kfree_skb(skb);
439 return -ETIMEDOUT;
440 }
441
fbea49e1 442 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 443 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 444 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
445 int proxied = ip6_forward_proxy_check(skb);
446 if (proxied > 0)
e21e0b5f 447 return ip6_input(skb);
74553b09 448 else if (proxied < 0) {
3bd653c8
DL
449 IP6_INC_STATS(net, ip6_dst_idev(dst),
450 IPSTATS_MIB_INDISCARDS);
74553b09
VN
451 goto drop;
452 }
e21e0b5f
VN
453 }
454
1da177e4 455 if (!xfrm6_route_forward(skb)) {
3bd653c8 456 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
457 goto drop;
458 }
adf30907 459 dst = skb_dst(skb);
1da177e4
LT
460
461 /* IPv6 specs say nothing about it, but it is clear that we cannot
462 send redirects to source routed frames.
1e5dc146 463 We don't send redirects to frames decapsulated from IPsec.
1da177e4 464 */
69cce1d1
DM
465 n = dst_get_neighbour(dst);
466 if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
1da177e4
LT
467 struct in6_addr *target = NULL;
468 struct rt6_info *rt;
1da177e4
LT
469
470 /*
471 * incoming and outgoing devices are the same
472 * send a redirect.
473 */
474
475 rt = (struct rt6_info *) dst;
476 if ((rt->rt6i_flags & RTF_GATEWAY))
477 target = (struct in6_addr*)&n->primary_key;
478 else
479 target = &hdr->daddr;
480
92d86829
DM
481 if (!rt->rt6i_peer)
482 rt6_bind_peer(rt, 1);
483
1da177e4
LT
484 /* Limit redirects both by destination (here)
485 and by source (inside ndisc_send_redirect)
486 */
92d86829 487 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1da177e4 488 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
489 } else {
490 int addrtype = ipv6_addr_type(&hdr->saddr);
491
1da177e4 492 /* This check is security critical. */
f81b2e7d
YH
493 if (addrtype == IPV6_ADDR_ANY ||
494 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
495 goto error;
496 if (addrtype & IPV6_ADDR_LINKLOCAL) {
497 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 498 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
499 goto error;
500 }
1da177e4
LT
501 }
502
14f3ad6f
UW
503 mtu = dst_mtu(dst);
504 if (mtu < IPV6_MIN_MTU)
505 mtu = IPV6_MIN_MTU;
506
0aa68271 507 if (skb->len > mtu && !skb_is_gso(skb)) {
1da177e4
LT
508 /* Again, force OUTPUT device used as source address */
509 skb->dev = dst->dev;
14f3ad6f 510 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
483a47d2
DL
511 IP6_INC_STATS_BH(net,
512 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
513 IP6_INC_STATS_BH(net,
514 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
515 kfree_skb(skb);
516 return -EMSGSIZE;
517 }
518
519 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 520 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
521 goto drop;
522 }
523
0660e03f 524 hdr = ipv6_hdr(skb);
1da177e4
LT
525
526 /* Mangling hops number delayed to point after skb COW */
1ab1457c 527
1da177e4
LT
528 hdr->hop_limit--;
529
483a47d2 530 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
b2e0b385 531 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
6e23ae2a 532 ip6_forward_finish);
1da177e4
LT
533
534error:
483a47d2 535 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
536drop:
537 kfree_skb(skb);
538 return -EINVAL;
539}
540
541static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
542{
543 to->pkt_type = from->pkt_type;
544 to->priority = from->priority;
545 to->protocol = from->protocol;
adf30907
ED
546 skb_dst_drop(to);
547 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 548 to->dev = from->dev;
82e91ffe 549 to->mark = from->mark;
1da177e4
LT
550
551#ifdef CONFIG_NET_SCHED
552 to->tc_index = from->tc_index;
553#endif
e7ac05f3 554 nf_copy(to, from);
ba9dda3a
JK
555#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
556 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
557 to->nf_trace = from->nf_trace;
558#endif
984bc16c 559 skb_copy_secmark(to, from);
1da177e4
LT
560}
561
562int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
563{
564 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
565 struct ipv6_opt_hdr *exthdr =
566 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 567 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 568 int found_rhdr = 0;
0660e03f 569 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
570
571 while (offset + 1 <= packet_len) {
572
573 switch (**nexthdr) {
574
575 case NEXTHDR_HOP:
27637df9 576 break;
1da177e4 577 case NEXTHDR_ROUTING:
27637df9
MN
578 found_rhdr = 1;
579 break;
1da177e4 580 case NEXTHDR_DEST:
59fbb3a6 581#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
582 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
583 break;
584#endif
585 if (found_rhdr)
586 return offset;
1da177e4
LT
587 break;
588 default :
589 return offset;
590 }
27637df9
MN
591
592 offset += ipv6_optlen(exthdr);
593 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
594 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
595 offset);
1da177e4
LT
596 }
597
598 return offset;
599}
600
87c48fa3
ED
601void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
602{
603 static atomic_t ipv6_fragmentation_id;
604 int old, new;
605
606 if (rt) {
607 struct inet_peer *peer;
608
609 if (!rt->rt6i_peer)
610 rt6_bind_peer(rt, 1);
611 peer = rt->rt6i_peer;
612 if (peer) {
613 fhdr->identification = htonl(inet_getid(peer, 0));
614 return;
615 }
616 }
617 do {
618 old = atomic_read(&ipv6_fragmentation_id);
619 new = old + 1;
620 if (!new)
621 new = 1;
622 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
623 fhdr->identification = htonl(new);
624}
625
ad0081e4 626int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
1da177e4 627{
1da177e4 628 struct sk_buff *frag;
adf30907 629 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 630 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
631 struct ipv6hdr *tmp_hdr;
632 struct frag_hdr *fh;
633 unsigned int mtu, hlen, left, len;
ae08e1f0 634 __be32 frag_id = 0;
1da177e4
LT
635 int ptr, offset = 0, err=0;
636 u8 *prevhdr, nexthdr = 0;
adf30907 637 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 638
1da177e4
LT
639 hlen = ip6_find_1stfragopt(skb, &prevhdr);
640 nexthdr = *prevhdr;
641
628a5c56 642 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
643
644 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 645 * or if the skb it not generated by a local socket.
b881ef76 646 */
f2228f78 647 if (!skb->local_df && skb->len > mtu) {
adf30907 648 skb->dev = skb_dst(skb)->dev;
3ffe533c 649 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 650 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 651 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
652 kfree_skb(skb);
653 return -EMSGSIZE;
654 }
655
d91675f9
YH
656 if (np && np->frag_size < mtu) {
657 if (np->frag_size)
658 mtu = np->frag_size;
659 }
660 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 661
21dc3301 662 if (skb_has_frag_list(skb)) {
1da177e4 663 int first_len = skb_pagelen(skb);
3d13008e 664 struct sk_buff *frag2;
1da177e4
LT
665
666 if (first_len - hlen > mtu ||
667 ((first_len - hlen) & 7) ||
668 skb_cloned(skb))
669 goto slow_path;
670
4d9092bb 671 skb_walk_frags(skb, frag) {
1da177e4
LT
672 /* Correct geometry. */
673 if (frag->len > mtu ||
674 ((frag->len & 7) && frag->next) ||
675 skb_headroom(frag) < hlen)
3d13008e 676 goto slow_path_clean;
1da177e4 677
1da177e4
LT
678 /* Partially cloned skb? */
679 if (skb_shared(frag))
3d13008e 680 goto slow_path_clean;
2fdba6b0
HX
681
682 BUG_ON(frag->sk);
683 if (skb->sk) {
2fdba6b0
HX
684 frag->sk = skb->sk;
685 frag->destructor = sock_wfree;
2fdba6b0 686 }
3d13008e 687 skb->truesize -= frag->truesize;
1da177e4
LT
688 }
689
690 err = 0;
691 offset = 0;
692 frag = skb_shinfo(skb)->frag_list;
4d9092bb 693 skb_frag_list_init(skb);
1da177e4
LT
694 /* BUILD HEADER */
695
9a217a1c 696 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 697 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 698 if (!tmp_hdr) {
adf30907 699 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 700 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
701 return -ENOMEM;
702 }
703
1da177e4
LT
704 __skb_pull(skb, hlen);
705 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
706 __skb_push(skb, hlen);
707 skb_reset_network_header(skb);
d56f90a7 708 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 709
87c48fa3 710 ipv6_select_ident(fh, rt);
1da177e4
LT
711 fh->nexthdr = nexthdr;
712 fh->reserved = 0;
713 fh->frag_off = htons(IP6_MF);
714 frag_id = fh->identification;
715
716 first_len = skb_pagelen(skb);
717 skb->data_len = first_len - skb_headlen(skb);
718 skb->len = first_len;
0660e03f
ACM
719 ipv6_hdr(skb)->payload_len = htons(first_len -
720 sizeof(struct ipv6hdr));
a11d206d 721
d8d1f30b 722 dst_hold(&rt->dst);
1da177e4
LT
723
724 for (;;) {
725 /* Prepare header of the next frame,
726 * before previous one went down. */
727 if (frag) {
728 frag->ip_summed = CHECKSUM_NONE;
badff6d0 729 skb_reset_transport_header(frag);
1da177e4 730 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
731 __skb_push(frag, hlen);
732 skb_reset_network_header(frag);
d56f90a7
ACM
733 memcpy(skb_network_header(frag), tmp_hdr,
734 hlen);
1da177e4
LT
735 offset += skb->len - hlen - sizeof(struct frag_hdr);
736 fh->nexthdr = nexthdr;
737 fh->reserved = 0;
738 fh->frag_off = htons(offset);
739 if (frag->next != NULL)
740 fh->frag_off |= htons(IP6_MF);
741 fh->identification = frag_id;
0660e03f
ACM
742 ipv6_hdr(frag)->payload_len =
743 htons(frag->len -
744 sizeof(struct ipv6hdr));
1da177e4
LT
745 ip6_copy_metadata(frag, skb);
746 }
1ab1457c 747
1da177e4 748 err = output(skb);
dafee490 749 if(!err)
d8d1f30b 750 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 751 IPSTATS_MIB_FRAGCREATES);
dafee490 752
1da177e4
LT
753 if (err || !frag)
754 break;
755
756 skb = frag;
757 frag = skb->next;
758 skb->next = NULL;
759 }
760
a51482bd 761 kfree(tmp_hdr);
1da177e4
LT
762
763 if (err == 0) {
d8d1f30b 764 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 765 IPSTATS_MIB_FRAGOKS);
d8d1f30b 766 dst_release(&rt->dst);
1da177e4
LT
767 return 0;
768 }
769
770 while (frag) {
771 skb = frag->next;
772 kfree_skb(frag);
773 frag = skb;
774 }
775
d8d1f30b 776 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
3bd653c8 777 IPSTATS_MIB_FRAGFAILS);
d8d1f30b 778 dst_release(&rt->dst);
1da177e4 779 return err;
3d13008e
ED
780
781slow_path_clean:
782 skb_walk_frags(skb, frag2) {
783 if (frag2 == frag)
784 break;
785 frag2->sk = NULL;
786 frag2->destructor = NULL;
787 skb->truesize += frag2->truesize;
788 }
1da177e4
LT
789 }
790
791slow_path:
792 left = skb->len - hlen; /* Space per frame */
793 ptr = hlen; /* Where to start from */
794
795 /*
796 * Fragment the datagram.
797 */
798
799 *prevhdr = NEXTHDR_FRAGMENT;
800
801 /*
802 * Keep copying data until we run out.
803 */
804 while(left > 0) {
805 len = left;
806 /* IF: it doesn't fit, use 'mtu' - the data space left */
807 if (len > mtu)
808 len = mtu;
25985edc 809 /* IF: we are not sending up to and including the packet end
1da177e4
LT
810 then align the next start on an eight byte boundary */
811 if (len < left) {
812 len &= ~7;
813 }
814 /*
815 * Allocate buffer.
816 */
817
d8d1f30b 818 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 819 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 820 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 821 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
822 err = -ENOMEM;
823 goto fail;
824 }
825
826 /*
827 * Set up data on packet
828 */
829
830 ip6_copy_metadata(frag, skb);
d8d1f30b 831 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
1da177e4 832 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 833 skb_reset_network_header(frag);
badff6d0 834 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
835 frag->transport_header = (frag->network_header + hlen +
836 sizeof(struct frag_hdr));
1da177e4
LT
837
838 /*
839 * Charge the memory for the fragment to any owner
840 * it might possess
841 */
842 if (skb->sk)
843 skb_set_owner_w(frag, skb->sk);
844
845 /*
846 * Copy the packet header into the new buffer.
847 */
d626f62b 848 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
849
850 /*
851 * Build fragment header.
852 */
853 fh->nexthdr = nexthdr;
854 fh->reserved = 0;
f36d6ab1 855 if (!frag_id) {
87c48fa3 856 ipv6_select_ident(fh, rt);
1da177e4
LT
857 frag_id = fh->identification;
858 } else
859 fh->identification = frag_id;
860
861 /*
862 * Copy a block of the IP datagram.
863 */
8984e41d 864 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
865 BUG();
866 left -= len;
867
868 fh->frag_off = htons(offset);
869 if (left > 0)
870 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
871 ipv6_hdr(frag)->payload_len = htons(frag->len -
872 sizeof(struct ipv6hdr));
1da177e4
LT
873
874 ptr += len;
875 offset += len;
876
877 /*
878 * Put this fragment into the sending queue.
879 */
1da177e4
LT
880 err = output(frag);
881 if (err)
882 goto fail;
dafee490 883
adf30907 884 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 885 IPSTATS_MIB_FRAGCREATES);
1da177e4 886 }
adf30907 887 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 888 IPSTATS_MIB_FRAGOKS);
1da177e4 889 kfree_skb(skb);
1da177e4
LT
890 return err;
891
892fail:
adf30907 893 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 894 IPSTATS_MIB_FRAGFAILS);
1ab1457c 895 kfree_skb(skb);
1da177e4
LT
896 return err;
897}
898
b71d1d42
ED
899static inline int ip6_rt_check(const struct rt6key *rt_key,
900 const struct in6_addr *fl_addr,
901 const struct in6_addr *addr_cache)
cf6b1982 902{
a02cec21
ED
903 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
904 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
905}
906
497c615a
HX
907static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
908 struct dst_entry *dst,
b71d1d42 909 const struct flowi6 *fl6)
1da177e4 910{
497c615a
HX
911 struct ipv6_pinfo *np = inet6_sk(sk);
912 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 913
497c615a
HX
914 if (!dst)
915 goto out;
916
917 /* Yes, checking route validity in not connected
918 * case is not very simple. Take into account,
919 * that we do not support routing by source, TOS,
920 * and MSG_DONTROUTE --ANK (980726)
921 *
cf6b1982
YH
922 * 1. ip6_rt_check(): If route was host route,
923 * check that cached destination is current.
497c615a
HX
924 * If it is network route, we still may
925 * check its validity using saved pointer
926 * to the last used address: daddr_cache.
927 * We do not want to save whole address now,
928 * (because main consumer of this service
929 * is tcp, which has not this problem),
930 * so that the last trick works only on connected
931 * sockets.
932 * 2. oif also should be the same.
933 */
4c9483b2 934 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
8e1ef0a9 935#ifdef CONFIG_IPV6_SUBTREES
4c9483b2 936 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
8e1ef0a9 937#endif
4c9483b2 938 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
497c615a
HX
939 dst_release(dst);
940 dst = NULL;
1da177e4
LT
941 }
942
497c615a
HX
943out:
944 return dst;
945}
946
947static int ip6_dst_lookup_tail(struct sock *sk,
4c9483b2 948 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 949{
3b1e0a65 950 struct net *net = sock_net(sk);
69cce1d1
DM
951#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
952 struct neighbour *n;
953#endif
954 int err;
497c615a 955
1da177e4 956 if (*dst == NULL)
4c9483b2 957 *dst = ip6_route_output(net, sk, fl6);
1da177e4
LT
958
959 if ((err = (*dst)->error))
960 goto out_err_release;
961
4c9483b2 962 if (ipv6_addr_any(&fl6->saddr)) {
c3968a85
DW
963 struct rt6_info *rt = (struct rt6_info *) *dst;
964 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
965 sk ? inet6_sk(sk)->srcprefs : 0,
966 &fl6->saddr);
44456d37 967 if (err)
1da177e4 968 goto out_err_release;
1da177e4
LT
969 }
970
95c385b4 971#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
972 /*
973 * Here if the dst entry we've looked up
974 * has a neighbour entry that is in the INCOMPLETE
975 * state and the src address from the flow is
976 * marked as OPTIMISTIC, we release the found
977 * dst entry and replace it instead with the
978 * dst entry of the nexthop router
979 */
f2c31e32 980 rcu_read_lock();
69cce1d1
DM
981 n = dst_get_neighbour(*dst);
982 if (n && !(n->nud_state & NUD_VALID)) {
e550dfb0 983 struct inet6_ifaddr *ifp;
4c9483b2 984 struct flowi6 fl_gw6;
e550dfb0
NH
985 int redirect;
986
f2c31e32 987 rcu_read_unlock();
4c9483b2 988 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
989 (*dst)->dev, 1);
990
991 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
992 if (ifp)
993 in6_ifa_put(ifp);
994
995 if (redirect) {
996 /*
997 * We need to get the dst entry for the
998 * default router instead
999 */
1000 dst_release(*dst);
4c9483b2
DM
1001 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1002 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1003 *dst = ip6_route_output(net, sk, &fl_gw6);
e550dfb0
NH
1004 if ((err = (*dst)->error))
1005 goto out_err_release;
95c385b4 1006 }
f2c31e32
ED
1007 } else {
1008 rcu_read_unlock();
e550dfb0 1009 }
95c385b4
NH
1010#endif
1011
1da177e4
LT
1012 return 0;
1013
1014out_err_release:
ca46f9c8 1015 if (err == -ENETUNREACH)
483a47d2 1016 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1017 dst_release(*dst);
1018 *dst = NULL;
1019 return err;
1020}
34a0b3cd 1021
497c615a
HX
1022/**
1023 * ip6_dst_lookup - perform route lookup on flow
1024 * @sk: socket which provides route info
1025 * @dst: pointer to dst_entry * for result
4c9483b2 1026 * @fl6: flow to lookup
497c615a
HX
1027 *
1028 * This function performs a route lookup on the given flow.
1029 *
1030 * It returns zero on success, or a standard errno code on error.
1031 */
4c9483b2 1032int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
497c615a
HX
1033{
1034 *dst = NULL;
4c9483b2 1035 return ip6_dst_lookup_tail(sk, dst, fl6);
497c615a 1036}
3cf3dc6c
ACM
1037EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1038
497c615a 1039/**
68d0c6d3
DM
1040 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1041 * @sk: socket which provides route info
4c9483b2 1042 * @fl6: flow to lookup
68d0c6d3 1043 * @final_dst: final destination address for ipsec lookup
a1414715 1044 * @can_sleep: we are in a sleepable context
68d0c6d3
DM
1045 *
1046 * This function performs a route lookup on the given flow.
1047 *
1048 * It returns a valid dst pointer on success, or a pointer encoded
1049 * error code.
1050 */
4c9483b2 1051struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1052 const struct in6_addr *final_dst,
a1414715 1053 bool can_sleep)
68d0c6d3
DM
1054{
1055 struct dst_entry *dst = NULL;
1056 int err;
1057
4c9483b2 1058 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1059 if (err)
1060 return ERR_PTR(err);
1061 if (final_dst)
4c9483b2 1062 ipv6_addr_copy(&fl6->daddr, final_dst);
2774c131 1063 if (can_sleep)
4c9483b2 1064 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1065
4c9483b2 1066 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1067}
1068EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1069
1070/**
1071 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1072 * @sk: socket which provides the dst cache and route info
4c9483b2 1073 * @fl6: flow to lookup
68d0c6d3 1074 * @final_dst: final destination address for ipsec lookup
a1414715 1075 * @can_sleep: we are in a sleepable context
497c615a
HX
1076 *
1077 * This function performs a route lookup on the given flow with the
1078 * possibility of using the cached route in the socket if it is valid.
1079 * It will take the socket dst lock when operating on the dst cache.
1080 * As a result, this function can only be used in process context.
1081 *
68d0c6d3
DM
1082 * It returns a valid dst pointer on success, or a pointer encoded
1083 * error code.
497c615a 1084 */
4c9483b2 1085struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
68d0c6d3 1086 const struct in6_addr *final_dst,
a1414715 1087 bool can_sleep)
497c615a 1088{
68d0c6d3
DM
1089 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1090 int err;
497c615a 1091
4c9483b2 1092 dst = ip6_sk_dst_check(sk, dst, fl6);
68d0c6d3 1093
4c9483b2 1094 err = ip6_dst_lookup_tail(sk, &dst, fl6);
68d0c6d3
DM
1095 if (err)
1096 return ERR_PTR(err);
1097 if (final_dst)
4c9483b2 1098 ipv6_addr_copy(&fl6->daddr, final_dst);
2774c131 1099 if (can_sleep)
4c9483b2 1100 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
2774c131 1101
4c9483b2 1102 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
497c615a 1103}
68d0c6d3 1104EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1105
34a0b3cd 1106static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1107 int getfrag(void *from, char *to, int offset, int len,
1108 int odd, struct sk_buff *skb),
1109 void *from, int length, int hh_len, int fragheaderlen,
87c48fa3
ED
1110 int transhdrlen, int mtu,unsigned int flags,
1111 struct rt6_info *rt)
e89e9cf5
AR
1112
1113{
1114 struct sk_buff *skb;
1115 int err;
1116
1117 /* There is support for UDP large send offload by network
1118 * device, so create one single skb packet containing complete
1119 * udp datagram
1120 */
1121 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1122 skb = sock_alloc_send_skb(sk,
1123 hh_len + fragheaderlen + transhdrlen + 20,
1124 (flags & MSG_DONTWAIT), &err);
1125 if (skb == NULL)
504744e4 1126 return err;
e89e9cf5
AR
1127
1128 /* reserve space for Hardware header */
1129 skb_reserve(skb, hh_len);
1130
1131 /* create space for UDP/IP header */
1132 skb_put(skb,fragheaderlen + transhdrlen);
1133
1134 /* initialize network header pointer */
c1d2bbe1 1135 skb_reset_network_header(skb);
e89e9cf5
AR
1136
1137 /* initialize protocol header pointer */
b0e380b1 1138 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1139
84fa7933 1140 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5 1141 skb->csum = 0;
e89e9cf5
AR
1142 }
1143
1144 err = skb_append_datato_frags(sk,skb, getfrag, from,
1145 (length - transhdrlen));
1146 if (!err) {
1147 struct frag_hdr fhdr;
1148
c31d5326
SS
1149 /* Specify the length of each IPv6 datagram fragment.
1150 * It has to be a multiple of 8.
1151 */
1152 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1153 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1154 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
87c48fa3 1155 ipv6_select_ident(&fhdr, rt);
e89e9cf5
AR
1156 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1157 __skb_queue_tail(&sk->sk_write_queue, skb);
1158
1159 return 0;
1160 }
1161 /* There is not enough support do UPD LSO,
1162 * so follow normal path
1163 */
1164 kfree_skb(skb);
1165
1166 return err;
1167}
1da177e4 1168
0178b695
HX
1169static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1170 gfp_t gfp)
1171{
1172 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1173}
1174
1175static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1176 gfp_t gfp)
1177{
1178 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1179}
1180
41a1f8ea
YH
1181int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1182 int offset, int len, int odd, struct sk_buff *skb),
1183 void *from, int length, int transhdrlen,
4c9483b2 1184 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
13b52cd4 1185 struct rt6_info *rt, unsigned int flags, int dontfrag)
1da177e4
LT
1186{
1187 struct inet_sock *inet = inet_sk(sk);
1188 struct ipv6_pinfo *np = inet6_sk(sk);
bdc712b4 1189 struct inet_cork *cork;
1da177e4
LT
1190 struct sk_buff *skb;
1191 unsigned int maxfraglen, fragheaderlen;
1192 int exthdrlen;
299b0767 1193 int dst_exthdrlen;
1da177e4
LT
1194 int hh_len;
1195 int mtu;
1196 int copy;
1197 int err;
1198 int offset = 0;
1199 int csummode = CHECKSUM_NONE;
a693e698 1200 __u8 tx_flags = 0;
1da177e4
LT
1201
1202 if (flags&MSG_PROBE)
1203 return 0;
bdc712b4 1204 cork = &inet->cork.base;
1da177e4
LT
1205 if (skb_queue_empty(&sk->sk_write_queue)) {
1206 /*
1207 * setup for corking
1208 */
1209 if (opt) {
0178b695 1210 if (WARN_ON(np->cork.opt))
1da177e4 1211 return -EINVAL;
0178b695
HX
1212
1213 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1214 if (unlikely(np->cork.opt == NULL))
1215 return -ENOBUFS;
1216
1217 np->cork.opt->tot_len = opt->tot_len;
1218 np->cork.opt->opt_flen = opt->opt_flen;
1219 np->cork.opt->opt_nflen = opt->opt_nflen;
1220
1221 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1222 sk->sk_allocation);
1223 if (opt->dst0opt && !np->cork.opt->dst0opt)
1224 return -ENOBUFS;
1225
1226 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1227 sk->sk_allocation);
1228 if (opt->dst1opt && !np->cork.opt->dst1opt)
1229 return -ENOBUFS;
1230
1231 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1232 sk->sk_allocation);
1233 if (opt->hopopt && !np->cork.opt->hopopt)
1234 return -ENOBUFS;
1235
1236 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1237 sk->sk_allocation);
1238 if (opt->srcrt && !np->cork.opt->srcrt)
1239 return -ENOBUFS;
1240
1da177e4
LT
1241 /* need source address above miyazawa*/
1242 }
d8d1f30b 1243 dst_hold(&rt->dst);
bdc712b4 1244 cork->dst = &rt->dst;
4c9483b2 1245 inet->cork.fl.u.ip6 = *fl6;
1da177e4 1246 np->cork.hop_limit = hlimit;
41a1f8ea 1247 np->cork.tclass = tclass;
628a5c56 1248 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
299b0767 1249 rt->dst.dev->mtu : dst_mtu(&rt->dst);
c7503609 1250 if (np->frag_size < mtu) {
d91675f9
YH
1251 if (np->frag_size)
1252 mtu = np->frag_size;
1253 }
bdc712b4 1254 cork->fragsize = mtu;
d8d1f30b 1255 if (dst_allfrag(rt->dst.path))
bdc712b4
DM
1256 cork->flags |= IPCORK_ALLFRAG;
1257 cork->length = 0;
1da177e4
LT
1258 sk->sk_sndmsg_page = NULL;
1259 sk->sk_sndmsg_off = 0;
299b0767 1260 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1da177e4
LT
1261 length += exthdrlen;
1262 transhdrlen += exthdrlen;
299b0767 1263 dst_exthdrlen = rt->dst.header_len;
1da177e4 1264 } else {
bdc712b4 1265 rt = (struct rt6_info *)cork->dst;
4c9483b2 1266 fl6 = &inet->cork.fl.u.ip6;
0178b695 1267 opt = np->cork.opt;
1da177e4
LT
1268 transhdrlen = 0;
1269 exthdrlen = 0;
299b0767 1270 dst_exthdrlen = 0;
bdc712b4 1271 mtu = cork->fragsize;
1da177e4
LT
1272 }
1273
d8d1f30b 1274 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4 1275
a1b05140 1276 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1277 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1278 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1279
1280 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
bdc712b4 1281 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
4c9483b2 1282 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1da177e4
LT
1283 return -EMSGSIZE;
1284 }
1285 }
1286
a693e698
AB
1287 /* For UDP, check if TX timestamp is enabled */
1288 if (sk->sk_type == SOCK_DGRAM) {
1289 err = sock_tx_timestamp(sk, &tx_flags);
1290 if (err)
1291 goto error;
1292 }
1293
1da177e4
LT
1294 /*
1295 * Let's try using as much space as possible.
1296 * Use MTU if total length of the message fits into the MTU.
1297 * Otherwise, we need to reserve fragment header and
1298 * fragment alignment (= 8-15 octects, in total).
1299 *
1300 * Note that we may need to "move" the data from the tail of
1ab1457c 1301 * of the buffer to the new fragment when we split
1da177e4
LT
1302 * the message.
1303 *
1ab1457c 1304 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1305 * at once if non-fragmentable extension headers
1306 * are too large.
1ab1457c 1307 * --yoshfuji
1da177e4
LT
1308 */
1309
bdc712b4 1310 cork->length += length;
4b340ae2
BH
1311 if (length > mtu) {
1312 int proto = sk->sk_protocol;
1313 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
4c9483b2 1314 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
4b340ae2
BH
1315 return -EMSGSIZE;
1316 }
e89e9cf5 1317
4b340ae2 1318 if (proto == IPPROTO_UDP &&
d8d1f30b 1319 (rt->dst.dev->features & NETIF_F_UFO)) {
4b340ae2
BH
1320
1321 err = ip6_ufo_append_data(sk, getfrag, from, length,
1322 hh_len, fragheaderlen,
87c48fa3 1323 transhdrlen, mtu, flags, rt);
4b340ae2
BH
1324 if (err)
1325 goto error;
1326 return 0;
1327 }
e89e9cf5 1328 }
1da177e4
LT
1329
1330 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1331 goto alloc_new_skb;
1332
1333 while (length > 0) {
1334 /* Check if the remaining data fits into current packet. */
bdc712b4 1335 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1da177e4
LT
1336 if (copy < length)
1337 copy = maxfraglen - skb->len;
1338
1339 if (copy <= 0) {
1340 char *data;
1341 unsigned int datalen;
1342 unsigned int fraglen;
1343 unsigned int fraggap;
1344 unsigned int alloclen;
1345 struct sk_buff *skb_prev;
1346alloc_new_skb:
1347 skb_prev = skb;
1348
1349 /* There's no room in the current skb */
1350 if (skb_prev)
1351 fraggap = skb_prev->len - maxfraglen;
1352 else
1353 fraggap = 0;
1354
1355 /*
1356 * If remaining data exceeds the mtu,
1357 * we know we need more fragment(s).
1358 */
1359 datalen = length + fraggap;
bdc712b4 1360 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1da177e4
LT
1361 datalen = maxfraglen - fragheaderlen;
1362
1363 fraglen = datalen + fragheaderlen;
1364 if ((flags & MSG_MORE) &&
d8d1f30b 1365 !(rt->dst.dev->features&NETIF_F_SG))
1da177e4
LT
1366 alloclen = mtu;
1367 else
1368 alloclen = datalen + fragheaderlen;
1369
299b0767
SK
1370 alloclen += dst_exthdrlen;
1371
1da177e4
LT
1372 /*
1373 * The last fragment gets additional space at tail.
1374 * Note: we overallocate on fragments with MSG_MODE
1375 * because we have no idea if we're the last one.
1376 */
1377 if (datalen == length + fraggap)
d8d1f30b 1378 alloclen += rt->dst.trailer_len;
1da177e4
LT
1379
1380 /*
1381 * We just reserve space for fragment header.
1ab1457c 1382 * Note: this may be overallocation if the message
1da177e4
LT
1383 * (without MSG_MORE) fits into the MTU.
1384 */
1385 alloclen += sizeof(struct frag_hdr);
1386
1387 if (transhdrlen) {
1388 skb = sock_alloc_send_skb(sk,
1389 alloclen + hh_len,
1390 (flags & MSG_DONTWAIT), &err);
1391 } else {
1392 skb = NULL;
1393 if (atomic_read(&sk->sk_wmem_alloc) <=
1394 2 * sk->sk_sndbuf)
1395 skb = sock_wmalloc(sk,
1396 alloclen + hh_len, 1,
1397 sk->sk_allocation);
1398 if (unlikely(skb == NULL))
1399 err = -ENOBUFS;
a693e698
AB
1400 else {
1401 /* Only the initial fragment
1402 * is time stamped.
1403 */
1404 tx_flags = 0;
1405 }
1da177e4
LT
1406 }
1407 if (skb == NULL)
1408 goto error;
1409 /*
1410 * Fill in the control structures
1411 */
1412 skb->ip_summed = csummode;
1413 skb->csum = 0;
1414 /* reserve for fragmentation */
1415 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1416
a693e698
AB
1417 if (sk->sk_type == SOCK_DGRAM)
1418 skb_shinfo(skb)->tx_flags = tx_flags;
1419
1da177e4
LT
1420 /*
1421 * Find where to start putting bytes
1422 */
299b0767
SK
1423 data = skb_put(skb, fraglen + dst_exthdrlen);
1424 skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
1425 data += fragheaderlen + dst_exthdrlen;
b0e380b1
ACM
1426 skb->transport_header = (skb->network_header +
1427 fragheaderlen);
1da177e4
LT
1428 if (fraggap) {
1429 skb->csum = skb_copy_and_csum_bits(
1430 skb_prev, maxfraglen,
1431 data + transhdrlen, fraggap, 0);
1432 skb_prev->csum = csum_sub(skb_prev->csum,
1433 skb->csum);
1434 data += fraggap;
e9fa4f7b 1435 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1436 }
1437 copy = datalen - transhdrlen - fraggap;
299b0767 1438
1da177e4
LT
1439 if (copy < 0) {
1440 err = -EINVAL;
1441 kfree_skb(skb);
1442 goto error;
1443 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1444 err = -EFAULT;
1445 kfree_skb(skb);
1446 goto error;
1447 }
1448
1449 offset += copy;
1450 length -= datalen - fraggap;
1451 transhdrlen = 0;
1452 exthdrlen = 0;
299b0767 1453 dst_exthdrlen = 0;
1da177e4
LT
1454 csummode = CHECKSUM_NONE;
1455
1456 /*
1457 * Put the packet on the pending queue
1458 */
1459 __skb_queue_tail(&sk->sk_write_queue, skb);
1460 continue;
1461 }
1462
1463 if (copy > length)
1464 copy = length;
1465
d8d1f30b 1466 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1da177e4
LT
1467 unsigned int off;
1468
1469 off = skb->len;
1470 if (getfrag(from, skb_put(skb, copy),
1471 offset, copy, off, skb) < 0) {
1472 __skb_trim(skb, off);
1473 err = -EFAULT;
1474 goto error;
1475 }
1476 } else {
1477 int i = skb_shinfo(skb)->nr_frags;
1478 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1479 struct page *page = sk->sk_sndmsg_page;
1480 int off = sk->sk_sndmsg_off;
1481 unsigned int left;
1482
1483 if (page && (left = PAGE_SIZE - off) > 0) {
1484 if (copy >= left)
1485 copy = left;
408dadf0 1486 if (page != skb_frag_page(frag)) {
1da177e4
LT
1487 if (i == MAX_SKB_FRAGS) {
1488 err = -EMSGSIZE;
1489 goto error;
1490 }
1da177e4 1491 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
408dadf0 1492 skb_frag_ref(skb, i);
1da177e4
LT
1493 frag = &skb_shinfo(skb)->frags[i];
1494 }
1495 } else if(i < MAX_SKB_FRAGS) {
1496 if (copy > PAGE_SIZE)
1497 copy = PAGE_SIZE;
1498 page = alloc_pages(sk->sk_allocation, 0);
1499 if (page == NULL) {
1500 err = -ENOMEM;
1501 goto error;
1502 }
1503 sk->sk_sndmsg_page = page;
1504 sk->sk_sndmsg_off = 0;
1505
1506 skb_fill_page_desc(skb, i, page, 0, 0);
1507 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1508 } else {
1509 err = -EMSGSIZE;
1510 goto error;
1511 }
9e903e08
ED
1512 if (getfrag(from,
1513 skb_frag_address(frag) + skb_frag_size(frag),
408dadf0 1514 offset, copy, skb->len, skb) < 0) {
1da177e4
LT
1515 err = -EFAULT;
1516 goto error;
1517 }
1518 sk->sk_sndmsg_off += copy;
9e903e08 1519 skb_frag_size_add(frag, copy);
1da177e4
LT
1520 skb->len += copy;
1521 skb->data_len += copy;
f945fa7a
HX
1522 skb->truesize += copy;
1523 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1524 }
1525 offset += copy;
1526 length -= copy;
1527 }
1528 return 0;
1529error:
bdc712b4 1530 cork->length -= length;
3bd653c8 1531 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1532 return err;
1533}
1534
bf138862
PE
1535static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1536{
0178b695
HX
1537 if (np->cork.opt) {
1538 kfree(np->cork.opt->dst0opt);
1539 kfree(np->cork.opt->dst1opt);
1540 kfree(np->cork.opt->hopopt);
1541 kfree(np->cork.opt->srcrt);
1542 kfree(np->cork.opt);
1543 np->cork.opt = NULL;
1544 }
1545
bdc712b4
DM
1546 if (inet->cork.base.dst) {
1547 dst_release(inet->cork.base.dst);
1548 inet->cork.base.dst = NULL;
1549 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
bf138862
PE
1550 }
1551 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1552}
1553
1da177e4
LT
1554int ip6_push_pending_frames(struct sock *sk)
1555{
1556 struct sk_buff *skb, *tmp_skb;
1557 struct sk_buff **tail_skb;
1558 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1559 struct inet_sock *inet = inet_sk(sk);
1560 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1561 struct net *net = sock_net(sk);
1da177e4
LT
1562 struct ipv6hdr *hdr;
1563 struct ipv6_txoptions *opt = np->cork.opt;
bdc712b4 1564 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
4c9483b2
DM
1565 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1566 unsigned char proto = fl6->flowi6_proto;
1da177e4
LT
1567 int err = 0;
1568
1569 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1570 goto out;
1571 tail_skb = &(skb_shinfo(skb)->frag_list);
1572
1573 /* move skb->data to ip header from ext header */
d56f90a7 1574 if (skb->data < skb_network_header(skb))
bbe735e4 1575 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1576 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1577 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1578 *tail_skb = tmp_skb;
1579 tail_skb = &(tmp_skb->next);
1580 skb->len += tmp_skb->len;
1581 skb->data_len += tmp_skb->len;
1da177e4 1582 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1583 tmp_skb->destructor = NULL;
1584 tmp_skb->sk = NULL;
1da177e4
LT
1585 }
1586
28a89453 1587 /* Allow local fragmentation. */
b5c15fc0 1588 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1589 skb->local_df = 1;
1590
4c9483b2 1591 ipv6_addr_copy(final_dst, &fl6->daddr);
cfe1fc77 1592 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1593 if (opt && opt->opt_flen)
1594 ipv6_push_frag_opts(skb, opt, &proto);
1595 if (opt && opt->opt_nflen)
1596 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1597
e2d1bca7
ACM
1598 skb_push(skb, sizeof(struct ipv6hdr));
1599 skb_reset_network_header(skb);
0660e03f 1600 hdr = ipv6_hdr(skb);
1ab1457c 1601
4c9483b2 1602 *(__be32*)hdr = fl6->flowlabel |
41a1f8ea 1603 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1604
1da177e4
LT
1605 hdr->hop_limit = np->cork.hop_limit;
1606 hdr->nexthdr = proto;
4c9483b2 1607 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1da177e4
LT
1608 ipv6_addr_copy(&hdr->daddr, final_dst);
1609
a2c2064f 1610 skb->priority = sk->sk_priority;
4a19ec58 1611 skb->mark = sk->sk_mark;
a2c2064f 1612
d8d1f30b 1613 skb_dst_set(skb, dst_clone(&rt->dst));
edf391ff 1614 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1615 if (proto == IPPROTO_ICMPV6) {
adf30907 1616 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1617
5a57d4c7 1618 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
e41b5368 1619 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1620 }
1621
ef76bc23 1622 err = ip6_local_out(skb);
1da177e4
LT
1623 if (err) {
1624 if (err > 0)
6ce9e7b5 1625 err = net_xmit_errno(err);
1da177e4
LT
1626 if (err)
1627 goto error;
1628 }
1629
1630out:
bf138862 1631 ip6_cork_release(inet, np);
1da177e4
LT
1632 return err;
1633error:
06254914 1634 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1635 goto out;
1636}
1637
1638void ip6_flush_pending_frames(struct sock *sk)
1639{
1da177e4
LT
1640 struct sk_buff *skb;
1641
1642 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1643 if (skb_dst(skb))
1644 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1645 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1646 kfree_skb(skb);
1647 }
1648
bf138862 1649 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1650}
This page took 1.001546 seconds and 4 git commands to generate.