]> Git Repo - linux.git/blame - net/ipv6/ip6_output.c
iwlwifi: don't include iwl-dev.h from iwl-devtrace.h
[linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
1da177e4
LT
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
7bc570c8 56#include <linux/mroute6.h>
1da177e4
LT
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
ef76bc23
HX
60int __ip6_local_out(struct sk_buff *skb)
61{
62 int len;
63
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
68
adf30907 69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
ef76bc23
HX
70 dst_output);
71}
72
/*
 * Send a locally generated packet: run __ip6_local_out() and, only when it
 * returns 1, continue with dst_output().  Any other hook result is returned
 * to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
84
ad643a79 85static int ip6_output_finish(struct sk_buff *skb)
1da177e4 86{
adf30907 87 struct dst_entry *dst = skb_dst(skb);
1da177e4 88
3644f0ce
SH
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
1da177e4
LT
92 return dst->neighbour->output(skb);
93
483a47d2
DL
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
96 kfree_skb(skb);
97 return -EINVAL;
98
99}
100
101/* dev_loopback_xmit for use with netfilter. */
102static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
103{
459a98ed 104 skb_reset_mac_header(newskb);
bbe735e4 105 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
adf30907 108 WARN_ON(!skb_dst(newskb));
1da177e4
LT
109
110 netif_rx(newskb);
111 return 0;
112}
113
114
115static int ip6_output2(struct sk_buff *skb)
116{
adf30907 117 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
118 struct net_device *dev = dst->dev;
119
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
122
0660e03f 123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
adf30907 124 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1da177e4 125
7ad6848c 126 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
bd91b8bf
BT
127 ((mroute6_socket(dev_net(dev)) &&
128 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
129 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
130 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
131 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
132
133 /* Do not check for IFF_ALLMULTI; multicast routing
134 is not supported in any case.
135 */
136 if (newskb)
6e23ae2a
PM
137 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
138 NULL, newskb->dev,
1da177e4
LT
139 ip6_dev_loopback_xmit);
140
0660e03f 141 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
142 IP6_INC_STATS(dev_net(dev), idev,
143 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
144 kfree_skb(skb);
145 return 0;
146 }
147 }
148
edf391ff
NH
149 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
150 skb->len);
1da177e4
LT
151 }
152
6e23ae2a
PM
153 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
154 ip6_output_finish);
1da177e4
LT
155}
156
628a5c56
JH
157static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
158{
159 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
160
161 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
adf30907 162 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
628a5c56
JH
163}
164
1da177e4
LT
165int ip6_output(struct sk_buff *skb)
166{
adf30907 167 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
778d80be 168 if (unlikely(idev->cnf.disable_ipv6)) {
adf30907 169 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
3bd653c8 170 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
171 kfree_skb(skb);
172 return 0;
173 }
174
628a5c56 175 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
adf30907 176 dst_allfrag(skb_dst(skb)))
1da177e4
LT
177 return ip6_fragment(skb, ip6_output2);
178 else
179 return ip6_output2(skb);
180}
181
1da177e4
LT
182/*
183 * xmit an sk_buff (used by TCP)
184 */
185
186int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
187 struct ipv6_txoptions *opt, int ipfragok)
188{
3bd653c8 189 struct net *net = sock_net(sk);
b30bd282 190 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4 191 struct in6_addr *first_hop = &fl->fl6_dst;
adf30907 192 struct dst_entry *dst = skb_dst(skb);
1da177e4
LT
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
e651f03a
GR
196 int hlimit = -1;
197 int tclass = 0;
1da177e4
LT
198 u32 mtu;
199
200 if (opt) {
c2636b4d 201 unsigned int head_room;
1da177e4
LT
202
203 /* First: exthdrs may take lots of space (~8K for now)
204 MAX_HEADER is not enough.
205 */
206 head_room = opt->opt_nflen + opt->opt_flen;
207 seg_len += head_room;
208 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
209
210 if (skb_headroom(skb) < head_room) {
211 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 212 if (skb2 == NULL) {
adf30907 213 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d
YH
214 IPSTATS_MIB_OUTDISCARDS);
215 kfree_skb(skb);
1da177e4
LT
216 return -ENOBUFS;
217 }
a11d206d
YH
218 kfree_skb(skb);
219 skb = skb2;
1da177e4
LT
220 if (sk)
221 skb_set_owner_w(skb, sk);
222 }
223 if (opt->opt_flen)
224 ipv6_push_frag_opts(skb, opt, &proto);
225 if (opt->opt_nflen)
226 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
227 }
228
e2d1bca7
ACM
229 skb_push(skb, sizeof(struct ipv6hdr));
230 skb_reset_network_header(skb);
0660e03f 231 hdr = ipv6_hdr(skb);
1da177e4 232
77e2f14f
WY
233 /* Allow local fragmentation. */
234 if (ipfragok)
235 skb->local_df = 1;
236
1da177e4
LT
237 /*
238 * Fill in the IPv6 header
239 */
e651f03a
GR
240 if (np) {
241 tclass = np->tclass;
1da177e4 242 hlimit = np->hop_limit;
e651f03a 243 }
1da177e4 244 if (hlimit < 0)
6b75d090 245 hlimit = ip6_dst_hoplimit(dst);
1da177e4 246
90bcaf7b 247 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 248
1da177e4
LT
249 hdr->payload_len = htons(seg_len);
250 hdr->nexthdr = proto;
251 hdr->hop_limit = hlimit;
252
253 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
254 ipv6_addr_copy(&hdr->daddr, first_hop);
255
a2c2064f 256 skb->priority = sk->sk_priority;
4a19ec58 257 skb->mark = sk->sk_mark;
a2c2064f 258
1da177e4 259 mtu = dst_mtu(dst);
283d07ac 260 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
adf30907 261 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
edf391ff 262 IPSTATS_MIB_OUT, skb->len);
6e23ae2a 263 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 264 dst_output);
1da177e4
LT
265 }
266
267 if (net_ratelimit())
268 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
269 skb->dev = dst->dev;
3ffe533c 270 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 271 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
272 kfree_skb(skb);
273 return -EMSGSIZE;
274}
275
7159039a
YH
276EXPORT_SYMBOL(ip6_xmit);
277
1da177e4
LT
278/*
279 * To avoid extra problems ND packets are send through this
280 * routine. It's code duplication but I really want to avoid
281 * extra checks since ipv6_build_header is used by TCP (which
282 * is for us performance critical)
283 */
284
285int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 286 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
287 int proto, int len)
288{
289 struct ipv6_pinfo *np = inet6_sk(sk);
290 struct ipv6hdr *hdr;
291 int totlen;
292
293 skb->protocol = htons(ETH_P_IPV6);
294 skb->dev = dev;
295
296 totlen = len + sizeof(struct ipv6hdr);
297
55f79cc0
ACM
298 skb_reset_network_header(skb);
299 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 300 hdr = ipv6_hdr(skb);
1da177e4 301
ae08e1f0 302 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
303
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
307
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
310
311 return 0;
312}
313
314static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
315{
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
318
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
0bd1b59b
AM
322 if (sk && ra->sel == sel &&
323 (!sk->sk_bound_dev_if ||
324 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
325 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2)
328 rawv6_rcv(last, skb2);
329 }
330 last = sk;
331 }
332 }
333
334 if (last) {
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
337 return 1;
338 }
339 read_unlock(&ip6_ra_lock);
340 return 0;
341}
342
e21e0b5f
VN
343static int ip6_forward_proxy_check(struct sk_buff *skb)
344{
0660e03f 345 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
346 u8 nexthdr = hdr->nexthdr;
347 int offset;
348
349 if (ipv6_ext_hdr(nexthdr)) {
350 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
351 if (offset < 0)
352 return 0;
353 } else
354 offset = sizeof(struct ipv6hdr);
355
356 if (nexthdr == IPPROTO_ICMPV6) {
357 struct icmp6hdr *icmp6;
358
d56f90a7
ACM
359 if (!pskb_may_pull(skb, (skb_network_header(skb) +
360 offset + 1 - skb->data)))
e21e0b5f
VN
361 return 0;
362
d56f90a7 363 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
364
365 switch (icmp6->icmp6_type) {
366 case NDISC_ROUTER_SOLICITATION:
367 case NDISC_ROUTER_ADVERTISEMENT:
368 case NDISC_NEIGHBOUR_SOLICITATION:
369 case NDISC_NEIGHBOUR_ADVERTISEMENT:
370 case NDISC_REDIRECT:
371 /* For reaction involving unicast neighbor discovery
372 * message destined to the proxied address, pass it to
373 * input function.
374 */
375 return 1;
376 default:
377 break;
378 }
379 }
380
74553b09
VN
381 /*
382 * The proxying router can't forward traffic sent to a link-local
383 * address, so signal the sender and discard the packet. This
384 * behavior is clarified by the MIPv6 specification.
385 */
386 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
387 dst_link_failure(skb);
388 return -1;
389 }
390
e21e0b5f
VN
391 return 0;
392}
393
1da177e4
LT
/*
 * Continuation run after the NF_INET_FORWARD hook accepted the packet:
 * send it out through its route.  Returns the result of dst_output().
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
398
399int ip6_forward(struct sk_buff *skb)
400{
adf30907 401 struct dst_entry *dst = skb_dst(skb);
0660e03f 402 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 403 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 404 struct net *net = dev_net(dst->dev);
14f3ad6f 405 u32 mtu;
1ab1457c 406
53b7997f 407 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
408 goto error;
409
4497b076
BH
410 if (skb_warn_if_lro(skb))
411 goto drop;
412
1da177e4 413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 414 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
415 goto drop;
416 }
417
35fc92a9 418 skb_forward_csum(skb);
1da177e4
LT
419
420 /*
421 * We DO NOT make any processing on
422 * RA packets, pushing them to user level AS IS
423 * without ane WARRANTY that application will be able
424 * to interpret them. The reason is that we
425 * cannot make anything clever here.
426 *
427 * We are not end-node, so that if packet contains
428 * AH/ESP, we cannot make anything.
429 * Defragmentation also would be mistake, RA packets
430 * cannot be fragmented, because there is no warranty
431 * that different fragments will go along one path. --ANK
432 */
433 if (opt->ra) {
d56f90a7 434 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 return 0;
437 }
438
439 /*
440 * check and decrement ttl
441 */
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
444 skb->dev = dst->dev;
3ffe533c 445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
483a47d2
DL
446 IP6_INC_STATS_BH(net,
447 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
448
449 kfree_skb(skb);
450 return -ETIMEDOUT;
451 }
452
fbea49e1 453 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 454 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 455 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
456 int proxied = ip6_forward_proxy_check(skb);
457 if (proxied > 0)
e21e0b5f 458 return ip6_input(skb);
74553b09 459 else if (proxied < 0) {
3bd653c8
DL
460 IP6_INC_STATS(net, ip6_dst_idev(dst),
461 IPSTATS_MIB_INDISCARDS);
74553b09
VN
462 goto drop;
463 }
e21e0b5f
VN
464 }
465
1da177e4 466 if (!xfrm6_route_forward(skb)) {
3bd653c8 467 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
468 goto drop;
469 }
adf30907 470 dst = skb_dst(skb);
1da177e4
LT
471
472 /* IPv6 specs say nothing about it, but it is clear that we cannot
473 send redirects to source routed frames.
1e5dc146 474 We don't send redirects to frames decapsulated from IPsec.
1da177e4 475 */
1e5dc146 476 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
def8b4fa 477 !skb_sec_path(skb)) {
1da177e4
LT
478 struct in6_addr *target = NULL;
479 struct rt6_info *rt;
480 struct neighbour *n = dst->neighbour;
481
482 /*
483 * incoming and outgoing devices are the same
484 * send a redirect.
485 */
486
487 rt = (struct rt6_info *) dst;
488 if ((rt->rt6i_flags & RTF_GATEWAY))
489 target = (struct in6_addr*)&n->primary_key;
490 else
491 target = &hdr->daddr;
492
493 /* Limit redirects both by destination (here)
494 and by source (inside ndisc_send_redirect)
495 */
496 if (xrlim_allow(dst, 1*HZ))
497 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
498 } else {
499 int addrtype = ipv6_addr_type(&hdr->saddr);
500
1da177e4 501 /* This check is security critical. */
f81b2e7d
YH
502 if (addrtype == IPV6_ADDR_ANY ||
503 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
504 goto error;
505 if (addrtype & IPV6_ADDR_LINKLOCAL) {
506 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
3ffe533c 507 ICMPV6_NOT_NEIGHBOUR, 0);
5bb1ab09
DS
508 goto error;
509 }
1da177e4
LT
510 }
511
14f3ad6f
UW
512 mtu = dst_mtu(dst);
513 if (mtu < IPV6_MIN_MTU)
514 mtu = IPV6_MIN_MTU;
515
516 if (skb->len > mtu) {
1da177e4
LT
517 /* Again, force OUTPUT device used as source address */
518 skb->dev = dst->dev;
14f3ad6f 519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
483a47d2
DL
520 IP6_INC_STATS_BH(net,
521 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
522 IP6_INC_STATS_BH(net,
523 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
524 kfree_skb(skb);
525 return -EMSGSIZE;
526 }
527
528 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 529 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
530 goto drop;
531 }
532
0660e03f 533 hdr = ipv6_hdr(skb);
1da177e4
LT
534
535 /* Mangling hops number delayed to point after skb COW */
1ab1457c 536
1da177e4
LT
537 hdr->hop_limit--;
538
483a47d2 539 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
540 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
541 ip6_forward_finish);
1da177e4
LT
542
543error:
483a47d2 544 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
545drop:
546 kfree_skb(skb);
547 return -EINVAL;
548}
549
550static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
551{
552 to->pkt_type = from->pkt_type;
553 to->priority = from->priority;
554 to->protocol = from->protocol;
adf30907
ED
555 skb_dst_drop(to);
556 skb_dst_set(to, dst_clone(skb_dst(from)));
1da177e4 557 to->dev = from->dev;
82e91ffe 558 to->mark = from->mark;
1da177e4
LT
559
560#ifdef CONFIG_NET_SCHED
561 to->tc_index = from->tc_index;
562#endif
e7ac05f3 563 nf_copy(to, from);
ba9dda3a
JK
564#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
565 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
566 to->nf_trace = from->nf_trace;
567#endif
984bc16c 568 skb_copy_secmark(to, from);
1da177e4
LT
569}
570
571int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
572{
573 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
574 struct ipv6_opt_hdr *exthdr =
575 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 576 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 577 int found_rhdr = 0;
0660e03f 578 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
579
580 while (offset + 1 <= packet_len) {
581
582 switch (**nexthdr) {
583
584 case NEXTHDR_HOP:
27637df9 585 break;
1da177e4 586 case NEXTHDR_ROUTING:
27637df9
MN
587 found_rhdr = 1;
588 break;
1da177e4 589 case NEXTHDR_DEST:
59fbb3a6 590#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
591 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
592 break;
593#endif
594 if (found_rhdr)
595 return offset;
1da177e4
LT
596 break;
597 default :
598 return offset;
599 }
27637df9
MN
600
601 offset += ipv6_optlen(exthdr);
602 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
603 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
604 offset);
1da177e4
LT
605 }
606
607 return offset;
608}
609
610static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
611{
1da177e4 612 struct sk_buff *frag;
adf30907 613 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
d91675f9 614 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
615 struct ipv6hdr *tmp_hdr;
616 struct frag_hdr *fh;
617 unsigned int mtu, hlen, left, len;
ae08e1f0 618 __be32 frag_id = 0;
1da177e4
LT
619 int ptr, offset = 0, err=0;
620 u8 *prevhdr, nexthdr = 0;
adf30907 621 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4 622
1da177e4
LT
623 hlen = ip6_find_1stfragopt(skb, &prevhdr);
624 nexthdr = *prevhdr;
625
628a5c56 626 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
627
628 /* We must not fragment if the socket is set to force MTU discovery
14f3ad6f 629 * or if the skb it not generated by a local socket.
b881ef76 630 */
b5c15fc0 631 if (!skb->local_df) {
adf30907 632 skb->dev = skb_dst(skb)->dev;
3ffe533c 633 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
adf30907 634 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 635 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
636 kfree_skb(skb);
637 return -EMSGSIZE;
638 }
639
d91675f9
YH
640 if (np && np->frag_size < mtu) {
641 if (np->frag_size)
642 mtu = np->frag_size;
643 }
644 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4 645
4d9092bb 646 if (skb_has_frags(skb)) {
1da177e4 647 int first_len = skb_pagelen(skb);
29ffe1a5 648 int truesizes = 0;
1da177e4
LT
649
650 if (first_len - hlen > mtu ||
651 ((first_len - hlen) & 7) ||
652 skb_cloned(skb))
653 goto slow_path;
654
4d9092bb 655 skb_walk_frags(skb, frag) {
1da177e4
LT
656 /* Correct geometry. */
657 if (frag->len > mtu ||
658 ((frag->len & 7) && frag->next) ||
659 skb_headroom(frag) < hlen)
660 goto slow_path;
661
1da177e4
LT
662 /* Partially cloned skb? */
663 if (skb_shared(frag))
664 goto slow_path;
2fdba6b0
HX
665
666 BUG_ON(frag->sk);
667 if (skb->sk) {
2fdba6b0
HX
668 frag->sk = skb->sk;
669 frag->destructor = sock_wfree;
29ffe1a5 670 truesizes += frag->truesize;
2fdba6b0 671 }
1da177e4
LT
672 }
673
674 err = 0;
675 offset = 0;
676 frag = skb_shinfo(skb)->frag_list;
4d9092bb 677 skb_frag_list_init(skb);
1da177e4
LT
678 /* BUILD HEADER */
679
9a217a1c 680 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 681 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 682 if (!tmp_hdr) {
adf30907 683 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 684 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
685 return -ENOMEM;
686 }
687
1da177e4
LT
688 __skb_pull(skb, hlen);
689 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
690 __skb_push(skb, hlen);
691 skb_reset_network_header(skb);
d56f90a7 692 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4 693
7ea2f2c5 694 ipv6_select_ident(fh);
1da177e4
LT
695 fh->nexthdr = nexthdr;
696 fh->reserved = 0;
697 fh->frag_off = htons(IP6_MF);
698 frag_id = fh->identification;
699
700 first_len = skb_pagelen(skb);
701 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 702 skb->truesize -= truesizes;
1da177e4 703 skb->len = first_len;
0660e03f
ACM
704 ipv6_hdr(skb)->payload_len = htons(first_len -
705 sizeof(struct ipv6hdr));
a11d206d
YH
706
707 dst_hold(&rt->u.dst);
1da177e4
LT
708
709 for (;;) {
710 /* Prepare header of the next frame,
711 * before previous one went down. */
712 if (frag) {
713 frag->ip_summed = CHECKSUM_NONE;
badff6d0 714 skb_reset_transport_header(frag);
1da177e4 715 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
716 __skb_push(frag, hlen);
717 skb_reset_network_header(frag);
d56f90a7
ACM
718 memcpy(skb_network_header(frag), tmp_hdr,
719 hlen);
1da177e4
LT
720 offset += skb->len - hlen - sizeof(struct frag_hdr);
721 fh->nexthdr = nexthdr;
722 fh->reserved = 0;
723 fh->frag_off = htons(offset);
724 if (frag->next != NULL)
725 fh->frag_off |= htons(IP6_MF);
726 fh->identification = frag_id;
0660e03f
ACM
727 ipv6_hdr(frag)->payload_len =
728 htons(frag->len -
729 sizeof(struct ipv6hdr));
1da177e4
LT
730 ip6_copy_metadata(frag, skb);
731 }
1ab1457c 732
1da177e4 733 err = output(skb);
dafee490 734 if(!err)
3bd653c8
DL
735 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
736 IPSTATS_MIB_FRAGCREATES);
dafee490 737
1da177e4
LT
738 if (err || !frag)
739 break;
740
741 skb = frag;
742 frag = skb->next;
743 skb->next = NULL;
744 }
745
a51482bd 746 kfree(tmp_hdr);
1da177e4
LT
747
748 if (err == 0) {
3bd653c8
DL
749 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
750 IPSTATS_MIB_FRAGOKS);
a11d206d 751 dst_release(&rt->u.dst);
1da177e4
LT
752 return 0;
753 }
754
755 while (frag) {
756 skb = frag->next;
757 kfree_skb(frag);
758 frag = skb;
759 }
760
3bd653c8
DL
761 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
762 IPSTATS_MIB_FRAGFAILS);
a11d206d 763 dst_release(&rt->u.dst);
1da177e4
LT
764 return err;
765 }
766
767slow_path:
768 left = skb->len - hlen; /* Space per frame */
769 ptr = hlen; /* Where to start from */
770
771 /*
772 * Fragment the datagram.
773 */
774
775 *prevhdr = NEXTHDR_FRAGMENT;
776
777 /*
778 * Keep copying data until we run out.
779 */
780 while(left > 0) {
781 len = left;
782 /* IF: it doesn't fit, use 'mtu' - the data space left */
783 if (len > mtu)
784 len = mtu;
785 /* IF: we are not sending upto and including the packet end
786 then align the next start on an eight byte boundary */
787 if (len < left) {
788 len &= ~7;
789 }
790 /*
791 * Allocate buffer.
792 */
793
f5184d26 794 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 795 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
adf30907 796 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 797 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
798 err = -ENOMEM;
799 goto fail;
800 }
801
802 /*
803 * Set up data on packet
804 */
805
806 ip6_copy_metadata(frag, skb);
807 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
808 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 809 skb_reset_network_header(frag);
badff6d0 810 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
811 frag->transport_header = (frag->network_header + hlen +
812 sizeof(struct frag_hdr));
1da177e4
LT
813
814 /*
815 * Charge the memory for the fragment to any owner
816 * it might possess
817 */
818 if (skb->sk)
819 skb_set_owner_w(frag, skb->sk);
820
821 /*
822 * Copy the packet header into the new buffer.
823 */
d626f62b 824 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
825
826 /*
827 * Build fragment header.
828 */
829 fh->nexthdr = nexthdr;
830 fh->reserved = 0;
f36d6ab1 831 if (!frag_id) {
7ea2f2c5 832 ipv6_select_ident(fh);
1da177e4
LT
833 frag_id = fh->identification;
834 } else
835 fh->identification = frag_id;
836
837 /*
838 * Copy a block of the IP datagram.
839 */
8984e41d 840 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
841 BUG();
842 left -= len;
843
844 fh->frag_off = htons(offset);
845 if (left > 0)
846 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
847 ipv6_hdr(frag)->payload_len = htons(frag->len -
848 sizeof(struct ipv6hdr));
1da177e4
LT
849
850 ptr += len;
851 offset += len;
852
853 /*
854 * Put this fragment into the sending queue.
855 */
1da177e4
LT
856 err = output(frag);
857 if (err)
858 goto fail;
dafee490 859
adf30907 860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
3bd653c8 861 IPSTATS_MIB_FRAGCREATES);
1da177e4 862 }
adf30907 863 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 864 IPSTATS_MIB_FRAGOKS);
1da177e4 865 kfree_skb(skb);
1da177e4
LT
866 return err;
867
868fail:
adf30907 869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
a11d206d 870 IPSTATS_MIB_FRAGFAILS);
1ab1457c 871 kfree_skb(skb);
1da177e4
LT
872 return err;
873}
874
cf6b1982
YH
875static inline int ip6_rt_check(struct rt6key *rt_key,
876 struct in6_addr *fl_addr,
877 struct in6_addr *addr_cache)
878{
879 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
880 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
881}
882
497c615a
HX
883static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
884 struct dst_entry *dst,
885 struct flowi *fl)
1da177e4 886{
497c615a
HX
887 struct ipv6_pinfo *np = inet6_sk(sk);
888 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 889
497c615a
HX
890 if (!dst)
891 goto out;
892
893 /* Yes, checking route validity in not connected
894 * case is not very simple. Take into account,
895 * that we do not support routing by source, TOS,
896 * and MSG_DONTROUTE --ANK (980726)
897 *
cf6b1982
YH
898 * 1. ip6_rt_check(): If route was host route,
899 * check that cached destination is current.
497c615a
HX
900 * If it is network route, we still may
901 * check its validity using saved pointer
902 * to the last used address: daddr_cache.
903 * We do not want to save whole address now,
904 * (because main consumer of this service
905 * is tcp, which has not this problem),
906 * so that the last trick works only on connected
907 * sockets.
908 * 2. oif also should be the same.
909 */
cf6b1982 910 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
911#ifdef CONFIG_IPV6_SUBTREES
912 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
913#endif
cf6b1982 914 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
915 dst_release(dst);
916 dst = NULL;
1da177e4
LT
917 }
918
497c615a
HX
919out:
920 return dst;
921}
922
923static int ip6_dst_lookup_tail(struct sock *sk,
924 struct dst_entry **dst, struct flowi *fl)
925{
926 int err;
3b1e0a65 927 struct net *net = sock_net(sk);
497c615a 928
1da177e4 929 if (*dst == NULL)
8a3edd80 930 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
931
932 if ((err = (*dst)->error))
933 goto out_err_release;
934
935 if (ipv6_addr_any(&fl->fl6_src)) {
191cd582 936 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
7cbca67c
YH
937 &fl->fl6_dst,
938 sk ? inet6_sk(sk)->srcprefs : 0,
939 &fl->fl6_src);
44456d37 940 if (err)
1da177e4 941 goto out_err_release;
1da177e4
LT
942 }
943
95c385b4 944#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
945 /*
946 * Here if the dst entry we've looked up
947 * has a neighbour entry that is in the INCOMPLETE
948 * state and the src address from the flow is
949 * marked as OPTIMISTIC, we release the found
950 * dst entry and replace it instead with the
951 * dst entry of the nexthop router
952 */
953 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
954 struct inet6_ifaddr *ifp;
955 struct flowi fl_gw;
956 int redirect;
957
958 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
959 (*dst)->dev, 1);
960
961 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
962 if (ifp)
963 in6_ifa_put(ifp);
964
965 if (redirect) {
966 /*
967 * We need to get the dst entry for the
968 * default router instead
969 */
970 dst_release(*dst);
971 memcpy(&fl_gw, fl, sizeof(struct flowi));
972 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
973 *dst = ip6_route_output(net, sk, &fl_gw);
974 if ((err = (*dst)->error))
975 goto out_err_release;
95c385b4 976 }
e550dfb0 977 }
95c385b4
NH
978#endif
979
1da177e4
LT
980 return 0;
981
982out_err_release:
ca46f9c8 983 if (err == -ENETUNREACH)
483a47d2 984 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
985 dst_release(*dst);
986 *dst = NULL;
987 return err;
988}
34a0b3cd 989
497c615a
HX
990/**
991 * ip6_dst_lookup - perform route lookup on flow
992 * @sk: socket which provides route info
993 * @dst: pointer to dst_entry * for result
994 * @fl: flow to lookup
995 *
996 * This function performs a route lookup on the given flow.
997 *
998 * It returns zero on success, or a standard errno code on error.
999 */
1000int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1001{
1002 *dst = NULL;
1003 return ip6_dst_lookup_tail(sk, dst, fl);
1004}
3cf3dc6c
ACM
1005EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1006
497c615a
HX
1007/**
1008 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1009 * @sk: socket which provides the dst cache and route info
1010 * @dst: pointer to dst_entry * for result
1011 * @fl: flow to lookup
1012 *
1013 * This function performs a route lookup on the given flow with the
1014 * possibility of using the cached route in the socket if it is valid.
1015 * It will take the socket dst lock when operating on the dst cache.
1016 * As a result, this function can only be used in process context.
1017 *
1018 * It returns zero on success, or a standard errno code on error.
1019 */
1020int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1021{
1022 *dst = NULL;
1023 if (sk) {
1024 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1025 *dst = ip6_sk_dst_check(sk, *dst, fl);
1026 }
1027
1028 return ip6_dst_lookup_tail(sk, dst, fl);
1029}
1030EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1031
34a0b3cd 1032static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1033 int getfrag(void *from, char *to, int offset, int len,
1034 int odd, struct sk_buff *skb),
1035 void *from, int length, int hh_len, int fragheaderlen,
1036 int transhdrlen, int mtu,unsigned int flags)
1037
1038{
1039 struct sk_buff *skb;
1040 int err;
1041
1042 /* There is support for UDP large send offload by network
1043 * device, so create one single skb packet containing complete
1044 * udp datagram
1045 */
1046 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1047 skb = sock_alloc_send_skb(sk,
1048 hh_len + fragheaderlen + transhdrlen + 20,
1049 (flags & MSG_DONTWAIT), &err);
1050 if (skb == NULL)
1051 return -ENOMEM;
1052
1053 /* reserve space for Hardware header */
1054 skb_reserve(skb, hh_len);
1055
1056 /* create space for UDP/IP header */
1057 skb_put(skb,fragheaderlen + transhdrlen);
1058
1059 /* initialize network header pointer */
c1d2bbe1 1060 skb_reset_network_header(skb);
e89e9cf5
AR
1061
1062 /* initialize protocol header pointer */
b0e380b1 1063 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1064
84fa7933 1065 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1066 skb->csum = 0;
1067 sk->sk_sndmsg_off = 0;
1068 }
1069
1070 err = skb_append_datato_frags(sk,skb, getfrag, from,
1071 (length - transhdrlen));
1072 if (!err) {
1073 struct frag_hdr fhdr;
1074
c31d5326
SS
1075 /* Specify the length of each IPv6 datagram fragment.
1076 * It has to be a multiple of 8.
1077 */
1078 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1079 sizeof(struct frag_hdr)) & ~7;
f83ef8c0 1080 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
7ea2f2c5 1081 ipv6_select_ident(&fhdr);
e89e9cf5
AR
1082 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1083 __skb_queue_tail(&sk->sk_write_queue, skb);
1084
1085 return 0;
1086 }
1087 /* There is not enough support do UPD LSO,
1088 * so follow normal path
1089 */
1090 kfree_skb(skb);
1091
1092 return err;
1093}
1da177e4 1094
0178b695
HX
1095static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1096 gfp_t gfp)
1097{
1098 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1099}
1100
1101static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1102 gfp_t gfp)
1103{
1104 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1105}
1106
41a1f8ea
YH
1107int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1108 int offset, int len, int odd, struct sk_buff *skb),
1109 void *from, int length, int transhdrlen,
1110 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1111 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
1112{
1113 struct inet_sock *inet = inet_sk(sk);
1114 struct ipv6_pinfo *np = inet6_sk(sk);
1115 struct sk_buff *skb;
1116 unsigned int maxfraglen, fragheaderlen;
1117 int exthdrlen;
1118 int hh_len;
1119 int mtu;
1120 int copy;
1121 int err;
1122 int offset = 0;
1123 int csummode = CHECKSUM_NONE;
1124
1125 if (flags&MSG_PROBE)
1126 return 0;
1127 if (skb_queue_empty(&sk->sk_write_queue)) {
1128 /*
1129 * setup for corking
1130 */
1131 if (opt) {
0178b695 1132 if (WARN_ON(np->cork.opt))
1da177e4 1133 return -EINVAL;
0178b695
HX
1134
1135 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1136 if (unlikely(np->cork.opt == NULL))
1137 return -ENOBUFS;
1138
1139 np->cork.opt->tot_len = opt->tot_len;
1140 np->cork.opt->opt_flen = opt->opt_flen;
1141 np->cork.opt->opt_nflen = opt->opt_nflen;
1142
1143 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1144 sk->sk_allocation);
1145 if (opt->dst0opt && !np->cork.opt->dst0opt)
1146 return -ENOBUFS;
1147
1148 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1149 sk->sk_allocation);
1150 if (opt->dst1opt && !np->cork.opt->dst1opt)
1151 return -ENOBUFS;
1152
1153 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1154 sk->sk_allocation);
1155 if (opt->hopopt && !np->cork.opt->hopopt)
1156 return -ENOBUFS;
1157
1158 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1159 sk->sk_allocation);
1160 if (opt->srcrt && !np->cork.opt->srcrt)
1161 return -ENOBUFS;
1162
1da177e4
LT
1163 /* need source address above miyazawa*/
1164 }
1165 dst_hold(&rt->u.dst);
c8cdaf99 1166 inet->cork.dst = &rt->u.dst;
1da177e4
LT
1167 inet->cork.fl = *fl;
1168 np->cork.hop_limit = hlimit;
41a1f8ea 1169 np->cork.tclass = tclass;
628a5c56
JH
1170 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1171 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
c7503609 1172 if (np->frag_size < mtu) {
d91675f9
YH
1173 if (np->frag_size)
1174 mtu = np->frag_size;
1175 }
1176 inet->cork.fragsize = mtu;
1da177e4
LT
1177 if (dst_allfrag(rt->u.dst.path))
1178 inet->cork.flags |= IPCORK_ALLFRAG;
1179 inet->cork.length = 0;
1180 sk->sk_sndmsg_page = NULL;
1181 sk->sk_sndmsg_off = 0;
01488942 1182 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1183 rt->rt6i_nfheader_len;
1da177e4
LT
1184 length += exthdrlen;
1185 transhdrlen += exthdrlen;
1186 } else {
c8cdaf99 1187 rt = (struct rt6_info *)inet->cork.dst;
1da177e4 1188 fl = &inet->cork.fl;
0178b695 1189 opt = np->cork.opt;
1da177e4
LT
1190 transhdrlen = 0;
1191 exthdrlen = 0;
1192 mtu = inet->cork.fragsize;
1193 }
1194
1195 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1196
a1b05140 1197 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1198 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1199 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1200
1201 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1202 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1203 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1204 return -EMSGSIZE;
1205 }
1206 }
1207
1208 /*
1209 * Let's try using as much space as possible.
1210 * Use MTU if total length of the message fits into the MTU.
1211 * Otherwise, we need to reserve fragment header and
1212 * fragment alignment (= 8-15 octects, in total).
1213 *
1214 * Note that we may need to "move" the data from the tail of
1ab1457c 1215 * of the buffer to the new fragment when we split
1da177e4
LT
1216 * the message.
1217 *
1ab1457c 1218 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1219 * at once if non-fragmentable extension headers
1220 * are too large.
1ab1457c 1221 * --yoshfuji
1da177e4
LT
1222 */
1223
1224 inet->cork.length += length;
e89e9cf5
AR
1225 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1226 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1227
baa829d8
PM
1228 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1229 fragheaderlen, transhdrlen, mtu,
1230 flags);
1231 if (err)
e89e9cf5 1232 goto error;
e89e9cf5
AR
1233 return 0;
1234 }
1da177e4
LT
1235
1236 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1237 goto alloc_new_skb;
1238
1239 while (length > 0) {
1240 /* Check if the remaining data fits into current packet. */
1241 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1242 if (copy < length)
1243 copy = maxfraglen - skb->len;
1244
1245 if (copy <= 0) {
1246 char *data;
1247 unsigned int datalen;
1248 unsigned int fraglen;
1249 unsigned int fraggap;
1250 unsigned int alloclen;
1251 struct sk_buff *skb_prev;
1252alloc_new_skb:
1253 skb_prev = skb;
1254
1255 /* There's no room in the current skb */
1256 if (skb_prev)
1257 fraggap = skb_prev->len - maxfraglen;
1258 else
1259 fraggap = 0;
1260
1261 /*
1262 * If remaining data exceeds the mtu,
1263 * we know we need more fragment(s).
1264 */
1265 datalen = length + fraggap;
1266 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1267 datalen = maxfraglen - fragheaderlen;
1268
1269 fraglen = datalen + fragheaderlen;
1270 if ((flags & MSG_MORE) &&
1271 !(rt->u.dst.dev->features&NETIF_F_SG))
1272 alloclen = mtu;
1273 else
1274 alloclen = datalen + fragheaderlen;
1275
1276 /*
1277 * The last fragment gets additional space at tail.
1278 * Note: we overallocate on fragments with MSG_MODE
1279 * because we have no idea if we're the last one.
1280 */
1281 if (datalen == length + fraggap)
1282 alloclen += rt->u.dst.trailer_len;
1283
1284 /*
1285 * We just reserve space for fragment header.
1ab1457c 1286 * Note: this may be overallocation if the message
1da177e4
LT
1287 * (without MSG_MORE) fits into the MTU.
1288 */
1289 alloclen += sizeof(struct frag_hdr);
1290
1291 if (transhdrlen) {
1292 skb = sock_alloc_send_skb(sk,
1293 alloclen + hh_len,
1294 (flags & MSG_DONTWAIT), &err);
1295 } else {
1296 skb = NULL;
1297 if (atomic_read(&sk->sk_wmem_alloc) <=
1298 2 * sk->sk_sndbuf)
1299 skb = sock_wmalloc(sk,
1300 alloclen + hh_len, 1,
1301 sk->sk_allocation);
1302 if (unlikely(skb == NULL))
1303 err = -ENOBUFS;
1304 }
1305 if (skb == NULL)
1306 goto error;
1307 /*
1308 * Fill in the control structures
1309 */
1310 skb->ip_summed = csummode;
1311 skb->csum = 0;
1312 /* reserve for fragmentation */
1313 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1314
1315 /*
1316 * Find where to start putting bytes
1317 */
1318 data = skb_put(skb, fraglen);
c14d2450 1319 skb_set_network_header(skb, exthdrlen);
1da177e4 1320 data += fragheaderlen;
b0e380b1
ACM
1321 skb->transport_header = (skb->network_header +
1322 fragheaderlen);
1da177e4
LT
1323 if (fraggap) {
1324 skb->csum = skb_copy_and_csum_bits(
1325 skb_prev, maxfraglen,
1326 data + transhdrlen, fraggap, 0);
1327 skb_prev->csum = csum_sub(skb_prev->csum,
1328 skb->csum);
1329 data += fraggap;
e9fa4f7b 1330 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1331 }
1332 copy = datalen - transhdrlen - fraggap;
1333 if (copy < 0) {
1334 err = -EINVAL;
1335 kfree_skb(skb);
1336 goto error;
1337 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1338 err = -EFAULT;
1339 kfree_skb(skb);
1340 goto error;
1341 }
1342
1343 offset += copy;
1344 length -= datalen - fraggap;
1345 transhdrlen = 0;
1346 exthdrlen = 0;
1347 csummode = CHECKSUM_NONE;
1348
1349 /*
1350 * Put the packet on the pending queue
1351 */
1352 __skb_queue_tail(&sk->sk_write_queue, skb);
1353 continue;
1354 }
1355
1356 if (copy > length)
1357 copy = length;
1358
1359 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1360 unsigned int off;
1361
1362 off = skb->len;
1363 if (getfrag(from, skb_put(skb, copy),
1364 offset, copy, off, skb) < 0) {
1365 __skb_trim(skb, off);
1366 err = -EFAULT;
1367 goto error;
1368 }
1369 } else {
1370 int i = skb_shinfo(skb)->nr_frags;
1371 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1372 struct page *page = sk->sk_sndmsg_page;
1373 int off = sk->sk_sndmsg_off;
1374 unsigned int left;
1375
1376 if (page && (left = PAGE_SIZE - off) > 0) {
1377 if (copy >= left)
1378 copy = left;
1379 if (page != frag->page) {
1380 if (i == MAX_SKB_FRAGS) {
1381 err = -EMSGSIZE;
1382 goto error;
1383 }
1384 get_page(page);
1385 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1386 frag = &skb_shinfo(skb)->frags[i];
1387 }
1388 } else if(i < MAX_SKB_FRAGS) {
1389 if (copy > PAGE_SIZE)
1390 copy = PAGE_SIZE;
1391 page = alloc_pages(sk->sk_allocation, 0);
1392 if (page == NULL) {
1393 err = -ENOMEM;
1394 goto error;
1395 }
1396 sk->sk_sndmsg_page = page;
1397 sk->sk_sndmsg_off = 0;
1398
1399 skb_fill_page_desc(skb, i, page, 0, 0);
1400 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1401 } else {
1402 err = -EMSGSIZE;
1403 goto error;
1404 }
1405 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1406 err = -EFAULT;
1407 goto error;
1408 }
1409 sk->sk_sndmsg_off += copy;
1410 frag->size += copy;
1411 skb->len += copy;
1412 skb->data_len += copy;
f945fa7a
HX
1413 skb->truesize += copy;
1414 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1415 }
1416 offset += copy;
1417 length -= copy;
1418 }
1419 return 0;
1420error:
1421 inet->cork.length -= length;
3bd653c8 1422 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1423 return err;
1424}
1425
bf138862
PE
1426static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1427{
0178b695
HX
1428 if (np->cork.opt) {
1429 kfree(np->cork.opt->dst0opt);
1430 kfree(np->cork.opt->dst1opt);
1431 kfree(np->cork.opt->hopopt);
1432 kfree(np->cork.opt->srcrt);
1433 kfree(np->cork.opt);
1434 np->cork.opt = NULL;
1435 }
1436
c8cdaf99
YH
1437 if (inet->cork.dst) {
1438 dst_release(inet->cork.dst);
1439 inet->cork.dst = NULL;
bf138862
PE
1440 inet->cork.flags &= ~IPCORK_ALLFRAG;
1441 }
1442 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1443}
1444
1da177e4
LT
1445int ip6_push_pending_frames(struct sock *sk)
1446{
1447 struct sk_buff *skb, *tmp_skb;
1448 struct sk_buff **tail_skb;
1449 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1450 struct inet_sock *inet = inet_sk(sk);
1451 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1452 struct net *net = sock_net(sk);
1da177e4
LT
1453 struct ipv6hdr *hdr;
1454 struct ipv6_txoptions *opt = np->cork.opt;
c8cdaf99 1455 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1456 struct flowi *fl = &inet->cork.fl;
1457 unsigned char proto = fl->proto;
1458 int err = 0;
1459
1460 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1461 goto out;
1462 tail_skb = &(skb_shinfo(skb)->frag_list);
1463
1464 /* move skb->data to ip header from ext header */
d56f90a7 1465 if (skb->data < skb_network_header(skb))
bbe735e4 1466 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1467 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1468 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1469 *tail_skb = tmp_skb;
1470 tail_skb = &(tmp_skb->next);
1471 skb->len += tmp_skb->len;
1472 skb->data_len += tmp_skb->len;
1da177e4 1473 skb->truesize += tmp_skb->truesize;
1da177e4
LT
1474 tmp_skb->destructor = NULL;
1475 tmp_skb->sk = NULL;
1da177e4
LT
1476 }
1477
28a89453 1478 /* Allow local fragmentation. */
b5c15fc0 1479 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1480 skb->local_df = 1;
1481
1da177e4 1482 ipv6_addr_copy(final_dst, &fl->fl6_dst);
cfe1fc77 1483 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1484 if (opt && opt->opt_flen)
1485 ipv6_push_frag_opts(skb, opt, &proto);
1486 if (opt && opt->opt_nflen)
1487 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1488
e2d1bca7
ACM
1489 skb_push(skb, sizeof(struct ipv6hdr));
1490 skb_reset_network_header(skb);
0660e03f 1491 hdr = ipv6_hdr(skb);
1ab1457c 1492
90bcaf7b 1493 *(__be32*)hdr = fl->fl6_flowlabel |
41a1f8ea 1494 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1495
1da177e4
LT
1496 hdr->hop_limit = np->cork.hop_limit;
1497 hdr->nexthdr = proto;
1498 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1499 ipv6_addr_copy(&hdr->daddr, final_dst);
1500
a2c2064f 1501 skb->priority = sk->sk_priority;
4a19ec58 1502 skb->mark = sk->sk_mark;
a2c2064f 1503
adf30907 1504 skb_dst_set(skb, dst_clone(&rt->u.dst));
edf391ff 1505 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
14878f75 1506 if (proto == IPPROTO_ICMPV6) {
adf30907 1507 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
14878f75 1508
5a57d4c7 1509 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
e41b5368 1510 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1511 }
1512
ef76bc23 1513 err = ip6_local_out(skb);
1da177e4
LT
1514 if (err) {
1515 if (err > 0)
6ce9e7b5 1516 err = net_xmit_errno(err);
1da177e4
LT
1517 if (err)
1518 goto error;
1519 }
1520
1521out:
bf138862 1522 ip6_cork_release(inet, np);
1da177e4
LT
1523 return err;
1524error:
06254914 1525 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1526 goto out;
1527}
1528
1529void ip6_flush_pending_frames(struct sock *sk)
1530{
1da177e4
LT
1531 struct sk_buff *skb;
1532
1533 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
adf30907
ED
1534 if (skb_dst(skb))
1535 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1536 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1537 kfree_skb(skb);
1538 }
1539
bf138862 1540 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1541}
This page took 0.85172 seconds and 4 git commands to generate.