]> Git Repo - linux.git/blame - net/ipv6/ip6_output.c
nvmem: disallow modular CONFIG_NVMEM
[linux.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <[email protected]>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
67ba4152 23 * Imran Patel : frag id should be in NBO
1da177e4
LT
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
5a0e3ad6 40#include <linux/slab.h>
1da177e4 41
33b48679 42#include <linux/bpf-cgroup.h>
1da177e4
LT
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
7bc570c8 58#include <linux/mroute6.h>
ca254490 59#include <net/l3mdev.h>
14972cbd 60#include <net/lwtunnel.h>
1da177e4 61
/* Final transmit step: resolve the nexthop neighbour and hand the skb to
 * the neighbour output path.  Multicast packets may additionally be looped
 * back to local listeners before being sent on the wire.
 * Returns 0 or a negative errno; consumes the skb on error paths.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a clone back to local sockets when the sender asked
		 * for multicast loop and either the multicast router socket
		 * wants it (and it was not already forwarded) or a local
		 * socket has joined this group on this device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means the sender wanted loopback
			 * delivery only: drop instead of transmitting.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast must never leave the node. */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* Neighbour lookup/creation under rcu_read_lock_bh(); the entry is
	 * only guaranteed alive until the matching unlock.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
130
/* Post-routing finish step: run the cgroup BPF egress program, honour a
 * post-SNAT xfrm re-route, then either fragment the packet or transmit it
 * directly via ip6_finish_output2().
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	/* A non-zero verdict from the cgroup egress program drops the skb. */
	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	/* Fragment when the packet exceeds the path MTU (unless GSO will
	 * segment it), the route demands all-frag, or conntrack recorded a
	 * smaller incoming fragment size we must not exceed.
	 */
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
156
/* dst_output() entry point for locally generated/forwarded IPv6 packets:
 * drops if IPv6 is disabled on the egress device, otherwise traverses the
 * NF_INET_POST_ROUTING hook (skipped when the skb was already re-routed).
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
176
e9191ffb 177bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
513674b5
SL
178{
179 if (!np->autoflowlabel_set)
180 return ip6_default_np_autolabel(net);
181 else
182 return np->autoflowlabel;
183}
184
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 *
 * Pushes the IPv6 header (and any extension headers from @opt) onto @skb
 * and sends it through the NF_INET_LOCAL_OUT hook towards dst_output().
 * Returns 0 on success, -ENOBUFS on headroom reallocation failure, or
 * -EMSGSIZE if the packet exceeds the path MTU and may not be fragmented.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		/* Fragmentable options first, then the non-fragmentable ones
		 * (which may also rewrite first_hop for routing headers).
		 */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Packet too big and DF-style semantics apply: report EMSGSIZE. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
7159039a
YH
292EXPORT_SYMBOL(ip6_xmit);
293
1da177e4
LT
/* Deliver a Router Alert packet to every registered RA socket matching
 * @sel (and the socket's bound device, if any).  Each earlier match gets a
 * clone; the last match consumes @skb itself.
 * Returns 1 if the skb was delivered (and thus consumed), 0 otherwise.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			/* Hand a clone to the previous match so the original
			 * skb can go to the final one without copying.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
322
e21e0b5f
VN
/* Decide how a packet destined to a proxied (pneigh) address is handled:
 * returns 1 to pass it to local input (unicast NDISC messages), -1 to drop
 * after signalling link failure (link-local destination), 0 to forward.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Skip extension headers to find the transport protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
374
0c4b51f0
EB
/* NF_HOOK continuation for the FORWARD hook: once netfilter accepts the
 * packet, hand it to the output path of its (possibly new) route.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	return dst_output(net, sk, skb);
}
380
09952107 381unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
0954cf9c
HFS
382{
383 unsigned int mtu;
384 struct inet6_dev *idev;
385
386 if (dst_metric_locked(dst, RTAX_MTU)) {
387 mtu = dst_metric_raw(dst, RTAX_MTU);
388 if (mtu)
389 return mtu;
390 }
391
392 mtu = IPV6_MIN_MTU;
393 rcu_read_lock();
394 idev = __in6_dev_get(dst->dev);
395 if (idev)
396 mtu = idev->cnf.mtu6;
397 rcu_read_unlock();
398
399 return mtu;
400}
09952107 401EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
0954cf9c 402
fe6cc55f
FW
/* True if @skb cannot be forwarded as-is over a path with @mtu.
 * Check order matters: the conntrack-recorded frag_max_size overrides
 * ignore_df, and GSO packets pass if their segments fit the MTU.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
420
1da177e4
LT
/* Forward an IPv6 packet that is not destined to this host: validate it,
 * run xfrm policy/route checks, possibly emit a redirect or proxy-NDISC
 * handling, enforce the path MTU, decrement hop_limit and pass the packet
 * through the NF_INET_FORWARD hook.  Consumes the skb on every path.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only forward packets addressed to us at L2. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* An skb still owned by a socket must not be forwarded. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm6_route_forward() may have replaced the route. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
584
/* Copy per-packet metadata (type, priority, protocol, route, device, mark,
 * tc index, netfilter and security state) from @from to @to; used to make
 * fragments inherit the original packet's state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Replace any route already attached to @to with a reference to
	 * @from's route.
	 */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
601
7d8c6e39
EB
/* Fragment @skb to fit the path MTU and send each fragment via @output.
 * Fast path: if the skb already carries a suitable frag_list, the existing
 * fragments are sent in place.  Slow path: new skbs are allocated and the
 * payload is copied out in MTU-sized, 8-byte-aligned chunks.
 * Returns 0 on success or a negative errno; consumes @skb either way.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only honour socket frag_size when not in a recursive xmit. */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	/* hlen = length of the unfragmentable part; prevhdr points at the
	 * nexthdr byte that must become NEXTHDR_FRAGMENT.
	 */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Need room for headers plus at least 8 bytes of payload. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on, mtu is the per-fragment payload budget. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Checksum offload cannot survive fragmentation; resolve it now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		/* Insert the fragment header between the unfragmentable
		 * part and the payload of the first fragment.
		 */
		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo ownership transfer done above for the fragments we
		 * already walked before bailing to the slow path.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0)	{
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/* Patch the copied header's nexthdr byte (same offset as
		 * prevhdr in the original) to point at the fragment header.
		 */
		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
890
b71d1d42
ED
891static inline int ip6_rt_check(const struct rt6key *rt_key,
892 const struct in6_addr *fl_addr,
893 const struct in6_addr *addr_cache)
cf6b1982 894{
a02cec21 895 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
63159f29 896 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
cf6b1982
YH
897}
898
497c615a
HX
/* Validate a socket's cached dst against the flow @fl6.  Returns the dst
 * if it is still usable, or NULL (releasing the reference) when it is
 * absent, not IPv6, or stale with respect to destination/source/oif.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached dst of another family (e.g. v4-mapped) is unusable. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
945
3aef934f 946static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
4c9483b2 947 struct dst_entry **dst, struct flowi6 *fl6)
497c615a 948{
69cce1d1
DM
949#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
950 struct neighbour *n;
97cac082 951 struct rt6_info *rt;
69cce1d1
DM
952#endif
953 int err;
6f21c96a 954 int flags = 0;
497c615a 955
e16e888b
MS
956 /* The correct way to handle this would be to do
957 * ip6_route_get_saddr, and then ip6_route_output; however,
958 * the route-specific preferred source forces the
959 * ip6_route_output call _before_ ip6_route_get_saddr.
960 *
961 * In source specific routing (no src=any default route),
962 * ip6_route_output will fail given src=any saddr, though, so
963 * that's why we try it again later.
964 */
965 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
966 struct rt6_info *rt;
967 bool had_dst = *dst != NULL;
1da177e4 968
e16e888b
MS
969 if (!had_dst)
970 *dst = ip6_route_output(net, sk, fl6);
971 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
c3968a85
DW
972 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
973 sk ? inet6_sk(sk)->srcprefs : 0,
974 &fl6->saddr);
44456d37 975 if (err)
1da177e4 976 goto out_err_release;
e16e888b
MS
977
978 /* If we had an erroneous initial result, pretend it
979 * never existed and let the SA-enabled version take
980 * over.
981 */
982 if (!had_dst && (*dst)->error) {
983 dst_release(*dst);
984 *dst = NULL;
985 }
6f21c96a
PA
986
987 if (fl6->flowi6_oif)
988 flags |= RT6_LOOKUP_F_IFACE;
1da177e4
LT
989 }
990
e16e888b 991 if (!*dst)
6f21c96a 992 *dst = ip6_route_output_flags(net, sk, fl6, flags);
e16e888b
MS
993
994 err = (*dst)->error;
995 if (err)
996 goto out_err_release;
997
95c385b4 998#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
999 /*
1000 * Here if the dst entry we've looked up
1001 * has a neighbour entry that is in the INCOMPLETE
1002 * state and the src address from the flow is
1003 * marked as OPTIMISTIC, we release the found
1004 * dst entry and replace it instead with the
1005 * dst entry of the nexthop router
1006 */
c56bf6fe 1007 rt = (struct rt6_info *) *dst;
707be1ff 1008 rcu_read_lock_bh();
2647a9b0
MKL
1009 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1010 rt6_nexthop(rt, &fl6->daddr));
707be1ff
YH
1011 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1012 rcu_read_unlock_bh();
1013
1014 if (err) {
e550dfb0 1015 struct inet6_ifaddr *ifp;
4c9483b2 1016 struct flowi6 fl_gw6;
e550dfb0
NH
1017 int redirect;
1018
4c9483b2 1019 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
e550dfb0
NH
1020 (*dst)->dev, 1);
1021
1022 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1023 if (ifp)
1024 in6_ifa_put(ifp);
1025
1026 if (redirect) {
1027 /*
1028 * We need to get the dst entry for the
1029 * default router instead
1030 */
1031 dst_release(*dst);
4c9483b2
DM
1032 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1033 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1034 *dst = ip6_route_output(net, sk, &fl_gw6);
e5d08d71
IM
1035 err = (*dst)->error;
1036 if (err)
e550dfb0 1037 goto out_err_release;
95c385b4 1038 }
e550dfb0 1039 }
95c385b4 1040#endif
ec5e3b0a 1041 if (ipv6_addr_v4mapped(&fl6->saddr) &&
00ea1cee
WB
1042 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1043 err = -EAFNOSUPPORT;
1044 goto out_err_release;
1045 }
95c385b4 1046
1da177e4
LT
1047 return 0;
1048
1049out_err_release:
1050 dst_release(*dst);
1051 *dst = NULL;
8a966fc0 1052
0d240e78
DA
1053 if (err == -ENETUNREACH)
1054 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1055 return err;
1056}
34a0b3cd 1057
497c615a
HX
1058/**
1059 * ip6_dst_lookup - perform route lookup on flow
1060 * @sk: socket which provides route info
1061 * @dst: pointer to dst_entry * for result
4c9483b2 1062 * @fl6: flow to lookup
497c615a
HX
1063 *
1064 * This function performs a route lookup on the given flow.
1065 *
1066 * It returns zero on success, or a standard errno code on error.
1067 */
343d60aa
RP
1068int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1069 struct flowi6 *fl6)
497c615a
HX
1070{
1071 *dst = NULL;
343d60aa 1072 return ip6_dst_lookup_tail(net, sk, dst, fl6);
497c615a 1073}
3cf3dc6c
ACM
1074EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1075
497c615a 1076/**
68d0c6d3
DM
1077 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1078 * @sk: socket which provides route info
4c9483b2 1079 * @fl6: flow to lookup
68d0c6d3 1080 * @final_dst: final destination address for ipsec lookup
68d0c6d3
DM
1081 *
1082 * This function performs a route lookup on the given flow.
1083 *
1084 * It returns a valid dst pointer on success, or a pointer encoded
1085 * error code.
1086 */
3aef934f 1087struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
0e0d44ab 1088 const struct in6_addr *final_dst)
68d0c6d3
DM
1089{
1090 struct dst_entry *dst = NULL;
1091 int err;
1092
343d60aa 1093 err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
68d0c6d3
DM
1094 if (err)
1095 return ERR_PTR(err);
1096 if (final_dst)
4e3fd7a0 1097 fl6->daddr = *final_dst;
2774c131 1098
f92ee619 1099 return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
68d0c6d3
DM
1100}
1101EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1102
1103/**
1104 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
497c615a 1105 * @sk: socket which provides the dst cache and route info
4c9483b2 1106 * @fl6: flow to lookup
68d0c6d3 1107 * @final_dst: final destination address for ipsec lookup
96818159 1108 * @connected: whether @sk is connected or not
497c615a
HX
1109 *
1110 * This function performs a route lookup on the given flow with the
1111 * possibility of using the cached route in the socket if it is valid.
1112 * It will take the socket dst lock when operating on the dst cache.
1113 * As a result, this function can only be used in process context.
1114 *
96818159
AK
1115 * In addition, for a connected socket, cache the dst in the socket
1116 * if the current cache is not valid.
1117 *
68d0c6d3
DM
1118 * It returns a valid dst pointer on success, or a pointer encoded
1119 * error code.
497c615a 1120 */
4c9483b2 1121struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
96818159
AK
1122 const struct in6_addr *final_dst,
1123 bool connected)
497c615a 1124{
68d0c6d3 1125 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
497c615a 1126
4c9483b2 1127 dst = ip6_sk_dst_check(sk, dst, fl6);
96818159
AK
1128 if (dst)
1129 return dst;
1130
1131 dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1132 if (connected && !IS_ERR(dst))
1133 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
68d0c6d3 1134
00bc0ef5 1135 return dst;
497c615a 1136}
68d0c6d3 1137EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
497c615a 1138
0178b695
HX
1139static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1140 gfp_t gfp)
1141{
1142 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1143}
1144
1145static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1146 gfp_t gfp)
1147{
1148 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1149}
1150
75a493e6 1151static void ip6_append_data_mtu(unsigned int *mtu,
0c183379
G
1152 int *maxfraglen,
1153 unsigned int fragheaderlen,
1154 struct sk_buff *skb,
75a493e6 1155 struct rt6_info *rt,
e367c2d0 1156 unsigned int orig_mtu)
0c183379
G
1157{
1158 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
63159f29 1159 if (!skb) {
0c183379 1160 /* first fragment, reserve header_len */
e367c2d0 1161 *mtu = orig_mtu - rt->dst.header_len;
0c183379
G
1162
1163 } else {
1164 /*
1165 * this fragment is not first, the headers
1166 * space is regarded as data space.
1167 */
e367c2d0 1168 *mtu = orig_mtu;
0c183379
G
1169 }
1170 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1171 + fragheaderlen - sizeof(struct frag_hdr);
1172 }
1173}
1174
/* Prepare the cork state for a new corked transmission: duplicate the
 * supplied tx options, grab a reference on the route, and record flow,
 * hop limit, traffic class and fragment size.
 * Returns 0 on success or a negative errno.
 * NOTE(review): on -ENOBUFS a partially duplicated v6_cork->opt is left
 * in place; it looks like callers free it via ip6_cork_release() (see
 * ip6_make_skb()) — confirm every caller does so.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		/* Corking twice with options would leak the first copy. */
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each option block; a NULL result with a
		 * non-NULL source means the kmemdup() failed.
		 */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* The cork keeps its own reference on the route. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* Honour a smaller per-socket fragment size, if configured. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
1244
/* Append @length bytes of payload (pulled in via @getfrag) to the pending
 * @queue, growing the tail skb and/or allocating new fragment-sized skbs
 * according to the corked MTU.  Common worker for ip6_append_data() and
 * ip6_make_skb().
 * Returns 0 on success or a negative errno; on error the bytes that could
 * not be queued are subtracted from cork->length again.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	/* truesize accumulated locally and committed to sk_wmem_alloc once
	 * at the end (and on the error path).
	 */
	unsigned int wmem_alloc_delta = 0;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* Queue empty: the first skb carries the extension headers. */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		/* IPV6_DONTFRAG: report the path MTU instead of fragmenting. */
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				/* Respect the send-buffer limit, counting
				 * the not-yet-committed wmem_alloc_delta.
				 */
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overflow from the previous skb
				 * into this one, fixing up its checksum.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy linearly into the skb. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments. */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		}
		offset += copy;
		length -= copy;
	}

	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	/* Give back the bytes we failed to queue. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
0bbe84a6
VY
1559
1560int ip6_append_data(struct sock *sk,
1561 int getfrag(void *from, char *to, int offset, int len,
1562 int odd, struct sk_buff *skb),
26879da5
WW
1563 void *from, int length, int transhdrlen,
1564 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1565 struct rt6_info *rt, unsigned int flags,
c14ac945 1566 const struct sockcm_cookie *sockc)
0bbe84a6
VY
1567{
1568 struct inet_sock *inet = inet_sk(sk);
1569 struct ipv6_pinfo *np = inet6_sk(sk);
1570 int exthdrlen;
1571 int err;
1572
1573 if (flags&MSG_PROBE)
1574 return 0;
1575 if (skb_queue_empty(&sk->sk_write_queue)) {
1576 /*
1577 * setup for corking
1578 */
26879da5
WW
1579 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1580 ipc6, rt, fl6);
0bbe84a6
VY
1581 if (err)
1582 return err;
1583
26879da5 1584 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
0bbe84a6
VY
1585 length += exthdrlen;
1586 transhdrlen += exthdrlen;
1587 } else {
1588 fl6 = &inet->cork.fl.u.ip6;
1589 transhdrlen = 0;
1590 }
1591
1592 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1593 &np->cork, sk_page_frag(sk), getfrag,
26879da5 1594 from, length, transhdrlen, flags, ipc6, sockc);
0bbe84a6 1595}
a495f836 1596EXPORT_SYMBOL_GPL(ip6_append_data);
1da177e4 1597
366e41d9
VY
1598static void ip6_cork_release(struct inet_cork_full *cork,
1599 struct inet6_cork *v6_cork)
bf138862 1600{
366e41d9
VY
1601 if (v6_cork->opt) {
1602 kfree(v6_cork->opt->dst0opt);
1603 kfree(v6_cork->opt->dst1opt);
1604 kfree(v6_cork->opt->hopopt);
1605 kfree(v6_cork->opt->srcrt);
1606 kfree(v6_cork->opt);
1607 v6_cork->opt = NULL;
0178b695
HX
1608 }
1609
366e41d9
VY
1610 if (cork->base.dst) {
1611 dst_release(cork->base.dst);
1612 cork->base.dst = NULL;
1613 cork->base.flags &= ~IPCORK_ALLFRAG;
bf138862 1614 }
366e41d9 1615 memset(&cork->fl, 0, sizeof(cork->fl));
bf138862
PE
1616}
1617
/* Collapse the queued fragments into a single skb (chained via frag_list),
 * push the extension headers, build the IPv6 header, attach the corked
 * route and update output stats, then release the cork.
 * Returns the skb ready for ip6_send_skb(), or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the head skb's frag_list,
	 * transferring their length/truesize accounting to the head.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		/* May rewrite final_dst (e.g. for a routing header). */
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1691
1692int ip6_send_skb(struct sk_buff *skb)
1693{
1694 struct net *net = sock_net(skb->sk);
1695 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1696 int err;
1697
33224b16 1698 err = ip6_local_out(net, skb->sk, skb);
1da177e4
LT
1699 if (err) {
1700 if (err > 0)
6ce9e7b5 1701 err = net_xmit_errno(err);
1da177e4 1702 if (err)
6422398c
VY
1703 IP6_INC_STATS(net, rt->rt6i_idev,
1704 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1705 }
1706
1da177e4 1707 return err;
6422398c
VY
1708}
1709
/* Finalise and transmit the pending corked data on @sk.  A NULL skb from
 * ip6_finish_skb() (nothing queued) is treated as success.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1da177e4 1721
0bbe84a6 1722static void __ip6_flush_pending_frames(struct sock *sk,
6422398c
VY
1723 struct sk_buff_head *queue,
1724 struct inet_cork_full *cork,
1725 struct inet6_cork *v6_cork)
1da177e4 1726{
1da177e4
LT
1727 struct sk_buff *skb;
1728
0bbe84a6 1729 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
adf30907
ED
1730 if (skb_dst(skb))
1731 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
e1f52208 1732 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1733 kfree_skb(skb);
1734 }
1735
6422398c 1736 ip6_cork_release(cork, v6_cork);
1da177e4 1737}
0bbe84a6
VY
1738
1739void ip6_flush_pending_frames(struct sock *sk)
1740{
6422398c
VY
1741 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1742 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
0bbe84a6 1743}
a495f836 1744EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

/* One-shot (uncorked) transmit helper: set up a private cork and queue on
 * the stack, append all of @from, and return the finished skb.
 * Returns NULL for MSG_PROBE, an ERR_PTR() on failure, or the skb.
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		/* Release whatever ip6_setup_cork() managed to allocate. */
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	/* Negative means "not set by caller": fall back to socket default. */
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}
This page took 1.527379 seconds and 4 git commands to generate.