/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>

#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        struct neighbour *neigh;
        struct in6_addr *nexthop;
        int ret;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        if (ret) {
                kfree_skb(skb);
                return ret;
        }

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}
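
/*
 * Note on the fragmentation decision above: a packet is handed to
 * ip6_fragment() when it exceeds the path MTU and is not GSO, when the
 * route requires fragmenting everything (dst_allfrag), or when conntrack
 * defrag recorded a smaller frag_max_size on input. For example, a
 * 3000-byte non-GSO skb on a 1500-byte-MTU route takes the ip6_fragment()
 * path; a GSO skb of the same length does not, since the stack segments
 * it further down the output path.
 */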
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the socket
 * might still be modified by calls to skb_set_owner_w() and
 * ipv6_local_error(), which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8 proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         * Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = sk->sk_priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);

                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
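
/*
 * Usage sketch (illustrative only, not part of this file): a transport
 * protocol that already holds a routed skb with its own header in place
 * typically calls ip6_xmit() roughly like this; my_build_flow6() is a
 * hypothetical helper standing in for the caller's flow setup:
 *
 *	struct flowi6 fl6;
 *
 *	my_build_flow6(sk, &fl6);		// hypothetical
 *	skb_dst_set(skb, dst);			// route chosen by the caller
 *	res = ip6_xmit(sk, skb, &fl6, sk->sk_mark,
 *		       rcu_dereference(np->opt), np->tclass);
 *
 * TCP's SYNACK path does essentially this with a const socket, which is
 * why the function takes "const struct sock *sk" and promotes it before
 * the netfilter hook.
 */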
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* For reaction involving unicast neighbor discovery
                         * message destined to the proxied address, pass it to
                         * input function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *	We DO NOT make any processing on
         *	RA packets, pushing them to user level AS IS
         *	without any WARRANTY that applications will be able
         *	to interpret them. The reason is that we
         *	cannot make anything clever here.
         *
         *	We are not end-node, so that if packet contains
         *	AH/ESP, we cannot make anything.
         *	Defragmentation also would be a mistake; RA packets
         *	cannot be fragmented, because there is no guarantee
         *	that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *	check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
         * send redirects to source routed frames.
         * We don't send redirects to frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *	incoming and outgoing devices are the same
                 *	send a redirect.
                 */
                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */
        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}
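
/*
 * Forwarding summary: a packet survives ip6_forward() only if forwarding
 * is enabled, it is host-addressed and not locally owned, it passes the
 * XFRM forward policy, its hop limit is at least 2 (the limit is only
 * decremented after skb_cow(), so a rejected packet is echoed back in
 * ICMP unmodified), and it fits the outgoing path MTU (never taken below
 * IPV6_MIN_MTU, 1280). For example, a 1400-byte packet arriving with
 * hop_limit 1 is answered with ICMPV6_TIME_EXCEED before any MTU check.
 */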
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        int hroom, troom;
        __be32 frag_id;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        err = -ENOMEM;
                        goto fail;
                }
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);

                __skb_pull(skb, hlen);
                fh = __skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                fh->identification = frag_id;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = __skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb_mark_not_on_list(skb);
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *	Fragment the datagram.
         */

        troom = rt->dst.dev->needed_tailroom;

        /*
         *	Keep copying data until we run out.
         */
        while (left > 0) {
                u8 *fragnexthdr_offset;

                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left)
                        len &= ~7;

                /* Allocate buffer */
                frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                 hroom + troom, GFP_ATOMIC);
                if (!frag) {
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *	Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, hroom);
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *	Charge the memory for the fragment to any owner
                 *	it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *	Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                fragnexthdr_offset = skb_network_header(frag);
                fragnexthdr_offset += prevhdr - skb_network_header(skb);
                *fragnexthdr_offset = NEXTHDR_FRAGMENT;

                /*
                 *	Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->identification = frag_id;

                /*
                 *	Copy a block of the IP datagram.
                 */
                BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
                                     len));
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *	Put this fragment into the sending queue.
                 */
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}
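
/*
 * Worked example of the arithmetic above: with a 1500-byte path MTU and a
 * bare IPv6 header (hlen = 40), mtu becomes 1500 - 40 - 8 = 1452 bytes of
 * payload per fragment. A non-final fragment is then trimmed to a multiple
 * of 8 (len &= ~7 -> 1448), so a 4000-byte payload leaves the slow path as
 * 1448 + 1448 + 1104 bytes with byte offsets 0, 1448 and 2896, and IP6_MF
 * set on all but the last fragment. Note the byte offset can be stored in
 * fh->frag_off directly: the low three bits of the field are flags, and an
 * 8-aligned byte offset is exactly the 8-octet-unit offset shifted left
 * by three.
 */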
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
               (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
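
/*
 * In other words, ip6_rt_check() returns "possibly stale" (nonzero) only
 * when the route is neither a /128 host route matching the flow address
 * nor validated by the cached last-used address: a cached /128 route to
 * the flow's daddr always passes.
 */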
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE		--ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
                struct fib6_info *from;
                struct rt6_info *rt;
                bool had_dst = *dst != NULL;

                if (!had_dst)
                        *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if (!had_dst && (*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
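
/*
 * Usage sketch (illustrative): because errors come back pointer-encoded,
 * callers typically hand the result straight to IS_ERR()/PTR_ERR():
 *
 *	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 *	if (IS_ERR(dst)) {
 *		err = PTR_ERR(dst);
 *		goto out;
 *	}
 *	skb_dst_set(skb, dst);
 *
 * final_p may be NULL when no IPsec rewriting of the destination address
 * is wanted.
 */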
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not first, the headers
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}
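
/*
 * Example of the maxfraglen computation: with *mtu = 1500 and
 * fragheaderlen = 40 (a bare IPv6 header), ((1500 - 40) & ~7) rounds the
 * per-fragment payload down to a multiple of 8 (1456), and adding back
 * fragheaderlen minus the 8-byte fragment header gives maxfraglen =
 * 1456 + 40 - 8 = 1488 bytes of skb data per non-final fragment. The same
 * formula appears again in __ip6_append_data() below.
 */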
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}
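
/*
 * The cork set up here follows the usual four-step corked-send pattern:
 * ip6_setup_cork() pins the route, options and per-send parameters,
 * __ip6_append_data() grows the queued skb(s), __ip6_make_skb() collapses
 * the queue into one packet with the IPv6 header, and ip6_cork_release()
 * drops the references. ip6_append_data()/ip6_push_pending_frames() and
 * ip6_make_skb() below are the two drivers of that sequence.
 */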
static int __ip6_append_data(struct sock *sk,
                             struct flowi6 *fl6,
                             struct sk_buff_head *queue,
                             struct inet_cork *cork,
                             struct inet6_cork *v6_cork,
                             struct page_frag *pfrag,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             unsigned int flags, struct ipcm6_cookie *ipc6)
{
        struct sk_buff *skb, *skb_prev = NULL;
        unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
        struct ubuf_info *uarg = NULL;
        int exthdrlen = 0;
        int dst_exthdrlen = 0;
        int hh_len;
        int copy;
        int err;
        int offset = 0;
        u32 tskey = 0;
        struct rt6_info *rt = (struct rt6_info *)cork->dst;
        struct ipv6_txoptions *opt = v6_cork->opt;
        int csummode = CHECKSUM_NONE;
        unsigned int maxnonfragsize, headersize;
        unsigned int wmem_alloc_delta = 0;
        bool paged, extra_uref = false;

        skb = skb_peek_tail(queue);
        if (!skb) {
                exthdrlen = opt ? opt->opt_flen : 0;
                dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
        }

        paged = !!cork->gso_size;
        mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
        orig_mtu = mtu;

        if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
            sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
                tskey = sk->sk_tskey++;

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
                     sizeof(struct frag_hdr);

        headersize = sizeof(struct ipv6hdr) +
                     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
                     (dst_allfrag(&rt->dst) ?
                      sizeof(struct frag_hdr) : 0) +
                     rt->rt6i_nfheader_len;

        /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
         * the first fragment
         */
        if (headersize + transhdrlen > mtu)
                goto emsgsize;

        if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
            (sk->sk_protocol == IPPROTO_UDP ||
             sk->sk_protocol == IPPROTO_RAW)) {
                ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
                                  sizeof(struct ipv6hdr));
                goto emsgsize;
        }

        if (ip6_sk_ignore_df(sk))
                maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
        else
                maxnonfragsize = mtu;

        if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
                pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
                ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
                return -EMSGSIZE;
        }

        /* CHECKSUM_PARTIAL only with no extension headers and when
         * we are not going to fragment
         */
        if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
            headersize == sizeof(struct ipv6hdr) &&
            length <= mtu - headersize &&
            (!(flags & MSG_MORE) || cork->gso_size) &&
            rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
                csummode = CHECKSUM_PARTIAL;

        if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
                uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
                if (!uarg)
                        return -ENOBUFS;
                extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
                if (rt->dst.dev->features & NETIF_F_SG &&
                    csummode == CHECKSUM_PARTIAL) {
                        paged = true;
                } else {
                        uarg->zerocopy = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        cork->length += length;
        if (!skb)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        unsigned int pagedlen;
alloc_new_skb:
                        /* There's no room in the current skb */
                        if (skb)
                                fraggap = skb->len - maxfraglen;
                        else
                                fraggap = 0;
                        /* update mtu and maxfraglen if necessary */
                        if (!skb || !skb_prev)
                                ip6_append_data_mtu(&mtu, &maxfraglen,
                                                    fragheaderlen, skb, rt,
                                                    orig_mtu);

                        skb_prev = skb;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;

                        if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;
                        pagedlen = 0;

                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else if (!paged)
                                alloclen = fraglen;
                        else {
                                alloclen = min_t(int, fraglen, MAX_HEADER);
                                pagedlen = fraglen - alloclen;
                        }

                        alloclen += dst_exthdrlen;

                        if (datalen != length + fraggap) {
                                /*
                                 * this is not the last fragment, the trailer
                                 * space is regarded as data space.
                                 */
                                datalen += rt->dst.trailer_len;
                        }

                        alloclen += rt->dst.trailer_len;
                        fraglen = datalen + fragheaderlen;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        copy = datalen - transhdrlen - fraggap - pagedlen;
                        if (copy < 0) {
                                err = -EINVAL;
                                goto error;
                        }
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
                                    2 * sk->sk_sndbuf)
                                        skb = alloc_skb(alloclen + hh_len,
                                                        sk->sk_allocation);
                                if (unlikely(!skb))
                                        err = -ENOBUFS;
                        }
                        if (!skb)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->protocol = htons(ETH_P_IPV6);
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation and ipsec header */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
                                    dst_exthdrlen);

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen - pagedlen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        if (copy > 0 &&
                            getfrag(from, data + transhdrlen, offset,
                                    copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= copy + transhdrlen;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        dst_exthdrlen = 0;

                        /* Only the initial fragment is time stamped */
                        skb_shinfo(skb)->tx_flags = cork->tx_flags;
                        cork->tx_flags = 0;
                        skb_shinfo(skb)->tskey = tskey;
                        tskey = 0;
                        skb_zcopy_set(skb, uarg, &extra_uref);

                        if ((flags & MSG_CONFIRM) && !skb_prev)
                                skb_set_dst_pending_confirm(skb, 1);

                        /*
                         * Put the packet on the pending queue
                         */
                        if (!skb->destructor) {
                                skb->destructor = sock_wfree;
                                skb->sk = sk;
                                wmem_alloc_delta += skb->truesize;
                        }
                        __skb_queue_tail(queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features&NETIF_F_SG) &&
                    skb_tailroom(skb) >= copy) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else if (!uarg || !uarg->zerocopy) {
                        int i = skb_shinfo(skb)->nr_frags;

                        err = -ENOMEM;
                        if (!sk_page_frag_refill(sk, pfrag))
                                goto error;

                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
                                err = -EMSGSIZE;
                                if (i == MAX_SKB_FRAGS)
                                        goto error;

                                __skb_fill_page_desc(skb, i, pfrag->page,
                                                     pfrag->offset, 0);
                                skb_shinfo(skb)->nr_frags = ++i;
                                get_page(pfrag->page);
                        }
                        copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
                                    page_address(pfrag->page) + pfrag->offset,
                                    offset, copy, skb->len, skb) < 0)
                                goto error_efault;

                        pfrag->offset += copy;
                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        wmem_alloc_delta += copy;
                } else {
                        err = skb_zerocopy_iter_dgram(skb, from, copy);
                        if (err < 0)
                                goto error;
                }
                offset += copy;
                length -= copy;
        }

        if (wmem_alloc_delta)
                refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return 0;

error_efault:
        err = -EFAULT;
error:
        if (uarg)
                sock_zerocopy_put_abort(uarg, extra_uref);
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
        return err;
}
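
/*
 * The loop above implements the classic corked-append strategy: keep
 * filling the tail skb up to maxfraglen (or the full MTU while the
 * message still fits in one fragment), and start a new skb once copy
 * drops to zero or below, moving up to fraggap tail bytes from the
 * previous skb into the new one so every fragment boundary stays
 * 8-byte aligned.
 */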
int ip6_append_data(struct sock *sk,
                    int getfrag(void *from, char *to, int offset, int len,
                                int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                    struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        int exthdrlen;
        int err;

        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                err = ip6_setup_cork(sk, &inet->cork, &np->cork,
                                     ipc6, rt, fl6);
                if (err)
                        return err;

                exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                fl6 = &inet->cork.fl.u.ip6;
                transhdrlen = 0;
        }

        return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
                                 &np->cork, sk_page_frag(sk), getfrag,
                                 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
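
/*
 * Typical use (illustrative): datagram senders such as UDPv6 and raw
 * sockets drive this roughly as follows:
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, transhdrlen,
 *			      &ipc6, &fl6, rt, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * MSG_MORE leaves the data corked on sk_write_queue for a later append.
 */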
static void ip6_cork_release(struct inet_cork_full *cork,
                             struct inet6_cork *v6_cork)
{
        if (v6_cork->opt) {
                kfree(v6_cork->opt->dst0opt);
                kfree(v6_cork->opt->dst1opt);
                kfree(v6_cork->opt->hopopt);
                kfree(v6_cork->opt->srcrt);
                kfree(v6_cork->opt);
                v6_cork->opt = NULL;
        }

        if (cork->base.dst) {
                dst_release(cork->base.dst);
                cork->base.dst = NULL;
                cork->base.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&cork->fl, 0, sizeof(cork->fl));
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
                               struct sk_buff_head *queue,
                               struct inet_cork_full *cork,
                               struct inet6_cork *v6_cork)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = v6_cork->opt;
        struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
        struct flowi6 *fl6 = &cork->fl.u.ip6;
        unsigned char proto = fl6->flowi6_proto;

        skb = __skb_dequeue(queue);
        if (!skb)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        skb->ignore_df = ip6_sk_ignore_df(sk);

        *final_dst = fl6->daddr;
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                        ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
        hdr->daddr = *final_dst;

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb->tstamp = cork->base.transmit_time;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }

        ip6_cork_release(cork, v6_cork);
out:
        return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
        struct net *net = sock_net(skb->sk);
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        int err;

        err = ip6_local_out(net, skb->sk, skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        IP6_INC_STATS(net, rt->rt6i_idev,
                                      IPSTATS_MIB_OUTDISCARDS);
        }

        return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        skb = ip6_finish_skb(sk);
        if (!skb)
                return 0;

        return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
                                       struct sk_buff_head *queue,
                                       struct inet_cork_full *cork,
                                       struct inet6_cork *v6_cork)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
        __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
                                   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
                             int getfrag(void *from, char *to, int offset,
                                         int len, int odd, struct sk_buff *skb),
                             void *from, int length, int transhdrlen,
                             struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
                             struct rt6_info *rt, unsigned int flags,
                             struct inet_cork_full *cork)
{
        struct inet6_cork v6_cork;
        struct sk_buff_head queue;
        int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
        int err;

        if (flags & MSG_PROBE)
                return NULL;

        __skb_queue_head_init(&queue);

        cork->base.flags = 0;
        cork->base.addr = 0;
        cork->base.opt = NULL;
        cork->base.dst = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
                ip6_cork_release(cork, &v6_cork);
                return ERR_PTR(err);
        }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;

        err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
                                &current->task_frag, getfrag, from,
                                length + exthdrlen, transhdrlen + exthdrlen,
                                flags, ipc6);
        if (err) {
                __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
                return ERR_PTR(err);
        }

        return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
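
/*
 * ip6_make_skb() is the one-shot variant of the corked path: it builds
 * the whole datagram on a private queue with a caller-supplied cork
 * instead of touching sk_write_queue, so the socket's pending-frames
 * state is left untouched; the caller then transmits the returned skb
 * itself, e.g. via ip6_send_skb().
 */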