/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>
static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif
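
/* Cache the validated input route (and its cookie) on a socket so the
 * early demux path can attach it to incoming skbs without a full routing
 * lookup; rt6_get_cookie() lets later packets detect that the dst has
 * gone stale.
 */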
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}
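
/* The initial sequence number and the timestamp offset for a connection
 * are both derived from the address/port four-tuple through keyed hashes
 * (secure_tcpv6_seq() and secure_tcpv6_ts_off()), making them hard to
 * predict off-path while staying stable for a given flow.
 */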
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
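
/* Active open (connect()). Resolves any flow label and link-local scope
 * id, applies the BSD'ism that connecting to the unspecified address
 * means loopback, hands v4-mapped destinations off to tcp_v4_connect(),
 * then routes the flow, picks a source address, hashes the socket into
 * the established table and sends the SYN.
 */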
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (!flowlabel)
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
							 sk->sk_v6_daddr.s6_addr32,
							 inet->inet_sport,
							 inet->inet_dport);
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
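
/* Called when an ICMPv6 Packet Too Big has lowered the path MTU: refresh
 * the cached route and, if our cached PMTU is now too large, re-clamp the
 * MSS and retransmit whatever no longer fits in one packet.
 */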
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}
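
/* ICMPv6 error handler. Looks up the socket the quoted segment belongs
 * to, validates the quoted sequence number against the send window, and
 * then either follows a redirect, records the new path MTU, or reports
 * the error to the user, deferring work when the socket is user-locked.
 */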
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
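
/* Send a SYN-ACK for a pending connection request: grab a route if the
 * caller did not supply one, build the segment (optionally carrying a
 * Fast Open cookie), checksum it over the IPv6 pseudo-header and emit it
 * with the listener's IPv6 options and traffic class.
 */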
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}
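
/* TCP-MD5 (RFC 2385) support. Keys are stored per peer address and
 * prefix length; v4-mapped peers are kept in the AF_INET key table, so a
 * mapped IPv6 socket and a plain IPv4 socket agree on the key used for
 * the same peer.
 */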
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}
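
/* The MD5 signature is computed over a pseudo-header, then the TCP
 * header with a zeroed checksum, then the payload, and finally the key
 * itself. For reference, the pseudo-header layout (struct
 * tcp6_pseudohdr, declared in net/tcp.h) is:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;		(TCP segment length)
 *		__be32		protocol;	(includes padding)
 *	};
 */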
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}
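
/* Record per-request IPv6 state from the incoming SYN: the peer and
 * local addresses, the inbound interface for link-local peers, and a
 * reference to the SYN itself when the listener wants packet options
 * reflected to userspace later.
 */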
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};
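
/* Build and send a bare ACK or RST on the per-netns control socket. The
 * reply is constructed from scratch, so it works without a full socket:
 * addresses are mirrored from the incoming segment, and an MD5 option is
 * appended when a key is supplied.
 */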
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk)
		mark = (sk->sk_state == TCP_TIME_WAIT) ?
			inet_twsk(sk)->tw_mark : sk->sk_mark;
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
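
/* Send a RST in response to a bogus segment, with sequence numbers taken
 * from the offending segment as RFC 793 requires. When TCP-MD5 is
 * compiled in, the incoming signature is verified first (via a listener
 * lookup when no established socket is known), so no RST is generated
 * for segments that fail authentication.
 */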
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and the incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/* active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * We do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source),
					    tcp_v6_iif_l3_slave(skb),
					    tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk))
			trace_tcp_send_reset(sk, skb);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));

	inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0);
}
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
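
/* Create the child socket once the handshake completes. A v4-mapped
 * request is delegated to tcp_v4_syn_recv_sock() and the child is then
 * switched to the mapped ops; a native IPv6 request gets its addresses,
 * IPv6 options and (optionally) MD5 key copied from the request and the
 * listener before the child is hashed in.
 */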
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = tcp_v6_iif(skb);
		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
		if (np->repflow)
			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
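
/* Main receive entry point, called from the IPv6 protocol handler.
 * Validates and checksums the TCP header, looks up the owning socket,
 * and dispatches to the request-socket, time-wait, listen or established
 * paths; segments arriving while the socket is user-locked are queued on
 * the backlog.
 */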
static int tcp_v6_rcv(struct sk_buff *skb)
{
	int sdif = inet6_sdif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
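
/* Early demux runs before routing: match the packet to an established
 * socket and reuse the socket's cached rx dst (validated by ifindex and
 * cookie), saving a routing lookup per packet.
 */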
static void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};
static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = sp->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   tp->write_seq - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}
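
/* Together the three helpers above emit one /proc/net/tcp6 line per
 * request, established or time-wait socket. Addresses print as four raw
 * %08X words in network byte order followed by the port in hex, so, for
 * example, an all-zero listening address on port 22 appears as
 * 00000000000000000000000000000000:0016.
 */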
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.early_demux_handler =  tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}