// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
89 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
90 struct tcp6_sock, tcp)->inet6)
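/* Usage sketch: in TCP code where sk is known to be a TCPv6 socket, prefer
 *
 *	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
 *
 * over inet6_sk(sk); the container_of_const() form resolves to a constant
 * offset inside struct tcp6_sock and avoids loading inet_sk(sk)->pinet6.
 */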
92 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
94 struct dst_entry *dst = skb_dst(skb);
96 if (dst && dst_hold_safe(dst)) {
97 const struct rt6_info *rt = (const struct rt6_info *)dst;
99 rcu_assign_pointer(sk->sk_rx_dst, dst);
100 sk->sk_rx_dst_ifindex = skb->skb_iif;
101 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
105 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
107 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
108 ipv6_hdr(skb)->saddr.s6_addr32,
110 tcp_hdr(skb)->source);
113 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
115 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
116 ipv6_hdr(skb)->saddr.s6_addr32);
119 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
126 if (addr_len < SIN6_LEN_RFC2133)
129 sock_owned_by_me(sk);
131 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
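/* Active open (connect()) for an AF_INET6 TCP socket: validate the
 * destination address, handle flow labels and link-local scope ids, fall
 * back to tcp_v4_connect() for v4-mapped destinations, do the route lookup
 * and source address/bind hash setup, then pick the ISN and timestamp
 * offset before calling tcp_connect().
 */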
134 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
137 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
138 struct inet_connection_sock *icsk = inet_csk(sk);
139 struct in6_addr *saddr = NULL, *final_p, final;
140 struct inet_timewait_death_row *tcp_death_row;
141 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
142 struct inet_sock *inet = inet_sk(sk);
143 struct tcp_sock *tp = tcp_sk(sk);
144 struct net *net = sock_net(sk);
145 struct ipv6_txoptions *opt;
146 struct dst_entry *dst;
151 if (addr_len < SIN6_LEN_RFC2133)
154 if (usin->sin6_family != AF_INET6)
155 return -EAFNOSUPPORT;
157 memset(&fl6, 0, sizeof(fl6));
159 if (inet6_test_bit(SNDFLOW, sk)) {
160 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
161 IP6_ECN_flow_init(fl6.flowlabel);
162 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
163 struct ip6_flowlabel *flowlabel;
164 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
165 if (IS_ERR(flowlabel))
167 fl6_sock_release(flowlabel);
	/* connect() to INADDR_ANY means loopback (BSD'ism). */
175 if (ipv6_addr_any(&usin->sin6_addr)) {
176 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
177 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
180 usin->sin6_addr = in6addr_loopback;
183 addr_type = ipv6_addr_type(&usin->sin6_addr);
185 if (addr_type & IPV6_ADDR_MULTICAST)
188 if (addr_type&IPV6_ADDR_LINKLOCAL) {
189 if (addr_len >= sizeof(struct sockaddr_in6) &&
190 usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
194 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
197 sk->sk_bound_dev_if = usin->sin6_scope_id;
200 /* Connect to link-local address requires an interface */
201 if (!sk->sk_bound_dev_if)
205 if (tp->rx_opt.ts_recent_stamp &&
206 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
207 tp->rx_opt.ts_recent = 0;
208 tp->rx_opt.ts_recent_stamp = 0;
209 WRITE_ONCE(tp->write_seq, 0);
212 sk->sk_v6_daddr = usin->sin6_addr;
213 np->flow_label = fl6.flowlabel;
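	/*
	 *	TCP over IPv4: if the destination is a v4-mapped address, switch
	 *	this socket to the IPv4 af_ops and complete the connect through
	 *	tcp_v4_connect().
	 */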
219 if (addr_type & IPV6_ADDR_MAPPED) {
220 u32 exthdrlen = icsk->icsk_ext_hdr_len;
221 struct sockaddr_in sin;
223 if (ipv6_only_sock(sk))
226 sin.sin_family = AF_INET;
227 sin.sin_port = usin->sin6_port;
228 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
230 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
231 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
233 mptcpv6_handle_mapped(sk, true);
234 sk->sk_backlog_rcv = tcp_v4_do_rcv;
235 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
236 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
239 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
242 icsk->icsk_ext_hdr_len = exthdrlen;
243 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
244 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
246 mptcpv6_handle_mapped(sk, false);
247 sk->sk_backlog_rcv = tcp_v6_do_rcv;
248 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
249 tp->af_specific = &tcp_sock_ipv6_specific;
253 np->saddr = sk->sk_v6_rcv_saddr;
258 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
259 saddr = &sk->sk_v6_rcv_saddr;
261 fl6.flowi6_proto = IPPROTO_TCP;
262 fl6.daddr = sk->sk_v6_daddr;
263 fl6.saddr = saddr ? *saddr : np->saddr;
264 fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
265 fl6.flowi6_oif = sk->sk_bound_dev_if;
266 fl6.flowi6_mark = sk->sk_mark;
267 fl6.fl6_dport = usin->sin6_port;
268 fl6.fl6_sport = inet->inet_sport;
269 fl6.flowi6_uid = sk->sk_uid;
271 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
272 final_p = fl6_update_dst(&fl6, opt, &final);
274 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
276 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
282 tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
283 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
288 err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
293 /* set the source address */
295 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 sk->sk_gso_type = SKB_GSO_TCPV6;
298 ip6_dst_store(sk, dst, NULL, NULL);
300 icsk->icsk_ext_hdr_len = 0;
302 icsk->icsk_ext_hdr_len = opt->opt_flen +
305 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 inet->inet_dport = usin->sin6_port;
309 tcp_set_state(sk, TCP_SYN_SENT);
310 err = inet6_hash_connect(tcp_death_row, sk);
316 if (likely(!tp->repair)) {
318 WRITE_ONCE(tp->write_seq,
319 secure_tcpv6_seq(np->saddr.s6_addr32,
320 sk->sk_v6_daddr.s6_addr32,
323 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
324 sk->sk_v6_daddr.s6_addr32);
327 if (tcp_fastopen_defer_connect(sk, &err))
332 err = tcp_connect(sk);
339 tcp_set_state(sk, TCP_CLOSE);
340 inet_bhash2_reset_saddr(sk);
342 inet->inet_dport = 0;
343 sk->sk_route_caps = 0;
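/* Deferred PMTU handling: called directly from tcp_v6_err(), or later via
 * the TCP_MTU_REDUCED_DEFERRED flag when the socket was owned by the user
 * at ICMP time; shrinks the MSS to the new path MTU and retransmits.
 */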
347 static void tcp_v6_mtu_reduced(struct sock *sk)
349 struct dst_entry *dst;
352 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
355 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
360 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
363 dst = inet6_csk_update_pmtu(sk, mtu);
367 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
368 tcp_sync_mss(sk, dst_mtu(dst));
369 tcp_simple_retransmit(sk);
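/* ICMPv6 error handler for TCP: look up the socket the error refers to,
 * filter it (TIME_WAIT, NEW_SYN_RECV, sequence window, minimum hop limit),
 * then act on the type: NDISC_REDIRECT updates the route, ICMPV6_PKT_TOOBIG
 * triggers PMTU reduction, and anything else becomes a socket error or a
 * soft error depending on the socket state.
 */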
373 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
374 u8 type, u8 code, int offset, __be32 info)
376 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
377 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
378 struct net *net = dev_net(skb->dev);
379 struct request_sock *fastopen;
380 struct ipv6_pinfo *np;
387 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
388 &hdr->daddr, th->dest,
389 &hdr->saddr, ntohs(th->source),
390 skb->dev->ifindex, inet6_sdif(skb));
393 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398 if (sk->sk_state == TCP_TIME_WAIT) {
399 /* To increase the counter of ignored icmps for TCP-AO */
400 tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
401 inet_twsk_put(inet_twsk(sk));
404 seq = ntohl(th->seq);
405 fatal = icmpv6_err_convert(type, code, &err);
406 if (sk->sk_state == TCP_NEW_SYN_RECV) {
407 tcp_req_err(sk, seq, fatal);
411 if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
417 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
418 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
420 if (sk->sk_state == TCP_CLOSE)
423 if (static_branch_unlikely(&ip6_min_hopcount)) {
424 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
425 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
426 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
433 fastopen = rcu_dereference(tp->fastopen_rsk);
434 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
435 if (sk->sk_state != TCP_LISTEN &&
436 !between(seq, snd_una, tp->snd_nxt)) {
437 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
441 np = tcp_inet6_sk(sk);
443 if (type == NDISC_REDIRECT) {
444 if (!sock_owned_by_user(sk)) {
445 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
448 dst->ops->redirect(dst, sk, skb);
453 if (type == ICMPV6_PKT_TOOBIG) {
454 u32 mtu = ntohl(info);
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes so
		 * they should go through unfragmented).
		 */
460 if (sk->sk_state == TCP_LISTEN)
463 if (!ip6_sk_accept_pmtu(sk))
466 if (mtu < IPV6_MIN_MTU)
469 WRITE_ONCE(tp->mtu_info, mtu);
471 if (!sock_owned_by_user(sk))
472 tcp_v6_mtu_reduced(sk);
473 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
	/* Might be for a request_sock */
481 switch (sk->sk_state) {
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
487 if (fastopen && !fastopen->sk)
490 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
492 if (!sock_owned_by_user(sk)) {
493 WRITE_ONCE(sk->sk_err, err);
494 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
498 WRITE_ONCE(sk->sk_err_soft, err);
	/* check if this ICMP message allows revert of backoff. */
507 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
508 code == ICMPV6_NOROUTE)
509 tcp_ld_RTO_revert(sk, seq);
512 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
513 WRITE_ONCE(sk->sk_err, err);
516 WRITE_ONCE(sk->sk_err_soft, err);
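/* Transmit a SYN-ACK for a request socket: route the reply if no dst was
 * supplied, build the segment with tcp_make_synack(), pick the traffic
 * class (optionally reflecting the SYN's TOS), and send it with ip6_xmit()
 * using the listener's IPv6 options.
 */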
525 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
527 struct request_sock *req,
528 struct tcp_fastopen_cookie *foc,
529 enum tcp_synack_type synack_type,
530 struct sk_buff *syn_skb)
532 struct inet_request_sock *ireq = inet_rsk(req);
533 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
534 struct ipv6_txoptions *opt;
535 struct flowi6 *fl6 = &fl->u.ip6;
540 /* First, grab a route. */
541 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
542 IPPROTO_TCP)) == NULL)
545 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
548 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
549 &ireq->ir_v6_rmt_addr);
551 fl6->daddr = ireq->ir_v6_rmt_addr;
552 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
553 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
555 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
556 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
557 (np->tclass & INET_ECN_MASK) :
560 if (!INET_ECN_is_capable(tclass) &&
561 tcp_bpf_ca_needs_ecn((struct sock *)req))
562 tclass |= INET_ECN_ECT_0;
565 opt = ireq->ipv6_opt;
567 opt = rcu_dereference(np->opt);
568 err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
569 opt, tclass, READ_ONCE(sk->sk_priority));
571 err = net_xmit_eval(err);
579 static void tcp_v6_reqsk_destructor(struct request_sock *req)
581 kfree(inet_rsk(req)->ipv6_opt);
582 consume_skb(inet_rsk(req)->pktopts);
585 #ifdef CONFIG_TCP_MD5SIG
586 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
587 const struct in6_addr *addr,
590 return tcp_md5_do_lookup(sk, l3index,
591 (union tcp_md5_addr *)addr, AF_INET6);
594 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
595 const struct sock *addr_sk)
599 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
600 addr_sk->sk_bound_dev_if);
601 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
605 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
606 sockptr_t optval, int optlen)
608 struct tcp_md5sig cmd;
609 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
610 union tcp_ao_addr *addr;
616 if (optlen < sizeof(cmd))
619 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
622 if (sin6->sin6_family != AF_INET6)
625 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
626 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
628 if (optname == TCP_MD5SIG_EXT &&
629 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
630 prefixlen = cmd.tcpm_prefixlen;
631 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
635 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
638 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
639 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
640 struct net_device *dev;
643 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
644 if (dev && netif_is_l3_master(dev))
645 l3index = dev->ifindex;
		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
651 if (!dev || !l3index)
655 if (!cmd.tcpm_keylen) {
656 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
657 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
660 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
661 AF_INET6, prefixlen, l3index, flags);
664 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
667 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
668 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
673 if (tcp_ao_required(sk, addr, AF_INET,
674 l3flag ? l3index : -1, false))
675 return -EKEYREJECTED;
676 return tcp_md5_do_add(sk, addr,
677 AF_INET, prefixlen, l3index, flags,
678 cmd.tcpm_key, cmd.tcpm_keylen);
681 addr = (union tcp_md5_addr *)&sin6->sin6_addr;
	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
686 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
687 return -EKEYREJECTED;
689 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
690 cmd.tcpm_key, cmd.tcpm_keylen);
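/* TCP-MD5 signature helpers: tcp_v6_md5_hash_headers() hashes the IPv6
 * pseudo-header plus TCP header, tcp_v6_md5_hash_hdr() is the header-only
 * variant used for replies built by tcp_v6_send_response(), and
 * tcp_v6_md5_hash_skb() covers a full segment; all of them run on the
 * tcp_sigpool ahash API.
 */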
693 static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
694 const struct in6_addr *daddr,
695 const struct in6_addr *saddr,
696 const struct tcphdr *th, int nbytes)
698 struct tcp6_pseudohdr *bp;
699 struct scatterlist sg;
703 /* 1. TCP pseudo-header (RFC2460) */
706 bp->protocol = cpu_to_be32(IPPROTO_TCP);
707 bp->len = cpu_to_be32(nbytes);
709 _th = (struct tcphdr *)(bp + 1);
710 memcpy(_th, th, sizeof(*th));
713 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
714 ahash_request_set_crypt(hp->req, &sg, NULL,
715 sizeof(*bp) + sizeof(*th));
716 return crypto_ahash_update(hp->req);
719 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
720 const struct in6_addr *daddr, struct in6_addr *saddr,
721 const struct tcphdr *th)
723 struct tcp_sigpool hp;
725 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
726 goto clear_hash_nostart;
728 if (crypto_ahash_init(hp.req))
730 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
732 if (tcp_md5_hash_key(&hp, key))
734 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
735 if (crypto_ahash_final(hp.req))
738 tcp_sigpool_end(&hp);
742 tcp_sigpool_end(&hp);
744 memset(md5_hash, 0, 16);
748 static int tcp_v6_md5_hash_skb(char *md5_hash,
749 const struct tcp_md5sig_key *key,
750 const struct sock *sk,
751 const struct sk_buff *skb)
753 const struct tcphdr *th = tcp_hdr(skb);
754 const struct in6_addr *saddr, *daddr;
755 struct tcp_sigpool hp;
757 if (sk) { /* valid for establish/request sockets */
758 saddr = &sk->sk_v6_rcv_saddr;
759 daddr = &sk->sk_v6_daddr;
761 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
762 saddr = &ip6h->saddr;
763 daddr = &ip6h->daddr;
766 if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
767 goto clear_hash_nostart;
769 if (crypto_ahash_init(hp.req))
772 if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
774 if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
776 if (tcp_md5_hash_key(&hp, key))
778 ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
779 if (crypto_ahash_final(hp.req))
782 tcp_sigpool_end(&hp);
786 tcp_sigpool_end(&hp);
788 memset(md5_hash, 0, 16);
793 static void tcp_v6_init_req(struct request_sock *req,
794 const struct sock *sk_listener,
797 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
798 struct inet_request_sock *ireq = inet_rsk(req);
799 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
801 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
802 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
804 /* So that link locals have meaning */
805 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
806 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
807 ireq->ir_iif = tcp_v6_iif(skb);
809 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
810 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
811 np->rxopt.bits.rxinfo ||
812 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
813 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
814 refcount_inc(&skb->users);
819 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
822 struct request_sock *req)
824 tcp_v6_init_req(req, sk, skb);
826 if (security_inet_conn_request(sk, skb, req))
829 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
832 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
834 .obj_size = sizeof(struct tcp6_request_sock),
835 .rtx_syn_ack = tcp_rtx_synack,
836 .send_ack = tcp_v6_reqsk_send_ack,
837 .destructor = tcp_v6_reqsk_destructor,
838 .send_reset = tcp_v6_send_reset,
839 .syn_ack_timeout = tcp_syn_ack_timeout,
842 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
843 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
844 sizeof(struct ipv6hdr),
845 #ifdef CONFIG_TCP_MD5SIG
846 .req_md5_lookup = tcp_v6_md5_lookup,
847 .calc_md5_hash = tcp_v6_md5_hash_skb,
850 .ao_lookup = tcp_v6_ao_lookup_rsk,
851 .ao_calc_key = tcp_v6_ao_calc_key_rsk,
852 .ao_synack_hash = tcp_v6_ao_synack_hash,
854 #ifdef CONFIG_SYN_COOKIES
855 .cookie_init_seq = cookie_v6_init_sequence,
857 .route_req = tcp_v6_route_req,
858 .init_seq = tcp_v6_init_seq,
859 .init_ts_off = tcp_v6_init_ts_off,
860 .send_synack = tcp_v6_send_synack,
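/* Build and send a bare TCP segment (RST or ACK) that is not tied to a full
 * socket: the reply is derived from the incoming skb's headers, optionally
 * signed with MD5 or TCP-AO, and transmitted through the per-netns control
 * socket net->ipv6.tcp_sk.
 */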
863 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
864 u32 ack, u32 win, u32 tsval, u32 tsecr,
865 int oif, int rst, u8 tclass, __be32 label,
866 u32 priority, u32 txhash, struct tcp_key *key)
868 const struct tcphdr *th = tcp_hdr(skb);
870 struct sk_buff *buff;
872 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
873 struct sock *ctl_sk = net->ipv6.tcp_sk;
874 unsigned int tot_len = sizeof(struct tcphdr);
875 __be32 mrst = 0, *topt;
876 struct dst_entry *dst;
880 tot_len += TCPOLEN_TSTAMP_ALIGNED;
881 if (tcp_key_is_md5(key))
882 tot_len += TCPOLEN_MD5SIG_ALIGNED;
883 if (tcp_key_is_ao(key))
884 tot_len += tcp_ao_len(key->ao_key);
887 if (rst && !tcp_key_is_md5(key)) {
888 mrst = mptcp_reset_option(skb);
891 tot_len += sizeof(__be32);
895 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
899 skb_reserve(buff, MAX_TCP_HEADER);
901 t1 = skb_push(buff, tot_len);
902 skb_reset_transport_header(buff);
904 /* Swap the send and the receive. */
905 memset(t1, 0, sizeof(*t1));
906 t1->dest = th->source;
907 t1->source = th->dest;
908 t1->doff = tot_len / 4;
909 t1->seq = htonl(seq);
910 t1->ack_seq = htonl(ack);
911 t1->ack = !rst || !th->ack;
913 t1->window = htons(win);
915 topt = (__be32 *)(t1 + 1);
918 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
919 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
920 *topt++ = htonl(tsval);
921 *topt++ = htonl(tsecr);
927 #ifdef CONFIG_TCP_MD5SIG
928 if (tcp_key_is_md5(key)) {
929 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
930 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
931 tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
932 &ipv6_hdr(skb)->saddr,
933 &ipv6_hdr(skb)->daddr, t1);
937 if (tcp_key_is_ao(key)) {
938 *topt++ = htonl((TCPOPT_AO << 24) |
939 (tcp_ao_len(key->ao_key) << 16) |
940 (key->ao_key->sndid << 8) |
943 tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
945 (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
946 (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
951 memset(&fl6, 0, sizeof(fl6));
952 fl6.daddr = ipv6_hdr(skb)->saddr;
953 fl6.saddr = ipv6_hdr(skb)->daddr;
954 fl6.flowlabel = label;
956 buff->ip_summed = CHECKSUM_PARTIAL;
958 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
960 fl6.flowi6_proto = IPPROTO_TCP;
961 if (rt6_need_strict(&fl6.daddr) && !oif)
962 fl6.flowi6_oif = tcp_v6_iif(skb);
964 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
967 fl6.flowi6_oif = oif;
971 if (sk->sk_state == TCP_TIME_WAIT)
972 mark = inet_twsk(sk)->tw_mark;
974 mark = READ_ONCE(sk->sk_mark);
975 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
978 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
979 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
981 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
982 fl6.fl6_dport = t1->dest;
983 fl6.fl6_sport = t1->source;
984 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
985 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
	/* Pass a socket to ip6_dst_lookup_flow() even when sending a RST:
	 * the underlying function uses it to retrieve the network namespace.
	 */
991 if (sk && sk->sk_state != TCP_TIME_WAIT)
992 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
994 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
996 skb_dst_set(buff, dst);
997 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
998 tclass & ~INET_ECN_MASK, priority);
999 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1001 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
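/* Send a RST in response to a bad segment: seq/ack are derived from the
 * offending segment and, when MD5 or TCP-AO is in use, the matching key is
 * looked up first (through a listener lookup when only the MD5 option is
 * present) so that the RST itself is signed.
 */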
1008 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1010 const struct tcphdr *th = tcp_hdr(skb);
1011 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1012 const __u8 *md5_hash_location = NULL;
1013 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1014 bool allocated_traffic_key = false;
1016 const struct tcp_ao_hdr *aoh;
1017 struct tcp_key key = {};
1018 u32 seq = 0, ack_seq = 0;
1024 #ifdef CONFIG_TCP_MD5SIG
1025 unsigned char newhash[16];
1027 struct sock *sk1 = NULL;
	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
1036 if (!sk && !ipv6_unicast_destination(skb))
1039 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1040 /* Invalid TCP option size or twice included auth */
1041 if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
1043 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1046 #ifdef CONFIG_TCP_MD5SIG
1047 if (sk && sk_fullsock(sk)) {
		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
1053 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1054 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1056 key.type = TCP_KEY_MD5;
1057 } else if (md5_hash_location) {
1058 int dif = tcp_v6_iif_l3_slave(skb);
1059 int sdif = tcp_v6_sdif(skb);
		/* The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not lose security here:
		 * the incoming packet is checked against the md5 hash of the
		 * key that is found, and no RST is generated if the hash
		 * doesn't match.
		 */
1069 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1070 NULL, 0, &ipv6h->saddr, th->source,
1071 &ipv6h->daddr, ntohs(th->source),
		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
1079 l3index = tcp_v6_sdif(skb) ? dif : 0;
1081 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1084 key.type = TCP_KEY_MD5;
1086 genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
1087 if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
1093 seq = ntohl(th->ack_seq);
1095 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1098 #ifdef CONFIG_TCP_AO
1102 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1103 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
1104 &key.ao_key, &key.traffic_key,
1105 &allocated_traffic_key,
1106 &key.rcv_next, &key.sne))
1108 key.type = TCP_KEY_AO;
1113 oif = sk->sk_bound_dev_if;
1114 if (sk_fullsock(sk)) {
1115 trace_tcp_send_reset(sk, skb);
1116 if (inet6_test_bit(REPFLOW, sk))
1117 label = ip6_flowlabel(ipv6h);
1118 priority = READ_ONCE(sk->sk_priority);
1119 txhash = sk->sk_txhash;
1121 if (sk->sk_state == TCP_TIME_WAIT) {
1122 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1123 priority = inet_twsk(sk)->tw_priority;
1124 txhash = inet_twsk(sk)->tw_txhash;
1127 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1128 label = ip6_flowlabel(ipv6h);
1131 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1132 ipv6_get_dsfield(ipv6h), label, priority, txhash,
1135 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1137 if (allocated_traffic_key)
1138 kfree(key.traffic_key);
1143 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1144 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1145 struct tcp_key *key, u8 tclass,
1146 __be32 label, u32 priority, u32 txhash)
1148 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
1149 tclass, label, priority, txhash, key);
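/* ACK replies for sockets that are not full sockets: tcp_v6_timewait_ack()
 * answers segments that hit a TIME_WAIT socket and tcp_v6_reqsk_send_ack()
 * (below) answers segments that hit a request socket; both reuse
 * tcp_v6_send_ack() with the appropriate MD5 or TCP-AO key.
 */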
1152 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1154 struct inet_timewait_sock *tw = inet_twsk(sk);
1155 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1156 struct tcp_key key = {};
1157 #ifdef CONFIG_TCP_AO
1158 struct tcp_ao_info *ao_info;
1160 if (static_branch_unlikely(&tcp_ao_needed.key)) {
1162 /* FIXME: the segment to-be-acked is not verified yet */
1163 ao_info = rcu_dereference(tcptw->ao_info);
1165 const struct tcp_ao_hdr *aoh;
1167 /* Invalid TCP option size or twice included auth */
1168 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1171 key.ao_key = tcp_ao_established_key(ao_info,
1172 aoh->rnext_keyid, -1);
1176 struct tcp_ao_key *rnext_key;
1178 key.traffic_key = snd_other_key(key.ao_key);
1179 /* rcv_next switches to our rcv_next */
1180 rnext_key = READ_ONCE(ao_info->rnext_key);
1181 key.rcv_next = rnext_key->rcvid;
1182 key.sne = READ_ONCE(ao_info->snd_sne);
1183 key.type = TCP_KEY_AO;
1187 #ifdef CONFIG_TCP_MD5SIG
1188 } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1189 key.md5_key = tcp_twsk_md5_key(tcptw);
1191 key.type = TCP_KEY_MD5;
1195 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1196 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1197 tcp_tw_tsval(tcptw),
1198 tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
1199 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1202 #ifdef CONFIG_TCP_AO
1208 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1209 struct request_sock *req)
1211 struct tcp_key key = {};
1213 #ifdef CONFIG_TCP_AO
1214 if (static_branch_unlikely(&tcp_ao_needed.key) &&
1215 tcp_rsk_used_ao(req)) {
1216 const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
1217 const struct tcp_ao_hdr *aoh;
1220 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1221 /* Invalid TCP option size or twice included auth */
1222 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
1226 key.ao_key = tcp_ao_do_lookup(sk, l3index,
1227 (union tcp_ao_addr *)addr,
1228 AF_INET6, aoh->rnext_keyid, -1);
1229 if (unlikely(!key.ao_key)) {
1230 /* Send ACK with any matching MKT for the peer */
1231 key.ao_key = tcp_ao_do_lookup(sk, l3index,
1232 (union tcp_ao_addr *)addr,
			/* Matching key disappeared (user removed the key?)
			 * let the handshake timeout.
			 */
1238 net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
1240 ntohs(tcp_hdr(skb)->source),
1241 &ipv6_hdr(skb)->daddr,
1242 ntohs(tcp_hdr(skb)->dest));
1246 key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
1247 if (!key.traffic_key)
1250 key.type = TCP_KEY_AO;
1251 key.rcv_next = aoh->keyid;
1252 tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
1256 #ifdef CONFIG_TCP_MD5SIG
1257 } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
1258 int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1260 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
1263 key.type = TCP_KEY_MD5;
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 *
	 * RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
1275 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1276 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1277 tcp_rsk(req)->rcv_nxt,
1278 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1279 tcp_rsk_tsval(tcp_rsk(req)),
1280 READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1281 &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1282 READ_ONCE(sk->sk_priority),
1283 READ_ONCE(tcp_rsk(req)->txhash));
1284 if (tcp_key_is_ao(&key))
1285 kfree(key.traffic_key);
1289 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1291 #ifdef CONFIG_SYN_COOKIES
1292 const struct tcphdr *th = tcp_hdr(skb);
1295 sk = cookie_v6_check(sk, skb);
1300 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1301 struct tcphdr *th, u32 *cookie)
1304 #ifdef CONFIG_SYN_COOKIES
1305 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1306 &tcp_request_sock_ipv6_ops, sk, th);
1308 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1309 tcp_synq_overflow(sk);
1315 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1317 if (skb->protocol == htons(ETH_P_IP))
1318 return tcp_v4_conn_request(sk, skb);
1320 if (!ipv6_unicast_destination(skb))
1323 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1324 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1328 return tcp_conn_request(&tcp6_request_sock_ops,
1329 &tcp_request_sock_ipv6_ops, sk, skb);
1333 return 0; /* don't send reset */
1336 static void tcp_v6_restore_cb(struct sk_buff *skb)
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
1342 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1343 sizeof(struct inet6_skb_parm));
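/* Create the child socket once the handshake completes: the v4-mapped
 * branch delegates to tcp_v4_syn_recv_sock() and re-points the af_ops,
 * while the native IPv6 branch routes the child, copies addresses and IPv6
 * options from the request and the listener, copies any MD5/AO keys, and
 * finally inserts the child into the established hash.
 */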
1346 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1347 struct request_sock *req,
1348 struct dst_entry *dst,
1349 struct request_sock *req_unhash,
1352 struct inet_request_sock *ireq;
1353 struct ipv6_pinfo *newnp;
1354 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1355 struct ipv6_txoptions *opt;
1356 struct inet_sock *newinet;
1357 bool found_dup_sk = false;
1358 struct tcp_sock *newtp;
1360 #ifdef CONFIG_TCP_MD5SIG
1361 struct tcp_md5sig_key *key;
1366 if (skb->protocol == htons(ETH_P_IP)) {
1371 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1372 req_unhash, own_req);
1377 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1379 newnp = tcp_inet6_sk(newsk);
1380 newtp = tcp_sk(newsk);
1382 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1384 newnp->saddr = newsk->sk_v6_rcv_saddr;
1386 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1387 if (sk_is_mptcp(newsk))
1388 mptcpv6_handle_mapped(newsk, true);
1389 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1390 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
1391 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1394 newnp->ipv6_mc_list = NULL;
1395 newnp->ipv6_ac_list = NULL;
1396 newnp->ipv6_fl_list = NULL;
1397 newnp->pktoptions = NULL;
1399 newnp->mcast_oif = inet_iif(skb);
1400 newnp->mcast_hops = ip_hdr(skb)->ttl;
1401 newnp->rcv_flowinfo = 0;
1402 if (inet6_test_bit(REPFLOW, sk))
1403 newnp->flow_label = 0;
		/* No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */
		/* It is a tricky place. Until this moment IPv4 tcp
		 * worked with IPv6 icsk.icsk_af_ops.
		 * Sync it now.
		 */
1415 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1420 ireq = inet_rsk(req);
1422 if (sk_acceptq_is_full(sk))
1426 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1431 newsk = tcp_create_openreq_child(sk, req, skb);
	/* No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */
1441 newsk->sk_gso_type = SKB_GSO_TCPV6;
1442 ip6_dst_store(newsk, dst, NULL, NULL);
1443 inet6_sk_rx_dst_set(newsk, skb);
1445 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1447 newtp = tcp_sk(newsk);
1448 newinet = inet_sk(newsk);
1449 newnp = tcp_inet6_sk(newsk);
1451 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1453 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1454 newnp->saddr = ireq->ir_v6_loc_addr;
1455 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1456 newsk->sk_bound_dev_if = ireq->ir_iif;
	/* Now IPv6 options...
	 *
	 * First: no IPv4 options.
	 */
1462 newinet->inet_opt = NULL;
1463 newnp->ipv6_mc_list = NULL;
1464 newnp->ipv6_ac_list = NULL;
1465 newnp->ipv6_fl_list = NULL;
1468 newnp->rxopt.all = np->rxopt.all;
1470 newnp->pktoptions = NULL;
1472 newnp->mcast_oif = tcp_v6_iif(skb);
1473 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1474 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1475 if (inet6_test_bit(REPFLOW, sk))
1476 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
1481 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1482 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
	/* Clone native IPv6 options from the listening socket (if any).
	 *
	 * Yes, keeping a reference count would be much more clever,
	 * but we make one more thing here: reattach optmem to newsk.
	 */
1490 opt = ireq->ipv6_opt;
1492 opt = rcu_dereference(np->opt);
1494 opt = ipv6_dup_options(newsk, opt);
1495 RCU_INIT_POINTER(newnp->opt, opt);
1497 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1499 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1502 tcp_ca_openreq_child(newsk, dst);
1504 tcp_sync_mss(newsk, dst_mtu(dst));
1505 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1507 tcp_initialize_rcv_mss(newsk);
1509 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1510 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1512 #ifdef CONFIG_TCP_MD5SIG
1513 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1515 if (!tcp_rsk_used_ao(req)) {
1516 /* Copy over the MD5 key from the original socket */
1517 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1519 const union tcp_md5_addr *addr;
1521 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
1522 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
1523 inet_csk_prepare_forced_close(newsk);
1530 #ifdef CONFIG_TCP_AO
1531 /* Copy over tcp_ao_info if any */
1532 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
1536 if (__inet_inherit_port(sk, newsk) < 0) {
1537 inet_csk_prepare_forced_close(newsk);
1541 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1544 tcp_move_syn(newtp, req);
1546 /* Clone pktoptions received with SYN, if we own the req */
1547 if (ireq->pktopts) {
1548 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1549 consume_skb(ireq->pktopts);
1550 ireq->pktopts = NULL;
1551 if (newnp->pktoptions)
1552 tcp_v6_restore_cb(newnp->pktoptions);
1555 if (!req_unhash && found_dup_sk) {
		/* This code path should only be executed in the
		 * syncookie case
		 */
1559 bh_unlock_sock(newsk);
1568 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1576 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1586 INDIRECT_CALLABLE_SCOPE
1587 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1589 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1590 struct sk_buff *opt_skb = NULL;
1591 enum skb_drop_reason reason;
1592 struct tcp_sock *tp;
	/* Imagine: socket is IPv6. IPv4 packet arrives,
	 * goes to IPv4 receive handler and backlogged.
	 * From backlog it always goes here. Kerboom...
	 * Fortunately, tcp_rcv_established and rcv_established
	 * handle them correctly, but it is not the case with
	 * tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */
1602 if (skb->protocol == htons(ETH_P_IP))
1603 return tcp_v4_do_rcv(sk, skb);
	/* socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */
	/* Do Stevens' IPV6_PKTOPTIONS.
	 *
	 * Yes, guys, it is the only place in our code, where we
	 * may make it not affecting IPv4.
	 * The rest of the code is protocol independent,
	 * and I do not like the idea to uglify IPv4.
	 *
	 * Actually, all the idea behind IPV6_PKTOPTIONS
	 * looks not very well thought out. For now we latch
	 * options, received in the last packet, enqueued
	 * by tcp. Feel free to propose a better solution.
	 */
1624 opt_skb = skb_clone_and_charge_r(skb, sk);
1626 reason = SKB_DROP_REASON_NOT_SPECIFIED;
1627 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1628 struct dst_entry *dst;
1630 dst = rcu_dereference_protected(sk->sk_rx_dst,
1631 lockdep_sock_is_held(sk));
1633 sock_rps_save_rxhash(sk, skb);
1634 sk_mark_napi_id(sk, skb);
1636 if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1637 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1638 dst, sk->sk_rx_dst_cookie) == NULL) {
1639 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1644 tcp_rcv_established(sk, skb);
1646 goto ipv6_pktoptions;
1650 if (tcp_checksum_complete(skb))
1653 if (sk->sk_state == TCP_LISTEN) {
1654 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1660 if (tcp_child_process(sk, nsk, skb))
1663 __kfree_skb(opt_skb);
1667 sock_rps_save_rxhash(sk, skb);
1669 if (tcp_rcv_state_process(sk, skb))
1672 goto ipv6_pktoptions;
1676 tcp_v6_send_reset(sk, skb);
1679 __kfree_skb(opt_skb);
1680 kfree_skb_reason(skb, reason);
1683 reason = SKB_DROP_REASON_TCP_CSUM;
1684 trace_tcp_bad_csum(skb);
1685 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1686 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	/* Do you ask, what is it?
	 *
	 * 1. skb was enqueued by tcp.
	 * 2. skb is added to tail of read queue, rather than out of order.
	 * 3. socket is not in passive state.
	 * 4. Finally, it really contains options, which the user wants to receive.
	 */
	tp = tcp_sk(sk);
1699 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1700 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1701 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1702 np->mcast_oif = tcp_v6_iif(opt_skb);
1703 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1704 WRITE_ONCE(np->mcast_hops,
1705 ipv6_hdr(opt_skb)->hop_limit);
1706 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1707 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1708 if (inet6_test_bit(REPFLOW, sk))
1709 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1710 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1711 tcp_v6_restore_cb(opt_skb);
1712 opt_skb = xchg(&np->pktoptions, opt_skb);
1714 __kfree_skb(opt_skb);
1715 opt_skb = xchg(&np->pktoptions, NULL);
1719 consume_skb(opt_skb);
1723 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1724 const struct tcphdr *th)
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
1731 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1732 sizeof(struct inet6_skb_parm));
1735 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1736 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1737 skb->len - th->doff*4);
1738 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1739 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1740 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1741 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1742 TCP_SKB_CB(skb)->sacked = 0;
1743 TCP_SKB_CB(skb)->has_rxtstamp =
1744 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
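/* Main IPv6 TCP receive entry point (registered in tcpv6_protocol below):
 * validate the TCP header and checksum, look up the owning socket, and
 * dispatch to the request-socket, TIME_WAIT, listen or established handling
 * paths, taking the socket lock or the backlog for full sockets.
 */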
1747 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1749 enum skb_drop_reason drop_reason;
1750 int sdif = inet6_sdif(skb);
1751 int dif = inet6_iif(skb);
1752 const struct tcphdr *th;
1753 const struct ipv6hdr *hdr;
1757 struct net *net = dev_net(skb->dev);
1759 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1760 if (skb->pkt_type != PACKET_HOST)
	/*
	 *	Count it even if it's bad.
	 */
1766 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1768 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1771 th = (const struct tcphdr *)skb->data;
1773 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1774 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1777 if (!pskb_may_pull(skb, th->doff*4))
1780 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1783 th = (const struct tcphdr *)skb->data;
1784 hdr = ipv6_hdr(skb);
1787 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1788 th->source, th->dest, inet6_iif(skb), sdif,
1794 if (sk->sk_state == TCP_TIME_WAIT)
1797 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1798 struct request_sock *req = inet_reqsk(sk);
1799 bool req_stolen = false;
1802 sk = req->rsk_listener;
1803 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1804 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1806 drop_reason = tcp_inbound_hash(sk, req, skb,
1807 &hdr->saddr, &hdr->daddr,
1808 AF_INET6, dif, sdif);
1810 sk_drops_add(sk, skb);
1814 if (tcp_checksum_complete(skb)) {
1818 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1819 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1821 inet_csk_reqsk_queue_drop_and_put(sk, req);
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
1833 if (!tcp_filter(sk, skb)) {
1834 th = (const struct tcphdr *)skb->data;
1835 hdr = ipv6_hdr(skb);
1836 tcp_v6_fill_cb(skb, hdr, th);
1837 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1839 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
1849 tcp_v6_restore_cb(skb);
1853 goto discard_and_relse;
1858 tcp_v6_restore_cb(skb);
1859 } else if (tcp_child_process(sk, nsk, skb)) {
1860 tcp_v6_send_reset(nsk, skb);
1861 goto discard_and_relse;
1868 if (static_branch_unlikely(&ip6_min_hopcount)) {
1869 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1870 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
1871 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1872 drop_reason = SKB_DROP_REASON_TCP_MINTTL;
1873 goto discard_and_relse;
1877 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1878 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1879 goto discard_and_relse;
1882 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
1883 AF_INET6, dif, sdif);
1885 goto discard_and_relse;
1889 if (tcp_filter(sk, skb)) {
1890 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1891 goto discard_and_relse;
1893 th = (const struct tcphdr *)skb->data;
1894 hdr = ipv6_hdr(skb);
1895 tcp_v6_fill_cb(skb, hdr, th);
1899 if (sk->sk_state == TCP_LISTEN) {
1900 ret = tcp_v6_do_rcv(sk, skb);
1901 goto put_and_return;
1904 sk_incoming_cpu_update(sk);
1906 bh_lock_sock_nested(sk);
1907 tcp_segs_in(tcp_sk(sk), skb);
1909 if (!sock_owned_by_user(sk)) {
1910 ret = tcp_v6_do_rcv(sk, skb);
1912 if (tcp_add_backlog(sk, skb, &drop_reason))
1913 goto discard_and_relse;
1919 return ret ? -1 : 0;
1922 drop_reason = SKB_DROP_REASON_NO_SOCKET;
1923 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1926 tcp_v6_fill_cb(skb, hdr, th);
1928 if (tcp_checksum_complete(skb)) {
1930 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1931 trace_tcp_bad_csum(skb);
1932 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1934 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1936 tcp_v6_send_reset(NULL, skb);
1940 SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1941 kfree_skb_reason(skb, drop_reason);
1945 sk_drops_add(sk, skb);
1951 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1952 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1953 inet_twsk_put(inet_twsk(sk));
1957 tcp_v6_fill_cb(skb, hdr, th);
1959 if (tcp_checksum_complete(skb)) {
1960 inet_twsk_put(inet_twsk(sk));
1964 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1969 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1970 skb, __tcp_hdrlen(th),
1971 &ipv6_hdr(skb)->saddr, th->source,
1972 &ipv6_hdr(skb)->daddr,
1974 tcp_v6_iif_l3_slave(skb),
1977 struct inet_timewait_sock *tw = inet_twsk(sk);
1978 inet_twsk_deschedule_put(tw);
1980 tcp_v6_restore_cb(skb);
1988 tcp_v6_timewait_ack(sk, skb);
1991 tcp_v6_send_reset(sk, skb);
1992 inet_twsk_deschedule_put(inet_twsk(sk));
1994 case TCP_TW_SUCCESS:
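/* Early demux: on the input fast path, look up an established socket from
 * the packet headers alone and cache it (and its rx dst, when still valid)
 * on the skb, so the main receive path can skip the full lookups.
 */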
2000 void tcp_v6_early_demux(struct sk_buff *skb)
2002 struct net *net = dev_net(skb->dev);
2003 const struct ipv6hdr *hdr;
2004 const struct tcphdr *th;
2007 if (skb->pkt_type != PACKET_HOST)
2010 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
2013 hdr = ipv6_hdr(skb);
2016 if (th->doff < sizeof(struct tcphdr) / 4)
2019 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
2020 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
2021 &hdr->saddr, th->source,
2022 &hdr->daddr, ntohs(th->dest),
2023 inet6_iif(skb), inet6_sdif(skb));
2026 skb->destructor = sock_edemux;
2027 if (sk_fullsock(sk)) {
2028 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
2031 dst = dst_check(dst, sk->sk_rx_dst_cookie);
2033 sk->sk_rx_dst_ifindex == skb->skb_iif)
2034 skb_dst_set_noref(skb, dst);
2039 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
2040 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2041 .twsk_unique = tcp_twsk_unique,
2042 .twsk_destructor = tcp_twsk_destructor,
2045 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
2047 __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
2050 const struct inet_connection_sock_af_ops ipv6_specific = {
2051 .queue_xmit = inet6_csk_xmit,
2052 .send_check = tcp_v6_send_check,
2053 .rebuild_header = inet6_sk_rebuild_header,
2054 .sk_rx_dst_set = inet6_sk_rx_dst_set,
2055 .conn_request = tcp_v6_conn_request,
2056 .syn_recv_sock = tcp_v6_syn_recv_sock,
2057 .net_header_len = sizeof(struct ipv6hdr),
2058 .setsockopt = ipv6_setsockopt,
2059 .getsockopt = ipv6_getsockopt,
2060 .addr2sockaddr = inet6_csk_addr2sockaddr,
2061 .sockaddr_len = sizeof(struct sockaddr_in6),
2062 .mtu_reduced = tcp_v6_mtu_reduced,
2065 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2066 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
2067 #ifdef CONFIG_TCP_MD5SIG
2068 .md5_lookup = tcp_v6_md5_lookup,
2069 .calc_md5_hash = tcp_v6_md5_hash_skb,
2070 .md5_parse = tcp_v6_parse_md5_keys,
2072 #ifdef CONFIG_TCP_AO
2073 .ao_lookup = tcp_v6_ao_lookup,
2074 .calc_ao_hash = tcp_v6_ao_hash_skb,
2075 .ao_parse = tcp_v6_parse_ao,
2076 .ao_calc_key_sk = tcp_v6_ao_calc_key_sk,
/*
 *	TCP over IPv4 via INET6 API
 */
2084 static const struct inet_connection_sock_af_ops ipv6_mapped = {
2085 .queue_xmit = ip_queue_xmit,
2086 .send_check = tcp_v4_send_check,
2087 .rebuild_header = inet_sk_rebuild_header,
2088 .sk_rx_dst_set = inet_sk_rx_dst_set,
2089 .conn_request = tcp_v6_conn_request,
2090 .syn_recv_sock = tcp_v6_syn_recv_sock,
2091 .net_header_len = sizeof(struct iphdr),
2092 .setsockopt = ipv6_setsockopt,
2093 .getsockopt = ipv6_getsockopt,
2094 .addr2sockaddr = inet6_csk_addr2sockaddr,
2095 .sockaddr_len = sizeof(struct sockaddr_in6),
2096 .mtu_reduced = tcp_v4_mtu_reduced,
2099 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2100 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
2101 #ifdef CONFIG_TCP_MD5SIG
2102 .md5_lookup = tcp_v4_md5_lookup,
2103 .calc_md5_hash = tcp_v4_md5_hash_skb,
2104 .md5_parse = tcp_v6_parse_md5_keys,
2106 #ifdef CONFIG_TCP_AO
2107 .ao_lookup = tcp_v6_ao_lookup,
2108 .calc_ao_hash = tcp_v4_ao_hash_skb,
2109 .ao_parse = tcp_v6_parse_ao,
2110 .ao_calc_key_sk = tcp_v4_ao_calc_key_sk,
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
2118 static int tcp_v6_init_sock(struct sock *sk)
2120 struct inet_connection_sock *icsk = inet_csk(sk);
2124 icsk->icsk_af_ops = &ipv6_specific;
2126 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
2127 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
2133 #ifdef CONFIG_PROC_FS
2134 /* Proc filesystem TCPv6 sock list dumping. */
2135 static void get_openreq6(struct seq_file *seq,
2136 const struct request_sock *req, int i)
2138 long ttd = req->rsk_timer.expires - jiffies;
2139 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
2140 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
2146 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2147 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
2149 src->s6_addr32[0], src->s6_addr32[1],
2150 src->s6_addr32[2], src->s6_addr32[3],
2151 inet_rsk(req)->ir_num,
2152 dest->s6_addr32[0], dest->s6_addr32[1],
2153 dest->s6_addr32[2], dest->s6_addr32[3],
2154 ntohs(inet_rsk(req)->ir_rmt_port),
2156 0, 0, /* could print option size, but that is af dependent. */
2157 1, /* timers active (only the expire timer) */
2158 jiffies_to_clock_t(ttd),
2160 from_kuid_munged(seq_user_ns(seq),
2161 sock_i_uid(req->rsk_listener)),
2162 0, /* non standard timer */
2163 0, /* open_requests have no inode */
2167 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2169 const struct in6_addr *dest, *src;
2172 unsigned long timer_expires;
2173 const struct inet_sock *inet = inet_sk(sp);
2174 const struct tcp_sock *tp = tcp_sk(sp);
2175 const struct inet_connection_sock *icsk = inet_csk(sp);
2176 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2180 dest = &sp->sk_v6_daddr;
2181 src = &sp->sk_v6_rcv_saddr;
2182 destp = ntohs(inet->inet_dport);
2183 srcp = ntohs(inet->inet_sport);
2185 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2186 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2187 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2189 timer_expires = icsk->icsk_timeout;
2190 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2192 timer_expires = icsk->icsk_timeout;
2193 } else if (timer_pending(&sp->sk_timer)) {
2195 timer_expires = sp->sk_timer.expires;
2198 timer_expires = jiffies;
2201 state = inet_sk_state_load(sp);
2202 if (state == TCP_LISTEN)
2203 rx_queue = READ_ONCE(sp->sk_ack_backlog);
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
2208 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2209 READ_ONCE(tp->copied_seq), 0);
2212 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2213 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2215 src->s6_addr32[0], src->s6_addr32[1],
2216 src->s6_addr32[2], src->s6_addr32[3], srcp,
2217 dest->s6_addr32[0], dest->s6_addr32[1],
2218 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2220 READ_ONCE(tp->write_seq) - tp->snd_una,
2223 jiffies_delta_to_clock_t(timer_expires - jiffies),
2224 icsk->icsk_retransmits,
2225 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2226 icsk->icsk_probes_out,
2228 refcount_read(&sp->sk_refcnt), sp,
2229 jiffies_to_clock_t(icsk->icsk_rto),
2230 jiffies_to_clock_t(icsk->icsk_ack.ato),
2231 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2233 state == TCP_LISTEN ?
2234 fastopenq->max_qlen :
2235 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2239 static void get_timewait6_sock(struct seq_file *seq,
2240 struct inet_timewait_sock *tw, int i)
2242 long delta = tw->tw_timer.expires - jiffies;
2243 const struct in6_addr *dest, *src;
2246 dest = &tw->tw_v6_daddr;
2247 src = &tw->tw_v6_rcv_saddr;
2248 destp = ntohs(tw->tw_dport);
2249 srcp = ntohs(tw->tw_sport);
2252 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2253 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2255 src->s6_addr32[0], src->s6_addr32[1],
2256 src->s6_addr32[2], src->s6_addr32[3], srcp,
2257 dest->s6_addr32[0], dest->s6_addr32[1],
2258 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2259 tw->tw_substate, 0, 0,
2260 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2261 refcount_read(&tw->tw_refcnt), tw);
2264 static int tcp6_seq_show(struct seq_file *seq, void *v)
2266 struct tcp_iter_state *st;
2267 struct sock *sk = v;
2269 if (v == SEQ_START_TOKEN) {
2274 "st tx_queue rx_queue tr tm->when retrnsmt"
2275 " uid timeout inode\n");
2280 if (sk->sk_state == TCP_TIME_WAIT)
2281 get_timewait6_sock(seq, v, st->num);
2282 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2283 get_openreq6(seq, v, st->num);
2285 get_tcp6_sock(seq, v, st->num);
2290 static const struct seq_operations tcp6_seq_ops = {
2291 .show = tcp6_seq_show,
2292 .start = tcp_seq_start,
2293 .next = tcp_seq_next,
2294 .stop = tcp_seq_stop,
2297 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2301 int __net_init tcp6_proc_init(struct net *net)
2303 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2304 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2309 void tcp6_proc_exit(struct net *net)
2311 remove_proc_entry("tcp6", net->proc_net);
2315 struct proto tcpv6_prot = {
2317 .owner = THIS_MODULE,
2319 .pre_connect = tcp_v6_pre_connect,
2320 .connect = tcp_v6_connect,
2321 .disconnect = tcp_disconnect,
2322 .accept = inet_csk_accept,
2324 .init = tcp_v6_init_sock,
2325 .destroy = tcp_v4_destroy_sock,
2326 .shutdown = tcp_shutdown,
2327 .setsockopt = tcp_setsockopt,
2328 .getsockopt = tcp_getsockopt,
2329 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2330 .keepalive = tcp_set_keepalive,
2331 .recvmsg = tcp_recvmsg,
2332 .sendmsg = tcp_sendmsg,
2333 .splice_eof = tcp_splice_eof,
2334 .backlog_rcv = tcp_v6_do_rcv,
2335 .release_cb = tcp_release_cb,
2337 .unhash = inet_unhash,
2338 .get_port = inet_csk_get_port,
2339 .put_port = inet_put_port,
2340 #ifdef CONFIG_BPF_SYSCALL
2341 .psock_update_sk_prot = tcp_bpf_update_proto,
2343 .enter_memory_pressure = tcp_enter_memory_pressure,
2344 .leave_memory_pressure = tcp_leave_memory_pressure,
2345 .stream_memory_free = tcp_stream_memory_free,
2346 .sockets_allocated = &tcp_sockets_allocated,
2348 .memory_allocated = &tcp_memory_allocated,
2349 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
2351 .memory_pressure = &tcp_memory_pressure,
2352 .orphan_count = &tcp_orphan_count,
2353 .sysctl_mem = sysctl_tcp_mem,
2354 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2355 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2356 .max_header = MAX_TCP_HEADER,
2357 .obj_size = sizeof(struct tcp6_sock),
2358 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2359 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2360 .twsk_prot = &tcp6_timewait_sock_ops,
2361 .rsk_prot = &tcp6_request_sock_ops,
2363 .no_autobind = true,
2364 .diag_destroy = tcp_abort,
2366 EXPORT_SYMBOL_GPL(tcpv6_prot);
2368 static const struct inet6_protocol tcpv6_protocol = {
2369 .handler = tcp_v6_rcv,
2370 .err_handler = tcp_v6_err,
2371 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2374 static struct inet_protosw tcpv6_protosw = {
2375 .type = SOCK_STREAM,
2376 .protocol = IPPROTO_TCP,
2377 .prot = &tcpv6_prot,
2378 .ops = &inet6_stream_ops,
2379 .flags = INET_PROTOSW_PERMANENT |
2383 static int __net_init tcpv6_net_init(struct net *net)
2385 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2386 SOCK_RAW, IPPROTO_TCP, net);
2389 static void __net_exit tcpv6_net_exit(struct net *net)
2391 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2394 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2396 tcp_twsk_purge(net_exit_list, AF_INET6);
2399 static struct pernet_operations tcpv6_net_ops = {
2400 .init = tcpv6_net_init,
2401 .exit = tcpv6_net_exit,
2402 .exit_batch = tcpv6_net_exit_batch,
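/* Module init: register the IPPROTO_TCP inet6 protocol handler, the
 * SOCK_STREAM protosw entry and the per-netns control socket, then let
 * MPTCP hook in; each step is unwound in reverse order on failure, and
 * tcpv6_exit() mirrors the whole sequence.
 */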
2405 int __init tcpv6_init(void)
2409 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2413 /* register inet6 protocol */
2414 ret = inet6_register_protosw(&tcpv6_protosw);
2416 goto out_tcpv6_protocol;
2418 ret = register_pernet_subsys(&tcpv6_net_ops);
2420 goto out_tcpv6_protosw;
2422 ret = mptcpv6_init();
2424 goto out_tcpv6_pernet_subsys;
2429 out_tcpv6_pernet_subsys:
2430 unregister_pernet_subsys(&tcpv6_net_ops);
2432 inet6_unregister_protosw(&tcpv6_protosw);
2434 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2438 void tcpv6_exit(void)
2440 unregister_pernet_subsys(&tcpv6_net_ops);
2441 inet6_unregister_protosw(&tcpv6_protosw);
2442 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);