// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index);
#endif
/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
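
/* Cache the validated incoming route on the socket so that the established
 * fast path (and early demux) can reuse it without a fresh route lookup, as
 * long as the stored cookie and incoming interface still match.
 */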
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
	}
}
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * out of the bound specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
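
/* Active open: validate the destination, handle v4-mapped destinations by
 * falling back to tcp_v4_connect(), pick a route and source address, hash the
 * socket into the established table and finally send the SYN.
 */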
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct in6_addr *saddr = NULL, *final_p, final;
151 struct inet_timewait_death_row *tcp_death_row;
152 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153 struct inet_sock *inet = inet_sk(sk);
154 struct tcp_sock *tp = tcp_sk(sk);
155 struct net *net = sock_net(sk);
156 struct ipv6_txoptions *opt;
157 struct dst_entry *dst;
162 if (addr_len < SIN6_LEN_RFC2133)
165 if (usin->sin6_family != AF_INET6)
166 return -EAFNOSUPPORT;
168 memset(&fl6, 0, sizeof(fl6));
	fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
	IP6_ECN_flow_init(fl6.flowlabel);
	if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
		struct ip6_flowlabel *flowlabel;

		flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
		if (IS_ERR(flowlabel))
			return -EINVAL;
		fl6_sock_release(flowlabel);
	}
	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}
194 addr_type = ipv6_addr_type(&usin->sin6_addr);
	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
200 if (addr_len >= sizeof(struct sockaddr_in6) &&
201 usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
208 sk->sk_bound_dev_if = usin->sin6_scope_id;
211 /* Connect to link-local address requires an interface */
212 if (!sk->sk_bound_dev_if)
216 if (tp->rx_opt.ts_recent_stamp &&
217 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
218 tp->rx_opt.ts_recent = 0;
219 tp->rx_opt.ts_recent_stamp = 0;
220 WRITE_ONCE(tp->write_seq, 0);
223 sk->sk_v6_daddr = usin->sin6_addr;
224 np->flow_label = fl6.flowlabel;
230 if (addr_type & IPV6_ADDR_MAPPED) {
231 u32 exthdrlen = icsk->icsk_ext_hdr_len;
232 struct sockaddr_in sin;
234 if (ipv6_only_sock(sk))
237 sin.sin_family = AF_INET;
238 sin.sin_port = usin->sin6_port;
239 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
241 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
242 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
244 mptcpv6_handle_mapped(sk, true);
245 sk->sk_backlog_rcv = tcp_v4_do_rcv;
246 #ifdef CONFIG_TCP_MD5SIG
247 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
250 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
253 icsk->icsk_ext_hdr_len = exthdrlen;
254 /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
255 WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
257 mptcpv6_handle_mapped(sk, false);
258 sk->sk_backlog_rcv = tcp_v6_do_rcv;
259 #ifdef CONFIG_TCP_MD5SIG
260 tp->af_specific = &tcp_sock_ipv6_specific;
264 np->saddr = sk->sk_v6_rcv_saddr;
269 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
270 saddr = &sk->sk_v6_rcv_saddr;
272 fl6.flowi6_proto = IPPROTO_TCP;
273 fl6.daddr = sk->sk_v6_daddr;
274 fl6.saddr = saddr ? *saddr : np->saddr;
275 fl6.flowi6_oif = sk->sk_bound_dev_if;
276 fl6.flowi6_mark = sk->sk_mark;
277 fl6.fl6_dport = usin->sin6_port;
278 fl6.fl6_sport = inet->inet_sport;
279 fl6.flowi6_uid = sk->sk_uid;
281 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
282 final_p = fl6_update_dst(&fl6, opt, &final);
284 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
286 dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
292 tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
295 struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
296 struct in6_addr prev_v6_rcv_saddr;
298 if (icsk->icsk_bind2_hash) {
299 prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
300 sk, net, inet->inet_num);
301 prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
304 sk->sk_v6_rcv_saddr = *saddr;
306 if (prev_addr_hashbucket) {
307 err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
309 sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
315 /* set the source address */
317 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
319 sk->sk_gso_type = SKB_GSO_TCPV6;
320 ip6_dst_store(sk, dst, NULL, NULL);
322 icsk->icsk_ext_hdr_len = 0;
324 icsk->icsk_ext_hdr_len = opt->opt_flen +
327 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
329 inet->inet_dport = usin->sin6_port;
331 tcp_set_state(sk, TCP_SYN_SENT);
332 err = inet6_hash_connect(tcp_death_row, sk);
338 if (likely(!tp->repair)) {
340 WRITE_ONCE(tp->write_seq,
341 secure_tcpv6_seq(np->saddr.s6_addr32,
342 sk->sk_v6_daddr.s6_addr32,
345 tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
346 sk->sk_v6_daddr.s6_addr32);
349 if (tcp_fastopen_defer_connect(sk, &err))
354 err = tcp_connect(sk);
	tcp_set_state(sk, TCP_CLOSE);
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
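
/* Path MTU reduction: called either directly from tcp_v6_err() when the
 * socket is not owned by the user, or deferred via the
 * TCP_MTU_REDUCED_DEFERRED flag and replayed from tcp_release_cb().
 */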
368 static void tcp_v6_mtu_reduced(struct sock *sk)
370 struct dst_entry *dst;
373 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
376 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
378 /* Drop requests trying to increase our current mss.
379 * Check done in __ip6_rt_update_pmtu() is too late.
381 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
384 dst = inet6_csk_update_pmtu(sk, mtu);
388 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
389 tcp_sync_mss(sk, dst_mtu(dst));
390 tcp_simple_retransmit(sk);
394 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
395 u8 type, u8 code, int offset, __be32 info)
397 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
398 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
399 struct net *net = dev_net(skb->dev);
400 struct request_sock *fastopen;
401 struct ipv6_pinfo *np;
408 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
409 &hdr->daddr, th->dest,
410 &hdr->saddr, ntohs(th->source),
411 skb->dev->ifindex, inet6_sdif(skb));
414 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
419 if (sk->sk_state == TCP_TIME_WAIT) {
420 inet_twsk_put(inet_twsk(sk));
423 seq = ntohl(th->seq);
424 fatal = icmpv6_err_convert(type, code, &err);
425 if (sk->sk_state == TCP_NEW_SYN_RECV) {
426 tcp_req_err(sk, seq, fatal);
431 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
432 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
434 if (sk->sk_state == TCP_CLOSE)
437 if (static_branch_unlikely(&ip6_min_hopcount)) {
438 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
439 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
440 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
446 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
447 fastopen = rcu_dereference(tp->fastopen_rsk);
448 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
449 if (sk->sk_state != TCP_LISTEN &&
450 !between(seq, snd_una, tp->snd_nxt)) {
451 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
455 np = tcp_inet6_sk(sk);
457 if (type == NDISC_REDIRECT) {
458 if (!sock_owned_by_user(sk)) {
459 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
462 dst->ops->redirect(dst, sk, skb);
467 if (type == ICMPV6_PKT_TOOBIG) {
468 u32 mtu = ntohl(info);
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
474 if (sk->sk_state == TCP_LISTEN)
477 if (!ip6_sk_accept_pmtu(sk))
480 if (mtu < IPV6_MIN_MTU)
483 WRITE_ONCE(tp->mtu_info, mtu);
485 if (!sock_owned_by_user(sk))
486 tcp_v6_mtu_reduced(sk);
487 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
	/* Might be for a request_sock */
495 switch (sk->sk_state) {
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
501 if (fastopen && !fastopen->sk)
504 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
506 if (!sock_owned_by_user(sk)) {
508 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
512 sk->sk_err_soft = err;
	/* Check if this ICMP message allows revert of backoff (see RFC 6069). */
520 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
521 code == ICMPV6_NOROUTE)
522 tcp_ld_RTO_revert(sk, seq);
525 if (!sock_owned_by_user(sk) && np->recverr) {
529 sk->sk_err_soft = err;
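
/* Build and transmit a SYN-ACK for a request sock: route it, reflect ToS and
 * flow label when configured, attach the listener's IPv6 options and hand the
 * segment to ip6_xmit().
 */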
538 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
540 struct request_sock *req,
541 struct tcp_fastopen_cookie *foc,
542 enum tcp_synack_type synack_type,
543 struct sk_buff *syn_skb)
545 struct inet_request_sock *ireq = inet_rsk(req);
546 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
547 struct ipv6_txoptions *opt;
548 struct flowi6 *fl6 = &fl->u.ip6;
553 /* First, grab a route. */
554 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
555 IPPROTO_TCP)) == NULL)
558 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
561 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
562 &ireq->ir_v6_rmt_addr);
564 fl6->daddr = ireq->ir_v6_rmt_addr;
565 if (np->repflow && ireq->pktopts)
566 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
568 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
569 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
570 (np->tclass & INET_ECN_MASK) :
573 if (!INET_ECN_is_capable(tclass) &&
574 tcp_bpf_ca_needs_ecn((struct sock *)req))
575 tclass |= INET_ECN_ECT_0;
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       tclass, sk->sk_priority);
584 err = net_xmit_eval(err);
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}
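
/* TCP-MD5 (RFC 2385) support: signature keys are looked up per peer address,
 * optionally scoped to an L3 master device (VRF) via l3index; v4-mapped IPv6
 * addresses reuse the IPv4 key machinery.
 *
 * A minimal userspace sketch (illustrative only, not part of this file):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	memcpy(&md5.tcpm_addr, &peer_sin6, sizeof(peer_sin6));
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */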
598 #ifdef CONFIG_TCP_MD5SIG
599 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
600 const struct in6_addr *addr,
603 return tcp_md5_do_lookup(sk, l3index,
604 (union tcp_md5_addr *)addr, AF_INET6);
607 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
608 const struct sock *addr_sk)
612 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
613 addr_sk->sk_bound_dev_if);
614 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
618 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
619 sockptr_t optval, int optlen)
621 struct tcp_md5sig cmd;
622 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	if (optlen < sizeof(cmd))
		return -EINVAL;
	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;
	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;
636 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
638 if (optname == TCP_MD5SIG_EXT &&
639 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
640 prefixlen = cmd.tcpm_prefixlen;
641 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
645 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
648 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
649 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
650 struct net_device *dev;
653 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
654 if (dev && netif_is_l3_master(dev))
655 l3index = dev->ifindex;
		/* ok to reference set/not set outside of rcu;
		 * right now the device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
665 if (!cmd.tcpm_keylen) {
666 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
667 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
670 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
671 AF_INET6, prefixlen, l3index, flags);
674 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
677 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
678 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
679 AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);
683 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
684 AF_INET6, prefixlen, l3index, flags,
685 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
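
/* The MD5 signature covers, in order: a TCP pseudo-header (saddr, daddr,
 * segment length, protocol), the TCP header with its checksum field zeroed,
 * the payload, and finally the key itself.
 */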
688 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
689 const struct in6_addr *daddr,
690 const struct in6_addr *saddr,
691 const struct tcphdr *th, int nbytes)
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;
708 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
709 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
710 sizeof(*bp) + sizeof(*th));
711 return crypto_ahash_update(hp->md5_req);
714 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
715 const struct in6_addr *daddr, struct in6_addr *saddr,
716 const struct tcphdr *th)
718 struct tcp_md5sig_pool *hp;
719 struct ahash_request *req;
721 hp = tcp_get_md5sig_pool();
723 goto clear_hash_noput;
726 if (crypto_ahash_init(req))
728 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
730 if (tcp_md5_hash_key(hp, key))
732 ahash_request_set_crypt(req, NULL, md5_hash, 0);
733 if (crypto_ahash_final(req))
736 tcp_put_md5sig_pool();
740 tcp_put_md5sig_pool();
742 memset(md5_hash, 0, 16);
746 static int tcp_v6_md5_hash_skb(char *md5_hash,
747 const struct tcp_md5sig_key *key,
748 const struct sock *sk,
749 const struct sk_buff *skb)
751 const struct in6_addr *saddr, *daddr;
752 struct tcp_md5sig_pool *hp;
753 struct ahash_request *req;
754 const struct tcphdr *th = tcp_hdr(skb);
	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}
765 hp = tcp_get_md5sig_pool();
767 goto clear_hash_noput;
770 if (crypto_ahash_init(req))
773 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
775 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
777 if (tcp_md5_hash_key(hp, key))
779 ahash_request_set_crypt(req, NULL, md5_hash, 0);
780 if (crypto_ahash_final(req))
783 tcp_put_md5sig_pool();
787 tcp_put_md5sig_pool();
789 memset(md5_hash, 0, 16);
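
/* Fill a request sock from an incoming SYN: record the peer and local
 * addresses, the incoming interface for link-local traffic, and stash the
 * SYN skb when the listener wants packet options delivered later via
 * IPV6_PKTOPTIONS.
 */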
795 static void tcp_v6_init_req(struct request_sock *req,
796 const struct sock *sk_listener,
799 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
800 struct inet_request_sock *ireq = inet_rsk(req);
801 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
803 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
804 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
806 /* So that link locals have meaning */
807 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
808 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
809 ireq->ir_iif = tcp_v6_iif(skb);
	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req)
{
	tcp_v6_init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
834 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
836 .obj_size = sizeof(struct tcp6_request_sock),
837 .rtx_syn_ack = tcp_rtx_synack,
838 .send_ack = tcp_v6_reqsk_send_ack,
839 .destructor = tcp_v6_reqsk_destructor,
840 .send_reset = tcp_v6_send_reset,
841 .syn_ack_timeout = tcp_syn_ack_timeout,
844 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
845 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
846 sizeof(struct ipv6hdr),
847 #ifdef CONFIG_TCP_MD5SIG
848 .req_md5_lookup = tcp_v6_md5_lookup,
849 .calc_md5_hash = tcp_v6_md5_hash_skb,
851 #ifdef CONFIG_SYN_COOKIES
852 .cookie_init_seq = cookie_v6_init_sequence,
854 .route_req = tcp_v6_route_req,
855 .init_seq = tcp_v6_init_seq,
856 .init_ts_off = tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};
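
/* tcp_v6_send_response() builds a bare ACK or RST on the per-netns control
 * socket: it swaps addresses and ports from the offending segment, optionally
 * appends timestamp and MD5 options, and routes the reply itself since no
 * full socket may exist.
 */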
860 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
861 u32 ack, u32 win, u32 tsval, u32 tsecr,
862 int oif, struct tcp_md5sig_key *key, int rst,
863 u8 tclass, __be32 label, u32 priority, u32 txhash)
865 const struct tcphdr *th = tcp_hdr(skb);
867 struct sk_buff *buff;
869 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
870 struct sock *ctl_sk = net->ipv6.tcp_sk;
871 unsigned int tot_len = sizeof(struct tcphdr);
872 __be32 mrst = 0, *topt;
873 struct dst_entry *dst;
877 tot_len += TCPOLEN_TSTAMP_ALIGNED;
878 #ifdef CONFIG_TCP_MD5SIG
880 tot_len += TCPOLEN_MD5SIG_ALIGNED;
885 mrst = mptcp_reset_option(skb);
888 tot_len += sizeof(__be32);
892 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
896 skb_reserve(buff, MAX_TCP_HEADER);
898 t1 = skb_push(buff, tot_len);
899 skb_reset_transport_header(buff);
901 /* Swap the send and the receive. */
902 memset(t1, 0, sizeof(*t1));
903 t1->dest = th->source;
904 t1->source = th->dest;
905 t1->doff = tot_len / 4;
906 t1->seq = htonl(seq);
907 t1->ack_seq = htonl(ack);
908 t1->ack = !rst || !th->ack;
910 t1->window = htons(win);
912 topt = (__be32 *)(t1 + 1);
915 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
916 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
917 *topt++ = htonl(tsval);
918 *topt++ = htonl(tsecr);
924 #ifdef CONFIG_TCP_MD5SIG
926 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
927 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
928 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
929 &ipv6_hdr(skb)->saddr,
930 &ipv6_hdr(skb)->daddr, t1);
934 memset(&fl6, 0, sizeof(fl6));
935 fl6.daddr = ipv6_hdr(skb)->saddr;
936 fl6.saddr = ipv6_hdr(skb)->daddr;
937 fl6.flowlabel = label;
939 buff->ip_summed = CHECKSUM_PARTIAL;
941 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
943 fl6.flowi6_proto = IPPROTO_TCP;
944 if (rt6_need_strict(&fl6.daddr) && !oif)
945 fl6.flowi6_oif = tcp_v6_iif(skb);
947 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
950 fl6.flowi6_oif = oif;
954 if (sk->sk_state == TCP_TIME_WAIT)
955 mark = inet_twsk(sk)->tw_mark;
958 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
961 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
962 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
964 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
965 fl6.fl6_dport = t1->dest;
966 fl6.fl6_sport = t1->source;
967 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
968 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
	/* Pass a socket to ip6_dst_lookup_flow() even when it is for an RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
975 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
977 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
979 skb_dst_set(buff, dst);
980 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
981 tclass & ~INET_ECN_MASK, priority);
982 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
984 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
991 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
993 const struct tcphdr *th = tcp_hdr(skb);
994 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
995 u32 seq = 0, ack_seq = 0;
996 struct tcp_md5sig_key *key = NULL;
997 #ifdef CONFIG_TCP_MD5SIG
998 const __u8 *hash_location = NULL;
999 unsigned char newhash[16];
1001 struct sock *sk1 = NULL;
	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our dst.
	 */
1015 if (!sk && !ipv6_unicast_destination(skb))
1018 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1019 #ifdef CONFIG_TCP_MD5SIG
1021 hash_location = tcp_parse_md5sig_option(th);
1022 if (sk && sk_fullsock(sk)) {
		/* sdif set means the packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1029 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1030 } else if (hash_location) {
1031 int dif = tcp_v6_iif_l3_slave(skb);
1032 int sdif = tcp_v6_sdif(skb);
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not lose security here:
		 * the incoming packet is checked with the md5 hash of the
		 * found key, and no RST is generated if the hash doesn't match.
		 */
1042 sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1043 NULL, 0, &ipv6h->saddr, th->source,
1044 &ipv6h->daddr, ntohs(th->source),
		/* sdif set means the packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;
1054 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1058 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1059 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1065 seq = ntohl(th->ack_seq);
1067 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1071 oif = sk->sk_bound_dev_if;
1072 if (sk_fullsock(sk)) {
1073 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1075 trace_tcp_send_reset(sk, skb);
1077 label = ip6_flowlabel(ipv6h);
1078 priority = sk->sk_priority;
1079 txhash = sk->sk_hash;
1081 if (sk->sk_state == TCP_TIME_WAIT) {
1082 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1083 priority = inet_twsk(sk)->tw_priority;
1084 txhash = inet_twsk(sk)->tw_txhash;
1087 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1088 label = ip6_flowlabel(ipv6h);
1091 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1092 ipv6_get_dsfield(ipv6h), label, priority, txhash);
1094 #ifdef CONFIG_TCP_MD5SIG
1100 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1101 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1102 struct tcp_md5sig_key *key, u8 tclass,
1103 __be32 label, u32 priority, u32 txhash)
1105 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1106 tclass, label, priority, txhash);
1109 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1111 struct inet_timewait_sock *tw = inet_twsk(sk);
1112 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1114 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1115 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1116 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1117 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1118 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1124 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1125 struct request_sock *req)
1129 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 *
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
1139 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1140 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1141 tcp_rsk(req)->rcv_nxt,
1142 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1143 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1144 req->ts_recent, sk->sk_bound_dev_if,
1145 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1146 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
			tcp_rsk(req)->txhash);
}
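
/* SYN cookies: when the listener's SYN queue overflows, cookie_v6_check() can
 * reconstruct the request from the final ACK's sequence number alone, so no
 * per-connection state has to be kept between the SYN and that ACK.
 */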
1151 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1153 #ifdef CONFIG_SYN_COOKIES
1154 const struct tcphdr *th = tcp_hdr(skb);
1157 sk = cookie_v6_check(sk, skb);
1162 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1163 struct tcphdr *th, u32 *cookie)
1166 #ifdef CONFIG_SYN_COOKIES
1167 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1168 &tcp_request_sock_ipv6_ops, sk, th);
1170 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1171 tcp_synq_overflow(sk);
1177 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1179 if (skb->protocol == htons(ETH_P_IP))
1180 return tcp_v4_conn_request(sk, skb);
1182 if (!ipv6_unicast_destination(skb))
1185 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1186 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1190 return tcp_conn_request(&tcp6_request_sock_ops,
1191 &tcp_request_sock_ipv6_ops, sk, skb);
1195 return 0; /* don't send reset */
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move the header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
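
/* Create the child socket once the handshake completes. A SYN that arrived as
 * a v4-mapped packet is handed to tcp_v4_syn_recv_sock() and the child is then
 * converted to the mapped ops; native IPv6 requests get their route, options
 * and (optionally) MD5 key copied from the listener.
 */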
1208 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1209 struct request_sock *req,
1210 struct dst_entry *dst,
1211 struct request_sock *req_unhash,
1214 struct inet_request_sock *ireq;
1215 struct ipv6_pinfo *newnp;
1216 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1217 struct ipv6_txoptions *opt;
1218 struct inet_sock *newinet;
1219 bool found_dup_sk = false;
1220 struct tcp_sock *newtp;
1222 #ifdef CONFIG_TCP_MD5SIG
1223 struct tcp_md5sig_key *key;
1228 if (skb->protocol == htons(ETH_P_IP)) {
1233 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1234 req_unhash, own_req);
1239 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1241 newnp = tcp_inet6_sk(newsk);
1242 newtp = tcp_sk(newsk);
1244 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1246 newnp->saddr = newsk->sk_v6_rcv_saddr;
1248 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1249 if (sk_is_mptcp(newsk))
1250 mptcpv6_handle_mapped(newsk, true);
1251 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1252 #ifdef CONFIG_TCP_MD5SIG
1253 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1256 newnp->ipv6_mc_list = NULL;
1257 newnp->ipv6_ac_list = NULL;
1258 newnp->ipv6_fl_list = NULL;
1259 newnp->pktoptions = NULL;
1261 newnp->mcast_oif = inet_iif(skb);
1262 newnp->mcast_hops = ip_hdr(skb)->ttl;
1263 newnp->rcv_flowinfo = 0;
1265 newnp->flow_label = 0;
		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */
		/* This is a tricky place. Until this moment the IPv4 tcp code
		   worked with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1282 ireq = inet_rsk(req);
1284 if (sk_acceptq_is_full(sk))
1288 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1293 newsk = tcp_create_openreq_child(sk, req, skb);
	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */
1303 newsk->sk_gso_type = SKB_GSO_TCPV6;
1304 ip6_dst_store(newsk, dst, NULL, NULL);
1305 inet6_sk_rx_dst_set(newsk, skb);
1307 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1309 newtp = tcp_sk(newsk);
1310 newinet = inet_sk(newsk);
1311 newnp = tcp_inet6_sk(newsk);
1313 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1315 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1316 newnp->saddr = ireq->ir_v6_loc_addr;
1317 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1318 newsk->sk_bound_dev_if = ireq->ir_iif;
	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
1325 newnp->ipv6_mc_list = NULL;
1326 newnp->ipv6_ac_list = NULL;
1327 newnp->ipv6_fl_list = NULL;
1330 newnp->rxopt.all = np->rxopt.all;
1332 newnp->pktoptions = NULL;
1334 newnp->mcast_oif = tcp_v6_iif(skb);
1335 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1336 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1338 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
1343 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1344 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but we do
	   one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
1356 opt = ipv6_dup_options(newsk, opt);
1357 RCU_INIT_POINTER(newnp->opt, opt);
1359 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1361 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1364 tcp_ca_openreq_child(newsk, dst);
1366 tcp_sync_mss(newsk, dst_mtu(dst));
1367 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1369 tcp_initialize_rcv_mss(newsk);
1371 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1372 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1374 #ifdef CONFIG_TCP_MD5SIG
1375 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1377 /* Copy over the MD5 key from the original socket */
1378 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
1385 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1386 AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1387 sk_gfp_mask(sk, GFP_ATOMIC));
1391 if (__inet_inherit_port(sk, newsk) < 0) {
1392 inet_csk_prepare_forced_close(newsk);
1396 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1399 tcp_move_syn(newtp, req);
1401 /* Clone pktoptions received with SYN, if we own the req */
1402 if (ireq->pktopts) {
1403 newnp->pktoptions = skb_clone(ireq->pktopts,
1404 sk_gfp_mask(sk, GFP_ATOMIC));
1405 consume_skb(ireq->pktopts);
1406 ireq->pktopts = NULL;
1407 if (newnp->pktoptions) {
1408 tcp_v6_restore_cb(newnp->pktoptions);
1409 skb_set_owner_r(newnp->pktoptions, newsk);
	if (!req_unhash && found_dup_sk) {
		/* This code path should only be executed in the
		 * syncookie case.
		 */
		bh_unlock_sock(newsk);
1426 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1434 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1444 INDIRECT_CALLABLE_SCOPE
1445 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1447 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1448 struct sk_buff *opt_skb = NULL;
1449 enum skb_drop_reason reason;
1450 struct tcp_sock *tp;
	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
1460 if (skb->protocol == htons(ETH_P_IP))
1461 return tcp_v4_do_rcv(sk, skb);
	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */
	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   may make it not affect IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   options received in the last packet enqueued
	   by tcp. Feel free to propose a better solution.
					       --ANK (980728)
	 */
1482 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1484 reason = SKB_DROP_REASON_NOT_SPECIFIED;
1485 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1486 struct dst_entry *dst;
1488 dst = rcu_dereference_protected(sk->sk_rx_dst,
1489 lockdep_sock_is_held(sk));
1491 sock_rps_save_rxhash(sk, skb);
1492 sk_mark_napi_id(sk, skb);
1494 if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1495 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1496 dst, sk->sk_rx_dst_cookie) == NULL) {
1497 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1502 tcp_rcv_established(sk, skb);
1504 goto ipv6_pktoptions;
1508 if (tcp_checksum_complete(skb))
1511 if (sk->sk_state == TCP_LISTEN) {
1512 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1518 if (tcp_child_process(sk, nsk, skb))
1521 __kfree_skb(opt_skb);
1525 sock_rps_save_rxhash(sk, skb);
1527 if (tcp_rcv_state_process(sk, skb))
1530 goto ipv6_pktoptions;
1534 tcp_v6_send_reset(sk, skb);
1537 __kfree_skb(opt_skb);
1538 kfree_skb_reason(skb, reason);
1541 reason = SKB_DROP_REASON_TCP_CSUM;
1542 trace_tcp_bad_csum(skb);
1543 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1544 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to the tail of the read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which the user wants to receive.
	 */
	tp = tcp_sk(sk);
1557 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1558 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1559 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1560 np->mcast_oif = tcp_v6_iif(opt_skb);
1561 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1562 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1563 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1564 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1566 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1567 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1568 skb_set_owner_r(opt_skb, sk);
1569 tcp_v6_restore_cb(opt_skb);
1570 opt_skb = xchg(&np->pktoptions, opt_skb);
1572 __kfree_skb(opt_skb);
1573 opt_skb = xchg(&np->pktoptions, NULL);
1577 consume_skb(opt_skb);
1581 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1582 const struct tcphdr *th)
	/* This is tricky: we move IP6CB to its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure the compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

1593 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1594 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1595 skb->len - th->doff*4);
1596 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1597 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1598 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1599 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1600 TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
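
/* Main receive path for the protocol handler: validate the TCP header, look
 * the segment up in the established/listener tables, let TIME_WAIT and
 * NEW_SYN_RECV mini sockets handle their own cases, then either process the
 * segment directly or queue it on the owner's backlog.
 */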
1605 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1607 enum skb_drop_reason drop_reason;
1608 int sdif = inet6_sdif(skb);
1609 int dif = inet6_iif(skb);
1610 const struct tcphdr *th;
1611 const struct ipv6hdr *hdr;
1615 struct net *net = dev_net(skb->dev);
1617 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad. */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
1626 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1629 th = (const struct tcphdr *)skb->data;
1631 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1632 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1635 if (!pskb_may_pull(skb, th->doff*4))
1638 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1641 th = (const struct tcphdr *)skb->data;
1642 hdr = ipv6_hdr(skb);
1645 sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1646 th->source, th->dest, inet6_iif(skb), sdif,
1652 if (sk->sk_state == TCP_TIME_WAIT)
1655 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1656 struct request_sock *req = inet_reqsk(sk);
1657 bool req_stolen = false;
1660 sk = req->rsk_listener;
1661 drop_reason = tcp_inbound_md5_hash(sk, skb,
1662 &hdr->saddr, &hdr->daddr,
1663 AF_INET6, dif, sdif);
1665 sk_drops_add(sk, skb);
1669 if (tcp_checksum_complete(skb)) {
1673 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1674 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1676 inet_csk_reqsk_queue_drop_and_put(sk, req);
			/* reuseport_migrate_sock() has already held one sk_refcnt before returning. */
1688 if (!tcp_filter(sk, skb)) {
1689 th = (const struct tcphdr *)skb->data;
1690 hdr = ipv6_hdr(skb);
1691 tcp_v6_fill_cb(skb, hdr, th);
1692 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1694 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
			/* Another cpu got exclusive access to req
			 * and created a full blown socket.
			 * Try to feed this packet to this socket
			 * instead of discarding it.
			 */
1704 tcp_v6_restore_cb(skb);
1708 goto discard_and_relse;
1712 tcp_v6_restore_cb(skb);
1713 } else if (tcp_child_process(sk, nsk, skb)) {
1714 tcp_v6_send_reset(nsk, skb);
1715 goto discard_and_relse;
1722 if (static_branch_unlikely(&ip6_min_hopcount)) {
1723 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1724 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1725 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1726 goto discard_and_relse;
1730 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1731 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1732 goto discard_and_relse;
1735 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1736 AF_INET6, dif, sdif);
1738 goto discard_and_relse;
1740 if (tcp_filter(sk, skb)) {
1741 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1742 goto discard_and_relse;
1744 th = (const struct tcphdr *)skb->data;
1745 hdr = ipv6_hdr(skb);
1746 tcp_v6_fill_cb(skb, hdr, th);
1750 if (sk->sk_state == TCP_LISTEN) {
1751 ret = tcp_v6_do_rcv(sk, skb);
1752 goto put_and_return;
1755 sk_incoming_cpu_update(sk);
1757 bh_lock_sock_nested(sk);
1758 tcp_segs_in(tcp_sk(sk), skb);
1760 if (!sock_owned_by_user(sk)) {
1761 ret = tcp_v6_do_rcv(sk, skb);
1763 if (tcp_add_backlog(sk, skb, &drop_reason))
1764 goto discard_and_relse;
1770 return ret ? -1 : 0;
1773 drop_reason = SKB_DROP_REASON_NO_SOCKET;
1774 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1777 tcp_v6_fill_cb(skb, hdr, th);
1779 if (tcp_checksum_complete(skb)) {
1781 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1782 trace_tcp_bad_csum(skb);
1783 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1785 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1787 tcp_v6_send_reset(NULL, skb);
1791 SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1792 kfree_skb_reason(skb, drop_reason);
1796 sk_drops_add(sk, skb);
1802 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1803 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1804 inet_twsk_put(inet_twsk(sk));
1808 tcp_v6_fill_cb(skb, hdr, th);
1810 if (tcp_checksum_complete(skb)) {
1811 inet_twsk_put(inet_twsk(sk));
1815 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1820 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1821 skb, __tcp_hdrlen(th),
1822 &ipv6_hdr(skb)->saddr, th->source,
1823 &ipv6_hdr(skb)->daddr,
1825 tcp_v6_iif_l3_slave(skb),
1828 struct inet_timewait_sock *tw = inet_twsk(sk);
1829 inet_twsk_deschedule_put(tw);
1831 tcp_v6_restore_cb(skb);
1839 tcp_v6_timewait_ack(sk, skb);
1842 tcp_v6_send_reset(sk, skb);
1843 inet_twsk_deschedule_put(inet_twsk(sk));
1845 case TCP_TW_SUCCESS:
1851 void tcp_v6_early_demux(struct sk_buff *skb)
1853 struct net *net = dev_net(skb->dev);
1854 const struct ipv6hdr *hdr;
1855 const struct tcphdr *th;
1858 if (skb->pkt_type != PACKET_HOST)
1861 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1864 hdr = ipv6_hdr(skb);
1867 if (th->doff < sizeof(struct tcphdr) / 4)
1870 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1871 sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1872 &hdr->saddr, th->source,
1873 &hdr->daddr, ntohs(th->dest),
1874 inet6_iif(skb), inet6_sdif(skb));
1877 skb->destructor = sock_edemux;
1878 if (sk_fullsock(sk)) {
1879 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1882 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1884 sk->sk_rx_dst_ifindex == skb->skb_iif)
1885 skb_dst_set_noref(skb, dst);
1890 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1891 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1892 .twsk_unique = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};
INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}
1901 const struct inet_connection_sock_af_ops ipv6_specific = {
1902 .queue_xmit = inet6_csk_xmit,
1903 .send_check = tcp_v6_send_check,
1904 .rebuild_header = inet6_sk_rebuild_header,
1905 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1906 .conn_request = tcp_v6_conn_request,
1907 .syn_recv_sock = tcp_v6_syn_recv_sock,
1908 .net_header_len = sizeof(struct ipv6hdr),
1909 .net_frag_header_len = sizeof(struct frag_hdr),
1910 .setsockopt = ipv6_setsockopt,
1911 .getsockopt = ipv6_getsockopt,
1912 .addr2sockaddr = inet6_csk_addr2sockaddr,
1913 .sockaddr_len = sizeof(struct sockaddr_in6),
1914 .mtu_reduced = tcp_v6_mtu_reduced,
1917 #ifdef CONFIG_TCP_MD5SIG
1918 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1919 .md5_lookup = tcp_v6_md5_lookup,
1920 .calc_md5_hash = tcp_v6_md5_hash_skb,
1921 .md5_parse = tcp_v6_parse_md5_keys,
1926 * TCP over IPv4 via INET6 API
1928 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1929 .queue_xmit = ip_queue_xmit,
1930 .send_check = tcp_v4_send_check,
1931 .rebuild_header = inet_sk_rebuild_header,
1932 .sk_rx_dst_set = inet_sk_rx_dst_set,
1933 .conn_request = tcp_v6_conn_request,
1934 .syn_recv_sock = tcp_v6_syn_recv_sock,
1935 .net_header_len = sizeof(struct iphdr),
1936 .setsockopt = ipv6_setsockopt,
1937 .getsockopt = ipv6_getsockopt,
1938 .addr2sockaddr = inet6_csk_addr2sockaddr,
1939 .sockaddr_len = sizeof(struct sockaddr_in6),
1940 .mtu_reduced = tcp_v4_mtu_reduced,
1943 #ifdef CONFIG_TCP_MD5SIG
1944 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1945 .md5_lookup = tcp_v4_md5_lookup,
1946 .calc_md5_hash = tcp_v4_md5_hash_skb,
1947 .md5_parse = tcp_v6_parse_md5_keys,
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
1954 static int tcp_v6_init_sock(struct sock *sk)
1956 struct inet_connection_sock *icsk = inet_csk(sk);
1960 icsk->icsk_af_ops = &ipv6_specific;
1962 #ifdef CONFIG_TCP_MD5SIG
1963 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
1975 #ifdef CONFIG_PROC_FS
1976 /* Proc filesystem TCPv6 sock list dumping. */
1977 static void get_openreq6(struct seq_file *seq,
1978 const struct request_sock *req, int i)
1980 long ttd = req->rsk_timer.expires - jiffies;
1981 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1982 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1988 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1989 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1991 src->s6_addr32[0], src->s6_addr32[1],
1992 src->s6_addr32[2], src->s6_addr32[3],
1993 inet_rsk(req)->ir_num,
1994 dest->s6_addr32[0], dest->s6_addr32[1],
1995 dest->s6_addr32[2], dest->s6_addr32[3],
1996 ntohs(inet_rsk(req)->ir_rmt_port),
1998 0, 0, /* could print option size, but that is af dependent. */
1999 1, /* timers active (only the expire timer) */
2000 jiffies_to_clock_t(ttd),
2002 from_kuid_munged(seq_user_ns(seq),
2003 sock_i_uid(req->rsk_listener)),
2004 0, /* non standard timer */
2005 0, /* open_requests have no inode */
2009 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2011 const struct in6_addr *dest, *src;
2014 unsigned long timer_expires;
2015 const struct inet_sock *inet = inet_sk(sp);
2016 const struct tcp_sock *tp = tcp_sk(sp);
2017 const struct inet_connection_sock *icsk = inet_csk(sp);
2018 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2022 dest = &sp->sk_v6_daddr;
2023 src = &sp->sk_v6_rcv_saddr;
2024 destp = ntohs(inet->inet_dport);
2025 srcp = ntohs(inet->inet_sport);
2027 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2028 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2029 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2031 timer_expires = icsk->icsk_timeout;
2032 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2034 timer_expires = icsk->icsk_timeout;
2035 } else if (timer_pending(&sp->sk_timer)) {
2037 timer_expires = sp->sk_timer.expires;
2040 timer_expires = jiffies;
2043 state = inet_sk_state_load(sp);
2044 if (state == TCP_LISTEN)
2045 rx_queue = READ_ONCE(sp->sk_ack_backlog);
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);
2054 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2055 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2057 src->s6_addr32[0], src->s6_addr32[1],
2058 src->s6_addr32[2], src->s6_addr32[3], srcp,
2059 dest->s6_addr32[0], dest->s6_addr32[1],
2060 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2062 READ_ONCE(tp->write_seq) - tp->snd_una,
2065 jiffies_delta_to_clock_t(timer_expires - jiffies),
2066 icsk->icsk_retransmits,
2067 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2068 icsk->icsk_probes_out,
2070 refcount_read(&sp->sk_refcnt), sp,
2071 jiffies_to_clock_t(icsk->icsk_rto),
2072 jiffies_to_clock_t(icsk->icsk_ack.ato),
2073 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2075 state == TCP_LISTEN ?
2076 fastopenq->max_qlen :
2077 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2081 static void get_timewait6_sock(struct seq_file *seq,
2082 struct inet_timewait_sock *tw, int i)
2084 long delta = tw->tw_timer.expires - jiffies;
2085 const struct in6_addr *dest, *src;
2088 dest = &tw->tw_v6_daddr;
2089 src = &tw->tw_v6_rcv_saddr;
2090 destp = ntohs(tw->tw_dport);
2091 srcp = ntohs(tw->tw_sport);
2094 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2095 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2097 src->s6_addr32[0], src->s6_addr32[1],
2098 src->s6_addr32[2], src->s6_addr32[3], srcp,
2099 dest->s6_addr32[0], dest->s6_addr32[1],
2100 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2101 tw->tw_substate, 0, 0,
2102 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2103 refcount_read(&tw->tw_refcnt), tw);
2106 static int tcp6_seq_show(struct seq_file *seq, void *v)
2108 struct tcp_iter_state *st;
2109 struct sock *sk = v;
2111 if (v == SEQ_START_TOKEN) {
2116 "st tx_queue rx_queue tr tm->when retrnsmt"
2117 " uid timeout inode\n");
2122 if (sk->sk_state == TCP_TIME_WAIT)
2123 get_timewait6_sock(seq, v, st->num);
2124 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2125 get_openreq6(seq, v, st->num);
2127 get_tcp6_sock(seq, v, st->num);
2132 static const struct seq_operations tcp6_seq_ops = {
2133 .show = tcp6_seq_show,
2134 .start = tcp_seq_start,
2135 .next = tcp_seq_next,
2136 .stop = tcp_seq_stop,
2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2143 int __net_init tcp6_proc_init(struct net *net)
2145 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2146 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2151 void tcp6_proc_exit(struct net *net)
2153 remove_proc_entry("tcp6", net->proc_net);
2157 struct proto tcpv6_prot = {
2159 .owner = THIS_MODULE,
2161 .pre_connect = tcp_v6_pre_connect,
2162 .connect = tcp_v6_connect,
2163 .disconnect = tcp_disconnect,
2164 .accept = inet_csk_accept,
2166 .init = tcp_v6_init_sock,
2167 .destroy = tcp_v6_destroy_sock,
2168 .shutdown = tcp_shutdown,
2169 .setsockopt = tcp_setsockopt,
2170 .getsockopt = tcp_getsockopt,
2171 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2172 .keepalive = tcp_set_keepalive,
2173 .recvmsg = tcp_recvmsg,
2174 .sendmsg = tcp_sendmsg,
2175 .sendpage = tcp_sendpage,
2176 .backlog_rcv = tcp_v6_do_rcv,
2177 .release_cb = tcp_release_cb,
2179 .unhash = inet_unhash,
2180 .get_port = inet_csk_get_port,
2181 .put_port = inet_put_port,
2182 #ifdef CONFIG_BPF_SYSCALL
2183 .psock_update_sk_prot = tcp_bpf_update_proto,
2185 .enter_memory_pressure = tcp_enter_memory_pressure,
2186 .leave_memory_pressure = tcp_leave_memory_pressure,
2187 .stream_memory_free = tcp_stream_memory_free,
2188 .sockets_allocated = &tcp_sockets_allocated,
2190 .memory_allocated = &tcp_memory_allocated,
2191 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
2193 .memory_pressure = &tcp_memory_pressure,
2194 .orphan_count = &tcp_orphan_count,
2195 .sysctl_mem = sysctl_tcp_mem,
2196 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2197 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2198 .max_header = MAX_TCP_HEADER,
2199 .obj_size = sizeof(struct tcp6_sock),
2200 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2201 .twsk_prot = &tcp6_timewait_sock_ops,
2202 .rsk_prot = &tcp6_request_sock_ops,
2204 .no_autobind = true,
2205 .diag_destroy = tcp_abort,
2207 EXPORT_SYMBOL_GPL(tcpv6_prot);
2209 static const struct inet6_protocol tcpv6_protocol = {
2210 .handler = tcp_v6_rcv,
2211 .err_handler = tcp_v6_err,
2212 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}
2240 static struct pernet_operations tcpv6_net_ops = {
2241 .init = tcpv6_net_init,
2242 .exit = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
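
/* Module init: register the IPPROTO_TCP inet6 protocol handler, then the
 * SOCK_STREAM protosw, then the per-netns control sockets, and finally the
 * MPTCP v6 hooks; each step is unwound in reverse order on failure.
 */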
2246 int __init tcpv6_init(void)
2250 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2254 /* register inet6 protocol */
2255 ret = inet6_register_protosw(&tcpv6_protosw);
2257 goto out_tcpv6_protocol;
2259 ret = register_pernet_subsys(&tcpv6_net_ops);
2261 goto out_tcpv6_protosw;
2263 ret = mptcpv6_init();
2265 goto out_tcpv6_pernet_subsys;
2270 out_tcpv6_pernet_subsys:
2271 unregister_pernet_subsys(&tcpv6_net_ops);
2273 inet6_unregister_protosw(&tcpv6_protosw);
2275 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}