3 * Linux INET6 implementation
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support the IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
17 * to a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
 */
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
66 #include <net/busy_poll.h>
68 #include <linux/proc_fs.h>
69 #include <linux/seq_file.h>
71 #include <linux/crypto.h>
72 #include <linux/scatterlist.h>
74 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
75 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
76 struct request_sock *req);
78 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static const struct inet_connection_sock_af_ops ipv6_mapped;
81 static const struct inet_connection_sock_af_ops ipv6_specific;
82 #ifdef CONFIG_TCP_MD5SIG
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
86 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
87 const struct in6_addr *addr)
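/*
 * Remember the input route on the socket: cache the incoming interface
 * index and the routing tree serial number so the established fast path
 * can cheaply revalidate sk->sk_rx_dst later on.
 */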
93 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
95 struct dst_entry *dst = skb_dst(skb);
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
100 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
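/*
 * Hash the socket into the TCP lookup tables once it leaves TCP_CLOSE.
 * Sockets that were switched to the mapped-IPv4 ops are special-cased;
 * native IPv6 sockets go through __inet6_hash().
 */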
105 static void tcp_v6_hash(struct sock *sk)
107 if (sk->sk_state != TCP_CLOSE) {
108 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
113 __inet6_hash(sk, NULL);
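/*
 * Derive the initial sequence number for an outgoing connection from the
 * address/port 4-tuple via secure_tcpv6_sequence_number().
 */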
118 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
120 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
121 ipv6_hdr(skb)->saddr.s6_addr32,
123 tcp_hdr(skb)->source);
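/*
 * Active open.  Validate the destination, handle flow labels, fall back to
 * tcp_v4_connect() for v4-mapped destinations, route the flow, pick a
 * source address, hash the socket and send the SYN.
 *
 * Illustrative userspace sketch (not part of this file): an AF_INET6
 * socket may connect() to a v4-mapped destination such as
 * "::ffff:192.0.2.1"; the IPV6_ADDR_MAPPED branch below then switches the
 * socket over to the IPv4 paths.
 */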
126 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
129 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
130 struct inet_sock *inet = inet_sk(sk);
131 struct inet_connection_sock *icsk = inet_csk(sk);
132 struct ipv6_pinfo *np = inet6_sk(sk);
133 struct tcp_sock *tp = tcp_sk(sk);
134 struct in6_addr *saddr = NULL, *final_p, final;
137 struct dst_entry *dst;
141 if (addr_len < SIN6_LEN_RFC2133)
144 if (usin->sin6_family != AF_INET6)
145 return -EAFNOSUPPORT;
147 memset(&fl6, 0, sizeof(fl6));
150 fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
151 IP6_ECN_flow_init(fl6.flowlabel);
152 if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
153 struct ip6_flowlabel *flowlabel;
154 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
155 if (flowlabel == NULL)
157 fl6_sock_release(flowlabel);
162 * connect() to INADDR_ANY means loopback (BSD'ism).
165 if (ipv6_addr_any(&usin->sin6_addr))
166 usin->sin6_addr.s6_addr[15] = 0x1;
168 addr_type = ipv6_addr_type(&usin->sin6_addr);
170 if (addr_type & IPV6_ADDR_MULTICAST)
173 if (addr_type & IPV6_ADDR_LINKLOCAL) {
174 if (addr_len >= sizeof(struct sockaddr_in6) &&
175 usin->sin6_scope_id) {
176 /* If an interface was set while binding, the indices must coincide. */
179 if (sk->sk_bound_dev_if &&
180 sk->sk_bound_dev_if != usin->sin6_scope_id)
183 sk->sk_bound_dev_if = usin->sin6_scope_id;
186 /* Connecting to a link-local address requires an interface. */
187 if (!sk->sk_bound_dev_if)
191 if (tp->rx_opt.ts_recent_stamp &&
192 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
193 tp->rx_opt.ts_recent = 0;
194 tp->rx_opt.ts_recent_stamp = 0;
198 sk->sk_v6_daddr = usin->sin6_addr;
199 np->flow_label = fl6.flowlabel;
205 if (addr_type == IPV6_ADDR_MAPPED) {
206 u32 exthdrlen = icsk->icsk_ext_hdr_len;
207 struct sockaddr_in sin;
209 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
211 if (__ipv6_only_sock(sk))
214 sin.sin_family = AF_INET;
215 sin.sin_port = usin->sin6_port;
216 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
218 icsk->icsk_af_ops = &ipv6_mapped;
219 sk->sk_backlog_rcv = tcp_v4_do_rcv;
220 #ifdef CONFIG_TCP_MD5SIG
221 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
224 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
227 icsk->icsk_ext_hdr_len = exthdrlen;
228 icsk->icsk_af_ops = &ipv6_specific;
229 sk->sk_backlog_rcv = tcp_v6_do_rcv;
230 #ifdef CONFIG_TCP_MD5SIG
231 tp->af_specific = &tcp_sock_ipv6_specific;
235 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
236 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
237 &sk->sk_v6_rcv_saddr);
243 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
244 saddr = &sk->sk_v6_rcv_saddr;
246 fl6.flowi6_proto = IPPROTO_TCP;
247 fl6.daddr = sk->sk_v6_daddr;
248 fl6.saddr = saddr ? *saddr : np->saddr;
249 fl6.flowi6_oif = sk->sk_bound_dev_if;
250 fl6.flowi6_mark = sk->sk_mark;
251 fl6.fl6_dport = usin->sin6_port;
252 fl6.fl6_sport = inet->inet_sport;
254 final_p = fl6_update_dst(&fl6, np->opt, &final);
256 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
258 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
266 sk->sk_v6_rcv_saddr = *saddr;
269 /* set the source address */
271 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
273 sk->sk_gso_type = SKB_GSO_TCPV6;
274 __ip6_dst_store(sk, dst, NULL, NULL);
276 rt = (struct rt6_info *) dst;
277 if (tcp_death_row.sysctl_tw_recycle &&
278 !tp->rx_opt.ts_recent_stamp &&
279 ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
280 tcp_fetch_timewait_stamp(sk, dst);
282 icsk->icsk_ext_hdr_len = 0;
284 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
287 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
289 inet->inet_dport = usin->sin6_port;
291 tcp_set_state(sk, TCP_SYN_SENT);
292 err = inet6_hash_connect(&tcp_death_row, sk);
296 if (!tp->write_seq && likely(!tp->repair))
297 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
298 sk->sk_v6_daddr.s6_addr32,
302 err = tcp_connect(sk);
309 tcp_set_state(sk, TCP_CLOSE);
312 inet->inet_dport = 0;
313 sk->sk_route_caps = 0;
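/*
 * Apply a deferred "packet too big" notification: refresh the cached path
 * MTU from tp->mtu_info and, if our MSS must shrink, retransmit what was
 * lost via tcp_simple_retransmit().
 */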
317 static void tcp_v6_mtu_reduced(struct sock *sk)
319 struct dst_entry *dst;
321 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
324 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
328 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
329 tcp_sync_mss(sk, dst_mtu(dst));
330 tcp_simple_retransmit(sk);
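/*
 * ICMPv6 error handler.  Look up the socket the error refers to, then
 * dispatch on the type: NDISC_REDIRECT updates the route,
 * ICMPV6_PKT_TOOBIG triggers PMTU reduction, anything else is converted
 * via icmpv6_err_convert() and reported to the socket owner.
 */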
334 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
335 u8 type, u8 code, int offset, __be32 info)
337 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
338 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
339 struct ipv6_pinfo *np;
343 struct request_sock *fastopen;
345 struct net *net = dev_net(skb->dev);
347 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
348 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
351 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
356 if (sk->sk_state == TCP_TIME_WAIT) {
357 inet_twsk_put(inet_twsk(sk));
362 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
363 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
365 if (sk->sk_state == TCP_CLOSE)
368 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
369 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
374 seq = ntohl(th->seq);
375 /* XXX (TFO) - tp->snd_una should be the ISN (tcp_create_openreq_child()). */
376 fastopen = tp->fastopen_rsk;
377 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
378 if (sk->sk_state != TCP_LISTEN &&
379 !between(seq, snd_una, tp->snd_nxt)) {
380 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
386 if (type == NDISC_REDIRECT) {
387 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
390 dst->ops->redirect(dst, sk, skb);
394 if (type == ICMPV6_PKT_TOOBIG) {
395 /* We are not interested in TCP_LISTEN and open_requests
396 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
397 * they should go through unfragmented). */
399 if (sk->sk_state == TCP_LISTEN)
402 if (!ip6_sk_accept_pmtu(sk))
405 tp->mtu_info = ntohl(info);
406 if (!sock_owned_by_user(sk))
407 tcp_v6_mtu_reduced(sk);
408 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
414 icmpv6_err_convert(type, code, &err);
416 /* Might be for a request_sock */
417 switch (sk->sk_state) {
418 struct request_sock *req, **prev;
420 if (sock_owned_by_user(sk))
423 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
424 &hdr->saddr, inet6_iif(skb));
428 /* ICMPs are not backlogged, hence we cannot get
429 * an established socket here.
431 WARN_ON(req->sk != NULL);
433 if (seq != tcp_rsk(req)->snt_isn) {
434 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
438 inet_csk_reqsk_queue_drop(sk, req, prev);
439 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
444 /* Only in fast or simultaneous open. If a fast open socket is
445 * already accepted, it is treated as a connected one below. */
447 if (fastopen && fastopen->sk == NULL)
450 if (!sock_owned_by_user(sk)) {
452 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
456 sk->sk_err_soft = err;
460 if (!sock_owned_by_user(sk) && np->recverr) {
462 sk->sk_error_report(sk);
464 sk->sk_err_soft = err;
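/*
 * Build and transmit a SYN-ACK for a pending request: route it if no dst
 * was supplied, compute the checksum against the request's addresses and
 * hand it to ip6_xmit() with the listener's options.
 */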
472 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
474 struct request_sock *req,
476 struct tcp_fastopen_cookie *foc)
478 struct inet_request_sock *ireq = inet_rsk(req);
479 struct ipv6_pinfo *np = inet6_sk(sk);
483 /* First, grab a route. */
484 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
487 skb = tcp_make_synack(sk, dst, req, foc);
490 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
491 &ireq->ir_v6_rmt_addr);
493 fl6->daddr = ireq->ir_v6_rmt_addr;
494 if (np->repflow && (ireq->pktopts != NULL))
495 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
497 skb_set_queue_mapping(skb, queue_mapping);
498 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
499 err = net_xmit_eval(err);
506 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
511 res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
513 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
514 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
519 static void tcp_v6_reqsk_destructor(struct request_sock *req)
521 kfree_skb(inet_rsk(req)->pktopts);
524 #ifdef CONFIG_TCP_MD5SIG
525 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
526 const struct in6_addr *addr)
528 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
531 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
532 struct sock *addr_sk)
534 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
537 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
538 struct request_sock *req)
540 return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
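/*
 * setsockopt(TCP_MD5SIG) handler: v4-mapped peers are stored as AF_INET
 * keys, native peers as AF_INET6 keys; a zero key length deletes the key.
 *
 * Illustrative userspace sketch (not part of this file), assuming
 * peer_sin6 holds the peer's sockaddr_in6:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	memcpy(&md5.tcpm_addr, &peer_sin6, sizeof(peer_sin6));
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */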
543 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
546 struct tcp_md5sig cmd;
547 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
549 if (optlen < sizeof(cmd))
552 if (copy_from_user(&cmd, optval, sizeof(cmd)))
555 if (sin6->sin6_family != AF_INET6)
558 if (!cmd.tcpm_keylen) {
559 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
560 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
562 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
566 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
569 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
570 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
571 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
573 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
574 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
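/*
 * Feed the TCP pseudo-header (source/destination addresses, upper-layer
 * length, next header = TCP, per RFC 2460 section 8.1) into the running
 * MD5 digest.
 */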
577 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
578 const struct in6_addr *daddr,
579 const struct in6_addr *saddr, int nbytes)
581 struct tcp6_pseudohdr *bp;
582 struct scatterlist sg;
584 bp = &hp->md5_blk.ip6;
585 /* 1. TCP pseudo-header (RFC2460) */
588 bp->protocol = cpu_to_be32(IPPROTO_TCP);
589 bp->len = cpu_to_be32(nbytes);
591 sg_init_one(&sg, bp, sizeof(*bp));
592 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
595 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
596 const struct in6_addr *daddr, struct in6_addr *saddr,
597 const struct tcphdr *th)
599 struct tcp_md5sig_pool *hp;
600 struct hash_desc *desc;
602 hp = tcp_get_md5sig_pool();
604 goto clear_hash_noput;
605 desc = &hp->md5_desc;
607 if (crypto_hash_init(desc))
609 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
611 if (tcp_md5_hash_header(hp, th))
613 if (tcp_md5_hash_key(hp, key))
615 if (crypto_hash_final(desc, md5_hash))
618 tcp_put_md5sig_pool();
622 tcp_put_md5sig_pool();
624 memset(md5_hash, 0, 16);
628 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
629 const struct sock *sk,
630 const struct request_sock *req,
631 const struct sk_buff *skb)
633 const struct in6_addr *saddr, *daddr;
634 struct tcp_md5sig_pool *hp;
635 struct hash_desc *desc;
636 const struct tcphdr *th = tcp_hdr(skb);
639 saddr = &inet6_sk(sk)->saddr;
640 daddr = &sk->sk_v6_daddr;
642 saddr = &inet_rsk(req)->ir_v6_loc_addr;
643 daddr = &inet_rsk(req)->ir_v6_rmt_addr;
645 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
646 saddr = &ip6h->saddr;
647 daddr = &ip6h->daddr;
650 hp = tcp_get_md5sig_pool();
652 goto clear_hash_noput;
653 desc = &hp->md5_desc;
655 if (crypto_hash_init(desc))
658 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
660 if (tcp_md5_hash_header(hp, th))
662 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
664 if (tcp_md5_hash_key(hp, key))
666 if (crypto_hash_final(desc, md5_hash))
669 tcp_put_md5sig_pool();
673 tcp_put_md5sig_pool();
675 memset(md5_hash, 0, 16);
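/*
 * Verify the MD5 signature of an incoming segment against the key
 * configured for the peer address.  A missing, unexpected or mismatching
 * hash is counted and makes the caller drop the segment.
 */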
679 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
681 const __u8 *hash_location = NULL;
682 struct tcp_md5sig_key *hash_expected;
683 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
684 const struct tcphdr *th = tcp_hdr(skb);
688 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
689 hash_location = tcp_parse_md5sig_option(th);
691 /* We've parsed the options - do we have a hash? */
692 if (!hash_expected && !hash_location)
695 if (hash_expected && !hash_location) {
696 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
700 if (!hash_expected && hash_location) {
701 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
705 /* check the signature */
706 genhash = tcp_v6_md5_hash_skb(newhash,
710 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
711 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
712 genhash ? "failed" : "mismatch",
713 &ip6h->saddr, ntohs(th->source),
714 &ip6h->daddr, ntohs(th->dest));
721 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
723 .obj_size = sizeof(struct tcp6_request_sock),
724 .rtx_syn_ack = tcp_v6_rtx_synack,
725 .send_ack = tcp_v6_reqsk_send_ack,
726 .destructor = tcp_v6_reqsk_destructor,
727 .send_reset = tcp_v6_send_reset,
728 .syn_ack_timeout = tcp_syn_ack_timeout,
731 #ifdef CONFIG_TCP_MD5SIG
732 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
733 .md5_lookup = tcp_v6_reqsk_md5_lookup,
734 .calc_md5_hash = tcp_v6_md5_hash_skb,
738 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
739 u32 tsval, u32 tsecr, int oif,
740 struct tcp_md5sig_key *key, int rst, u8 tclass,
743 const struct tcphdr *th = tcp_hdr(skb);
745 struct sk_buff *buff;
747 struct net *net = dev_net(skb_dst(skb)->dev);
748 struct sock *ctl_sk = net->ipv6.tcp_sk;
749 unsigned int tot_len = sizeof(struct tcphdr);
750 struct dst_entry *dst;
754 tot_len += TCPOLEN_TSTAMP_ALIGNED;
755 #ifdef CONFIG_TCP_MD5SIG
757 tot_len += TCPOLEN_MD5SIG_ALIGNED;
760 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
765 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
767 t1 = (struct tcphdr *) skb_push(buff, tot_len);
768 skb_reset_transport_header(buff);
770 /* Swap the send and the receive. */
771 memset(t1, 0, sizeof(*t1));
772 t1->dest = th->source;
773 t1->source = th->dest;
774 t1->doff = tot_len / 4;
775 t1->seq = htonl(seq);
776 t1->ack_seq = htonl(ack);
777 t1->ack = !rst || !th->ack;
779 t1->window = htons(win);
781 topt = (__be32 *)(t1 + 1);
784 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
785 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
786 *topt++ = htonl(tsval);
787 *topt++ = htonl(tsecr);
790 #ifdef CONFIG_TCP_MD5SIG
792 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
793 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
794 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
795 &ipv6_hdr(skb)->saddr,
796 &ipv6_hdr(skb)->daddr, t1);
800 memset(&fl6, 0, sizeof(fl6));
801 fl6.daddr = ipv6_hdr(skb)->saddr;
802 fl6.saddr = ipv6_hdr(skb)->daddr;
803 fl6.flowlabel = label;
805 buff->ip_summed = CHECKSUM_PARTIAL;
808 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
810 fl6.flowi6_proto = IPPROTO_TCP;
811 if (rt6_need_strict(&fl6.daddr) && !oif)
812 fl6.flowi6_oif = inet6_iif(skb);
814 fl6.flowi6_oif = oif;
815 fl6.fl6_dport = t1->dest;
816 fl6.fl6_sport = t1->source;
817 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
819 /* Pass a socket to ip6_dst_lookup even when it is for a RST;
820 * the underlying function will use it to retrieve the network namespace. */
823 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
825 skb_dst_set(buff, dst);
826 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
827 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
829 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
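/*
 * Send a RST in reply to skb.  Sequence numbers are derived from the
 * offending segment; with CONFIG_TCP_MD5SIG the reset is signed with the
 * key of the addressed (possibly only listening) socket.
 */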
836 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
838 const struct tcphdr *th = tcp_hdr(skb);
839 u32 seq = 0, ack_seq = 0;
840 struct tcp_md5sig_key *key = NULL;
841 #ifdef CONFIG_TCP_MD5SIG
842 const __u8 *hash_location = NULL;
843 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
844 unsigned char newhash[16];
846 struct sock *sk1 = NULL;
853 if (!ipv6_unicast_destination(skb))
856 #ifdef CONFIG_TCP_MD5SIG
857 hash_location = tcp_parse_md5sig_option(th);
858 if (!sk && hash_location) {
860 * active side is lost. Try to find the listening socket through
861 * the source port, and then find the md5 key through the listening socket.
862 * We do not lose any security here:
863 * the incoming packet is checked against the md5 hash of the found key;
864 * no RST is generated if the md5 hash doesn't match. */
866 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
867 &tcp_hashinfo, &ipv6h->saddr,
868 th->source, &ipv6h->daddr,
869 ntohs(th->source), inet6_iif(skb));
874 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
878 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
879 if (genhash || memcmp(hash_location, newhash, 16) != 0)
882 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
887 seq = ntohl(th->ack_seq);
889 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
892 oif = sk ? sk->sk_bound_dev_if : 0;
893 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
895 #ifdef CONFIG_TCP_MD5SIG
904 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
905 u32 win, u32 tsval, u32 tsecr, int oif,
906 struct tcp_md5sig_key *key, u8 tclass,
909 tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label);
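/*
 * ACK segments arriving for a TIME-WAIT socket, echoing the window,
 * timestamps and flow label remembered in the timewait block.
 */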
913 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
915 struct inet_timewait_sock *tw = inet_twsk(sk);
916 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
918 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
919 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
920 tcp_time_stamp + tcptw->tw_ts_offset,
921 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
922 tw->tw_tclass, (tw->tw_flowlabel << 12));
927 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
928 struct request_sock *req)
930 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
931 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
933 tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
934 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
935 tcp_rsk(req)->rcv_nxt,
936 req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
937 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
942 static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
944 struct request_sock *req, **prev;
945 const struct tcphdr *th = tcp_hdr(skb);
948 /* Find possible connection requests. */
949 req = inet6_csk_search_req(sk, &prev, th->source,
950 &ipv6_hdr(skb)->saddr,
951 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
953 return tcp_check_req(sk, skb, req, prev, false);
955 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
956 &ipv6_hdr(skb)->saddr, th->source,
957 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
960 if (nsk->sk_state != TCP_TIME_WAIT) {
964 inet_twsk_put(inet_twsk(nsk));
968 #ifdef CONFIG_SYN_COOKIES
970 sk = cookie_v6_check(sk, skb);
975 /* FIXME: this is substantially similar to the ipv4 code.
976 * Can some kind of merge be done? -- erics */
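/*
 * Passive open: allocate a request sock for an incoming SYN, parse its
 * options, fall back to syncookies under SYN-flood pressure, and answer
 * with a SYN-ACK (possibly completing a TCP Fast Open).
 */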
978 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
980 struct tcp_options_received tmp_opt;
981 struct request_sock *req;
982 struct inet_request_sock *ireq;
983 struct ipv6_pinfo *np = inet6_sk(sk);
984 struct tcp_sock *tp = tcp_sk(sk);
985 __u32 isn = TCP_SKB_CB(skb)->when;
986 struct dst_entry *dst = NULL;
987 struct tcp_fastopen_cookie foc = { .len = -1 };
988 bool want_cookie = false, fastopen;
992 if (skb->protocol == htons(ETH_P_IP))
993 return tcp_v4_conn_request(sk, skb);
995 if (!ipv6_unicast_destination(skb))
998 if ((sysctl_tcp_syncookies == 2 ||
999 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
1000 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
1005 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1006 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1010 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
1014 #ifdef CONFIG_TCP_MD5SIG
1015 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
1018 tcp_clear_options(&tmp_opt);
1019 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1020 tmp_opt.user_mss = tp->rx_opt.user_mss;
1021 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1023 if (want_cookie && !tmp_opt.saw_tstamp)
1024 tcp_clear_options(&tmp_opt);
1026 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1027 tcp_openreq_init(req, &tmp_opt, skb);
1029 ireq = inet_rsk(req);
1030 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
1031 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
1032 if (!want_cookie || tmp_opt.tstamp_ok)
1033 TCP_ECN_create_request(req, skb, sock_net(sk));
1035 ireq->ir_iif = sk->sk_bound_dev_if;
1037 /* So that link locals have meaning */
1038 if (!sk->sk_bound_dev_if &&
1039 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
1040 ireq->ir_iif = inet6_iif(skb);
1043 if (ipv6_opt_accepted(sk, skb) ||
1044 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1045 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim ||
1047 atomic_inc(&skb->users);
1048 ireq->pktopts = skb;
1052 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1053 req->cookie_ts = tmp_opt.tstamp_ok;
1057 /* VJ's idea. We save the last timestamp seen
1058 * from the destination in the peer table when entering
1059 * TIME-WAIT state, and check against it before
1060 * accepting a new connection request.
1062 * If "isn" is not zero, this request hit an alive
1063 * timewait bucket, so all the necessary checks
1064 * are made in the function processing the timewait state. */
1066 if (tmp_opt.saw_tstamp &&
1067 tcp_death_row.sysctl_tw_recycle &&
1068 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1069 if (!tcp_peer_is_proven(req, dst, true)) {
1070 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1071 goto drop_and_release;
1074 /* Kill the following clause, if you dislike this way. */
1075 else if (!sysctl_tcp_syncookies &&
1076 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1077 (sysctl_max_syn_backlog >> 2)) &&
1078 !tcp_peer_is_proven(req, dst, false)) {
1079 /* Without syncookies, the last quarter of the
1080 * backlog is reserved for destinations
1081 * proven to be alive.
1082 * It means that we keep communicating only with
1083 * destinations that were already remembered
1084 * before the synflood started. */
1086 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1087 &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
1088 goto drop_and_release;
1091 isn = tcp_v6_init_sequence(skb);
1095 if (security_inet_conn_request(sk, skb, req))
1096 goto drop_and_release;
1098 if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
1101 tcp_rsk(req)->snt_isn = isn;
1102 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1103 tcp_openreq_init_rwin(req, sk, dst);
1104 fastopen = !want_cookie &&
1105 tcp_try_fastopen(sk, skb, req, &foc, dst);
1106 err = tcp_v6_send_synack(sk, dst, &fl6, req,
1107 skb_get_queue_mapping(skb), &foc);
1109 if (err || want_cookie)
1112 tcp_rsk(req)->listener = NULL;
1113 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1122 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1123 return 0; /* don't send reset */
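/*
 * Create the child socket once the handshake completes.  V4-mapped
 * requests are delegated to tcp_v4_syn_recv_sock() and then patched to
 * the mapped ops; native requests get a freshly routed dst, cloned IPv6
 * options and, if present, a copy of the peer's MD5 key.
 */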
1126 static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1127 struct request_sock *req,
1128 struct dst_entry *dst)
1130 struct inet_request_sock *ireq;
1131 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1132 struct tcp6_sock *newtcp6sk;
1133 struct inet_sock *newinet;
1134 struct tcp_sock *newtp;
1136 #ifdef CONFIG_TCP_MD5SIG
1137 struct tcp_md5sig_key *key;
1141 if (skb->protocol == htons(ETH_P_IP)) {
1146 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1151 newtcp6sk = (struct tcp6_sock *)newsk;
1152 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1154 newinet = inet_sk(newsk);
1155 newnp = inet6_sk(newsk);
1156 newtp = tcp_sk(newsk);
1158 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1160 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
1162 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1164 newsk->sk_v6_rcv_saddr = newnp->saddr;
1166 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1167 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1168 #ifdef CONFIG_TCP_MD5SIG
1169 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1172 newnp->ipv6_ac_list = NULL;
1173 newnp->ipv6_fl_list = NULL;
1174 newnp->pktoptions = NULL;
1176 newnp->mcast_oif = inet6_iif(skb);
1177 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1178 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1180 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1183 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1184 * here, tcp_create_openreq_child now does this for us, see the comment in
1185 * that function for the gory details. -acme
1188 /* This is a tricky place. Until this moment the IPv4 tcp code
1189 worked with the IPv6 icsk.icsk_af_ops. Sync it now. */
1192 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1197 ireq = inet_rsk(req);
1199 if (sk_acceptq_is_full(sk))
1203 dst = inet6_csk_route_req(sk, &fl6, req);
1208 newsk = tcp_create_openreq_child(sk, req, skb);
1213 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1214 * count here, tcp_create_openreq_child now does this for us, see the
1215 * comment in that function for the gory details. -acme
1218 newsk->sk_gso_type = SKB_GSO_TCPV6;
1219 __ip6_dst_store(newsk, dst, NULL, NULL);
1220 inet6_sk_rx_dst_set(newsk, skb);
1222 newtcp6sk = (struct tcp6_sock *)newsk;
1223 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1225 newtp = tcp_sk(newsk);
1226 newinet = inet_sk(newsk);
1227 newnp = inet6_sk(newsk);
1229 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1231 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1232 newnp->saddr = ireq->ir_v6_loc_addr;
1233 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1234 newsk->sk_bound_dev_if = ireq->ir_iif;
1236 /* Now IPv6 options...
1238 First: no IPv4 options.
1240 newinet->inet_opt = NULL;
1241 newnp->ipv6_ac_list = NULL;
1242 newnp->ipv6_fl_list = NULL;
1245 newnp->rxopt.all = np->rxopt.all;
1247 /* Clone pktoptions received with SYN */
1248 newnp->pktoptions = NULL;
1249 if (ireq->pktopts != NULL) {
1250 newnp->pktoptions = skb_clone(ireq->pktopts,
1251 sk_gfp_atomic(sk, GFP_ATOMIC));
1252 consume_skb(ireq->pktopts);
1253 ireq->pktopts = NULL;
1254 if (newnp->pktoptions)
1255 skb_set_owner_r(newnp->pktoptions, newsk);
1258 newnp->mcast_oif = inet6_iif(skb);
1259 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1260 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1262 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1264 /* Clone native IPv6 options from the listening socket (if any).
1266 Yes, keeping a reference count would be much cleverer, but we do
1267 one more thing here: reattach the optmem to newsk. */
1271 newnp->opt = ipv6_dup_options(newsk, np->opt);
1273 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1275 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1276 newnp->opt->opt_flen);
1278 tcp_sync_mss(newsk, dst_mtu(dst));
1279 newtp->advmss = dst_metric_advmss(dst);
1280 if (tcp_sk(sk)->rx_opt.user_mss &&
1281 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1282 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1284 tcp_initialize_rcv_mss(newsk);
1286 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1287 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1289 #ifdef CONFIG_TCP_MD5SIG
1290 /* Copy over the MD5 key from the original socket */
1291 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1293 /* We're using one, so create a matching key
1294 * on the newsk structure. If we fail to get
1295 * memory, then we end up not copying the key
1298 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1299 AF_INET6, key->key, key->keylen,
1300 sk_gfp_atomic(sk, GFP_ATOMIC));
1304 if (__inet_inherit_port(sk, newsk) < 0) {
1305 inet_csk_prepare_forced_close(newsk);
1309 __inet6_hash(newsk, NULL);
1314 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1318 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1322 /* The socket must have its spinlock held when we get here.
1325 * We have a potential double-lock case here, so even when
1326 * doing backlog processing we use the BH locking scheme.
1327 * This is because we cannot sleep with the original spinlock held. */
1330 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1332 struct ipv6_pinfo *np = inet6_sk(sk);
1333 struct tcp_sock *tp;
1334 struct sk_buff *opt_skb = NULL;
1336 /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1337 goes to the IPv4 receive handler and is backlogged.
1338 From the backlog it always comes here. Kerboom...
1339 Fortunately, tcp_rcv_established and rcv_established
1340 handle it correctly, but that is not the case with
1341 tcp_v6_hnd_req() and tcp_v6_send_reset(). --ANK */
1344 if (skb->protocol == htons(ETH_P_IP))
1345 return tcp_v4_do_rcv(sk, skb);
1347 #ifdef CONFIG_TCP_MD5SIG
1348 if (tcp_v6_inbound_md5_hash(sk, skb))
1352 if (sk_filter(sk, skb))
1356 * socket locking is here for SMP purposes as backlog rcv
1357 * is currently called with bh processing disabled.
1360 /* Do Stevens' IPV6_PKTOPTIONS.
1362 Yes, guys, this is the only place in our code where we
1363 can make it without affecting IPv4.
1364 The rest of the code is protocol independent,
1365 and I do not like the idea of uglifying IPv4.
1367 Actually, the whole idea behind IPV6_PKTOPTIONS
1368 does not look very well thought out. For now we latch the
1369 options received in the last packet enqueued
1370 by tcp. Feel free to propose a better solution. */
1374 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1376 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1377 struct dst_entry *dst = sk->sk_rx_dst;
1379 sock_rps_save_rxhash(sk, skb);
1381 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1382 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1384 sk->sk_rx_dst = NULL;
1388 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1390 goto ipv6_pktoptions;
1394 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1397 if (sk->sk_state == TCP_LISTEN) {
1398 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1403 * Queue it on the new socket if the new socket is active,
1404 * otherwise we just shortcircuit this and continue with the new socket. */
1408 sock_rps_save_rxhash(nsk, skb);
1409 if (tcp_child_process(sk, nsk, skb))
1412 __kfree_skb(opt_skb);
1416 sock_rps_save_rxhash(sk, skb);
1418 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1421 goto ipv6_pktoptions;
1425 tcp_v6_send_reset(sk, skb);
1428 __kfree_skb(opt_skb);
1432 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1433 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1438 /* You ask, what is this?
1440 1. The skb was enqueued by tcp.
1441 2. The skb was added to the tail of the read queue, not out of order.
1442 3. The socket is not in a passive state.
1443 4. Finally, it really contains options which the user wants to receive. */
1446 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1447 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1448 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1449 np->mcast_oif = inet6_iif(opt_skb);
1450 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1451 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1452 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1453 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1455 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1456 if (ipv6_opt_accepted(sk, opt_skb)) {
1457 skb_set_owner_r(opt_skb, sk);
1458 opt_skb = xchg(&np->pktoptions, opt_skb);
1460 __kfree_skb(opt_skb);
1461 opt_skb = xchg(&np->pktoptions, NULL);
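/*
 * Softirq receive entry point: pull and validate the TCP header, fill the
 * skb control block, look the socket up, run policy and socket filters,
 * then process the segment directly, prequeue it, or push it onto the
 * owner's backlog.
 */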
1469 static int tcp_v6_rcv(struct sk_buff *skb)
1471 const struct tcphdr *th;
1472 const struct ipv6hdr *hdr;
1475 struct net *net = dev_net(skb->dev);
1477 if (skb->pkt_type != PACKET_HOST)
1481 * Count it even if it's bad.
1483 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1485 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1490 if (th->doff < sizeof(struct tcphdr)/4)
1492 if (!pskb_may_pull(skb, th->doff*4))
1495 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1499 hdr = ipv6_hdr(skb);
1500 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1501 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1502 skb->len - th->doff*4);
1503 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1504 TCP_SKB_CB(skb)->when = 0;
1505 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1506 TCP_SKB_CB(skb)->sacked = 0;
1508 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1513 if (sk->sk_state == TCP_TIME_WAIT)
1516 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1517 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1518 goto discard_and_relse;
1521 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1522 goto discard_and_relse;
1524 if (sk_filter(sk, skb))
1525 goto discard_and_relse;
1527 sk_mark_napi_id(sk, skb);
1530 bh_lock_sock_nested(sk);
1532 if (!sock_owned_by_user(sk)) {
1533 #ifdef CONFIG_NET_DMA
1534 struct tcp_sock *tp = tcp_sk(sk);
1535 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1536 tp->ucopy.dma_chan = net_dma_find_channel();
1537 if (tp->ucopy.dma_chan)
1538 ret = tcp_v6_do_rcv(sk, skb);
1542 if (!tcp_prequeue(sk, skb))
1543 ret = tcp_v6_do_rcv(sk, skb);
1545 } else if (unlikely(sk_add_backlog(sk, skb,
1546 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1548 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1549 goto discard_and_relse;
1554 return ret ? -1 : 0;
1557 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1560 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1562 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1564 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1566 tcp_v6_send_reset(NULL, skb);
1578 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1579 inet_twsk_put(inet_twsk(sk));
1583 if (skb->len < (th->doff<<2)) {
1584 inet_twsk_put(inet_twsk(sk));
1587 if (tcp_checksum_complete(skb)) {
1588 inet_twsk_put(inet_twsk(sk));
1592 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1597 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1598 &ipv6_hdr(skb)->saddr, th->source,
1599 &ipv6_hdr(skb)->daddr,
1600 ntohs(th->dest), inet6_iif(skb));
1602 struct inet_timewait_sock *tw = inet_twsk(sk);
1603 inet_twsk_deschedule(tw, &tcp_death_row);
1608 /* Fall through to ACK */
1611 tcp_v6_timewait_ack(sk, skb);
1615 case TCP_TW_SUCCESS:
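/*
 * Early demux: before routing, try to match the incoming packet to an
 * established socket and reuse that socket's cached receive dst.
 */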
1621 static void tcp_v6_early_demux(struct sk_buff *skb)
1623 const struct ipv6hdr *hdr;
1624 const struct tcphdr *th;
1627 if (skb->pkt_type != PACKET_HOST)
1630 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1633 hdr = ipv6_hdr(skb);
1636 if (th->doff < sizeof(struct tcphdr) / 4)
1639 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1640 &hdr->saddr, th->source,
1641 &hdr->daddr, ntohs(th->dest),
1645 skb->destructor = sock_edemux;
1646 if (sk->sk_state != TCP_TIME_WAIT) {
1647 struct dst_entry *dst = sk->sk_rx_dst;
1650 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1652 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1653 skb_dst_set_noref(skb, dst);
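/*
 * Operations tables: timewait handling first, then the two flavours of
 * connection AF ops -- ipv6_specific for native IPv6 traffic and
 * ipv6_mapped for IPv4 traffic carried on an AF_INET6 socket.
 */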
1658 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1659 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1660 .twsk_unique = tcp_twsk_unique,
1661 .twsk_destructor = tcp_twsk_destructor,
1664 static const struct inet_connection_sock_af_ops ipv6_specific = {
1665 .queue_xmit = inet6_csk_xmit,
1666 .send_check = tcp_v6_send_check,
1667 .rebuild_header = inet6_sk_rebuild_header,
1668 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1669 .conn_request = tcp_v6_conn_request,
1670 .syn_recv_sock = tcp_v6_syn_recv_sock,
1671 .net_header_len = sizeof(struct ipv6hdr),
1672 .net_frag_header_len = sizeof(struct frag_hdr),
1673 .setsockopt = ipv6_setsockopt,
1674 .getsockopt = ipv6_getsockopt,
1675 .addr2sockaddr = inet6_csk_addr2sockaddr,
1676 .sockaddr_len = sizeof(struct sockaddr_in6),
1677 .bind_conflict = inet6_csk_bind_conflict,
1678 #ifdef CONFIG_COMPAT
1679 .compat_setsockopt = compat_ipv6_setsockopt,
1680 .compat_getsockopt = compat_ipv6_getsockopt,
1684 #ifdef CONFIG_TCP_MD5SIG
1685 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1686 .md5_lookup = tcp_v6_md5_lookup,
1687 .calc_md5_hash = tcp_v6_md5_hash_skb,
1688 .md5_parse = tcp_v6_parse_md5_keys,
1693 * TCP over IPv4 via INET6 API
1695 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1696 .queue_xmit = ip_queue_xmit,
1697 .send_check = tcp_v4_send_check,
1698 .rebuild_header = inet_sk_rebuild_header,
1699 .sk_rx_dst_set = inet_sk_rx_dst_set,
1700 .conn_request = tcp_v6_conn_request,
1701 .syn_recv_sock = tcp_v6_syn_recv_sock,
1702 .net_header_len = sizeof(struct iphdr),
1703 .setsockopt = ipv6_setsockopt,
1704 .getsockopt = ipv6_getsockopt,
1705 .addr2sockaddr = inet6_csk_addr2sockaddr,
1706 .sockaddr_len = sizeof(struct sockaddr_in6),
1707 .bind_conflict = inet6_csk_bind_conflict,
1708 #ifdef CONFIG_COMPAT
1709 .compat_setsockopt = compat_ipv6_setsockopt,
1710 .compat_getsockopt = compat_ipv6_getsockopt,
1714 #ifdef CONFIG_TCP_MD5SIG
1715 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1716 .md5_lookup = tcp_v4_md5_lookup,
1717 .calc_md5_hash = tcp_v4_md5_hash_skb,
1718 .md5_parse = tcp_v6_parse_md5_keys,
1722 /* NOTE: A lot of things are set to zero explicitly by the call to
1723 * sk_alloc(), so they need not be done here. */
1725 static int tcp_v6_init_sock(struct sock *sk)
1727 struct inet_connection_sock *icsk = inet_csk(sk);
1731 icsk->icsk_af_ops = &ipv6_specific;
1733 #ifdef CONFIG_TCP_MD5SIG
1734 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1740 static void tcp_v6_destroy_sock(struct sock *sk)
1742 tcp_v4_destroy_sock(sk);
1743 inet6_destroy_sock(sk);
1746 #ifdef CONFIG_PROC_FS
1747 /* Proc filesystem TCPv6 sock list dumping. */
1748 static void get_openreq6(struct seq_file *seq,
1749 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
1751 int ttd = req->expires - jiffies;
1752 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1753 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1759 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1760 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1762 src->s6_addr32[0], src->s6_addr32[1],
1763 src->s6_addr32[2], src->s6_addr32[3],
1764 inet_rsk(req)->ir_num,
1765 dest->s6_addr32[0], dest->s6_addr32[1],
1766 dest->s6_addr32[2], dest->s6_addr32[3],
1767 ntohs(inet_rsk(req)->ir_rmt_port),
1769 0, 0, /* could print option size, but that is af dependent. */
1770 1, /* timers active (only the expire timer) */
1771 jiffies_to_clock_t(ttd),
1773 from_kuid_munged(seq_user_ns(seq), uid),
1774 0, /* non standard timer */
1775 0, /* open_requests have no inode */
1779 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1781 const struct in6_addr *dest, *src;
1784 unsigned long timer_expires;
1785 const struct inet_sock *inet = inet_sk(sp);
1786 const struct tcp_sock *tp = tcp_sk(sp);
1787 const struct inet_connection_sock *icsk = inet_csk(sp);
1788 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
1790 dest = &sp->sk_v6_daddr;
1791 src = &sp->sk_v6_rcv_saddr;
1792 destp = ntohs(inet->inet_dport);
1793 srcp = ntohs(inet->inet_sport);
1795 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1797 timer_expires = icsk->icsk_timeout;
1798 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1800 timer_expires = icsk->icsk_timeout;
1801 } else if (timer_pending(&sp->sk_timer)) {
1803 timer_expires = sp->sk_timer.expires;
1806 timer_expires = jiffies;
1810 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1811 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1813 src->s6_addr32[0], src->s6_addr32[1],
1814 src->s6_addr32[2], src->s6_addr32[3], srcp,
1815 dest->s6_addr32[0], dest->s6_addr32[1],
1816 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1818 tp->write_seq-tp->snd_una,
1819 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1821 jiffies_delta_to_clock_t(timer_expires - jiffies),
1822 icsk->icsk_retransmits,
1823 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1824 icsk->icsk_probes_out,
1826 atomic_read(&sp->sk_refcnt), sp,
1827 jiffies_to_clock_t(icsk->icsk_rto),
1828 jiffies_to_clock_t(icsk->icsk_ack.ato),
1829 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1831 sp->sk_state == TCP_LISTEN ?
1832 (fastopenq ? fastopenq->max_qlen : 0) :
1833 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1837 static void get_timewait6_sock(struct seq_file *seq,
1838 struct inet_timewait_sock *tw, int i)
1840 const struct in6_addr *dest, *src;
1842 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1844 dest = &tw->tw_v6_daddr;
1845 src = &tw->tw_v6_rcv_saddr;
1846 destp = ntohs(tw->tw_dport);
1847 srcp = ntohs(tw->tw_sport);
1850 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1851 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1853 src->s6_addr32[0], src->s6_addr32[1],
1854 src->s6_addr32[2], src->s6_addr32[3], srcp,
1855 dest->s6_addr32[0], dest->s6_addr32[1],
1856 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1857 tw->tw_substate, 0, 0,
1858 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1859 atomic_read(&tw->tw_refcnt), tw);
1862 static int tcp6_seq_show(struct seq_file *seq, void *v)
1864 struct tcp_iter_state *st;
1865 struct sock *sk = v;
1867 if (v == SEQ_START_TOKEN) {
1872 "st tx_queue rx_queue tr tm->when retrnsmt"
1873 " uid timeout inode\n");
1878 switch (st->state) {
1879 case TCP_SEQ_STATE_LISTENING:
1880 case TCP_SEQ_STATE_ESTABLISHED:
1881 if (sk->sk_state == TCP_TIME_WAIT)
1882 get_timewait6_sock(seq, v, st->num);
1884 get_tcp6_sock(seq, v, st->num);
1886 case TCP_SEQ_STATE_OPENREQ:
1887 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1894 static const struct file_operations tcp6_afinfo_seq_fops = {
1895 .owner = THIS_MODULE,
1896 .open = tcp_seq_open,
1898 .llseek = seq_lseek,
1899 .release = seq_release_net
1902 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1905 .seq_fops = &tcp6_afinfo_seq_fops,
1907 .show = tcp6_seq_show,
1911 int __net_init tcp6_proc_init(struct net *net)
1913 return tcp_proc_register(net, &tcp6_seq_afinfo);
1916 void tcp6_proc_exit(struct net *net)
1918 tcp_proc_unregister(net, &tcp6_seq_afinfo);
1922 static void tcp_v6_clear_sk(struct sock *sk, int size)
1924 struct inet_sock *inet = inet_sk(sk);
1926 /* we do not want to clear pinet6 field, because of RCU lookups */
1927 sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
1929 size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
1930 memset(&inet->pinet6 + 1, 0, size);
1933 struct proto tcpv6_prot = {
1935 .owner = THIS_MODULE,
1937 .connect = tcp_v6_connect,
1938 .disconnect = tcp_disconnect,
1939 .accept = inet_csk_accept,
1941 .init = tcp_v6_init_sock,
1942 .destroy = tcp_v6_destroy_sock,
1943 .shutdown = tcp_shutdown,
1944 .setsockopt = tcp_setsockopt,
1945 .getsockopt = tcp_getsockopt,
1946 .recvmsg = tcp_recvmsg,
1947 .sendmsg = tcp_sendmsg,
1948 .sendpage = tcp_sendpage,
1949 .backlog_rcv = tcp_v6_do_rcv,
1950 .release_cb = tcp_release_cb,
1951 .mtu_reduced = tcp_v6_mtu_reduced,
1952 .hash = tcp_v6_hash,
1953 .unhash = inet_unhash,
1954 .get_port = inet_csk_get_port,
1955 .enter_memory_pressure = tcp_enter_memory_pressure,
1956 .stream_memory_free = tcp_stream_memory_free,
1957 .sockets_allocated = &tcp_sockets_allocated,
1958 .memory_allocated = &tcp_memory_allocated,
1959 .memory_pressure = &tcp_memory_pressure,
1960 .orphan_count = &tcp_orphan_count,
1961 .sysctl_mem = sysctl_tcp_mem,
1962 .sysctl_wmem = sysctl_tcp_wmem,
1963 .sysctl_rmem = sysctl_tcp_rmem,
1964 .max_header = MAX_TCP_HEADER,
1965 .obj_size = sizeof(struct tcp6_sock),
1966 .slab_flags = SLAB_DESTROY_BY_RCU,
1967 .twsk_prot = &tcp6_timewait_sock_ops,
1968 .rsk_prot = &tcp6_request_sock_ops,
1969 .h.hashinfo = &tcp_hashinfo,
1970 .no_autobind = true,
1971 #ifdef CONFIG_COMPAT
1972 .compat_setsockopt = compat_tcp_setsockopt,
1973 .compat_getsockopt = compat_tcp_getsockopt,
1975 #ifdef CONFIG_MEMCG_KMEM
1976 .proto_cgroup = tcp_proto_cgroup,
1978 .clear_sk = tcp_v6_clear_sk,
1981 static const struct inet6_protocol tcpv6_protocol = {
1982 .early_demux = tcp_v6_early_demux,
1983 .handler = tcp_v6_rcv,
1984 .err_handler = tcp_v6_err,
1985 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1988 static struct inet_protosw tcpv6_protosw = {
1989 .type = SOCK_STREAM,
1990 .protocol = IPPROTO_TCP,
1991 .prot = &tcpv6_prot,
1992 .ops = &inet6_stream_ops,
1994 .flags = INET_PROTOSW_PERMANENT |
1998 static int __net_init tcpv6_net_init(struct net *net)
2000 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2001 SOCK_RAW, IPPROTO_TCP, net);
2004 static void __net_exit tcpv6_net_exit(struct net *net)
2006 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2009 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2011 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
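/*
 * Per-netns plumbing: each namespace gets its own control socket for
 * generated resets/ACKs, and timewait sockets are purged in the batched
 * exit path.
 */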
2014 static struct pernet_operations tcpv6_net_ops = {
2015 .init = tcpv6_net_init,
2016 .exit = tcpv6_net_exit,
2017 .exit_batch = tcpv6_net_exit_batch,
2020 int __init tcpv6_init(void)
2024 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2028 /* register inet6 protocol */
2029 ret = inet6_register_protosw(&tcpv6_protosw);
2031 goto out_tcpv6_protocol;
2033 ret = register_pernet_subsys(&tcpv6_net_ops);
2035 goto out_tcpv6_protosw;
2040 inet6_unregister_protosw(&tcpv6_protosw);
2042 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2046 void tcpv6_exit(void)
2048 unregister_pernet_subsys(&tcpv6_net_ops);
2049 inet6_unregister_protosw(&tcpv6_protosw);
2050 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);