 *	Linux INET6 implementation
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   struct in6_addr *addr)
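
/* Hash a non-closed socket into the TCP tables; sockets using the
 * mapped-IPv4 ops are handed to the IPv4 hash routine instead.
 */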
static void tcp_v6_hash(struct sock *sk)
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
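
/* Compute the TCP checksum over the IPv6 pseudo-header. */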
static __inline__ __sum16 tcp_v6_check(int len,
				       struct in6_addr *saddr,
				       struct in6_addr *daddr,
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
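
/* Derive a secure initial sequence number from the addresses and ports. */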
static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->source);
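
/* Active open: validate the destination address, resolve a route and
 * send the initial SYN.
 */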
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct dst_entry *dst;

	if (addr_len < SIN6_LEN_RFC2133)

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
	IP6_ECN_flow_init(fl.fl6_flowlabel);
	if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
		struct ip6_flowlabel *flowlabel;
		flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
		if (flowlabel == NULL)
		ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
		fl6_sock_release(flowlabel);

	/* connect() to INADDR_ANY means loopback (BSD'ism). */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface was set while binding, the indices
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)

			sk->sk_bound_dev_if = usin->sin6_scope_id;

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;

		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.mark = sk->sk_mark;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->inet_sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);

	security_sk_classify_flow(sk, &fl);

	err = ip6_dst_lookup(sk, &dst, &fl);

		ipv6_addr_copy(&fl.fl6_dst, final_p);

	err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
		err = ip6_dst_blackhole(sk, &dst, &fl);

		ipv6_addr_copy(&np->rcv_saddr, saddr);

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);

		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,

	err = tcp_connect(sk);

	tcp_set_state(sk, TCP_CLOSE);
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
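
/* ICMPv6 error handler: locate the socket the error refers to and react,
 * e.g. by syncing the MSS after an ICMPV6_PKT_TOOBIG.
 */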
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));

	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)

	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

			struct inet_sock *inet = inet_sk(sk);

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle the rthdr case. Ignore this complexity

			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.mark = sk->sk_mark;
			fl.fl_ip_dport = inet->inet_dport;
			fl.fl_ip_sport = inet->inet_sport;
			security_skb_classify_flow(skb, &fl);

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;

			if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
		if (sock_owned_by_user(sk))

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);

		inet_csk_reqsk_queue_drop(sk, req, prev);

	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			sk->sk_err_soft = err;

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_error_report(sk);
		sk->sk_err_soft = err;
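
/* Build a route for the connection request and transmit a SYN|ACK. */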
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct dst_entry *dst;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.mark = sk->sk_mark;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_rsk(req)->loc_port;
	security_req_classify_flow(req, &fl);

	if (opt && opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);

	err = ip6_dst_lookup(sk, &dst, &fl);
		ipv6_addr_copy(&fl.fl6_dst, final_p);
	if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)

	skb = tcp_make_synack(sk, dst, req);
		struct tcphdr *th = tcp_hdr(skb);

		th->check = tcp_v6_check(skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial(th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		err = net_xmit_eval(err);

	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);

static inline void syn_flood_warning(struct sk_buff *skb)
#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies)
		       "TCPv6: Possible SYN flooding on port %d. "
		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
		       "TCPv6: Possible SYN flooding on port %d. "
		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));

static void tcp_v6_reqsk_destructor(struct request_sock *req)
	kfree_skb(inet6_rsk(req)->pktopts);

#ifdef CONFIG_TCP_MD5SIG
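/* Look up the MD5 key configured for a given IPv6 peer address, if any. */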
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   struct in6_addr *addr)
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->md5sig_info || !tp->md5sig_info->entries6)

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
			return &tp->md5sig_info->keys6[i].base;

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
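
/* Add a new MD5 key for a peer address, or update the existing entry. */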
static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
			     char *newkey, u8 newkeylen)
	/* Add key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp6_md5sig_key *keys;

	key = tcp_v6_md5_do_lookup(sk, peer);
		/* modify existing entry - just update that one */
		key->keylen = newkeylen;
		/* reallocate new list if current one is full. */
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
			if (!tp->md5sig_info) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;

		if (tcp_alloc_md5sig_pool(sk) == NULL) {
		if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
			keys = kmalloc((sizeof(tp->md5sig_info->keys6[0]) *
				       (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);

				tcp_free_md5sig_pool();

			if (tp->md5sig_info->entries6)
				memmove(keys, tp->md5sig_info->keys6,
					(sizeof(tp->md5sig_info->keys6[0]) *
					 tp->md5sig_info->entries6));

			kfree(tp->md5sig_info->keys6);
			tp->md5sig_info->keys6 = keys;
			tp->md5sig_info->alloced6++;

		ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;

		tp->md5sig_info->entries6++;

static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, __u8 newkeylen)
	return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,

static int tcp_v6_md5_do_del(struct sock *sk, struct in6_addr *peer)
	struct tcp_sock *tp = tcp_sk(sk);

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
			kfree(tp->md5sig_info->keys6[i].base.key);
			tp->md5sig_info->entries6--;

			if (tp->md5sig_info->entries6 == 0) {
				kfree(tp->md5sig_info->keys6);
				tp->md5sig_info->keys6 = NULL;
				tp->md5sig_info->alloced6 = 0;
				/* shrink the database */
				if (tp->md5sig_info->entries6 != i)
					memmove(&tp->md5sig_info->keys6[i],
						&tp->md5sig_info->keys6[i+1],
						(tp->md5sig_info->entries6 - i)
						* sizeof(tp->md5sig_info->keys6[0]));
			tcp_free_md5sig_pool();

static void tcp_v6_clear_md5_list(struct sock *sk)
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->md5sig_info->entries6) {
		for (i = 0; i < tp->md5sig_info->entries6; i++)
			kfree(tp->md5sig_info->keys6[i].base.key);
		tp->md5sig_info->entries6 = 0;
		tcp_free_md5sig_pool();

	kfree(tp->md5sig_info->keys6);
	tp->md5sig_info->keys6 = NULL;
	tp->md5sig_info->alloced6 = 0;

	if (tp->md5sig_info->entries4) {
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();

	kfree(tp->md5sig_info->keys4);
	tp->md5sig_info->keys4 = NULL;
	tp->md5sig_info->alloced4 = 0;

static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))

	if (copy_from_user(&cmd, optval, sizeof(cmd)))

	if (sin6->sin6_family != AF_INET6)

	if (!cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
		return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);

		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
					 newkey, cmd.tcpm_keylen);
	return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
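
/* Feed the IPv6 pseudo-header (RFC 2460) into the MD5 digest. */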
static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					struct in6_addr *daddr,
					struct in6_addr *saddr, int nbytes)
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	ipv6_addr_copy(&bp->saddr, saddr);
	ipv6_addr_copy(&bp->daddr, daddr);
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));

static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       struct in6_addr *daddr, struct in6_addr *saddr,
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
	if (tcp_md5_hash_header(hp, th))
	if (tcp_md5_hash_key(hp, key))
	if (crypto_hash_final(desc, md5_hash))

	tcp_put_md5sig_pool();

	tcp_put_md5sig_pool();
	memset(md5_hash, 0, 16);

static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       struct sock *sk, struct request_sock *req,
	struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);

		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;

	hp = tcp_get_md5sig_pool();
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))

	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
	if (tcp_md5_hash_header(hp, th))
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
	if (tcp_md5_hash_key(hp, key))
	if (crypto_hash_final(desc, md5_hash))

	tcp_put_md5sig_pool();

	tcp_put_md5sig_pool();
	memset(md5_hash, 0, 16);
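
/* Verify the TCP MD5 signature option on an incoming segment against
 * the key expected for its source address, if either exists.
 */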
static int tcp_v6_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n",
			       genhash ? "failed" : "mismatch",
			       &ip6h->saddr, ntohs(th->source),
			       &ip6h->daddr, ntohs(th->dest));

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	=	sizeof(struct tcp6_timewait_sock),
	.twsk_unique	=	tcp_twsk_unique,
	.twsk_destructor =	tcp_twsk_destructor,

static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial(th, th->doff << 2,

static int tcp_v6_gso_send_check(struct sk_buff *skb)
	struct ipv6hdr *ipv6h;

	if (!pskb_may_pull(skb, sizeof(*th)))

	ipv6h = ipv6_hdr(skb);

	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;

static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
	struct ipv6hdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		NAPI_GRO_CB(skb)->flush = 1;

	return tcp_gro_receive(head, skb);

static int tcp6_gro_complete(struct sk_buff *skb)
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;

	return tcp_gro_complete(skb);
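
/* Construct and transmit a bare ACK or RST in reply to an incoming
 * segment on the per-net control socket; no full socket is required.
 */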
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
				 u32 ts, struct tcp_md5sig_key *key, int rst)
	struct tcphdr *th = tcp_hdr(skb), *t1;
	struct sk_buff *buff;
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;

		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
		tot_len += TCPOLEN_MD5SIG_ALIGNED;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(skb);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);
		*topt++ = htonl(ts);

#ifdef CONFIG_TCP_MD5SIG
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);

	buff->csum = csum_partial(t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;
	security_skb_classify_flow(skb, &fl);

	/* Pass a socket to ip6_dst_lookup even when it is for an RST;
	 * the underlying function will use it to retrieve the network
	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
			skb_dst_set(buff, dst);
			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
				TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
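
/* Send a RST in reply to a segment for which no valid socket exists. */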
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
	struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;

	if (!ipv6_unicast_destination(skb))

#ifdef CONFIG_TCP_MD5SIG
		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);

		seq = ntohl(th->ack_seq);
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -

	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key)
	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
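
/* Match a segment arriving on a listening socket to a pending connection
 * request, an established socket, or a SYN cookie.
 */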
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
			&ipv6_hdr(skb)->saddr, th->source,
			&ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));

		if (nsk->sk_state != TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(nsk));

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb);

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = __sk_dst_get(sk);
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#define want_cookie 0

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies)

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0, dst);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
	TCP_ECN_create_request(req, tcp_hdr(skb));

		isn = cookie_v6_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;

	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

		isn = tcp_v6_init_sequence(skb);

	tcp_rsk(req)->snt_isn = isn;

	security_inet_conn_request(sk, skb, req);

	if (tcp_v6_send_synack(sk, req))

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);

	return 0; /* don't send reset */
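
/* Create the child socket once the three-way handshake has completed;
 * handles both the mapped-IPv4 and the native IPv6 case.
 */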
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;

	if (skb->protocol == htons(ETH_P_IP)) {

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);

		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;

		newnp->pktoptions = NULL;
		newnp->mcast_oif = inet6_iif(skb);
		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment the IPv4 code
		   worked with the IPv6 icsk.icsk_af_ops.

		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

	treq = inet6_rsk(req);

	if (sk_acceptq_is_full(sk))

		struct in6_addr *final_p = NULL, final;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.mark = sk->sk_mark;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_rsk(req)->loc_port;
		security_req_classify_flow(req, &fl);

		if (ip6_dst_lookup(sk, &dst, &fl))

			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)

	newsk = tcp_create_openreq_child(sk, req, skb);

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;
	newnp->ipv6_fl_list = NULL;

	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);

	newnp->mcast_oif = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but
	   we do one more thing here: reattach the optmem

		newnp->opt = ipv6_dup_options(newsk, opt);
			sock_kfree_s(sk, opt, opt->tot_len);

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key

		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
			tcp_v6_md5_do_add(newsk, &newnp->daddr,
					  newkey, key->keylen);

	__inet6_hash(newsk);
	__inet_inherit_port(sk, newsk);

	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
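
/* Validate the receive checksum, or set things up so it is checked later. */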
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);

/* The socket must have its spinlock held when we get
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: the socket is IPv6, an IPv4 packet arrives,
	   goes to the IPv4 receive handler and is backlogged.
	   From the backlog it always comes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))

	if (sk_filter(sk, skb))

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, this is the only place in our code where we
	   can do this without affecting IPv4. The rest of the code
	   is protocol independent, and I do not like the idea of
	   uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS does not
	   look very well thought out. For now we latch the options
	   received in the last packet enqueued by TCP. Feel free to
	   propose a better solution.
	 */
	opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
		TCP_CHECK_TIMER(sk);
			goto ipv6_pktoptions;

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just short-circuit this and continue with
		 */
			if (tcp_child_process(sk, nsk, skb))
				__kfree_skb(opt_skb);

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
	TCP_CHECK_TIMER(sk);
		goto ipv6_pktoptions;

	tcp_v6_send_reset(sk, skb);
		__kfree_skb(opt_skb);

	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);

	/* You may ask, what is this?

	   1. The skb was enqueued by TCP.
	   2. The skb was added to the tail of the read queue, rather
	      than out of order.
	   3. The socket is not in a passive state.
	   4. Finally, it really contains options which the user wants
	      to receive.
	 */
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
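
/* Main IPv6 TCP receive routine, invoked from the inet6 protocol handler. */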
static int tcp_v6_rcv(struct sk_buff *skb)
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)

	/* Count it even if it's bad. */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))

	if (th->doff < sizeof(struct tcphdr) / 4)
	if (!pskb_may_pull(skb, th->doff * 4))

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);

	if (sk->sk_state == TCP_TIME_WAIT)

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	bh_lock_sock_nested(sk);
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
		sk_add_backlog(sk, skb);

	return ret ? -1 : 0;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		tcp_v6_send_reset(NULL, skb);

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);

		/* Fall through to ACK */
		tcp_v6_timewait_ack(sk, skb);

	case TCP_TW_SUCCESS:;

static int tcp_v6_remember_stamp(struct sock *sk)
	/* Alas, not yet... */
static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.remember_stamp	   = tcp_v6_remember_stamp,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,

/* NOTE: A lot of things are set to zero explicitly by the call to
 *	 sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	icsk->icsk_sync_mss = tcp_sync_mss;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv6_specific;

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	percpu_counter_inc(&tcp_sockets_allocated);

static void tcp_v6_destroy_sock(struct sock *sk)
#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list */
	if (tcp_sk(sk)->md5sig_info)
		tcp_v6_clear_md5_list(sk);
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
	int ttd = req->expires - jiffies;
	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
	struct in6_addr *dest, *src;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	src = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_expires = sp->sk_timer.expires;
		timer_expires = jiffies;

		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tp->write_seq - tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   icsk->icsk_probes_out,
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
	struct in6_addr *dest, *src;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	dest = &tw6->tw_v6_daddr;
	src = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);

static int tcp6_seq_show(struct seq_file *seq, void *v)
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
			   "st tx_queue rx_queue tr tm->when retrnsmt"
			   " uid  timeout inode\n");

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.show		= tcp6_seq_show,
int tcp6_proc_init(struct net *net)
	return tcp_proc_register(net, &tcp6_seq_afinfo);

void tcp6_proc_exit(struct net *net)
	tcp_proc_unregister(net, &tcp6_seq_afinfo);

struct proto tcpv6_prot = {
	.owner			= THIS_MODULE,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.gso_send_check	=	tcp_v6_gso_send_check,
	.gso_segment	=	tcp_tso_segment,
	.gro_receive	=	tcp6_gro_receive,
	.gro_complete	=	tcp6_gro_complete,
	.flags		=	INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |

static int tcpv6_net_init(struct net *net)
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);

static void tcpv6_net_exit(struct net *net)
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);

static struct pernet_operations tcpv6_net_ops = {
	.init = tcpv6_net_init,
	.exit = tcpv6_net_exit,
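
/* Register the TCPv6 protocol handler, the protosw entry and the
 * per-namespace control socket.
 */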
int __init tcpv6_init(void)
	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
		goto out_tcpv6_protosw;

	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	inet6_unregister_protosw(&tcpv6_protosw);

void tcpv6_exit(void)
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);