// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <[email protected]>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

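/* Illustrative layout sketch (an assumption made explicit, not code used
 * by this file): the constant-offset computation above relies on struct
 * tcp6_sock keeping its struct ipv6_pinfo as the last member, roughly
 *
 *      struct tcp6_sock {
 *              struct tcp_sock   tcp;
 *              struct ipv6_pinfo inet6;
 *      };
 *
 * so tcp_inet6_sk(sk) is effectively &((struct tcp6_sock *)sk)->inet6,
 * computed without loading the inet_sk(sk)->pinet6 pointer.
 */
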
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

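/* The dst, ifindex and route cookie cached above are validated on the
 * established fast path in tcp_v6_do_rcv() below, which drops the cached
 * entry when the incoming interface or the route cookie no longer match.
 */
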
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * out of the bounds specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

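/* Note: sock_owned_by_me() asserts that the caller holds the socket lock,
 * so the cgroup BPF connect hook (which may rewrite the address in uaddr)
 * runs under the lock, mirroring tcp_v4_pre_connect().
 */
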
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6_txoptions *opt;
        struct dst_entry *dst;
        struct flowi6 fl6;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                        WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (!saddr) {
                saddr = &fl6.saddr;

                err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
                if (err)
                        goto failure;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        inet_bhash2_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

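/* Connect path summary, as implemented above: flow setup, route lookup
 * via ip6_dst_lookup_flow(), source address selection plus bhash2
 * binding, ephemeral port selection in inet6_hash_connect(), then
 * tcp_connect() sends the SYN. Failures after the TCP_SYN_SENT state
 * change unwind through late_failure; earlier ones through failure.
 */
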
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

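/* Caller note: tcp_v6_err() below calls this directly when the socket is
 * not owned by user context; otherwise it sets TCP_MTU_REDUCED_DEFERRED
 * in sk_tsq_flags so the update runs from tcp_release_cb() once the
 * socket lock is released.
 */
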
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check whether this ICMP message allows the RTO backoff to
                 * be reverted (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

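                /* Reflect the DSCP bits of the incoming SYN when
                 * sysctl_tcp_reflect_tos is set; the ECN bits always come
                 * from the listener's own tclass.
                 */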
                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* It is OK to reference dev set/not set outside of RCU here,
                 * because right now the device MUST be an L3 master.
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

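/* Note: the bytes hashed above follow RFC 2385: the IPv6 pseudo-header
 * (saddr, daddr, upper-layer length, next header = TCP) followed by the
 * fixed TCP header with its checksum field zeroed. TCP options are
 * excluded; segment payload (when present) and the key itself are fed
 * to the hash afterwards by the callers below.
 */
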
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

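/* Note: .mss_clamp matches the value set in tcp_v6_connect() above:
 * IPV6_MIN_MTU (1280) minus the TCP and IPv6 header sizes, i.e. 1220
 * bytes, the largest MSS guaranteed to fit any IPv6 path.
 */
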
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

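        /* Each *topt++ below appends one 32-bit option word right after the
         * fixed header; tot_len was sized above so that t1->doff
         * (= tot_len / 4) covers all of them.
         */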
        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
                        mark = sk->sk_mark;
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
                /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
                skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow() even when it is for an RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        u32 txhash = 0;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /* The active side is gone. Try to find the listening socket
                 * via the source port, then look up the MD5 key through that
                 * socket. We do not lose security here: the incoming packet
                 * is checked against the MD5 hash computed with the key we
                 * find, and no RST is generated if the hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
                                            NULL, 0, &ipv6h->saddr, th->source,
                                            &ipv6h->daddr, ntohs(th->source),
                                            dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                        txhash = sk->sk_hash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                        txhash = inet_twsk(sk)->tw_txhash;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority, u32 txhash)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
                        tw->tw_txhash);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
                        tcp_rsk(req)->txhash);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

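/* Note the inverted test above: a syncookie is validated on the
 * returning ACK (a segment without the SYN flag), never on the SYN
 * itself, which is answered statelessly when the accept queue overflows.
 */
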
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

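/* Background: tcp_v6_fill_cb() reuses the skb control block for
 * TCP_SKB_CB(), keeping the IPv6 control block in header.h6. Anything
 * that hands the skb back to generic IPv6 code therefore has to restore
 * IP6CB first via tcp_v6_restore_cb().
 */
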
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */
                /* This is a tricky place. Until this moment the IPv4 tcp
                 * socket worked with the IPv6 icsk.icsk_af_ops.
                 * Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much cleverer, but we do
           one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                const union tcp_md5_addr *addr;

                addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
                if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
                        inet_csk_prepare_forced_close(newsk);
                        tcp_done(newsk);
                        goto out;
                }
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone(ireq->pktopts,
                                                      sk_gfp_mask(sk, GFP_ATOMIC));
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions) {
                                tcp_v6_restore_cb(newnp->pktoptions);
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
                                                           u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct sk_buff *opt_skb = NULL;
        enum skb_drop_reason reason;
        struct tcp_sock *tp;

        /* Imagine: socket is IPv6. IPv4 packet arrives,
           goes to IPv4 receive handler and backlogged.
           From backlog it always goes here. Kerboom...
           Fortunately, tcp_rcv_established and rcv_established
           handle them correctly, but it is not the case with
           tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
         */

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);

        /*
         *      socket locking is here for SMP purposes as backlog rcv
         *      is currently called with bh processing disabled.
         */

1455         /* Do Stevens' IPV6_PKTOPTIONS.
1456
1457            Yes, guys, this is the only place in our code where we
1458            can do it without affecting IPv4.
1459            The rest of the code is protocol independent,
1460            and I do not like the idea of uglifying IPv4.
1461
1462            Actually, the whole idea behind IPV6_PKTOPTIONS
1463            does not look very well thought out. For now we latch
1464            the options received in the last packet enqueued
1465            by tcp. Feel free to propose a better solution.
1466                                                --ANK (980728)
1467          */
1468         if (np->rxopt.all)
1469                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1470
1471         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1472         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1473                 struct dst_entry *dst;
1474
1475                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1476                                                 lockdep_sock_is_held(sk));
1477
1478                 sock_rps_save_rxhash(sk, skb);
1479                 sk_mark_napi_id(sk, skb);
1480                 if (dst) {
1481                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1482                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1483                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1484                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1485                                 dst_release(dst);
1486                         }
1487                 }
1488
1489                 tcp_rcv_established(sk, skb);
1490                 if (opt_skb)
1491                         goto ipv6_pktoptions;
1492                 return 0;
1493         }
1494
1495         if (tcp_checksum_complete(skb))
1496                 goto csum_err;
1497
1498         if (sk->sk_state == TCP_LISTEN) {
1499                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1500
1501                 if (!nsk)
1502                         goto discard;
1503
1504                 if (nsk != sk) {
1505                         if (tcp_child_process(sk, nsk, skb))
1506                                 goto reset;
1507                         if (opt_skb)
1508                                 __kfree_skb(opt_skb);
1509                         return 0;
1510                 }
1511         } else
1512                 sock_rps_save_rxhash(sk, skb);
1513
1514         if (tcp_rcv_state_process(sk, skb))
1515                 goto reset;
1516         if (opt_skb)
1517                 goto ipv6_pktoptions;
1518         return 0;
1519
1520 reset:
1521         tcp_v6_send_reset(sk, skb);
1522 discard:
1523         if (opt_skb)
1524                 __kfree_skb(opt_skb);
1525         kfree_skb_reason(skb, reason);
1526         return 0;
1527 csum_err:
1528         reason = SKB_DROP_REASON_TCP_CSUM;
1529         trace_tcp_bad_csum(skb);
1530         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1531         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1532         goto discard;
1533
1535 ipv6_pktoptions:
1536         /* You may ask, what is this?
1537
1538            1. The skb was enqueued by tcp.
1539            2. The skb was added to the tail of the read queue, not out of order.
1540            3. The socket is not in a passive state.
1541            4. Finally, it really contains options the user wants to receive.
1542          */
1543         tp = tcp_sk(sk);
1544         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1545             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1546                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1547                         np->mcast_oif = tcp_v6_iif(opt_skb);
1548                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1549                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1550                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1551                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1552                 if (np->repflow)
1553                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1554                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1555                         skb_set_owner_r(opt_skb, sk);
1556                         tcp_v6_restore_cb(opt_skb);
1557                         opt_skb = xchg(&np->pktoptions, opt_skb);
1558                 } else {
1559                         __kfree_skb(opt_skb);
1560                         opt_skb = xchg(&np->pktoptions, NULL);
1561                 }
1562         }
1563
1564         consume_skb(opt_skb);
1565         return 0;
1566 }
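
/* A minimal user-space sketch (illustration only, not kernel code) of how
 * the options latched into np->pktoptions above are usually consumed on a
 * connected TCP socket via the RFC 2292 API; error handling is omitted and
 * the buffer size is an arbitrary choice:
 *
 *	int on = 1;
 *	char cbuf[256];
 *	socklen_t clen = sizeof(cbuf);
 *
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
 *	// ... exchange some data on fd ...
 *	getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &clen);
 *	// walk cbuf with CMSG_FIRSTHDR()/CMSG_NXTHDR() as for recvmsg()
 */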
1567
1568 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1569                            const struct tcphdr *th)
1570 {
1571         /* This is tricky: we move IP6CB to its correct location inside
1572          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1573          * _decode_session6() uses IP6CB().
1574          * barrier() makes sure the compiler won't play aliasing games.
1575          */
1576         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1577                 sizeof(struct inet6_skb_parm));
1578         barrier();
1579
1580         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1581         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1582                                     skb->len - th->doff*4);
1583         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1584         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1585         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1586         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1587         TCP_SKB_CB(skb)->sacked = 0;
1588         TCP_SKB_CB(skb)->has_rxtstamp =
1589                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1590 }
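
/* A worked example of the end_seq arithmetic above, with made-up numbers:
 * for a segment with seq = 1000, a 20-byte TCP header (doff = 5), 100
 * bytes of payload and FIN set, skb->len = 120 at this point, so
 * end_seq = 1000 + 0 (syn) + 1 (fin) + 120 - 20 = 1101: the payload
 * consumes 100 sequence numbers and the FIN consumes one more.
 */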
1591
1592 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1593 {
1594         enum skb_drop_reason drop_reason;
1595         int sdif = inet6_sdif(skb);
1596         int dif = inet6_iif(skb);
1597         const struct tcphdr *th;
1598         const struct ipv6hdr *hdr;
1599         bool refcounted;
1600         struct sock *sk;
1601         int ret;
1602         struct net *net = dev_net(skb->dev);
1603
1604         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1605         if (skb->pkt_type != PACKET_HOST)
1606                 goto discard_it;
1607
1608         /*
1609          *      Count it even if it's bad.
1610          */
1611         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1612
1613         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1614                 goto discard_it;
1615
1616         th = (const struct tcphdr *)skb->data;
1617
1618         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1619                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1620                 goto bad_packet;
1621         }
1622         if (!pskb_may_pull(skb, th->doff*4))
1623                 goto discard_it;
1624
1625         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1626                 goto csum_error;
1627
1628         th = (const struct tcphdr *)skb->data;
1629         hdr = ipv6_hdr(skb);
1630
1631 lookup:
1632         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1633                                 th->source, th->dest, inet6_iif(skb), sdif,
1634                                 &refcounted);
1635         if (!sk)
1636                 goto no_tcp_socket;
1637
1638 process:
1639         if (sk->sk_state == TCP_TIME_WAIT)
1640                 goto do_time_wait;
1641
1642         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1643                 struct request_sock *req = inet_reqsk(sk);
1644                 bool req_stolen = false;
1645                 struct sock *nsk;
1646
1647                 sk = req->rsk_listener;
1648                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1649                                                    &hdr->saddr, &hdr->daddr,
1650                                                    AF_INET6, dif, sdif);
1651                 if (drop_reason) {
1652                         sk_drops_add(sk, skb);
1653                         reqsk_put(req);
1654                         goto discard_it;
1655                 }
1656                 if (tcp_checksum_complete(skb)) {
1657                         reqsk_put(req);
1658                         goto csum_error;
1659                 }
1660                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1661                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1662                         if (!nsk) {
1663                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1664                                 goto lookup;
1665                         }
1666                         sk = nsk;
1667                         /* reuseport_migrate_sock() already holds one
1668                          * sk_refcnt for us when it returns.
1669                          */
1670                 } else {
1671                         sock_hold(sk);
1672                 }
1673                 refcounted = true;
1674                 nsk = NULL;
1675                 if (!tcp_filter(sk, skb)) {
1676                         th = (const struct tcphdr *)skb->data;
1677                         hdr = ipv6_hdr(skb);
1678                         tcp_v6_fill_cb(skb, hdr, th);
1679                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1680                 } else {
1681                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1682                 }
1683                 if (!nsk) {
1684                         reqsk_put(req);
1685                         if (req_stolen) {
1686                                 /* Another CPU got exclusive access to req
1687                                  * and created a full-blown socket.
1688                                  * Try to feed this packet to that socket
1689                                  * instead of discarding it.
1690                                  */
1691                                 tcp_v6_restore_cb(skb);
1692                                 sock_put(sk);
1693                                 goto lookup;
1694                         }
1695                         goto discard_and_relse;
1696                 }
1697                 if (nsk == sk) {
1698                         reqsk_put(req);
1699                         tcp_v6_restore_cb(skb);
1700                 } else if (tcp_child_process(sk, nsk, skb)) {
1701                         tcp_v6_send_reset(nsk, skb);
1702                         goto discard_and_relse;
1703                 } else {
1704                         sock_put(sk);
1705                         return 0;
1706                 }
1707         }
1708
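        /* The check below enforces IPV6_MINHOPCOUNT, a GTSM-style guard
         * (RFC 5082): segments whose hop limit is below the configured
         * minimum are dropped before they reach the socket. A sketch of
         * how an application might arm it (fd is a hypothetical socket):
         *
         *	int min_hops = 255;
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
         *		   &min_hops, sizeof(min_hops));
         */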
1709         if (static_branch_unlikely(&ip6_min_hopcount)) {
1710                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1711                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1712                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1713                         goto discard_and_relse;
1714                 }
1715         }
1716
1717         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1718                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1719                 goto discard_and_relse;
1720         }
1721
1722         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1723                                            AF_INET6, dif, sdif);
1724         if (drop_reason)
1725                 goto discard_and_relse;
1726
1727         if (tcp_filter(sk, skb)) {
1728                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1729                 goto discard_and_relse;
1730         }
1731         th = (const struct tcphdr *)skb->data;
1732         hdr = ipv6_hdr(skb);
1733         tcp_v6_fill_cb(skb, hdr, th);
1734
1735         skb->dev = NULL;
1736
1737         if (sk->sk_state == TCP_LISTEN) {
1738                 ret = tcp_v6_do_rcv(sk, skb);
1739                 goto put_and_return;
1740         }
1741
1742         sk_incoming_cpu_update(sk);
1743
1744         bh_lock_sock_nested(sk);
1745         tcp_segs_in(tcp_sk(sk), skb);
1746         ret = 0;
1747         if (!sock_owned_by_user(sk)) {
1748                 ret = tcp_v6_do_rcv(sk, skb);
1749         } else {
1750                 if (tcp_add_backlog(sk, skb, &drop_reason))
1751                         goto discard_and_relse;
1752         }
1753         bh_unlock_sock(sk);
1754 put_and_return:
1755         if (refcounted)
1756                 sock_put(sk);
1757         return ret ? -1 : 0;
1758
1759 no_tcp_socket:
1760         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1761         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1762                 goto discard_it;
1763
1764         tcp_v6_fill_cb(skb, hdr, th);
1765
1766         if (tcp_checksum_complete(skb)) {
1767 csum_error:
1768                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1769                 trace_tcp_bad_csum(skb);
1770                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1771 bad_packet:
1772                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1773         } else {
1774                 tcp_v6_send_reset(NULL, skb);
1775         }
1776
1777 discard_it:
1778         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1779         kfree_skb_reason(skb, drop_reason);
1780         return 0;
1781
1782 discard_and_relse:
1783         sk_drops_add(sk, skb);
1784         if (refcounted)
1785                 sock_put(sk);
1786         goto discard_it;
1787
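/* do_time_wait: the segment matched a TIME_WAIT mini-socket. Based on
 * tcp_timewait_state_process() we either let a fresh SYN reuse the old
 * 4-tuple (TCP_TW_SYN: find a listener, kill the timewait socket and
 * restart normal processing), re-send an ACK (TCP_TW_ACK), answer with a
 * reset (TCP_TW_RST), or silently drop (TCP_TW_SUCCESS).
 */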
1788 do_time_wait:
1789         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1790                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1791                 inet_twsk_put(inet_twsk(sk));
1792                 goto discard_it;
1793         }
1794
1795         tcp_v6_fill_cb(skb, hdr, th);
1796
1797         if (tcp_checksum_complete(skb)) {
1798                 inet_twsk_put(inet_twsk(sk));
1799                 goto csum_error;
1800         }
1801
1802         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1803         case TCP_TW_SYN:
1804         {
1805                 struct sock *sk2;
1806
1807                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1808                                             skb, __tcp_hdrlen(th),
1809                                             &ipv6_hdr(skb)->saddr, th->source,
1810                                             &ipv6_hdr(skb)->daddr,
1811                                             ntohs(th->dest),
1812                                             tcp_v6_iif_l3_slave(skb),
1813                                             sdif);
1814                 if (sk2) {
1815                         struct inet_timewait_sock *tw = inet_twsk(sk);
1816                         inet_twsk_deschedule_put(tw);
1817                         sk = sk2;
1818                         tcp_v6_restore_cb(skb);
1819                         refcounted = false;
1820                         goto process;
1821                 }
1822         }
1823                 /* to ACK */
1824                 fallthrough;
1825         case TCP_TW_ACK:
1826                 tcp_v6_timewait_ack(sk, skb);
1827                 break;
1828         case TCP_TW_RST:
1829                 tcp_v6_send_reset(sk, skb);
1830                 inet_twsk_deschedule_put(inet_twsk(sk));
1831                 goto discard_it;
1832         case TCP_TW_SUCCESS:
1833                 ;
1834         }
1835         goto discard_it;
1836 }
1837
1838 void tcp_v6_early_demux(struct sk_buff *skb)
1839 {
1840         struct net *net = dev_net(skb->dev);
1841         const struct ipv6hdr *hdr;
1842         const struct tcphdr *th;
1843         struct sock *sk;
1844
1845         if (skb->pkt_type != PACKET_HOST)
1846                 return;
1847
1848         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1849                 return;
1850
1851         hdr = ipv6_hdr(skb);
1852         th = tcp_hdr(skb);
1853
1854         if (th->doff < sizeof(struct tcphdr) / 4)
1855                 return;
1856
1857         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1858         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1859                                         &hdr->saddr, th->source,
1860                                         &hdr->daddr, ntohs(th->dest),
1861                                         inet6_iif(skb), inet6_sdif(skb));
1862         if (sk) {
1863                 skb->sk = sk;
1864                 skb->destructor = sock_edemux;
1865                 if (sk_fullsock(sk)) {
1866                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1867
1868                         if (dst)
1869                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1870                         if (dst &&
1871                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1872                                 skb_dst_set_noref(skb, dst);
1873                 }
1874         }
1875 }
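
/* Early demux is a best-effort optimization: for established sockets it
 * resolves the socket and its cached rx dst while the packet is still in
 * the IPv6 input path, letting the later lookup in tcp_v6_rcv() and the
 * routing decision be skipped. A miss here only costs the normal slow
 * path.
 */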
1876
1877 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1878         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1879         .twsk_unique    = tcp_twsk_unique,
1880         .twsk_destructor = tcp_twsk_destructor,
1881 };
1882
1883 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1884 {
1885         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1886 }
1887
1888 const struct inet_connection_sock_af_ops ipv6_specific = {
1889         .queue_xmit        = inet6_csk_xmit,
1890         .send_check        = tcp_v6_send_check,
1891         .rebuild_header    = inet6_sk_rebuild_header,
1892         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1893         .conn_request      = tcp_v6_conn_request,
1894         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1895         .net_header_len    = sizeof(struct ipv6hdr),
1896         .net_frag_header_len = sizeof(struct frag_hdr),
1897         .setsockopt        = ipv6_setsockopt,
1898         .getsockopt        = ipv6_getsockopt,
1899         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1900         .sockaddr_len      = sizeof(struct sockaddr_in6),
1901         .mtu_reduced       = tcp_v6_mtu_reduced,
1902 };
1903
1904 #ifdef CONFIG_TCP_MD5SIG
1905 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1906         .md5_lookup     =       tcp_v6_md5_lookup,
1907         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1908         .md5_parse      =       tcp_v6_parse_md5_keys,
1909 };
1910 #endif
1911
1912 /*
1913  *      TCP over IPv4 via INET6 API
1914  */
1915 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1916         .queue_xmit        = ip_queue_xmit,
1917         .send_check        = tcp_v4_send_check,
1918         .rebuild_header    = inet_sk_rebuild_header,
1919         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1920         .conn_request      = tcp_v6_conn_request,
1921         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1922         .net_header_len    = sizeof(struct iphdr),
1923         .setsockopt        = ipv6_setsockopt,
1924         .getsockopt        = ipv6_getsockopt,
1925         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1926         .sockaddr_len      = sizeof(struct sockaddr_in6),
1927         .mtu_reduced       = tcp_v4_mtu_reduced,
1928 };
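
/* A note on dispatch: sockets start with ipv6_specific (see
 * tcp_v6_init_sock() below). When an AF_INET6 socket ends up talking to an
 * IPv4-mapped address (::ffff:a.b.c.d), icsk_af_ops is switched to
 * ipv6_mapped so that transmit, header sizing and checksumming follow the
 * IPv4 paths while the socket keeps its IPv6 API.
 */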
1929
1930 #ifdef CONFIG_TCP_MD5SIG
1931 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1932         .md5_lookup     =       tcp_v4_md5_lookup,
1933         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1934         .md5_parse      =       tcp_v6_parse_md5_keys,
1935 };
1936 #endif
1937
1938 /* NOTE: A lot of things are set to zero explicitly by the call to
1939  *       sk_alloc(), so they need not be done here.
1940  */
1941 static int tcp_v6_init_sock(struct sock *sk)
1942 {
1943         struct inet_connection_sock *icsk = inet_csk(sk);
1944
1945         tcp_init_sock(sk);
1946
1947         icsk->icsk_af_ops = &ipv6_specific;
1948
1949 #ifdef CONFIG_TCP_MD5SIG
1950         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1951 #endif
1952
1953         return 0;
1954 }
1955
1956 #ifdef CONFIG_PROC_FS
1957 /* Proc filesystem TCPv6 sock list dumping. */
1958 static void get_openreq6(struct seq_file *seq,
1959                          const struct request_sock *req, int i)
1960 {
1961         long ttd = req->rsk_timer.expires - jiffies;
1962         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1963         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1964
1965         if (ttd < 0)
1966                 ttd = 0;
1967
1968         seq_printf(seq,
1969                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1970                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1971                    i,
1972                    src->s6_addr32[0], src->s6_addr32[1],
1973                    src->s6_addr32[2], src->s6_addr32[3],
1974                    inet_rsk(req)->ir_num,
1975                    dest->s6_addr32[0], dest->s6_addr32[1],
1976                    dest->s6_addr32[2], dest->s6_addr32[3],
1977                    ntohs(inet_rsk(req)->ir_rmt_port),
1978                    TCP_SYN_RECV,
1979                    0, 0, /* could print option size, but that is af dependent. */
1980                    1,   /* timers active (only the expire timer) */
1981                    jiffies_to_clock_t(ttd),
1982                    req->num_timeout,
1983                    from_kuid_munged(seq_user_ns(seq),
1984                                     sock_i_uid(req->rsk_listener)),
1985                    0,  /* non standard timer */
1986                    0, /* open_requests have no inode */
1987                    0, req);
1988 }
1989
1990 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1991 {
1992         const struct in6_addr *dest, *src;
1993         __u16 destp, srcp;
1994         int timer_active;
1995         unsigned long timer_expires;
1996         const struct inet_sock *inet = inet_sk(sp);
1997         const struct tcp_sock *tp = tcp_sk(sp);
1998         const struct inet_connection_sock *icsk = inet_csk(sp);
1999         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2000         int rx_queue;
2001         int state;
2002
2003         dest  = &sp->sk_v6_daddr;
2004         src   = &sp->sk_v6_rcv_saddr;
2005         destp = ntohs(inet->inet_dport);
2006         srcp  = ntohs(inet->inet_sport);
2007
2008         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2009             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2010             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2011                 timer_active    = 1;
2012                 timer_expires   = icsk->icsk_timeout;
2013         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2014                 timer_active    = 4;
2015                 timer_expires   = icsk->icsk_timeout;
2016         } else if (timer_pending(&sp->sk_timer)) {
2017                 timer_active    = 2;
2018                 timer_expires   = sp->sk_timer.expires;
2019         } else {
2020                 timer_active    = 0;
2021                 timer_expires = jiffies;
2022         }
2023
2024         state = inet_sk_state_load(sp);
2025         if (state == TCP_LISTEN)
2026                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2027         else
2028                 /* Because we don't lock the socket,
2029                  * we might find a transient negative value.
2030                  */
2031                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2032                                       READ_ONCE(tp->copied_seq), 0);
2033
2034         seq_printf(seq,
2035                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2036                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2037                    i,
2038                    src->s6_addr32[0], src->s6_addr32[1],
2039                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2040                    dest->s6_addr32[0], dest->s6_addr32[1],
2041                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2042                    state,
2043                    READ_ONCE(tp->write_seq) - tp->snd_una,
2044                    rx_queue,
2045                    timer_active,
2046                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2047                    icsk->icsk_retransmits,
2048                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2049                    icsk->icsk_probes_out,
2050                    sock_i_ino(sp),
2051                    refcount_read(&sp->sk_refcnt), sp,
2052                    jiffies_to_clock_t(icsk->icsk_rto),
2053                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2054                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2055                    tcp_snd_cwnd(tp),
2056                    state == TCP_LISTEN ?
2057                         fastopenq->max_qlen :
2058                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2059                    );
2060 }
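
/* For reference, one row produced by the format above might look like
 * (illustrative values from a little-endian host, shown wrapped here but
 * emitted as a single line):
 *
 *   12: 00000000000000000000000001000000:1F90
 *       00000000000000000000000000000000:0000
 *       0A 00000000:00000000 00:00000000 00000000  1000 0 12345 1
 *       0000000000000000 100 0 0 10 0
 *
 * Slot 12 is [::1]:8080 (0x1F90) in LISTEN (state 0x0A), uid 1000,
 * inode 12345. Each address prints as four raw 32-bit words, so byte
 * order within a word follows the host, not the wire.
 */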
2061
2062 static void get_timewait6_sock(struct seq_file *seq,
2063                                struct inet_timewait_sock *tw, int i)
2064 {
2065         long delta = tw->tw_timer.expires - jiffies;
2066         const struct in6_addr *dest, *src;
2067         __u16 destp, srcp;
2068
2069         dest = &tw->tw_v6_daddr;
2070         src  = &tw->tw_v6_rcv_saddr;
2071         destp = ntohs(tw->tw_dport);
2072         srcp  = ntohs(tw->tw_sport);
2073
2074         seq_printf(seq,
2075                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2076                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2077                    i,
2078                    src->s6_addr32[0], src->s6_addr32[1],
2079                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2080                    dest->s6_addr32[0], dest->s6_addr32[1],
2081                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2082                    tw->tw_substate, 0, 0,
2083                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2084                    refcount_read(&tw->tw_refcnt), tw);
2085 }
2086
2087 static int tcp6_seq_show(struct seq_file *seq, void *v)
2088 {
2089         struct tcp_iter_state *st;
2090         struct sock *sk = v;
2091
2092         if (v == SEQ_START_TOKEN) {
2093                 seq_puts(seq,
2094                          "  sl  "
2095                          "local_address                         "
2096                          "remote_address                        "
2097                          "st tx_queue rx_queue tr tm->when retrnsmt"
2098                          "   uid  timeout inode\n");
2099                 goto out;
2100         }
2101         st = seq->private;
2102
2103         if (sk->sk_state == TCP_TIME_WAIT)
2104                 get_timewait6_sock(seq, v, st->num);
2105         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2106                 get_openreq6(seq, v, st->num);
2107         else
2108                 get_tcp6_sock(seq, v, st->num);
2109 out:
2110         return 0;
2111 }
2112
2113 static const struct seq_operations tcp6_seq_ops = {
2114         .show           = tcp6_seq_show,
2115         .start          = tcp_seq_start,
2116         .next           = tcp_seq_next,
2117         .stop           = tcp_seq_stop,
2118 };
2119
2120 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2121         .family         = AF_INET6,
2122 };
2123
2124 int __net_init tcp6_proc_init(struct net *net)
2125 {
2126         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2127                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2128                 return -ENOMEM;
2129         return 0;
2130 }
2131
2132 void tcp6_proc_exit(struct net *net)
2133 {
2134         remove_proc_entry("tcp6", net->proc_net);
2135 }
2136 #endif
2137
2138 struct proto tcpv6_prot = {
2139         .name                   = "TCPv6",
2140         .owner                  = THIS_MODULE,
2141         .close                  = tcp_close,
2142         .pre_connect            = tcp_v6_pre_connect,
2143         .connect                = tcp_v6_connect,
2144         .disconnect             = tcp_disconnect,
2145         .accept                 = inet_csk_accept,
2146         .ioctl                  = tcp_ioctl,
2147         .init                   = tcp_v6_init_sock,
2148         .destroy                = tcp_v4_destroy_sock,
2149         .shutdown               = tcp_shutdown,
2150         .setsockopt             = tcp_setsockopt,
2151         .getsockopt             = tcp_getsockopt,
2152         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2153         .keepalive              = tcp_set_keepalive,
2154         .recvmsg                = tcp_recvmsg,
2155         .sendmsg                = tcp_sendmsg,
2156         .sendpage               = tcp_sendpage,
2157         .backlog_rcv            = tcp_v6_do_rcv,
2158         .release_cb             = tcp_release_cb,
2159         .hash                   = inet6_hash,
2160         .unhash                 = inet_unhash,
2161         .get_port               = inet_csk_get_port,
2162         .put_port               = inet_put_port,
2163 #ifdef CONFIG_BPF_SYSCALL
2164         .psock_update_sk_prot   = tcp_bpf_update_proto,
2165 #endif
2166         .enter_memory_pressure  = tcp_enter_memory_pressure,
2167         .leave_memory_pressure  = tcp_leave_memory_pressure,
2168         .stream_memory_free     = tcp_stream_memory_free,
2169         .sockets_allocated      = &tcp_sockets_allocated,
2170
2171         .memory_allocated       = &tcp_memory_allocated,
2172         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2173
2174         .memory_pressure        = &tcp_memory_pressure,
2175         .orphan_count           = &tcp_orphan_count,
2176         .sysctl_mem             = sysctl_tcp_mem,
2177         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2178         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2179         .max_header             = MAX_TCP_HEADER,
2180         .obj_size               = sizeof(struct tcp6_sock),
2181         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2182         .twsk_prot              = &tcp6_timewait_sock_ops,
2183         .rsk_prot               = &tcp6_request_sock_ops,
2184         .h.hashinfo             = NULL,
2185         .no_autobind            = true,
2186         .diag_destroy           = tcp_abort,
2187 };
2188 EXPORT_SYMBOL_GPL(tcpv6_prot);
2189
2190 static const struct inet6_protocol tcpv6_protocol = {
2191         .handler        =       tcp_v6_rcv,
2192         .err_handler    =       tcp_v6_err,
2193         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2194 };
2195
2196 static struct inet_protosw tcpv6_protosw = {
2197         .type           =       SOCK_STREAM,
2198         .protocol       =       IPPROTO_TCP,
2199         .prot           =       &tcpv6_prot,
2200         .ops            =       &inet6_stream_ops,
2201         .flags          =       INET_PROTOSW_PERMANENT |
2202                                 INET_PROTOSW_ICSK,
2203 };
2204
2205 static int __net_init tcpv6_net_init(struct net *net)
2206 {
2207         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2208                                     SOCK_RAW, IPPROTO_TCP, net);
2209 }
2210
2211 static void __net_exit tcpv6_net_exit(struct net *net)
2212 {
2213         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2214 }
2215
2216 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2217 {
2218         tcp_twsk_purge(net_exit_list, AF_INET6);
2219 }
2220
2221 static struct pernet_operations tcpv6_net_ops = {
2222         .init       = tcpv6_net_init,
2223         .exit       = tcpv6_net_exit,
2224         .exit_batch = tcpv6_net_exit_batch,
2225 };
2226
2227 int __init tcpv6_init(void)
2228 {
2229         int ret;
2230
2231         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2232         if (ret)
2233                 goto out;
2234
2235         /* register inet6 protocol */
2236         ret = inet6_register_protosw(&tcpv6_protosw);
2237         if (ret)
2238                 goto out_tcpv6_protocol;
2239
2240         ret = register_pernet_subsys(&tcpv6_net_ops);
2241         if (ret)
2242                 goto out_tcpv6_protosw;
2243
2244         ret = mptcpv6_init();
2245         if (ret)
2246                 goto out_tcpv6_pernet_subsys;
2247
2248 out:
2249         return ret;
2250
2251 out_tcpv6_pernet_subsys:
2252         unregister_pernet_subsys(&tcpv6_net_ops);
2253 out_tcpv6_protosw:
2254         inet6_unregister_protosw(&tcpv6_protosw);
2255 out_tcpv6_protocol:
2256         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2257         goto out;
2258 }
2259
2260 void tcpv6_exit(void)
2261 {
2262         unregister_pernet_subsys(&tcpv6_net_ops);
2263         inet6_unregister_protosw(&tcpv6_protosw);
2264         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2265 }