net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <[email protected]>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
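
The helper above depends on ipv6_pinfo being laid out at the very end of
struct tcp6_sock, so its address can be derived from the socket pointer
arithmetically instead of loading inet_sk(sk)->pinet6. A minimal userspace
sketch of the same end-of-struct addressing, using toy stand-in types rather
than the kernel's:

#include <stdio.h>

struct pinfo { int hop_limit; };        /* stand-in for ipv6_pinfo */
struct big_sock {                       /* stand-in for tcp6_sock */
        char payload[128];
        struct pinfo inet6;             /* must be the last member */
};

/* Same idea as tcp_inet6_sk(): offset = sizeof(outer) - sizeof(tail member).
 * This only works when the member really is last, with no tail padding
 * after it.
 */
static struct pinfo *tail_pinfo(void *sk)
{
        unsigned int offset = sizeof(struct big_sock) - sizeof(struct pinfo);

        return (struct pinfo *)((unsigned char *)sk + offset);
}

int main(void)
{
        struct big_sock s = { .inet6 = { 64 } };

        printf("%d\n", tail_pinfo(&s)->hop_limit);      /* prints 64 */
        return 0;
}
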
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
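
Note the argument order above: the ISN and timestamp offset are generated for
the reply direction, so the incoming packet's destination address comes
first. A toy userspace sketch of the scheme, a keyed hash over the 4-tuple
plus a coarse clock term; the kernel's secure_tcpv6_seq() uses keyed SipHash,
and this mixer is only illustrative:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <time.h>

/* splitmix64-style mixer; purely illustrative, NOT the kernel's SipHash */
static uint64_t mix64(uint64_t x)
{
        x += 0x9e3779b97f4a7c15ULL;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
        x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
        return x ^ (x >> 31);
}

static uint32_t toy_isn(const uint8_t laddr[16], const uint8_t raddr[16],
                        uint16_t lport, uint16_t rport, uint64_t secret)
{
        uint64_t h = secret, w;
        int i;

        for (i = 0; i < 16; i += 8) {
                memcpy(&w, laddr + i, 8); h = mix64(h ^ w);
                memcpy(&w, raddr + i, 8); h = mix64(h ^ w);
        }
        h = mix64(h ^ ((uint64_t)lport << 16 | rport));
        /* coarse clock term keeps ISNs for a reused tuple moving forward */
        return (uint32_t)h + (uint32_t)time(NULL);
}

int main(void)
{
        uint8_t a[16] = { 0x20, 0x01, 0x0d, 0xb8 }, b[16] = { 0x20, 0x01 };

        printf("isn=%u\n", toy_isn(a, b, 443, 51000, 0x1234abcdULL));
        return 0;
}
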
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent the BPF program called below from accessing bytes that are
135          * out of the bounds specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct inet_timewait_death_row *tcp_death_row;
152         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153         struct tcp_sock *tp = tcp_sk(sk);
154         struct in6_addr *saddr = NULL, *final_p, final;
155         struct ipv6_txoptions *opt;
156         struct flowi6 fl6;
157         struct dst_entry *dst;
158         int addr_type;
159         int err;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type&IPV6_ADDR_LINKLOCAL) {
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If interface is set while binding, indices
202                          * must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connect to link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
214
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
265
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         if (!saddr) {
290                 struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
291                 struct in6_addr prev_v6_rcv_saddr;
292
293                 if (icsk->icsk_bind2_hash) {
294                         prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo,
295                                                                      sk, sock_net(sk),
296                                                                      inet->inet_num);
297                         prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
298                 }
299                 saddr = &fl6.saddr;
300                 sk->sk_v6_rcv_saddr = *saddr;
301
302                 if (prev_addr_hashbucket) {
303                         err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
304                         if (err) {
305                                 sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
306                                 goto failure;
307                         }
308                 }
309         }
310
311         /* set the source address */
312         np->saddr = *saddr;
313         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
314
315         sk->sk_gso_type = SKB_GSO_TCPV6;
316         ip6_dst_store(sk, dst, NULL, NULL);
317
318         icsk->icsk_ext_hdr_len = 0;
319         if (opt)
320                 icsk->icsk_ext_hdr_len = opt->opt_flen +
321                                          opt->opt_nflen;
322
323         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
324
325         inet->inet_dport = usin->sin6_port;
326
327         tcp_set_state(sk, TCP_SYN_SENT);
328         tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
329         err = inet6_hash_connect(tcp_death_row, sk);
330         if (err)
331                 goto late_failure;
332
333         sk_set_txhash(sk);
334
335         if (likely(!tp->repair)) {
336                 if (!tp->write_seq)
337                         WRITE_ONCE(tp->write_seq,
338                                    secure_tcpv6_seq(np->saddr.s6_addr32,
339                                                     sk->sk_v6_daddr.s6_addr32,
340                                                     inet->inet_sport,
341                                                     inet->inet_dport));
342                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
343                                                    np->saddr.s6_addr32,
344                                                    sk->sk_v6_daddr.s6_addr32);
345         }
346
347         if (tcp_fastopen_defer_connect(sk, &err))
348                 return err;
349         if (err)
350                 goto late_failure;
351
352         err = tcp_connect(sk);
353         if (err)
354                 goto late_failure;
355
356         return 0;
357
358 late_failure:
359         tcp_set_state(sk, TCP_CLOSE);
360 failure:
361         inet->inet_dport = 0;
362         sk->sk_route_caps = 0;
363         return err;
364 }
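
For reference, here is the userspace view of the mapped-address branch above:
a v4-mapped destination on an AF_INET6 socket is handed to tcp_v4_connect(),
unless IPV6_V6ONLY is set, in which case connect() fails with ENETUNREACH.
A minimal sketch (documentation address 192.0.2.1, error handling elided):

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
        struct sockaddr_in6 dst = { .sin6_family = AF_INET6,
                                    .sin6_port = htons(80) };
        int one = 1;
        int fd = socket(AF_INET6, SOCK_STREAM, 0);

        /* "::ffff:192.0.2.1" is v4-mapped: addr_type & IPV6_ADDR_MAPPED */
        inet_pton(AF_INET6, "::ffff:192.0.2.1", &dst.sin6_addr);

        /* with IPV6_V6ONLY, the mapped branch above returns -ENETUNREACH */
        setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &one, sizeof(one));

        if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
                perror("connect");      /* expect: Network is unreachable */
        close(fd);
        return 0;
}
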
365
366 static void tcp_v6_mtu_reduced(struct sock *sk)
367 {
368         struct dst_entry *dst;
369         u32 mtu;
370
371         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
372                 return;
373
374         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
375
376         /* Drop requests trying to increase our current mss.
377          * The check done in __ip6_rt_update_pmtu() is too late.
378          */
379         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
380                 return;
381
382         dst = inet6_csk_update_pmtu(sk, mtu);
383         if (!dst)
384                 return;
385
386         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
387                 tcp_sync_mss(sk, dst_mtu(dst));
388                 tcp_simple_retransmit(sk);
389         }
390 }
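
The MSS derived from an advertised PMTU is roughly the link MTU minus the
fixed IPv6 and TCP headers; the real tcp_mtu_to_mss() also subtracts TCP
options and extension headers. A back-of-envelope sketch of the clamp check
above:

#include <stdio.h>

/* rough IPv6 mtu-to-mss: strip the fixed 40-byte IPv6 header and the
 * 20-byte TCP header; options would shrink it further
 */
static unsigned int mtu_to_mss6(unsigned int mtu)
{
        return mtu - 40 - 20;
}

int main(void)
{
        /* an ICMPV6_PKT_TOOBIG advertising 1280 yields an MSS of 1220;
         * only values below the current mss_cache trigger tcp_sync_mss()
         */
        printf("%u\n", mtu_to_mss6(1280));      /* 1220 */
        return 0;
}
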
391
392 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
393                 u8 type, u8 code, int offset, __be32 info)
394 {
395         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
396         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
397         struct net *net = dev_net(skb->dev);
398         struct request_sock *fastopen;
399         struct ipv6_pinfo *np;
400         struct tcp_sock *tp;
401         __u32 seq, snd_una;
402         struct sock *sk;
403         bool fatal;
404         int err;
405
406         sk = __inet6_lookup_established(net, &tcp_hashinfo,
407                                         &hdr->daddr, th->dest,
408                                         &hdr->saddr, ntohs(th->source),
409                                         skb->dev->ifindex, inet6_sdif(skb));
410
411         if (!sk) {
412                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
413                                   ICMP6_MIB_INERRORS);
414                 return -ENOENT;
415         }
416
417         if (sk->sk_state == TCP_TIME_WAIT) {
418                 inet_twsk_put(inet_twsk(sk));
419                 return 0;
420         }
421         seq = ntohl(th->seq);
422         fatal = icmpv6_err_convert(type, code, &err);
423         if (sk->sk_state == TCP_NEW_SYN_RECV) {
424                 tcp_req_err(sk, seq, fatal);
425                 return 0;
426         }
427
428         bh_lock_sock(sk);
429         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
430                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
431
432         if (sk->sk_state == TCP_CLOSE)
433                 goto out;
434
435         if (static_branch_unlikely(&ip6_min_hopcount)) {
436                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
437                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
438                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
439                         goto out;
440                 }
441         }
442
443         tp = tcp_sk(sk);
444         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
445         fastopen = rcu_dereference(tp->fastopen_rsk);
446         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
447         if (sk->sk_state != TCP_LISTEN &&
448             !between(seq, snd_una, tp->snd_nxt)) {
449                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
450                 goto out;
451         }
452
453         np = tcp_inet6_sk(sk);
454
455         if (type == NDISC_REDIRECT) {
456                 if (!sock_owned_by_user(sk)) {
457                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
458
459                         if (dst)
460                                 dst->ops->redirect(dst, sk, skb);
461                 }
462                 goto out;
463         }
464
465         if (type == ICMPV6_PKT_TOOBIG) {
466                 u32 mtu = ntohl(info);
467
468                 /* We are not interested in TCP_LISTEN and open_requests
469          * (SYN-ACKs sent out by Linux are always <576 bytes, so
470                  * they should go through unfragmented).
471                  */
472                 if (sk->sk_state == TCP_LISTEN)
473                         goto out;
474
475                 if (!ip6_sk_accept_pmtu(sk))
476                         goto out;
477
478                 if (mtu < IPV6_MIN_MTU)
479                         goto out;
480
481                 WRITE_ONCE(tp->mtu_info, mtu);
482
483                 if (!sock_owned_by_user(sk))
484                         tcp_v6_mtu_reduced(sk);
485                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
486                                            &sk->sk_tsq_flags))
487                         sock_hold(sk);
488                 goto out;
489         }
490
491
492         /* Might be for a request_sock */
493         switch (sk->sk_state) {
494         case TCP_SYN_SENT:
495         case TCP_SYN_RECV:
496                 /* Only in fast or simultaneous open. If a fast open socket is
497                  * already accepted it is treated as a connected one below.
498                  */
499                 if (fastopen && !fastopen->sk)
500                         break;
501
502                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
503
504                 if (!sock_owned_by_user(sk)) {
505                         sk->sk_err = err;
506                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
507
508                         tcp_done(sk);
509                 } else
510                         sk->sk_err_soft = err;
511                 goto out;
512         case TCP_LISTEN:
513                 break;
514         default:
515                 /* check if this ICMP message allows revert of backoff.
516                  * (see RFC 6069)
517                  */
518                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
519                     code == ICMPV6_NOROUTE)
520                         tcp_ld_RTO_revert(sk, seq);
521         }
522
523         if (!sock_owned_by_user(sk) && np->recverr) {
524                 sk->sk_err = err;
525                 sk_error_report(sk);
526         } else
527                 sk->sk_err_soft = err;
528
529 out:
530         bh_unlock_sock(sk);
531         sock_put(sk);
532         return 0;
533 }
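
The np->recverr test at the end corresponds to the IPV6_RECVERR socket
option: when it is set, errors like the ones handled here are queued and can
be drained from the socket's error queue. A hedged userspace sketch of the
drain side (no error is actually queued in this toy run):

#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/errqueue.h>

/* Drain one message from the MSG_ERRQUEUE of an IPv6 socket. */
static void drain_errqueue(int fd)
{
        char cbuf[512], dbuf[256];
        struct iovec iov = { dbuf, sizeof(dbuf) };
        struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                              .msg_control = cbuf,
                              .msg_controllen = sizeof(cbuf) };
        struct cmsghdr *cm;

        if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
                return;                 /* nothing queued */
        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
                if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) {
                        struct sock_extended_err *ee =
                                (struct sock_extended_err *)CMSG_DATA(cm);

                        printf("ee_errno=%u origin=%u\n",
                               ee->ee_errno, ee->ee_origin);
                }
}

int main(void)
{
        int fd = socket(AF_INET6, SOCK_STREAM, 0), on = 1;

        setsockopt(fd, SOL_IPV6, IPV6_RECVERR, &on, sizeof(on));
        drain_errqueue(fd);
        close(fd);
        return 0;
}
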
534
535
536 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
537                               struct flowi *fl,
538                               struct request_sock *req,
539                               struct tcp_fastopen_cookie *foc,
540                               enum tcp_synack_type synack_type,
541                               struct sk_buff *syn_skb)
542 {
543         struct inet_request_sock *ireq = inet_rsk(req);
544         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
545         struct ipv6_txoptions *opt;
546         struct flowi6 *fl6 = &fl->u.ip6;
547         struct sk_buff *skb;
548         int err = -ENOMEM;
549         u8 tclass;
550
551         /* First, grab a route. */
552         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
553                                                IPPROTO_TCP)) == NULL)
554                 goto done;
555
556         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
557
558         if (skb) {
559                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
560                                     &ireq->ir_v6_rmt_addr);
561
562                 fl6->daddr = ireq->ir_v6_rmt_addr;
563                 if (np->repflow && ireq->pktopts)
564                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
565
566                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
567                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
568                                 (np->tclass & INET_ECN_MASK) :
569                                 np->tclass;
570
571                 if (!INET_ECN_is_capable(tclass) &&
572                     tcp_bpf_ca_needs_ecn((struct sock *)req))
573                         tclass |= INET_ECN_ECT_0;
574
575                 rcu_read_lock();
576                 opt = ireq->ipv6_opt;
577                 if (!opt)
578                         opt = rcu_dereference(np->opt);
579                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
580                                tclass, sk->sk_priority);
581                 rcu_read_unlock();
582                 err = net_xmit_eval(err);
583         }
584
585 done:
586         return err;
587 }
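
The tclass computation above merges two sources when tcp_reflect_tos is on:
DSCP bits reflected from the client's SYN, ECN bits from the listening
socket. A sketch of that bit-merge, assuming INET_ECN_MASK is the low two
bits (0x3):

#include <stdio.h>

#define ECN_MASK 0x3    /* low two bits of the traffic class */

/* DSCP reflected from the SYN, ECN taken from the listener's tclass */
static unsigned char reflect_tclass(unsigned char syn_tos,
                                    unsigned char np_tclass)
{
        return (syn_tos & ~ECN_MASK) | (np_tclass & ECN_MASK);
}

int main(void)
{
        /* client sent 0xa2 (DSCP CS5 + ECT(0)), listener tclass 0x01 */
        printf("0x%02x\n", reflect_tclass(0xa2, 0x01));  /* 0xa1 */
        return 0;
}
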
588
589
590 static void tcp_v6_reqsk_destructor(struct request_sock *req)
591 {
592         kfree(inet_rsk(req)->ipv6_opt);
593         consume_skb(inet_rsk(req)->pktopts);
594 }
595
596 #ifdef CONFIG_TCP_MD5SIG
597 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
598                                                    const struct in6_addr *addr,
599                                                    int l3index)
600 {
601         return tcp_md5_do_lookup(sk, l3index,
602                                  (union tcp_md5_addr *)addr, AF_INET6);
603 }
604
605 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
606                                                 const struct sock *addr_sk)
607 {
608         int l3index;
609
610         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
611                                                  addr_sk->sk_bound_dev_if);
612         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
613                                     l3index);
614 }
615
616 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
617                                  sockptr_t optval, int optlen)
618 {
619         struct tcp_md5sig cmd;
620         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
621         int l3index = 0;
622         u8 prefixlen;
623         u8 flags;
624
625         if (optlen < sizeof(cmd))
626                 return -EINVAL;
627
628         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
629                 return -EFAULT;
630
631         if (sin6->sin6_family != AF_INET6)
632                 return -EINVAL;
633
634         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
635
636         if (optname == TCP_MD5SIG_EXT &&
637             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
638                 prefixlen = cmd.tcpm_prefixlen;
639                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
640                                         prefixlen > 32))
641                         return -EINVAL;
642         } else {
643                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
644         }
645
646         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
647             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
648                 struct net_device *dev;
649
650                 rcu_read_lock();
651                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
652                 if (dev && netif_is_l3_master(dev))
653                         l3index = dev->ifindex;
654                 rcu_read_unlock();
655
656                 /* ok to check dev set/not set outside of RCU;
657                  * right now the device MUST be an L3 master
658                  */
659                 if (!dev || !l3index)
660                         return -EINVAL;
661         }
662
663         if (!cmd.tcpm_keylen) {
664                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
665                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
666                                               AF_INET, prefixlen,
667                                               l3index, flags);
668                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
669                                       AF_INET6, prefixlen, l3index, flags);
670         }
671
672         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
673                 return -EINVAL;
674
675         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
676                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
677                                       AF_INET, prefixlen, l3index, flags,
678                                       cmd.tcpm_key, cmd.tcpm_keylen,
679                                       GFP_KERNEL);
680
681         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
682                               AF_INET6, prefixlen, l3index, flags,
683                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
684 }
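
The userspace counterpart of this parser is setsockopt(TCP_MD5SIG) (or
TCP_MD5SIG_EXT for the prefix/ifindex variants) with a struct tcp_md5sig
carrying the peer address and key. A minimal sketch for an IPv6 peer:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/tcp.h>

int main(void)
{
        struct tcp_md5sig md5 = { 0 };
        struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
        int fd = socket(AF_INET6, SOCK_STREAM, 0);

        a->sin6_family = AF_INET6;
        inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
        md5.tcpm_keylen = 6;            /* <= TCP_MD5SIG_MAXKEYLEN (80) */
        memcpy(md5.tcpm_key, "s3cret", 6);

        if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
                perror("TCP_MD5SIG");   /* kernel needs CONFIG_TCP_MD5SIG */
        close(fd);
        return 0;
}
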
685
686 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
687                                    const struct in6_addr *daddr,
688                                    const struct in6_addr *saddr,
689                                    const struct tcphdr *th, int nbytes)
690 {
691         struct tcp6_pseudohdr *bp;
692         struct scatterlist sg;
693         struct tcphdr *_th;
694
695         bp = hp->scratch;
696         /* 1. TCP pseudo-header (RFC2460) */
697         bp->saddr = *saddr;
698         bp->daddr = *daddr;
699         bp->protocol = cpu_to_be32(IPPROTO_TCP);
700         bp->len = cpu_to_be32(nbytes);
701
702         _th = (struct tcphdr *)(bp + 1);
703         memcpy(_th, th, sizeof(*th));
704         _th->check = 0;
705
706         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
707         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
708                                 sizeof(*bp) + sizeof(*th));
709         return crypto_ahash_update(hp->md5_req);
710 }
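
The scratch buffer hashed above begins with the RFC 2460-style pseudo-header,
40 bytes in total, followed by a copy of the TCP header with its checksum
zeroed. A sketch mirroring the tcp6_pseudohdr layout:

#include <stdio.h>
#include <stdint.h>

/* mirrors tcp6_pseudohdr: src, dst, upper-layer length, zero + next header */
struct pseudohdr6 {
        uint8_t  saddr[16];
        uint8_t  daddr[16];
        uint32_t len;           /* TCP length, network byte order */
        uint32_t protocol;      /* three zero bytes + IPPROTO_TCP (6) */
};

int main(void)
{
        printf("%zu\n", sizeof(struct pseudohdr6));     /* 40 */
        return 0;
}
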
711
712 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
713                                const struct in6_addr *daddr, struct in6_addr *saddr,
714                                const struct tcphdr *th)
715 {
716         struct tcp_md5sig_pool *hp;
717         struct ahash_request *req;
718
719         hp = tcp_get_md5sig_pool();
720         if (!hp)
721                 goto clear_hash_noput;
722         req = hp->md5_req;
723
724         if (crypto_ahash_init(req))
725                 goto clear_hash;
726         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
727                 goto clear_hash;
728         if (tcp_md5_hash_key(hp, key))
729                 goto clear_hash;
730         ahash_request_set_crypt(req, NULL, md5_hash, 0);
731         if (crypto_ahash_final(req))
732                 goto clear_hash;
733
734         tcp_put_md5sig_pool();
735         return 0;
736
737 clear_hash:
738         tcp_put_md5sig_pool();
739 clear_hash_noput:
740         memset(md5_hash, 0, 16);
741         return 1;
742 }
743
744 static int tcp_v6_md5_hash_skb(char *md5_hash,
745                                const struct tcp_md5sig_key *key,
746                                const struct sock *sk,
747                                const struct sk_buff *skb)
748 {
749         const struct in6_addr *saddr, *daddr;
750         struct tcp_md5sig_pool *hp;
751         struct ahash_request *req;
752         const struct tcphdr *th = tcp_hdr(skb);
753
754         if (sk) { /* valid for establish/request sockets */
755                 saddr = &sk->sk_v6_rcv_saddr;
756                 daddr = &sk->sk_v6_daddr;
757         } else {
758                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
759                 saddr = &ip6h->saddr;
760                 daddr = &ip6h->daddr;
761         }
762
763         hp = tcp_get_md5sig_pool();
764         if (!hp)
765                 goto clear_hash_noput;
766         req = hp->md5_req;
767
768         if (crypto_ahash_init(req))
769                 goto clear_hash;
770
771         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
772                 goto clear_hash;
773         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
774                 goto clear_hash;
775         if (tcp_md5_hash_key(hp, key))
776                 goto clear_hash;
777         ahash_request_set_crypt(req, NULL, md5_hash, 0);
778         if (crypto_ahash_final(req))
779                 goto clear_hash;
780
781         tcp_put_md5sig_pool();
782         return 0;
783
784 clear_hash:
785         tcp_put_md5sig_pool();
786 clear_hash_noput:
787         memset(md5_hash, 0, 16);
788         return 1;
789 }
790
791 #endif
792
793 static void tcp_v6_init_req(struct request_sock *req,
794                             const struct sock *sk_listener,
795                             struct sk_buff *skb)
796 {
797         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
798         struct inet_request_sock *ireq = inet_rsk(req);
799         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
800
801         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
802         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
803
804         /* So that link locals have meaning */
805         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
806             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
807                 ireq->ir_iif = tcp_v6_iif(skb);
808
809         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
810             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
811              np->rxopt.bits.rxinfo ||
812              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
813              np->rxopt.bits.rxohlim || np->repflow)) {
814                 refcount_inc(&skb->users);
815                 ireq->pktopts = skb;
816         }
817 }
818
819 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
820                                           struct sk_buff *skb,
821                                           struct flowi *fl,
822                                           struct request_sock *req)
823 {
824         tcp_v6_init_req(req, sk, skb);
825
826         if (security_inet_conn_request(sk, skb, req))
827                 return NULL;
828
829         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
830 }
831
832 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
833         .family         =       AF_INET6,
834         .obj_size       =       sizeof(struct tcp6_request_sock),
835         .rtx_syn_ack    =       tcp_rtx_synack,
836         .send_ack       =       tcp_v6_reqsk_send_ack,
837         .destructor     =       tcp_v6_reqsk_destructor,
838         .send_reset     =       tcp_v6_send_reset,
839         .syn_ack_timeout =      tcp_syn_ack_timeout,
840 };
841
842 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
843         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
844                                 sizeof(struct ipv6hdr),
845 #ifdef CONFIG_TCP_MD5SIG
846         .req_md5_lookup =       tcp_v6_md5_lookup,
847         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
848 #endif
849 #ifdef CONFIG_SYN_COOKIES
850         .cookie_init_seq =      cookie_v6_init_sequence,
851 #endif
852         .route_req      =       tcp_v6_route_req,
853         .init_seq       =       tcp_v6_init_seq,
854         .init_ts_off    =       tcp_v6_init_ts_off,
855         .send_synack    =       tcp_v6_send_synack,
856 };
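
For reference, the .mss_clamp above works out to 1280 - 20 - 40 = 1220 bytes:
IPV6_MIN_MTU less the fixed TCP and IPv6 header sizes, i.e. the largest MSS
guaranteed to cross any IPv6 path without fragmentation.
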
857
858 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
859                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
860                                  int oif, struct tcp_md5sig_key *key, int rst,
861                                  u8 tclass, __be32 label, u32 priority, u32 txhash)
862 {
863         const struct tcphdr *th = tcp_hdr(skb);
864         struct tcphdr *t1;
865         struct sk_buff *buff;
866         struct flowi6 fl6;
867         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
868         struct sock *ctl_sk = net->ipv6.tcp_sk;
869         unsigned int tot_len = sizeof(struct tcphdr);
870         __be32 mrst = 0, *topt;
871         struct dst_entry *dst;
872         __u32 mark = 0;
873
874         if (tsecr)
875                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
876 #ifdef CONFIG_TCP_MD5SIG
877         if (key)
878                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
879 #endif
880
881 #ifdef CONFIG_MPTCP
882         if (rst && !key) {
883                 mrst = mptcp_reset_option(skb);
884
885                 if (mrst)
886                         tot_len += sizeof(__be32);
887         }
888 #endif
889
890         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
891         if (!buff)
892                 return;
893
894         skb_reserve(buff, MAX_TCP_HEADER);
895
896         t1 = skb_push(buff, tot_len);
897         skb_reset_transport_header(buff);
898
899         /* Swap the send and the receive. */
900         memset(t1, 0, sizeof(*t1));
901         t1->dest = th->source;
902         t1->source = th->dest;
903         t1->doff = tot_len / 4;
904         t1->seq = htonl(seq);
905         t1->ack_seq = htonl(ack);
906         t1->ack = !rst || !th->ack;
907         t1->rst = rst;
908         t1->window = htons(win);
909
910         topt = (__be32 *)(t1 + 1);
911
912         if (tsecr) {
913                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
914                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
915                 *topt++ = htonl(tsval);
916                 *topt++ = htonl(tsecr);
917         }
918
919         if (mrst)
920                 *topt++ = mrst;
921
922 #ifdef CONFIG_TCP_MD5SIG
923         if (key) {
924                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
925                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
926                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
927                                     &ipv6_hdr(skb)->saddr,
928                                     &ipv6_hdr(skb)->daddr, t1);
929         }
930 #endif
931
932         memset(&fl6, 0, sizeof(fl6));
933         fl6.daddr = ipv6_hdr(skb)->saddr;
934         fl6.saddr = ipv6_hdr(skb)->daddr;
935         fl6.flowlabel = label;
936
937         buff->ip_summed = CHECKSUM_PARTIAL;
938
939         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
940
941         fl6.flowi6_proto = IPPROTO_TCP;
942         if (rt6_need_strict(&fl6.daddr) && !oif)
943                 fl6.flowi6_oif = tcp_v6_iif(skb);
944         else {
945                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
946                         oif = skb->skb_iif;
947
948                 fl6.flowi6_oif = oif;
949         }
950
951         if (sk) {
952                 if (sk->sk_state == TCP_TIME_WAIT)
953                         mark = inet_twsk(sk)->tw_mark;
954                 else
955                         mark = sk->sk_mark;
956                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
957         }
958         if (txhash) {
959                 /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
960                 skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
961         }
962         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
963         fl6.fl6_dport = t1->dest;
964         fl6.fl6_sport = t1->source;
965         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
966         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
967
968         /* Pass a socket to ip6_dst_lookup_flow even when it is for a RST;
969          * the underlying function will use it to retrieve the network
970          * namespace.
971          */
972         if (sk && sk->sk_state != TCP_TIME_WAIT)
973                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
974         else
975                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
976         if (!IS_ERR(dst)) {
977                 skb_dst_set(buff, dst);
978                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
979                          tclass & ~INET_ECN_MASK, priority);
980                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
981                 if (rst)
982                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
983                 return;
984         }
985
986         kfree_skb(buff);
987 }
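
The 32-bit words written through topt pack four option bytes each; for the
timestamp option that is two NOPs, the kind, and the length, followed by the
two timestamps. A sketch of the packing (the constants are the standard TCP
option codes):

#include <stdio.h>
#include <stdint.h>

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

int main(void)
{
        /* NOP, NOP, kind=8, len=10: the word tcp_v6_send_response emits */
        uint32_t w = (TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                     (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP;

        printf("0x%08x\n", w);          /* 0x0101080a */
        return 0;
}
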
988
989 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
990 {
991         const struct tcphdr *th = tcp_hdr(skb);
992         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
993         u32 seq = 0, ack_seq = 0;
994         struct tcp_md5sig_key *key = NULL;
995 #ifdef CONFIG_TCP_MD5SIG
996         const __u8 *hash_location = NULL;
997         unsigned char newhash[16];
998         int genhash;
999         struct sock *sk1 = NULL;
1000 #endif
1001         __be32 label = 0;
1002         u32 priority = 0;
1003         struct net *net;
1004         int oif = 0;
1005
1006         if (th->rst)
1007                 return;
1008
1009         /* If sk is not NULL, it means we did a successful lookup and the
1010          * incoming route had to be correct. prequeue might have dropped our dst.
1011          */
1012         if (!sk && !ipv6_unicast_destination(skb))
1013                 return;
1014
1015         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1016 #ifdef CONFIG_TCP_MD5SIG
1017         rcu_read_lock();
1018         hash_location = tcp_parse_md5sig_option(th);
1019         if (sk && sk_fullsock(sk)) {
1020                 int l3index;
1021
1022                 /* sdif set, means packet ingressed via a device
1023                  * in an L3 domain and inet_iif is set to it.
1024                  */
1025                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1026                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1027         } else if (hash_location) {
1028                 int dif = tcp_v6_iif_l3_slave(skb);
1029                 int sdif = tcp_v6_sdif(skb);
1030                 int l3index;
1031
1032                 /*
1033                  * The active side is lost. Try to find the listening socket
1034                  * through the source port, then find the md5 key through it.
1035                  * This does not loosen security:
1036                  * the incoming packet is checked against the found key's md5
1037                  * hash, and no RST is generated if the hash doesn't match.
1038                  */
1039                 sk1 = inet6_lookup_listener(net,
1040                                            &tcp_hashinfo, NULL, 0,
1041                                            &ipv6h->saddr,
1042                                            th->source, &ipv6h->daddr,
1043                                            ntohs(th->source), dif, sdif);
1044                 if (!sk1)
1045                         goto out;
1046
1047                 /* sdif set, means packet ingressed via a device
1048                  * in an L3 domain and dif is set to it.
1049                  */
1050                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1051
1052                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1053                 if (!key)
1054                         goto out;
1055
1056                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1057                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1058                         goto out;
1059         }
1060 #endif
1061
1062         if (th->ack)
1063                 seq = ntohl(th->ack_seq);
1064         else
1065                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1066                           (th->doff << 2);
1067
1068         if (sk) {
1069                 oif = sk->sk_bound_dev_if;
1070                 if (sk_fullsock(sk)) {
1071                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1072
1073                         trace_tcp_send_reset(sk, skb);
1074                         if (np->repflow)
1075                                 label = ip6_flowlabel(ipv6h);
1076                         priority = sk->sk_priority;
1077                 }
1078                 if (sk->sk_state == TCP_TIME_WAIT) {
1079                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1080                         priority = inet_twsk(sk)->tw_priority;
1081                 }
1082         } else {
1083                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1084                         label = ip6_flowlabel(ipv6h);
1085         }
1086
1087         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1088                              ipv6_get_dsfield(ipv6h), label, priority, 0);
1089
1090 #ifdef CONFIG_TCP_MD5SIG
1091 out:
1092         rcu_read_unlock();
1093 #endif
1094 }
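
The seq/ack selection above follows RFC 793's reset rules: if the offending
segment carried an ACK, the RST's sequence number is that ACK value;
otherwise the RST acknowledges everything the segment occupied (payload plus
the SYN and FIN flags). A condensed sketch of the same selection:

#include <stdio.h>
#include <stdint.h>

struct seg {
        uint32_t seq, ack_seq;
        int syn, fin, ack;
        uint32_t paylen;
};

/* mirror of the seq/ack computation in tcp_v6_send_reset() */
static void rst_numbers(const struct seg *s, uint32_t *seq, uint32_t *ack)
{
        *seq = *ack = 0;
        if (s->ack)
                *seq = s->ack_seq;      /* RST.seq = SEG.ack */
        else
                *ack = s->seq + s->syn + s->fin + s->paylen;
}

int main(void)
{
        struct seg syn = { .seq = 1000, .syn = 1 };
        uint32_t seq, ack;

        rst_numbers(&syn, &seq, &ack);
        printf("seq=%u ack=%u\n", seq, ack);    /* seq=0 ack=1001 */
        return 0;
}
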
1095
1096 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1097                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1098                             struct tcp_md5sig_key *key, u8 tclass,
1099                             __be32 label, u32 priority, u32 txhash)
1100 {
1101         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1102                              tclass, label, priority, txhash);
1103 }
1104
1105 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1106 {
1107         struct inet_timewait_sock *tw = inet_twsk(sk);
1108         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1109
1110         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1111                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1112                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1113                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1114                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1115                         tw->tw_txhash);
1116
1117         inet_twsk_put(tw);
1118 }
1119
1120 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1121                                   struct request_sock *req)
1122 {
1123         int l3index;
1124
1125         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1126
1127         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1128          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1129          */
1130         /* RFC 7323 2.3
1131          * The window field (SEG.WND) of every outgoing segment, with the
1132          * exception of <SYN> segments, MUST be right-shifted by
1133          * Rcv.Wind.Shift bits:
1134          */
1135         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1136                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1137                         tcp_rsk(req)->rcv_nxt,
1138                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1139                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1140                         req->ts_recent, sk->sk_bound_dev_if,
1141                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1142                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
1143                         tcp_rsk(req)->txhash);
1144 }
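
Per the RFC 7323 note above, every non-SYN segment advertises its receive
window right-shifted by the negotiated scale, since the window field is only
16 bits wide. A one-line illustration of what that shift implies:

#include <stdio.h>

int main(void)
{
        unsigned int rcv_wnd = 262144;  /* bytes the receiver can absorb */
        unsigned int wscale = 7;        /* negotiated Rcv.Wind.Shift */

        /* value carried in the 16-bit window field of the segment */
        printf("%u\n", rcv_wnd >> wscale);      /* 2048 */
        return 0;
}
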
1145
1146
1147 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1148 {
1149 #ifdef CONFIG_SYN_COOKIES
1150         const struct tcphdr *th = tcp_hdr(skb);
1151
1152         if (!th->syn)
1153                 sk = cookie_v6_check(sk, skb);
1154 #endif
1155         return sk;
1156 }
1157
1158 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1159                          struct tcphdr *th, u32 *cookie)
1160 {
1161         u16 mss = 0;
1162 #ifdef CONFIG_SYN_COOKIES
1163         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1164                                     &tcp_request_sock_ipv6_ops, sk, th);
1165         if (mss) {
1166                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1167                 tcp_synq_overflow(sk);
1168         }
1169 #endif
1170         return mss;
1171 }
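
Syncookies let the listener drop per-connection state entirely: the SYN-ACK's
initial sequence number is a keyed hash of the connection tuple with a few
low bits left over to encode the negotiated MSS, recovered when the final ACK
returns. A toy sketch of that encode/decode round trip (real cookies use
keyed SipHash plus a timestamp counter; this is only illustrative):

#include <stdio.h>
#include <stdint.h>

static const uint16_t mss_table[] = { 536, 1220, 1440, 1460 };

/* toy: low 2 bits carry an index into mss_table, the rest is "hash" */
static uint32_t cookie_encode(uint32_t hash, unsigned int mss_idx)
{
        return (hash & ~3u) | mss_idx;
}

static uint16_t cookie_decode_mss(uint32_t cookie)
{
        return mss_table[cookie & 3u];
}

int main(void)
{
        uint32_t c = cookie_encode(0xdeadbeef, 1);

        printf("mss=%u\n", cookie_decode_mss(c));       /* mss=1220 */
        return 0;
}
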
1172
1173 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1174 {
1175         if (skb->protocol == htons(ETH_P_IP))
1176                 return tcp_v4_conn_request(sk, skb);
1177
1178         if (!ipv6_unicast_destination(skb))
1179                 goto drop;
1180
1181         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1182                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1183                 return 0;
1184         }
1185
1186         return tcp_conn_request(&tcp6_request_sock_ops,
1187                                 &tcp_request_sock_ipv6_ops, sk, skb);
1188
1189 drop:
1190         tcp_listendrop(sk);
1191         return 0; /* don't send reset */
1192 }
1193
1194 static void tcp_v6_restore_cb(struct sk_buff *skb)
1195 {
1196         /* We need to move header back to the beginning if xfrm6_policy_check()
1197          * and tcp_v6_fill_cb() are going to be called again.
1198          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1199          */
1200         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1201                 sizeof(struct inet6_skb_parm));
1202 }
1203
1204 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1205                                          struct request_sock *req,
1206                                          struct dst_entry *dst,
1207                                          struct request_sock *req_unhash,
1208                                          bool *own_req)
1209 {
1210         struct inet_request_sock *ireq;
1211         struct ipv6_pinfo *newnp;
1212         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1213         struct ipv6_txoptions *opt;
1214         struct inet_sock *newinet;
1215         bool found_dup_sk = false;
1216         struct tcp_sock *newtp;
1217         struct sock *newsk;
1218 #ifdef CONFIG_TCP_MD5SIG
1219         struct tcp_md5sig_key *key;
1220         int l3index;
1221 #endif
1222         struct flowi6 fl6;
1223
1224         if (skb->protocol == htons(ETH_P_IP)) {
1225                 /*
1226                  *      v6 mapped
1227                  */
1228
1229                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1230                                              req_unhash, own_req);
1231
1232                 if (!newsk)
1233                         return NULL;
1234
1235                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1236
1237                 newnp = tcp_inet6_sk(newsk);
1238                 newtp = tcp_sk(newsk);
1239
1240                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1241
1242                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1243
1244                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1245                 if (sk_is_mptcp(newsk))
1246                         mptcpv6_handle_mapped(newsk, true);
1247                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1248 #ifdef CONFIG_TCP_MD5SIG
1249                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1250 #endif
1251
1252                 newnp->ipv6_mc_list = NULL;
1253                 newnp->ipv6_ac_list = NULL;
1254                 newnp->ipv6_fl_list = NULL;
1255                 newnp->pktoptions  = NULL;
1256                 newnp->opt         = NULL;
1257                 newnp->mcast_oif   = inet_iif(skb);
1258                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1259                 newnp->rcv_flowinfo = 0;
1260                 if (np->repflow)
1261                         newnp->flow_label = 0;
1262
1263                 /*
1264                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1265                  * here, tcp_create_openreq_child now does this for us, see the comment in
1266                  * that function for the gory details. -acme
1267                  */
1268
1269                 /* It is a tricky place. Until this moment the IPv4 tcp
1270                    worked with the IPv6 icsk.icsk_af_ops.
1271                    Sync it now.
1272                  */
1273                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1274
1275                 return newsk;
1276         }
1277
1278         ireq = inet_rsk(req);
1279
1280         if (sk_acceptq_is_full(sk))
1281                 goto out_overflow;
1282
1283         if (!dst) {
1284                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1285                 if (!dst)
1286                         goto out;
1287         }
1288
1289         newsk = tcp_create_openreq_child(sk, req, skb);
1290         if (!newsk)
1291                 goto out_nonewsk;
1292
1293         /*
1294          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1295          * count here, tcp_create_openreq_child now does this for us, see the
1296          * comment in that function for the gory details. -acme
1297          */
1298
1299         newsk->sk_gso_type = SKB_GSO_TCPV6;
1300         ip6_dst_store(newsk, dst, NULL, NULL);
1301         inet6_sk_rx_dst_set(newsk, skb);
1302
1303         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1304
1305         newtp = tcp_sk(newsk);
1306         newinet = inet_sk(newsk);
1307         newnp = tcp_inet6_sk(newsk);
1308
1309         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1310
1311         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1312         newnp->saddr = ireq->ir_v6_loc_addr;
1313         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1314         newsk->sk_bound_dev_if = ireq->ir_iif;
1315
1316         /* Now IPv6 options...
1317
1318            First: no IPv4 options.
1319          */
1320         newinet->inet_opt = NULL;
1321         newnp->ipv6_mc_list = NULL;
1322         newnp->ipv6_ac_list = NULL;
1323         newnp->ipv6_fl_list = NULL;
1324
1325         /* Clone RX bits */
1326         newnp->rxopt.all = np->rxopt.all;
1327
1328         newnp->pktoptions = NULL;
1329         newnp->opt        = NULL;
1330         newnp->mcast_oif  = tcp_v6_iif(skb);
1331         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1332         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1333         if (np->repflow)
1334                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1335
1336         /* Set ToS of the new socket based upon the value of incoming SYN.
1337          * ECT bits are set later in tcp_init_transfer().
1338          */
1339         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1340                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1341
1342         /* Clone native IPv6 options from listening socket (if any)
1343
1344            Yes, keeping reference count would be much more clever,
1345            but we do one more thing here: we reattach optmem
1346            to newsk.
1347          */
1348         opt = ireq->ipv6_opt;
1349         if (!opt)
1350                 opt = rcu_dereference(np->opt);
1351         if (opt) {
1352                 opt = ipv6_dup_options(newsk, opt);
1353                 RCU_INIT_POINTER(newnp->opt, opt);
1354         }
1355         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1356         if (opt)
1357                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1358                                                     opt->opt_flen;
1359
1360         tcp_ca_openreq_child(newsk, dst);
1361
1362         tcp_sync_mss(newsk, dst_mtu(dst));
1363         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1364
1365         tcp_initialize_rcv_mss(newsk);
1366
1367         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1368         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1369
1370 #ifdef CONFIG_TCP_MD5SIG
1371         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1372
1373         /* Copy over the MD5 key from the original socket */
1374         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1375         if (key) {
1376                 /* We're using one, so create a matching key
1377                  * on the newsk structure. If we fail to get
1378                  * memory, then we end up not copying the key
1379                  * across. Shucks.
1380                  */
1381                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1382                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1383                                sk_gfp_mask(sk, GFP_ATOMIC));
1384         }
1385 #endif
1386
1387         if (__inet_inherit_port(sk, newsk) < 0) {
1388                 inet_csk_prepare_forced_close(newsk);
1389                 tcp_done(newsk);
1390                 goto out;
1391         }
1392         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1393                                        &found_dup_sk);
1394         if (*own_req) {
1395                 tcp_move_syn(newtp, req);
1396
1397                 /* Clone pktoptions received with SYN, if we own the req */
1398                 if (ireq->pktopts) {
1399                         newnp->pktoptions = skb_clone(ireq->pktopts,
1400                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1401                         consume_skb(ireq->pktopts);
1402                         ireq->pktopts = NULL;
1403                         if (newnp->pktoptions) {
1404                                 tcp_v6_restore_cb(newnp->pktoptions);
1405                                 skb_set_owner_r(newnp->pktoptions, newsk);
1406                         }
1407                 }
1408         } else {
1409                 if (!req_unhash && found_dup_sk) {
1410                         /* This code path should only be executed in the
1411                          * syncookie case
1412                          */
1413                         bh_unlock_sock(newsk);
1414                         sock_put(newsk);
1415                         newsk = NULL;
1416                 }
1417         }
1418
1419         return newsk;
1420
1421 out_overflow:
1422         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1423 out_nonewsk:
1424         dst_release(dst);
1425 out:
1426         tcp_listendrop(sk);
1427         return NULL;
1428 }
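
From userspace, the v4-mapped branch at the top of this function is what lets
an AF_INET6 listener accept IPv4 clients: the accepted socket reports a
::ffff:a.b.c.d peer while internally running the mapped ops. A minimal sketch
(port 8080 arbitrary, error handling elided):

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
        struct sockaddr_in6 a = { .sin6_family = AF_INET6,
                                  .sin6_port = htons(8080) };
        struct sockaddr_in6 peer;
        socklen_t plen = sizeof(peer);
        char buf[INET6_ADDRSTRLEN];
        int fd = socket(AF_INET6, SOCK_STREAM, 0), cfd;

        /* IPV6_V6ONLY left off: IPv4 clients show up as v4-mapped peers */
        bind(fd, (struct sockaddr *)&a, sizeof(a));
        listen(fd, 1);
        cfd = accept(fd, (struct sockaddr *)&peer, &plen);
        if (cfd >= 0) {
                inet_ntop(AF_INET6, &peer.sin6_addr, buf, sizeof(buf));
                printf("peer %s\n", buf);       /* e.g. ::ffff:192.0.2.7 */
                close(cfd);
        }
        close(fd);
        return 0;
}
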
1429
1430 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1431                                                            u32));
1432 /* The socket must have its spinlock held when we get
1433  * here, unless it is a TCP_LISTEN socket.
1434  *
1435  * We have a potential double-lock case here, so even when
1436  * doing backlog processing we use the BH locking scheme.
1437  * This is because we cannot sleep with the original spinlock
1438  * held.
1439  */
1440 INDIRECT_CALLABLE_SCOPE
1441 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1442 {
1443         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1444         struct sk_buff *opt_skb = NULL;
1445         enum skb_drop_reason reason;
1446         struct tcp_sock *tp;
1447
1448         /* Imagine: socket is IPv6. IPv4 packet arrives,
1449            goes to the IPv4 receive handler and is backlogged.
1450            From the backlog it always goes here. Kerboom...
1451            Fortunately, tcp_rcv_established and rcv_established
1452            handle them correctly, but it is not the case with
1453            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1454          */
1455
1456         if (skb->protocol == htons(ETH_P_IP))
1457                 return tcp_v4_do_rcv(sk, skb);
1458
1459         /*
1460          *      Socket locking is here for SMP purposes, as backlog rcv
1461          *      is currently called with bh processing disabled.
1462          */
1463
1464         /* Do Stevens' IPV6_PKTOPTIONS.
1465
1466            Yes, guys, it is the only place in our code where we
1467            may make it without affecting IPv4.
1468            The rest of the code is protocol independent,
1469            and I do not like the idea of uglifying IPv4.
1470
1471            Actually, the whole idea behind IPV6_PKTOPTIONS
1472            does not look very well thought out. For now we latch
1473            the options received in the last packet enqueued
1474            by tcp. Feel free to propose a better solution.
1475                                               --ANK (980728)
1476          */
1477         if (np->rxopt.all)
1478                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1479
1480         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1481         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1482                 struct dst_entry *dst;
1483
1484                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1485                                                 lockdep_sock_is_held(sk));
1486
1487                 sock_rps_save_rxhash(sk, skb);
1488                 sk_mark_napi_id(sk, skb);
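                     /* Validate the cached input route: it is only reusable
                      * if the packet came in on the same interface and the
                      * dst has not expired.
                      */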
1489                 if (dst) {
1490                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1491                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1492                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1493                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1494                                 dst_release(dst);
1495                         }
1496                 }
1497
1498                 tcp_rcv_established(sk, skb);
1499                 if (opt_skb)
1500                         goto ipv6_pktoptions;
1501                 return 0;
1502         }
1503
1504         if (tcp_checksum_complete(skb))
1505                 goto csum_err;
1506
1507         if (sk->sk_state == TCP_LISTEN) {
1508                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1509
1510                 if (!nsk)
1511                         goto discard;
1512
1513                 if (nsk != sk) {
1514                         if (tcp_child_process(sk, nsk, skb))
1515                                 goto reset;
1516                         if (opt_skb)
1517                                 __kfree_skb(opt_skb);
1518                         return 0;
1519                 }
1520         } else
1521                 sock_rps_save_rxhash(sk, skb);
1522
1523         if (tcp_rcv_state_process(sk, skb))
1524                 goto reset;
1525         if (opt_skb)
1526                 goto ipv6_pktoptions;
1527         return 0;
1528
1529 reset:
1530         tcp_v6_send_reset(sk, skb);
1531 discard:
1532         if (opt_skb)
1533                 __kfree_skb(opt_skb);
1534         kfree_skb_reason(skb, reason);
1535         return 0;
1536 csum_err:
1537         reason = SKB_DROP_REASON_TCP_CSUM;
1538         trace_tcp_bad_csum(skb);
1539         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1540         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1541         goto discard;
1542
1543
1544 ipv6_pktoptions:
1545         /* You may ask: what is this?
1546
1547            1. skb was enqueued by tcp.
1548            2. skb is added to the tail of the read queue, rather than out of order.
1549            3. The socket is not in a passive state.
1550            4. Finally, it really contains options, which the user wants to receive.
1551          */
1552         tp = tcp_sk(sk);
1553         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1554             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1555                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1556                         np->mcast_oif = tcp_v6_iif(opt_skb);
1557                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1558                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1559                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1560                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1561                 if (np->repflow)
1562                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1563                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1564                         skb_set_owner_r(opt_skb, sk);
1565                         tcp_v6_restore_cb(opt_skb);
1566                         opt_skb = xchg(&np->pktoptions, opt_skb);
1567                 } else {
1568                         __kfree_skb(opt_skb);
1569                         opt_skb = xchg(&np->pktoptions, NULL);
1570                 }
1571         }
1572
1573         consume_skb(opt_skb);
1574         return 0;
1575 }
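     /* For reference: a receiver opts into these latched options from
      * userspace with, e.g.,
      *
      *     int on = 1;
      *     setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
      *
      * and then collects them as ancillary data via recvmsg().
      */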
1576
1577 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1578                            const struct tcphdr *th)
1579 {
1580         /* This is tricky: we move IP6CB to its correct location in
1581          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1582          * _decode_session6() uses IP6CB().
1583          * barrier() makes sure the compiler won't play aliasing games.
1584          */
1585         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1586                 sizeof(struct inet6_skb_parm));
1587         barrier();
1588
1589         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
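             /* SYN and FIN each consume one sequence number, hence the
              * th->syn and th->fin terms; the payload length is skb->len
              * minus the TCP header (doff counts 32-bit words).
              */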
1590         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1591                                     skb->len - th->doff*4);
1592         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1593         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1594         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1595         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1596         TCP_SKB_CB(skb)->sacked = 0;
1597         TCP_SKB_CB(skb)->has_rxtstamp =
1598                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1599 }
1600
1601 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1602 {
1603         enum skb_drop_reason drop_reason;
1604         int sdif = inet6_sdif(skb);
1605         int dif = inet6_iif(skb);
1606         const struct tcphdr *th;
1607         const struct ipv6hdr *hdr;
1608         bool refcounted;
1609         struct sock *sk;
1610         int ret;
1611         struct net *net = dev_net(skb->dev);
1612
1613         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1614         if (skb->pkt_type != PACKET_HOST)
1615                 goto discard_it;
1616
1617         /*
1618          *      Count it even if it's bad.
1619          */
1620         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1621
1622         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1623                 goto discard_it;
1624
1625         th = (const struct tcphdr *)skb->data;
1626
1627         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1628                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1629                 goto bad_packet;
1630         }
1631         if (!pskb_may_pull(skb, th->doff*4))
1632                 goto discard_it;
1633
1634         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1635                 goto csum_error;
1636
1637         th = (const struct tcphdr *)skb->data;
1638         hdr = ipv6_hdr(skb);
1639
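             /* Look the segment up in the established hash first, then among
              * listeners; refcounted reports whether the lookup took a
              * reference (listener lookups are RCU-only and do not).
              */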
1640 lookup:
1641         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1642                                 th->source, th->dest, inet6_iif(skb), sdif,
1643                                 &refcounted);
1644         if (!sk)
1645                 goto no_tcp_socket;
1646
1647 process:
1648         if (sk->sk_state == TCP_TIME_WAIT)
1649                 goto do_time_wait;
1650
1651         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1652                 struct request_sock *req = inet_reqsk(sk);
1653                 bool req_stolen = false;
1654                 struct sock *nsk;
1655
1656                 sk = req->rsk_listener;
1657                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1658                                                    &hdr->saddr, &hdr->daddr,
1659                                                    AF_INET6, dif, sdif);
1660                 if (drop_reason) {
1661                         sk_drops_add(sk, skb);
1662                         reqsk_put(req);
1663                         goto discard_it;
1664                 }
1665                 if (tcp_checksum_complete(skb)) {
1666                         reqsk_put(req);
1667                         goto csum_error;
1668                 }
1669                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1670                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1671                         if (!nsk) {
1672                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1673                                 goto lookup;
1674                         }
1675                         sk = nsk;
1676                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1677                          * before returning.
1678                          */
1679                 } else {
1680                         sock_hold(sk);
1681                 }
1682                 refcounted = true;
1683                 nsk = NULL;
1684                 if (!tcp_filter(sk, skb)) {
1685                         th = (const struct tcphdr *)skb->data;
1686                         hdr = ipv6_hdr(skb);
1687                         tcp_v6_fill_cb(skb, hdr, th);
1688                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1689                 } else {
1690                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1691                 }
1692                 if (!nsk) {
1693                         reqsk_put(req);
1694                         if (req_stolen) {
1695                                 /* Another CPU got exclusive access to req
1696                                  * and created a full-blown socket.
1697                                  * Try to feed this packet to this socket
1698                                  * instead of discarding it.
1699                                  */
1700                                 tcp_v6_restore_cb(skb);
1701                                 sock_put(sk);
1702                                 goto lookup;
1703                         }
1704                         goto discard_and_relse;
1705                 }
1706                 if (nsk == sk) {
1707                         reqsk_put(req);
1708                         tcp_v6_restore_cb(skb);
1709                 } else if (tcp_child_process(sk, nsk, skb)) {
1710                         tcp_v6_send_reset(nsk, skb);
1711                         goto discard_and_relse;
1712                 } else {
1713                         sock_put(sk);
1714                         return 0;
1715                 }
1716         }
1717
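             /* Generalized TTL Security Mechanism (RFC 5082): drop segments
              * whose hop limit is below the minimum the socket asked for.
              */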
1718         if (static_branch_unlikely(&ip6_min_hopcount)) {
1719                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1720                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1721                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1722                         goto discard_and_relse;
1723                 }
1724         }
1725
1726         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1727                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1728                 goto discard_and_relse;
1729         }
1730
1731         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1732                                            AF_INET6, dif, sdif);
1733         if (drop_reason)
1734                 goto discard_and_relse;
1735
1736         if (tcp_filter(sk, skb)) {
1737                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1738                 goto discard_and_relse;
1739         }
1740         th = (const struct tcphdr *)skb->data;
1741         hdr = ipv6_hdr(skb);
1742         tcp_v6_fill_cb(skb, hdr, th);
1743
1744         skb->dev = NULL;
1745
1746         if (sk->sk_state == TCP_LISTEN) {
1747                 ret = tcp_v6_do_rcv(sk, skb);
1748                 goto put_and_return;
1749         }
1750
1751         sk_incoming_cpu_update(sk);
1752
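             /* If no user context owns the socket, process the segment in
              * softirq context now; otherwise queue it on the backlog, to be
              * run when the owner releases the lock.
              */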
1753         bh_lock_sock_nested(sk);
1754         tcp_segs_in(tcp_sk(sk), skb);
1755         ret = 0;
1756         if (!sock_owned_by_user(sk)) {
1757                 ret = tcp_v6_do_rcv(sk, skb);
1758         } else {
1759                 if (tcp_add_backlog(sk, skb, &drop_reason))
1760                         goto discard_and_relse;
1761         }
1762         bh_unlock_sock(sk);
1763 put_and_return:
1764         if (refcounted)
1765                 sock_put(sk);
1766         return ret ? -1 : 0;
1767
1768 no_tcp_socket:
1769         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1770         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1771                 goto discard_it;
1772
1773         tcp_v6_fill_cb(skb, hdr, th);
1774
1775         if (tcp_checksum_complete(skb)) {
1776 csum_error:
1777                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1778                 trace_tcp_bad_csum(skb);
1779                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1780 bad_packet:
1781                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1782         } else {
1783                 tcp_v6_send_reset(NULL, skb);
1784         }
1785
1786 discard_it:
1787         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1788         kfree_skb_reason(skb, drop_reason);
1789         return 0;
1790
1791 discard_and_relse:
1792         sk_drops_add(sk, skb);
1793         if (refcounted)
1794                 sock_put(sk);
1795         goto discard_it;
1796
1797 do_time_wait:
1798         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1799                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1800                 inet_twsk_put(inet_twsk(sk));
1801                 goto discard_it;
1802         }
1803
1804         tcp_v6_fill_cb(skb, hdr, th);
1805
1806         if (tcp_checksum_complete(skb)) {
1807                 inet_twsk_put(inet_twsk(sk));
1808                 goto csum_error;
1809         }
1810
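             /* A segment hitting a TIME_WAIT socket may legitimately be a
              * new SYN reusing the old 4-tuple; tcp_timewait_state_process()
              * decides whether to hand it to a listener, ACK it, or reset.
              */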
1811         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1812         case TCP_TW_SYN:
1813         {
1814                 struct sock *sk2;
1815
1816                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1817                                             skb, __tcp_hdrlen(th),
1818                                             &ipv6_hdr(skb)->saddr, th->source,
1819                                             &ipv6_hdr(skb)->daddr,
1820                                             ntohs(th->dest),
1821                                             tcp_v6_iif_l3_slave(skb),
1822                                             sdif);
1823                 if (sk2) {
1824                         struct inet_timewait_sock *tw = inet_twsk(sk);

1825                         inet_twsk_deschedule_put(tw);
1826                         sk = sk2;
1827                         tcp_v6_restore_cb(skb);
1828                         refcounted = false;
1829                         goto process;
1830                 }
1831         }
1832                 /* to ACK */
1833                 fallthrough;
1834         case TCP_TW_ACK:
1835                 tcp_v6_timewait_ack(sk, skb);
1836                 break;
1837         case TCP_TW_RST:
1838                 tcp_v6_send_reset(sk, skb);
1839                 inet_twsk_deschedule_put(inet_twsk(sk));
1840                 goto discard_it;
1841         case TCP_TW_SUCCESS:
1842                 ;
1843         }
1844         goto discard_it;
1845 }
1846
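     /* Early demux: called from the IPv6 receive path before routing. If an
      * established socket is found, its cached dst can be attached to the
      * skb, saving a route lookup on the hot path.
      */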
1847 void tcp_v6_early_demux(struct sk_buff *skb)
1848 {
1849         const struct ipv6hdr *hdr;
1850         const struct tcphdr *th;
1851         struct sock *sk;
1852
1853         if (skb->pkt_type != PACKET_HOST)
1854                 return;
1855
1856         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1857                 return;
1858
1859         hdr = ipv6_hdr(skb);
1860         th = tcp_hdr(skb);
1861
1862         if (th->doff < sizeof(struct tcphdr) / 4)
1863                 return;
1864
1865         /* Note: We use inet6_iif() here, not tcp_v6_iif(). */
1866         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1867                                         &hdr->saddr, th->source,
1868                                         &hdr->daddr, ntohs(th->dest),
1869                                         inet6_iif(skb), inet6_sdif(skb));
1870         if (sk) {
1871                 skb->sk = sk;
1872                 skb->destructor = sock_edemux;
1873                 if (sk_fullsock(sk)) {
1874                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1875
1876                         if (dst)
1877                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1878                         if (dst &&
1879                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1880                                 skb_dst_set_noref(skb, dst);
1881                 }
1882         }
1883 }
1884
1885 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1886         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1887         .twsk_unique    = tcp_twsk_unique,
1888         .twsk_destructor = tcp_twsk_destructor,
1889 };
1890
1891 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1892 {
1893         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1894 }
1895
1896 const struct inet_connection_sock_af_ops ipv6_specific = {
1897         .queue_xmit        = inet6_csk_xmit,
1898         .send_check        = tcp_v6_send_check,
1899         .rebuild_header    = inet6_sk_rebuild_header,
1900         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1901         .conn_request      = tcp_v6_conn_request,
1902         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1903         .net_header_len    = sizeof(struct ipv6hdr),
1904         .net_frag_header_len = sizeof(struct frag_hdr),
1905         .setsockopt        = ipv6_setsockopt,
1906         .getsockopt        = ipv6_getsockopt,
1907         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1908         .sockaddr_len      = sizeof(struct sockaddr_in6),
1909         .mtu_reduced       = tcp_v6_mtu_reduced,
1910 };
1911
1912 #ifdef CONFIG_TCP_MD5SIG
1913 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1914         .md5_lookup     =       tcp_v6_md5_lookup,
1915         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1916         .md5_parse      =       tcp_v6_parse_md5_keys,
1917 };
1918 #endif
1919
1920 /*
1921  *      TCP over IPv4 via INET6 API
1922  */
1923 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1924         .queue_xmit        = ip_queue_xmit,
1925         .send_check        = tcp_v4_send_check,
1926         .rebuild_header    = inet_sk_rebuild_header,
1927         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1928         .conn_request      = tcp_v6_conn_request,
1929         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1930         .net_header_len    = sizeof(struct iphdr),
1931         .setsockopt        = ipv6_setsockopt,
1932         .getsockopt        = ipv6_getsockopt,
1933         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1934         .sockaddr_len      = sizeof(struct sockaddr_in6),
1935         .mtu_reduced       = tcp_v4_mtu_reduced,
1936 };
1937
1938 #ifdef CONFIG_TCP_MD5SIG
1939 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1940         .md5_lookup     =       tcp_v4_md5_lookup,
1941         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1942         .md5_parse      =       tcp_v6_parse_md5_keys,
1943 };
1944 #endif
1945
1946 /* NOTE: A lot of things are set to zero explicitly by the call to
1947  *       sk_alloc(), so they need not be done here.
1948  */
1949 static int tcp_v6_init_sock(struct sock *sk)
1950 {
1951         struct inet_connection_sock *icsk = inet_csk(sk);
1952
1953         tcp_init_sock(sk);
1954
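             /* Start out with the IPv6 ops; tcp_v6_connect() swaps in
              * ipv6_mapped if this socket later connects to a v4-mapped
              * address.
              */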
1955         icsk->icsk_af_ops = &ipv6_specific;
1956
1957 #ifdef CONFIG_TCP_MD5SIG
1958         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1959 #endif
1960
1961         return 0;
1962 }
1963
1964 static void tcp_v6_destroy_sock(struct sock *sk)
1965 {
1966         tcp_v4_destroy_sock(sk);
1967         inet6_destroy_sock(sk);
1968 }
1969
1970 #ifdef CONFIG_PROC_FS
1971 /* Proc filesystem TCPv6 sock list dumping. */
1972 static void get_openreq6(struct seq_file *seq,
1973                          const struct request_sock *req, int i)
1974 {
1975         long ttd = req->rsk_timer.expires - jiffies;
1976         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1977         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1978
1979         if (ttd < 0)
1980                 ttd = 0;
1981
1982         seq_printf(seq,
1983                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1984                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1985                    i,
1986                    src->s6_addr32[0], src->s6_addr32[1],
1987                    src->s6_addr32[2], src->s6_addr32[3],
1988                    inet_rsk(req)->ir_num,
1989                    dest->s6_addr32[0], dest->s6_addr32[1],
1990                    dest->s6_addr32[2], dest->s6_addr32[3],
1991                    ntohs(inet_rsk(req)->ir_rmt_port),
1992                    TCP_SYN_RECV,
1993                    0, 0, /* could print option size, but that is af dependent. */
1994                    1,   /* timers active (only the expire timer) */
1995                    jiffies_to_clock_t(ttd),
1996                    req->num_timeout,
1997                    from_kuid_munged(seq_user_ns(seq),
1998                                     sock_i_uid(req->rsk_listener)),
1999                    0,  /* non standard timer */
2000                    0, /* open_requests have no inode */
2001                    0, req);
2002 }
2003
2004 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2005 {
2006         const struct in6_addr *dest, *src;
2007         __u16 destp, srcp;
2008         int timer_active;
2009         unsigned long timer_expires;
2010         const struct inet_sock *inet = inet_sk(sp);
2011         const struct tcp_sock *tp = tcp_sk(sp);
2012         const struct inet_connection_sock *icsk = inet_csk(sp);
2013         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2014         int rx_queue;
2015         int state;
2016
2017         dest  = &sp->sk_v6_daddr;
2018         src   = &sp->sk_v6_rcv_saddr;
2019         destp = ntohs(inet->inet_dport);
2020         srcp  = ntohs(inet->inet_sport);
2021
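             /* Encode the pending timer the way /proc/net/tcp does:
              * 1 = retransmit/loss probe, 2 = keepalive, 4 = zero-window
              * probe, 0 = none.
              */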
2022         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2023             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2024             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2025                 timer_active    = 1;
2026                 timer_expires   = icsk->icsk_timeout;
2027         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2028                 timer_active    = 4;
2029                 timer_expires   = icsk->icsk_timeout;
2030         } else if (timer_pending(&sp->sk_timer)) {
2031                 timer_active    = 2;
2032                 timer_expires   = sp->sk_timer.expires;
2033         } else {
2034                 timer_active    = 0;
2035                 timer_expires = jiffies;
2036         }
2037
2038         state = inet_sk_state_load(sp);
2039         if (state == TCP_LISTEN)
2040                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2041         else
2042                 /* Because we don't lock the socket,
2043                  * we might find a transient negative value.
2044                  */
2045                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2046                                       READ_ONCE(tp->copied_seq), 0);
2047
2048         seq_printf(seq,
2049                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2050                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2051                    i,
2052                    src->s6_addr32[0], src->s6_addr32[1],
2053                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2054                    dest->s6_addr32[0], dest->s6_addr32[1],
2055                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2056                    state,
2057                    READ_ONCE(tp->write_seq) - tp->snd_una,
2058                    rx_queue,
2059                    timer_active,
2060                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2061                    icsk->icsk_retransmits,
2062                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2063                    icsk->icsk_probes_out,
2064                    sock_i_ino(sp),
2065                    refcount_read(&sp->sk_refcnt), sp,
2066                    jiffies_to_clock_t(icsk->icsk_rto),
2067                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2068                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2069                    tcp_snd_cwnd(tp),
2070                    state == TCP_LISTEN ?
2071                         fastopenq->max_qlen :
2072                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2073                    );
2074 }
2075
2076 static void get_timewait6_sock(struct seq_file *seq,
2077                                struct inet_timewait_sock *tw, int i)
2078 {
2079         long delta = tw->tw_timer.expires - jiffies;
2080         const struct in6_addr *dest, *src;
2081         __u16 destp, srcp;
2082
2083         dest = &tw->tw_v6_daddr;
2084         src  = &tw->tw_v6_rcv_saddr;
2085         destp = ntohs(tw->tw_dport);
2086         srcp  = ntohs(tw->tw_sport);
2087
2088         seq_printf(seq,
2089                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2090                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2091                    i,
2092                    src->s6_addr32[0], src->s6_addr32[1],
2093                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2094                    dest->s6_addr32[0], dest->s6_addr32[1],
2095                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2096                    tw->tw_substate, 0, 0,
2097                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2098                    refcount_read(&tw->tw_refcnt), tw);
2099 }
2100
2101 static int tcp6_seq_show(struct seq_file *seq, void *v)
2102 {
2103         struct tcp_iter_state *st;
2104         struct sock *sk = v;
2105
2106         if (v == SEQ_START_TOKEN) {
2107                 seq_puts(seq,
2108                          "  sl  "
2109                          "local_address                         "
2110                          "remote_address                        "
2111                          "st tx_queue rx_queue tr tm->when retrnsmt"
2112                          "   uid  timeout inode\n");
2113                 goto out;
2114         }
2115         st = seq->private;
2116
2117         if (sk->sk_state == TCP_TIME_WAIT)
2118                 get_timewait6_sock(seq, v, st->num);
2119         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2120                 get_openreq6(seq, v, st->num);
2121         else
2122                 get_tcp6_sock(seq, v, st->num);
2123 out:
2124         return 0;
2125 }
2126
2127 static const struct seq_operations tcp6_seq_ops = {
2128         .show           = tcp6_seq_show,
2129         .start          = tcp_seq_start,
2130         .next           = tcp_seq_next,
2131         .stop           = tcp_seq_stop,
2132 };
2133
2134 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2135         .family         = AF_INET6,
2136 };
2137
2138 int __net_init tcp6_proc_init(struct net *net)
2139 {
2140         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2141                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2142                 return -ENOMEM;
2143         return 0;
2144 }
2145
2146 void tcp6_proc_exit(struct net *net)
2147 {
2148         remove_proc_entry("tcp6", net->proc_net);
2149 }
2150 #endif
2151
2152 struct proto tcpv6_prot = {
2153         .name                   = "TCPv6",
2154         .owner                  = THIS_MODULE,
2155         .close                  = tcp_close,
2156         .pre_connect            = tcp_v6_pre_connect,
2157         .connect                = tcp_v6_connect,
2158         .disconnect             = tcp_disconnect,
2159         .accept                 = inet_csk_accept,
2160         .ioctl                  = tcp_ioctl,
2161         .init                   = tcp_v6_init_sock,
2162         .destroy                = tcp_v6_destroy_sock,
2163         .shutdown               = tcp_shutdown,
2164         .setsockopt             = tcp_setsockopt,
2165         .getsockopt             = tcp_getsockopt,
2166         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2167         .keepalive              = tcp_set_keepalive,
2168         .recvmsg                = tcp_recvmsg,
2169         .sendmsg                = tcp_sendmsg,
2170         .sendpage               = tcp_sendpage,
2171         .backlog_rcv            = tcp_v6_do_rcv,
2172         .release_cb             = tcp_release_cb,
2173         .hash                   = inet6_hash,
2174         .unhash                 = inet_unhash,
2175         .get_port               = inet_csk_get_port,
2176         .put_port               = inet_put_port,
2177 #ifdef CONFIG_BPF_SYSCALL
2178         .psock_update_sk_prot   = tcp_bpf_update_proto,
2179 #endif
2180         .enter_memory_pressure  = tcp_enter_memory_pressure,
2181         .leave_memory_pressure  = tcp_leave_memory_pressure,
2182         .stream_memory_free     = tcp_stream_memory_free,
2183         .sockets_allocated      = &tcp_sockets_allocated,
2184
2185         .memory_allocated       = &tcp_memory_allocated,
2186         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2187
2188         .memory_pressure        = &tcp_memory_pressure,
2189         .orphan_count           = &tcp_orphan_count,
2190         .sysctl_mem             = sysctl_tcp_mem,
2191         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2192         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2193         .max_header             = MAX_TCP_HEADER,
2194         .obj_size               = sizeof(struct tcp6_sock),
2195         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2196         .twsk_prot              = &tcp6_timewait_sock_ops,
2197         .rsk_prot               = &tcp6_request_sock_ops,
2198         .h.hashinfo             = &tcp_hashinfo,
2199         .no_autobind            = true,
2200         .diag_destroy           = tcp_abort,
2201 };
2202 EXPORT_SYMBOL_GPL(tcpv6_prot);
2203
2204 static const struct inet6_protocol tcpv6_protocol = {
2205         .handler        =       tcp_v6_rcv,
2206         .err_handler    =       tcp_v6_err,
2207         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2208 };
2209
2210 static struct inet_protosw tcpv6_protosw = {
2211         .type           =       SOCK_STREAM,
2212         .protocol       =       IPPROTO_TCP,
2213         .prot           =       &tcpv6_prot,
2214         .ops            =       &inet6_stream_ops,
2215         .flags          =       INET_PROTOSW_PERMANENT |
2216                                 INET_PROTOSW_ICSK,
2217 };
2218
2219 static int __net_init tcpv6_net_init(struct net *net)
2220 {
2221         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2222                                     SOCK_RAW, IPPROTO_TCP, net);
2223 }
2224
2225 static void __net_exit tcpv6_net_exit(struct net *net)
2226 {
2227         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2228 }
2229
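     /* On namespace teardown, purge any remaining IPv6 timewait sockets so
      * they do not pin the dying netns.
      */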
2230 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2231 {
2232         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2233 }
2234
2235 static struct pernet_operations tcpv6_net_ops = {
2236         .init       = tcpv6_net_init,
2237         .exit       = tcpv6_net_exit,
2238         .exit_batch = tcpv6_net_exit_batch,
2239 };
2240
2241 int __init tcpv6_init(void)
2242 {
2243         int ret;
2244
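             /* Registration order matters: protocol handler, protosw, pernet
              * state, then MPTCP; failures unwind in reverse through the
              * labels below.
              */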
2245         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2246         if (ret)
2247                 goto out;
2248
2249         /* register inet6 protocol */
2250         ret = inet6_register_protosw(&tcpv6_protosw);
2251         if (ret)
2252                 goto out_tcpv6_protocol;
2253
2254         ret = register_pernet_subsys(&tcpv6_net_ops);
2255         if (ret)
2256                 goto out_tcpv6_protosw;
2257
2258         ret = mptcpv6_init();
2259         if (ret)
2260                 goto out_tcpv6_pernet_subsys;
2261
2262 out:
2263         return ret;
2264
2265 out_tcpv6_pernet_subsys:
2266         unregister_pernet_subsys(&tcpv6_net_ops);
2267 out_tcpv6_protosw:
2268         inet6_unregister_protosw(&tcpv6_protosw);
2269 out_tcpv6_protocol:
2270         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2271         goto out;
2272 }
2273
2274 void tcpv6_exit(void)
2275 {
2276         unregister_pernet_subsys(&tcpv6_net_ops);
2277         inet6_unregister_protosw(&tcpv6_protosw);
2278         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2279 }