diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833ad5b01c32590f62e2b20131869327..4c2dd9f863f76eb6e55202b19e1acebf4fbac887 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -215,7 +216,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
                icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -223,6 +224,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
        icsk->icsk_ack.pingpong = 0;
        icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
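
Note: dropping the static qualifier and adding the EXPORT_SYMBOL above makes
tcp_enter_quickack_mode() callable from modules. This diff does not show the
caller, but the obvious client is a congestion-control module reacting to ECN
signals (DCTCP-style). A hedged sketch of such a caller, with a hypothetical
handler name:

/* Hypothetical module-side user (sketch only, not from this diff):
 * force immediate ACKs when the CE state of incoming traffic flips.
 */
static void example_ce_event(struct sock *sk)
{
        tcp_enter_quickack_mode(sk, 1);
}
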
@@ -243,10 +245,17 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
                tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
-static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
+static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
 {
-       if (tcp_hdr(skb)->cwr)
-               tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+       if (tcp_hdr(skb)->cwr) {
+               tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+
+               /* If the sender is telling us it has entered CWR, then its
+                * cwnd may be very low (even just 1 packet), so we should ACK
+                * immediately.
+                */
+               inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+       }
 }
 
 static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
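
Note: ICSK_ACK_NOW requests a single immediate ACK without latching the
connection into full quickack mode. The consumer is the __tcp_ack_snd_check()
hunk near the end of this diff; condensed, the pattern is:

/* Condensed from the __tcp_ack_snd_check() hunk below: the one-shot
 * flag is tested alongside the existing quickack condition.
 */
if (tcp_in_quickack_mode(sk) ||
    (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW))
        tcp_send_ack(sk);
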
@@ -582,9 +591,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (tp->rx_opt.rcv_tsecr &&
-           (TCP_SKB_CB(skb)->end_seq -
-            TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+       if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
+               return;
+       tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
+
+       if (TCP_SKB_CB(skb)->end_seq -
+           TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
                u32 delta_us;
 
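
Note: caching the last echoed timestamp in tp->rcv_rtt_last_tsecr bounds the
measurement to one receiver-side RTT sample per distinct tsecr. Without it, a
burst of segments all echoing the same value would each feed a sample, and
every sample after the first would be inflated by the inter-arrival gaps. The
arithmetic of a sample (continuing past the lines shown; TCP_TS_HZ == 1000,
i.e. 1 ms timestamp ticks, is an assumption about this kernel's clock):

u32 delta    = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; /* ticks since echo */
u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);        /* 1 tick = 1000 us */
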
@@ -869,6 +881,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
 {
        tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
        tp->rack.dsack_seen = 1;
+       tp->dsack_dups++;
 }
 
 /* It's reordering when higher sequence was delivered (i.e. sacked) before
@@ -900,8 +913,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
                                       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
        }
 
-       tp->rack.reord = 1;
        /* This exciting event is worth to be remembered. 8) */
+       tp->reord_seen++;
        NET_INC_STATS(sock_net(sk),
                      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
 }
@@ -1865,6 +1878,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
        tp->reordering = min_t(u32, tp->packets_out + addend,
                               sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+       tp->reord_seen++;
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
 
@@ -3458,7 +3472,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
        tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-       tp->rx_opt.ts_recent_stamp = get_seconds();
+       tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
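
Note: get_seconds() reads the wall clock, which jumps under settimeofday()/NTP
steps; ktime_get_seconds() is monotonic. Since ts_recent_stamp is only ever
consumed as a difference, a monotonic clock is strictly safer for PAWS-style
staleness checks, along the lines of (sketch; TCP_PAWS_24DAYS is the existing
threshold from include/net/tcp.h, the helper name is made up):

if (ktime_get_seconds() - tp->rx_opt.ts_recent_stamp > TCP_PAWS_24DAYS)
        ts_recent_is_stale();   /* hypothetical: ts_recent may be replaced */
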
@@ -4339,6 +4353,11 @@ static bool tcp_try_coalesce(struct sock *sk,
        if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
                return false;
 
+#ifdef CONFIG_TLS_DEVICE
+       if (from->decrypted != to->decrypted)
+               return false;
+#endif
+
        if (!skb_try_coalesce(to, from, fragstolen, &delta))
                return false;
 
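
Note: with NIC TLS offload (CONFIG_TLS_DEVICE), skb->decrypted marks payload
the device already decrypted; merging it with still-encrypted bytes would
corrupt the TLS record stream. The rule this hunk (and the tcp_collapse()
hunks further down) enforces can be read as the following hypothetical helper,
written here only for illustration:

static inline bool tcp_skb_same_tls_state(const struct sk_buff *to,
                                          const struct sk_buff *from)
{
#ifdef CONFIG_TLS_DEVICE
        return to->decrypted == from->decrypted;
#else
        return true;    /* no offload: nothing to keep apart */
#endif
}
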
@@ -4357,6 +4376,23 @@ static bool tcp_try_coalesce(struct sock *sk,
        return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+                            struct sk_buff *to,
+                            struct sk_buff *from,
+                            bool *fragstolen)
+{
+       bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+       /* In case tcp_drop() is called later, update to->gso_segs */
+       if (res) {
+               u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+                              max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+               skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+       }
+       return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
        sk_drops_add(sk, skb);
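
Note: the gso_segs fix-up matters because drop accounting is per segment, not
per skb: sk_drops_add(), as defined in include/net/sock.h in this era (quoted
from memory, so treat as a sketch), credits one drop per GSO segment, which is
why a coalesced skb must carry the sum of both parts:

static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
{
        int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);

        atomic_add(segs, &sk->sk_drops);
}
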
@@ -4480,8 +4516,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
        /* In the typical case, we are adding an skb to the end of the list.
         * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
         */
-       if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-                            skb, &fragstolen)) {
+       if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+                                skb, &fragstolen)) {
 coalesce_done:
                tcp_grow_window(sk, skb);
                kfree_skb_partial(skb, fragstolen);
@@ -4509,7 +4545,7 @@ coalesce_done:
                                /* All the bits are present. Drop. */
                                NET_INC_STATS(sock_net(sk),
                                              LINUX_MIB_TCPOFOMERGE);
-                               __kfree_skb(skb);
+                               tcp_drop(sk, skb);
                                skb = NULL;
                                tcp_dsack_set(sk, seq, end_seq);
                                goto add_sack;
@@ -4528,11 +4564,11 @@ coalesce_done:
                                                 TCP_SKB_CB(skb1)->end_seq);
                                NET_INC_STATS(sock_net(sk),
                                              LINUX_MIB_TCPOFOMERGE);
-                               __kfree_skb(skb1);
+                               tcp_drop(sk, skb1);
                                goto merge_right;
                        }
-               } else if (tcp_try_coalesce(sk, skb1,
-                                           skb, &fragstolen)) {
+               } else if (tcp_ooo_try_coalesce(sk, skb1,
+                                               skb, &fragstolen)) {
                        goto coalesce_done;
                }
                p = &parent->rb_right;
@@ -4617,8 +4653,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
        skb->data_len = data_len;
        skb->len = size;
 
-       if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+       if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
                goto err_free;
+       }
 
        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
        if (err)
@@ -4665,7 +4703,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
        skb_dst_drop(skb);
        __skb_pull(skb, tcp_hdr(skb)->doff * 4);
 
-       tcp_ecn_accept_cwr(tp, skb);
+       tcp_ecn_accept_cwr(sk, skb);
 
        tp->rx_opt.dsack = 0;
 
@@ -4674,18 +4712,21 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
         *  Out of sequence packets to the out_of_order_queue.
         */
        if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
-               if (tcp_receive_window(tp) == 0)
+               if (tcp_receive_window(tp) == 0) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
                        goto out_of_window;
+               }
 
                /* Ok. In sequence. In window. */
 queue_and_out:
                if (skb_queue_len(&sk->sk_receive_queue) == 0)
                        sk_forced_mem_schedule(sk, skb->truesize);
-               else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+               else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
                        goto drop;
+               }
 
                eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-               tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                if (skb->len)
                        tcp_event_data_recv(sk, skb);
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4694,11 +4735,11 @@ queue_and_out:
                if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                        tcp_ofo_queue(sk);
 
-                       /* RFC2581. 4.2. SHOULD send immediate ACK, when
+                       /* RFC5681. 4.2. SHOULD send immediate ACK, when
                         * gap in queue is filled.
                         */
                        if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
-                               inet_csk(sk)->icsk_ack.pingpong = 0;
+                               inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
                }
 
                if (tp->rx_opt.num_sacks)
@@ -4741,8 +4782,10 @@ drop:
                /* If window is closed, drop tail of packet. But after
                 * remembering D-SACK for its head made in previous line.
                 */
-               if (!tcp_receive_window(tp))
+               if (!tcp_receive_window(tp)) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
                        goto out_of_window;
+               }
                goto queue_and_out;
        }
 
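
Note: LINUX_MIB_TCPZEROWINDOWDROP and LINUX_MIB_TCPRCVQDROP are new counters
in this series. Assuming they are registered in net/ipv4/proc.c like every
other LINUX_MIB_* entry (the hookup is not part of this diff), it would look
like:

/* Presumed snmp4_net_list additions; the names then surface in
 * /proc/net/netstat as TcpExtTCPZeroWindowDrop / TcpExtTCPRcvQDrop.
 */
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
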
@@ -4860,6 +4903,9 @@ restart:
                        break;
 
                memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+#ifdef CONFIG_TLS_DEVICE
+               nskb->decrypted = skb->decrypted;
+#endif
                TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
                if (list)
                        __skb_queue_before(list, skb, nskb);
@@ -4887,6 +4933,10 @@ restart:
                                    skb == tail ||
                                    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
                                        goto end;
+#ifdef CONFIG_TLS_DEVICE
+                               if (skb->decrypted != nskb->decrypted)
+                                       goto end;
+#endif
                        }
                }
        }
@@ -4901,6 +4951,7 @@ end:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
+       u32 range_truesize, sum_tiny = 0;
        struct sk_buff *skb, *head;
        u32 start, end;
 
@@ -4912,6 +4963,7 @@ new_range:
        }
        start = TCP_SKB_CB(skb)->seq;
        end = TCP_SKB_CB(skb)->end_seq;
+       range_truesize = skb->truesize;
 
        for (head = skb;;) {
                skb = skb_rb_next(skb);
@@ -4922,11 +4974,20 @@ new_range:
                if (!skb ||
                    after(TCP_SKB_CB(skb)->seq, end) ||
                    before(TCP_SKB_CB(skb)->end_seq, start)) {
-                       tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-                                    head, skb, start, end);
+                       /* Do not attempt collapsing tiny skbs */
+                       if (range_truesize != head->truesize ||
+                           end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+                               tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+                                            head, skb, start, end);
+                       } else {
+                               sum_tiny += range_truesize;
+                               if (sum_tiny > sk->sk_rcvbuf >> 3)
+                                       return;
+                       }
                        goto new_range;
                }
 
+               range_truesize += skb->truesize;
                if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
                        start = TCP_SKB_CB(skb)->seq;
                if (after(TCP_SKB_CB(skb)->end_seq, end))
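
Note: a range counts as "tiny" only when it is a single skb (range_truesize
== head->truesize) spanning less than SKB_WITH_OVERHEAD(SK_MEM_QUANTUM) of
sequence space. Collapsing such a range cannot reclaim memory, so it is
skipped, but the pass aborts once skipped truesize exceeds sk_rcvbuf/8 so a
flood of tiny ranges cannot turn collapsing into a CPU sink. Rough numbers,
assuming a 4 KiB SK_MEM_QUANTUM and ~320 bytes of skb_shared_info overhead:

/* SKB_WITH_OVERHEAD(4096) ~= 3776 bytes   -> "tiny" payload threshold
 * sum_tiny cutoff: sk->sk_rcvbuf >> 3     -> 12.5% of the buffer
 */
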
@@ -4941,6 +5002,7 @@ new_range:
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *     freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
@@ -4948,20 +5010,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct rb_node *node, *prev;
+       int goal;
 
        if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
                return false;
 
        NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+       goal = sk->sk_rcvbuf >> 3;
        node = &tp->ooo_last_skb->rbnode;
        do {
                prev = rb_prev(node);
                rb_erase(node, &tp->out_of_order_queue);
+               goal -= rb_to_skb(node)->truesize;
                tcp_drop(sk, rb_to_skb(node));
-               sk_mem_reclaim(sk);
-               if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-                   !tcp_under_memory_pressure(sk))
-                       break;
+               if (!prev || goal <= 0) {
+                       sk_mem_reclaim(sk);
+                       if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+                           !tcp_under_memory_pressure(sk))
+                               break;
+                       goal = sk->sk_rcvbuf >> 3;
+               }
                node = prev;
        } while (node);
        tp->ooo_last_skb = rb_to_skb(prev);
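
Note: the point of "goal" is batching. Instead of running sk_mem_reclaim()
after every erased skb, the loop first accumulates at least sk_rcvbuf/8 of
freed truesize (matching the new rule 3 in the comment above). A worked
example with an assumed buffer size:

/* sk_rcvbuf = 4 MiB  ->  goal = sk_rcvbuf >> 3 = 512 KiB,
 * so each reclaim round erases at least 12.5% of the budget and an
 * attacker feeding tiny ooo skbs no longer gets a reclaim per packet.
 */
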
@@ -4996,6 +5064,9 @@ static int tcp_prune_queue(struct sock *sk)
        else if (tcp_under_memory_pressure(sk))
                tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+       if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+               return 0;
+
        tcp_collapse_ofo_queue(sk);
        if (!skb_queue_empty(&sk->sk_receive_queue))
                tcp_collapse(sk, &sk->sk_receive_queue, NULL,
@@ -5108,7 +5179,9 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
            (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
             __tcp_select_window(sk) >= tp->rcv_wnd)) ||
            /* We ACK each frame or... */
-           tcp_in_quickack_mode(sk)) {
+           tcp_in_quickack_mode(sk) ||
+           /* Protocol state mandates a one-time immediate ACK */
+           inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
 send_now:
                tcp_send_ack(sk);
                return;
@@ -5484,6 +5557,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
                                tcp_ack(sk, skb, 0);
                                __kfree_skb(skb);
                                tcp_data_snd_check(sk);
+                               /* When receiving pure ack in fast path, update
+                                * last ts ecr directly instead of calling
+                                * tcp_rcv_rtt_measure_ts()
+                                */
+                               tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
                                return;
                        } else { /* Header too small */
                                TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -5585,6 +5663,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
        if (skb) {
                icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
                security_inet_conn_established(sk, skb);
+               sk_mark_napi_id(sk, skb);
        }
 
        tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
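
Note: this also explains the new #include <net/busy_poll.h> at the top of the
diff. sk_mark_napi_id() copies the skb's NAPI id onto the socket so socket and
epoll busy polling can spin on the right RX queue; apparently it was never set
on this actively-opened path before. From memory (so a sketch, modulo later
annotations), the helper is:

static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
        sk->sk_napi_id = skb->napi_id;
#endif
}
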
@@ -6413,6 +6492,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        tcp_rsk(req)->snt_isn = isn;
        tcp_rsk(req)->txhash = net_tx_rndhash();
        tcp_openreq_init_rwin(req, sk, dst);
+       sk_rx_queue_set(req_to_sk(req), skb);
        if (!want_cookie) {
                tcp_reqsk_record_syn(sk, req, skb);
                fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
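
Note: sk_rx_queue_set() records which hardware RX queue the SYN arrived on, so
steering decisions can stay queue-affine from the first packet of a
passively-opened connection. A simplified sketch of the helper (assuming the
CONFIG_XPS-guarded variant from include/net/sock.h; the real one has extra
sanity checks):

static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_XPS
        if (skb_rx_queue_recorded(skb))
                sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
#endif
}
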