Git Repo - linux.git/commitdiff
skmsg: Avoid lock_sock() in sk_psock_backlog()
author: Cong Wang <[email protected]>
Wed, 31 Mar 2021 02:32:25 +0000 (19:32 -0700)
committer: Alexei Starovoitov <[email protected]>
Thu, 1 Apr 2021 17:56:13 +0000 (10:56 -0700)
We do not have to lock the sock to avoid losing sk_socket;
instead, we can purge all the ingress queues when we close
the socket. Sending or receiving packets after orphaning
the socket makes no sense.

We do purge these queues when the psock refcnt reaches zero, but
here we want to purge them explicitly in sock_map_close().
There are also some nasty race conditions around testing the
SK_PSOCK_TX_ENABLED bit and queuing/canceling the psock work;
we can expand psock->ingress_lock a bit to protect them too.

As noticed by John, we still have to protect psock->work with a
lock (work_mutex), because the same work item could be running
concurrently on different CPUs.

Signed-off-by: Cong Wang <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: John Fastabend <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
include/linux/skmsg.h
net/core/skmsg.c
net/core/sock_map.c

index f2d45a73b2b25b9970f3c8d6c65d62e4ea5f6e20..7382c4b518d75c5d54142fa59f49380a02ba39ca 100644 (file)
@@ -99,6 +99,7 @@ struct sk_psock {
        void (*saved_write_space)(struct sock *sk);
        void (*saved_data_ready)(struct sock *sk);
        struct proto                    *sk_proto;
+       struct mutex                    work_mutex;
        struct sk_psock_work_state      work_state;
        struct work_struct              work;
        union {
@@ -347,6 +348,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 }
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node);
+void sk_psock_stop(struct sk_psock *psock, bool wait);
 
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
index 305dddc5185738ab1c0af4ce9c11e3fbe1f2b4cf..9c25020086a9c1ece2aa1a933655fe51e6f01d4d 100644 (file)
@@ -497,7 +497,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
        if (!ingress) {
                if (!sock_writeable(psock->sk))
                        return -EAGAIN;
-               return skb_send_sock_locked(psock->sk, skb, off, len);
+               return skb_send_sock(psock->sk, skb, off, len);
        }
        return sk_psock_skb_ingress(psock, skb);
 }
@@ -511,8 +511,7 @@ static void sk_psock_backlog(struct work_struct *work)
        u32 len, off;
        int ret;
 
-       /* Lock sock to avoid losing sk_socket during loop. */
-       lock_sock(psock->sk);
+       mutex_lock(&psock->work_mutex);
        if (state->skb) {
                skb = state->skb;
                len = state->len;
@@ -529,7 +528,7 @@ start:
                skb_bpf_redirect_clear(skb);
                do {
                        ret = -EIO;
-                       if (likely(psock->sk->sk_socket))
+                       if (!sock_flag(psock->sk, SOCK_DEAD))
                                ret = sk_psock_handle_skb(psock, skb, off,
                                                          len, ingress);
                        if (ret <= 0) {
@@ -553,7 +552,7 @@ start:
                        kfree_skb(skb);
        }
 end:
-       release_sock(psock->sk);
+       mutex_unlock(&psock->work_mutex);
 }
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -591,6 +590,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
        spin_lock_init(&psock->link_lock);
 
        INIT_WORK(&psock->work, sk_psock_backlog);
+       mutex_init(&psock->work_mutex);
        INIT_LIST_HEAD(&psock->ingress_msg);
        spin_lock_init(&psock->ingress_lock);
        skb_queue_head_init(&psock->ingress_skb);
@@ -631,7 +631,7 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
        }
 }
 
-static void sk_psock_zap_ingress(struct sk_psock *psock)
+static void __sk_psock_zap_ingress(struct sk_psock *psock)
 {
        struct sk_buff *skb;
 
@@ -639,9 +639,7 @@ static void sk_psock_zap_ingress(struct sk_psock *psock)
                skb_bpf_redirect_clear(skb);
                kfree_skb(skb);
        }
-       spin_lock_bh(&psock->ingress_lock);
        __sk_psock_purge_ingress_msg(psock);
-       spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_link_destroy(struct sk_psock *psock)
@@ -654,6 +652,18 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
        }
 }
 
+void sk_psock_stop(struct sk_psock *psock, bool wait)
+{
+       spin_lock_bh(&psock->ingress_lock);
+       sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+       sk_psock_cork_free(psock);
+       __sk_psock_zap_ingress(psock);
+       spin_unlock_bh(&psock->ingress_lock);
+
+       if (wait)
+               cancel_work_sync(&psock->work);
+}
+
 static void sk_psock_done_strp(struct sk_psock *psock);
 
 static void sk_psock_destroy_deferred(struct work_struct *gc)
@@ -665,12 +675,12 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
        sk_psock_done_strp(psock);
 
        cancel_work_sync(&psock->work);
+       mutex_destroy(&psock->work_mutex);
 
        psock_progs_drop(&psock->progs);
 
        sk_psock_link_destroy(psock);
        sk_psock_cork_free(psock);
-       sk_psock_zap_ingress(psock);
 
        if (psock->sk_redir)
                sock_put(psock->sk_redir);
@@ -688,8 +698,7 @@ static void sk_psock_destroy(struct rcu_head *rcu)
 
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
-       sk_psock_cork_free(psock);
-       sk_psock_zap_ingress(psock);
+       sk_psock_stop(psock, false);
 
        write_lock_bh(&sk->sk_callback_lock);
        sk_psock_restore_proto(sk, psock);
@@ -699,7 +708,6 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
        else if (psock->progs.stream_verdict)
                sk_psock_stop_verdict(sk, psock);
        write_unlock_bh(&sk->sk_callback_lock);
-       sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
 
        call_rcu(&psock->rcu, sk_psock_destroy);
 }
@@ -770,14 +778,20 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
         * error that caused the pipe to break. We can't send a packet on
         * a socket that is in this state so we drop the skb.
         */
-       if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
-           !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+       if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
+               kfree_skb(skb);
+               return;
+       }
+       spin_lock_bh(&psock_other->ingress_lock);
+       if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+               spin_unlock_bh(&psock_other->ingress_lock);
                kfree_skb(skb);
                return;
        }
 
        skb_queue_tail(&psock_other->ingress_skb, skb);
        schedule_work(&psock_other->work);
+       spin_unlock_bh(&psock_other->ingress_lock);
 }
 
 static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
@@ -845,8 +859,12 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
                        err = sk_psock_skb_ingress_self(psock, skb);
                }
                if (err < 0) {
-                       skb_queue_tail(&psock->ingress_skb, skb);
-                       schedule_work(&psock->work);
+                       spin_lock_bh(&psock->ingress_lock);
+                       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+                               skb_queue_tail(&psock->ingress_skb, skb);
+                               schedule_work(&psock->work);
+                       }
+                       spin_unlock_bh(&psock->ingress_lock);
                }
                break;
        case __SK_REDIRECT:
index dd53a7771d7e8874fef4b6b615c0201f1542a94c..e564fdeaada1175a54a7ee3abfebd61368497e8d 100644 (file)
@@ -1540,6 +1540,7 @@ void sock_map_close(struct sock *sk, long timeout)
        saved_close = psock->saved_close;
        sock_map_remove_links(sk, psock);
        rcu_read_unlock();
+       sk_psock_stop(psock, true);
        release_sock(sk);
        saved_close(sk, timeout);
 }
This page took 0.070455 seconds and 4 git commands to generate.