1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * NET4: Implementation of BSD Unix domain sockets.
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by the above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid huge amounts
34 * of socks hashed (this is for unix_gc()
35 * performance reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skbs queueable in the
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
48 * Known differences from the reference BSD that was tested:
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns a 0-length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * starting with 0, so that this name space does not intersect
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119 #include <linux/bpf-cgroup.h>
121 static atomic_long_t unix_nr_socks;
122 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
123 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
125 /* SMP locking strategy:
126 * the hash table is protected with a spinlock.
127 * each socket's state is protected by a separate spinlock.
130 static unsigned int unix_unbound_hash(struct sock *sk)
132 unsigned long hash = (unsigned long)sk;
138 return hash & UNIX_HASH_MOD;
141 static unsigned int unix_bsd_hash(struct inode *i)
143 return i->i_ino & UNIX_HASH_MOD;
146 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
147 int addr_len, int type)
149 __wsum csum = csum_partial(sunaddr, addr_len, 0);
152 hash = (__force unsigned int)csum_fold(csum);
156 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
159 static void unix_table_double_lock(struct net *net,
160 unsigned int hash1, unsigned int hash2)
162 if (hash1 == hash2) {
163 spin_lock(&net->unx.table.locks[hash1]);
170 spin_lock(&net->unx.table.locks[hash1]);
171 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
174 static void unix_table_double_unlock(struct net *net,
175 unsigned int hash1, unsigned int hash2)
177 if (hash1 == hash2) {
178 spin_unlock(&net->unx.table.locks[hash1]);
182 spin_unlock(&net->unx.table.locks[hash1]);
183 spin_unlock(&net->unx.table.locks[hash2]);
186 #ifdef CONFIG_SECURITY_NETWORK
187 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189 UNIXCB(skb).secid = scm->secid;
192 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194 scm->secid = UNIXCB(skb).secid;
197 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
199 return (scm->secid == UNIXCB(skb).secid);
202 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
205 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
208 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
212 #endif /* CONFIG_SECURITY_NETWORK */
214 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
216 return unix_peer(osk) == sk;
219 static inline int unix_may_send(struct sock *sk, struct sock *osk)
221 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
224 static inline int unix_recvq_full_lockless(const struct sock *sk)
226 return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
229 struct sock *unix_peer_get(struct sock *s)
237 unix_state_unlock(s);
240 EXPORT_SYMBOL_GPL(unix_peer_get);
242 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
245 struct unix_address *addr;
247 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
251 refcount_set(&addr->refcnt, 1);
252 addr->len = addr_len;
253 memcpy(addr->name, sunaddr, addr_len);
258 static inline void unix_release_addr(struct unix_address *addr)
260 if (refcount_dec_and_test(&addr->refcnt))
265 * Check unix socket name:
266 * - it must not be zero length.
267 * - if it does not start with a zero byte, it should be NUL-terminated (FS object)
268 * - if it starts with a zero byte, it is an abstract name.
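/* For illustration only, a minimal userspace sketch of the two name forms
 * described above; the pathname "/tmp/example.sock" and the abstract name
 * "example" are arbitrary placeholders, and error handling is omitted.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	int fd2 = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	// Filesystem (BSD) name: NUL-terminated path in sun_path.
 *	strcpy(a.sun_path, "/tmp/example.sock");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract name (on a fresh socket): sun_path starts with a 0 byte
 *	// and the address length counts exactly the bytes of the name.
 *	memset(&a, 0, sizeof(a));
 *	a.sun_family = AF_UNIX;
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(fd2, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */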
271 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
273 if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
274 addr_len > sizeof(*sunaddr))
277 if (sunaddr->sun_family != AF_UNIX)
283 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
285 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
286 short offset = offsetof(struct sockaddr_storage, __data);
288 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
290 /* This may look like an off by one error but it is a bit more
291 * subtle. 108 is the longest valid AF_UNIX path for a binding.
292 * sun_path[108] doesn't as such exist. However in kernel space
293 * we are guaranteed that it is a valid memory location in our
294 * kernel address buffer because syscall functions always pass
295 * a pointer to struct sockaddr_storage, which has a bigger buffer
296 * than 108. Also, we must terminate sun_path for strlen() in
299 addr->__data[addr_len - offset] = 0;
301 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
302 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
303 * know the actual buffer.
305 return strlen(addr->__data) + offset + 1;
308 static void __unix_remove_socket(struct sock *sk)
310 sk_del_node_init(sk);
313 static void __unix_insert_socket(struct net *net, struct sock *sk)
315 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
316 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
319 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
320 struct unix_address *addr, unsigned int hash)
322 __unix_remove_socket(sk);
323 smp_store_release(&unix_sk(sk)->addr, addr);
326 __unix_insert_socket(net, sk);
329 static void unix_remove_socket(struct net *net, struct sock *sk)
331 spin_lock(&net->unx.table.locks[sk->sk_hash]);
332 __unix_remove_socket(sk);
333 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
336 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
338 spin_lock(&net->unx.table.locks[sk->sk_hash]);
339 __unix_insert_socket(net, sk);
340 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
343 static void unix_insert_bsd_socket(struct sock *sk)
345 spin_lock(&bsd_socket_locks[sk->sk_hash]);
346 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
347 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
350 static void unix_remove_bsd_socket(struct sock *sk)
352 if (!hlist_unhashed(&sk->sk_bind_node)) {
353 spin_lock(&bsd_socket_locks[sk->sk_hash]);
354 __sk_del_bind_node(sk);
355 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
357 sk_node_init(&sk->sk_bind_node);
361 static struct sock *__unix_find_socket_byname(struct net *net,
362 struct sockaddr_un *sunname,
363 int len, unsigned int hash)
367 sk_for_each(s, &net->unx.table.buckets[hash]) {
368 struct unix_sock *u = unix_sk(s);
370 if (u->addr->len == len &&
371 !memcmp(u->addr->name, sunname, len))
377 static inline struct sock *unix_find_socket_byname(struct net *net,
378 struct sockaddr_un *sunname,
379 int len, unsigned int hash)
383 spin_lock(&net->unx.table.locks[hash]);
384 s = __unix_find_socket_byname(net, sunname, len, hash);
387 spin_unlock(&net->unx.table.locks[hash]);
391 static struct sock *unix_find_socket_byinode(struct inode *i)
393 unsigned int hash = unix_bsd_hash(i);
396 spin_lock(&bsd_socket_locks[hash]);
397 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
398 struct dentry *dentry = unix_sk(s)->path.dentry;
400 if (dentry && d_backing_inode(dentry) == i) {
402 spin_unlock(&bsd_socket_locks[hash]);
406 spin_unlock(&bsd_socket_locks[hash]);
410 /* Support code for asymmetrically connected dgram sockets
412 * If a datagram socket is connected to a socket not itself connected
413 * to the first socket (e.g., /dev/log), clients may only enqueue more
414 * messages if the present receive queue of the server socket is not
415 * "too large". This means there's a second writeability condition
416 * poll and sendmsg need to test. The dgram recv code will do a wake
417 * up on the peer_wait wait queue of a socket upon reception of a
418 * datagram which needs to be propagated to sleeping would-be writers
419 * since these might not have sent anything so far. This can't be
420 * accomplished via poll_wait because the lifetime of the server
421 * socket might be less than that of its clients if these break their
422 * association with it or if the server socket is closed while clients
423 * are still connected to it and there's no way to inform "a polling
424 * implementation" that it should let go of a certain wait queue
426 * In order to propagate a wake up, a wait_queue_entry_t of the client
427 * socket is enqueued on the peer_wait queue of the server socket
428 * whose wake function does a wake_up on the ordinary client socket
429 * wait queue. This connection is established whenever a write (or
430 * poll for write) hits the flow control condition and is broken when the
431 * association to the server socket is dissolved or after a wake up
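/* For illustration only, a hedged userspace sketch of the situation above;
 * "/run/example.sock" stands in for a server socket such as /dev/log, and
 * error handling is omitted. poll() may sleep until the relay below reports
 * that the server's receive queue has drained.
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	struct sockaddr_un srv = { .sun_family = AF_UNIX,
 *				   .sun_path = "/run/example.sock" };
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	connect(fd, (struct sockaddr *)&srv, sizeof(srv));
 *	poll(&pfd, 1, -1);		// the second writeability condition
 *	send(fd, "msg", 3, 0);
 */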
435 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
439 wait_queue_head_t *u_sleep;
441 u = container_of(q, struct unix_sock, peer_wake);
443 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
445 u->peer_wake.private = NULL;
447 /* relaying can only happen while the wq still exists */
448 u_sleep = sk_sleep(&u->sk);
450 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
455 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
457 struct unix_sock *u, *u_other;
461 u_other = unix_sk(other);
463 spin_lock(&u_other->peer_wait.lock);
465 if (!u->peer_wake.private) {
466 u->peer_wake.private = other;
467 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
472 spin_unlock(&u_other->peer_wait.lock);
476 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
479 struct unix_sock *u, *u_other;
482 u_other = unix_sk(other);
483 spin_lock(&u_other->peer_wait.lock);
485 if (u->peer_wake.private == other) {
486 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
487 u->peer_wake.private = NULL;
490 spin_unlock(&u_other->peer_wait.lock);
493 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
496 unix_dgram_peer_wake_disconnect(sk, other);
497 wake_up_interruptible_poll(sk_sleep(sk),
504 * - unix_peer(sk) == other
505 * - association is stable
507 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
511 connected = unix_dgram_peer_wake_connect(sk, other);
513 /* If other is SOCK_DEAD, we want to make sure we signal
514 * POLLOUT, such that a subsequent write() can get a
515 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
516 * to other and it's full, we will hang waiting for POLLOUT.
518 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
522 unix_dgram_peer_wake_disconnect(sk, other);
527 static int unix_writable(const struct sock *sk, unsigned char state)
529 return state != TCP_LISTEN &&
530 (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
533 static void unix_write_space(struct sock *sk)
535 struct socket_wq *wq;
538 if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
539 wq = rcu_dereference(sk->sk_wq);
540 if (skwq_has_sleeper(wq))
541 wake_up_interruptible_sync_poll(&wq->wait,
542 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
543 sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
548 /* When a dgram socket disconnects (or changes its peer), we clear its receive
549 * queue of packets that arrived from the previous peer. First, this allows
550 * flow control based only on wmem_alloc; second, an sk connected to a peer
551 * may receive messages only from that peer. */
552 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
554 if (!skb_queue_empty(&sk->sk_receive_queue)) {
555 skb_queue_purge(&sk->sk_receive_queue);
556 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
558 /* If one link of a bidirectional dgram pipe is disconnected,
559 * we signal an error. Messages are lost. Do not do this
560 * when the peer was not connected to us.
562 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
563 WRITE_ONCE(other->sk_err, ECONNRESET);
564 sk_error_report(other);
569 static void unix_sock_destructor(struct sock *sk)
571 struct unix_sock *u = unix_sk(sk);
573 skb_queue_purge(&sk->sk_receive_queue);
575 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
576 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
577 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
578 if (!sock_flag(sk, SOCK_DEAD)) {
579 pr_info("Attempt to release alive unix socket: %p\n", sk);
584 unix_release_addr(u->addr);
586 atomic_long_dec(&unix_nr_socks);
587 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
588 #ifdef UNIX_REFCNT_DEBUG
589 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
590 atomic_long_read(&unix_nr_socks));
594 static void unix_release_sock(struct sock *sk, int embrion)
596 struct unix_sock *u = unix_sk(sk);
602 unix_remove_socket(sock_net(sk), sk);
603 unix_remove_bsd_socket(sk);
608 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
610 u->path.dentry = NULL;
612 state = sk->sk_state;
613 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
615 skpair = unix_peer(sk);
616 unix_peer(sk) = NULL;
618 unix_state_unlock(sk);
620 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
622 kfree_skb(u->oob_skb);
627 wake_up_interruptible_all(&u->peer_wait);
629 if (skpair != NULL) {
630 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
631 unix_state_lock(skpair);
633 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
634 if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
635 WRITE_ONCE(skpair->sk_err, ECONNRESET);
636 unix_state_unlock(skpair);
637 skpair->sk_state_change(skpair);
638 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
641 unix_dgram_peer_wake_disconnect(sk, skpair);
642 sock_put(skpair); /* It may now die */
645 /* Try to flush out this socket. Throw out buffers at least */
647 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
648 if (state == TCP_LISTEN)
649 unix_release_sock(skb->sk, 1);
650 /* passed fds are erased in the kfree_skb hook */
651 UNIXCB(skb).consumed = skb->len;
660 /* ---- Socket is dead now and most probably destroyed ---- */
663 * Fixme: BSD difference: In BSD all sockets connected to us get
664 * ECONNRESET and we die on the spot. In Linux we behave
665 * like files and pipes do and wait for the last
668 * Can't we simply set sock->err?
670 * What does the above comment talk about? --ANK(980817)
673 if (READ_ONCE(unix_tot_inflight))
674 unix_gc(); /* Garbage collect fds */
677 static void init_peercred(struct sock *sk)
679 const struct cred *old_cred;
682 spin_lock(&sk->sk_peer_lock);
683 old_pid = sk->sk_peer_pid;
684 old_cred = sk->sk_peer_cred;
685 sk->sk_peer_pid = get_pid(task_tgid(current));
686 sk->sk_peer_cred = get_current_cred();
687 spin_unlock(&sk->sk_peer_lock);
693 static void copy_peercred(struct sock *sk, struct sock *peersk)
695 const struct cred *old_cred;
699 spin_lock(&sk->sk_peer_lock);
700 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
702 spin_lock(&peersk->sk_peer_lock);
703 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
705 old_pid = sk->sk_peer_pid;
706 old_cred = sk->sk_peer_cred;
707 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
708 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
710 spin_unlock(&sk->sk_peer_lock);
711 spin_unlock(&peersk->sk_peer_lock);
717 static int unix_listen(struct socket *sock, int backlog)
720 struct sock *sk = sock->sk;
721 struct unix_sock *u = unix_sk(sk);
724 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
725 goto out; /* Only stream/seqpacket sockets accept */
727 if (!READ_ONCE(u->addr))
728 goto out; /* No listens on an unbound socket */
730 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
732 if (backlog > sk->sk_max_ack_backlog)
733 wake_up_interruptible_all(&u->peer_wait);
734 sk->sk_max_ack_backlog = backlog;
735 WRITE_ONCE(sk->sk_state, TCP_LISTEN);
737 /* set credentials so connect can copy them */
742 unix_state_unlock(sk);
747 static int unix_release(struct socket *);
748 static int unix_bind(struct socket *, struct sockaddr *, int);
749 static int unix_stream_connect(struct socket *, struct sockaddr *,
750 int addr_len, int flags);
751 static int unix_socketpair(struct socket *, struct socket *);
752 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
753 static int unix_getname(struct socket *, struct sockaddr *, int);
754 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
755 static __poll_t unix_dgram_poll(struct file *, struct socket *,
757 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
759 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
761 static int unix_shutdown(struct socket *, int);
762 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
763 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
764 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
765 struct pipe_inode_info *, size_t size,
767 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
768 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
769 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
770 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
771 static int unix_dgram_connect(struct socket *, struct sockaddr *,
773 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
774 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
777 #ifdef CONFIG_PROC_FS
778 static int unix_count_nr_fds(struct sock *sk)
784 spin_lock(&sk->sk_receive_queue.lock);
785 skb = skb_peek(&sk->sk_receive_queue);
787 u = unix_sk(skb->sk);
788 nr_fds += atomic_read(&u->scm_stat.nr_fds);
789 skb = skb_peek_next(skb, &sk->sk_receive_queue);
791 spin_unlock(&sk->sk_receive_queue.lock);
796 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
798 struct sock *sk = sock->sk;
799 unsigned char s_state;
804 s_state = READ_ONCE(sk->sk_state);
807 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
808 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
809 * SOCK_DGRAM is ordinary. So, no lock is needed.
811 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
812 nr_fds = atomic_read(&u->scm_stat.nr_fds);
813 else if (s_state == TCP_LISTEN)
814 nr_fds = unix_count_nr_fds(sk);
816 seq_printf(m, "scm_fds: %u\n", nr_fds);
820 #define unix_show_fdinfo NULL
823 static const struct proto_ops unix_stream_ops = {
825 .owner = THIS_MODULE,
826 .release = unix_release,
828 .connect = unix_stream_connect,
829 .socketpair = unix_socketpair,
830 .accept = unix_accept,
831 .getname = unix_getname,
835 .compat_ioctl = unix_compat_ioctl,
837 .listen = unix_listen,
838 .shutdown = unix_shutdown,
839 .sendmsg = unix_stream_sendmsg,
840 .recvmsg = unix_stream_recvmsg,
841 .read_skb = unix_stream_read_skb,
842 .mmap = sock_no_mmap,
843 .splice_read = unix_stream_splice_read,
844 .set_peek_off = sk_set_peek_off,
845 .show_fdinfo = unix_show_fdinfo,
848 static const struct proto_ops unix_dgram_ops = {
850 .owner = THIS_MODULE,
851 .release = unix_release,
853 .connect = unix_dgram_connect,
854 .socketpair = unix_socketpair,
855 .accept = sock_no_accept,
856 .getname = unix_getname,
857 .poll = unix_dgram_poll,
860 .compat_ioctl = unix_compat_ioctl,
862 .listen = sock_no_listen,
863 .shutdown = unix_shutdown,
864 .sendmsg = unix_dgram_sendmsg,
865 .read_skb = unix_read_skb,
866 .recvmsg = unix_dgram_recvmsg,
867 .mmap = sock_no_mmap,
868 .set_peek_off = sk_set_peek_off,
869 .show_fdinfo = unix_show_fdinfo,
872 static const struct proto_ops unix_seqpacket_ops = {
874 .owner = THIS_MODULE,
875 .release = unix_release,
877 .connect = unix_stream_connect,
878 .socketpair = unix_socketpair,
879 .accept = unix_accept,
880 .getname = unix_getname,
881 .poll = unix_dgram_poll,
884 .compat_ioctl = unix_compat_ioctl,
886 .listen = unix_listen,
887 .shutdown = unix_shutdown,
888 .sendmsg = unix_seqpacket_sendmsg,
889 .recvmsg = unix_seqpacket_recvmsg,
890 .mmap = sock_no_mmap,
891 .set_peek_off = sk_set_peek_off,
892 .show_fdinfo = unix_show_fdinfo,
895 static void unix_close(struct sock *sk, long timeout)
897 /* Nothing to do here, unix socket does not need a ->close().
898 * This is merely for sockmap.
902 static void unix_unhash(struct sock *sk)
904 /* Nothing to do here, unix socket does not need a ->unhash().
905 * This is merely for sockmap.
909 static bool unix_bpf_bypass_getsockopt(int level, int optname)
911 if (level == SOL_SOCKET) {
923 struct proto unix_dgram_proto = {
925 .owner = THIS_MODULE,
926 .obj_size = sizeof(struct unix_sock),
928 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
929 #ifdef CONFIG_BPF_SYSCALL
930 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
934 struct proto unix_stream_proto = {
935 .name = "UNIX-STREAM",
936 .owner = THIS_MODULE,
937 .obj_size = sizeof(struct unix_sock),
939 .unhash = unix_unhash,
940 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
941 #ifdef CONFIG_BPF_SYSCALL
942 .psock_update_sk_prot = unix_stream_bpf_update_proto,
946 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
952 atomic_long_inc(&unix_nr_socks);
953 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
958 if (type == SOCK_STREAM)
959 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
960 else /*dgram and seqpacket */
961 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
968 sock_init_data(sock, sk);
970 sk->sk_hash = unix_unbound_hash(sk);
971 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
972 sk->sk_write_space = unix_write_space;
973 sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
974 sk->sk_destruct = unix_sock_destructor;
978 u->path.dentry = NULL;
980 spin_lock_init(&u->lock);
981 mutex_init(&u->iolock); /* single task reading lock */
982 mutex_init(&u->bindlock); /* single task binding lock */
983 init_waitqueue_head(&u->peer_wait);
984 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
985 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
986 unix_insert_unbound_socket(net, sk);
988 sock_prot_inuse_add(net, sk->sk_prot, 1);
993 atomic_long_dec(&unix_nr_socks);
997 static int unix_create(struct net *net, struct socket *sock, int protocol,
1002 if (protocol && protocol != PF_UNIX)
1003 return -EPROTONOSUPPORT;
1005 sock->state = SS_UNCONNECTED;
1007 switch (sock->type) {
1009 sock->ops = &unix_stream_ops;
1012 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
1016 sock->type = SOCK_DGRAM;
1019 sock->ops = &unix_dgram_ops;
1021 case SOCK_SEQPACKET:
1022 sock->ops = &unix_seqpacket_ops;
1025 return -ESOCKTNOSUPPORT;
1028 sk = unix_create1(net, sock, kern, sock->type);
1035 static int unix_release(struct socket *sock)
1037 struct sock *sk = sock->sk;
1042 sk->sk_prot->close(sk, 0);
1043 unix_release_sock(sk, 0);
1049 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1052 struct inode *inode;
1057 unix_mkname_bsd(sunaddr, addr_len);
1058 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1062 err = path_permission(&path, MAY_WRITE);
1066 err = -ECONNREFUSED;
1067 inode = d_backing_inode(path.dentry);
1068 if (!S_ISSOCK(inode->i_mode))
1071 sk = unix_find_socket_byinode(inode);
1076 if (sk->sk_type == type)
1090 return ERR_PTR(err);
1093 static struct sock *unix_find_abstract(struct net *net,
1094 struct sockaddr_un *sunaddr,
1095 int addr_len, int type)
1097 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1098 struct dentry *dentry;
1101 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1103 return ERR_PTR(-ECONNREFUSED);
1105 dentry = unix_sk(sk)->path.dentry;
1107 touch_atime(&unix_sk(sk)->path);
1112 static struct sock *unix_find_other(struct net *net,
1113 struct sockaddr_un *sunaddr,
1114 int addr_len, int type)
1118 if (sunaddr->sun_path[0])
1119 sk = unix_find_bsd(sunaddr, addr_len, type);
1121 sk = unix_find_abstract(net, sunaddr, addr_len, type);
1126 static int unix_autobind(struct sock *sk)
1128 struct unix_sock *u = unix_sk(sk);
1129 unsigned int new_hash, old_hash;
1130 struct net *net = sock_net(sk);
1131 struct unix_address *addr;
1132 u32 lastnum, ordernum;
1135 err = mutex_lock_interruptible(&u->bindlock);
1143 addr = kzalloc(sizeof(*addr) +
1144 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1148 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1149 addr->name->sun_family = AF_UNIX;
1150 refcount_set(&addr->refcnt, 1);
1152 old_hash = sk->sk_hash;
1153 ordernum = get_random_u32();
1154 lastnum = ordernum & 0xFFFFF;
1156 ordernum = (ordernum + 1) & 0xFFFFF;
1157 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1159 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1160 unix_table_double_lock(net, old_hash, new_hash);
1162 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1163 unix_table_double_unlock(net, old_hash, new_hash);
1165 /* __unix_find_socket_byname() may take a long time if many names
1166 * are already in use.
1170 if (ordernum == lastnum) {
1171 /* Give up if all names seem to be in use. */
1173 unix_release_addr(addr);
1180 __unix_set_addr_hash(net, sk, addr, new_hash);
1181 unix_table_double_unlock(net, old_hash, new_hash);
1184 out: mutex_unlock(&u->bindlock);
1188 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1191 umode_t mode = S_IFSOCK |
1192 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1193 struct unix_sock *u = unix_sk(sk);
1194 unsigned int new_hash, old_hash;
1195 struct net *net = sock_net(sk);
1196 struct mnt_idmap *idmap;
1197 struct unix_address *addr;
1198 struct dentry *dentry;
1202 addr_len = unix_mkname_bsd(sunaddr, addr_len);
1203 addr = unix_create_addr(sunaddr, addr_len);
1208 * Get the parent directory, calculate the hash for last
1211 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1212 if (IS_ERR(dentry)) {
1213 err = PTR_ERR(dentry);
1218 * All right, let's create it.
1220 idmap = mnt_idmap(parent.mnt);
1221 err = security_path_mknod(&parent, dentry, mode, 0);
1223 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1226 err = mutex_lock_interruptible(&u->bindlock);
1232 old_hash = sk->sk_hash;
1233 new_hash = unix_bsd_hash(d_backing_inode(dentry));
1234 unix_table_double_lock(net, old_hash, new_hash);
1235 u->path.mnt = mntget(parent.mnt);
1236 u->path.dentry = dget(dentry);
1237 __unix_set_addr_hash(net, sk, addr, new_hash);
1238 unix_table_double_unlock(net, old_hash, new_hash);
1239 unix_insert_bsd_socket(sk);
1240 mutex_unlock(&u->bindlock);
1241 done_path_create(&parent, dentry);
1245 mutex_unlock(&u->bindlock);
1248 /* failed after successful mknod? unlink what we'd created... */
1249 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1251 done_path_create(&parent, dentry);
1253 unix_release_addr(addr);
1254 return err == -EEXIST ? -EADDRINUSE : err;
1257 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1260 struct unix_sock *u = unix_sk(sk);
1261 unsigned int new_hash, old_hash;
1262 struct net *net = sock_net(sk);
1263 struct unix_address *addr;
1266 addr = unix_create_addr(sunaddr, addr_len);
1270 err = mutex_lock_interruptible(&u->bindlock);
1279 old_hash = sk->sk_hash;
1280 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1281 unix_table_double_lock(net, old_hash, new_hash);
1283 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1286 __unix_set_addr_hash(net, sk, addr, new_hash);
1287 unix_table_double_unlock(net, old_hash, new_hash);
1288 mutex_unlock(&u->bindlock);
1292 unix_table_double_unlock(net, old_hash, new_hash);
1295 mutex_unlock(&u->bindlock);
1297 unix_release_addr(addr);
1301 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1303 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1304 struct sock *sk = sock->sk;
1307 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1308 sunaddr->sun_family == AF_UNIX)
1309 return unix_autobind(sk);
1311 err = unix_validate_addr(sunaddr, addr_len);
1315 if (sunaddr->sun_path[0])
1316 err = unix_bind_bsd(sk, sunaddr, addr_len);
1318 err = unix_bind_abstract(sk, sunaddr, addr_len);
1323 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1325 if (unlikely(sk1 == sk2) || !sk2) {
1326 unix_state_lock(sk1);
1332 unix_state_lock(sk1);
1333 unix_state_lock_nested(sk2, U_LOCK_SECOND);
1336 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1338 if (unlikely(sk1 == sk2) || !sk2) {
1339 unix_state_unlock(sk1);
1342 unix_state_unlock(sk1);
1343 unix_state_unlock(sk2);
1346 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1347 int alen, int flags)
1349 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1350 struct sock *sk = sock->sk;
1355 if (alen < offsetofend(struct sockaddr, sa_family))
1358 if (addr->sa_family != AF_UNSPEC) {
1359 err = unix_validate_addr(sunaddr, alen);
1363 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1367 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1368 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1369 !READ_ONCE(unix_sk(sk)->addr)) {
1370 err = unix_autobind(sk);
1376 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1377 if (IS_ERR(other)) {
1378 err = PTR_ERR(other);
1382 unix_state_double_lock(sk, other);
1384 /* Apparently VFS overslept socket death. Retry. */
1385 if (sock_flag(other, SOCK_DEAD)) {
1386 unix_state_double_unlock(sk, other);
1392 if (!unix_may_send(sk, other))
1395 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1399 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1400 WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1403 * 1003.1g breaking connected state with AF_UNSPEC
1406 unix_state_double_lock(sk, other);
1410 * If it was connected, reconnect.
1412 if (unix_peer(sk)) {
1413 struct sock *old_peer = unix_peer(sk);
1415 unix_peer(sk) = other;
1417 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1418 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1420 unix_state_double_unlock(sk, other);
1422 if (other != old_peer) {
1423 unix_dgram_disconnected(sk, old_peer);
1425 unix_state_lock(old_peer);
1426 if (!unix_peer(old_peer))
1427 WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1428 unix_state_unlock(old_peer);
1433 unix_peer(sk) = other;
1434 unix_state_double_unlock(sk, other);
1440 unix_state_double_unlock(sk, other);
1446 static long unix_wait_for_peer(struct sock *other, long timeo)
1447 __releases(&unix_sk(other)->lock)
1449 struct unix_sock *u = unix_sk(other);
1453 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1455 sched = !sock_flag(other, SOCK_DEAD) &&
1456 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1457 unix_recvq_full_lockless(other);
1459 unix_state_unlock(other);
1462 timeo = schedule_timeout(timeo);
1464 finish_wait(&u->peer_wait, &wait);
1468 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1469 int addr_len, int flags)
1471 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1472 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1473 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1474 struct net *net = sock_net(sk);
1475 struct sk_buff *skb = NULL;
1479 err = unix_validate_addr(sunaddr, addr_len);
1483 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1487 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1488 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1489 !READ_ONCE(u->addr)) {
1490 err = unix_autobind(sk);
1495 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1497 /* First of all allocate resources.
1498 If we do it after the state is locked,
1499 we will have to recheck everything again in any case.
1502 /* create new sock for complete connection */
1503 newsk = unix_create1(net, NULL, 0, sock->type);
1504 if (IS_ERR(newsk)) {
1505 err = PTR_ERR(newsk);
1512 /* Allocate skb for sending to listening sock */
1513 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1518 /* Find listening sock. */
1519 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1520 if (IS_ERR(other)) {
1521 err = PTR_ERR(other);
1526 /* Latch state of peer */
1527 unix_state_lock(other);
1529 /* Apparently VFS overslept socket death. Retry. */
1530 if (sock_flag(other, SOCK_DEAD)) {
1531 unix_state_unlock(other);
1536 err = -ECONNREFUSED;
1537 if (other->sk_state != TCP_LISTEN)
1539 if (other->sk_shutdown & RCV_SHUTDOWN)
1542 if (unix_recvq_full_lockless(other)) {
1547 timeo = unix_wait_for_peer(other, timeo);
1549 err = sock_intr_errno(timeo);
1550 if (signal_pending(current))
1558 This is a tricky place. We need to grab our state lock and cannot
1559 drop the lock on the peer. It is dangerous because a deadlock is
1560 possible. The connect-to-self case and simultaneous
1561 attempts to connect are eliminated by checking the socket
1562 state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
1563 check this before attempting to grab the lock.
1565 Well, and we have to recheck the state after the socket is locked.
1567 switch (READ_ONCE(sk->sk_state)) {
1569 /* This is ok... continue with connect */
1571 case TCP_ESTABLISHED:
1572 /* Socket is already connected */
1580 unix_state_lock_nested(sk, U_LOCK_SECOND);
1582 if (sk->sk_state != TCP_CLOSE) {
1583 unix_state_unlock(sk);
1584 unix_state_unlock(other);
1589 err = security_unix_stream_connect(sk, other, newsk);
1591 unix_state_unlock(sk);
1595 /* The way is open! Quickly set all the necessary fields... */
1598 unix_peer(newsk) = sk;
1599 newsk->sk_state = TCP_ESTABLISHED;
1600 newsk->sk_type = sk->sk_type;
1601 init_peercred(newsk);
1602 newu = unix_sk(newsk);
1603 newu->listener = other;
1604 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1605 otheru = unix_sk(other);
1607 /* copy address information from listening to new sock
1609 * The contents of *(otheru->addr) and otheru->path
1610 * are seen fully set up here, since we have found
1611 * otheru in hash under its lock. Insertion into the
1612 * hash chain we'd found it in had been done in an
1613 * earlier critical area protected by the chain's lock,
1614 * the same one where we'd set *(otheru->addr) contents,
1615 * as well as otheru->path and otheru->addr itself.
1617 * Using smp_store_release() here to set newu->addr
1618 * is enough to make those stores, as well as stores
1619 * to newu->path visible to anyone who gets newu->addr
1620 * by smp_load_acquire(). IOW, the same guarantees
1621 * as for unix_sock instances bound in unix_bind() or
1622 * in unix_autobind().
1624 if (otheru->path.dentry) {
1625 path_get(&otheru->path);
1626 newu->path = otheru->path;
1628 refcount_inc(&otheru->addr->refcnt);
1629 smp_store_release(&newu->addr, otheru->addr);
1631 /* Set credentials */
1632 copy_peercred(sk, other);
1634 sock->state = SS_CONNECTED;
1635 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1638 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1639 unix_peer(sk) = newsk;
1641 unix_state_unlock(sk);
1643 /* take ten and send info to listening sock */
1644 spin_lock(&other->sk_receive_queue.lock);
1645 __skb_queue_tail(&other->sk_receive_queue, skb);
1646 spin_unlock(&other->sk_receive_queue.lock);
1647 unix_state_unlock(other);
1648 other->sk_data_ready(other);
1654 unix_state_unlock(other);
1659 unix_release_sock(newsk, 0);
1665 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1667 struct sock *ska = socka->sk, *skb = sockb->sk;
1669 /* Join our sockets back to back */
1672 unix_peer(ska) = skb;
1673 unix_peer(skb) = ska;
1677 ska->sk_state = TCP_ESTABLISHED;
1678 skb->sk_state = TCP_ESTABLISHED;
1679 socka->state = SS_CONNECTED;
1680 sockb->state = SS_CONNECTED;
1684 static void unix_sock_inherit_flags(const struct socket *old,
1687 if (test_bit(SOCK_PASSCRED, &old->flags))
1688 set_bit(SOCK_PASSCRED, &new->flags);
1689 if (test_bit(SOCK_PASSPIDFD, &old->flags))
1690 set_bit(SOCK_PASSPIDFD, &new->flags);
1691 if (test_bit(SOCK_PASSSEC, &old->flags))
1692 set_bit(SOCK_PASSSEC, &new->flags);
1695 static int unix_accept(struct socket *sock, struct socket *newsock,
1696 struct proto_accept_arg *arg)
1698 struct sock *sk = sock->sk;
1699 struct sk_buff *skb;
1702 arg->err = -EOPNOTSUPP;
1703 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1707 if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1710 /* If socket state is TCP_LISTEN it cannot change (for now...),
1711 * so no locks are necessary.
1714 skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1717 /* This means receive shutdown. */
1724 skb_free_datagram(sk, skb);
1725 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1727 /* attach accepted sock to socket */
1728 unix_state_lock(tsk);
1729 unix_update_edges(unix_sk(tsk));
1730 newsock->state = SS_CONNECTED;
1731 unix_sock_inherit_flags(sock, newsock);
1732 sock_graft(tsk, newsock);
1733 unix_state_unlock(tsk);
1741 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1743 struct sock *sk = sock->sk;
1744 struct unix_address *addr;
1745 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1749 sk = unix_peer_get(sk);
1759 addr = smp_load_acquire(&unix_sk(sk)->addr);
1761 sunaddr->sun_family = AF_UNIX;
1762 sunaddr->sun_path[0] = 0;
1763 err = offsetof(struct sockaddr_un, sun_path);
1766 memcpy(sunaddr, addr->name, addr->len);
1769 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1770 CGROUP_UNIX_GETPEERNAME);
1772 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1773 CGROUP_UNIX_GETSOCKNAME);
1780 /* The "user->unix_inflight" variable is protected by the garbage
1781 * collection lock, and we just read it locklessly here. If you go
1782 * over the limit, there might be a tiny race in actually noticing
1783 * it across threads. Tough.
1785 static inline bool too_many_unix_fds(struct task_struct *p)
1787 struct user_struct *user = current_user();
1789 if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1790 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1794 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1796 if (too_many_unix_fds(current))
1797 return -ETOOMANYREFS;
1799 UNIXCB(skb).fp = scm->fp;
1802 if (unix_prepare_fpl(UNIXCB(skb).fp))
1808 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1810 scm->fp = UNIXCB(skb).fp;
1811 UNIXCB(skb).fp = NULL;
1813 unix_destroy_fpl(scm->fp);
1816 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1818 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
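/* For illustration only, a minimal userspace sketch of the descriptor
 * passing that unix_attach_fds()/unix_detach_fds() service; sock_fd and
 * fd_to_pass are placeholders for an AF_UNIX socket and any open file
 * descriptor, and error handling is omitted.
 *
 *	char cbuf[CMSG_SPACE(sizeof(int))] = { 0 };
 *	char dummy = 'x';
 *	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = cbuf,
 *			      .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &msg, 0);
 */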
1821 static void unix_destruct_scm(struct sk_buff *skb)
1823 struct scm_cookie scm;
1825 memset(&scm, 0, sizeof(scm));
1826 scm.pid = UNIXCB(skb).pid;
1828 unix_detach_fds(&scm, skb);
1830 /* Alas, it calls VFS */
1831 /* So fscking what? fput() had been SMP-safe since the last Summer */
1836 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1840 UNIXCB(skb).pid = get_pid(scm->pid);
1841 UNIXCB(skb).uid = scm->creds.uid;
1842 UNIXCB(skb).gid = scm->creds.gid;
1843 UNIXCB(skb).fp = NULL;
1844 unix_get_secdata(scm, skb);
1845 if (scm->fp && send_fds)
1846 err = unix_attach_fds(scm, skb);
1848 skb->destructor = unix_destruct_scm;
1852 static bool unix_passcred_enabled(const struct socket *sock,
1853 const struct sock *other)
1855 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1856 test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1857 !other->sk_socket ||
1858 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1859 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1863 * Some apps rely on write() giving SCM_CREDENTIALS.
1864 * We include credentials if the source or destination socket
1865 * asserted SOCK_PASSCRED.
1867 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1868 const struct sock *other)
1870 if (UNIXCB(skb).pid)
1872 if (unix_passcred_enabled(sock, other)) {
1873 UNIXCB(skb).pid = get_pid(task_tgid(current));
1874 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
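/* For illustration only, a hedged userspace sketch of the receiving side
 * served by the credential passing above; fd and msg are placeholders for
 * an AF_UNIX socket and a struct msghdr already filled in by recvmsg().
 *
 *	int one = 1;
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *	// ... recvmsg(fd, &msg, 0) with msg_control set up ...
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS) {
 *			struct ucred uc;
 *
 *			memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
 *			// uc.pid, uc.uid, uc.gid identify the sender
 *		}
 *	}
 */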
1878 static bool unix_skb_scm_eq(struct sk_buff *skb,
1879 struct scm_cookie *scm)
1881 return UNIXCB(skb).pid == scm->pid &&
1882 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1883 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1884 unix_secdata_eq(scm, skb);
1887 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1889 struct scm_fp_list *fp = UNIXCB(skb).fp;
1890 struct unix_sock *u = unix_sk(sk);
1892 if (unlikely(fp && fp->count)) {
1893 atomic_add(fp->count, &u->scm_stat.nr_fds);
1894 unix_add_edges(fp, u);
1898 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1900 struct scm_fp_list *fp = UNIXCB(skb).fp;
1901 struct unix_sock *u = unix_sk(sk);
1903 if (unlikely(fp && fp->count)) {
1904 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1910 * Send AF_UNIX data.
1913 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1916 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1917 struct sock *sk = sock->sk, *other = NULL;
1918 struct unix_sock *u = unix_sk(sk);
1919 struct scm_cookie scm;
1920 struct sk_buff *skb;
1926 err = scm_send(sock, msg, &scm, false);
1930 wait_for_unix_gc(scm.fp);
1933 if (msg->msg_flags&MSG_OOB)
1936 if (msg->msg_namelen) {
1937 err = unix_validate_addr(sunaddr, msg->msg_namelen);
1941 err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
1950 other = unix_peer_get(sk);
1955 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1956 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1957 !READ_ONCE(u->addr)) {
1958 err = unix_autobind(sk);
1964 if (len > READ_ONCE(sk->sk_sndbuf) - 32)
1967 if (len > SKB_MAX_ALLOC) {
1968 data_len = min_t(size_t,
1969 len - SKB_MAX_ALLOC,
1970 MAX_SKB_FRAGS * PAGE_SIZE);
1971 data_len = PAGE_ALIGN(data_len);
1973 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1976 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1977 msg->msg_flags & MSG_DONTWAIT, &err,
1978 PAGE_ALLOC_COSTLY_ORDER);
1982 err = unix_scm_to_skb(&scm, skb, true);
1986 skb_put(skb, len - data_len);
1987 skb->data_len = data_len;
1989 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1993 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1998 if (sunaddr == NULL)
2001 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
2003 if (IS_ERR(other)) {
2004 err = PTR_ERR(other);
2010 if (sk_filter(other, skb) < 0) {
2011 /* Toss the packet but do not return any error to the sender */
2017 unix_state_lock(other);
2020 if (!unix_may_send(sk, other))
2023 if (unlikely(sock_flag(other, SOCK_DEAD))) {
2025 * Check with 1003.1g - what should
2028 unix_state_unlock(other);
2032 unix_state_lock(sk);
2035 if (sk->sk_type == SOCK_SEQPACKET) {
2036 /* We are here only when racing with unix_release_sock()
2037 * is clearing @other. Never change state to TCP_CLOSE
2038 * unlike SOCK_DGRAM wants.
2040 unix_state_unlock(sk);
2042 } else if (unix_peer(sk) == other) {
2043 unix_peer(sk) = NULL;
2044 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2046 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2047 unix_state_unlock(sk);
2049 unix_dgram_disconnected(sk, other);
2051 err = -ECONNREFUSED;
2053 unix_state_unlock(sk);
2063 if (other->sk_shutdown & RCV_SHUTDOWN)
2066 if (sk->sk_type != SOCK_SEQPACKET) {
2067 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2072 /* other == sk && unix_peer(other) != sk if
2073 * - unix_peer(sk) == NULL, destination address bound to sk
2074 * - unix_peer(sk) == sk by time of get but disconnected before lock
2077 unlikely(unix_peer(other) != sk &&
2078 unix_recvq_full_lockless(other))) {
2080 timeo = unix_wait_for_peer(other, timeo);
2082 err = sock_intr_errno(timeo);
2083 if (signal_pending(current))
2090 unix_state_unlock(other);
2091 unix_state_double_lock(sk, other);
2094 if (unix_peer(sk) != other ||
2095 unix_dgram_peer_wake_me(sk, other)) {
2103 goto restart_locked;
2107 if (unlikely(sk_locked))
2108 unix_state_unlock(sk);
2110 if (sock_flag(other, SOCK_RCVTSTAMP))
2111 __net_timestamp(skb);
2112 maybe_add_creds(skb, sock, other);
2113 scm_stat_add(other, skb);
2114 skb_queue_tail(&other->sk_receive_queue, skb);
2115 unix_state_unlock(other);
2116 other->sk_data_ready(other);
2123 unix_state_unlock(sk);
2124 unix_state_unlock(other);
2134 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2135 * bytes, and a minimum of a full page.
2137 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
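/* With the common 4 KiB pages, get_order(32768) is 3, so UNIX_SKB_FRAGS_SZ
 * works out to 32768 bytes; with larger page sizes it rounds up to one page.
 */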
2139 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2140 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2141 struct scm_cookie *scm, bool fds_sent)
2143 struct unix_sock *ousk = unix_sk(other);
2144 struct sk_buff *skb;
2147 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2152 err = unix_scm_to_skb(scm, skb, !fds_sent);
2158 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2165 unix_state_lock(other);
2167 if (sock_flag(other, SOCK_DEAD) ||
2168 (other->sk_shutdown & RCV_SHUTDOWN)) {
2169 unix_state_unlock(other);
2174 maybe_add_creds(skb, sock, other);
2177 scm_stat_add(other, skb);
2179 spin_lock(&other->sk_receive_queue.lock);
2181 consume_skb(ousk->oob_skb);
2182 WRITE_ONCE(ousk->oob_skb, skb);
2183 __skb_queue_tail(&other->sk_receive_queue, skb);
2184 spin_unlock(&other->sk_receive_queue.lock);
2186 sk_send_sigurg(other);
2187 unix_state_unlock(other);
2188 other->sk_data_ready(other);
2194 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2197 struct sock *sk = sock->sk;
2198 struct sock *other = NULL;
2200 struct sk_buff *skb;
2202 struct scm_cookie scm;
2203 bool fds_sent = false;
2206 err = scm_send(sock, msg, &scm, false);
2210 wait_for_unix_gc(scm.fp);
2213 if (msg->msg_flags & MSG_OOB) {
2214 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2222 if (msg->msg_namelen) {
2223 err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2227 other = unix_peer(sk);
2232 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2235 while (sent < len) {
2238 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2239 skb = sock_alloc_send_pskb(sk, 0, 0,
2240 msg->msg_flags & MSG_DONTWAIT,
2243 /* Keep two messages in the pipe so it schedules better */
2244 size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2246 /* allow fallback to order-0 allocations */
2247 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2249 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2251 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2253 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2254 msg->msg_flags & MSG_DONTWAIT, &err,
2255 get_order(UNIX_SKB_FRAGS_SZ));
2260 /* Only send the fds in the first buffer */
2261 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2268 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2269 err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2276 refcount_add(size, &sk->sk_wmem_alloc);
2278 skb_put(skb, size - data_len);
2279 skb->data_len = data_len;
2281 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2288 unix_state_lock(other);
2290 if (sock_flag(other, SOCK_DEAD) ||
2291 (other->sk_shutdown & RCV_SHUTDOWN))
2294 maybe_add_creds(skb, sock, other);
2295 scm_stat_add(other, skb);
2296 skb_queue_tail(&other->sk_receive_queue, skb);
2297 unix_state_unlock(other);
2298 other->sk_data_ready(other);
2302 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2303 if (msg->msg_flags & MSG_OOB) {
2304 err = queue_oob(sock, msg, other, &scm, fds_sent);
2316 unix_state_unlock(other);
2319 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2320 send_sig(SIGPIPE, current, 0);
2324 return sent ? : err;
2327 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2331 struct sock *sk = sock->sk;
2333 err = sock_error(sk);
2337 if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2340 if (msg->msg_namelen)
2341 msg->msg_namelen = 0;
2343 return unix_dgram_sendmsg(sock, msg, len);
2346 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2347 size_t size, int flags)
2349 struct sock *sk = sock->sk;
2351 if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2354 return unix_dgram_recvmsg(sock, msg, size, flags);
2357 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2359 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2362 msg->msg_namelen = addr->len;
2363 memcpy(msg->msg_name, addr->name, addr->len);
2367 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2370 struct scm_cookie scm;
2371 struct socket *sock = sk->sk_socket;
2372 struct unix_sock *u = unix_sk(sk);
2373 struct sk_buff *skb, *last;
2382 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2385 mutex_lock(&u->iolock);
2387 skip = sk_peek_offset(sk, flags);
2388 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2389 &skip, &err, &last);
2391 if (!(flags & MSG_PEEK))
2392 scm_stat_del(sk, skb);
2396 mutex_unlock(&u->iolock);
2401 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2402 &err, &timeo, last));
2404 if (!skb) { /* implies iolock unlocked */
2405 unix_state_lock(sk);
2406 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2407 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2408 (sk->sk_shutdown & RCV_SHUTDOWN))
2410 unix_state_unlock(sk);
2414 if (wq_has_sleeper(&u->peer_wait))
2415 wake_up_interruptible_sync_poll(&u->peer_wait,
2416 EPOLLOUT | EPOLLWRNORM |
2419 if (msg->msg_name) {
2420 unix_copy_addr(msg, skb->sk);
2422 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2427 if (size > skb->len - skip)
2428 size = skb->len - skip;
2429 else if (size < skb->len - skip)
2430 msg->msg_flags |= MSG_TRUNC;
2432 err = skb_copy_datagram_msg(skb, skip, msg, size);
2436 if (sock_flag(sk, SOCK_RCVTSTAMP))
2437 __sock_recv_timestamp(msg, sk, skb);
2439 memset(&scm, 0, sizeof(scm));
2441 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2442 unix_set_secdata(&scm, skb);
2444 if (!(flags & MSG_PEEK)) {
2446 unix_detach_fds(&scm, skb);
2448 sk_peek_offset_bwd(sk, skb->len);
2450 /* It is questionable: on PEEK we could:
2451 - do not return fds - good, but too simple 8)
2452 - return fds, and do not return them on read (old strategy,
2454 - clone fds (I chose it for now, it is the most universal
2457 POSIX 1003.1g does not actually define this clearly
2458 at all. POSIX 1003.1g doesn't define a lot of things
2463 sk_peek_offset_fwd(sk, size);
2466 unix_peek_fds(&scm, skb);
2468 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2470 scm_recv_unix(sock, msg, &scm, flags);
2473 skb_free_datagram(sk, skb);
2474 mutex_unlock(&u->iolock);
2479 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2482 struct sock *sk = sock->sk;
2484 #ifdef CONFIG_BPF_SYSCALL
2485 const struct proto *prot = READ_ONCE(sk->sk_prot);
2487 if (prot != &unix_dgram_proto)
2488 return prot->recvmsg(sk, msg, size, flags, NULL);
2490 return __unix_dgram_recvmsg(sk, msg, size, flags);
2493 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2495 struct unix_sock *u = unix_sk(sk);
2496 struct sk_buff *skb;
2499 mutex_lock(&u->iolock);
2500 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2501 mutex_unlock(&u->iolock);
2505 return recv_actor(sk, skb);
2509 * Sleep until more data has arrived. But check for races..
2511 static long unix_stream_data_wait(struct sock *sk, long timeo,
2512 struct sk_buff *last, unsigned int last_len,
2515 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2516 struct sk_buff *tail;
2519 unix_state_lock(sk);
2522 prepare_to_wait(sk_sleep(sk), &wait, state);
2524 tail = skb_peek_tail(&sk->sk_receive_queue);
2526 (tail && tail->len != last_len) ||
2528 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2529 signal_pending(current) ||
2533 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2534 unix_state_unlock(sk);
2535 timeo = schedule_timeout(timeo);
2536 unix_state_lock(sk);
2538 if (sock_flag(sk, SOCK_DEAD))
2541 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2544 finish_wait(sk_sleep(sk), &wait);
2545 unix_state_unlock(sk);
2549 static unsigned int unix_skb_len(const struct sk_buff *skb)
2551 return skb->len - UNIXCB(skb).consumed;
2554 struct unix_stream_read_state {
2555 int (*recv_actor)(struct sk_buff *, int, int,
2556 struct unix_stream_read_state *);
2557 struct socket *socket;
2559 struct pipe_inode_info *pipe;
2562 unsigned int splice_flags;
2565 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2566 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2568 struct socket *sock = state->socket;
2569 struct sock *sk = sock->sk;
2570 struct unix_sock *u = unix_sk(sk);
2572 struct sk_buff *oob_skb;
2574 mutex_lock(&u->iolock);
2575 unix_state_lock(sk);
2576 spin_lock(&sk->sk_receive_queue.lock);
2578 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2579 spin_unlock(&sk->sk_receive_queue.lock);
2580 unix_state_unlock(sk);
2581 mutex_unlock(&u->iolock);
2585 oob_skb = u->oob_skb;
2587 if (!(state->flags & MSG_PEEK))
2588 WRITE_ONCE(u->oob_skb, NULL);
2592 spin_unlock(&sk->sk_receive_queue.lock);
2593 unix_state_unlock(sk);
2595 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2597 if (!(state->flags & MSG_PEEK))
2598 UNIXCB(oob_skb).consumed += 1;
2600 consume_skb(oob_skb);
2602 mutex_unlock(&u->iolock);
2607 state->msg->msg_flags |= MSG_OOB;

static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
				  int flags, int copied)
{
	struct unix_sock *u = unix_sk(sk);

	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
		skb_unlink(skb, &sk->sk_receive_queue);
		consume_skb(skb);
		skb = NULL;
	} else {
		struct sk_buff *unlinked_skb = NULL;

		spin_lock(&sk->sk_receive_queue.lock);

		if (skb == u->oob_skb) {
			if (copied) {
				skb = NULL;
			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				if (!(flags & MSG_PEEK)) {
					WRITE_ONCE(u->oob_skb, NULL);
					consume_skb(skb);
				}
			} else if (flags & MSG_PEEK) {
				skb = NULL;
			} else {
				__skb_unlink(skb, &sk->sk_receive_queue);
				WRITE_ONCE(u->oob_skb, NULL);
				unlinked_skb = skb;
				skb = skb_peek(&sk->sk_receive_queue);
			}
		}

		spin_unlock(&sk->sk_receive_queue.lock);

		if (unlinked_skb) {
			WARN_ON_ONCE(skb_unref(unlinked_skb));
			kfree_skb(unlinked_skb);
		}
	}

	return skb;
}

static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
		return -ENOTCONN;
	return unix_read_skb(sk, recv_actor);
}

static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {

	if (unlikely(flags & MSG_OOB)) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		err = unix_stream_recv_urg(state);

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

		struct sk_buff *skb, *last;

		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {

		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
			skb = manage_oob(skb, sk, flags, copied);
			if (!skb && copied) {
				unix_state_unlock(sk);

			if (copied >= target)

			/*
			 *	POSIX 1003.1g mandates this order.
			 */
			err = sock_error(sk);
			if (sk->sk_shutdown & RCV_SHUTDOWN)

			unix_state_unlock(sk);

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);

			mutex_lock(&u->iolock);

		unix_state_unlock(sk);

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);

		unix_state_unlock(sk);

			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);

			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
							      state->msg->msg_name,
							      &state->msg->msg_namelen);

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */

			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))

			skb_unlink(skb, &sk->sk_receive_queue);

			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			unix_state_unlock(sk);

	mutex_unlock(&u->iolock);

	scm_recv_unix(sock, state->msg, &scm, flags);

	return copied ? : err;
}

static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sk->sk_socket,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_stream_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}
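
/* A minimal userspace sketch of the splice path above, assuming a connected
 * SOCK_STREAM socket and a freshly created pipe; error handling omitted:
 *
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	// move up to 4096 queued bytes from the socket into the pipe
 *	// without copying them through userspace
 *	splice(sock, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *
 * SPLICE_F_NONBLOCK (or O_NONBLOCK on the socket) maps to MSG_DONTWAIT
 * above, and a non-zero *ppos is rejected because sockets are not seekable.
 */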

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
		int peer_mode = 0;
		const struct proto *prot = READ_ONCE(other->sk_prot);

		if (prot->unhash)
			prot->unhash(other);
		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
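
/* A minimal userspace sketch of how the two helpers above are reached via
 * the SIOCINQ/SIOCOUTQ ioctls handled in unix_ioctl(); error handling
 * omitted:
 *
 *	int unread, unsent;
 *
 *	ioctl(sock, SIOCINQ, &unread);	// bytes queued for reading
 *	ioctl(sock, SIOCOUTQ, &unsent);	// bytes sent but not yet consumed
 *
 * For SOCK_STREAM and SOCK_SEQPACKET, SIOCINQ sums the unread part of every
 * queued skb; for SOCK_DGRAM it reports the length of the next datagram,
 * and on a listening socket it fails with -EINVAL.
 */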

static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	fd = get_unused_fd_flags(O_CLOEXEC);
	f = dentry_open(&path, O_PATH, current_cred());

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;

	amount = unix_outq_len(sk);
	err = put_user(amount, (int __user *)arg);

	amount = unix_inq_len(sk);
	err = put_user(amount, (int __user *)arg);

	err = unix_open_file(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	struct sk_buff *skb;

	skb = skb_peek(&sk->sk_receive_queue);
	if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
		answ = 1;
	err = put_user(answ, (int __user *)arg);

#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned char state;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);
	state = READ_ONCE(sk->sk_state);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err))
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (READ_ONCE(unix_sk(sk)->oob_skb))
		mask |= EPOLLPRI;
#endif

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk, state))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}

static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	unsigned char state;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);
	state = READ_ONCE(sk->sk_state);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk, state);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
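
/* The /proc/net/unix iterator encodes its position in a single loff_t: the
 * upper bits select the hash bucket and the low BUCKET_SPACE bits hold a
 * 1-based offset within that bucket.  For example, with the macros above:
 *
 *	loff_t pos = set_bucket_offset(5, 2);
 *
 *	get_bucket(pos);	// -> 5
 *	get_offset(pos);	// -> 2, i.e. the second socket in bucket 5
 */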

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	unsigned long count = 0;
	struct sock *sk;

	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
	     sk; sk = sk_next(sk)) {
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	struct net *net = seq_file_net(seq);
	struct sock *sk;

	while (bucket < UNIX_HASH_SIZE) {
		spin_lock(&net->unx.table.locks[bucket]);
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;
		spin_unlock(&net->unx.table.locks[bucket]);
		*pos = set_bucket_offset(++bucket, 1);
	}

	return NULL;
}

static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);

	sk = sk_next(sk);
	if (sk)
		return sk;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
	*pos = set_bucket_offset(++bucket, 1);
	return unix_get_first(seq, pos);
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;
	return unix_get_first(seq, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;

	if (v == SEQ_START_TOKEN)
		return unix_get_first(seq, pos);
	return unix_get_next(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
{
	struct sock *sk = v;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);

		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under a hash table lock here */
			int i, len;

			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len -
				offsetof(struct sockaddr_un, sun_path);
			if (u->addr->name->sun_path[0]) {
				len--;
			} else {
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

#ifdef CONFIG_BPF_SYSCALL
struct bpf_unix_iter_state {
	struct seq_net_private p;
	unsigned int cur_sk;
	unsigned int end_sk;
	unsigned int max_sk;
	struct sock **batch;
	bool st_bucket_done;
};

struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};
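
/* bpf_iter__unix is the context that an "iter/unix" BPF program sees for
 * each socket walked by this iterator: the iterator metadata, the
 * unix_sock (NULL-able, hence PTR_TO_BTF_ID_OR_NULL below) and the owning
 * uid.  Such a program can dump or filter sockets and, as allowed by
 * bpf_iter_unix_get_func_proto(), call bpf_sk_setsockopt() and
 * bpf_sk_getsockopt() on them.
 */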

static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	meta->seq_num--;  /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;
	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected = 1;
	struct sock *sk;

	sock_hold(start_sk);
	iter->batch[iter->end_sk++] = start_sk;

	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
		if (iter->end_sk < iter->max_sk) {
			sock_hold(sk);
			iter->batch[iter->end_sk++] = sk;
		}
		expected++;
	}

	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);

	return expected;
}

static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
{
	while (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);
}

static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
				       unsigned int new_batch_sz)
{
	struct sock **new_batch;

	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
			     GFP_USER | __GFP_NOWARN);
	if (!new_batch)
		return -ENOMEM;

	bpf_iter_unix_put_batch(iter);
	kvfree(iter->batch);
	iter->batch = new_batch;
	iter->max_sk = new_batch_sz;

	return 0;
}

static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

again:
	/* Get a new batch */
	iter->cur_sk = 0;
	iter->end_sk = 0;

	sk = unix_get_first(seq, pos);
	if (!sk)
		return NULL; /* Done */

	expected = bpf_iter_unix_hold_batch(seq, sk);

	if (iter->end_sk == expected) {
		iter->st_bucket_done = true;
		return sk;
	}

	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
		resized = true;
		goto again;
	}

	return sk;
}

static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	/* bpf iter does not support lseek, so it always
	 * continues from where it was stop()-ped.
	 */
	return bpf_iter_unix_batch(seq, pos);
}

static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct sock *sk = v;

	++*pos;

	/* Whenever seq_next() is called, the iter->cur_sk is
	 * done with seq_show(), so advance to the next sk in
	 * the batch.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	if (iter->cur_sk < iter->end_sk)
		sk = iter->batch[iter->cur_sk];
	else
		sk = bpf_iter_unix_batch(seq, pos);

	return sk;
}

static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk = v;
	uid_t uid;
	bool slow;
	int ret;

	if (v == SEQ_START_TOKEN)
		return 0;

	slow = lock_sock_fast(sk);

	if (unlikely(sk_unhashed(sk))) {
		ret = SEQ_SKIP;
		goto unlock;
	}

	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	ret = unix_prog_seq_show(prog, &meta, v, uid);
unlock:
	unlock_sock_fast(sk, slow);
	return ret;
}

static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	if (iter->cur_sk < iter->end_sk)
		bpf_iter_unix_put_batch(iter);
}

static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}

static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16

static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *iter = priv_data;
	int err;

	err = bpf_iter_init_seq_net(priv_data, aux);
	if (err)
		return err;

	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
	if (err)
		bpf_iter_fini_seq_net(priv_data);

	return err;
}

static void bpf_iter_fini_unix(void *priv_data)
{
	struct bpf_unix_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);
}

static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};

static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
			     const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;
	}
}

static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto         = bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};

static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];

	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif

static int __init af_unix_init(void)
{
	int i, rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
		spin_lock_init(&bsd_socket_locks[i]);
		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
	}

	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		proto_unregister(&unix_dgram_proto);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_register();
#endif

out:
	return rc;
}

/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);