net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <[email protected]>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge number
 *                                      of socks hashed (this is for unix_gc()
 *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116
 117 #include "scm.h"
 118
/*
 * Global table of AF_UNIX sockets with 2 * UNIX_HASH_SIZE buckets.
 * unix_sockets_unbound() hands out buckets from the upper UNIX_HASH_SIZE
 * half; the list helpers in this file modify the table under
 * unix_table_lock.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of AF_UNIX socks: raised in unix_create1(), dropped in unix_sock_destructor(). */
static atomic_long_t unix_nr_socks;
 124
 125
 126 static struct hlist_head *unix_sockets_unbound(void *addr)
 127 {
 128         unsigned long hash = (unsigned long)addr;
 129
 130         hash ^= hash >> 16;
 131         hash ^= hash >> 8;
 132         hash %= UNIX_HASH_SIZE;
 133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134 }
 135
/*
 * Tests whether the hash stored in sk's address is below UNIX_HASH_SIZE.
 * Dereferences unix_sk(sk)->addr unconditionally, so it must only be used
 * on a socket that has an address attached.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security ID from @scm into the control block of @skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID stored in @skb's control block into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = UNIXCB(skb).secid;
}

/* Return true when @scm and @skb carry the same security ID. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return (scm->secid == UNIXCB(skb).secid);
}
#else
/*
 * Without CONFIG_SECURITY_NETWORK the copy helpers do nothing and the
 * comparison always reports equality.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
 165
 166 /*
 167  *  SMP locking strategy:
 168  *    hash table is protected with spinlock unix_table_lock
 169  *    each socket state is protected by separate spin lock.
 170  */
 171
 172 static inline unsigned int unix_hash_fold(__wsum n)
 173 {
 174         unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176         hash ^= hash>>8;
 177         return hash&(UNIX_HASH_SIZE-1);
 178 }
 179
/*
 * Peer pointer of an AF_UNIX socket.  Expands to an lvalue, so it is used
 * both for reading and for assignment (e.g. in unix_release_sock()).
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
 181
/* Return non-zero when @osk's peer pointer refers to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == sk;
}
 186
 187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190 }
 191
 192 static inline int unix_recvq_full(const struct sock *sk)
 193 {
 194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195 }
 196
 197 static inline int unix_recvq_full_lockless(const struct sock *sk)
 198 {
 199         return skb_queue_len_lockless(&sk->sk_receive_queue) >
 200                 READ_ONCE(sk->sk_max_ack_backlog);
 201 }
 202
 203 struct sock *unix_peer_get(struct sock *s)
 204 {
 205         struct sock *peer;
 206
 207         unix_state_lock(s);
 208         peer = unix_peer(s);
 209         if (peer)
 210                 sock_hold(peer);
 211         unix_state_unlock(s);
 212         return peer;
 213 }
 214 EXPORT_SYMBOL_GPL(unix_peer_get);
 215
 216 static inline void unix_release_addr(struct unix_address *addr)
 217 {
 218         if (refcount_dec_and_test(&addr->refcnt))
 219                 kfree(addr);
 220 }
 221
 222 /*
 223  *      Check unix socket name:
 224  *              - should be not zero length.
 225  *              - if started by not zero, should be NULL terminated (FS object)
 226  *              - if started by zero, it is abstract name.
 227  */
 228
 229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 230 {
 231         *hashp = 0;
 232
 233         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 234                 return -EINVAL;
 235         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 236                 return -EINVAL;
 237         if (sunaddr->sun_path[0]) {
 238                 /*
 239                  * This may look like an off by one error but it is a bit more
 240                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 241                  * sun_path[108] doesn't as such exist.  However in kernel space
 242                  * we are guaranteed that it is a valid memory location in our
 243                  * kernel address buffer.
 244                  */
 245                 ((char *)sunaddr)[len] = 0;
 246                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 247                 return len;
 248         }
 249
 250         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 251         return len;
 252 }
 253
/*
 * Unlink @sk from whatever hash list it is on.  Takes no lock itself;
 * unix_remove_socket() is the variant that wraps this in unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
 258
/*
 * Add @sk to @list.  Warns if @sk is still hashed somewhere.  Takes no lock
 * itself; unix_insert_socket() is the variant that holds unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
 264
/* Unlink @sk from the socket table while holding unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
 271
/* Add @sk to the bucket @list while holding unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
 278
 279 static struct sock *__unix_find_socket_byname(struct net *net,
 280                                               struct sockaddr_un *sunname,
 281                                               int len, int type, unsigned int hash)
 282 {
 283         struct sock *s;
 284
 285         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 286                 struct unix_sock *u = unix_sk(s);
 287
 288                 if (!net_eq(sock_net(s), net))
 289                         continue;
 290
 291                 if (u->addr->len == len &&
 292                     !memcmp(u->addr->name, sunname, len))
 293                         return s;
 294         }
 295         return NULL;
 296 }
 297
 298 static inline struct sock *unix_find_socket_byname(struct net *net,
 299                                                    struct sockaddr_un *sunname,
 300                                                    int len, int type,
 301                                                    unsigned int hash)
 302 {
 303         struct sock *s;
 304
 305         spin_lock(&unix_table_lock);
 306         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 307         if (s)
 308                 sock_hold(s);
 309         spin_unlock(&unix_table_lock);
 310         return s;
 311 }
 312
 313 static struct sock *unix_find_socket_byinode(struct inode *i)
 314 {
 315         struct sock *s;
 316
 317         spin_lock(&unix_table_lock);
 318         sk_for_each(s,
 319                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 320                 struct dentry *dentry = unix_sk(s)->path.dentry;
 321
 322                 if (dentry && d_backing_inode(dentry) == i) {
 323                         sock_hold(s);
 324                         goto found;
 325                 }
 326         }
 327         s = NULL;
 328 found:
 329         spin_unlock(&unix_table_lock);
 330         return s;
 331 }
 332
 333 /* Support code for asymmetrically connected dgram sockets
 334  *
 335  * If a datagram socket is connected to a socket not itself connected
 336  * to the first socket (eg, /dev/log), clients may only enqueue more
 337  * messages if the present receive queue of the server socket is not
 338  * "too large". This means there's a second writeability condition
 339  * poll and sendmsg need to test. The dgram recv code will do a wake
 340  * up on the peer_wait wait queue of a socket upon reception of a
 341  * datagram which needs to be propagated to sleeping would-be writers
 342  * since these might not have sent anything so far. This can't be
 343  * accomplished via poll_wait because the lifetime of the server
 344  * socket might be less than that of its clients if these break their
 345  * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 348  *
 349  * In order to propagate a wake up, a wait_queue_entry_t of the client
 350  * socket is enqueued on the peer_wait queue of the server socket
 351  * whose wake function does a wake_up on the ordinary client socket
 352  * wait queue. This connection is established whenever a write (or
 353  * poll for write) hit the flow control condition and broken when the
 354  * association to the server socket is dissolved or after a wake up
 355  * was relayed.
 356  */
 357
 358 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 359                                       void *key)
 360 {
 361         struct unix_sock *u;
 362         wait_queue_head_t *u_sleep;
 363
 364         u = container_of(q, struct unix_sock, peer_wake);
 365
 366         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 367                             q);
 368         u->peer_wake.private = NULL;
 369
 370         /* relaying can only happen while the wq still exists */
 371         u_sleep = sk_sleep(&u->sk);
 372         if (u_sleep)
 373                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 374
 375         return 0;
 376 }
 377
 378 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 379 {
 380         struct unix_sock *u, *u_other;
 381         int rc;
 382
 383         u = unix_sk(sk);
 384         u_other = unix_sk(other);
 385         rc = 0;
 386         spin_lock(&u_other->peer_wait.lock);
 387
 388         if (!u->peer_wake.private) {
 389                 u->peer_wake.private = other;
 390                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 391
 392                 rc = 1;
 393         }
 394
 395         spin_unlock(&u_other->peer_wait.lock);
 396         return rc;
 397 }
 398
 399 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 400                                             struct sock *other)
 401 {
 402         struct unix_sock *u, *u_other;
 403
 404         u = unix_sk(sk);
 405         u_other = unix_sk(other);
 406         spin_lock(&u_other->peer_wait.lock);
 407
 408         if (u->peer_wake.private == other) {
 409                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 410                 u->peer_wake.private = NULL;
 411         }
 412
 413         spin_unlock(&u_other->peer_wait.lock);
 414 }
 415
 416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 417                                                    struct sock *other)
 418 {
 419         unix_dgram_peer_wake_disconnect(sk, other);
 420         wake_up_interruptible_poll(sk_sleep(sk),
 421                                    EPOLLOUT |
 422                                    EPOLLWRNORM |
 423                                    EPOLLWRBAND);
 424 }
 425
 426 /* preconditions:
 427  *      - unix_peer(sk) == other
 428  *      - association is stable
 429  */
 430 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 431 {
 432         int connected;
 433
 434         connected = unix_dgram_peer_wake_connect(sk, other);
 435
 436         /* If other is SOCK_DEAD, we want to make sure we signal
 437          * POLLOUT, such that a subsequent write() can get a
 438          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 439          * to other and its full, we will hang waiting for POLLOUT.
 440          */
 441         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 442                 return 1;
 443
 444         if (connected)
 445                 unix_dgram_peer_wake_disconnect(sk, other);
 446
 447         return 0;
 448 }
 449
 450 static int unix_writable(const struct sock *sk)
 451 {
 452         return sk->sk_state != TCP_LISTEN &&
 453                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 454 }
 455
 456 static void unix_write_space(struct sock *sk)
 457 {
 458         struct socket_wq *wq;
 459
 460         rcu_read_lock();
 461         if (unix_writable(sk)) {
 462                 wq = rcu_dereference(sk->sk_wq);
 463                 if (skwq_has_sleeper(wq))
 464                         wake_up_interruptible_sync_poll(&wq->wait,
 465                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 466                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 467         }
 468         rcu_read_unlock();
 469 }
 470
 471 /* When dgram socket disconnects (or changes its peer), we clear its receive
 472  * queue of packets arrived from previous peer. First, it allows to do
 473  * flow control based only on wmem_alloc; second, sk connected to peer
 474  * may receive messages only from that peer. */
 475 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 476 {
 477         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 478                 skb_queue_purge(&sk->sk_receive_queue);
 479                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 480
 481                 /* If one link of bidirectional dgram pipe is disconnected,
 482                  * we signal error. Messages are lost. Do not make this,
 483                  * when peer was not connected to us.
 484                  */
 485                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 486                         other->sk_err = ECONNRESET;
 487                         other->sk_error_report(other);
 488                 }
 489         }
 490 }
 491
 492 static void unix_sock_destructor(struct sock *sk)
 493 {
 494         struct unix_sock *u = unix_sk(sk);
 495
 496         skb_queue_purge(&sk->sk_receive_queue);
 497
 498         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 499         WARN_ON(!sk_unhashed(sk));
 500         WARN_ON(sk->sk_socket);
 501         if (!sock_flag(sk, SOCK_DEAD)) {
 502                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 503                 return;
 504         }
 505
 506         if (u->addr)
 507                 unix_release_addr(u->addr);
 508
 509         atomic_long_dec(&unix_nr_socks);
 510         local_bh_disable();
 511         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 512         local_bh_enable();
 513 #ifdef UNIX_REFCNT_DEBUG
 514         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 515                 atomic_long_read(&unix_nr_socks));
 516 #endif
 517 }
 518
/*
 * Tear down one AF_UNIX socket.
 *
 * @sk:      socket being released.
 * @embrion: non-zero when called recursively for a socket found on a
 *           listener's receive queue (see the flush loop below); forces
 *           ECONNRESET onto a stream/seqpacket peer.
 *
 * The final sock_put() drops the reference on @sk, so the caller must not
 * touch @sk afterwards.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        /* Unhash first so no new lookup can find the socket. */
        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        /* Snapshot path and state; the path reference is dropped further down, outside the lock. */
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        /* Kick everybody sleeping on our peer_wait queue. */
        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data on our side, or an embryo, is reported to the peer as a reset. */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                /* Remove our wake-relay entry from the peer before dropping the peer reference. */
                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /*
                 * On a listener each queued skb refers to a socket via
                 * skb->sk (presumably a connection that was never
                 * accepted); release that socket as an embryo.
                 */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        /* Only run the garbage collector when some fds are in flight. */
        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 592
/*
 * Replace @sk's recorded peer pid/cred with those of the current task.
 * The previously recorded pid and (if set) cred references are dropped
 * first; new references are taken on the current thread group pid and the
 * current credentials.
 */
static void init_peercred(struct sock *sk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
}
 601
/*
 * Replace @sk's recorded peer pid/cred with new references to the ones
 * recorded on @peersk.  The previously recorded pid and (if set) cred
 * references of @sk are dropped first.
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
        put_pid(sk->sk_peer_pid);
        if (sk->sk_peer_cred)
                put_cred(sk->sk_peer_cred);
        sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
 610
 611 static int unix_listen(struct socket *sock, int backlog)
 612 {
 613         int err;
 614         struct sock *sk = sock->sk;
 615         struct unix_sock *u = unix_sk(sk);
 616         struct pid *old_pid = NULL;
 617
 618         err = -EOPNOTSUPP;
 619         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 620                 goto out;       /* Only stream/seqpacket sockets accept */
 621         err = -EINVAL;
 622         if (!u->addr)
 623                 goto out;       /* No listens on an unbound socket */
 624         unix_state_lock(sk);
 625         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 626                 goto out_unlock;
 627         if (backlog > sk->sk_max_ack_backlog)
 628                 wake_up_interruptible_all(&u->peer_wait);
 629         sk->sk_max_ack_backlog  = backlog;
 630         sk->sk_state            = TCP_LISTEN;
 631         /* set credentials so connect can copy them */
 632         init_peercred(sk);
 633         err = 0;
 634
 635 out_unlock:
 636         unix_state_unlock(sk);
 637         put_pid(old_pid);
 638 out:
 639         return err;
 640 }
 641
/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables below.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 671
 672 static int unix_set_peek_off(struct sock *sk, int val)
 673 {
 674         struct unix_sock *u = unix_sk(sk);
 675
 676         if (mutex_lock_interruptible(&u->iolock))
 677                 return -EINTR;
 678
 679         sk->sk_peek_off = val;
 680         mutex_unlock(&u->iolock);
 681
 682         return 0;
 683 }
 684
 685 #ifdef CONFIG_PROC_FS
 686 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
 687 {
 688         struct sock *sk = sock->sk;
 689         struct unix_sock *u;
 690
 691         if (sk) {
 692                 u = unix_sk(sock->sk);
 693                 seq_printf(m, "scm_fds: %u\n",
 694                            atomic_read(&u->scm_stat.nr_fds));
 695         }
 696 }
 697 #else
 698 #define unix_show_fdinfo NULL
 699 #endif
 700
/*
 * Socket operations for SOCK_STREAM (selected in unix_create()).  This is
 * the only table of the three that provides sendpage and splice_read
 * handlers of its own and that uses unix_poll.
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
        .show_fdinfo =  unix_show_fdinfo,
};
 727
/*
 * Socket operations for SOCK_DGRAM (unix_create() also maps SOCK_RAW to
 * this table).  accept and listen are the sock_no_* stubs here.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
        .show_fdinfo =  unix_show_fdinfo,
};
 753
/*
 * Socket operations for SOCK_SEQPACKET (selected in unix_create()).
 * Connection setup (connect/accept/listen) uses the same handlers as the
 * stream table, while poll uses unix_dgram_poll and sendmsg/recvmsg have
 * seqpacket-specific handlers.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
        .show_fdinfo =  unix_show_fdinfo,
};
 779
/*
 * Protocol descriptor passed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated sock large enough to hold a struct unix_sock.
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
 785
 786 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 787 {
 788         struct sock *sk = NULL;
 789         struct unix_sock *u;
 790
 791         atomic_long_inc(&unix_nr_socks);
 792         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 793                 goto out;
 794
 795         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 796         if (!sk)
 797                 goto out;
 798
 799         sock_init_data(sock, sk);
 800
 801         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 802         sk->sk_write_space      = unix_write_space;
 803         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 804         sk->sk_destruct         = unix_sock_destructor;
 805         u         = unix_sk(sk);
 806         u->path.dentry = NULL;
 807         u->path.mnt = NULL;
 808         spin_lock_init(&u->lock);
 809         atomic_long_set(&u->inflight, 0);
 810         INIT_LIST_HEAD(&u->link);
 811         mutex_init(&u->iolock); /* single task reading lock */
 812         mutex_init(&u->bindlock); /* single task binding lock */
 813         init_waitqueue_head(&u->peer_wait);
 814         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 815         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
 816         unix_insert_socket(unix_sockets_unbound(sk), sk);
 817 out:
 818         if (sk == NULL)
 819                 atomic_long_dec(&unix_nr_socks);
 820         else {
 821                 local_bh_disable();
 822                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 823                 local_bh_enable();
 824         }
 825         return sk;
 826 }
 827
 828 static int unix_create(struct net *net, struct socket *sock, int protocol,
 829                        int kern)
 830 {
 831         if (protocol && protocol != PF_UNIX)
 832                 return -EPROTONOSUPPORT;
 833
 834         sock->state = SS_UNCONNECTED;
 835
 836         switch (sock->type) {
 837         case SOCK_STREAM:
 838                 sock->ops = &unix_stream_ops;
 839                 break;
 840                 /*
 841                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 842                  *      nothing uses it.
 843                  */
 844         case SOCK_RAW:
 845                 sock->type = SOCK_DGRAM;
 846                 /* fall through */
 847         case SOCK_DGRAM:
 848                 sock->ops = &unix_dgram_ops;
 849                 break;
 850         case SOCK_SEQPACKET:
 851                 sock->ops = &unix_seqpacket_ops;
 852                 break;
 853         default:
 854                 return -ESOCKTNOSUPPORT;
 855         }
 856
 857         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 858 }
 859
 860 static int unix_release(struct socket *sock)
 861 {
 862         struct sock *sk = sock->sk;
 863
 864         if (!sk)
 865                 return 0;
 866
 867         unix_release_sock(sk, 0);
 868         sock->sk = NULL;
 869
 870         return 0;
 871 }
 872
/*
 * Bind @sock to an automatically chosen abstract address.
 *
 * The generated name is a leading NUL byte (sun_path[0], left zero by
 * kzalloc()) followed by five hex digits taken from the function-local
 * static counter "ordernum".
 *
 * Returns 0 on success or if the socket already has an address, the error
 * from mutex_lock_interruptible() if bindlock could not be taken, -ENOMEM
 * if the address could not be allocated, or -ENOSPC once every candidate
 * name tried turned out to be in use.
 */
static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        /* Already bound: nothing to do, report success. */
        err = 0;
        if (u->addr)
                goto out;

        err = -ENOMEM;
        /* Room for the header, sun_family and up to 16 bytes of name. */
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        refcount_set(&addr->refcnt, 1);

retry:
        /* len = hex digits + the leading NUL byte + sun_family. */
        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        /* Advance the counter, wrapping at 20 bits (five hex digits). */
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take long time if many names
                 * are already in use.
                 */
                cond_resched();
                /* Give up if all names seems to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        /* Same "hash ^ type" form that unix_bind() stores for abstract names. */
        addr->hash ^= sk->sk_type;

        /*
         * Still under unix_table_lock: take the socket off its current
         * chain, publish the address, and insert it on the chain selected
         * by the new hash.
         */
        __unix_remove_socket(sk);
        smp_store_release(&u->addr, addr);
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->bindlock);
        return err;
}
 933
/*
 * Look up the socket bound to @sunname.
 *
 * A non-zero sunname->sun_path[0] means a filesystem name: the path is
 * resolved (following symlinks), the caller needs write permission on the
 * inode, the inode must be a socket, and the socket is then found by inode.
 * Otherwise the name is abstract and is looked up by name and @hash.
 *
 * Returns the socket on success; callers in this file release it with
 * sock_put().  On failure returns NULL and stores a negative errno in
 * *@error (-ECONNREFUSED if nothing is bound there, -EPROTOTYPE if a
 * filesystem socket exists but its type differs from @type, or the error
 * from kern_path()/inode_permission()).  *@error is not written on success.
 */
static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *u;
        struct path path;
        int err = 0;

        if (sunname->sun_path[0]) {
                struct inode *inode;
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
                inode = d_backing_inode(path.dentry);
                err = inode_permission(inode, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(inode->i_mode))
                        goto put_fail;
                u = unix_find_socket_byinode(inode);
                if (!u)
                        goto put_fail;

                /* Only a lookup that is going to succeed updates atime. */
                if (u->sk_type == type)
                        touch_atime(&path);

                /* The path is no longer needed once the socket is found. */
                path_put(&path);

                err = -EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u = unix_find_socket_byname(net, sunname, len, type, hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->path.dentry;
                        /* Touch atime if the found socket has a path attached. */
                        if (dentry)
                                touch_atime(&unix_sk(u)->path);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}
 988
 989 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 990 {
 991         struct dentry *dentry;
 992         struct path path;
 993         int err = 0;
 994         /*
 995          * Get the parent directory, calculate the hash for last
 996          * component.
 997          */
 998         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 999         err = PTR_ERR(dentry);
1000         if (IS_ERR(dentry))
1001                 return err;
1002
1003         /*
1004          * All right, let's create it.
1005          */
1006         err = security_path_mknod(&path, dentry, mode, 0);
1007         if (!err) {
1008                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1009                 if (!err) {
1010                         res->mnt = mntget(path.mnt);
1011                         res->dentry = dget(dentry);
1012                 }
1013         }
1014         done_path_create(&path, dentry);
1015         return err;
1016 }
1017
/*
 * bind() handler (installed, for example, as .bind in unix_seqpacket_ops).
 *
 * An address that consists of sun_family only (addr_len == sizeof(short))
 * is handed to unix_autobind().  Otherwise a non-zero sun_path[0] selects a
 * filesystem socket, for which a node is created with unix_mknod(), and a
 * zero sun_path[0] selects an abstract name, which must not already be
 * present in the hash table.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a short or
 * non-AF_UNIX address or a socket that already has an address, -EADDRINUSE
 * if the name is taken, -ENOMEM on allocation failure, or the error
 * reported by unix_mkname(), unix_mknod() or mutex_lock_interruptible().
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
        struct path path = { };

        err = -EINVAL;
        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
            sunaddr->sun_family != AF_UNIX)
                goto out;

        /* Family only, no name: pick an abstract name automatically. */
        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        /* On success unix_mkname() returns the length to use from here on. */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (sun_path[0]) {
                /* Socket inode's mode bits filtered through the umask. */
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        /* An existing file at that path means "address in use". */
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        goto out;
                }
        }

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_put;

        /* A socket can only be bound once. */
        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        refcount_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                /*
                 * Filesystem socket: addr->hash is set to UNIX_HASH_SIZE and
                 * the chain is chosen from the inode number instead.
                 */
                addr->hash = UNIX_HASH_SIZE;
                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                /* Abstract socket: the name must be unique. */
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        /*
         * Still under unix_table_lock: take the socket off its current
         * chain, publish the address, and insert it on the chosen chain.
         */
        err = 0;
        __unix_remove_socket(sk);
        smp_store_release(&u->addr, addr);
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->bindlock);
out_put:
        /*
         * NOTE(review): on failure only the path reference is dropped here.
         * Nothing in this function removes the node that unix_mknod()
         * created, so a failure after a successful mknod (for example the
         * socket was already bound) appears to leave the file behind --
         * confirm.
         */
        if (err)
                path_put(&path);
out:
        return err;
}
1108
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * If the two are the same socket, or @sk2 is NULL, only @sk1 is locked.
 * Otherwise the socket at the lower address is locked first and the other
 * one is taken with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1123
/*
 * Counterpart of unix_state_double_lock(): @sk1 is always unlocked, and
 * @sk2 is unlocked as well when it is non-NULL and distinct from @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (!unlikely(sk1 == sk2) && sk2)
                unix_state_unlock(sk2);
}
1133
/*
 * connect() for datagram sockets: set, replace or clear the default peer.
 *
 * With an AF_UNSPEC address the current peer is cleared.  Otherwise the
 * target is looked up with unix_find_other(), both sockets are locked, and
 * the peer is replaced once unix_may_send() and the security hook agree.
 * The reference obtained from unix_find_other() is not dropped on success;
 * it stays with unix_peer(sk), and the reference of a replaced peer is put.
 *
 * Returns 0 on success or a negative errno (-EINVAL for a too short
 * address, -EPERM if unix_may_send() refuses, or the error reported by
 * unix_mkname(), unix_autobind(), unix_find_other() or the security hook).
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                /* With a NULL second socket only sk itself gets locked. */
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                /* Reconnecting to the same peer is not a disconnect. */
                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
1213
1214 static long unix_wait_for_peer(struct sock *other, long timeo)
1215         __releases(&unix_sk(other)->lock)
1216 {
1217         struct unix_sock *u = unix_sk(other);
1218         int sched;
1219         DEFINE_WAIT(wait);
1220
1221         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1222
1223         sched = !sock_flag(other, SOCK_DEAD) &&
1224                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1225                 unix_recvq_full(other);
1226
1227         unix_state_unlock(other);
1228
1229         if (sched)
1230                 timeo = schedule_timeout(timeo);
1231
1232         finish_wait(&u->peer_wait, &wait);
1233         return timeo;
1234 }
1235
/*
 * connect() for connection-oriented sockets (used as .connect in
 * unix_seqpacket_ops).
 *
 * A new sock (newsk) that will become the server-side end of the connection
 * and a one-byte skb are allocated up front.  The listener is then looked
 * up and locked, the caller's own state is checked and locked, newsk is
 * wired to the caller, and the skb is queued on the listener's receive
 * queue so that unix_accept() can pick newsk up from skb->sk.
 *
 * Returns 0 on success or a negative errno, among them -ECONNREFUSED if the
 * target is not listening or has shut down receiving, -EAGAIN if its queue
 * is full and no waiting is allowed, -EISCONN / -EINVAL depending on the
 * caller's current state, and -ENOMEM on allocation failure.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we did this after the state is locked,
           we would have to recheck everything again anyway.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* unix_wait_for_peer() releases other's state lock. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is tricky place. We need to grab our state lock and cannot
           drop lock on peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempt to grab lock.

           Well, and we have to recheck the state after socket locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* The state changed while we were taking the lock: start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock
         *
         * The contents of *(otheru->addr) and otheru->path
         * are seen fully set up here, since we have found
         * otheru in hash under unix_table_lock.  Insertion
         * into the hash chain we'd found it in had been done
         * in an earlier critical area protected by unix_table_lock,
         * the same one where we'd set *(otheru->addr) contents,
         * as well as otheru->path and otheru->addr itself.
         *
         * Using smp_store_release() here to set newu->addr
         * is enough to make those stores, as well as stores
         * to newu->path visible to anyone who gets newu->addr
         * by smp_load_acquire().  IOW, the same warranties
         * as for unix_sock instances bound in unix_bind() or
         * in unix_autobind().
         */
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }
        refcount_inc(&otheru->addr->refcnt);
        smp_store_release(&newu->addr, otheru->addr);

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Common failure exit: release whatever was set up so far. */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1424
1425 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1426 {
1427         struct sock *ska = socka->sk, *skb = sockb->sk;
1428
1429         /* Join our sockets back to back */
1430         sock_hold(ska);
1431         sock_hold(skb);
1432         unix_peer(ska) = skb;
1433         unix_peer(skb) = ska;
1434         init_peercred(ska);
1435         init_peercred(skb);
1436
1437         if (ska->sk_type != SOCK_DGRAM) {
1438                 ska->sk_state = TCP_ESTABLISHED;
1439                 skb->sk_state = TCP_ESTABLISHED;
1440                 socka->state  = SS_CONNECTED;
1441                 sockb->state  = SS_CONNECTED;
1442         }
1443         return 0;
1444 }
1445
1446 static void unix_sock_inherit_flags(const struct socket *old,
1447                                     struct socket *new)
1448 {
1449         if (test_bit(SOCK_PASSCRED, &old->flags))
1450                 set_bit(SOCK_PASSCRED, &new->flags);
1451         if (test_bit(SOCK_PASSSEC, &old->flags))
1452                 set_bit(SOCK_PASSSEC, &new->flags);
1453 }
1454
1455 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1456                        bool kern)
1457 {
1458         struct sock *sk = sock->sk;
1459         struct sock *tsk;
1460         struct sk_buff *skb;
1461         int err;
1462
1463         err = -EOPNOTSUPP;
1464         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1465                 goto out;
1466
1467         err = -EINVAL;
1468         if (sk->sk_state != TCP_LISTEN)
1469                 goto out;
1470
1471         /* If socket state is TCP_LISTEN it cannot change (for now...),
1472          * so that no locks are necessary.
1473          */
1474
1475         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1476         if (!skb) {
1477                 /* This means receive shutdown. */
1478                 if (err == 0)
1479                         err = -EINVAL;
1480                 goto out;
1481         }
1482
1483         tsk = skb->sk;
1484         skb_free_datagram(sk, skb);
1485         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1486
1487         /* attach accepted sock to socket */
1488         unix_state_lock(tsk);
1489         newsock->state = SS_CONNECTED;
1490         unix_sock_inherit_flags(sock, newsock);
1491         sock_graft(tsk, newsock);
1492         unix_state_unlock(tsk);
1493         return 0;
1494
1495 out:
1496         return err;
1497 }
1498
1499
1500 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1501 {
1502         struct sock *sk = sock->sk;
1503         struct unix_address *addr;
1504         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1505         int err = 0;
1506
1507         if (peer) {
1508                 sk = unix_peer_get(sk);
1509
1510                 err = -ENOTCONN;
1511                 if (!sk)
1512                         goto out;
1513                 err = 0;
1514         } else {
1515                 sock_hold(sk);
1516         }
1517
1518         addr = smp_load_acquire(&unix_sk(sk)->addr);
1519         if (!addr) {
1520                 sunaddr->sun_family = AF_UNIX;
1521                 sunaddr->sun_path[0] = 0;
1522                 err = sizeof(short);
1523         } else {
1524                 err = addr->len;
1525                 memcpy(sunaddr, addr->name, addr->len);
1526         }
1527         sock_put(sk);
1528 out:
1529         return err;
1530 }
1531
1532 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1533 {
1534         int err = 0;
1535
1536         UNIXCB(skb).pid  = get_pid(scm->pid);
1537         UNIXCB(skb).uid = scm->creds.uid;
1538         UNIXCB(skb).gid = scm->creds.gid;
1539         UNIXCB(skb).fp = NULL;
1540         unix_get_secdata(scm, skb);
1541         if (scm->fp && send_fds)
1542                 err = unix_attach_fds(scm, skb);
1543
1544         skb->destructor = unix_destruct_scm;
1545         return err;
1546 }
1547
1548 static bool unix_passcred_enabled(const struct socket *sock,
1549                                   const struct sock *other)
1550 {
1551         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1552                !other->sk_socket ||
1553                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1554 }
1555
1556 /*
1557  * Some apps rely on write() giving SCM_CREDENTIALS
1558  * We include credentials if source or destination socket
1559  * asserted SOCK_PASSCRED.
1560  */
1561 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1562                             const struct sock *other)
1563 {
1564         if (UNIXCB(skb).pid)
1565                 return;
1566         if (unix_passcred_enabled(sock, other)) {
1567                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1568                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1569         }
1570 }
1571
1572 static int maybe_init_creds(struct scm_cookie *scm,
1573                             struct socket *socket,
1574                             const struct sock *other)
1575 {
1576         int err;
1577         struct msghdr msg = { .msg_controllen = 0 };
1578
1579         err = scm_send(socket, &msg, scm, false);
1580         if (err)
1581                 return err;
1582
1583         if (unix_passcred_enabled(socket, other)) {
1584                 scm->pid = get_pid(task_tgid(current));
1585                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1586         }
1587         return err;
1588 }
1589
1590 static bool unix_skb_scm_eq(struct sk_buff *skb,
1591                             struct scm_cookie *scm)
1592 {
1593         const struct unix_skb_parms *u = &UNIXCB(skb);
1594
1595         return u->pid == scm->pid &&
1596                uid_eq(u->uid, scm->creds.uid) &&
1597                gid_eq(u->gid, scm->creds.gid) &&
1598                unix_secdata_eq(scm, skb);
1599 }
1600
1601 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1602 {
1603         struct scm_fp_list *fp = UNIXCB(skb).fp;
1604         struct unix_sock *u = unix_sk(sk);
1605
1606         if (unlikely(fp && fp->count))
1607                 atomic_add(fp->count, &u->scm_stat.nr_fds);
1608 }
1609
1610 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1611 {
1612         struct scm_fp_list *fp = UNIXCB(skb).fp;
1613         struct unix_sock *u = unix_sk(sk);
1614
1615         if (unlikely(fp && fp->count))
1616                 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1617 }
1618
1619 /*
1620  *      Send AF_UNIX data.
1621  */
1622
1623 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1624                               size_t len)
1625 {
1626         struct sock *sk = sock->sk;
1627         struct net *net = sock_net(sk);
1628         struct unix_sock *u = unix_sk(sk);
1629         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1630         struct sock *other = NULL;
1631         int namelen = 0; /* fake GCC */
1632         int err;
1633         unsigned int hash;
1634         struct sk_buff *skb;
1635         long timeo;
1636         struct scm_cookie scm;
1637         int data_len = 0;
1638         int sk_locked;
1639
1640         wait_for_unix_gc();
1641         err = scm_send(sock, msg, &scm, false);
1642         if (err < 0)
1643                 return err;
1644
1645         err = -EOPNOTSUPP;
1646         if (msg->msg_flags&MSG_OOB)
1647                 goto out;
1648
1649         if (msg->msg_namelen) {
1650                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1651                 if (err < 0)
1652                         goto out;
1653                 namelen = err;
1654         } else {
1655                 sunaddr = NULL;
1656                 err = -ENOTCONN;
1657                 other = unix_peer_get(sk);
1658                 if (!other)
1659                         goto out;
1660         }
1661
1662         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1663             && (err = unix_autobind(sock)) != 0)
1664                 goto out;
1665
1666         err = -EMSGSIZE;
1667         if (len > sk->sk_sndbuf - 32)
1668                 goto out;
1669
1670         if (len > SKB_MAX_ALLOC) {
1671                 data_len = min_t(size_t,
1672                                  len - SKB_MAX_ALLOC,
1673                                  MAX_SKB_FRAGS * PAGE_SIZE);
1674                 data_len = PAGE_ALIGN(data_len);
1675
1676                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1677         }
1678
1679         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1680                                    msg->msg_flags & MSG_DONTWAIT, &err,
1681                                    PAGE_ALLOC_COSTLY_ORDER);
1682         if (skb == NULL)
1683                 goto out;
1684
1685         err = unix_scm_to_skb(&scm, skb, true);
1686         if (err < 0)
1687                 goto out_free;
1688
1689         skb_put(skb, len - data_len);
1690         skb->data_len = data_len;
1691         skb->len = len;
1692         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1693         if (err)
1694                 goto out_free;
1695
1696         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1697
1698 restart:
1699         if (!other) {
1700                 err = -ECONNRESET;
1701                 if (sunaddr == NULL)
1702                         goto out_free;
1703
1704                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1705                                         hash, &err);
1706                 if (other == NULL)
1707                         goto out_free;
1708         }
1709
1710         if (sk_filter(other, skb) < 0) {
1711                 /* Toss the packet but do not return any error to the sender */
1712                 err = len;
1713                 goto out_free;
1714         }
1715
1716         sk_locked = 0;
1717         unix_state_lock(other);
1718 restart_locked:
1719         err = -EPERM;
1720         if (!unix_may_send(sk, other))
1721                 goto out_unlock;
1722
1723         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1724                 /*
1725                  *      Check with 1003.1g - what should
1726                  *      datagram error
1727                  */
1728                 unix_state_unlock(other);
1729                 sock_put(other);
1730
1731                 if (!sk_locked)
1732                         unix_state_lock(sk);
1733
1734                 err = 0;
1735                 if (unix_peer(sk) == other) {
1736                         unix_peer(sk) = NULL;
1737                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1738
1739                         unix_state_unlock(sk);
1740
1741                         unix_dgram_disconnected(sk, other);
1742                         sock_put(other);
1743                         err = -ECONNREFUSED;
1744                 } else {
1745                         unix_state_unlock(sk);
1746                 }
1747
1748                 other = NULL;
1749                 if (err)
1750                         goto out_free;
1751                 goto restart;
1752         }
1753
1754         err = -EPIPE;
1755         if (other->sk_shutdown & RCV_SHUTDOWN)
1756                 goto out_unlock;
1757
1758         if (sk->sk_type != SOCK_SEQPACKET) {
1759                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1760                 if (err)
1761                         goto out_unlock;
1762         }
1763
1764         /* other == sk && unix_peer(other) != sk if
1765          * - unix_peer(sk) == NULL, destination address bound to sk
1766          * - unix_peer(sk) == sk by time of get but disconnected before lock
1767          */
1768         if (other != sk &&
1769             unlikely(unix_peer(other) != sk &&
1770             unix_recvq_full_lockless(other))) {
1771                 if (timeo) {
1772                         timeo = unix_wait_for_peer(other, timeo);
1773
1774                         err = sock_intr_errno(timeo);
1775                         if (signal_pending(current))
1776                                 goto out_free;
1777
1778                         goto restart;
1779                 }
1780
1781                 if (!sk_locked) {
1782                         unix_state_unlock(other);
1783                         unix_state_double_lock(sk, other);
1784                 }
1785
1786                 if (unix_peer(sk) != other ||
1787                     unix_dgram_peer_wake_me(sk, other)) {
1788                         err = -EAGAIN;
1789                         sk_locked = 1;
1790                         goto out_unlock;
1791                 }
1792
1793                 if (!sk_locked) {
1794                         sk_locked = 1;
1795                         goto restart_locked;
1796                 }
1797         }
1798
1799         if (unlikely(sk_locked))
1800                 unix_state_unlock(sk);
1801
1802         if (sock_flag(other, SOCK_RCVTSTAMP))
1803                 __net_timestamp(skb);
1804         maybe_add_creds(skb, sock, other);
1805         scm_stat_add(other, skb);
1806         skb_queue_tail(&other->sk_receive_queue, skb);
1807         unix_state_unlock(other);
1808         other->sk_data_ready(other);
1809         sock_put(other);
1810         scm_destroy(&scm);
1811         return len;
1812
1813 out_unlock:
1814         if (sk_locked)
1815                 unix_state_unlock(sk);
1816         unix_state_unlock(other);
1817 out_free:
1818         kfree_skb(skb);
1819 out:
1820         if (other)
1821                 sock_put(other);
1822         scm_destroy(&scm);
1823         return err;
1824 }
1825
1826 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1827  * bytes, and a minimum of a full page.
      *
      * UNIX_SKB_FRAGS_SZ is the bound on the paged (non-linear) part of one
      * stream skb built by unix_stream_sendmsg(); that function also passes
      * get_order(UNIX_SKB_FRAGS_SZ) as the last argument of
      * sock_alloc_send_pskb().
1828  */
1829 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1830
/*
 * unix_stream_sendmsg - queue the payload of @msg on the connected peer.
 * @sock: sending socket
 * @msg:  message to send; MSG_OOB is refused with -EOPNOTSUPP, and a non-zero
 *        msg_namelen is refused with -EISCONN (socket in TCP_ESTABLISHED) or
 *        -EOPNOTSUPP (any other state)
 * @len:  number of payload bytes requested
 *
 * The payload is cut into skbs inside a loop.  Each skb is filled from
 * msg->msg_iter and appended to the peer's sk_receive_queue while the peer's
 * state lock is held; the peer's sk_data_ready() is called after every skb.
 *
 * Returns the number of bytes queued when that is non-zero.  Otherwise
 * returns a negative errno: -ENOTCONN without a peer, -EPIPE when this side
 * has SEND_SHUTDOWN set or the peer is dead / has RCV_SHUTDOWN set, or the
 * error reported by scm_send(), skb allocation, unix_scm_to_skb() or the
 * copy from the iterator.  On the -EPIPE paths SIGPIPE is sent to the caller
 * only if nothing was queued and MSG_NOSIGNAL is not set.
 */
1831 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1832                                size_t len)
1833 {
1834         struct sock *sk = sock->sk;
1835         struct sock *other = NULL;
1836         int err, size;
1837         struct sk_buff *skb;
1838         int sent = 0;
1839         struct scm_cookie scm;
1840         bool fds_sent = false;
1841         int data_len;
1842
1843         wait_for_unix_gc();
1844         err = scm_send(sock, msg, &scm, false);
1845         if (err < 0)
1846                 return err;
1847
1848         err = -EOPNOTSUPP;
1849         if (msg->msg_flags&MSG_OOB)
1850                 goto out_err;
1851
             /* A destination address is never accepted here; only the error
              * code depends on the connection state.
              */
1852         if (msg->msg_namelen) {
1853                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1854                 goto out_err;
1855         } else {
1856                 err = -ENOTCONN;
1857                 other = unix_peer(sk);
1858                 if (!other)
1859                         goto out_err;
1860         }
1861
             /* err is still -ENOTCONN here, but pipe_err replaces it with -EPIPE */
1862         if (sk->sk_shutdown & SEND_SHUTDOWN)
1863                 goto pipe_err;
1864
1865         while (sent < len) {
1866                 size = len - sent;
1867
1868                 /* Keep two messages in the pipe so it schedules better */
1869                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1870
1871                 /* allow fallback to order-0 allocations */
1872                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1873
                     /* Whatever exceeds SKB_MAX_HEAD(0) is requested as paged
                      * data, rounded up to whole pages but never more than
                      * size itself.
                      */
1874                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1875
1876                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1877
1878                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1879                                            msg->msg_flags & MSG_DONTWAIT, &err,
1880                                            get_order(UNIX_SKB_FRAGS_SZ));
1881                 if (!skb)
1882                         goto out_err;
1883
1884                 /* Only send the fds in the first buffer */
1885                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1886                 if (err < 0) {
1887                         kfree_skb(skb);
1888                         goto out_err;
1889                 }
1890                 fds_sent = true;
1891
                     /* size - data_len bytes are linear; data_len bytes are
                      * accounted as paged data before the copy fills the skb.
                      */
1892                 skb_put(skb, size - data_len);
1893                 skb->data_len = data_len;
1894                 skb->len = size;
1895                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1896                 if (err) {
1897                         kfree_skb(skb);
1898                         goto out_err;
1899                 }
1900
1901                 unix_state_lock(other);
1902
                     /* Peer is dead or no longer receiving: drop this skb and
                      * take the -EPIPE exit.
                      */
1903                 if (sock_flag(other, SOCK_DEAD) ||
1904                     (other->sk_shutdown & RCV_SHUTDOWN))
1905                         goto pipe_err_free;
1906
1907                 maybe_add_creds(skb, sock, other);
1908                 scm_stat_add(other, skb);
1909                 skb_queue_tail(&other->sk_receive_queue, skb);
1910                 unix_state_unlock(other);
1911                 other->sk_data_ready(other);
1912                 sent += size;
1913         }
1914
1915         scm_destroy(&scm);
1916
1917         return sent;
1918
1919 pipe_err_free:
1920         unix_state_unlock(other);
1921         kfree_skb(skb);
1922 pipe_err:
             /* Signal only when no byte of this call was delivered */
1923         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1924                 send_sig(SIGPIPE, current, 0);
1925         err = -EPIPE;
1926 out_err:
1927         scm_destroy(&scm);
             /* A partial send reports the byte count instead of the error */
1928         return sent ? : err;
1929 }
1930
/*
 * unix_stream_sendpage - append part of a page to the peer's receive queue.
 * @socket: sending socket
 * @page:   page holding the data
 * @offset: start of the data inside @page
 * @size:   number of bytes to append
 * @flags:  MSG_* flags; MSG_OOB is refused with -EOPNOTSUPP
 *
 * The page range is attached with skb_append_pagefrags() either to the skb
 * currently at the tail of the peer's receive queue, when unix_skb_scm_eq()
 * accepts it for this sender's scm data, or to a newly allocated skb that is
 * then queued behind it.
 *
 * Allocating the new skb requires dropping both the peer's state lock and its
 * iolock.  The "if (false)" block below is therefore reachable only through
 * "goto alloc_skb"; after the allocation, execution falls through into the
 * normal path, which re-takes the locks and re-checks every condition.
 *
 * Returns @size on success, or a negative errno: -ENOTCONN without an
 * established peer, -EAGAIN / -ERESTARTSYS when taking the iolock is
 * interrupted, -EPIPE on shutdown or a dead peer (SIGPIPE is sent unless
 * MSG_NOSIGNAL is set), or the error from allocation, maybe_init_creds() or
 * unix_scm_to_skb().
 */
1931 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1932                                     int offset, size_t size, int flags)
1933 {
1934         int err;
1935         bool send_sigpipe = false;
1936         bool init_scm = true;
1937         struct scm_cookie scm;
1938         struct sock *other, *sk = socket->sk;
1939         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1940
1941         if (flags & MSG_OOB)
1942                 return -EOPNOTSUPP;
1943
1944         other = unix_peer(sk);
1945         if (!other || sk->sk_state != TCP_ESTABLISHED)
1946                 return -ENOTCONN;
1947
             /* Never entered from the top; only the alloc_skb label is used */
1948         if (false) {
1949 alloc_skb:
1950                 unix_state_unlock(other);
1951                 mutex_unlock(&unix_sk(other)->iolock);
1952                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1953                                               &err, 0);
1954                 if (!newskb)
1955                         goto err;
1956         }
1957
1958         /* we must acquire iolock as we modify already present
1959          * skbs in the sk_receive_queue and mess with skb->len
1960          */
1961         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1962         if (err) {
1963                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1964                 goto err;
1965         }
1966
1967         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1968                 err = -EPIPE;
1969                 send_sigpipe = true;
1970                 goto err_unlock;
1971         }
1972
1973         unix_state_lock(other);
1974
1975         if (sock_flag(other, SOCK_DEAD) ||
1976             other->sk_shutdown & RCV_SHUTDOWN) {
1977                 err = -EPIPE;
1978                 send_sigpipe = true;
1979                 goto err_state_unlock;
1980         }
1981
             /* Done once per call; init_scm also tells the error path whether
              * scm needs scm_destroy().
              */
1982         if (init_scm) {
1983                 err = maybe_init_creds(&scm, socket, other);
1984                 if (err)
1985                         goto err_state_unlock;
1986                 init_scm = false;
1987         }
1988
             /* tail is the queue tail that was rejected before the locks were
              * dropped.  If the same skb is still at the tail, use newskb;
              * otherwise judge the current tail afresh.
              */
1989         skb = skb_peek_tail(&other->sk_receive_queue);
1990         if (tail && tail == skb) {
1991                 skb = newskb;
1992         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1993                 if (newskb) {
1994                         skb = newskb;
1995                 } else {
1996                         tail = skb;
1997                         goto alloc_skb;
1998                 }
1999         } else if (newskb) {
2000                 /* this is fast path, we don't necessarily need to
2001                  * call to kfree_skb even though with newskb == NULL
2002                  * this - does no harm
2003                  */
2004                 consume_skb(newskb);
2005                 newskb = NULL;
2006         }
2007
             /* The chosen skb cannot take the page: remember it and retry
              * with a new skb.
              */
2008         if (skb_append_pagefrags(skb, page, offset, size)) {
2009                 tail = skb;
2010                 goto alloc_skb;
2011         }
2012
             /* Account the appended bytes on the skb and on the sender's
              * sk_wmem_alloc.
              */
2013         skb->len += size;
2014         skb->data_len += size;
2015         skb->truesize += size;
2016         refcount_add(size, &sk->sk_wmem_alloc);
2017
             /* A new skb still has to receive the scm data and be queued */
2018         if (newskb) {
2019                 err = unix_scm_to_skb(&scm, skb, false);
2020                 if (err)
2021                         goto err_state_unlock;
2022                 spin_lock(&other->sk_receive_queue.lock);
2023                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2024                 spin_unlock(&other->sk_receive_queue.lock);
2025         }
2026
2027         unix_state_unlock(other);
2028         mutex_unlock(&unix_sk(other)->iolock);
2029
2030         other->sk_data_ready(other);
2031         scm_destroy(&scm);
2032         return size;
2033
2034 err_state_unlock:
2035         unix_state_unlock(other);
2036 err_unlock:
2037         mutex_unlock(&unix_sk(other)->iolock);
2038 err:
2039         kfree_skb(newskb);
2040         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2041                 send_sig(SIGPIPE, current, 0);
             /* scm is valid only after maybe_init_creds() succeeded */
2042         if (!init_scm)
2043                 scm_destroy(&scm);
2044         return err;
2045 }
2046
2047 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2048                                   size_t len)
2049 {
2050         int err;
2051         struct sock *sk = sock->sk;
2052
2053         err = sock_error(sk);
2054         if (err)
2055                 return err;
2056
2057         if (sk->sk_state != TCP_ESTABLISHED)
2058                 return -ENOTCONN;
2059
2060         if (msg->msg_namelen)
2061                 msg->msg_namelen = 0;
2062
2063         return unix_dgram_sendmsg(sock, msg, len);
2064 }
2065
2066 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2067                                   size_t size, int flags)
2068 {
2069         struct sock *sk = sock->sk;
2070
2071         if (sk->sk_state != TCP_ESTABLISHED)
2072                 return -ENOTCONN;
2073
2074         return unix_dgram_recvmsg(sock, msg, size, flags);
2075 }
2076
2077 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2078 {
2079         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2080
2081         if (addr) {
2082                 msg->msg_namelen = addr->len;
2083                 memcpy(msg->msg_name, addr->name, addr->len);
2084         }
2085 }
2086
/*
 * unix_dgram_recvmsg - receive one datagram from the socket's receive queue.
 * @sock:  receiving socket
 * @msg:   destination; msg_name, if set, receives the address of the socket
 *         recorded in skb->sk
 * @size:  capacity offered by the caller
 * @flags: MSG_* flags; MSG_OOB is refused with -EOPNOTSUPP
 *
 * An skb is fetched with the socket's iolock held, and the iolock stays held
 * until the skb has been processed.  If none is available and the error is
 * -EAGAIN, the function waits for more packets while the receive timeout
 * lasts.
 *
 * Returns the number of bytes copied, or, when MSG_TRUNC is set in @flags,
 * the length of the datagram past the peek offset.  Returns a negative errno
 * on failure.  A SOCK_SEQPACKET socket with RCV_SHUTDOWN set returns 0
 * instead of -EAGAIN.
 */
2087 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2088                               size_t size, int flags)
2089 {
2090         struct scm_cookie scm;
2091         struct sock *sk = sock->sk;
2092         struct unix_sock *u = unix_sk(sk);
2093         struct sk_buff *skb, *last;
2094         long timeo;
2095         int skip;
2096         int err;
2097
2098         err = -EOPNOTSUPP;
2099         if (flags&MSG_OOB)
2100                 goto out;
2101
2102         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2103
             /* Leaves the loop with iolock held if and only if skb != NULL */
2104         do {
2105                 mutex_lock(&u->iolock);
2106
2107                 skip = sk_peek_offset(sk, flags);
2108                 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2109                                               &skip, &err, &last);
2110                 if (skb) {
2111                         if (!(flags & MSG_PEEK))
2112                                 scm_stat_del(sk, skb);
2113                         break;
2114                 }
2115
2116                 mutex_unlock(&u->iolock);
2117
2118                 if (err != -EAGAIN)
2119                         break;
2120         } while (timeo &&
2121                  !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2122                                               &err, &timeo, last));
2123
2124         if (!skb) { /* implies iolock unlocked */
2125                 unix_state_lock(sk);
2126                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2127                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2128                     (sk->sk_shutdown & RCV_SHUTDOWN))
2129                         err = 0;
2130                 unix_state_unlock(sk);
2131                 goto out;
2132         }
2133
             /* Wake anything sleeping on this socket's peer_wait queue
              * (presumably senders that found the queue full - confirm).
              */
2134         if (wq_has_sleeper(&u->peer_wait))
2135                 wake_up_interruptible_sync_poll(&u->peer_wait,
2136                                                 EPOLLOUT | EPOLLWRNORM |
2137                                                 EPOLLWRBAND);
2138
2139         if (msg->msg_name)
2140                 unix_copy_addr(msg, skb->sk);
2141
             /* Clamp to what is left of the datagram past the peek offset;
              * flag truncation when the caller's buffer is too small.
              */
2142         if (size > skb->len - skip)
2143                 size = skb->len - skip;
2144         else if (size < skb->len - skip)
2145                 msg->msg_flags |= MSG_TRUNC;
2146
2147         err = skb_copy_datagram_msg(skb, skip, msg, size);
2148         if (err)
2149                 goto out_free;
2150
2151         if (sock_flag(sk, SOCK_RCVTSTAMP))
2152                 __sock_recv_timestamp(msg, sk, skb);
2153
2154         memset(&scm, 0, sizeof(scm));
2155
2156         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2157         unix_set_secdata(&scm, skb);
2158
             /* A real read takes the fds off the skb; a peek duplicates them */
2159         if (!(flags & MSG_PEEK)) {
2160                 if (UNIXCB(skb).fp)
2161                         unix_detach_fds(&scm, skb);
2162
2163                 sk_peek_offset_bwd(sk, skb->len);
2164         } else {
2165                 /* It is questionable: on PEEK we could:
2166                    - do not return fds - good, but too simple 8)
2167                    - return fds, and do not return them on read (old strategy,
2168                      apparently wrong)
2169                    - clone fds (I chose it for now, it is the most universal
2170                      solution)
2171
2172                    POSIX 1003.1g does not actually define this clearly
2173                    at all. POSIX 1003.1g doesn't define a lot of things
2174                    clearly however!
2175
2176                 */
2177
2178                 sk_peek_offset_fwd(sk, size);
2179
2180                 if (UNIXCB(skb).fp)
2181                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2182         }
             /* MSG_TRUNC in flags asks for the real length, not the copied one */
2183         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2184
2185         scm_recv(sock, msg, &scm, flags);
2186
2187 out_free:
2188         skb_free_datagram(sk, skb);
2189         mutex_unlock(&u->iolock);
2190 out:
2191         return err;
2192 }
2193
2194 /*
2195  *      Sleep until more data has arrived. But check for races..
2196  */
2197 static long unix_stream_data_wait(struct sock *sk, long timeo,
2198                                   struct sk_buff *last, unsigned int last_len,
2199                                   bool freezable)
2200 {
2201         struct sk_buff *tail;
2202         DEFINE_WAIT(wait);
2203
2204         unix_state_lock(sk);
2205
2206         for (;;) {
2207                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2208
2209                 tail = skb_peek_tail(&sk->sk_receive_queue);
2210                 if (tail != last ||
2211                     (tail && tail->len != last_len) ||
2212                     sk->sk_err ||
2213                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2214                     signal_pending(current) ||
2215                     !timeo)
2216                         break;
2217
2218                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2219                 unix_state_unlock(sk);
2220                 if (freezable)
2221                         timeo = freezable_schedule_timeout(timeo);
2222                 else
2223                         timeo = schedule_timeout(timeo);
2224                 unix_state_lock(sk);
2225
2226                 if (sock_flag(sk, SOCK_DEAD))
2227                         break;
2228
2229                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2230         }
2231
2232         finish_wait(sk_sleep(sk), &wait);
2233         unix_state_unlock(sk);
2234         return timeo;
2235 }
2236
2237 static unsigned int unix_skb_len(const struct sk_buff *skb)
2238 {
2239         return skb->len - UNIXCB(skb).consumed;
2240 }
2241
/*
 * Parameters of one unix_stream_read_generic() call.
 */
2242 struct unix_stream_read_state {
             /* Moves data out of an skb.  Called as
              * recv_actor(skb, skip, chunk, state); returns the number of
              * bytes handled, or a negative value on failure.
              */
2243         int (*recv_actor)(struct sk_buff *, int, int,
2244                           struct unix_stream_read_state *);
2245         struct socket *socket;          /* socket being read from */
2246         struct msghdr *msg;             /* recvmsg destination; may be NULL */
2247         struct pipe_inode_info *pipe;   /* destination for the splice actor */
2248         size_t size;                    /* maximum number of bytes to read */
2249         int flags;                      /* MSG_* flags for this read */
2250         unsigned int splice_flags;      /* passed on to skb_splice_bits() */
2251 };
2252
/*
 * unix_stream_read_generic - common receive loop for stream sockets.
 * @state:     request description (socket, destination, size, flags, actor)
 * @freezable: passed to unix_stream_data_wait() to choose the sleep variant
 *
 * Walks the socket's receive queue and hands each skb to state->recv_actor
 * until @state->size bytes were transferred or one of the stop conditions
 * below is hit.  The socket's iolock is held for the whole walk except while
 * sleeping for more data; the state lock is held only around queue
 * inspection.
 *
 * Credentials and fds found on the skbs are collected in a scm_cookie and
 * delivered through scm_recv() when state->msg is set; otherwise they are
 * destroyed.
 *
 * Returns the number of bytes transferred when that is non-zero, otherwise
 * the error code (0 when there is neither).  -EINVAL is returned if the
 * socket is not in TCP_ESTABLISHED state and -EOPNOTSUPP for MSG_OOB.  If the
 * actor fails before anything was transferred, -EFAULT is returned.
 */
2253 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2254                                     bool freezable)
2255 {
2256         struct scm_cookie scm;
2257         struct socket *sock = state->socket;
2258         struct sock *sk = sock->sk;
2259         struct unix_sock *u = unix_sk(sk);
2260         int copied = 0;
2261         int flags = state->flags;
2262         int noblock = flags & MSG_DONTWAIT;
2263         bool check_creds = false;
2264         int target;
2265         int err = 0;
2266         long timeo;
2267         int skip;
2268         size_t size = state->size;
2269         unsigned int last_len;
2270
2271         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2272                 err = -EINVAL;
2273                 goto out;
2274         }
2275
2276         if (unlikely(flags & MSG_OOB)) {
2277                 err = -EOPNOTSUPP;
2278                 goto out;
2279         }
2280
             /* target: byte count that lets the read return once the queue
              * runs empty (see the skb == NULL branch below).
              */
2281         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2282         timeo = sock_rcvtimeo(sk, noblock);
2283
2284         memset(&scm, 0, sizeof(scm));
2285
2286         /* Lock the socket to prevent queue disordering
2287          * while sleeps in memcpy_tomsg
2288          */
2289         mutex_lock(&u->iolock);
2290
             /* A negative peek offset is treated as zero */
2291         skip = max(sk_peek_offset(sk, flags), 0);
2292
2293         do {
2294                 int chunk;
2295                 bool drop_skb;
2296                 struct sk_buff *skb, *last;
2297
                     /* redo: start again from the head of the queue */
2298 redo:
2299                 unix_state_lock(sk);
2300                 if (sock_flag(sk, SOCK_DEAD)) {
2301                         err = -ECONNRESET;
2302                         goto unlock;
2303                 }
2304                 last = skb = skb_peek(&sk->sk_receive_queue);
2305                 last_len = last ? last->len : 0;
                     /* again: entered with the state lock held; skb may be NULL */
2306 again:
2307                 if (skb == NULL) {
2308                         if (copied >= target)
2309                                 goto unlock;
2310
2311                         /*
2312                          *      POSIX 1003.1g mandates this order.
2313                          */
2314
2315                         err = sock_error(sk);
2316                         if (err)
2317                                 goto unlock;
2318                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2319                                 goto unlock;
2320
2321                         unix_state_unlock(sk);
2322                         if (!timeo) {
2323                                 err = -EAGAIN;
2324                                 break;
2325                         }
2326
                             /* iolock is released for the duration of the sleep */
2327                         mutex_unlock(&u->iolock);
2328
2329                         timeo = unix_stream_data_wait(sk, timeo, last,
2330                                                       last_len, freezable);
2331
                             /* Neither lock is held here, so leave through
                              * out, which bypasses the unlock and scm_recv().
                              */
2332                         if (signal_pending(current)) {
2333                                 err = sock_intr_errno(timeo);
2334                                 scm_destroy(&scm);
2335                                 goto out;
2336                         }
2337
2338                         mutex_lock(&u->iolock);
2339                         goto redo;
2340 unlock:
2341                         unix_state_unlock(sk);
2342                         break;
2343                 }
2344
                     /* Step over skbs that lie entirely before the peek offset */
2345                 while (skip >= unix_skb_len(skb)) {
2346                         skip -= unix_skb_len(skb);
2347                         last = skb;
2348                         last_len = skb->len;
2349                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2350                         if (!skb)
2351                                 goto again;
2352                 }
2353
2354                 unix_state_unlock(sk);
2355
2356                 if (check_creds) {
2357                         /* Never glue messages from different writers */
2358                         if (!unix_skb_scm_eq(skb, &scm))
2359                                 break;
2360                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2361                         /* Copy credentials */
2362                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2363                         unix_set_secdata(&scm, skb);
2364                         check_creds = true;
2365                 }
2366
2367                 /* Copy address just once */
2368                 if (state->msg && state->msg->msg_name) {
2369                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2370                                          state->msg->msg_name);
2371                         unix_copy_addr(state->msg, skb->sk);
2372                         sunaddr = NULL;
2373                 }
2374
                     /* Take an extra reference for the duration of the actor
                      * call; consume_skb() below drops it again.
                      */
2375                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2376                 skb_get(skb);
2377                 chunk = state->recv_actor(skb, skip, chunk, state);
2378                 drop_skb = !unix_skb_len(skb);
2379                 /* skb is only safe to use if !drop_skb */
2380                 consume_skb(skb);
2381                 if (chunk < 0) {
2382                         if (copied == 0)
2383                                 copied = -EFAULT;
2384                         break;
2385                 }
2386                 copied += chunk;
2387                 size -= chunk;
2388
2389                 if (drop_skb) {
2390                         /* the skb was touched by a concurrent reader;
2391                          * we should not expect anything from this skb
2392                          * anymore and assume it invalid - we can be
2393                          * sure it was dropped from the socket queue
2394                          *
2395                          * let's report a short read
2396                          */
2397                         err = 0;
2398                         break;
2399                 }
2400
2401                 /* Mark read part of skb as used */
2402                 if (!(flags & MSG_PEEK)) {
2403                         UNIXCB(skb).consumed += chunk;
2404
2405                         sk_peek_offset_bwd(sk, chunk);
2406
2407                         if (UNIXCB(skb).fp) {
2408                                 scm_stat_del(sk, skb);
2409                                 unix_detach_fds(&scm, skb);
2410                         }
2411
                             /* Unread bytes remain in this skb: stop here */
2412                         if (unix_skb_len(skb))
2413                                 break;
2414
2415                         skb_unlink(skb, &sk->sk_receive_queue);
2416                         consume_skb(skb);
2417
                             /* Stop once fds have been collected in scm */
2418                         if (scm.fp)
2419                                 break;
2420                 } else {
2421                         /* It is questionable, see note in unix_dgram_recvmsg.
2422                          */
2423                         if (UNIXCB(skb).fp)
2424                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2425
2426                         sk_peek_offset_fwd(sk, chunk);
2427
2428                         if (UNIXCB(skb).fp)
2429                                 break;
2430
                             /* Peeking continues with the next skb, from its
                              * first unread byte.
                              */
2431                         skip = 0;
2432                         last = skb;
2433                         last_len = skb->len;
2434                         unix_state_lock(sk);
2435                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2436                         if (skb)
2437                                 goto again;
2438                         unix_state_unlock(sk);
2439                         break;
2440                 }
2441         } while (size);
2442
2443         mutex_unlock(&u->iolock);
2444         if (state->msg)
2445                 scm_recv(sock, state->msg, &scm, flags);
2446         else
2447                 scm_destroy(&scm);
2448 out:
2449         return copied ? : err;
2450 }
2451
2452 static int unix_stream_read_actor(struct sk_buff *skb,
2453                                   int skip, int chunk,
2454                                   struct unix_stream_read_state *state)
2455 {
2456         int ret;
2457
2458         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2459                                     state->msg, chunk);
2460         return ret ?: chunk;
2461 }
2462
2463 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2464                                size_t size, int flags)
2465 {
2466         struct unix_stream_read_state state = {
2467                 .recv_actor = unix_stream_read_actor,
2468                 .socket = sock,
2469                 .msg = msg,
2470                 .size = size,
2471                 .flags = flags
2472         };
2473
2474         return unix_stream_read_generic(&state, true);
2475 }
2476
2477 static int unix_stream_splice_actor(struct sk_buff *skb,
2478                                     int skip, int chunk,
2479                                     struct unix_stream_read_state *state)
2480 {
2481         return skb_splice_bits(skb, state->socket->sk,
2482                                UNIXCB(skb).consumed + skip,
2483                                state->pipe, chunk, state->splice_flags);
2484 }
2485
2486 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2487                                        struct pipe_inode_info *pipe,
2488                                        size_t size, unsigned int flags)
2489 {
2490         struct unix_stream_read_state state = {
2491                 .recv_actor = unix_stream_splice_actor,
2492                 .socket = sock,
2493                 .pipe = pipe,
2494                 .size = size,
2495                 .splice_flags = flags,
2496         };
2497
2498         if (unlikely(*ppos))
2499                 return -ESPIPE;
2500
2501         if (sock->file->f_flags & O_NONBLOCK ||
2502             flags & SPLICE_F_NONBLOCK)
2503                 state.flags = MSG_DONTWAIT;
2504
2505         return unix_stream_read_generic(&state, false);
2506 }
2507
2508 static int unix_shutdown(struct socket *sock, int mode)
2509 {
2510         struct sock *sk = sock->sk;
2511         struct sock *other;
2512
2513         if (mode < SHUT_RD || mode > SHUT_RDWR)
2514                 return -EINVAL;
2515         /* This maps:
2516          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2517          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2518          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2519          */
2520         ++mode;
2521
2522         unix_state_lock(sk);
2523         sk->sk_shutdown |= mode;
2524         other = unix_peer(sk);
2525         if (other)
2526                 sock_hold(other);
2527         unix_state_unlock(sk);
2528         sk->sk_state_change(sk);
2529
2530         if (other &&
2531                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2532
2533                 int peer_mode = 0;
2534
2535                 if (mode&RCV_SHUTDOWN)
2536                         peer_mode |= SEND_SHUTDOWN;
2537                 if (mode&SEND_SHUTDOWN)
2538                         peer_mode |= RCV_SHUTDOWN;
2539                 unix_state_lock(other);
2540                 other->sk_shutdown |= peer_mode;
2541                 unix_state_unlock(other);
2542                 other->sk_state_change(other);
2543                 if (peer_mode == SHUTDOWN_MASK)
2544                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2545                 else if (peer_mode & RCV_SHUTDOWN)
2546                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2547         }
2548         if (other)
2549                 sock_put(other);
2550
2551         return 0;
2552 }
2553
2554 long unix_inq_len(struct sock *sk)
2555 {
2556         struct sk_buff *skb;
2557         long amount = 0;
2558
2559         if (sk->sk_state == TCP_LISTEN)
2560                 return -EINVAL;
2561
2562         spin_lock(&sk->sk_receive_queue.lock);
2563         if (sk->sk_type == SOCK_STREAM ||
2564             sk->sk_type == SOCK_SEQPACKET) {
2565                 skb_queue_walk(&sk->sk_receive_queue, skb)
2566                         amount += unix_skb_len(skb);
2567         } else {
2568                 skb = skb_peek(&sk->sk_receive_queue);
2569                 if (skb)
2570                         amount = skb->len;
2571         }
2572         spin_unlock(&sk->sk_receive_queue.lock);
2573
2574         return amount;
2575 }
2576 EXPORT_SYMBOL_GPL(unix_inq_len);
2577
/* Value of sk_wmem_alloc_get() for @sk, widened to long */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2583
2584 static int unix_open_file(struct sock *sk)
2585 {
2586         struct path path;
2587         struct file *f;
2588         int fd;
2589
2590         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2591                 return -EPERM;
2592
2593         if (!smp_load_acquire(&unix_sk(sk)->addr))
2594                 return -ENOENT;
2595
2596         path = unix_sk(sk)->path;
2597         if (!path.dentry)
2598                 return -ENOENT;
2599
2600         path_get(&path);
2601
2602         fd = get_unused_fd_flags(O_CLOEXEC);
2603         if (fd < 0)
2604                 goto out;
2605
2606         f = dentry_open(&path, O_PATH, current_cred());
2607         if (IS_ERR(f)) {
2608                 put_unused_fd(fd);
2609                 fd = PTR_ERR(f);
2610                 goto out;
2611         }
2612
2613         fd_install(fd, f);
2614 out:
2615         path_put(&path);
2616
2617         return fd;
2618 }
2619
2620 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2621 {
2622         struct sock *sk = sock->sk;
2623         long amount = 0;
2624         int err;
2625
2626         switch (cmd) {
2627         case SIOCOUTQ:
2628                 amount = unix_outq_len(sk);
2629                 err = put_user(amount, (int __user *)arg);
2630                 break;
2631         case SIOCINQ:
2632                 amount = unix_inq_len(sk);
2633                 if (amount < 0)
2634                         err = amount;
2635                 else
2636                         err = put_user(amount, (int __user *)arg);
2637                 break;
2638         case SIOCUNIXFILE:
2639                 err = unix_open_file(sk);
2640                 break;
2641         default:
2642                 err = -ENOIOCTLCMD;
2643                 break;
2644         }
2645         return err;
2646 }
2647
#ifdef CONFIG_COMPAT
/*
 * unix_compat_ioctl - compat entry point for unix_ioctl()
 *
 * Converts @arg with compat_ptr() and hands the command to unix_ioctl()
 * unchanged.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        void __user *uptr = compat_ptr(arg);

        return unix_ioctl(sock, cmd, (unsigned long)uptr);
}
#endif
2654
2655 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2656 {
2657         struct sock *sk = sock->sk;
2658         __poll_t mask;
2659
2660         sock_poll_wait(file, sock, wait);
2661         mask = 0;
2662
2663         /* exceptional events? */
2664         if (sk->sk_err)
2665                 mask |= EPOLLERR;
2666         if (sk->sk_shutdown == SHUTDOWN_MASK)
2667                 mask |= EPOLLHUP;
2668         if (sk->sk_shutdown & RCV_SHUTDOWN)
2669                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2670
2671         /* readable? */
2672         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2673                 mask |= EPOLLIN | EPOLLRDNORM;
2674
2675         /* Connection-based need to check for termination and startup */
2676         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2677             sk->sk_state == TCP_CLOSE)
2678                 mask |= EPOLLHUP;
2679
2680         /*
2681          * we set writable also when the other side has shut down the
2682          * connection. This prevents stuck sockets.
2683          */
2684         if (unix_writable(sk))
2685                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2686
2687         return mask;
2688 }
2689
2690 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2691                                     poll_table *wait)
2692 {
2693         struct sock *sk = sock->sk, *other;
2694         unsigned int writable;
2695         __poll_t mask;
2696
2697         sock_poll_wait(file, sock, wait);
2698         mask = 0;
2699
2700         /* exceptional events? */
2701         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2702                 mask |= EPOLLERR |
2703                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2704
2705         if (sk->sk_shutdown & RCV_SHUTDOWN)
2706                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2707         if (sk->sk_shutdown == SHUTDOWN_MASK)
2708                 mask |= EPOLLHUP;
2709
2710         /* readable? */
2711         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2712                 mask |= EPOLLIN | EPOLLRDNORM;
2713
2714         /* Connection-based need to check for termination and startup */
2715         if (sk->sk_type == SOCK_SEQPACKET) {
2716                 if (sk->sk_state == TCP_CLOSE)
2717                         mask |= EPOLLHUP;
2718                 /* connection hasn't started yet? */
2719                 if (sk->sk_state == TCP_SYN_SENT)
2720                         return mask;
2721         }
2722
2723         /* No write status requested, avoid expensive OUT tests. */
2724         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2725                 return mask;
2726
2727         writable = unix_writable(sk);
2728         if (writable) {
2729                 unix_state_lock(sk);
2730
2731                 other = unix_peer(sk);
2732                 if (other && unix_peer(other) != sk &&
2733                     unix_recvq_full(other) &&
2734                     unix_dgram_peer_wake_me(sk, other))
2735                         writable = 0;
2736
2737                 unix_state_unlock(sk);
2738         }
2739
2740         if (writable)
2741                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2742         else
2743                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2744
2745         return mask;
2746 }
2747
2748 #ifdef CONFIG_PROC_FS
2749
/*
 * A seq_file position packs two fields into one value: the hash-table
 * bucket index in the high bits and a 1-based offset within that bucket
 * in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE            (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
#define BUCKET_OFFSET_MASK      ((1L << BUCKET_SPACE) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x)           ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x)           ((x) & BUCKET_OFFSET_MASK)
/* Build a packed position from bucket @b and offset @o. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2755
2756 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2757 {
2758         unsigned long offset = get_offset(*pos);
2759         unsigned long bucket = get_bucket(*pos);
2760         struct sock *sk;
2761         unsigned long count = 0;
2762
2763         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2764                 if (sock_net(sk) != seq_file_net(seq))
2765                         continue;
2766                 if (++count == offset)
2767                         break;
2768         }
2769
2770         return sk;
2771 }
2772
2773 static struct sock *unix_next_socket(struct seq_file *seq,
2774                                      struct sock *sk,
2775                                      loff_t *pos)
2776 {
2777         unsigned long bucket;
2778
2779         while (sk > (struct sock *)SEQ_START_TOKEN) {
2780                 sk = sk_next(sk);
2781                 if (!sk)
2782                         goto next_bucket;
2783                 if (sock_net(sk) == seq_file_net(seq))
2784                         return sk;
2785         }
2786
2787         do {
2788                 sk = unix_from_bucket(seq, pos);
2789                 if (sk)
2790                         return sk;
2791
2792 next_bucket:
2793                 bucket = get_bucket(*pos) + 1;
2794                 *pos = set_bucket_offset(bucket, 1);
2795         } while (bucket < ARRAY_SIZE(unix_socket_table));
2796
2797         return NULL;
2798 }
2799
2800 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2801         __acquires(unix_table_lock)
2802 {
2803         spin_lock(&unix_table_lock);
2804
2805         if (!*pos)
2806                 return SEQ_START_TOKEN;
2807
2808         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2809                 return NULL;
2810
2811         return unix_next_socket(seq, NULL, pos);
2812 }
2813
2814 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2815 {
2816         ++*pos;
2817         return unix_next_socket(seq, v, pos);
2818 }
2819
/*
 * unix_seq_stop - seq_file ->stop: drop unix_table_lock.
 *
 * Counterpart of unix_seq_start(), which acquires the lock. Neither @seq
 * nor @v is used.
 */
static void unix_seq_stop(struct seq_file *seq, void *v)
        __releases(unix_table_lock)
{
        spin_unlock(&unix_table_lock);
}
2825
2826 static int unix_seq_show(struct seq_file *seq, void *v)
2827 {
2828
2829         if (v == SEQ_START_TOKEN)
2830                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2831                          "Inode Path\n");
2832         else {
2833                 struct sock *s = v;
2834                 struct unix_sock *u = unix_sk(s);
2835                 unix_state_lock(s);
2836
2837                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2838                         s,
2839                         refcount_read(&s->sk_refcnt),
2840                         0,
2841                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2842                         s->sk_type,
2843                         s->sk_socket ?
2844                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2845                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2846                         sock_i_ino(s));
2847
2848                 if (u->addr) {  // under unix_table_lock here
2849                         int i, len;
2850                         seq_putc(seq, ' ');
2851
2852                         i = 0;
2853                         len = u->addr->len - sizeof(short);
2854                         if (!UNIX_ABSTRACT(s))
2855                                 len--;
2856                         else {
2857                                 seq_putc(seq, '@');
2858                                 i++;
2859                         }
2860                         for ( ; i < len; i++)
2861                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2862                                          '@');
2863                 }
2864                 unix_state_unlock(s);
2865                 seq_putc(seq, '\n');
2866         }
2867
2868         return 0;
2869 }
2870
/*
 * seq_file iterator callbacks for the socket listing; handed to
 * proc_create_net() for the "unix" entry in unix_net_init().
 */
static const struct seq_operations unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
};
2877 #endif
2878
/*
 * Protocol family descriptor for PF_UNIX: sockets of this family are
 * created through unix_create(). Registered with sock_register() in
 * af_unix_init().
 */
static const struct net_proto_family unix_family_ops = {
        .family = PF_UNIX,
        .create = unix_create,
        .owner  = THIS_MODULE,
};
2884
2885
2886 static int __net_init unix_net_init(struct net *net)
2887 {
2888         int error = -ENOMEM;
2889
2890         net->unx.sysctl_max_dgram_qlen = 10;
2891         if (unix_sysctl_register(net))
2892                 goto out;
2893
2894 #ifdef CONFIG_PROC_FS
2895         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2896                         sizeof(struct seq_net_private))) {
2897                 unix_sysctl_unregister(net);
2898                 goto out;
2899         }
2900 #endif
2901         error = 0;
2902 out:
2903         return error;
2904 }
2905
/*
 * unix_net_exit - per-network-namespace teardown
 *
 * Undoes unix_net_init(): unregisters the unix sysctls and removes the
 * "unix" proc entry of @net.
 */
static void __net_exit unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);
        remove_proc_entry("unix", net->proc_net);
}
2911
/*
 * Per-network-namespace hooks; registered with register_pernet_subsys()
 * in af_unix_init().
 */
static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
};
2916
2917 static int __init af_unix_init(void)
2918 {
2919         int rc = -1;
2920
2921         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2922
2923         rc = proto_register(&unix_proto, 1);
2924         if (rc != 0) {
2925                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2926                 goto out;
2927         }
2928
2929         sock_register(&unix_family_ops);
2930         register_pernet_subsys(&unix_net_ops);
2931 out:
2932         return rc;
2933 }
2934
/*
 * af_unix_exit - module unload: unregister the PF_UNIX socket family,
 * the unix_proto protocol and the per-namespace operations that
 * af_unix_init() registered.
 */
static void __exit af_unix_exit(void)
{
        sock_unregister(PF_UNIX);
        proto_unregister(&unix_proto);
        unregister_pernet_subsys(&unix_net_ops);
}
2941
/*
 * Initialise earlier than device_initcall() so that drivers calling
 * request_module() do not end up in a loop when modprobe tries to use
 * a UNIX socket, but later than subsys_initcall() because we depend on
 * things that are initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);