net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
    5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
   28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   29  *                                      by the above two patches.
   30  *           Andrea Arcangeli   :       If possible we block in connect(2)
   31  *                                      if the max backlog of the listen socket
   32  *                                      has been reached. This won't break
   33  *                                      old apps and it will avoid a huge number
   34  *                                      of socks hashed (this is for unix_gc()
   35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116
 117 #include "scm.h"
 118
  /*
   * File-wide state.
   *
   * unix_socket_table has 2 * UNIX_HASH_SIZE buckets; the buckets at index
   * UNIX_HASH_SIZE and above are the ones handed out by
   * unix_sockets_unbound(). unix_table_lock is the spinlock taken around
   * every table insert, remove and lookup in this file. unix_nr_socks is
   * incremented in unix_create1() and decremented in unix_sock_destructor().
   */
  119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
  120 EXPORT_SYMBOL_GPL(unix_socket_table);
  121 DEFINE_SPINLOCK(unix_table_lock);
  122 EXPORT_SYMBOL_GPL(unix_table_lock);
  123 static atomic_long_t unix_nr_socks;
 124
 125
 126 static struct hlist_head *unix_sockets_unbound(void *addr)
 127 {
 128         unsigned long hash = (unsigned long)addr;
 129
 130         hash ^= hash >> 16;
 131         hash ^= hash >> 8;
 132         hash %= UNIX_HASH_SIZE;
 133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134 }
 135
  /*
   * True when the hash stored in the socket's address is below
   * UNIX_HASH_SIZE. Dereferences unix_sk(sk)->addr unconditionally, so the
   * socket must already have an address.
   * NOTE(review): presumably non-abstract (filesystem) names are stored with
   * a hash >= UNIX_HASH_SIZE by the bind path - confirm against unix_bind().
   */
  136 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
  /*
   * Helpers moving the security id (secid) between an scm_cookie and the
   * skb control block (UNIXCB). Without CONFIG_SECURITY_NETWORK the copy
   * helpers are empty and unix_secdata_eq() always returns true.
   */
  138 #ifdef CONFIG_SECURITY_NETWORK
  /* Copy direction is scm -> skb: stores scm->secid in the skb's UNIXCB. */
  139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  140 {
  141         UNIXCB(skb).secid = scm->secid;
  142 }
  143
  /* Copy direction is skb -> scm: loads the skb's UNIXCB secid into scm. */
  144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  145 {
  146         scm->secid = UNIXCB(skb).secid;
  147 }
  148
  /* Returns true when scm and the skb carry the same secid. */
  149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
  150 {
  151         return (scm->secid == UNIXCB(skb).secid);
  152 }
  153 #else
  /* No-op variants used when CONFIG_SECURITY_NETWORK is not set. */
  154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  155 { }
  156
  157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
  158 { }
  159
  160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
  161 {
  162         return true;
  163 }
  164 #endif /* CONFIG_SECURITY_NETWORK */
 165
 166 /*
 167  *  SMP locking strategy:
 168  *    hash table is protected with spinlock unix_table_lock
 169  *    each socket state is protected by separate spin lock.
 170  */
 171
 172 static inline unsigned int unix_hash_fold(__wsum n)
 173 {
 174         unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176         hash ^= hash>>8;
 177         return hash&(UNIX_HASH_SIZE-1);
 178 }
 179
  /*
   * Lvalue accessor for a socket's peer pointer; used both for reads and
   * for assignment (e.g. "unix_peer(sk) = NULL"). Takes no lock itself.
   */
  180 #define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183 {
 184         return unix_peer(osk) == sk;
 185 }
 186
 187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190 }
 191
 192 static inline int unix_recvq_full(struct sock const *sk)
 193 {
 194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195 }
 196
 197 struct sock *unix_peer_get(struct sock *s)
 198 {
 199         struct sock *peer;
 200
 201         unix_state_lock(s);
 202         peer = unix_peer(s);
 203         if (peer)
 204                 sock_hold(peer);
 205         unix_state_unlock(s);
 206         return peer;
 207 }
 208 EXPORT_SYMBOL_GPL(unix_peer_get);
 209
  /*
   * Drop one reference on addr and kfree() it when that was the last one.
   * addr must not be NULL (it is dereferenced unconditionally).
   */
  210 static inline void unix_release_addr(struct unix_address *addr)
  211 {
  212         if (refcount_dec_and_test(&addr->refcnt))
  213                 kfree(addr);
  214 }
 215
  216 /*
  217  *      Check unix socket name:
  218  *              - it must not be zero length.
  219  *              - if it does not start with a zero byte, it is a NUL terminated path (FS object).
  220  *              - if it starts with a zero byte, it is an abstract name.
  221  */
 222
  /*
   * Validate a sockaddr_un supplied with length len and normalise it.
   *
   * Returns -EINVAL when len is not larger than sizeof(short), when len
   * exceeds sizeof(*sunaddr), when sunaddr is NULL, or when the family is
   * not AF_UNIX. Otherwise returns the length to use for the address.
   *
   * *hashp is always written: 0 for names whose first path byte is non-zero,
   * and a folded checksum over the first len bytes for names whose first
   * path byte is zero.
   */
  223 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
  224 {
  225         *hashp = 0;
  226
              /* len is compared against size_t values, so a negative len is rejected too */
  227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
  228                 return -EINVAL;
  229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
  230                 return -EINVAL;
  231         if (sunaddr->sun_path[0]) {
  232                 /*
  233                  * This may look like an off by one error but it is a bit more
  234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
  235                  * sun_path[108] doesn't as such exist.  However in kernel space
  236                  * we are guaranteed that it is a valid memory location in our
  237                  * kernel address buffer.
  238                  */
  239                 ((char *)sunaddr)[len] = 0;
                      /* recompute: path up to its first NUL, plus the NUL, plus the family field */
  240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
  241                 return len;
  242         }
  243
              /* first path byte is zero: hash all len bytes, family field included */
  244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
  245         return len;
  246 }
 247
  /*
   * Unlink sk from its hash bucket via sk_del_node_init(). Takes no lock
   * itself; unix_remove_socket() is the variant that takes unix_table_lock.
   */
  248 static void __unix_remove_socket(struct sock *sk)
  249 {
  250         sk_del_node_init(sk);
  251 }
 252
  /*
   * Add sk to the bucket list. Warns (WARN_ON) if sk is still hashed
   * somewhere, but inserts it regardless. Takes no lock itself;
   * unix_insert_socket() is the variant that takes unix_table_lock.
   */
  253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
  254 {
  255         WARN_ON(!sk_unhashed(sk));
  256         sk_add_node(sk, list);
  257 }
 258
  /* Locked wrapper: unlink sk from the hash table under unix_table_lock. */
  259 static inline void unix_remove_socket(struct sock *sk)
  260 {
  261         spin_lock(&unix_table_lock);
  262         __unix_remove_socket(sk);
  263         spin_unlock(&unix_table_lock);
  264 }
 265
  /* Locked wrapper: add sk to bucket list under unix_table_lock. */
  266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
  267 {
  268         spin_lock(&unix_table_lock);
  269         __unix_insert_socket(list, sk);
  270         spin_unlock(&unix_table_lock);
  271 }
 272
 273 static struct sock *__unix_find_socket_byname(struct net *net,
 274                                               struct sockaddr_un *sunname,
 275                                               int len, int type, unsigned int hash)
 276 {
 277         struct sock *s;
 278
 279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                 struct unix_sock *u = unix_sk(s);
 281
 282                 if (!net_eq(sock_net(s), net))
 283                         continue;
 284
 285                 if (u->addr->len == len &&
 286                     !memcmp(u->addr->name, sunname, len))
 287                         goto found;
 288         }
 289         s = NULL;
 290 found:
 291         return s;
 292 }
 293
 294 static inline struct sock *unix_find_socket_byname(struct net *net,
 295                                                    struct sockaddr_un *sunname,
 296                                                    int len, int type,
 297                                                    unsigned int hash)
 298 {
 299         struct sock *s;
 300
 301         spin_lock(&unix_table_lock);
 302         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 303         if (s)
 304                 sock_hold(s);
 305         spin_unlock(&unix_table_lock);
 306         return s;
 307 }
 308
 309 static struct sock *unix_find_socket_byinode(struct inode *i)
 310 {
 311         struct sock *s;
 312
 313         spin_lock(&unix_table_lock);
 314         sk_for_each(s,
 315                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 316                 struct dentry *dentry = unix_sk(s)->path.dentry;
 317
 318                 if (dentry && d_backing_inode(dentry) == i) {
 319                         sock_hold(s);
 320                         goto found;
 321                 }
 322         }
 323         s = NULL;
 324 found:
 325         spin_unlock(&unix_table_lock);
 326         return s;
 327 }
 328
 329 /* Support code for asymmetrically connected dgram sockets
 330  *
 331  * If a datagram socket is connected to a socket not itself connected
 332  * to the first socket (eg, /dev/log), clients may only enqueue more
 333  * messages if the present receive queue of the server socket is not
 334  * "too large". This means there's a second writeability condition
 335  * poll and sendmsg need to test. The dgram recv code will do a wake
 336  * up on the peer_wait wait queue of a socket upon reception of a
 337  * datagram which needs to be propagated to sleeping would-be writers
 338  * since these might not have sent anything so far. This can't be
 339  * accomplished via poll_wait because the lifetime of the server
 340  * socket might be less than that of its clients if these break their
 341  * association with it or if the server socket is closed while clients
 342  * are still connected to it and there's no way to inform "a polling
 343  * implementation" that it should let go of a certain wait queue
 344  *
 345  * In order to propagate a wake up, a wait_queue_entry_t of the client
 346  * socket is enqueued on the peer_wait queue of the server socket
 347  * whose wake function does a wake_up on the ordinary client socket
 348  * wait queue. This connection is established whenever a write (or
 349  * poll for write) hit the flow control condition and broken when the
 350  * association to the server socket is dissolved or after a wake up
 351  * was relayed.
 352  */
 353
  /*
   * Wake function installed on each socket's peer_wake entry (see
   * init_waitqueue_func_entry() in unix_create1()). q is the peer_wake
   * entry embedded in a unix_sock; the function unhooks that entry from the
   * peer_wait queue it was enqueued on, clears the "enqueued" marker, and
   * relays the wake-up to the owning socket's own wait queue.
   * mode and flags are unused. Always returns 0.
   * NOTE(review): the lockless __remove_wait_queue() presumably relies on
   * the waker already holding the peer_wait queue lock - confirm.
   */
  354 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
  355                                       void *key)
  356 {
  357         struct unix_sock *u;
  358         wait_queue_head_t *u_sleep;
  359
              /* recover the socket that owns this wait entry */
  360         u = container_of(q, struct unix_sock, peer_wake);
  361
              /* private holds the sock whose peer_wait queue we sit on */
  362         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
  363                             q);
  364         u->peer_wake.private = NULL;
  365
  366         /* relaying can only happen while the wq still exists */
  367         u_sleep = sk_sleep(&u->sk);
  368         if (u_sleep)
  369                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
  370
  371         return 0;
  372 }
 373
 374 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 375 {
 376         struct unix_sock *u, *u_other;
 377         int rc;
 378
 379         u = unix_sk(sk);
 380         u_other = unix_sk(other);
 381         rc = 0;
 382         spin_lock(&u_other->peer_wait.lock);
 383
 384         if (!u->peer_wake.private) {
 385                 u->peer_wake.private = other;
 386                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 387
 388                 rc = 1;
 389         }
 390
 391         spin_unlock(&u_other->peer_wait.lock);
 392         return rc;
 393 }
 394
 395 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 396                                             struct sock *other)
 397 {
 398         struct unix_sock *u, *u_other;
 399
 400         u = unix_sk(sk);
 401         u_other = unix_sk(other);
 402         spin_lock(&u_other->peer_wait.lock);
 403
 404         if (u->peer_wake.private == other) {
 405                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 406                 u->peer_wake.private = NULL;
 407         }
 408
 409         spin_unlock(&u_other->peer_wait.lock);
 410 }
 411
  /*
   * Detach sk's relay entry from other, then wake anything sleeping on sk's
   * own wait queue with the write-readiness poll bits, so waiters re-check
   * writability after the association changed.
   */
  412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
  413                                                    struct sock *other)
  414 {
  415         unix_dgram_peer_wake_disconnect(sk, other);
  416         wake_up_interruptible_poll(sk_sleep(sk),
  417                                    EPOLLOUT |
  418                                    EPOLLWRNORM |
  419                                    EPOLLWRBAND);
  420 }
 421
 422 /* preconditions:
 423  *      - unix_peer(sk) == other
 424  *      - association is stable
 425  */
 426 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 427 {
 428         int connected;
 429
 430         connected = unix_dgram_peer_wake_connect(sk, other);
 431
 432         /* If other is SOCK_DEAD, we want to make sure we signal
 433          * POLLOUT, such that a subsequent write() can get a
 434          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 435          * to other and its full, we will hang waiting for POLLOUT.
 436          */
 437         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 438                 return 1;
 439
 440         if (connected)
 441                 unix_dgram_peer_wake_disconnect(sk, other);
 442
 443         return 0;
 444 }
 445
 446 static int unix_writable(const struct sock *sk)
 447 {
 448         return sk->sk_state != TCP_LISTEN &&
 449                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 450 }
 451
  /*
   * sk_write_space callback (installed in unix_create1()). When the socket
   * is writable per unix_writable(), wake pollers/sleepers on its wait
   * queue with the write-readiness bits and send the async SOCK_WAKE_SPACE
   * notification. Does nothing when the socket is not writable.
   */
  452 static void unix_write_space(struct sock *sk)
  453 {
  454         struct socket_wq *wq;
  455
              /* sk->sk_wq is only dereferenced inside the RCU read section */
  456         rcu_read_lock();
  457         if (unix_writable(sk)) {
  458                 wq = rcu_dereference(sk->sk_wq);
  459                 if (skwq_has_sleeper(wq))
  460                         wake_up_interruptible_sync_poll(&wq->wait,
  461                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
                      /* the async notification is sent whether or not anyone slept */
  462                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
  463         }
  464         rcu_read_unlock();
  465 }
 466
 467 /* When dgram socket disconnects (or changes its peer), we clear its receive
 468  * queue of packets arrived from previous peer. First, it allows to do
 469  * flow control based only on wmem_alloc; second, sk connected to peer
 470  * may receive messages only from that peer. */
 471 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 472 {
 473         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 474                 skb_queue_purge(&sk->sk_receive_queue);
 475                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 476
 477                 /* If one link of bidirectional dgram pipe is disconnected,
 478                  * we signal error. Messages are lost. Do not make this,
 479                  * when peer was not connected to us.
 480                  */
 481                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 482                         other->sk_err = ECONNRESET;
 483                         other->sk_error_report(other);
 484                 }
 485         }
 486 }
 487
  /*
   * sk_destruct callback (installed in unix_create1()). Purges any skbs
   * left on the receive queue, sanity-checks the socket state, releases the
   * bound address and updates the socket accounting.
   *
   * If the socket is not flagged SOCK_DEAD the function logs a message and
   * returns early, skipping the address release and the accounting.
   */
  488 static void unix_sock_destructor(struct sock *sk)
  489 {
  490         struct unix_sock *u = unix_sk(sk);
  491
  492         skb_queue_purge(&sk->sk_receive_queue);
  493
              /* by now: no write memory outstanding, unhashed, detached from its struct socket */
  494         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
  495         WARN_ON(!sk_unhashed(sk));
  496         WARN_ON(sk->sk_socket);
  497         if (!sock_flag(sk, SOCK_DEAD)) {
  498                 pr_info("Attempt to release alive unix socket: %p\n", sk);
  499                 return;
  500         }
  501
  502         if (u->addr)
  503                 unix_release_addr(u->addr);
  504
              /* undo the accounting done in unix_create1() */
  505         atomic_long_dec(&unix_nr_socks);
  506         local_bh_disable();
  507         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  508         local_bh_enable();
  509 #ifdef UNIX_REFCNT_DEBUG
  510         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
  511                 atomic_long_read(&unix_nr_socks));
  512 #endif
  513 }
 514
  /*
   * Tear down sk: unhash it, mark it closed and orphaned, notify and drop
   * the peer, flush the receive queue, release the bound path and finally
   * drop the caller's reference on sk.
   *
   * embrion is non-zero when sk is a not-yet-accepted connection found on a
   * listening socket's queue (see the recursive call below); in that case a
   * stream/seqpacket peer is given ECONNRESET even if sk's queue is empty.
   *
   * sk must not be touched by the caller afterwards: the final sock_put()
   * may free it.
   */
  515 static void unix_release_sock(struct sock *sk, int embrion)
  516 {
  517         struct unix_sock *u = unix_sk(sk);
  518         struct path path;
  519         struct sock *skpair;
  520         struct sk_buff *skb;
  521         int state;
  522
  523         unix_remove_socket(sk);
  524
  525         /* Clear state */
  526         unix_state_lock(sk);
  527         sock_orphan(sk);
  528         sk->sk_shutdown = SHUTDOWN_MASK;
              /* take over the bound path; it is put after the lock is dropped */
  529         path         = u->path;
  530         u->path.dentry = NULL;
  531         u->path.mnt = NULL;
              /* remember the old state: a listener's queue needs special flushing */
  532         state = sk->sk_state;
  533         sk->sk_state = TCP_CLOSE;
  534         unix_state_unlock(sk);
  535
  536         wake_up_interruptible_all(&u->peer_wait);
  537
  538         skpair = unix_peer(sk);
  539
  540         if (skpair != NULL) {
  541                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
  542                         unix_state_lock(skpair);
  543                         /* No more writes */
  544                         skpair->sk_shutdown = SHUTDOWN_MASK;
                              /* unread data (or an unaccepted connection) is reported as a reset */
  545                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
  546                                 skpair->sk_err = ECONNRESET;
  547                         unix_state_unlock(skpair);
  548                         skpair->sk_state_change(skpair);
  549                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
  550                 }
  551
  552                 unix_dgram_peer_wake_disconnect(sk, skpair);
  553                 sock_put(skpair); /* It may now die */
  554                 unix_peer(sk) = NULL;
  555         }
  556
  557         /* Try to flush out this socket. Throw out buffers at least */
  558
  559         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                      /* on a listener each queued skb stands for a pending connection: release its sock too */
  560                 if (state == TCP_LISTEN)
  561                         unix_release_sock(skb->sk, 1);
  562                 /* passed fds are erased in the kfree_skb hook        */
  563                 UNIXCB(skb).consumed = skb->len;
  564                 kfree_skb(skb);
  565         }
  566
  567         if (path.dentry)
  568                 path_put(&path);
  569
  570         sock_put(sk);
  571
  572         /* ---- Socket is dead now and most probably destroyed ---- */
  573
  574         /*
  575          * Fixme: BSD difference: In BSD all sockets connected to us get
  576          *        ECONNRESET and we die on the spot. In Linux we behave
  577          *        like files and pipes do and wait for the last
  578          *        dereference.
  579          *
  580          * Can't we simply set sock->err?
  581          *
  582          *        What the above comment does talk about? --ANK(980817)
  583          */
  584
              /* fds may still be in flight inside queued messages: let the collector look */
  585         if (unix_tot_inflight)
  586                 unix_gc();              /* Garbage collect fds */
  587 }
 588
  /*
   * Set sk's peer credentials to those of the current task: drops the
   * references held on the previous pid/cred (if any) and stores new
   * references to current's thread-group pid and credentials.
   */
  589 static void init_peercred(struct sock *sk)
  590 {
  591         put_pid(sk->sk_peer_pid);
  592         if (sk->sk_peer_cred)
  593                 put_cred(sk->sk_peer_cred);
  594         sk->sk_peer_pid  = get_pid(task_tgid(current));
  595         sk->sk_peer_cred = get_current_cred();
  596 }
 597
  /*
   * Set sk's peer credentials to a copy of peersk's: drops the references
   * held on sk's previous pid/cred (if any) and takes new references on
   * the pid and credentials recorded in peersk.
   */
  598 static void copy_peercred(struct sock *sk, struct sock *peersk)
  599 {
  600         put_pid(sk->sk_peer_pid);
  601         if (sk->sk_peer_cred)
  602                 put_cred(sk->sk_peer_cred);
  603         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
  604         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
  605 }
 606
 607 static int unix_listen(struct socket *sock, int backlog)
 608 {
 609         int err;
 610         struct sock *sk = sock->sk;
 611         struct unix_sock *u = unix_sk(sk);
 612         struct pid *old_pid = NULL;
 613
 614         err = -EOPNOTSUPP;
 615         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 616                 goto out;       /* Only stream/seqpacket sockets accept */
 617         err = -EINVAL;
 618         if (!u->addr)
 619                 goto out;       /* No listens on an unbound socket */
 620         unix_state_lock(sk);
 621         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 622                 goto out_unlock;
 623         if (backlog > sk->sk_max_ack_backlog)
 624                 wake_up_interruptible_all(&u->peer_wait);
 625         sk->sk_max_ack_backlog  = backlog;
 626         sk->sk_state            = TCP_LISTEN;
 627         /* set credentials so connect can copy them */
 628         init_peercred(sk);
 629         err = 0;
 630
 631 out_unlock:
 632         unix_state_unlock(sk);
 633         put_pid(old_pid);
 634 out:
 635         return err;
 636 }
 637
  /*
   * Forward declarations of the handlers referenced by the proto_ops
   * tables below; their definitions come later in the file.
   */
  638 static int unix_release(struct socket *);
  639 static int unix_bind(struct socket *, struct sockaddr *, int);
  640 static int unix_stream_connect(struct socket *, struct sockaddr *,
  641                                int addr_len, int flags);
  642 static int unix_socketpair(struct socket *, struct socket *);
  643 static int unix_accept(struct socket *, struct socket *, int, bool);
  644 static int unix_getname(struct socket *, struct sockaddr *, int);
  645 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
  646 static __poll_t unix_dgram_poll(struct file *, struct socket *,
  647                                     poll_table *);
  648 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
  649 static int unix_shutdown(struct socket *, int);
  650 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
  651 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
  652 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
  653                                     size_t size, int flags);
  654 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
  655                                        struct pipe_inode_info *, size_t size,
  656                                        unsigned int flags);
  657 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
  658 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
  659 static int unix_dgram_connect(struct socket *, struct sockaddr *,
  660                               int, int);
  661 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
  662 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
  663                                   int);
 664
 665 static int unix_set_peek_off(struct sock *sk, int val)
 666 {
 667         struct unix_sock *u = unix_sk(sk);
 668
 669         if (mutex_lock_interruptible(&u->iolock))
 670                 return -EINTR;
 671
 672         sk->sk_peek_off = val;
 673         mutex_unlock(&u->iolock);
 674
 675         return 0;
 676 }
 677
 678
  /*
   * Operations table installed by unix_create() for SOCK_STREAM sockets.
   * It is the only one of the three tables that provides sendpage and
   * splice_read handlers; setsockopt, getsockopt and mmap use the generic
   * sock_no_* stubs.
   */
  679 static const struct proto_ops unix_stream_ops = {
  680         .family =       PF_UNIX,
  681         .owner =        THIS_MODULE,
  682         .release =      unix_release,
  683         .bind =         unix_bind,
  684         .connect =      unix_stream_connect,
  685         .socketpair =   unix_socketpair,
  686         .accept =       unix_accept,
  687         .getname =      unix_getname,
  688         .poll =         unix_poll,
  689         .ioctl =        unix_ioctl,
  690         .listen =       unix_listen,
  691         .shutdown =     unix_shutdown,
  692         .setsockopt =   sock_no_setsockopt,
  693         .getsockopt =   sock_no_getsockopt,
  694         .sendmsg =      unix_stream_sendmsg,
  695         .recvmsg =      unix_stream_recvmsg,
  696         .mmap =         sock_no_mmap,
  697         .sendpage =     unix_stream_sendpage,
  698         .splice_read =  unix_stream_splice_read,
  699         .set_peek_off = unix_set_peek_off,
  700 };
 701
  /*
   * Operations table installed by unix_create() for SOCK_DGRAM sockets
   * (SOCK_RAW requests are converted to SOCK_DGRAM there and end up here
   * too). accept, listen and sendpage use the generic sock_no_* stubs.
   */
  702 static const struct proto_ops unix_dgram_ops = {
  703         .family =       PF_UNIX,
  704         .owner =        THIS_MODULE,
  705         .release =      unix_release,
  706         .bind =         unix_bind,
  707         .connect =      unix_dgram_connect,
  708         .socketpair =   unix_socketpair,
  709         .accept =       sock_no_accept,
  710         .getname =      unix_getname,
  711         .poll =         unix_dgram_poll,
  712         .ioctl =        unix_ioctl,
  713         .listen =       sock_no_listen,
  714         .shutdown =     unix_shutdown,
  715         .setsockopt =   sock_no_setsockopt,
  716         .getsockopt =   sock_no_getsockopt,
  717         .sendmsg =      unix_dgram_sendmsg,
  718         .recvmsg =      unix_dgram_recvmsg,
  719         .mmap =         sock_no_mmap,
  720         .sendpage =     sock_no_sendpage,
  721         .set_peek_off = unix_set_peek_off,
  722 };
 723
  /*
   * Operations table installed by unix_create() for SOCK_SEQPACKET
   * sockets. Connection setup (connect, accept, listen) shares the stream
   * handlers, polling shares the datagram handler, and sendmsg/recvmsg have
   * seqpacket-specific entry points.
   */
  724 static const struct proto_ops unix_seqpacket_ops = {
  725         .family =       PF_UNIX,
  726         .owner =        THIS_MODULE,
  727         .release =      unix_release,
  728         .bind =         unix_bind,
  729         .connect =      unix_stream_connect,
  730         .socketpair =   unix_socketpair,
  731         .accept =       unix_accept,
  732         .getname =      unix_getname,
  733         .poll =         unix_dgram_poll,
  734         .ioctl =        unix_ioctl,
  735         .listen =       unix_listen,
  736         .shutdown =     unix_shutdown,
  737         .setsockopt =   sock_no_setsockopt,
  738         .getsockopt =   sock_no_getsockopt,
  739         .sendmsg =      unix_seqpacket_sendmsg,
  740         .recvmsg =      unix_seqpacket_recvmsg,
  741         .mmap =         sock_no_mmap,
  742         .sendpage =     sock_no_sendpage,
  743         .set_peek_off = unix_set_peek_off,
  744 };
 745
  /*
   * Protocol descriptor passed to sk_alloc() in unix_create1(); obj_size
   * makes every allocated sock large enough to be used as a struct unix_sock.
   */
  746 static struct proto unix_proto = {
  747         .name                   = "UNIX",
  748         .owner                  = THIS_MODULE,
  749         .obj_size               = sizeof(struct unix_sock),
  750 };
 751
  /*
   * Allocate and initialise a unix sock for socket "sock" in namespace net,
   * and hash it into the unbound part of the socket table.
   *
   * Returns the new sock, or NULL when either the global socket count would
   * exceed 2 * get_max_files() or sk_alloc() fails; the two failures are
   * not distinguishable by the caller.
   */
  752 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
  753 {
  754         struct sock *sk = NULL;
  755         struct unix_sock *u;
  756
              /* count first, check after; the "out" path rolls the count back on failure */
  757         atomic_long_inc(&unix_nr_socks);
  758         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
  759                 goto out;
  760
  761         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
  762         if (!sk)
  763                 goto out;
  764
  765         sock_init_data(sock, sk);
  766
  767         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
  768         sk->sk_write_space      = unix_write_space;
              /* the default queue limit comes from the per-namespace sysctl */
  769         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
  770         sk->sk_destruct         = unix_sock_destructor;
  771         u         = unix_sk(sk);
  772         u->path.dentry = NULL;
  773         u->path.mnt = NULL;
  774         spin_lock_init(&u->lock);
  775         atomic_long_set(&u->inflight, 0);
  776         INIT_LIST_HEAD(&u->link);
  777         mutex_init(&u->iolock); /* single task reading lock */
  778         mutex_init(&u->bindlock); /* single task binding lock */
  779         init_waitqueue_head(&u->peer_wait);
  780         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
              /* no name yet: the bucket is chosen from the sock pointer itself */
  781         unix_insert_socket(unix_sockets_unbound(sk), sk);
  782 out:
  783         if (sk == NULL)
  784                 atomic_long_dec(&unix_nr_socks);
  785         else {
  786                 local_bh_disable();
  787                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  788                 local_bh_enable();
  789         }
  790         return sk;
  791 }
 792
 793 static int unix_create(struct net *net, struct socket *sock, int protocol,
 794                        int kern)
 795 {
 796         if (protocol && protocol != PF_UNIX)
 797                 return -EPROTONOSUPPORT;
 798
 799         sock->state = SS_UNCONNECTED;
 800
 801         switch (sock->type) {
 802         case SOCK_STREAM:
 803                 sock->ops = &unix_stream_ops;
 804                 break;
 805                 /*
 806                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 807                  *      nothing uses it.
 808                  */
 809         case SOCK_RAW:
 810                 sock->type = SOCK_DGRAM;
 811                 /* fall through */
 812         case SOCK_DGRAM:
 813                 sock->ops = &unix_dgram_ops;
 814                 break;
 815         case SOCK_SEQPACKET:
 816                 sock->ops = &unix_seqpacket_ops;
 817                 break;
 818         default:
 819                 return -ESOCKTNOSUPPORT;
 820         }
 821
 822         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 823 }
 824
 825 static int unix_release(struct socket *sock)
 826 {
 827         struct sock *sk = sock->sk;
 828
 829         if (!sk)
 830                 return 0;
 831
 832         unix_release_sock(sk, 0);
 833         sock->sk = NULL;
 834
 835         return 0;
 836 }
 837
 838 static int unix_autobind(struct socket *sock)
 839 {
 840         struct sock *sk = sock->sk;
 841         struct net *net = sock_net(sk);
 842         struct unix_sock *u = unix_sk(sk);
 843         static u32 ordernum = 1;
 844         struct unix_address *addr;
 845         int err;
 846         unsigned int retries = 0;
 847
 848         err = mutex_lock_interruptible(&u->bindlock);
 849         if (err)
 850                 return err;
 851
 852         err = 0;
 853         if (u->addr)
 854                 goto out;
 855
 856         err = -ENOMEM;
 857         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 858         if (!addr)
 859                 goto out;
 860
 861         addr->name->sun_family = AF_UNIX;
 862         refcount_set(&addr->refcnt, 1);
 863
 864 retry:
 865         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 866         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 867
 868         spin_lock(&unix_table_lock);
 869         ordernum = (ordernum+1)&0xFFFFF;
 870
 871         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 872                                       addr->hash)) {
 873                 spin_unlock(&unix_table_lock);
 874                 /*
 875                  * __unix_find_socket_byname() may take long time if many names
 876                  * are already in use.
 877                  */
 878                 cond_resched();
 879                 /* Give up if all names seems to be in use. */
 880                 if (retries++ == 0xFFFFF) {
 881                         err = -ENOSPC;
 882                         kfree(addr);
 883                         goto out;
 884                 }
 885                 goto retry;
 886         }
 887         addr->hash ^= sk->sk_type;
 888
 889         __unix_remove_socket(sk);
 890         smp_store_release(&u->addr, addr);
 891         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 892         spin_unlock(&unix_table_lock);
 893         err = 0;
 894
 895 out:    mutex_unlock(&u->bindlock);
 896         return err;
 897 }
 898
 899 static struct sock *unix_find_other(struct net *net,
 900                                     struct sockaddr_un *sunname, int len,
 901                                     int type, unsigned int hash, int *error)
 902 {
 903         struct sock *u;
 904         struct path path;
 905         int err = 0;
 906
 907         if (sunname->sun_path[0]) {
 908                 struct inode *inode;
 909                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 910                 if (err)
 911                         goto fail;
 912                 inode = d_backing_inode(path.dentry);
 913                 err = inode_permission(inode, MAY_WRITE);
 914                 if (err)
 915                         goto put_fail;
 916
 917                 err = -ECONNREFUSED;
 918                 if (!S_ISSOCK(inode->i_mode))
 919                         goto put_fail;
 920                 u = unix_find_socket_byinode(inode);
 921                 if (!u)
 922                         goto put_fail;
 923
 924                 if (u->sk_type == type)
 925                         touch_atime(&path);
 926
 927                 path_put(&path);
 928
 929                 err = -EPROTOTYPE;
 930                 if (u->sk_type != type) {
 931                         sock_put(u);
 932                         goto fail;
 933                 }
 934         } else {
 935                 err = -ECONNREFUSED;
 936                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 937                 if (u) {
 938                         struct dentry *dentry;
 939                         dentry = unix_sk(u)->path.dentry;
 940                         if (dentry)
 941                                 touch_atime(&unix_sk(u)->path);
 942                 } else
 943                         goto fail;
 944         }
 945         return u;
 946
 947 put_fail:
 948         path_put(&path);
 949 fail:
 950         *error = err;
 951         return NULL;
 952 }
 953
 954 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 955 {
 956         struct dentry *dentry;
 957         struct path path;
 958         int err = 0;
 959         /*
 960          * Get the parent directory, calculate the hash for last
 961          * component.
 962          */
 963         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 964         err = PTR_ERR(dentry);
 965         if (IS_ERR(dentry))
 966                 return err;
 967
 968         /*
 969          * All right, let's create it.
 970          */
 971         err = security_path_mknod(&path, dentry, mode, 0);
 972         if (!err) {
 973                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 974                 if (!err) {
 975                         res->mnt = mntget(path.mnt);
 976                         res->dentry = dget(dentry);
 977                 }
 978         }
 979         done_path_create(&path, dentry);
 980         return err;
 981 }
 982
 983 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 984 {
 985         struct sock *sk = sock->sk;
 986         struct net *net = sock_net(sk);
 987         struct unix_sock *u = unix_sk(sk);
 988         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 989         char *sun_path = sunaddr->sun_path;
 990         int err;
 991         unsigned int hash;
 992         struct unix_address *addr;
 993         struct hlist_head *list;
 994         struct path path = { };
 995
 996         err = -EINVAL;
 997         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 998             sunaddr->sun_family != AF_UNIX)
 999                 goto out;
1000
1001         if (addr_len == sizeof(short)) {
1002                 err = unix_autobind(sock);
1003                 goto out;
1004         }
1005
1006         err = unix_mkname(sunaddr, addr_len, &hash);
1007         if (err < 0)
1008                 goto out;
1009         addr_len = err;
1010
1011         if (sun_path[0]) {
1012                 umode_t mode = S_IFSOCK |
1013                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1014                 err = unix_mknod(sun_path, mode, &path);
1015                 if (err) {
1016                         if (err == -EEXIST)
1017                                 err = -EADDRINUSE;
1018                         goto out;
1019                 }
1020         }
1021
1022         err = mutex_lock_interruptible(&u->bindlock);
1023         if (err)
1024                 goto out_put;
1025
1026         err = -EINVAL;
1027         if (u->addr)
1028                 goto out_up;
1029
1030         err = -ENOMEM;
1031         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1032         if (!addr)
1033                 goto out_up;
1034
1035         memcpy(addr->name, sunaddr, addr_len);
1036         addr->len = addr_len;
1037         addr->hash = hash ^ sk->sk_type;
1038         refcount_set(&addr->refcnt, 1);
1039
1040         if (sun_path[0]) {
1041                 addr->hash = UNIX_HASH_SIZE;
1042                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1043                 spin_lock(&unix_table_lock);
1044                 u->path = path;
1045                 list = &unix_socket_table[hash];
1046         } else {
1047                 spin_lock(&unix_table_lock);
1048                 err = -EADDRINUSE;
1049                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1050                                               sk->sk_type, hash)) {
1051                         unix_release_addr(addr);
1052                         goto out_unlock;
1053                 }
1054
1055                 list = &unix_socket_table[addr->hash];
1056         }
1057
1058         err = 0;
1059         __unix_remove_socket(sk);
1060         smp_store_release(&u->addr, addr);
1061         __unix_insert_socket(list, sk);
1062
1063 out_unlock:
1064         spin_unlock(&unix_table_lock);
1065 out_up:
1066         mutex_unlock(&u->bindlock);
1067 out_put:
1068         if (err)
1069                 path_put(&path);
1070 out:
1071         return err;
1072 }
1073
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * If @sk2 is NULL or the same sock as @sk1, only @sk1 is locked.
 * Otherwise the sock at the lower address is locked first and the other
 * one is taken with unix_state_lock_nested().
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first = sk1;
        struct sock *second = sk2;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 > sk2) {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1088
/*
 * Counterpart of unix_state_double_lock(): @sk1 is always unlocked, and
 * @sk2 is unlocked as well when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        int single = unlikely(sk1 == sk2) || !sk2;

        unix_state_unlock(sk1);
        if (!single)
                unix_state_unlock(sk2);
}
1098
1099 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1100                               int alen, int flags)
1101 {
1102         struct sock *sk = sock->sk;
1103         struct net *net = sock_net(sk);
1104         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1105         struct sock *other;
1106         unsigned int hash;
1107         int err;
1108
1109         err = -EINVAL;
1110         if (alen < offsetofend(struct sockaddr, sa_family))
1111                 goto out;
1112
1113         if (addr->sa_family != AF_UNSPEC) {
1114                 err = unix_mkname(sunaddr, alen, &hash);
1115                 if (err < 0)
1116                         goto out;
1117                 alen = err;
1118
1119                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1120                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1121                         goto out;
1122
1123 restart:
1124                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1125                 if (!other)
1126                         goto out;
1127
1128                 unix_state_double_lock(sk, other);
1129
1130                 /* Apparently VFS overslept socket death. Retry. */
1131                 if (sock_flag(other, SOCK_DEAD)) {
1132                         unix_state_double_unlock(sk, other);
1133                         sock_put(other);
1134                         goto restart;
1135                 }
1136
1137                 err = -EPERM;
1138                 if (!unix_may_send(sk, other))
1139                         goto out_unlock;
1140
1141                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1142                 if (err)
1143                         goto out_unlock;
1144
1145         } else {
1146                 /*
1147                  *      1003.1g breaking connected state with AF_UNSPEC
1148                  */
1149                 other = NULL;
1150                 unix_state_double_lock(sk, other);
1151         }
1152
1153         /*
1154          * If it was connected, reconnect.
1155          */
1156         if (unix_peer(sk)) {
1157                 struct sock *old_peer = unix_peer(sk);
1158                 unix_peer(sk) = other;
1159                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1160
1161                 unix_state_double_unlock(sk, other);
1162
1163                 if (other != old_peer)
1164                         unix_dgram_disconnected(sk, old_peer);
1165                 sock_put(old_peer);
1166         } else {
1167                 unix_peer(sk) = other;
1168                 unix_state_double_unlock(sk, other);
1169         }
1170         return 0;
1171
1172 out_unlock:
1173         unix_state_double_unlock(sk, other);
1174         sock_put(other);
1175 out:
1176         return err;
1177 }
1178
1179 static long unix_wait_for_peer(struct sock *other, long timeo)
1180 {
1181         struct unix_sock *u = unix_sk(other);
1182         int sched;
1183         DEFINE_WAIT(wait);
1184
1185         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1186
1187         sched = !sock_flag(other, SOCK_DEAD) &&
1188                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1189                 unix_recvq_full(other);
1190
1191         unix_state_unlock(other);
1192
1193         if (sched)
1194                 timeo = schedule_timeout(timeo);
1195
1196         finish_wait(&u->peer_wait, &wait);
1197         return timeo;
1198 }
1199
/*
 * Connect @sock to the listening socket named by @uaddr.
 *
 * The endpoint for the accepting side (newsk) and a one-byte skb obtained
 * through sock_wmalloc(newsk, ...) are allocated before any state lock is
 * taken.  The listener is then looked up and its state lock taken.  If
 * the listener's receive queue is full the call fails with -EAGAIN when
 * timeo is zero, otherwise it sleeps in unix_wait_for_peer() and repeats
 * the lookup.  With both state locks held and the security hook passed,
 * newsk becomes the peer of sk, sk is marked TCP_ESTABLISHED /
 * SS_CONNECTED, and the skb is queued on the listener's receive queue,
 * from where unix_accept() dequeues it and uses skb->sk as the accepted
 * sock.
 *
 * Returns 0 on success or a negative errno.  On failure the preallocated
 * skb and newsk are released and any reference held on the listener is
 * dropped.
 */
1200 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1201                                int addr_len, int flags)
1202 {
1203         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1204         struct sock *sk = sock->sk;
1205         struct net *net = sock_net(sk);
1206         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1207         struct sock *newsk = NULL;
1208         struct sock *other = NULL;
1209         struct sk_buff *skb = NULL;
1210         unsigned int hash;
1211         int st;
1212         int err;
1213         long timeo;
1214
1215         err = unix_mkname(sunaddr, addr_len, &hash);
1216         if (err < 0)
1217                 goto out;
1218         addr_len = err;
1219
             /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
1220         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1221             (err = unix_autobind(sock)) != 0)
1222                 goto out;
1223
1224         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1225
1226         /* First of all allocate resources.
1227            If we will make it after state is locked,
1228            we will have to recheck all again in any case.
1229          */
1230
1231         err = -ENOMEM;
1232
1233         /* create new sock for complete connection */
1234         newsk = unix_create1(sock_net(sk), NULL, 0);
1235         if (newsk == NULL)
1236                 goto out;
1237
1238         /* Allocate skb for sending to listening sock */
1239         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1240         if (skb == NULL)
1241                 goto out;
1242
1243 restart:
1244         /*  Find listening sock. */
1245         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1246         if (!other)
1247                 goto out;
1248
1249         /* Latch state of peer */
1250         unix_state_lock(other);
1251
1252         /* Apparently VFS overslept socket death. Retry. */
1253         if (sock_flag(other, SOCK_DEAD)) {
1254                 unix_state_unlock(other);
1255                 sock_put(other);
1256                 goto restart;
1257         }
1258
1259         err = -ECONNREFUSED;
1260         if (other->sk_state != TCP_LISTEN)
1261                 goto out_unlock;
1262         if (other->sk_shutdown & RCV_SHUTDOWN)
1263                 goto out_unlock;
1264
1265         if (unix_recvq_full(other)) {
1266                 err = -EAGAIN;
1267                 if (!timeo)
1268                         goto out_unlock;
1269
1270                 timeo = unix_wait_for_peer(other, timeo);
1271
                     /*
                      * unix_wait_for_peer() has already dropped other's state
                      * lock, so leave through "out" (which still drops the
                      * reference on other) rather than "out_unlock".
                      */
1272                 err = sock_intr_errno(timeo);
1273                 if (signal_pending(current))
1274                         goto out;
1275                 sock_put(other);
1276                 goto restart;
1277         }
1278
1279         /* Latch our state.
1280
1281            It is tricky place. We need to grab our state lock and cannot
1282            drop lock on peer. It is dangerous because deadlock is
1283            possible. Connect to self case and simultaneous
1284            attempt to connect are eliminated by checking socket
1285            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1286            check this before attempt to grab lock.
1287
1288            Well, and we have to recheck the state after socket locked.
1289          */
1290         st = sk->sk_state;
1291
1292         switch (st) {
1293         case TCP_CLOSE:
1294                 /* This is ok... continue with connect */
1295                 break;
1296         case TCP_ESTABLISHED:
1297                 /* Socket is already connected */
1298                 err = -EISCONN;
1299                 goto out_unlock;
1300         default:
1301                 err = -EINVAL;
1302                 goto out_unlock;
1303         }
1304
1305         unix_state_lock_nested(sk);
1306
             /* State changed between the unlocked read and the lock: start over. */
1307         if (sk->sk_state != st) {
1308                 unix_state_unlock(sk);
1309                 unix_state_unlock(other);
1310                 sock_put(other);
1311                 goto restart;
1312         }
1313
1314         err = security_unix_stream_connect(sk, other, newsk);
1315         if (err) {
1316                 unix_state_unlock(sk);
1317                 goto out_unlock;
1318         }
1319
1320         /* The way is open! Fastly set all the necessary fields... */
1321
1322         sock_hold(sk);
1323         unix_peer(newsk)        = sk;
1324         newsk->sk_state         = TCP_ESTABLISHED;
1325         newsk->sk_type          = sk->sk_type;
1326         init_peercred(newsk);
1327         newu = unix_sk(newsk);
1328         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1329         otheru = unix_sk(other);
1330
1331         /* copy address information from listening to new sock
1332          *
1333          * The contents of *(otheru->addr) and otheru->path
1334          * are seen fully set up here, since we have found
1335          * otheru in hash under unix_table_lock.  Insertion
1336          * into the hash chain we'd found it in had been done
1337          * in an earlier critical area protected by unix_table_lock,
1338          * the same one where we'd set *(otheru->addr) contents,
1339          * as well as otheru->path and otheru->addr itself.
1340          *
1341          * Using smp_store_release() here to set newu->addr
1342          * is enough to make those stores, as well as stores
1343          * to newu->path visible to anyone who gets newu->addr
1344          * by smp_load_acquire().  IOW, the same warranties
1345          * as for unix_sock instances bound in unix_bind() or
1346          * in unix_autobind().
1347          */
1348         if (otheru->path.dentry) {
1349                 path_get(&otheru->path);
1350                 newu->path = otheru->path;
1351         }
1352         refcount_inc(&otheru->addr->refcnt);
1353         smp_store_release(&newu->addr, otheru->addr);
1354
1355         /* Set credentials */
1356         copy_peercred(sk, other);
1357
1358         sock->state     = SS_CONNECTED;
1359         sk->sk_state    = TCP_ESTABLISHED;
1360         sock_hold(newsk);
1361
1362         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1363         unix_peer(sk)   = newsk;
1364
1365         unix_state_unlock(sk);
1366
1367         /* take ten and send info to listening sock */
1368         spin_lock(&other->sk_receive_queue.lock);
1369         __skb_queue_tail(&other->sk_receive_queue, skb);
1370         spin_unlock(&other->sk_receive_queue.lock);
1371         unix_state_unlock(other);
1372         other->sk_data_ready(other);
1373         sock_put(other);
1374         return 0;
1375
1376 out_unlock:
1377         if (other)
1378                 unix_state_unlock(other);
1379
1380 out:
             /* Undo the preallocation and drop the lookup reference, if any. */
1381         kfree_skb(skb);
1382         if (newsk)
1383                 unix_release_sock(newsk, 0);
1384         if (other)
1385                 sock_put(other);
1386         return err;
1387 }
1388
1389 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1390 {
1391         struct sock *ska = socka->sk, *skb = sockb->sk;
1392
1393         /* Join our sockets back to back */
1394         sock_hold(ska);
1395         sock_hold(skb);
1396         unix_peer(ska) = skb;
1397         unix_peer(skb) = ska;
1398         init_peercred(ska);
1399         init_peercred(skb);
1400
1401         if (ska->sk_type != SOCK_DGRAM) {
1402                 ska->sk_state = TCP_ESTABLISHED;
1403                 skb->sk_state = TCP_ESTABLISHED;
1404                 socka->state  = SS_CONNECTED;
1405                 sockb->state  = SS_CONNECTED;
1406         }
1407         return 0;
1408 }
1409
1410 static void unix_sock_inherit_flags(const struct socket *old,
1411                                     struct socket *new)
1412 {
1413         if (test_bit(SOCK_PASSCRED, &old->flags))
1414                 set_bit(SOCK_PASSCRED, &new->flags);
1415         if (test_bit(SOCK_PASSSEC, &old->flags))
1416                 set_bit(SOCK_PASSSEC, &new->flags);
1417 }
1418
1419 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1420                        bool kern)
1421 {
1422         struct sock *sk = sock->sk;
1423         struct sock *tsk;
1424         struct sk_buff *skb;
1425         int err;
1426
1427         err = -EOPNOTSUPP;
1428         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1429                 goto out;
1430
1431         err = -EINVAL;
1432         if (sk->sk_state != TCP_LISTEN)
1433                 goto out;
1434
1435         /* If socket state is TCP_LISTEN it cannot change (for now...),
1436          * so that no locks are necessary.
1437          */
1438
1439         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1440         if (!skb) {
1441                 /* This means receive shutdown. */
1442                 if (err == 0)
1443                         err = -EINVAL;
1444                 goto out;
1445         }
1446
1447         tsk = skb->sk;
1448         skb_free_datagram(sk, skb);
1449         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1450
1451         /* attach accepted sock to socket */
1452         unix_state_lock(tsk);
1453         newsock->state = SS_CONNECTED;
1454         unix_sock_inherit_flags(sock, newsock);
1455         sock_graft(tsk, newsock);
1456         unix_state_unlock(tsk);
1457         return 0;
1458
1459 out:
1460         return err;
1461 }
1462
1463
1464 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1465 {
1466         struct sock *sk = sock->sk;
1467         struct unix_address *addr;
1468         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1469         int err = 0;
1470
1471         if (peer) {
1472                 sk = unix_peer_get(sk);
1473
1474                 err = -ENOTCONN;
1475                 if (!sk)
1476                         goto out;
1477                 err = 0;
1478         } else {
1479                 sock_hold(sk);
1480         }
1481
1482         addr = smp_load_acquire(&unix_sk(sk)->addr);
1483         if (!addr) {
1484                 sunaddr->sun_family = AF_UNIX;
1485                 sunaddr->sun_path[0] = 0;
1486                 err = sizeof(short);
1487         } else {
1488                 err = addr->len;
1489                 memcpy(sunaddr, addr->name, addr->len);
1490         }
1491         sock_put(sk);
1492 out:
1493         return err;
1494 }
1495
1496 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1497 {
1498         int err = 0;
1499
1500         UNIXCB(skb).pid  = get_pid(scm->pid);
1501         UNIXCB(skb).uid = scm->creds.uid;
1502         UNIXCB(skb).gid = scm->creds.gid;
1503         UNIXCB(skb).fp = NULL;
1504         unix_get_secdata(scm, skb);
1505         if (scm->fp && send_fds)
1506                 err = unix_attach_fds(scm, skb);
1507
1508         skb->destructor = unix_destruct_scm;
1509         return err;
1510 }
1511
1512 static bool unix_passcred_enabled(const struct socket *sock,
1513                                   const struct sock *other)
1514 {
1515         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1516                !other->sk_socket ||
1517                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1518 }
1519
1520 /*
1521  * Some apps rely on write() giving SCM_CREDENTIALS
1522  * We include credentials if source or destination socket
1523  * asserted SOCK_PASSCRED.
1524  */
1525 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1526                             const struct sock *other)
1527 {
1528         if (UNIXCB(skb).pid)
1529                 return;
1530         if (unix_passcred_enabled(sock, other)) {
1531                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1532                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1533         }
1534 }
1535
1536 static int maybe_init_creds(struct scm_cookie *scm,
1537                             struct socket *socket,
1538                             const struct sock *other)
1539 {
1540         int err;
1541         struct msghdr msg = { .msg_controllen = 0 };
1542
1543         err = scm_send(socket, &msg, scm, false);
1544         if (err)
1545                 return err;
1546
1547         if (unix_passcred_enabled(socket, other)) {
1548                 scm->pid = get_pid(task_tgid(current));
1549                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1550         }
1551         return err;
1552 }
1553
1554 static bool unix_skb_scm_eq(struct sk_buff *skb,
1555                             struct scm_cookie *scm)
1556 {
1557         const struct unix_skb_parms *u = &UNIXCB(skb);
1558
1559         return u->pid == scm->pid &&
1560                uid_eq(u->uid, scm->creds.uid) &&
1561                gid_eq(u->gid, scm->creds.gid) &&
1562                unix_secdata_eq(scm, skb);
1563 }
1564
1565 /*
1566  *      Send AF_UNIX data.
1567  */
1568
/*
 * sendmsg() for AF_UNIX datagrams.
 *
 * The destination is taken from msg->msg_name when msg->msg_namelen is
 * non-zero; otherwise the connected peer is used and -ENOTCONN is
 * returned if there is none.  MSG_OOB is refused with -EOPNOTSUPP and a
 * payload larger than sk->sk_sndbuf - 32 with -EMSGSIZE.
 *
 * The skb is allocated and filled before the receiver is looked up.  If
 * the receiver's queue is full, the sender sleeps in unix_wait_for_peer()
 * as long as timeo is non-zero.  With a zero timeout it takes both state
 * locks and either fails with -EAGAIN (sk is not connected to the
 * receiver, or unix_dgram_peer_wake_me() returned non-zero) or rechecks
 * the receiver from restart_locked with both locks held.
 *
 * Returns len on success, and also when sk_filter() rejects the skb (the
 * packet is then dropped without telling the sender), or a negative
 * errno.
 */
1569 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1570                               size_t len)
1571 {
1572         struct sock *sk = sock->sk;
1573         struct net *net = sock_net(sk);
1574         struct unix_sock *u = unix_sk(sk);
1575         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1576         struct sock *other = NULL;
1577         int namelen = 0; /* fake GCC */
1578         int err;
1579         unsigned int hash;
1580         struct sk_buff *skb;
1581         long timeo;
1582         struct scm_cookie scm;
1583         int data_len = 0;
1584         int sk_locked;
1585
1586         wait_for_unix_gc();
1587         err = scm_send(sock, msg, &scm, false);
1588         if (err < 0)
1589                 return err;
1590
1591         err = -EOPNOTSUPP;
1592         if (msg->msg_flags&MSG_OOB)
1593                 goto out;
1594
1595         if (msg->msg_namelen) {
1596                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1597                 if (err < 0)
1598                         goto out;
1599                 namelen = err;
1600         } else {
                     /* No address given: send to the connected peer. */
1601                 sunaddr = NULL;
1602                 err = -ENOTCONN;
1603                 other = unix_peer_get(sk);
1604                 if (!other)
1605                         goto out;
1606         }
1607
1608         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1609             && (err = unix_autobind(sock)) != 0)
1610                 goto out;
1611
1612         err = -EMSGSIZE;
1613         if (len > sk->sk_sndbuf - 32)
1614                 goto out;
1615
             /*
              * Whatever exceeds SKB_MAX_ALLOC is requested as data_len, capped
              * at MAX_SKB_FRAGS pages and rounded up to a multiple of the page
              * size; the rest (len - data_len) is the header length argument.
              */
1616         if (len > SKB_MAX_ALLOC) {
1617                 data_len = min_t(size_t,
1618                                  len - SKB_MAX_ALLOC,
1619                                  MAX_SKB_FRAGS * PAGE_SIZE);
1620                 data_len = PAGE_ALIGN(data_len);
1621
1622                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1623         }
1624
1625         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1626                                    msg->msg_flags & MSG_DONTWAIT, &err,
1627                                    PAGE_ALLOC_COSTLY_ORDER);
1628         if (skb == NULL)
1629                 goto out;
1630
1631         err = unix_scm_to_skb(&scm, skb, true);
1632         if (err < 0)
1633                 goto out_free;
1634
1635         skb_put(skb, len - data_len);
1636         skb->data_len = data_len;
1637         skb->len = len;
1638         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1639         if (err)
1640                 goto out_free;
1641
1642         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1643
1644 restart:
             /*
              * other is NULL on the first pass of an addressed send, and again
              * after a dead receiver was dropped below.
              */
1645         if (!other) {
1646                 err = -ECONNRESET;
1647                 if (sunaddr == NULL)
1648                         goto out_free;
1649
1650                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1651                                         hash, &err);
1652                 if (other == NULL)
1653                         goto out_free;
1654         }
1655
1656         if (sk_filter(other, skb) < 0) {
1657                 /* Toss the packet but do not return any error to the sender */
1658                 err = len;
1659                 goto out_free;
1660         }
1661
             /* sk_locked: non-zero while sk's own state lock is held as well. */
1662         sk_locked = 0;
1663         unix_state_lock(other);
1664 restart_locked:
1665         err = -EPERM;
1666         if (!unix_may_send(sk, other))
1667                 goto out_unlock;
1668
1669         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1670                 /*
1671                  *      Check with 1003.1g - what should
1672                  *      datagram error
1673                  */
                     /* Drop the reference obtained by the lookup / unix_peer_get(). */
1674                 unix_state_unlock(other);
1675                 sock_put(other);
1676
1677                 if (!sk_locked)
1678                         unix_state_lock(sk);
1679
1680                 err = 0;
1681                 if (unix_peer(sk) == other) {
                             /*
                              * The dead sock was our connected peer: disconnect
                              * from it and report -ECONNREFUSED.
                              */
1682                         unix_peer(sk) = NULL;
1683                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1684
1685                         unix_state_unlock(sk);
1686
1687                         unix_dgram_disconnected(sk, other);
1688                         sock_put(other);
1689                         err = -ECONNREFUSED;
1690                 } else {
1691                         unix_state_unlock(sk);
1692                 }
1693
                     /* Not our peer: look the address up again from scratch. */
1694                 other = NULL;
1695                 if (err)
1696                         goto out_free;
1697                 goto restart;
1698         }
1699
1700         err = -EPIPE;
1701         if (other->sk_shutdown & RCV_SHUTDOWN)
1702                 goto out_unlock;
1703
1704         if (sk->sk_type != SOCK_SEQPACKET) {
1705                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1706                 if (err)
1707                         goto out_unlock;
1708         }
1709
1710         /* other == sk && unix_peer(other) != sk if
1711          * - unix_peer(sk) == NULL, destination address bound to sk
1712          * - unix_peer(sk) == sk by time of get but disconnected before lock
1713          */
1714         if (other != sk &&
1715             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1716                 if (timeo) {
                             /*
                              * unix_wait_for_peer() releases other's state lock,
                              * so both exits below leave without unlocking.
                              */
1717                         timeo = unix_wait_for_peer(other, timeo);
1718
1719                         err = sock_intr_errno(timeo);
1720                         if (signal_pending(current))
1721                                 goto out_free;
1722
1723                         goto restart;
1724                 }
1725
                     /* Switch from holding only other's lock to holding both. */
1726                 if (!sk_locked) {
1727                         unix_state_unlock(other);
1728                         unix_state_double_lock(sk, other);
1729                 }
1730
1731                 if (unix_peer(sk) != other ||
1732                     unix_dgram_peer_wake_me(sk, other)) {
1733                         err = -EAGAIN;
1734                         sk_locked = 1;
1735                         goto out_unlock;
1736                 }
1737
                     /*
                      * other's lock was dropped for a moment above, so repeat
                      * the checks, this time with both locks held.
                      */
1738                 if (!sk_locked) {
1739                         sk_locked = 1;
1740                         goto restart_locked;
1741                 }
1742         }
1743
1744         if (unlikely(sk_locked))
1745                 unix_state_unlock(sk);
1746
1747         if (sock_flag(other, SOCK_RCVTSTAMP))
1748                 __net_timestamp(skb);
1749         maybe_add_creds(skb, sock, other);
1750         skb_queue_tail(&other->sk_receive_queue, skb);
1751         unix_state_unlock(other);
1752         other->sk_data_ready(other);
1753         sock_put(other);
1754         scm_destroy(&scm);
1755         return len;
1756
1757 out_unlock:
1758         if (sk_locked)
1759                 unix_state_unlock(sk);
1760         unix_state_unlock(other);
1761 out_free:
1762         kfree_skb(skb);
1763 out:
1764         if (other)
1765                 sock_put(other);
1766         scm_destroy(&scm);
1767         return err;
1768 }
1769
1770 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1771  * bytes, and a minimum of a full page.
1772  */
1773 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1774
1775 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1776                                size_t len)
1777 {
1778         struct sock *sk = sock->sk;
1779         struct sock *other = NULL;
1780         int err, size;
1781         struct sk_buff *skb;
1782         int sent = 0;
1783         struct scm_cookie scm;
1784         bool fds_sent = false;
1785         int data_len;
1786
1787         wait_for_unix_gc();
1788         err = scm_send(sock, msg, &scm, false);
1789         if (err < 0)
1790                 return err;
1791
1792         err = -EOPNOTSUPP;
1793         if (msg->msg_flags&MSG_OOB)
1794                 goto out_err;
1795
1796         if (msg->msg_namelen) {
1797                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1798                 goto out_err;
1799         } else {
1800                 err = -ENOTCONN;
1801                 other = unix_peer(sk);
1802                 if (!other)
1803                         goto out_err;
1804         }
1805
1806         if (sk->sk_shutdown & SEND_SHUTDOWN)
1807                 goto pipe_err;
1808
1809         while (sent < len) {
1810                 size = len - sent;
1811
1812                 /* Keep two messages in the pipe so it schedules better */
1813                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1814
1815                 /* allow fallback to order-0 allocations */
1816                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1817
1818                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1819
1820                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1821
1822                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1823                                            msg->msg_flags & MSG_DONTWAIT, &err,
1824                                            get_order(UNIX_SKB_FRAGS_SZ));
1825                 if (!skb)
1826                         goto out_err;
1827
1828                 /* Only send the fds in the first buffer */
1829                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1830                 if (err < 0) {
1831                         kfree_skb(skb);
1832                         goto out_err;
1833                 }
1834                 fds_sent = true;
1835
1836                 skb_put(skb, size - data_len);
1837                 skb->data_len = data_len;
1838                 skb->len = size;
1839                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1840                 if (err) {
1841                         kfree_skb(skb);
1842                         goto out_err;
1843                 }
1844
1845                 unix_state_lock(other);
1846
1847                 if (sock_flag(other, SOCK_DEAD) ||
1848                     (other->sk_shutdown & RCV_SHUTDOWN))
1849                         goto pipe_err_free;
1850
1851                 maybe_add_creds(skb, sock, other);
1852                 skb_queue_tail(&other->sk_receive_queue, skb);
1853                 unix_state_unlock(other);
1854                 other->sk_data_ready(other);
1855                 sent += size;
1856         }
1857
1858         scm_destroy(&scm);
1859
1860         return sent;
1861
1862 pipe_err_free:
1863         unix_state_unlock(other);
1864         kfree_skb(skb);
1865 pipe_err:
1866         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1867                 send_sig(SIGPIPE, current, 0);
1868         err = -EPIPE;
1869 out_err:
1870         scm_destroy(&scm);
1871         return sent ? : err;
1872 }
1873
1874 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1875                                     int offset, size_t size, int flags)
1876 {
1877         int err;
1878         bool send_sigpipe = false;
1879         bool init_scm = true;
1880         struct scm_cookie scm;
1881         struct sock *other, *sk = socket->sk;
1882         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1883
1884         if (flags & MSG_OOB)
1885                 return -EOPNOTSUPP;
1886
1887         other = unix_peer(sk);
1888         if (!other || sk->sk_state != TCP_ESTABLISHED)
1889                 return -ENOTCONN;
1890
1891         if (false) {
1892 alloc_skb:
1893                 unix_state_unlock(other);
1894                 mutex_unlock(&unix_sk(other)->iolock);
1895                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1896                                               &err, 0);
1897                 if (!newskb)
1898                         goto err;
1899         }
1900
1901         /* we must acquire iolock as we modify already present
1902          * skbs in the sk_receive_queue and mess with skb->len
1903          */
1904         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1905         if (err) {
1906                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1907                 goto err;
1908         }
1909
1910         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1911                 err = -EPIPE;
1912                 send_sigpipe = true;
1913                 goto err_unlock;
1914         }
1915
1916         unix_state_lock(other);
1917
1918         if (sock_flag(other, SOCK_DEAD) ||
1919             other->sk_shutdown & RCV_SHUTDOWN) {
1920                 err = -EPIPE;
1921                 send_sigpipe = true;
1922                 goto err_state_unlock;
1923         }
1924
1925         if (init_scm) {
1926                 err = maybe_init_creds(&scm, socket, other);
1927                 if (err)
1928                         goto err_state_unlock;
1929                 init_scm = false;
1930         }
1931
1932         skb = skb_peek_tail(&other->sk_receive_queue);
1933         if (tail && tail == skb) {
1934                 skb = newskb;
1935         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1936                 if (newskb) {
1937                         skb = newskb;
1938                 } else {
1939                         tail = skb;
1940                         goto alloc_skb;
1941                 }
1942         } else if (newskb) {
1943                 /* this is fast path, we don't necessarily need to
1944                  * call to kfree_skb even though with newskb == NULL
1945                  * this - does no harm
1946                  */
1947                 consume_skb(newskb);
1948                 newskb = NULL;
1949         }
1950
1951         if (skb_append_pagefrags(skb, page, offset, size)) {
1952                 tail = skb;
1953                 goto alloc_skb;
1954         }
1955
1956         skb->len += size;
1957         skb->data_len += size;
1958         skb->truesize += size;
1959         refcount_add(size, &sk->sk_wmem_alloc);
1960
1961         if (newskb) {
1962                 err = unix_scm_to_skb(&scm, skb, false);
1963                 if (err)
1964                         goto err_state_unlock;
1965                 spin_lock(&other->sk_receive_queue.lock);
1966                 __skb_queue_tail(&other->sk_receive_queue, newskb);
1967                 spin_unlock(&other->sk_receive_queue.lock);
1968         }
1969
1970         unix_state_unlock(other);
1971         mutex_unlock(&unix_sk(other)->iolock);
1972
1973         other->sk_data_ready(other);
1974         scm_destroy(&scm);
1975         return size;
1976
1977 err_state_unlock:
1978         unix_state_unlock(other);
1979 err_unlock:
1980         mutex_unlock(&unix_sk(other)->iolock);
1981 err:
1982         kfree_skb(newskb);
1983         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1984                 send_sig(SIGPIPE, current, 0);
1985         if (!init_scm)
1986                 scm_destroy(&scm);
1987         return err;
1988 }
1989
1990 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1991                                   size_t len)
1992 {
1993         int err;
1994         struct sock *sk = sock->sk;
1995
1996         err = sock_error(sk);
1997         if (err)
1998                 return err;
1999
2000         if (sk->sk_state != TCP_ESTABLISHED)
2001                 return -ENOTCONN;
2002
2003         if (msg->msg_namelen)
2004                 msg->msg_namelen = 0;
2005
2006         return unix_dgram_sendmsg(sock, msg, len);
2007 }
2008
2009 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2010                                   size_t size, int flags)
2011 {
2012         struct sock *sk = sock->sk;
2013
2014         if (sk->sk_state != TCP_ESTABLISHED)
2015                 return -ENOTCONN;
2016
2017         return unix_dgram_recvmsg(sock, msg, size, flags);
2018 }
2019
2020 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2021 {
2022         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2023
2024         if (addr) {
2025                 msg->msg_namelen = addr->len;
2026                 memcpy(msg->msg_name, addr->name, addr->len);
2027         }
2028 }
2029
2030 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2031                               size_t size, int flags)
2032 {
2033         struct scm_cookie scm;
2034         struct sock *sk = sock->sk;
2035         struct unix_sock *u = unix_sk(sk);
2036         struct sk_buff *skb, *last;
2037         long timeo;
2038         int skip;
2039         int err;
2040
2041         err = -EOPNOTSUPP;
2042         if (flags&MSG_OOB)
2043                 goto out;
2044
2045         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2046
2047         do {
2048                 mutex_lock(&u->iolock);
2049
2050                 skip = sk_peek_offset(sk, flags);
2051                 skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2052                                               &last);
2053                 if (skb)
2054                         break;
2055
2056                 mutex_unlock(&u->iolock);
2057
2058                 if (err != -EAGAIN)
2059                         break;
2060         } while (timeo &&
2061                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2062
2063         if (!skb) { /* implies iolock unlocked */
2064                 unix_state_lock(sk);
2065                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2066                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2067                     (sk->sk_shutdown & RCV_SHUTDOWN))
2068                         err = 0;
2069                 unix_state_unlock(sk);
2070                 goto out;
2071         }
2072
2073         if (wq_has_sleeper(&u->peer_wait))
2074                 wake_up_interruptible_sync_poll(&u->peer_wait,
2075                                                 EPOLLOUT | EPOLLWRNORM |
2076                                                 EPOLLWRBAND);
2077
2078         if (msg->msg_name)
2079                 unix_copy_addr(msg, skb->sk);
2080
2081         if (size > skb->len - skip)
2082                 size = skb->len - skip;
2083         else if (size < skb->len - skip)
2084                 msg->msg_flags |= MSG_TRUNC;
2085
2086         err = skb_copy_datagram_msg(skb, skip, msg, size);
2087         if (err)
2088                 goto out_free;
2089
2090         if (sock_flag(sk, SOCK_RCVTSTAMP))
2091                 __sock_recv_timestamp(msg, sk, skb);
2092
2093         memset(&scm, 0, sizeof(scm));
2094
2095         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2096         unix_set_secdata(&scm, skb);
2097
2098         if (!(flags & MSG_PEEK)) {
2099                 if (UNIXCB(skb).fp)
2100                         unix_detach_fds(&scm, skb);
2101
2102                 sk_peek_offset_bwd(sk, skb->len);
2103         } else {
2104                 /* It is questionable: on PEEK we could:
2105                    - do not return fds - good, but too simple 8)
2106                    - return fds, and do not return them on read (old strategy,
2107                      apparently wrong)
2108                    - clone fds (I chose it for now, it is the most universal
2109                      solution)
2110
2111                    POSIX 1003.1g does not actually define this clearly
2112                    at all. POSIX 1003.1g doesn't define a lot of things
2113                    clearly however!
2114
2115                 */
2116
2117                 sk_peek_offset_fwd(sk, size);
2118
2119                 if (UNIXCB(skb).fp)
2120                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2121         }
2122         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2123
2124         scm_recv(sock, msg, &scm, flags);
2125
2126 out_free:
2127         skb_free_datagram(sk, skb);
2128         mutex_unlock(&u->iolock);
2129 out:
2130         return err;
2131 }
2132
2133 /*
2134  *      Sleep until more data has arrived. But check for races..
2135  */
2136 static long unix_stream_data_wait(struct sock *sk, long timeo,
2137                                   struct sk_buff *last, unsigned int last_len,
2138                                   bool freezable)
2139 {
2140         struct sk_buff *tail;
2141         DEFINE_WAIT(wait);
2142
2143         unix_state_lock(sk);
2144
2145         for (;;) {
2146                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2147
2148                 tail = skb_peek_tail(&sk->sk_receive_queue);
2149                 if (tail != last ||
2150                     (tail && tail->len != last_len) ||
2151                     sk->sk_err ||
2152                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2153                     signal_pending(current) ||
2154                     !timeo)
2155                         break;
2156
2157                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2158                 unix_state_unlock(sk);
2159                 if (freezable)
2160                         timeo = freezable_schedule_timeout(timeo);
2161                 else
2162                         timeo = schedule_timeout(timeo);
2163                 unix_state_lock(sk);
2164
2165                 if (sock_flag(sk, SOCK_DEAD))
2166                         break;
2167
2168                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2169         }
2170
2171         finish_wait(sk_sleep(sk), &wait);
2172         unix_state_unlock(sk);
2173         return timeo;
2174 }
2175
2176 static unsigned int unix_skb_len(const struct sk_buff *skb)
2177 {
2178         return skb->len - UNIXCB(skb).consumed;
2179 }
2180
2181 struct unix_stream_read_state {
2182         int (*recv_actor)(struct sk_buff *, int, int,
2183                           struct unix_stream_read_state *);
2184         struct socket *socket;
2185         struct msghdr *msg;
2186         struct pipe_inode_info *pipe;
2187         size_t size;
2188         int flags;
2189         unsigned int splice_flags;
2190 };
2191
2192 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2193                                     bool freezable)
2194 {
2195         struct scm_cookie scm;
2196         struct socket *sock = state->socket;
2197         struct sock *sk = sock->sk;
2198         struct unix_sock *u = unix_sk(sk);
2199         int copied = 0;
2200         int flags = state->flags;
2201         int noblock = flags & MSG_DONTWAIT;
2202         bool check_creds = false;
2203         int target;
2204         int err = 0;
2205         long timeo;
2206         int skip;
2207         size_t size = state->size;
2208         unsigned int last_len;
2209
2210         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2211                 err = -EINVAL;
2212                 goto out;
2213         }
2214
2215         if (unlikely(flags & MSG_OOB)) {
2216                 err = -EOPNOTSUPP;
2217                 goto out;
2218         }
2219
2220         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2221         timeo = sock_rcvtimeo(sk, noblock);
2222
2223         memset(&scm, 0, sizeof(scm));
2224
2225         /* Lock the socket to prevent queue disordering
2226          * while sleeps in memcpy_tomsg
2227          */
2228         mutex_lock(&u->iolock);
2229
2230         skip = max(sk_peek_offset(sk, flags), 0);
2231
2232         do {
2233                 int chunk;
2234                 bool drop_skb;
2235                 struct sk_buff *skb, *last;
2236
2237 redo:
2238                 unix_state_lock(sk);
2239                 if (sock_flag(sk, SOCK_DEAD)) {
2240                         err = -ECONNRESET;
2241                         goto unlock;
2242                 }
2243                 last = skb = skb_peek(&sk->sk_receive_queue);
2244                 last_len = last ? last->len : 0;
2245 again:
2246                 if (skb == NULL) {
2247                         if (copied >= target)
2248                                 goto unlock;
2249
2250                         /*
2251                          *      POSIX 1003.1g mandates this order.
2252                          */
2253
2254                         err = sock_error(sk);
2255                         if (err)
2256                                 goto unlock;
2257                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2258                                 goto unlock;
2259
2260                         unix_state_unlock(sk);
2261                         if (!timeo) {
2262                                 err = -EAGAIN;
2263                                 break;
2264                         }
2265
2266                         mutex_unlock(&u->iolock);
2267
2268                         timeo = unix_stream_data_wait(sk, timeo, last,
2269                                                       last_len, freezable);
2270
2271                         if (signal_pending(current)) {
2272                                 err = sock_intr_errno(timeo);
2273                                 scm_destroy(&scm);
2274                                 goto out;
2275                         }
2276
2277                         mutex_lock(&u->iolock);
2278                         goto redo;
2279 unlock:
2280                         unix_state_unlock(sk);
2281                         break;
2282                 }
2283
2284                 while (skip >= unix_skb_len(skb)) {
2285                         skip -= unix_skb_len(skb);
2286                         last = skb;
2287                         last_len = skb->len;
2288                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2289                         if (!skb)
2290                                 goto again;
2291                 }
2292
2293                 unix_state_unlock(sk);
2294
2295                 if (check_creds) {
2296                         /* Never glue messages from different writers */
2297                         if (!unix_skb_scm_eq(skb, &scm))
2298                                 break;
2299                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2300                         /* Copy credentials */
2301                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2302                         unix_set_secdata(&scm, skb);
2303                         check_creds = true;
2304                 }
2305
2306                 /* Copy address just once */
2307                 if (state->msg && state->msg->msg_name) {
2308                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2309                                          state->msg->msg_name);
2310                         unix_copy_addr(state->msg, skb->sk);
2311                         sunaddr = NULL;
2312                 }
2313
2314                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2315                 skb_get(skb);
2316                 chunk = state->recv_actor(skb, skip, chunk, state);
2317                 drop_skb = !unix_skb_len(skb);
2318                 /* skb is only safe to use if !drop_skb */
2319                 consume_skb(skb);
2320                 if (chunk < 0) {
2321                         if (copied == 0)
2322                                 copied = -EFAULT;
2323                         break;
2324                 }
2325                 copied += chunk;
2326                 size -= chunk;
2327
2328                 if (drop_skb) {
2329                         /* the skb was touched by a concurrent reader;
2330                          * we should not expect anything from this skb
2331                          * anymore and assume it invalid - we can be
2332                          * sure it was dropped from the socket queue
2333                          *
2334                          * let's report a short read
2335                          */
2336                         err = 0;
2337                         break;
2338                 }
2339
2340                 /* Mark read part of skb as used */
2341                 if (!(flags & MSG_PEEK)) {
2342                         UNIXCB(skb).consumed += chunk;
2343
2344                         sk_peek_offset_bwd(sk, chunk);
2345
2346                         if (UNIXCB(skb).fp)
2347                                 unix_detach_fds(&scm, skb);
2348
2349                         if (unix_skb_len(skb))
2350                                 break;
2351
2352                         skb_unlink(skb, &sk->sk_receive_queue);
2353                         consume_skb(skb);
2354
2355                         if (scm.fp)
2356                                 break;
2357                 } else {
2358                         /* It is questionable, see note in unix_dgram_recvmsg.
2359                          */
2360                         if (UNIXCB(skb).fp)
2361                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2362
2363                         sk_peek_offset_fwd(sk, chunk);
2364
2365                         if (UNIXCB(skb).fp)
2366                                 break;
2367
2368                         skip = 0;
2369                         last = skb;
2370                         last_len = skb->len;
2371                         unix_state_lock(sk);
2372                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2373                         if (skb)
2374                                 goto again;
2375                         unix_state_unlock(sk);
2376                         break;
2377                 }
2378         } while (size);
2379
2380         mutex_unlock(&u->iolock);
2381         if (state->msg)
2382                 scm_recv(sock, state->msg, &scm, flags);
2383         else
2384                 scm_destroy(&scm);
2385 out:
2386         return copied ? : err;
2387 }
2388
2389 static int unix_stream_read_actor(struct sk_buff *skb,
2390                                   int skip, int chunk,
2391                                   struct unix_stream_read_state *state)
2392 {
2393         int ret;
2394
2395         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2396                                     state->msg, chunk);
2397         return ret ?: chunk;
2398 }
2399
2400 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2401                                size_t size, int flags)
2402 {
2403         struct unix_stream_read_state state = {
2404                 .recv_actor = unix_stream_read_actor,
2405                 .socket = sock,
2406                 .msg = msg,
2407                 .size = size,
2408                 .flags = flags
2409         };
2410
2411         return unix_stream_read_generic(&state, true);
2412 }
2413
2414 static int unix_stream_splice_actor(struct sk_buff *skb,
2415                                     int skip, int chunk,
2416                                     struct unix_stream_read_state *state)
2417 {
2418         return skb_splice_bits(skb, state->socket->sk,
2419                                UNIXCB(skb).consumed + skip,
2420                                state->pipe, chunk, state->splice_flags);
2421 }
2422
2423 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2424                                        struct pipe_inode_info *pipe,
2425                                        size_t size, unsigned int flags)
2426 {
2427         struct unix_stream_read_state state = {
2428                 .recv_actor = unix_stream_splice_actor,
2429                 .socket = sock,
2430                 .pipe = pipe,
2431                 .size = size,
2432                 .splice_flags = flags,
2433         };
2434
2435         if (unlikely(*ppos))
2436                 return -ESPIPE;
2437
2438         if (sock->file->f_flags & O_NONBLOCK ||
2439             flags & SPLICE_F_NONBLOCK)
2440                 state.flags = MSG_DONTWAIT;
2441
2442         return unix_stream_read_generic(&state, false);
2443 }
2444
2445 static int unix_shutdown(struct socket *sock, int mode)
2446 {
2447         struct sock *sk = sock->sk;
2448         struct sock *other;
2449
2450         if (mode < SHUT_RD || mode > SHUT_RDWR)
2451                 return -EINVAL;
2452         /* This maps:
2453          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2454          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2455          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2456          */
2457         ++mode;
2458
2459         unix_state_lock(sk);
2460         sk->sk_shutdown |= mode;
2461         other = unix_peer(sk);
2462         if (other)
2463                 sock_hold(other);
2464         unix_state_unlock(sk);
2465         sk->sk_state_change(sk);
2466
2467         if (other &&
2468                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2469
2470                 int peer_mode = 0;
2471
2472                 if (mode&RCV_SHUTDOWN)
2473                         peer_mode |= SEND_SHUTDOWN;
2474                 if (mode&SEND_SHUTDOWN)
2475                         peer_mode |= RCV_SHUTDOWN;
2476                 unix_state_lock(other);
2477                 other->sk_shutdown |= peer_mode;
2478                 unix_state_unlock(other);
2479                 other->sk_state_change(other);
2480                 if (peer_mode == SHUTDOWN_MASK)
2481                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2482                 else if (peer_mode & RCV_SHUTDOWN)
2483                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2484         }
2485         if (other)
2486                 sock_put(other);
2487
2488         return 0;
2489 }
2490
2491 long unix_inq_len(struct sock *sk)
2492 {
2493         struct sk_buff *skb;
2494         long amount = 0;
2495
2496         if (sk->sk_state == TCP_LISTEN)
2497                 return -EINVAL;
2498
2499         spin_lock(&sk->sk_receive_queue.lock);
2500         if (sk->sk_type == SOCK_STREAM ||
2501             sk->sk_type == SOCK_SEQPACKET) {
2502                 skb_queue_walk(&sk->sk_receive_queue, skb)
2503                         amount += unix_skb_len(skb);
2504         } else {
2505                 skb = skb_peek(&sk->sk_receive_queue);
2506                 if (skb)
2507                         amount = skb->len;
2508         }
2509         spin_unlock(&sk->sk_receive_queue.lock);
2510
2511         return amount;
2512 }
2513 EXPORT_SYMBOL_GPL(unix_inq_len);
2514
/* Send-side memory currently charged to @sk, as given by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long outstanding = sk_wmem_alloc_get(sk);

	return outstanding;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2520
2521 static int unix_open_file(struct sock *sk)
2522 {
2523         struct path path;
2524         struct file *f;
2525         int fd;
2526
2527         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2528                 return -EPERM;
2529
2530         if (!smp_load_acquire(&unix_sk(sk)->addr))
2531                 return -ENOENT;
2532
2533         path = unix_sk(sk)->path;
2534         if (!path.dentry)
2535                 return -ENOENT;
2536
2537         path_get(&path);
2538
2539         fd = get_unused_fd_flags(O_CLOEXEC);
2540         if (fd < 0)
2541                 goto out;
2542
2543         f = dentry_open(&path, O_PATH, current_cred());
2544         if (IS_ERR(f)) {
2545                 put_unused_fd(fd);
2546                 fd = PTR_ERR(f);
2547                 goto out;
2548         }
2549
2550         fd_install(fd, f);
2551 out:
2552         path_put(&path);
2553
2554         return fd;
2555 }
2556
2557 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2558 {
2559         struct sock *sk = sock->sk;
2560         long amount = 0;
2561         int err;
2562
2563         switch (cmd) {
2564         case SIOCOUTQ:
2565                 amount = unix_outq_len(sk);
2566                 err = put_user(amount, (int __user *)arg);
2567                 break;
2568         case SIOCINQ:
2569                 amount = unix_inq_len(sk);
2570                 if (amount < 0)
2571                         err = amount;
2572                 else
2573                         err = put_user(amount, (int __user *)arg);
2574                 break;
2575         case SIOCUNIXFILE:
2576                 err = unix_open_file(sk);
2577                 break;
2578         default:
2579                 err = -ENOIOCTLCMD;
2580                 break;
2581         }
2582         return err;
2583 }
2584
2585 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2586 {
2587         struct sock *sk = sock->sk;
2588         __poll_t mask;
2589
2590         sock_poll_wait(file, sock, wait);
2591         mask = 0;
2592
2593         /* exceptional events? */
2594         if (sk->sk_err)
2595                 mask |= EPOLLERR;
2596         if (sk->sk_shutdown == SHUTDOWN_MASK)
2597                 mask |= EPOLLHUP;
2598         if (sk->sk_shutdown & RCV_SHUTDOWN)
2599                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2600
2601         /* readable? */
2602         if (!skb_queue_empty(&sk->sk_receive_queue))
2603                 mask |= EPOLLIN | EPOLLRDNORM;
2604
2605         /* Connection-based need to check for termination and startup */
2606         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2607             sk->sk_state == TCP_CLOSE)
2608                 mask |= EPOLLHUP;
2609
2610         /*
2611          * we set writable also when the other side has shut down the
2612          * connection. This prevents stuck sockets.
2613          */
2614         if (unix_writable(sk))
2615                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2616
2617         return mask;
2618 }
2619
2620 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2621                                     poll_table *wait)
2622 {
2623         struct sock *sk = sock->sk, *other;
2624         unsigned int writable;
2625         __poll_t mask;
2626
2627         sock_poll_wait(file, sock, wait);
2628         mask = 0;
2629
2630         /* exceptional events? */
2631         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2632                 mask |= EPOLLERR |
2633                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2634
2635         if (sk->sk_shutdown & RCV_SHUTDOWN)
2636                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2637         if (sk->sk_shutdown == SHUTDOWN_MASK)
2638                 mask |= EPOLLHUP;
2639
2640         /* readable? */
2641         if (!skb_queue_empty(&sk->sk_receive_queue))
2642                 mask |= EPOLLIN | EPOLLRDNORM;
2643
2644         /* Connection-based need to check for termination and startup */
2645         if (sk->sk_type == SOCK_SEQPACKET) {
2646                 if (sk->sk_state == TCP_CLOSE)
2647                         mask |= EPOLLHUP;
2648                 /* connection hasn't started yet? */
2649                 if (sk->sk_state == TCP_SYN_SENT)
2650                         return mask;
2651         }
2652
2653         /* No write status requested, avoid expensive OUT tests. */
2654         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2655                 return mask;
2656
2657         writable = unix_writable(sk);
2658         if (writable) {
2659                 unix_state_lock(sk);
2660
2661                 other = unix_peer(sk);
2662                 if (other && unix_peer(other) != sk &&
2663                     unix_recvq_full(other) &&
2664                     unix_dgram_peer_wake_me(sk, other))
2665                         writable = 0;
2666
2667                 unix_state_unlock(sk);
2668         }
2669
2670         if (writable)
2671                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2672         else
2673                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2674
2675         return mask;
2676 }
2677
2678 #ifdef CONFIG_PROC_FS
2679
/*
 * A seq_file position packs two values into one long: the hash bucket
 * index in the bits above BUCKET_SPACE and the offset within that bucket
 * in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - ((UNIX_HASH_BITS) + 2))

/* Extract the bucket index from a packed position. */
#define get_bucket(pos) ((pos) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(pos) ((pos) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from a bucket index and an in-bucket offset. */
#define set_bucket_offset(bucket, offset) ((bucket) << BUCKET_SPACE | (offset))
2685
2686 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2687 {
2688         unsigned long offset = get_offset(*pos);
2689         unsigned long bucket = get_bucket(*pos);
2690         struct sock *sk;
2691         unsigned long count = 0;
2692
2693         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2694                 if (sock_net(sk) != seq_file_net(seq))
2695                         continue;
2696                 if (++count == offset)
2697                         break;
2698         }
2699
2700         return sk;
2701 }
2702
2703 static struct sock *unix_next_socket(struct seq_file *seq,
2704                                      struct sock *sk,
2705                                      loff_t *pos)
2706 {
2707         unsigned long bucket;
2708
2709         while (sk > (struct sock *)SEQ_START_TOKEN) {
2710                 sk = sk_next(sk);
2711                 if (!sk)
2712                         goto next_bucket;
2713                 if (sock_net(sk) == seq_file_net(seq))
2714                         return sk;
2715         }
2716
2717         do {
2718                 sk = unix_from_bucket(seq, pos);
2719                 if (sk)
2720                         return sk;
2721
2722 next_bucket:
2723                 bucket = get_bucket(*pos) + 1;
2724                 *pos = set_bucket_offset(bucket, 1);
2725         } while (bucket < ARRAY_SIZE(unix_socket_table));
2726
2727         return NULL;
2728 }
2729
2730 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2731         __acquires(unix_table_lock)
2732 {
2733         spin_lock(&unix_table_lock);
2734
2735         if (!*pos)
2736                 return SEQ_START_TOKEN;
2737
2738         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2739                 return NULL;
2740
2741         return unix_next_socket(seq, NULL, pos);
2742 }
2743
2744 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2745 {
2746         ++*pos;
2747         return unix_next_socket(seq, v, pos);
2748 }
2749
/*
 * seq_file ->stop: releases unix_table_lock.  Neither @seq nor @v is used.
 */
static void unix_seq_stop(struct seq_file *seq, void *v)
        __releases(unix_table_lock)
{
        spin_unlock(&unix_table_lock);
}
2755
2756 static int unix_seq_show(struct seq_file *seq, void *v)
2757 {
2758
2759         if (v == SEQ_START_TOKEN)
2760                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2761                          "Inode Path\n");
2762         else {
2763                 struct sock *s = v;
2764                 struct unix_sock *u = unix_sk(s);
2765                 unix_state_lock(s);
2766
2767                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2768                         s,
2769                         refcount_read(&s->sk_refcnt),
2770                         0,
2771                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2772                         s->sk_type,
2773                         s->sk_socket ?
2774                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2775                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2776                         sock_i_ino(s));
2777
2778                 if (u->addr) {  // under unix_table_lock here
2779                         int i, len;
2780                         seq_putc(seq, ' ');
2781
2782                         i = 0;
2783                         len = u->addr->len - sizeof(short);
2784                         if (!UNIX_ABSTRACT(s))
2785                                 len--;
2786                         else {
2787                                 seq_putc(seq, '@');
2788                                 i++;
2789                         }
2790                         for ( ; i < len; i++)
2791                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2792                                          '@');
2793                 }
2794                 unix_state_unlock(s);
2795                 seq_putc(seq, '\n');
2796         }
2797
2798         return 0;
2799 }
2800
2801 static const struct seq_operations unix_seq_ops = {
2802         .start  = unix_seq_start,
2803         .next   = unix_seq_next,
2804         .stop   = unix_seq_stop,
2805         .show   = unix_seq_show,
2806 };
2807 #endif
2808
2809 static const struct net_proto_family unix_family_ops = {
2810         .family = PF_UNIX,
2811         .create = unix_create,
2812         .owner  = THIS_MODULE,
2813 };
2814
2815
2816 static int __net_init unix_net_init(struct net *net)
2817 {
2818         int error = -ENOMEM;
2819
2820         net->unx.sysctl_max_dgram_qlen = 10;
2821         if (unix_sysctl_register(net))
2822                 goto out;
2823
2824 #ifdef CONFIG_PROC_FS
2825         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2826                         sizeof(struct seq_net_private))) {
2827                 unix_sysctl_unregister(net);
2828                 goto out;
2829         }
2830 #endif
2831         error = 0;
2832 out:
2833         return error;
2834 }
2835
/*
 * Per-network-namespace teardown: unregister the sysctls and remove the
 * "unix" entry from the namespace's proc directory.
 */
static void __net_exit unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);
        remove_proc_entry("unix", net->proc_net);
}
2841
2842 static struct pernet_operations unix_net_ops = {
2843         .init = unix_net_init,
2844         .exit = unix_net_exit,
2845 };
2846
2847 static int __init af_unix_init(void)
2848 {
2849         int rc = -1;
2850
2851         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2852
2853         rc = proto_register(&unix_proto, 1);
2854         if (rc != 0) {
2855                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2856                 goto out;
2857         }
2858
2859         sock_register(&unix_family_ops);
2860         register_pernet_subsys(&unix_net_ops);
2861 out:
2862         return rc;
2863 }
2864
/*
 * Module exit: unregister the PF_UNIX socket family, the unix_proto
 * protocol and the per-network-namespace operations.
 */
static void __exit af_unix_exit(void)
{
        sock_unregister(PF_UNIX);
        proto_unregister(&unix_proto);
        unregister_pernet_subsys(&unix_net_ops);
}
2871
/*
 * Run earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use
 * a UNIX socket, but later than subsys_initcall() because we depend
 * on infrastructure initialised there.
 */
2876 fs_initcall(af_unix_init);
2877 module_exit(af_unix_exit);
2878
2879 MODULE_LICENSE("GPL");
2880 MODULE_ALIAS_NETPROTO(PF_UNIX);