net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <[email protected]>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
  28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  29  *                                      by the above two patches.
  30  *           Andrea Arcangeli   :       If possible we block in connect(2)
  31  *                                      if the max backlog of the listen socket
  32  *                                      has been reached. This won't break
  33  *                                      old apps and it will avoid a huge amount
  34  *                                      of socks hashed (this is for unix_gc()
  35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116
 117 #include "scm.h"
 118
 119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 120 EXPORT_SYMBOL_GPL(unix_socket_table);
 121 DEFINE_SPINLOCK(unix_table_lock);
 122 EXPORT_SYMBOL_GPL(unix_table_lock);
 123 static atomic_long_t unix_nr_socks;
 124
 125
 126 static struct hlist_head *unix_sockets_unbound(void *addr)
 127 {
 128         unsigned long hash = (unsigned long)addr;
 129
 130         hash ^= hash >> 16;
 131         hash ^= hash >> 8;
 132         hash %= UNIX_HASH_SIZE;
 133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134 }
 135
 136 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
 138 #ifdef CONFIG_SECURITY_NETWORK
 139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 140 {
 141         UNIXCB(skb).secid = scm->secid;
 142 }
 143
 144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 145 {
 146         scm->secid = UNIXCB(skb).secid;
 147 }
 148
 149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 150 {
 151         return (scm->secid == UNIXCB(skb).secid);
 152 }
 153 #else
 154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 155 { }
 156
 157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 161 {
 162         return true;
 163 }
 164 #endif /* CONFIG_SECURITY_NETWORK */
 165
 166 /*
 167  *  SMP locking strategy:
 168  *    hash table is protected with spinlock unix_table_lock
 169  *    each socket state is protected by separate spin lock.
 170  */
 171
 172 static inline unsigned int unix_hash_fold(__wsum n)
 173 {
 174         unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176         hash ^= hash>>8;
 177         return hash&(UNIX_HASH_SIZE-1);
 178 }
 179
 180 #define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183 {
 184         return unix_peer(osk) == sk;
 185 }
 186
 187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190 }
 191
 192 static inline int unix_recvq_full(struct sock const *sk)
 193 {
 194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195 }
 196
 197 struct sock *unix_peer_get(struct sock *s)
 198 {
 199         struct sock *peer;
 200
 201         unix_state_lock(s);
 202         peer = unix_peer(s);
 203         if (peer)
 204                 sock_hold(peer);
 205         unix_state_unlock(s);
 206         return peer;
 207 }
 208 EXPORT_SYMBOL_GPL(unix_peer_get);
 209
 210 static inline void unix_release_addr(struct unix_address *addr)
 211 {
 212         if (refcount_dec_and_test(&addr->refcnt))
 213                 kfree(addr);
 214 }
 215
 216 /*
 217  *      Check unix socket name:
 218  *              - should be not zero length.
 219  *              - if started by not zero, should be NULL terminated (FS object)
 220  *              - if started by zero, it is abstract name.
 221  */
 222
 223 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 224 {
 225         *hashp = 0;
 226
 227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 228                 return -EINVAL;
 229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 230                 return -EINVAL;
 231         if (sunaddr->sun_path[0]) {
 232                 /*
 233                  * This may look like an off by one error but it is a bit more
 234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 235                  * sun_path[108] doesn't as such exist.  However in kernel space
 236                  * we are guaranteed that it is a valid memory location in our
 237                  * kernel address buffer.
 238                  */
 239                 ((char *)sunaddr)[len] = 0;
 240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 241                 return len;
 242         }
 243
 244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 245         return len;
 246 }
 247
 248 static void __unix_remove_socket(struct sock *sk)
 249 {
 250         sk_del_node_init(sk);
 251 }
 252
 253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 254 {
 255         WARN_ON(!sk_unhashed(sk));
 256         sk_add_node(sk, list);
 257 }
 258
 259 static inline void unix_remove_socket(struct sock *sk)
 260 {
 261         spin_lock(&unix_table_lock);
 262         __unix_remove_socket(sk);
 263         spin_unlock(&unix_table_lock);
 264 }
 265
 266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 267 {
 268         spin_lock(&unix_table_lock);
 269         __unix_insert_socket(list, sk);
 270         spin_unlock(&unix_table_lock);
 271 }
 272
 273 static struct sock *__unix_find_socket_byname(struct net *net,
 274                                               struct sockaddr_un *sunname,
 275                                               int len, int type, unsigned int hash)
 276 {
 277         struct sock *s;
 278
 279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                 struct unix_sock *u = unix_sk(s);
 281
 282                 if (!net_eq(sock_net(s), net))
 283                         continue;
 284
 285                 if (u->addr->len == len &&
 286                     !memcmp(u->addr->name, sunname, len))
 287                         return s;
 288         }
 289         return NULL;
 290 }
 291
 292 static inline struct sock *unix_find_socket_byname(struct net *net,
 293                                                    struct sockaddr_un *sunname,
 294                                                    int len, int type,
 295                                                    unsigned int hash)
 296 {
 297         struct sock *s;
 298
 299         spin_lock(&unix_table_lock);
 300         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 301         if (s)
 302                 sock_hold(s);
 303         spin_unlock(&unix_table_lock);
 304         return s;
 305 }
 306
 307 static struct sock *unix_find_socket_byinode(struct inode *i)
 308 {
 309         struct sock *s;
 310
 311         spin_lock(&unix_table_lock);
 312         sk_for_each(s,
 313                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 314                 struct dentry *dentry = unix_sk(s)->path.dentry;
 315
 316                 if (dentry && d_backing_inode(dentry) == i) {
 317                         sock_hold(s);
 318                         goto found;
 319                 }
 320         }
 321         s = NULL;
 322 found:
 323         spin_unlock(&unix_table_lock);
 324         return s;
 325 }
 326
 327 /* Support code for asymmetrically connected dgram sockets
 328  *
 329  * If a datagram socket is connected to a socket not itself connected
 330  * to the first socket (eg, /dev/log), clients may only enqueue more
 331  * messages if the present receive queue of the server socket is not
 332  * "too large". This means there's a second writeability condition
 333  * poll and sendmsg need to test. The dgram recv code will do a wake
 334  * up on the peer_wait wait queue of a socket upon reception of a
 335  * datagram which needs to be propagated to sleeping would-be writers
 336  * since these might not have sent anything so far. This can't be
 337  * accomplished via poll_wait because the lifetime of the server
 338  * socket might be less than that of its clients if these break their
 339  * association with it or if the server socket is closed while clients
 340  * are still connected to it and there's no way to inform "a polling
 341  * implementation" that it should let go of a certain wait queue
 342  *
 343  * In order to propagate a wake up, a wait_queue_entry_t of the client
 344  * socket is enqueued on the peer_wait queue of the server socket
 345  * whose wake function does a wake_up on the ordinary client socket
 346  * wait queue. This connection is established whenever a write (or
 347  * poll for write) hit the flow control condition and broken when the
 348  * association to the server socket is dissolved or after a wake up
 349  * was relayed.
 350  */
 351
 352 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 353                                       void *key)
 354 {
 355         struct unix_sock *u;
 356         wait_queue_head_t *u_sleep;
 357
 358         u = container_of(q, struct unix_sock, peer_wake);
 359
 360         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 361                             q);
 362         u->peer_wake.private = NULL;
 363
 364         /* relaying can only happen while the wq still exists */
 365         u_sleep = sk_sleep(&u->sk);
 366         if (u_sleep)
 367                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 368
 369         return 0;
 370 }
 371
 372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 373 {
 374         struct unix_sock *u, *u_other;
 375         int rc;
 376
 377         u = unix_sk(sk);
 378         u_other = unix_sk(other);
 379         rc = 0;
 380         spin_lock(&u_other->peer_wait.lock);
 381
 382         if (!u->peer_wake.private) {
 383                 u->peer_wake.private = other;
 384                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 385
 386                 rc = 1;
 387         }
 388
 389         spin_unlock(&u_other->peer_wait.lock);
 390         return rc;
 391 }
 392
 393 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 394                                             struct sock *other)
 395 {
 396         struct unix_sock *u, *u_other;
 397
 398         u = unix_sk(sk);
 399         u_other = unix_sk(other);
 400         spin_lock(&u_other->peer_wait.lock);
 401
 402         if (u->peer_wake.private == other) {
 403                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 404                 u->peer_wake.private = NULL;
 405         }
 406
 407         spin_unlock(&u_other->peer_wait.lock);
 408 }
 409
 410 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 411                                                    struct sock *other)
 412 {
 413         unix_dgram_peer_wake_disconnect(sk, other);
 414         wake_up_interruptible_poll(sk_sleep(sk),
 415                                    EPOLLOUT |
 416                                    EPOLLWRNORM |
 417                                    EPOLLWRBAND);
 418 }
 419
 420 /* preconditions:
 421  *      - unix_peer(sk) == other
 422  *      - association is stable
 423  */
 424 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 425 {
 426         int connected;
 427
 428         connected = unix_dgram_peer_wake_connect(sk, other);
 429
 430         /* If other is SOCK_DEAD, we want to make sure we signal
 431          * POLLOUT, such that a subsequent write() can get a
 432          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 433          * to other and its full, we will hang waiting for POLLOUT.
 434          */
 435         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 436                 return 1;
 437
 438         if (connected)
 439                 unix_dgram_peer_wake_disconnect(sk, other);
 440
 441         return 0;
 442 }
 443
 444 static int unix_writable(const struct sock *sk)
 445 {
 446         return sk->sk_state != TCP_LISTEN &&
 447                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 448 }
 449
 450 static void unix_write_space(struct sock *sk)
 451 {
 452         struct socket_wq *wq;
 453
 454         rcu_read_lock();
 455         if (unix_writable(sk)) {
 456                 wq = rcu_dereference(sk->sk_wq);
 457                 if (skwq_has_sleeper(wq))
 458                         wake_up_interruptible_sync_poll(&wq->wait,
 459                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 460                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 461         }
 462         rcu_read_unlock();
 463 }
 464
 465 /* When dgram socket disconnects (or changes its peer), we clear its receive
 466  * queue of packets arrived from previous peer. First, it allows to do
 467  * flow control based only on wmem_alloc; second, sk connected to peer
 468  * may receive messages only from that peer. */
 469 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 470 {
 471         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 472                 skb_queue_purge(&sk->sk_receive_queue);
 473                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 474
 475                 /* If one link of bidirectional dgram pipe is disconnected,
 476                  * we signal error. Messages are lost. Do not make this,
 477                  * when peer was not connected to us.
 478                  */
 479                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 480                         other->sk_err = ECONNRESET;
 481                         other->sk_error_report(other);
 482                 }
 483         }
 484 }
 485
 486 static void unix_sock_destructor(struct sock *sk)
 487 {
 488         struct unix_sock *u = unix_sk(sk);
 489
 490         skb_queue_purge(&sk->sk_receive_queue);
 491
 492         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 493         WARN_ON(!sk_unhashed(sk));
 494         WARN_ON(sk->sk_socket);
 495         if (!sock_flag(sk, SOCK_DEAD)) {
 496                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 497                 return;
 498         }
 499
 500         if (u->addr)
 501                 unix_release_addr(u->addr);
 502
 503         atomic_long_dec(&unix_nr_socks);
 504         local_bh_disable();
 505         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 506         local_bh_enable();
 507 #ifdef UNIX_REFCNT_DEBUG
 508         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 509                 atomic_long_read(&unix_nr_socks));
 510 #endif
 511 }
 512
 513 static void unix_release_sock(struct sock *sk, int embrion)
 514 {
 515         struct unix_sock *u = unix_sk(sk);
 516         struct path path;
 517         struct sock *skpair;
 518         struct sk_buff *skb;
 519         int state;
 520
 521         unix_remove_socket(sk);
 522
 523         /* Clear state */
 524         unix_state_lock(sk);
 525         sock_orphan(sk);
 526         sk->sk_shutdown = SHUTDOWN_MASK;
 527         path         = u->path;
 528         u->path.dentry = NULL;
 529         u->path.mnt = NULL;
 530         state = sk->sk_state;
 531         sk->sk_state = TCP_CLOSE;
 532         unix_state_unlock(sk);
 533
 534         wake_up_interruptible_all(&u->peer_wait);
 535
 536         skpair = unix_peer(sk);
 537
 538         if (skpair != NULL) {
 539                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 540                         unix_state_lock(skpair);
 541                         /* No more writes */
 542                         skpair->sk_shutdown = SHUTDOWN_MASK;
 543                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 544                                 skpair->sk_err = ECONNRESET;
 545                         unix_state_unlock(skpair);
 546                         skpair->sk_state_change(skpair);
 547                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 548                 }
 549
 550                 unix_dgram_peer_wake_disconnect(sk, skpair);
 551                 sock_put(skpair); /* It may now die */
 552                 unix_peer(sk) = NULL;
 553         }
 554
 555         /* Try to flush out this socket. Throw out buffers at least */
 556
 557         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 558                 if (state == TCP_LISTEN)
 559                         unix_release_sock(skb->sk, 1);
 560                 /* passed fds are erased in the kfree_skb hook        */
 561                 UNIXCB(skb).consumed = skb->len;
 562                 kfree_skb(skb);
 563         }
 564
 565         if (path.dentry)
 566                 path_put(&path);
 567
 568         sock_put(sk);
 569
 570         /* ---- Socket is dead now and most probably destroyed ---- */
 571
 572         /*
 573          * Fixme: BSD difference: In BSD all sockets connected to us get
 574          *        ECONNRESET and we die on the spot. In Linux we behave
 575          *        like files and pipes do and wait for the last
 576          *        dereference.
 577          *
 578          * Can't we simply set sock->err?
 579          *
 580          *        What the above comment does talk about? --ANK(980817)
 581          */
 582
 583         if (unix_tot_inflight)
 584                 unix_gc();              /* Garbage collect fds */
 585 }
 586
 587 static void init_peercred(struct sock *sk)
 588 {
 589         put_pid(sk->sk_peer_pid);
 590         if (sk->sk_peer_cred)
 591                 put_cred(sk->sk_peer_cred);
 592         sk->sk_peer_pid  = get_pid(task_tgid(current));
 593         sk->sk_peer_cred = get_current_cred();
 594 }
 595
 596 static void copy_peercred(struct sock *sk, struct sock *peersk)
 597 {
 598         put_pid(sk->sk_peer_pid);
 599         if (sk->sk_peer_cred)
 600                 put_cred(sk->sk_peer_cred);
 601         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 602         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 603 }
 604
 605 static int unix_listen(struct socket *sock, int backlog)
 606 {
 607         int err;
 608         struct sock *sk = sock->sk;
 609         struct unix_sock *u = unix_sk(sk);
 610         struct pid *old_pid = NULL;
 611
 612         err = -EOPNOTSUPP;
 613         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 614                 goto out;       /* Only stream/seqpacket sockets accept */
 615         err = -EINVAL;
 616         if (!u->addr)
 617                 goto out;       /* No listens on an unbound socket */
 618         unix_state_lock(sk);
 619         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 620                 goto out_unlock;
 621         if (backlog > sk->sk_max_ack_backlog)
 622                 wake_up_interruptible_all(&u->peer_wait);
 623         sk->sk_max_ack_backlog  = backlog;
 624         sk->sk_state            = TCP_LISTEN;
 625         /* set credentials so connect can copy them */
 626         init_peercred(sk);
 627         err = 0;
 628
 629 out_unlock:
 630         unix_state_unlock(sk);
 631         put_pid(old_pid);
 632 out:
 633         return err;
 634 }
 635
 636 static int unix_release(struct socket *);
 637 static int unix_bind(struct socket *, struct sockaddr *, int);
 638 static int unix_stream_connect(struct socket *, struct sockaddr *,
 639                                int addr_len, int flags);
 640 static int unix_socketpair(struct socket *, struct socket *);
 641 static int unix_accept(struct socket *, struct socket *, int, bool);
 642 static int unix_getname(struct socket *, struct sockaddr *, int);
 643 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 644 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 645                                     poll_table *);
 646 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 647 static int unix_shutdown(struct socket *, int);
 648 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 649 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 650 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 651                                     size_t size, int flags);
 652 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 653                                        struct pipe_inode_info *, size_t size,
 654                                        unsigned int flags);
 655 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 656 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 657 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 658                               int, int);
 659 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 660 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 661                                   int);
 662
 663 static int unix_set_peek_off(struct sock *sk, int val)
 664 {
 665         struct unix_sock *u = unix_sk(sk);
 666
 667         if (mutex_lock_interruptible(&u->iolock))
 668                 return -EINTR;
 669
 670         sk->sk_peek_off = val;
 671         mutex_unlock(&u->iolock);
 672
 673         return 0;
 674 }
 675
 676
 677 static const struct proto_ops unix_stream_ops = {
 678         .family =       PF_UNIX,
 679         .owner =        THIS_MODULE,
 680         .release =      unix_release,
 681         .bind =         unix_bind,
 682         .connect =      unix_stream_connect,
 683         .socketpair =   unix_socketpair,
 684         .accept =       unix_accept,
 685         .getname =      unix_getname,
 686         .poll =         unix_poll,
 687         .ioctl =        unix_ioctl,
 688         .listen =       unix_listen,
 689         .shutdown =     unix_shutdown,
 690         .setsockopt =   sock_no_setsockopt,
 691         .getsockopt =   sock_no_getsockopt,
 692         .sendmsg =      unix_stream_sendmsg,
 693         .recvmsg =      unix_stream_recvmsg,
 694         .mmap =         sock_no_mmap,
 695         .sendpage =     unix_stream_sendpage,
 696         .splice_read =  unix_stream_splice_read,
 697         .set_peek_off = unix_set_peek_off,
 698 };
 699
 700 static const struct proto_ops unix_dgram_ops = {
 701         .family =       PF_UNIX,
 702         .owner =        THIS_MODULE,
 703         .release =      unix_release,
 704         .bind =         unix_bind,
 705         .connect =      unix_dgram_connect,
 706         .socketpair =   unix_socketpair,
 707         .accept =       sock_no_accept,
 708         .getname =      unix_getname,
 709         .poll =         unix_dgram_poll,
 710         .ioctl =        unix_ioctl,
 711         .listen =       sock_no_listen,
 712         .shutdown =     unix_shutdown,
 713         .setsockopt =   sock_no_setsockopt,
 714         .getsockopt =   sock_no_getsockopt,
 715         .sendmsg =      unix_dgram_sendmsg,
 716         .recvmsg =      unix_dgram_recvmsg,
 717         .mmap =         sock_no_mmap,
 718         .sendpage =     sock_no_sendpage,
 719         .set_peek_off = unix_set_peek_off,
 720 };
 721
 722 static const struct proto_ops unix_seqpacket_ops = {
 723         .family =       PF_UNIX,
 724         .owner =        THIS_MODULE,
 725         .release =      unix_release,
 726         .bind =         unix_bind,
 727         .connect =      unix_stream_connect,
 728         .socketpair =   unix_socketpair,
 729         .accept =       unix_accept,
 730         .getname =      unix_getname,
 731         .poll =         unix_dgram_poll,
 732         .ioctl =        unix_ioctl,
 733         .listen =       unix_listen,
 734         .shutdown =     unix_shutdown,
 735         .setsockopt =   sock_no_setsockopt,
 736         .getsockopt =   sock_no_getsockopt,
 737         .sendmsg =      unix_seqpacket_sendmsg,
 738         .recvmsg =      unix_seqpacket_recvmsg,
 739         .mmap =         sock_no_mmap,
 740         .sendpage =     sock_no_sendpage,
 741         .set_peek_off = unix_set_peek_off,
 742 };
 743
 744 static struct proto unix_proto = {
 745         .name                   = "UNIX",
 746         .owner                  = THIS_MODULE,
 747         .obj_size               = sizeof(struct unix_sock),
 748 };
 749
 750 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 751 {
 752         struct sock *sk = NULL;
 753         struct unix_sock *u;
 754
 755         atomic_long_inc(&unix_nr_socks);
 756         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 757                 goto out;
 758
 759         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 760         if (!sk)
 761                 goto out;
 762
 763         sock_init_data(sock, sk);
 764
 765         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 766         sk->sk_write_space      = unix_write_space;
 767         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 768         sk->sk_destruct         = unix_sock_destructor;
 769         u         = unix_sk(sk);
 770         u->path.dentry = NULL;
 771         u->path.mnt = NULL;
 772         spin_lock_init(&u->lock);
 773         atomic_long_set(&u->inflight, 0);
 774         INIT_LIST_HEAD(&u->link);
 775         mutex_init(&u->iolock); /* single task reading lock */
 776         mutex_init(&u->bindlock); /* single task binding lock */
 777         init_waitqueue_head(&u->peer_wait);
 778         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 779         unix_insert_socket(unix_sockets_unbound(sk), sk);
 780 out:
 781         if (sk == NULL)
 782                 atomic_long_dec(&unix_nr_socks);
 783         else {
 784                 local_bh_disable();
 785                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 786                 local_bh_enable();
 787         }
 788         return sk;
 789 }
 790
 791 static int unix_create(struct net *net, struct socket *sock, int protocol,
 792                        int kern)
 793 {
 794         if (protocol && protocol != PF_UNIX)
 795                 return -EPROTONOSUPPORT;
 796
 797         sock->state = SS_UNCONNECTED;
 798
 799         switch (sock->type) {
 800         case SOCK_STREAM:
 801                 sock->ops = &unix_stream_ops;
 802                 break;
 803                 /*
 804                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 805                  *      nothing uses it.
 806                  */
 807         case SOCK_RAW:
 808                 sock->type = SOCK_DGRAM;
 809                 /* fall through */
 810         case SOCK_DGRAM:
 811                 sock->ops = &unix_dgram_ops;
 812                 break;
 813         case SOCK_SEQPACKET:
 814                 sock->ops = &unix_seqpacket_ops;
 815                 break;
 816         default:
 817                 return -ESOCKTNOSUPPORT;
 818         }
 819
 820         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 821 }
 822
 823 static int unix_release(struct socket *sock)
 824 {
 825         struct sock *sk = sock->sk;
 826
 827         if (!sk)
 828                 return 0;
 829
 830         unix_release_sock(sk, 0);
 831         sock->sk = NULL;
 832
 833         return 0;
 834 }
 835
 836 static int unix_autobind(struct socket *sock)
 837 {
 838         struct sock *sk = sock->sk;
 839         struct net *net = sock_net(sk);
 840         struct unix_sock *u = unix_sk(sk);
 841         static u32 ordernum = 1;
 842         struct unix_address *addr;
 843         int err;
 844         unsigned int retries = 0;
 845
 846         err = mutex_lock_interruptible(&u->bindlock);
 847         if (err)
 848                 return err;
 849
 850         err = 0;
 851         if (u->addr)
 852                 goto out;
 853
 854         err = -ENOMEM;
 855         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 856         if (!addr)
 857                 goto out;
 858
 859         addr->name->sun_family = AF_UNIX;
 860         refcount_set(&addr->refcnt, 1);
 861
 862 retry:
 863         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 864         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 865
 866         spin_lock(&unix_table_lock);
 867         ordernum = (ordernum+1)&0xFFFFF;
 868
 869         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 870                                       addr->hash)) {
 871                 spin_unlock(&unix_table_lock);
 872                 /*
 873                  * __unix_find_socket_byname() may take long time if many names
 874                  * are already in use.
 875                  */
 876                 cond_resched();
 877                 /* Give up if all names seems to be in use. */
 878                 if (retries++ == 0xFFFFF) {
 879                         err = -ENOSPC;
 880                         kfree(addr);
 881                         goto out;
 882                 }
 883                 goto retry;
 884         }
 885         addr->hash ^= sk->sk_type;
 886
 887         __unix_remove_socket(sk);
 888         smp_store_release(&u->addr, addr);
 889         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 890         spin_unlock(&unix_table_lock);
 891         err = 0;
 892
 893 out:    mutex_unlock(&u->bindlock);
 894         return err;
 895 }
 896
 897 static struct sock *unix_find_other(struct net *net,
 898                                     struct sockaddr_un *sunname, int len,
 899                                     int type, unsigned int hash, int *error)
 900 {
 901         struct sock *u;
 902         struct path path;
 903         int err = 0;
 904
 905         if (sunname->sun_path[0]) {
 906                 struct inode *inode;
 907                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 908                 if (err)
 909                         goto fail;
 910                 inode = d_backing_inode(path.dentry);
 911                 err = inode_permission(inode, MAY_WRITE);
 912                 if (err)
 913                         goto put_fail;
 914
 915                 err = -ECONNREFUSED;
 916                 if (!S_ISSOCK(inode->i_mode))
 917                         goto put_fail;
 918                 u = unix_find_socket_byinode(inode);
 919                 if (!u)
 920                         goto put_fail;
 921
 922                 if (u->sk_type == type)
 923                         touch_atime(&path);
 924
 925                 path_put(&path);
 926
 927                 err = -EPROTOTYPE;
 928                 if (u->sk_type != type) {
 929                         sock_put(u);
 930                         goto fail;
 931                 }
 932         } else {
 933                 err = -ECONNREFUSED;
 934                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 935                 if (u) {
 936                         struct dentry *dentry;
 937                         dentry = unix_sk(u)->path.dentry;
 938                         if (dentry)
 939                                 touch_atime(&unix_sk(u)->path);
 940                 } else
 941                         goto fail;
 942         }
 943         return u;
 944
 945 put_fail:
 946         path_put(&path);
 947 fail:
 948         *error = err;
 949         return NULL;
 950 }
 951
 952 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 953 {
 954         struct dentry *dentry;
 955         struct path path;
 956         int err = 0;
 957         /*
 958          * Get the parent directory, calculate the hash for last
 959          * component.
 960          */
 961         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 962         err = PTR_ERR(dentry);
 963         if (IS_ERR(dentry))
 964                 return err;
 965
 966         /*
 967          * All right, let's create it.
 968          */
 969         err = security_path_mknod(&path, dentry, mode, 0);
 970         if (!err) {
 971                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 972                 if (!err) {
 973                         res->mnt = mntget(path.mnt);
 974                         res->dentry = dget(dentry);
 975                 }
 976         }
 977         done_path_create(&path, dentry);
 978         return err;
 979 }
 980
 981 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 982 {
 983         struct sock *sk = sock->sk;
 984         struct net *net = sock_net(sk);
 985         struct unix_sock *u = unix_sk(sk);
 986         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 987         char *sun_path = sunaddr->sun_path;
 988         int err;
 989         unsigned int hash;
 990         struct unix_address *addr;
 991         struct hlist_head *list;
 992         struct path path = { };
 993
 994         err = -EINVAL;
 995         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 996             sunaddr->sun_family != AF_UNIX)
 997                 goto out;
 998
 999         if (addr_len == sizeof(short)) {
1000                 err = unix_autobind(sock);
1001                 goto out;
1002         }
1003
1004         err = unix_mkname(sunaddr, addr_len, &hash);
1005         if (err < 0)
1006                 goto out;
1007         addr_len = err;
1008
1009         if (sun_path[0]) {
1010                 umode_t mode = S_IFSOCK |
1011                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1012                 err = unix_mknod(sun_path, mode, &path);
1013                 if (err) {
1014                         if (err == -EEXIST)
1015                                 err = -EADDRINUSE;
1016                         goto out;
1017                 }
1018         }
1019
1020         err = mutex_lock_interruptible(&u->bindlock);
1021         if (err)
1022                 goto out_put;
1023
1024         err = -EINVAL;
1025         if (u->addr)
1026                 goto out_up;
1027
1028         err = -ENOMEM;
1029         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1030         if (!addr)
1031                 goto out_up;
1032
1033         memcpy(addr->name, sunaddr, addr_len);
1034         addr->len = addr_len;
1035         addr->hash = hash ^ sk->sk_type;
1036         refcount_set(&addr->refcnt, 1);
1037
1038         if (sun_path[0]) {
1039                 addr->hash = UNIX_HASH_SIZE;
1040                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1041                 spin_lock(&unix_table_lock);
1042                 u->path = path;
1043                 list = &unix_socket_table[hash];
1044         } else {
1045                 spin_lock(&unix_table_lock);
1046                 err = -EADDRINUSE;
1047                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1048                                               sk->sk_type, hash)) {
1049                         unix_release_addr(addr);
1050                         goto out_unlock;
1051                 }
1052
1053                 list = &unix_socket_table[addr->hash];
1054         }
1055
1056         err = 0;
1057         __unix_remove_socket(sk);
1058         smp_store_release(&u->addr, addr);
1059         __unix_insert_socket(list, sk);
1060
1061 out_unlock:
1062         spin_unlock(&unix_table_lock);
1063 out_up:
1064         mutex_unlock(&u->bindlock);
1065 out_put:
1066         if (err)
1067                 path_put(&path);
1068 out:
1069         return err;
1070 }
1071
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * When @sk2 is NULL or the same socket as @sk1, only @sk1 is locked.
 * Otherwise the socket at the lower address is locked first and the other
 * one is taken with unix_state_lock_nested().
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1086
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and also
 * unlocks @sk2 unless it is NULL or the same socket as @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (unlikely(sk1 == sk2) || !sk2)
                return;

        unix_state_unlock(sk2);
}
1096
1097 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1098                               int alen, int flags)
1099 {
1100         struct sock *sk = sock->sk;
1101         struct net *net = sock_net(sk);
1102         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1103         struct sock *other;
1104         unsigned int hash;
1105         int err;
1106
1107         err = -EINVAL;
1108         if (alen < offsetofend(struct sockaddr, sa_family))
1109                 goto out;
1110
1111         if (addr->sa_family != AF_UNSPEC) {
1112                 err = unix_mkname(sunaddr, alen, &hash);
1113                 if (err < 0)
1114                         goto out;
1115                 alen = err;
1116
1117                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1118                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1119                         goto out;
1120
1121 restart:
1122                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1123                 if (!other)
1124                         goto out;
1125
1126                 unix_state_double_lock(sk, other);
1127
1128                 /* Apparently VFS overslept socket death. Retry. */
1129                 if (sock_flag(other, SOCK_DEAD)) {
1130                         unix_state_double_unlock(sk, other);
1131                         sock_put(other);
1132                         goto restart;
1133                 }
1134
1135                 err = -EPERM;
1136                 if (!unix_may_send(sk, other))
1137                         goto out_unlock;
1138
1139                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1140                 if (err)
1141                         goto out_unlock;
1142
1143         } else {
1144                 /*
1145                  *      1003.1g breaking connected state with AF_UNSPEC
1146                  */
1147                 other = NULL;
1148                 unix_state_double_lock(sk, other);
1149         }
1150
1151         /*
1152          * If it was connected, reconnect.
1153          */
1154         if (unix_peer(sk)) {
1155                 struct sock *old_peer = unix_peer(sk);
1156                 unix_peer(sk) = other;
1157                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1158
1159                 unix_state_double_unlock(sk, other);
1160
1161                 if (other != old_peer)
1162                         unix_dgram_disconnected(sk, old_peer);
1163                 sock_put(old_peer);
1164         } else {
1165                 unix_peer(sk) = other;
1166                 unix_state_double_unlock(sk, other);
1167         }
1168         return 0;
1169
1170 out_unlock:
1171         unix_state_double_unlock(sk, other);
1172         sock_put(other);
1173 out:
1174         return err;
1175 }
1176
1177 static long unix_wait_for_peer(struct sock *other, long timeo)
1178 {
1179         struct unix_sock *u = unix_sk(other);
1180         int sched;
1181         DEFINE_WAIT(wait);
1182
1183         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1184
1185         sched = !sock_flag(other, SOCK_DEAD) &&
1186                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1187                 unix_recvq_full(other);
1188
1189         unix_state_unlock(other);
1190
1191         if (sched)
1192                 timeo = schedule_timeout(timeo);
1193
1194         finish_wait(&u->peer_wait, &wait);
1195         return timeo;
1196 }
1197
/*
 * connect() for stream and seqpacket sockets.
 *
 * A new sock ('child') that will become the server-side end of the
 * connection and a one-byte skb charged to it are allocated up front.  The
 * listener is then looked up and locked; if its receive queue is full the
 * caller waits (subject to the send timeout) and retries.  On success the
 * child is linked to @sock's sk as its peer, inherits the listener's
 * address and path, and the skb is queued on the listener's receive queue,
 * from where unix_accept() picks the new sock up via skb->sk.
 *
 * Returns 0 on success.  Errors: from unix_mkname()/unix_autobind()/
 * unix_find_other()/security_unix_stream_connect(); -ENOMEM if the child
 * or skb cannot be allocated; -ECONNREFUSED if the target is not listening
 * or has shut down receiving; -EAGAIN if the backlog is full and the
 * timeout is zero; sock_intr_errno() if interrupted by a signal while
 * waiting; -EISCONN / -EINVAL if this socket is not in TCP_CLOSE state.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        struct net *net = sock_net(sk);
        struct unix_sock *child_u, *listener_u;
        struct sock *listener = NULL;
        struct sock *child = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int our_state;
        long timeo;
        int err;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                return err;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
                err = unix_autobind(sock);
                if (err)
                        return err;
        }

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /*
         * Allocate all resources first.  Doing it after the state is
         * locked would force every check to be repeated anyway.
         */
        err = -ENOMEM;

        /* New sock for the completed connection. */
        child = unix_create1(sock_net(sk), NULL, 0);
        if (!child)
                goto out;

        /* skb that announces the connection to the listening sock. */
        skb = sock_wmalloc(child, 1, 0, GFP_KERNEL);
        if (!skb)
                goto out;

restart:
        /* Find the listening sock. */
        listener = unix_find_other(net, sunaddr, addr_len, sk->sk_type,
                                   hash, &err);
        if (!listener)
                goto out;

        /* Latch the state of the peer. */
        unix_state_lock(listener);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(listener, SOCK_DEAD)) {
                unix_state_unlock(listener);
                sock_put(listener);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (listener->sk_state != TCP_LISTEN ||
            (listener->sk_shutdown & RCV_SHUTDOWN))
                goto out_unlock;

        if (unix_recvq_full(listener)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* Drops the listener's state lock. */
                timeo = unix_wait_for_peer(listener, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(listener);
                goto restart;
        }

        /*
         * Latch our own state.
         *
         * This is the tricky part: our state lock has to be taken while
         * the peer's lock is still held, which could deadlock.  Connecting
         * to ourselves and simultaneous connect attempts are ruled out by
         * looking at the socket state: the listener is TCP_LISTEN, and if
         * sk were TCP_LISTEN too we bail out before touching its lock.
         * The state is re-checked once the lock is held.
         */
        our_state = sk->sk_state;
        if (our_state != TCP_CLOSE) {
                /* TCP_ESTABLISHED means the socket is already connected. */
                err = our_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        if (sk->sk_state != our_state) {
                unix_state_unlock(sk);
                unix_state_unlock(listener);
                sock_put(listener);
                goto restart;
        }

        err = security_unix_stream_connect(sk, listener, child);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open: quickly fill in all the necessary fields. */
        sock_hold(sk);
        unix_peer(child) = sk;
        child->sk_state = TCP_ESTABLISHED;
        child->sk_type = sk->sk_type;
        init_peercred(child);
        child_u = unix_sk(child);
        RCU_INIT_POINTER(child->sk_wq, &child_u->peer_wq);
        listener_u = unix_sk(listener);

        /*
         * Copy the address information from the listener to the child.
         *
         * *(listener_u->addr) and listener_u->path are seen fully set up
         * here because the listener was found in the hash under
         * unix_table_lock, and it was inserted into that chain in an
         * earlier critical section under the same lock - the one that
         * also set *(listener_u->addr), listener_u->path and
         * listener_u->addr itself.
         *
         * smp_store_release() on child_u->addr is enough to make those
         * stores, and the store to child_u->path, visible to anyone who
         * reads child_u->addr with smp_load_acquire(); i.e. the same
         * guarantees as for socks bound in unix_bind() or
         * unix_autobind().
         */
        if (listener_u->path.dentry) {
                path_get(&listener_u->path);
                child_u->path = listener_u->path;
        }
        refcount_inc(&listener_u->addr->refcnt);
        smp_store_release(&child_u->addr, listener_u->addr);

        /* Set credentials. */
        copy_peercred(sk, listener);

        sock->state = SS_CONNECTED;
        sk->sk_state = TCP_ESTABLISHED;
        sock_hold(child);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk) = child;

        unix_state_unlock(sk);

        /* Hand the announcement skb to the listening sock. */
        spin_lock(&listener->sk_receive_queue.lock);
        __skb_queue_tail(&listener->sk_receive_queue, skb);
        spin_unlock(&listener->sk_receive_queue.lock);
        unix_state_unlock(listener);
        listener->sk_data_ready(listener);
        sock_put(listener);
        return 0;

out_unlock:
        if (listener)
                unix_state_unlock(listener);

out:
        kfree_skb(skb);
        if (child)
                unix_release_sock(child, 0);
        if (listener)
                sock_put(listener);
        return err;
}
1386
1387 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1388 {
1389         struct sock *ska = socka->sk, *skb = sockb->sk;
1390
1391         /* Join our sockets back to back */
1392         sock_hold(ska);
1393         sock_hold(skb);
1394         unix_peer(ska) = skb;
1395         unix_peer(skb) = ska;
1396         init_peercred(ska);
1397         init_peercred(skb);
1398
1399         if (ska->sk_type != SOCK_DGRAM) {
1400                 ska->sk_state = TCP_ESTABLISHED;
1401                 skb->sk_state = TCP_ESTABLISHED;
1402                 socka->state  = SS_CONNECTED;
1403                 sockb->state  = SS_CONNECTED;
1404         }
1405         return 0;
1406 }
1407
1408 static void unix_sock_inherit_flags(const struct socket *old,
1409                                     struct socket *new)
1410 {
1411         if (test_bit(SOCK_PASSCRED, &old->flags))
1412                 set_bit(SOCK_PASSCRED, &new->flags);
1413         if (test_bit(SOCK_PASSSEC, &old->flags))
1414                 set_bit(SOCK_PASSSEC, &new->flags);
1415 }
1416
1417 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1418                        bool kern)
1419 {
1420         struct sock *sk = sock->sk;
1421         struct sock *tsk;
1422         struct sk_buff *skb;
1423         int err;
1424
1425         err = -EOPNOTSUPP;
1426         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1427                 goto out;
1428
1429         err = -EINVAL;
1430         if (sk->sk_state != TCP_LISTEN)
1431                 goto out;
1432
1433         /* If socket state is TCP_LISTEN it cannot change (for now...),
1434          * so that no locks are necessary.
1435          */
1436
1437         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1438         if (!skb) {
1439                 /* This means receive shutdown. */
1440                 if (err == 0)
1441                         err = -EINVAL;
1442                 goto out;
1443         }
1444
1445         tsk = skb->sk;
1446         skb_free_datagram(sk, skb);
1447         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1448
1449         /* attach accepted sock to socket */
1450         unix_state_lock(tsk);
1451         newsock->state = SS_CONNECTED;
1452         unix_sock_inherit_flags(sock, newsock);
1453         sock_graft(tsk, newsock);
1454         unix_state_unlock(tsk);
1455         return 0;
1456
1457 out:
1458         return err;
1459 }
1460
1461
1462 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1463 {
1464         struct sock *sk = sock->sk;
1465         struct unix_address *addr;
1466         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1467         int err = 0;
1468
1469         if (peer) {
1470                 sk = unix_peer_get(sk);
1471
1472                 err = -ENOTCONN;
1473                 if (!sk)
1474                         goto out;
1475                 err = 0;
1476         } else {
1477                 sock_hold(sk);
1478         }
1479
1480         addr = smp_load_acquire(&unix_sk(sk)->addr);
1481         if (!addr) {
1482                 sunaddr->sun_family = AF_UNIX;
1483                 sunaddr->sun_path[0] = 0;
1484                 err = sizeof(short);
1485         } else {
1486                 err = addr->len;
1487                 memcpy(sunaddr, addr->name, addr->len);
1488         }
1489         sock_put(sk);
1490 out:
1491         return err;
1492 }
1493
1494 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1495 {
1496         int err = 0;
1497
1498         UNIXCB(skb).pid  = get_pid(scm->pid);
1499         UNIXCB(skb).uid = scm->creds.uid;
1500         UNIXCB(skb).gid = scm->creds.gid;
1501         UNIXCB(skb).fp = NULL;
1502         unix_get_secdata(scm, skb);
1503         if (scm->fp && send_fds)
1504                 err = unix_attach_fds(scm, skb);
1505
1506         skb->destructor = unix_destruct_scm;
1507         return err;
1508 }
1509
1510 static bool unix_passcred_enabled(const struct socket *sock,
1511                                   const struct sock *other)
1512 {
1513         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1514                !other->sk_socket ||
1515                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1516 }
1517
1518 /*
1519  * Some apps rely on write() giving SCM_CREDENTIALS
1520  * We include credentials if source or destination socket
1521  * asserted SOCK_PASSCRED.
1522  */
1523 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1524                             const struct sock *other)
1525 {
1526         if (UNIXCB(skb).pid)
1527                 return;
1528         if (unix_passcred_enabled(sock, other)) {
1529                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1530                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1531         }
1532 }
1533
1534 static int maybe_init_creds(struct scm_cookie *scm,
1535                             struct socket *socket,
1536                             const struct sock *other)
1537 {
1538         int err;
1539         struct msghdr msg = { .msg_controllen = 0 };
1540
1541         err = scm_send(socket, &msg, scm, false);
1542         if (err)
1543                 return err;
1544
1545         if (unix_passcred_enabled(socket, other)) {
1546                 scm->pid = get_pid(task_tgid(current));
1547                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1548         }
1549         return err;
1550 }
1551
1552 static bool unix_skb_scm_eq(struct sk_buff *skb,
1553                             struct scm_cookie *scm)
1554 {
1555         const struct unix_skb_parms *u = &UNIXCB(skb);
1556
1557         return u->pid == scm->pid &&
1558                uid_eq(u->uid, scm->creds.uid) &&
1559                gid_eq(u->gid, scm->creds.gid) &&
1560                unix_secdata_eq(scm, skb);
1561 }
1562
1563 /*
1564  *      Send AF_UNIX data.
1565  */
1566
1567 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1568                               size_t len)
1569 {
1570         struct sock *sk = sock->sk;
1571         struct net *net = sock_net(sk);
1572         struct unix_sock *u = unix_sk(sk);
1573         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1574         struct sock *other = NULL;
1575         int namelen = 0; /* fake GCC */
1576         int err;
1577         unsigned int hash;
1578         struct sk_buff *skb;
1579         long timeo;
1580         struct scm_cookie scm;
1581         int data_len = 0;
1582         int sk_locked;
1583
1584         wait_for_unix_gc();
1585         err = scm_send(sock, msg, &scm, false);
1586         if (err < 0)
1587                 return err;
1588
1589         err = -EOPNOTSUPP;
1590         if (msg->msg_flags&MSG_OOB)
1591                 goto out;
1592
1593         if (msg->msg_namelen) {
1594                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1595                 if (err < 0)
1596                         goto out;
1597                 namelen = err;
1598         } else {
1599                 sunaddr = NULL;
1600                 err = -ENOTCONN;
1601                 other = unix_peer_get(sk);
1602                 if (!other)
1603                         goto out;
1604         }
1605
1606         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1607             && (err = unix_autobind(sock)) != 0)
1608                 goto out;
1609
1610         err = -EMSGSIZE;
1611         if (len > sk->sk_sndbuf - 32)
1612                 goto out;
1613
1614         if (len > SKB_MAX_ALLOC) {
1615                 data_len = min_t(size_t,
1616                                  len - SKB_MAX_ALLOC,
1617                                  MAX_SKB_FRAGS * PAGE_SIZE);
1618                 data_len = PAGE_ALIGN(data_len);
1619
1620                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1621         }
1622
1623         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1624                                    msg->msg_flags & MSG_DONTWAIT, &err,
1625                                    PAGE_ALLOC_COSTLY_ORDER);
1626         if (skb == NULL)
1627                 goto out;
1628
1629         err = unix_scm_to_skb(&scm, skb, true);
1630         if (err < 0)
1631                 goto out_free;
1632
1633         skb_put(skb, len - data_len);
1634         skb->data_len = data_len;
1635         skb->len = len;
1636         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1637         if (err)
1638                 goto out_free;
1639
1640         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1641
1642 restart:
1643         if (!other) {
1644                 err = -ECONNRESET;
1645                 if (sunaddr == NULL)
1646                         goto out_free;
1647
1648                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1649                                         hash, &err);
1650                 if (other == NULL)
1651                         goto out_free;
1652         }
1653
1654         if (sk_filter(other, skb) < 0) {
1655                 /* Toss the packet but do not return any error to the sender */
1656                 err = len;
1657                 goto out_free;
1658         }
1659
1660         sk_locked = 0;
1661         unix_state_lock(other);
1662 restart_locked:
1663         err = -EPERM;
1664         if (!unix_may_send(sk, other))
1665                 goto out_unlock;
1666
1667         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1668                 /*
1669                  *      Check with 1003.1g - what should
1670                  *      datagram error
1671                  */
1672                 unix_state_unlock(other);
1673                 sock_put(other);
1674
1675                 if (!sk_locked)
1676                         unix_state_lock(sk);
1677
1678                 err = 0;
1679                 if (unix_peer(sk) == other) {
1680                         unix_peer(sk) = NULL;
1681                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1682
1683                         unix_state_unlock(sk);
1684
1685                         unix_dgram_disconnected(sk, other);
1686                         sock_put(other);
1687                         err = -ECONNREFUSED;
1688                 } else {
1689                         unix_state_unlock(sk);
1690                 }
1691
1692                 other = NULL;
1693                 if (err)
1694                         goto out_free;
1695                 goto restart;
1696         }
1697
1698         err = -EPIPE;
1699         if (other->sk_shutdown & RCV_SHUTDOWN)
1700                 goto out_unlock;
1701
1702         if (sk->sk_type != SOCK_SEQPACKET) {
1703                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1704                 if (err)
1705                         goto out_unlock;
1706         }
1707
1708         /* other == sk && unix_peer(other) != sk if
1709          * - unix_peer(sk) == NULL, destination address bound to sk
1710          * - unix_peer(sk) == sk by time of get but disconnected before lock
1711          */
1712         if (other != sk &&
1713             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1714                 if (timeo) {
1715                         timeo = unix_wait_for_peer(other, timeo);
1716
1717                         err = sock_intr_errno(timeo);
1718                         if (signal_pending(current))
1719                                 goto out_free;
1720
1721                         goto restart;
1722                 }
1723
1724                 if (!sk_locked) {
1725                         unix_state_unlock(other);
1726                         unix_state_double_lock(sk, other);
1727                 }
1728
1729                 if (unix_peer(sk) != other ||
1730                     unix_dgram_peer_wake_me(sk, other)) {
1731                         err = -EAGAIN;
1732                         sk_locked = 1;
1733                         goto out_unlock;
1734                 }
1735
1736                 if (!sk_locked) {
1737                         sk_locked = 1;
1738                         goto restart_locked;
1739                 }
1740         }
1741
1742         if (unlikely(sk_locked))
1743                 unix_state_unlock(sk);
1744
1745         if (sock_flag(other, SOCK_RCVTSTAMP))
1746                 __net_timestamp(skb);
1747         maybe_add_creds(skb, sock, other);
1748         skb_queue_tail(&other->sk_receive_queue, skb);
1749         unix_state_unlock(other);
1750         other->sk_data_ready(other);
1751         sock_put(other);
1752         scm_destroy(&scm);
1753         return len;
1754
1755 out_unlock:
1756         if (sk_locked)
1757                 unix_state_unlock(sk);
1758         unix_state_unlock(other);
1759 out_free:
1760         kfree_skb(skb);
1761 out:
1762         if (other)
1763                 sock_put(other);
1764         scm_destroy(&scm);
1765         return err;
1766 }
1767
1768 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1769  * bytes, and a minimum of a full page.
1770  */
1771 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1772
1773 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1774                                size_t len)
1775 {
1776         struct sock *sk = sock->sk;
1777         struct sock *other = NULL;
1778         int err, size;
1779         struct sk_buff *skb;
1780         int sent = 0;
1781         struct scm_cookie scm;
1782         bool fds_sent = false;
1783         int data_len;
1784
1785         wait_for_unix_gc();
1786         err = scm_send(sock, msg, &scm, false);
1787         if (err < 0)
1788                 return err;
1789
1790         err = -EOPNOTSUPP;
1791         if (msg->msg_flags&MSG_OOB)
1792                 goto out_err;
1793
1794         if (msg->msg_namelen) {
1795                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1796                 goto out_err;
1797         } else {
1798                 err = -ENOTCONN;
1799                 other = unix_peer(sk);
1800                 if (!other)
1801                         goto out_err;
1802         }
1803
1804         if (sk->sk_shutdown & SEND_SHUTDOWN)
1805                 goto pipe_err;
1806
1807         while (sent < len) {
1808                 size = len - sent;
1809
1810                 /* Keep two messages in the pipe so it schedules better */
1811                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1812
1813                 /* allow fallback to order-0 allocations */
1814                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1815
1816                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1817
1818                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1819
1820                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1821                                            msg->msg_flags & MSG_DONTWAIT, &err,
1822                                            get_order(UNIX_SKB_FRAGS_SZ));
1823                 if (!skb)
1824                         goto out_err;
1825
1826                 /* Only send the fds in the first buffer */
1827                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1828                 if (err < 0) {
1829                         kfree_skb(skb);
1830                         goto out_err;
1831                 }
1832                 fds_sent = true;
1833
1834                 skb_put(skb, size - data_len);
1835                 skb->data_len = data_len;
1836                 skb->len = size;
1837                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1838                 if (err) {
1839                         kfree_skb(skb);
1840                         goto out_err;
1841                 }
1842
1843                 unix_state_lock(other);
1844
1845                 if (sock_flag(other, SOCK_DEAD) ||
1846                     (other->sk_shutdown & RCV_SHUTDOWN))
1847                         goto pipe_err_free;
1848
1849                 maybe_add_creds(skb, sock, other);
1850                 skb_queue_tail(&other->sk_receive_queue, skb);
1851                 unix_state_unlock(other);
1852                 other->sk_data_ready(other);
1853                 sent += size;
1854         }
1855
1856         scm_destroy(&scm);
1857
1858         return sent;
1859
1860 pipe_err_free:
1861         unix_state_unlock(other);
1862         kfree_skb(skb);
1863 pipe_err:
1864         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1865                 send_sig(SIGPIPE, current, 0);
1866         err = -EPIPE;
1867 out_err:
1868         scm_destroy(&scm);
1869         return sent ? : err;
1870 }
1871
1872 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1873                                     int offset, size_t size, int flags)
1874 {
1875         int err;
1876         bool send_sigpipe = false;
1877         bool init_scm = true;
1878         struct scm_cookie scm;
1879         struct sock *other, *sk = socket->sk;
1880         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1881
1882         if (flags & MSG_OOB)
1883                 return -EOPNOTSUPP;
1884
1885         other = unix_peer(sk);
1886         if (!other || sk->sk_state != TCP_ESTABLISHED)
1887                 return -ENOTCONN;
1888
1889         if (false) {
1890 alloc_skb:
1891                 unix_state_unlock(other);
1892                 mutex_unlock(&unix_sk(other)->iolock);
1893                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1894                                               &err, 0);
1895                 if (!newskb)
1896                         goto err;
1897         }
1898
1899         /* we must acquire iolock as we modify already present
1900          * skbs in the sk_receive_queue and mess with skb->len
1901          */
1902         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1903         if (err) {
1904                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1905                 goto err;
1906         }
1907
1908         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1909                 err = -EPIPE;
1910                 send_sigpipe = true;
1911                 goto err_unlock;
1912         }
1913
1914         unix_state_lock(other);
1915
1916         if (sock_flag(other, SOCK_DEAD) ||
1917             other->sk_shutdown & RCV_SHUTDOWN) {
1918                 err = -EPIPE;
1919                 send_sigpipe = true;
1920                 goto err_state_unlock;
1921         }
1922
1923         if (init_scm) {
1924                 err = maybe_init_creds(&scm, socket, other);
1925                 if (err)
1926                         goto err_state_unlock;
1927                 init_scm = false;
1928         }
1929
1930         skb = skb_peek_tail(&other->sk_receive_queue);
1931         if (tail && tail == skb) {
1932                 skb = newskb;
1933         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1934                 if (newskb) {
1935                         skb = newskb;
1936                 } else {
1937                         tail = skb;
1938                         goto alloc_skb;
1939                 }
1940         } else if (newskb) {
1941                 /* this is fast path, we don't necessarily need to
1942                  * call to kfree_skb even though with newskb == NULL
1943                  * this - does no harm
1944                  */
1945                 consume_skb(newskb);
1946                 newskb = NULL;
1947         }
1948
1949         if (skb_append_pagefrags(skb, page, offset, size)) {
1950                 tail = skb;
1951                 goto alloc_skb;
1952         }
1953
1954         skb->len += size;
1955         skb->data_len += size;
1956         skb->truesize += size;
1957         refcount_add(size, &sk->sk_wmem_alloc);
1958
1959         if (newskb) {
1960                 err = unix_scm_to_skb(&scm, skb, false);
1961                 if (err)
1962                         goto err_state_unlock;
1963                 spin_lock(&other->sk_receive_queue.lock);
1964                 __skb_queue_tail(&other->sk_receive_queue, newskb);
1965                 spin_unlock(&other->sk_receive_queue.lock);
1966         }
1967
1968         unix_state_unlock(other);
1969         mutex_unlock(&unix_sk(other)->iolock);
1970
1971         other->sk_data_ready(other);
1972         scm_destroy(&scm);
1973         return size;
1974
1975 err_state_unlock:
1976         unix_state_unlock(other);
1977 err_unlock:
1978         mutex_unlock(&unix_sk(other)->iolock);
1979 err:
1980         kfree_skb(newskb);
1981         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1982                 send_sig(SIGPIPE, current, 0);
1983         if (!init_scm)
1984                 scm_destroy(&scm);
1985         return err;
1986 }
1987
1988 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1989                                   size_t len)
1990 {
1991         int err;
1992         struct sock *sk = sock->sk;
1993
1994         err = sock_error(sk);
1995         if (err)
1996                 return err;
1997
1998         if (sk->sk_state != TCP_ESTABLISHED)
1999                 return -ENOTCONN;
2000
2001         if (msg->msg_namelen)
2002                 msg->msg_namelen = 0;
2003
2004         return unix_dgram_sendmsg(sock, msg, len);
2005 }
2006
2007 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2008                                   size_t size, int flags)
2009 {
2010         struct sock *sk = sock->sk;
2011
2012         if (sk->sk_state != TCP_ESTABLISHED)
2013                 return -ENOTCONN;
2014
2015         return unix_dgram_recvmsg(sock, msg, size, flags);
2016 }
2017
2018 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2019 {
2020         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2021
2022         if (addr) {
2023                 msg->msg_namelen = addr->len;
2024                 memcpy(msg->msg_name, addr->name, addr->len);
2025         }
2026 }
2027
2028 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2029                               size_t size, int flags)
2030 {
2031         struct scm_cookie scm;
2032         struct sock *sk = sock->sk;
2033         struct unix_sock *u = unix_sk(sk);
2034         struct sk_buff *skb, *last;
2035         long timeo;
2036         int skip;
2037         int err;
2038
2039         err = -EOPNOTSUPP;
2040         if (flags&MSG_OOB)
2041                 goto out;
2042
2043         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2044
2045         do {
2046                 mutex_lock(&u->iolock);
2047
2048                 skip = sk_peek_offset(sk, flags);
2049                 skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2050                                               &last);
2051                 if (skb)
2052                         break;
2053
2054                 mutex_unlock(&u->iolock);
2055
2056                 if (err != -EAGAIN)
2057                         break;
2058         } while (timeo &&
2059                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2060
2061         if (!skb) { /* implies iolock unlocked */
2062                 unix_state_lock(sk);
2063                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2064                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2065                     (sk->sk_shutdown & RCV_SHUTDOWN))
2066                         err = 0;
2067                 unix_state_unlock(sk);
2068                 goto out;
2069         }
2070
2071         if (wq_has_sleeper(&u->peer_wait))
2072                 wake_up_interruptible_sync_poll(&u->peer_wait,
2073                                                 EPOLLOUT | EPOLLWRNORM |
2074                                                 EPOLLWRBAND);
2075
2076         if (msg->msg_name)
2077                 unix_copy_addr(msg, skb->sk);
2078
2079         if (size > skb->len - skip)
2080                 size = skb->len - skip;
2081         else if (size < skb->len - skip)
2082                 msg->msg_flags |= MSG_TRUNC;
2083
2084         err = skb_copy_datagram_msg(skb, skip, msg, size);
2085         if (err)
2086                 goto out_free;
2087
2088         if (sock_flag(sk, SOCK_RCVTSTAMP))
2089                 __sock_recv_timestamp(msg, sk, skb);
2090
2091         memset(&scm, 0, sizeof(scm));
2092
2093         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2094         unix_set_secdata(&scm, skb);
2095
2096         if (!(flags & MSG_PEEK)) {
2097                 if (UNIXCB(skb).fp)
2098                         unix_detach_fds(&scm, skb);
2099
2100                 sk_peek_offset_bwd(sk, skb->len);
2101         } else {
2102                 /* It is questionable: on PEEK we could:
2103                    - do not return fds - good, but too simple 8)
2104                    - return fds, and do not return them on read (old strategy,
2105                      apparently wrong)
2106                    - clone fds (I chose it for now, it is the most universal
2107                      solution)
2108
2109                    POSIX 1003.1g does not actually define this clearly
2110                    at all. POSIX 1003.1g doesn't define a lot of things
2111                    clearly however!
2112
2113                 */
2114
2115                 sk_peek_offset_fwd(sk, size);
2116
2117                 if (UNIXCB(skb).fp)
2118                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2119         }
2120         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2121
2122         scm_recv(sock, msg, &scm, flags);
2123
2124 out_free:
2125         skb_free_datagram(sk, skb);
2126         mutex_unlock(&u->iolock);
2127 out:
2128         return err;
2129 }
2130
2131 /*
2132  *      Sleep until more data has arrived. But check for races..
2133  */
2134 static long unix_stream_data_wait(struct sock *sk, long timeo,
2135                                   struct sk_buff *last, unsigned int last_len,
2136                                   bool freezable)
2137 {
2138         struct sk_buff *tail;
2139         DEFINE_WAIT(wait);
2140
2141         unix_state_lock(sk);
2142
2143         for (;;) {
2144                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2145
2146                 tail = skb_peek_tail(&sk->sk_receive_queue);
2147                 if (tail != last ||
2148                     (tail && tail->len != last_len) ||
2149                     sk->sk_err ||
2150                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2151                     signal_pending(current) ||
2152                     !timeo)
2153                         break;
2154
2155                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2156                 unix_state_unlock(sk);
2157                 if (freezable)
2158                         timeo = freezable_schedule_timeout(timeo);
2159                 else
2160                         timeo = schedule_timeout(timeo);
2161                 unix_state_lock(sk);
2162
2163                 if (sock_flag(sk, SOCK_DEAD))
2164                         break;
2165
2166                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2167         }
2168
2169         finish_wait(sk_sleep(sk), &wait);
2170         unix_state_unlock(sk);
2171         return timeo;
2172 }
2173
2174 static unsigned int unix_skb_len(const struct sk_buff *skb)
2175 {
2176         return skb->len - UNIXCB(skb).consumed;
2177 }
2178
2179 struct unix_stream_read_state {
2180         int (*recv_actor)(struct sk_buff *, int, int,
2181                           struct unix_stream_read_state *);
2182         struct socket *socket;
2183         struct msghdr *msg;
2184         struct pipe_inode_info *pipe;
2185         size_t size;
2186         int flags;
2187         unsigned int splice_flags;
2188 };
2189
/*
 * Common read loop behind the stream recvmsg and splice-read entry points.
 *
 * Walks sk->sk_receive_queue and hands up to state->size bytes to
 * state->recv_actor, one skb at a time.  u->iolock is held for the whole
 * walk and only dropped while sleeping in unix_stream_data_wait(); the
 * socket state lock is taken just around queue inspection.
 *
 * @state:     socket, destination (msg or pipe), size and flags of the read
 * @freezable: passed through to unix_stream_data_wait()
 *
 * Returns the number of bytes handed to the actor when that is non-zero
 * (this slot also carries -EFAULT when the very first actor call fails),
 * otherwise err: 0, or a negative errno such as -EINVAL (socket not
 * established), -EOPNOTSUPP (MSG_OOB), -ECONNRESET (socket dead), -EAGAIN
 * (nothing queued and no time left to wait) or the sock_intr_errno() value
 * when interrupted by a signal.
 */
2190 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2191                                     bool freezable)
2192 {
2193         struct scm_cookie scm;
2194         struct socket *sock = state->socket;
2195         struct sock *sk = sock->sk;
2196         struct unix_sock *u = unix_sk(sk);
2197         int copied = 0;
2198         int flags = state->flags;
2199         int noblock = flags & MSG_DONTWAIT;
2200         bool check_creds = false;
2201         int target;
2202         int err = 0;
2203         long timeo;
2204         int skip;
2205         size_t size = state->size;
2206         unsigned int last_len;
2207
2208         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2209                 err = -EINVAL;
2210                 goto out;
2211         }
2212
2213         if (unlikely(flags & MSG_OOB)) {
2214                 err = -EOPNOTSUPP;
2215                 goto out;
2216         }
2217
             /* target: once this many bytes are copied an empty queue ends
              * the read instead of making us wait; timeo: how long we may
              * sleep (derived from noblock).
              */
2218         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2219         timeo = sock_rcvtimeo(sk, noblock);
2220
2221         memset(&scm, 0, sizeof(scm));
2222
2223         /* Lock the socket to prevent queue disordering
2224          * while sleeps in memcpy_tomsg
2225          */
2226         mutex_lock(&u->iolock);
2227
             /* Bytes to skip at the head of the queue; a negative peek
              * offset is clamped to 0.
              */
2228         skip = max(sk_peek_offset(sk, flags), 0);
2229
2230         do {
2231                 int chunk;
2232                 bool drop_skb;
2233                 struct sk_buff *skb, *last;
2234
                     /* redo: start over from the head of the queue, with
                      * the state lock held while the queue is inspected.
                      */
2235 redo:
2236                 unix_state_lock(sk);
2237                 if (sock_flag(sk, SOCK_DEAD)) {
2238                         err = -ECONNRESET;
2239                         goto unlock;
2240                 }
2241                 last = skb = skb_peek(&sk->sk_receive_queue);
2242                 last_len = last ? last->len : 0;
                     /* again: entered with the state lock held and skb set
                      * to the next candidate (NULL when the queue ran out).
                      */
2243 again:
2244                 if (skb == NULL) {
2245                         if (copied >= target)
2246                                 goto unlock;
2247
2248                         /*
2249                          *      POSIX 1003.1g mandates this order.
2250                          */
2251
2252                         err = sock_error(sk);
2253                         if (err)
2254                                 goto unlock;
2255                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2256                                 goto unlock;
2257
2258                         unix_state_unlock(sk);
                             /* Out of time: leave the loop with iolock still
                              * held; it is released after the loop.
                              */
2259                         if (!timeo) {
2260                                 err = -EAGAIN;
2261                                 break;
2262                         }
2263
2264                         mutex_unlock(&u->iolock);
2265
                             /* last/last_len let the wait notice both a new
                              * tail skb and growth of the current one.
                              */
2266                         timeo = unix_stream_data_wait(sk, timeo, last,
2267                                                       last_len, freezable);
2268
                             /* iolock was dropped above, so jump past the
                              * mutex_unlock()/scm_recv() that follow the loop
                              * and release scm here instead.
                              */
2269                         if (signal_pending(current)) {
2270                                 err = sock_intr_errno(timeo);
2271                                 scm_destroy(&scm);
2272                                 goto out;
2273                         }
2274
2275                         mutex_lock(&u->iolock);
2276                         goto redo;
                             /* unlock: common exit for paths that still hold
                              * the state lock; only reachable via goto.
                              */
2277 unlock:
2278                         unix_state_unlock(sk);
2279                         break;
2280                 }
2281
                     /* Step over skbs that lie entirely within the skip
                      * (peek) offset, remembering the last one seen.
                      */
2282                 while (skip >= unix_skb_len(skb)) {
2283                         skip -= unix_skb_len(skb);
2284                         last = skb;
2285                         last_len = skb->len;
2286                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2287                         if (!skb)
2288                                 goto again;
2289                 }
2290
2291                 unix_state_unlock(sk);
2292
2293                 if (check_creds) {
2294                         /* Never glue messages from different writers */
2295                         if (!unix_skb_scm_eq(skb, &scm))
2296                                 break;
2297                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2298                         /* Copy credentials */
2299                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2300                         unix_set_secdata(&scm, skb);
2301                         check_creds = true;
2302                 }
2303
2304                 /* Copy address just once */
                     /* NOTE(review): sunaddr looks like a local that
                      * DECLARE_SOCKADDR() introduces inside this block, so
                      * clearing it would not stop the copy from happening
                      * again on a later skb - confirm against the macro.
                      */
2305                 if (state->msg && state->msg->msg_name) {
2306                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2307                                          state->msg->msg_name);
2308                         unix_copy_addr(state->msg, skb->sk);
2309                         sunaddr = NULL;
2310                 }
2311
                     /* Take our own reference on the skb for the duration
                      * of the actor call; it is dropped again right after.
                      */
2312                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2313                 skb_get(skb);
2314                 chunk = state->recv_actor(skb, skip, chunk, state);
2315                 drop_skb = !unix_skb_len(skb);
2316                 /* skb is only safe to use if !drop_skb */
2317                 consume_skb(skb);
                     /* Actor failure: reported as -EFAULT, and only when
                      * nothing has been copied so far.
                      */
2318                 if (chunk < 0) {
2319                         if (copied == 0)
2320                                 copied = -EFAULT;
2321                         break;
2322                 }
2323                 copied += chunk;
2324                 size -= chunk;
2325
2326                 if (drop_skb) {
2327                         /* the skb was touched by a concurrent reader;
2328                          * we should not expect anything from this skb
2329                          * anymore and assume it invalid - we can be
2330                          * sure it was dropped from the socket queue
2331                          *
2332                          * let's report a short read
2333                          */
2334                         err = 0;
2335                         break;
2336                 }
2337
2338                 /* Mark read part of skb as used */
2339                 if (!(flags & MSG_PEEK)) {
2340                         UNIXCB(skb).consumed += chunk;
2341
2342                         sk_peek_offset_bwd(sk, chunk);
2343
2344                         if (UNIXCB(skb).fp)
2345                                 unix_detach_fds(&scm, skb);
2346
                             /* skb not drained yet: stop here. */
2347                         if (unix_skb_len(skb))
2348                                 break;
2349
2350                         skb_unlink(skb, &sk->sk_receive_queue);
2351                         consume_skb(skb);
2352
                             /* Stop once file descriptors were picked up. */
2353                         if (scm.fp)
2354                                 break;
2355                 } else {
2356                         /* It is questionable, see note in unix_dgram_recvmsg.
2357                          */
2358                         if (UNIXCB(skb).fp)
2359                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2360
2361                         sk_peek_offset_fwd(sk, chunk);
2362
2363                         if (UNIXCB(skb).fp)
2364                                 break;
2365
                             /* Peeking: continue with the next skb, if any,
                              * re-entering at again with the state lock held.
                              */
2366                         skip = 0;
2367                         last = skb;
2368                         last_len = skb->len;
2369                         unix_state_lock(sk);
2370                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2371                         if (skb)
2372                                 goto again;
2373                         unix_state_unlock(sk);
2374                         break;
2375                 }
2376         } while (size);
2377
2378         mutex_unlock(&u->iolock);
             /* Deliver collected credentials/fds to the caller's msghdr, or
              * release them when there is none (msg is NULL).
              */
2379         if (state->msg)
2380                 scm_recv(sock, state->msg, &scm, flags);
2381         else
2382                 scm_destroy(&scm);
2383 out:
2384         return copied ? : err;
2385 }
2386
2387 static int unix_stream_read_actor(struct sk_buff *skb,
2388                                   int skip, int chunk,
2389                                   struct unix_stream_read_state *state)
2390 {
2391         int ret;
2392
2393         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2394                                     state->msg, chunk);
2395         return ret ?: chunk;
2396 }
2397
2398 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2399                                size_t size, int flags)
2400 {
2401         struct unix_stream_read_state state = {
2402                 .recv_actor = unix_stream_read_actor,
2403                 .socket = sock,
2404                 .msg = msg,
2405                 .size = size,
2406                 .flags = flags
2407         };
2408
2409         return unix_stream_read_generic(&state, true);
2410 }
2411
2412 static int unix_stream_splice_actor(struct sk_buff *skb,
2413                                     int skip, int chunk,
2414                                     struct unix_stream_read_state *state)
2415 {
2416         return skb_splice_bits(skb, state->socket->sk,
2417                                UNIXCB(skb).consumed + skip,
2418                                state->pipe, chunk, state->splice_flags);
2419 }
2420
2421 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2422                                        struct pipe_inode_info *pipe,
2423                                        size_t size, unsigned int flags)
2424 {
2425         struct unix_stream_read_state state = {
2426                 .recv_actor = unix_stream_splice_actor,
2427                 .socket = sock,
2428                 .pipe = pipe,
2429                 .size = size,
2430                 .splice_flags = flags,
2431         };
2432
2433         if (unlikely(*ppos))
2434                 return -ESPIPE;
2435
2436         if (sock->file->f_flags & O_NONBLOCK ||
2437             flags & SPLICE_F_NONBLOCK)
2438                 state.flags = MSG_DONTWAIT;
2439
2440         return unix_stream_read_generic(&state, false);
2441 }
2442
2443 static int unix_shutdown(struct socket *sock, int mode)
2444 {
2445         struct sock *sk = sock->sk;
2446         struct sock *other;
2447
2448         if (mode < SHUT_RD || mode > SHUT_RDWR)
2449                 return -EINVAL;
2450         /* This maps:
2451          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2452          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2453          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2454          */
2455         ++mode;
2456
2457         unix_state_lock(sk);
2458         sk->sk_shutdown |= mode;
2459         other = unix_peer(sk);
2460         if (other)
2461                 sock_hold(other);
2462         unix_state_unlock(sk);
2463         sk->sk_state_change(sk);
2464
2465         if (other &&
2466                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2467
2468                 int peer_mode = 0;
2469
2470                 if (mode&RCV_SHUTDOWN)
2471                         peer_mode |= SEND_SHUTDOWN;
2472                 if (mode&SEND_SHUTDOWN)
2473                         peer_mode |= RCV_SHUTDOWN;
2474                 unix_state_lock(other);
2475                 other->sk_shutdown |= peer_mode;
2476                 unix_state_unlock(other);
2477                 other->sk_state_change(other);
2478                 if (peer_mode == SHUTDOWN_MASK)
2479                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2480                 else if (peer_mode & RCV_SHUTDOWN)
2481                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2482         }
2483         if (other)
2484                 sock_put(other);
2485
2486         return 0;
2487 }
2488
2489 long unix_inq_len(struct sock *sk)
2490 {
2491         struct sk_buff *skb;
2492         long amount = 0;
2493
2494         if (sk->sk_state == TCP_LISTEN)
2495                 return -EINVAL;
2496
2497         spin_lock(&sk->sk_receive_queue.lock);
2498         if (sk->sk_type == SOCK_STREAM ||
2499             sk->sk_type == SOCK_SEQPACKET) {
2500                 skb_queue_walk(&sk->sk_receive_queue, skb)
2501                         amount += unix_skb_len(skb);
2502         } else {
2503                 skb = skb_peek(&sk->sk_receive_queue);
2504                 if (skb)
2505                         amount = skb->len;
2506         }
2507         spin_unlock(&sk->sk_receive_queue.lock);
2508
2509         return amount;
2510 }
2511 EXPORT_SYMBOL_GPL(unix_inq_len);
2512
/* Send-side memory currently charged to @sk, per sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long charged = sk_wmem_alloc_get(sk);

	return charged;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2518
2519 static int unix_open_file(struct sock *sk)
2520 {
2521         struct path path;
2522         struct file *f;
2523         int fd;
2524
2525         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2526                 return -EPERM;
2527
2528         if (!smp_load_acquire(&unix_sk(sk)->addr))
2529                 return -ENOENT;
2530
2531         path = unix_sk(sk)->path;
2532         if (!path.dentry)
2533                 return -ENOENT;
2534
2535         path_get(&path);
2536
2537         fd = get_unused_fd_flags(O_CLOEXEC);
2538         if (fd < 0)
2539                 goto out;
2540
2541         f = dentry_open(&path, O_PATH, current_cred());
2542         if (IS_ERR(f)) {
2543                 put_unused_fd(fd);
2544                 fd = PTR_ERR(f);
2545                 goto out;
2546         }
2547
2548         fd_install(fd, f);
2549 out:
2550         path_put(&path);
2551
2552         return fd;
2553 }
2554
2555 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2556 {
2557         struct sock *sk = sock->sk;
2558         long amount = 0;
2559         int err;
2560
2561         switch (cmd) {
2562         case SIOCOUTQ:
2563                 amount = unix_outq_len(sk);
2564                 err = put_user(amount, (int __user *)arg);
2565                 break;
2566         case SIOCINQ:
2567                 amount = unix_inq_len(sk);
2568                 if (amount < 0)
2569                         err = amount;
2570                 else
2571                         err = put_user(amount, (int __user *)arg);
2572                 break;
2573         case SIOCUNIXFILE:
2574                 err = unix_open_file(sk);
2575                 break;
2576         default:
2577                 err = -ENOIOCTLCMD;
2578                 break;
2579         }
2580         return err;
2581 }
2582
2583 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2584 {
2585         struct sock *sk = sock->sk;
2586         __poll_t mask;
2587
2588         sock_poll_wait(file, sock, wait);
2589         mask = 0;
2590
2591         /* exceptional events? */
2592         if (sk->sk_err)
2593                 mask |= EPOLLERR;
2594         if (sk->sk_shutdown == SHUTDOWN_MASK)
2595                 mask |= EPOLLHUP;
2596         if (sk->sk_shutdown & RCV_SHUTDOWN)
2597                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2598
2599         /* readable? */
2600         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2601                 mask |= EPOLLIN | EPOLLRDNORM;
2602
2603         /* Connection-based need to check for termination and startup */
2604         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2605             sk->sk_state == TCP_CLOSE)
2606                 mask |= EPOLLHUP;
2607
2608         /*
2609          * we set writable also when the other side has shut down the
2610          * connection. This prevents stuck sockets.
2611          */
2612         if (unix_writable(sk))
2613                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2614
2615         return mask;
2616 }
2617
2618 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2619                                     poll_table *wait)
2620 {
2621         struct sock *sk = sock->sk, *other;
2622         unsigned int writable;
2623         __poll_t mask;
2624
2625         sock_poll_wait(file, sock, wait);
2626         mask = 0;
2627
2628         /* exceptional events? */
2629         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2630                 mask |= EPOLLERR |
2631                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2632
2633         if (sk->sk_shutdown & RCV_SHUTDOWN)
2634                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2635         if (sk->sk_shutdown == SHUTDOWN_MASK)
2636                 mask |= EPOLLHUP;
2637
2638         /* readable? */
2639         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2640                 mask |= EPOLLIN | EPOLLRDNORM;
2641
2642         /* Connection-based need to check for termination and startup */
2643         if (sk->sk_type == SOCK_SEQPACKET) {
2644                 if (sk->sk_state == TCP_CLOSE)
2645                         mask |= EPOLLHUP;
2646                 /* connection hasn't started yet? */
2647                 if (sk->sk_state == TCP_SYN_SENT)
2648                         return mask;
2649         }
2650
2651         /* No write status requested, avoid expensive OUT tests. */
2652         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2653                 return mask;
2654
2655         writable = unix_writable(sk);
2656         if (writable) {
2657                 unix_state_lock(sk);
2658
2659                 other = unix_peer(sk);
2660                 if (other && unix_peer(other) != sk &&
2661                     unix_recvq_full(other) &&
2662                     unix_dgram_peer_wake_me(sk, other))
2663                         writable = 0;
2664
2665                 unix_state_unlock(sk);
2666         }
2667
2668         if (writable)
2669                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2670         else
2671                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2672
2673         return mask;
2674 }
2675
2676 #ifdef CONFIG_PROC_FS
2677
/*
 * A seq_file position packs a hash-bucket index into the bits at and above
 * BUCKET_SPACE and an offset within that bucket into the bits below it.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Mask selecting the offset bits of a packed position. */
#define BUCKET_OFFSET_MASK ((1L << BUCKET_SPACE) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & BUCKET_OFFSET_MASK)
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2683
2684 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2685 {
2686         unsigned long offset = get_offset(*pos);
2687         unsigned long bucket = get_bucket(*pos);
2688         struct sock *sk;
2689         unsigned long count = 0;
2690
2691         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2692                 if (sock_net(sk) != seq_file_net(seq))
2693                         continue;
2694                 if (++count == offset)
2695                         break;
2696         }
2697
2698         return sk;
2699 }
2700
2701 static struct sock *unix_next_socket(struct seq_file *seq,
2702                                      struct sock *sk,
2703                                      loff_t *pos)
2704 {
2705         unsigned long bucket;
2706
2707         while (sk > (struct sock *)SEQ_START_TOKEN) {
2708                 sk = sk_next(sk);
2709                 if (!sk)
2710                         goto next_bucket;
2711                 if (sock_net(sk) == seq_file_net(seq))
2712                         return sk;
2713         }
2714
2715         do {
2716                 sk = unix_from_bucket(seq, pos);
2717                 if (sk)
2718                         return sk;
2719
2720 next_bucket:
2721                 bucket = get_bucket(*pos) + 1;
2722                 *pos = set_bucket_offset(bucket, 1);
2723         } while (bucket < ARRAY_SIZE(unix_socket_table));
2724
2725         return NULL;
2726 }
2727
2728 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2729         __acquires(unix_table_lock)
2730 {
2731         spin_lock(&unix_table_lock);
2732
2733         if (!*pos)
2734                 return SEQ_START_TOKEN;
2735
2736         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2737                 return NULL;
2738
2739         return unix_next_socket(seq, NULL, pos);
2740 }
2741
2742 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2743 {
2744         ++*pos;
2745         return unix_next_socket(seq, v, pos);
2746 }
2747
2748 static void unix_seq_stop(struct seq_file *seq, void *v)
2749         __releases(unix_table_lock)
2750 {
2751         spin_unlock(&unix_table_lock);
2752 }
2753
2754 static int unix_seq_show(struct seq_file *seq, void *v)
2755 {
2756
2757         if (v == SEQ_START_TOKEN)
2758                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2759                          "Inode Path\n");
2760         else {
2761                 struct sock *s = v;
2762                 struct unix_sock *u = unix_sk(s);
2763                 unix_state_lock(s);
2764
2765                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2766                         s,
2767                         refcount_read(&s->sk_refcnt),
2768                         0,
2769                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2770                         s->sk_type,
2771                         s->sk_socket ?
2772                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2773                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2774                         sock_i_ino(s));
2775
2776                 if (u->addr) {  // under unix_table_lock here
2777                         int i, len;
2778                         seq_putc(seq, ' ');
2779
2780                         i = 0;
2781                         len = u->addr->len - sizeof(short);
2782                         if (!UNIX_ABSTRACT(s))
2783                                 len--;
2784                         else {
2785                                 seq_putc(seq, '@');
2786                                 i++;
2787                         }
2788                         for ( ; i < len; i++)
2789                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2790                                          '@');
2791                 }
2792                 unix_state_unlock(s);
2793                 seq_putc(seq, '\n');
2794         }
2795
2796         return 0;
2797 }
2798
2799 static const struct seq_operations unix_seq_ops = {
2800         .start  = unix_seq_start,
2801         .next   = unix_seq_next,
2802         .stop   = unix_seq_stop,
2803         .show   = unix_seq_show,
2804 };
2805 #endif
2806
2807 static const struct net_proto_family unix_family_ops = {
2808         .family = PF_UNIX,
2809         .create = unix_create,
2810         .owner  = THIS_MODULE,
2811 };
2812
2813
2814 static int __net_init unix_net_init(struct net *net)
2815 {
2816         int error = -ENOMEM;
2817
2818         net->unx.sysctl_max_dgram_qlen = 10;
2819         if (unix_sysctl_register(net))
2820                 goto out;
2821
2822 #ifdef CONFIG_PROC_FS
2823         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2824                         sizeof(struct seq_net_private))) {
2825                 unix_sysctl_unregister(net);
2826                 goto out;
2827         }
2828 #endif
2829         error = 0;
2830 out:
2831         return error;
2832 }
2833
2834 static void __net_exit unix_net_exit(struct net *net)
2835 {
2836         unix_sysctl_unregister(net);
2837         remove_proc_entry("unix", net->proc_net);
2838 }
2839
2840 static struct pernet_operations unix_net_ops = {
2841         .init = unix_net_init,
2842         .exit = unix_net_exit,
2843 };
2844
2845 static int __init af_unix_init(void)
2846 {
2847         int rc = -1;
2848
2849         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2850
2851         rc = proto_register(&unix_proto, 1);
2852         if (rc != 0) {
2853                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2854                 goto out;
2855         }
2856
2857         sock_register(&unix_family_ops);
2858         register_pernet_subsys(&unix_net_ops);
2859 out:
2860         return rc;
2861 }
2862
2863 static void __exit af_unix_exit(void)
2864 {
2865         sock_unregister(PF_UNIX);
2866         proto_unregister(&unix_proto);
2867         unregister_pernet_subsys(&unix_net_ops);
2868 }
2869
2870 /* Earlier than device_initcall() so that other drivers invoking
2871    request_module() don't end up in a loop when modprobe tries
2872    to use a UNIX socket. But later than subsys_initcall() because
2873    we depend on stuff initialised there */
2874 fs_initcall(af_unix_init);
2875 module_exit(af_unix_exit);
2876
2877 MODULE_LICENSE("GPL");
2878 MODULE_ALIAS_NETPROTO(PF_UNIX);