net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <[email protected]>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
  33  *                                      by above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge number
 *                                      of socks being hashed (for unix_gc()
 *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
/*
 * Global table of AF_UNIX sockets, 2 * UNIX_HASH_SIZE buckets long.
 * unix_sockets_unbound() hands out buckets from the upper half.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock taken around lookups, insertions and removals on the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Number of AF_UNIX sockets; incremented in unix_create1() and
 * decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
/*
 * Non-zero when the hash recorded in sk's address is below UNIX_HASH_SIZE.
 * Dereferences unix_sk(sk)->addr unconditionally, so sk must have an address.
 * NOTE(review): presumably filesystem-bound sockets are given a hash of
 * UNIX_HASH_SIZE or more at bind time -- confirm against the bind path.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
/* The peer pointer of sk's unix_sock; an lvalue, so it can be read or assigned. */
#define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (refcount_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         *hashp = 0;
 229
 230         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 231                 return -EINVAL;
 232         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 233                 return -EINVAL;
 234         if (sunaddr->sun_path[0]) {
 235                 /*
 236                  * This may look like an off by one error but it is a bit more
 237                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 238                  * sun_path[108] doesn't as such exist.  However in kernel space
 239                  * we are guaranteed that it is a valid memory location in our
 240                  * kernel address buffer.
 241                  */
 242                 ((char *)sunaddr)[len] = 0;
 243                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 244                 return len;
 245         }
 246
 247         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 248         return len;
 249 }
 250
/*
 * Unlink sk from whatever hash list it is on.  Takes no lock itself;
 * unix_remove_socket() is the variant that holds unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
 255
/*
 * Add sk to the given hash list.  Warns if sk is still hashed somewhere.
 * Takes no lock itself; unix_insert_socket() is the variant that holds
 * unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
 261
/* Unlink sk from its hash list while holding unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
 268
/* Add sk to the given hash list while holding unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
 275
 276 static struct sock *__unix_find_socket_byname(struct net *net,
 277                                               struct sockaddr_un *sunname,
 278                                               int len, int type, unsigned int hash)
 279 {
 280         struct sock *s;
 281
 282         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 283                 struct unix_sock *u = unix_sk(s);
 284
 285                 if (!net_eq(sock_net(s), net))
 286                         continue;
 287
 288                 if (u->addr->len == len &&
 289                     !memcmp(u->addr->name, sunname, len))
 290                         goto found;
 291         }
 292         s = NULL;
 293 found:
 294         return s;
 295 }
 296
 297 static inline struct sock *unix_find_socket_byname(struct net *net,
 298                                                    struct sockaddr_un *sunname,
 299                                                    int len, int type,
 300                                                    unsigned int hash)
 301 {
 302         struct sock *s;
 303
 304         spin_lock(&unix_table_lock);
 305         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 306         if (s)
 307                 sock_hold(s);
 308         spin_unlock(&unix_table_lock);
 309         return s;
 310 }
 311
 312 static struct sock *unix_find_socket_byinode(struct inode *i)
 313 {
 314         struct sock *s;
 315
 316         spin_lock(&unix_table_lock);
 317         sk_for_each(s,
 318                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 319                 struct dentry *dentry = unix_sk(s)->path.dentry;
 320
 321                 if (dentry && d_backing_inode(dentry) == i) {
 322                         sock_hold(s);
 323                         goto found;
 324                 }
 325         }
 326         s = NULL;
 327 found:
 328         spin_unlock(&unix_table_lock);
 329         return s;
 330 }
 331
 332 /* Support code for asymmetrically connected dgram sockets
 333  *
 334  * If a datagram socket is connected to a socket not itself connected
 335  * to the first socket (eg, /dev/log), clients may only enqueue more
 336  * messages if the present receive queue of the server socket is not
 337  * "too large". This means there's a second writeability condition
 338  * poll and sendmsg need to test. The dgram recv code will do a wake
 339  * up on the peer_wait wait queue of a socket upon reception of a
 340  * datagram which needs to be propagated to sleeping would-be writers
 341  * since these might not have sent anything so far. This can't be
 342  * accomplished via poll_wait because the lifetime of the server
 343  * socket might be less than that of its clients if these break their
 344  * association with it or if the server socket is closed while clients
 345  * are still connected to it and there's no way to inform "a polling
 346  * implementation" that it should let go of a certain wait queue
 347  *
 348  * In order to propagate a wake up, a wait_queue_entry_t of the client
 349  * socket is enqueued on the peer_wait queue of the server socket
 350  * whose wake function does a wake_up on the ordinary client socket
 351  * wait queue. This connection is established whenever a write (or
 352  * poll for write) hit the flow control condition and broken when the
 353  * association to the server socket is dissolved or after a wake up
 354  * was relayed.
 355  */
 356
 357 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 358                                       void *key)
 359 {
 360         struct unix_sock *u;
 361         wait_queue_head_t *u_sleep;
 362
 363         u = container_of(q, struct unix_sock, peer_wake);
 364
 365         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 366                             q);
 367         u->peer_wake.private = NULL;
 368
 369         /* relaying can only happen while the wq still exists */
 370         u_sleep = sk_sleep(&u->sk);
 371         if (u_sleep)
 372                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 373
 374         return 0;
 375 }
 376
 377 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 378 {
 379         struct unix_sock *u, *u_other;
 380         int rc;
 381
 382         u = unix_sk(sk);
 383         u_other = unix_sk(other);
 384         rc = 0;
 385         spin_lock(&u_other->peer_wait.lock);
 386
 387         if (!u->peer_wake.private) {
 388                 u->peer_wake.private = other;
 389                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 390
 391                 rc = 1;
 392         }
 393
 394         spin_unlock(&u_other->peer_wait.lock);
 395         return rc;
 396 }
 397
 398 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 399                                             struct sock *other)
 400 {
 401         struct unix_sock *u, *u_other;
 402
 403         u = unix_sk(sk);
 404         u_other = unix_sk(other);
 405         spin_lock(&u_other->peer_wait.lock);
 406
 407         if (u->peer_wake.private == other) {
 408                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 409                 u->peer_wake.private = NULL;
 410         }
 411
 412         spin_unlock(&u_other->peer_wait.lock);
 413 }
 414
 415 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 416                                                    struct sock *other)
 417 {
 418         unix_dgram_peer_wake_disconnect(sk, other);
 419         wake_up_interruptible_poll(sk_sleep(sk),
 420                                    EPOLLOUT |
 421                                    EPOLLWRNORM |
 422                                    EPOLLWRBAND);
 423 }
 424
 425 /* preconditions:
 426  *      - unix_peer(sk) == other
 427  *      - association is stable
 428  */
 429 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 430 {
 431         int connected;
 432
 433         connected = unix_dgram_peer_wake_connect(sk, other);
 434
 435         /* If other is SOCK_DEAD, we want to make sure we signal
 436          * POLLOUT, such that a subsequent write() can get a
 437          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 438          * to other and its full, we will hang waiting for POLLOUT.
 439          */
 440         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 441                 return 1;
 442
 443         if (connected)
 444                 unix_dgram_peer_wake_disconnect(sk, other);
 445
 446         return 0;
 447 }
 448
 449 static int unix_writable(const struct sock *sk)
 450 {
 451         return sk->sk_state != TCP_LISTEN &&
 452                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 453 }
 454
 455 static void unix_write_space(struct sock *sk)
 456 {
 457         struct socket_wq *wq;
 458
 459         rcu_read_lock();
 460         if (unix_writable(sk)) {
 461                 wq = rcu_dereference(sk->sk_wq);
 462                 if (skwq_has_sleeper(wq))
 463                         wake_up_interruptible_sync_poll(&wq->wait,
 464                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 465                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 466         }
 467         rcu_read_unlock();
 468 }
 469
 470 /* When dgram socket disconnects (or changes its peer), we clear its receive
 471  * queue of packets arrived from previous peer. First, it allows to do
 472  * flow control based only on wmem_alloc; second, sk connected to peer
 473  * may receive messages only from that peer. */
 474 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 475 {
 476         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 477                 skb_queue_purge(&sk->sk_receive_queue);
 478                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 479
 480                 /* If one link of bidirectional dgram pipe is disconnected,
 481                  * we signal error. Messages are lost. Do not make this,
 482                  * when peer was not connected to us.
 483                  */
 484                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 485                         other->sk_err = ECONNRESET;
 486                         other->sk_error_report(other);
 487                 }
 488         }
 489 }
 490
 491 static void unix_sock_destructor(struct sock *sk)
 492 {
 493         struct unix_sock *u = unix_sk(sk);
 494
 495         skb_queue_purge(&sk->sk_receive_queue);
 496
 497         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 498         WARN_ON(!sk_unhashed(sk));
 499         WARN_ON(sk->sk_socket);
 500         if (!sock_flag(sk, SOCK_DEAD)) {
 501                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 502                 return;
 503         }
 504
 505         if (u->addr)
 506                 unix_release_addr(u->addr);
 507
 508         atomic_long_dec(&unix_nr_socks);
 509         local_bh_disable();
 510         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 511         local_bh_enable();
 512 #ifdef UNIX_REFCNT_DEBUG
 513         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 514                 atomic_long_read(&unix_nr_socks));
 515 #endif
 516 }
 517
/*
 * Tear down a unix socket: unhash it, orphan it, notify and detach the
 * peer, flush the receive queue and drop a reference on sk.
 *
 * sk:      socket being released.
 * embrion: non-zero when called for a socket that was still sitting in a
 *          listener's receive queue (see the recursive call below); it
 *          forces ECONNRESET on the peer of a stream/seqpacket socket.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state; the old path and state are saved for use below */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data or an embryo socket: report a reset */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /* For a listener, each queued skb's sk is released as an embryo */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        /* Drop the path reference saved before the state was cleared */
        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 591
 592 static void init_peercred(struct sock *sk)
 593 {
 594         put_pid(sk->sk_peer_pid);
 595         if (sk->sk_peer_cred)
 596                 put_cred(sk->sk_peer_cred);
 597         sk->sk_peer_pid  = get_pid(task_tgid(current));
 598         sk->sk_peer_cred = get_current_cred();
 599 }
 600
 601 static void copy_peercred(struct sock *sk, struct sock *peersk)
 602 {
 603         put_pid(sk->sk_peer_pid);
 604         if (sk->sk_peer_cred)
 605                 put_cred(sk->sk_peer_cred);
 606         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 607         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 608 }
 609
 610 static int unix_listen(struct socket *sock, int backlog)
 611 {
 612         int err;
 613         struct sock *sk = sock->sk;
 614         struct unix_sock *u = unix_sk(sk);
 615         struct pid *old_pid = NULL;
 616
 617         err = -EOPNOTSUPP;
 618         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 619                 goto out;       /* Only stream/seqpacket sockets accept */
 620         err = -EINVAL;
 621         if (!u->addr)
 622                 goto out;       /* No listens on an unbound socket */
 623         unix_state_lock(sk);
 624         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 625                 goto out_unlock;
 626         if (backlog > sk->sk_max_ack_backlog)
 627                 wake_up_interruptible_all(&u->peer_wait);
 628         sk->sk_max_ack_backlog  = backlog;
 629         sk->sk_state            = TCP_LISTEN;
 630         /* set credentials so connect can copy them */
 631         init_peercred(sk);
 632         err = 0;
 633
 634 out_unlock:
 635         unix_state_unlock(sk);
 636         put_pid(old_pid);
 637 out:
 638         return err;
 639 }
 640
 641 static int unix_release(struct socket *);
 642 static int unix_bind(struct socket *, struct sockaddr *, int);
 643 static int unix_stream_connect(struct socket *, struct sockaddr *,
 644                                int addr_len, int flags);
 645 static int unix_socketpair(struct socket *, struct socket *);
 646 static int unix_accept(struct socket *, struct socket *, int, bool);
 647 static int unix_getname(struct socket *, struct sockaddr *, int);
 648 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 649 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 650                                     poll_table *);
 651 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 652 static int unix_shutdown(struct socket *, int);
 653 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 654 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 655 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 656                                     size_t size, int flags);
 657 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 658                                        struct pipe_inode_info *, size_t size,
 659                                        unsigned int flags);
 660 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 661 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 662 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 663                               int, int);
 664 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 665 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 666                                   int);
 667
 668 static int unix_set_peek_off(struct sock *sk, int val)
 669 {
 670         struct unix_sock *u = unix_sk(sk);
 671
 672         if (mutex_lock_interruptible(&u->iolock))
 673                 return -EINTR;
 674
 675         sk->sk_peek_off = val;
 676         mutex_unlock(&u->iolock);
 677
 678         return 0;
 679 }
 680
 681
 682 static const struct proto_ops unix_stream_ops = {
 683         .family =       PF_UNIX,
 684         .owner =        THIS_MODULE,
 685         .release =      unix_release,
 686         .bind =         unix_bind,
 687         .connect =      unix_stream_connect,
 688         .socketpair =   unix_socketpair,
 689         .accept =       unix_accept,
 690         .getname =      unix_getname,
 691         .poll =         unix_poll,
 692         .ioctl =        unix_ioctl,
 693         .listen =       unix_listen,
 694         .shutdown =     unix_shutdown,
 695         .setsockopt =   sock_no_setsockopt,
 696         .getsockopt =   sock_no_getsockopt,
 697         .sendmsg =      unix_stream_sendmsg,
 698         .recvmsg =      unix_stream_recvmsg,
 699         .mmap =         sock_no_mmap,
 700         .sendpage =     unix_stream_sendpage,
 701         .splice_read =  unix_stream_splice_read,
 702         .set_peek_off = unix_set_peek_off,
 703 };
 704
 705 static const struct proto_ops unix_dgram_ops = {
 706         .family =       PF_UNIX,
 707         .owner =        THIS_MODULE,
 708         .release =      unix_release,
 709         .bind =         unix_bind,
 710         .connect =      unix_dgram_connect,
 711         .socketpair =   unix_socketpair,
 712         .accept =       sock_no_accept,
 713         .getname =      unix_getname,
 714         .poll =         unix_dgram_poll,
 715         .ioctl =        unix_ioctl,
 716         .listen =       sock_no_listen,
 717         .shutdown =     unix_shutdown,
 718         .setsockopt =   sock_no_setsockopt,
 719         .getsockopt =   sock_no_getsockopt,
 720         .sendmsg =      unix_dgram_sendmsg,
 721         .recvmsg =      unix_dgram_recvmsg,
 722         .mmap =         sock_no_mmap,
 723         .sendpage =     sock_no_sendpage,
 724         .set_peek_off = unix_set_peek_off,
 725 };
 726
 727 static const struct proto_ops unix_seqpacket_ops = {
 728         .family =       PF_UNIX,
 729         .owner =        THIS_MODULE,
 730         .release =      unix_release,
 731         .bind =         unix_bind,
 732         .connect =      unix_stream_connect,
 733         .socketpair =   unix_socketpair,
 734         .accept =       unix_accept,
 735         .getname =      unix_getname,
 736         .poll =         unix_dgram_poll,
 737         .ioctl =        unix_ioctl,
 738         .listen =       unix_listen,
 739         .shutdown =     unix_shutdown,
 740         .setsockopt =   sock_no_setsockopt,
 741         .getsockopt =   sock_no_getsockopt,
 742         .sendmsg =      unix_seqpacket_sendmsg,
 743         .recvmsg =      unix_seqpacket_recvmsg,
 744         .mmap =         sock_no_mmap,
 745         .sendpage =     sock_no_sendpage,
 746         .set_peek_off = unix_set_peek_off,
 747 };
 748
 749 static struct proto unix_proto = {
 750         .name                   = "UNIX",
 751         .owner                  = THIS_MODULE,
 752         .obj_size               = sizeof(struct unix_sock),
 753 };
 754
 755 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 756 {
 757         struct sock *sk = NULL;
 758         struct unix_sock *u;
 759
 760         atomic_long_inc(&unix_nr_socks);
 761         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 762                 goto out;
 763
 764         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 765         if (!sk)
 766                 goto out;
 767
 768         sock_init_data(sock, sk);
 769
 770         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 771         sk->sk_write_space      = unix_write_space;
 772         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 773         sk->sk_destruct         = unix_sock_destructor;
 774         u         = unix_sk(sk);
 775         u->path.dentry = NULL;
 776         u->path.mnt = NULL;
 777         spin_lock_init(&u->lock);
 778         atomic_long_set(&u->inflight, 0);
 779         INIT_LIST_HEAD(&u->link);
 780         mutex_init(&u->iolock); /* single task reading lock */
 781         mutex_init(&u->bindlock); /* single task binding lock */
 782         init_waitqueue_head(&u->peer_wait);
 783         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 784         unix_insert_socket(unix_sockets_unbound(sk), sk);
 785 out:
 786         if (sk == NULL)
 787                 atomic_long_dec(&unix_nr_socks);
 788         else {
 789                 local_bh_disable();
 790                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 791                 local_bh_enable();
 792         }
 793         return sk;
 794 }
 795
 796 static int unix_create(struct net *net, struct socket *sock, int protocol,
 797                        int kern)
 798 {
 799         if (protocol && protocol != PF_UNIX)
 800                 return -EPROTONOSUPPORT;
 801
 802         sock->state = SS_UNCONNECTED;
 803
 804         switch (sock->type) {
 805         case SOCK_STREAM:
 806                 sock->ops = &unix_stream_ops;
 807                 break;
 808                 /*
 809                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 810                  *      nothing uses it.
 811                  */
 812         case SOCK_RAW:
 813                 sock->type = SOCK_DGRAM;
 814                 /* fall through */
 815         case SOCK_DGRAM:
 816                 sock->ops = &unix_dgram_ops;
 817                 break;
 818         case SOCK_SEQPACKET:
 819                 sock->ops = &unix_seqpacket_ops;
 820                 break;
 821         default:
 822                 return -ESOCKTNOSUPPORT;
 823         }
 824
 825         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 826 }
 827
 828 static int unix_release(struct socket *sock)
 829 {
 830         struct sock *sk = sock->sk;
 831
 832         if (!sk)
 833                 return 0;
 834
 835         unix_release_sock(sk, 0);
 836         sock->sk = NULL;
 837
 838         return 0;
 839 }
 840
/*
 * Bind @sock to an automatically chosen name.
 *
 * The generated name has a zero first sun_path byte followed by five hex
 * digits taken from a counter shared by all callers; candidates are tried
 * until one is not found by __unix_find_socket_byname().
 *
 * Returns 0 on success or if the socket already has an address, the error
 * from mutex_lock_interruptible() if bindlock cannot be taken, -ENOMEM if
 * the address cannot be allocated, or -ENOSPC when every candidate name
 * tried was already in use.
 */
static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        /* Already bound: nothing to do. */
        err = 0;
        if (u->addr)
                goto out;

        /*
         * Zeroed allocation: room for sun_family plus 16 bytes of name, so
         * sun_path[0] stays 0 and the digits written below are terminated.
         */
        err = -ENOMEM;
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        refcount_set(&addr->refcnt, 1);

retry:
        /* Length = hex digits + the leading zero byte + sun_family. */
        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        /* Advance the shared counter, wrapping within 20 bits. */
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take a long time if many
                 * names are already in use.
                 */
                cond_resched();
                /* Give up if all names seem to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        /* Mix the socket type into the hash used as the table index below. */
        addr->hash ^= sk->sk_type;

        /* Move the socket from its current list to the chosen hash bucket. */
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->bindlock);
        return err;
}
 901
 902 static struct sock *unix_find_other(struct net *net,
 903                                     struct sockaddr_un *sunname, int len,
 904                                     int type, unsigned int hash, int *error)
 905 {
 906         struct sock *u;
 907         struct path path;
 908         int err = 0;
 909
 910         if (sunname->sun_path[0]) {
 911                 struct inode *inode;
 912                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 913                 if (err)
 914                         goto fail;
 915                 inode = d_backing_inode(path.dentry);
 916                 err = inode_permission(inode, MAY_WRITE);
 917                 if (err)
 918                         goto put_fail;
 919
 920                 err = -ECONNREFUSED;
 921                 if (!S_ISSOCK(inode->i_mode))
 922                         goto put_fail;
 923                 u = unix_find_socket_byinode(inode);
 924                 if (!u)
 925                         goto put_fail;
 926
 927                 if (u->sk_type == type)
 928                         touch_atime(&path);
 929
 930                 path_put(&path);
 931
 932                 err = -EPROTOTYPE;
 933                 if (u->sk_type != type) {
 934                         sock_put(u);
 935                         goto fail;
 936                 }
 937         } else {
 938                 err = -ECONNREFUSED;
 939                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 940                 if (u) {
 941                         struct dentry *dentry;
 942                         dentry = unix_sk(u)->path.dentry;
 943                         if (dentry)
 944                                 touch_atime(&unix_sk(u)->path);
 945                 } else
 946                         goto fail;
 947         }
 948         return u;
 949
 950 put_fail:
 951         path_put(&path);
 952 fail:
 953         *error = err;
 954         return NULL;
 955 }
 956
 957 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 958 {
 959         struct dentry *dentry;
 960         struct path path;
 961         int err = 0;
 962         /*
 963          * Get the parent directory, calculate the hash for last
 964          * component.
 965          */
 966         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 967         err = PTR_ERR(dentry);
 968         if (IS_ERR(dentry))
 969                 return err;
 970
 971         /*
 972          * All right, let's create it.
 973          */
 974         err = security_path_mknod(&path, dentry, mode, 0);
 975         if (!err) {
 976                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 977                 if (!err) {
 978                         res->mnt = mntget(path.mnt);
 979                         res->dentry = dget(dentry);
 980                 }
 981         }
 982         done_path_create(&path, dentry);
 983         return err;
 984 }
 985
/*
 * Bind @sock to the address in @uaddr.
 *
 * An address that consists of the family only (addr_len == sizeof(short))
 * is handed to unix_autobind().  If the first sun_path byte is non-zero a
 * socket inode is created with unix_mknod(); otherwise the name must not
 * already be present according to __unix_find_socket_byname().
 *
 * Returns 0 on success or a negative errno: -EINVAL for a too-short or
 * non-AF_UNIX address or a socket that already has an address,
 * -EADDRINUSE if the name exists, -ENOMEM on allocation failure, or the
 * error reported by unix_mkname(), unix_mknod(), unix_autobind() or
 * mutex_lock_interruptible().
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
        struct path path = { };

        err = -EINVAL;
        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
            sunaddr->sun_family != AF_UNIX)
                goto out;

        /* Family only, no name: pick one automatically. */
        if (addr_len == sizeof(short)) {
                err = unix_autobind(sock);
                goto out;
        }

        /* On success unix_mkname() returns the address length to use. */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (sun_path[0]) {
                /* Socket inode mode: the socket's mode bits minus the umask. */
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
                err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
                        goto out;
                }
        }

        /*
         * NOTE(review): from here on, a failure only drops the path
         * references at out_put; nothing visible in this function removes
         * the inode created by unix_mknod() above -- confirm whether that
         * is intended.
         */
        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_put;

        /* A socket can be bound only once. */
        err = -EINVAL;
        if (u->addr)
                goto out_up;

        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
                goto out_up;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        refcount_set(&addr->refcnt, 1);

        if (sun_path[0]) {
                /*
                 * Path-bound sockets go into the bucket selected by the
                 * inode number; addr->hash is set to UNIX_HASH_SIZE.
                 */
                addr->hash = UNIX_HASH_SIZE;
                hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
        } else {
                /* The name must be unique; checked under the table lock. */
                spin_lock(&unix_table_lock);
                err = -EADDRINUSE;
                if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                              sk->sk_type, hash)) {
                        unix_release_addr(addr);
                        goto out_unlock;
                }

                list = &unix_socket_table[addr->hash];
        }

        /* Move the socket from its current list to the selected bucket. */
        err = 0;
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(list, sk);

out_unlock:
        spin_unlock(&unix_table_lock);
out_up:
        mutex_unlock(&u->bindlock);
out_put:
        /* On success the path references now live in u->path. */
        if (err)
                path_put(&path);
out:
        return err;
}
1076
/*
 * Take the state locks of @sk1 and @sk2.  When @sk2 is NULL or the same
 * socket as @sk1 only @sk1 is locked; otherwise the socket at the lower
 * address is locked first and the other one with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *lower, *upper;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        lower = sk1 < sk2 ? sk1 : sk2;
        upper = sk1 < sk2 ? sk2 : sk1;

        unix_state_lock(lower);
        unix_state_lock_nested(upper);
}
1091
/*
 * Release what unix_state_double_lock() took: always @sk1, and @sk2 too
 * unless it is NULL or the same socket as @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (!sk2 || unlikely(sk1 == sk2))
                return;

        unix_state_unlock(sk2);
}
1101
/*
 * Set, replace or clear the peer of a datagram socket.
 *
 * With sa_family == AF_UNSPEC the current peer, if any, is dropped.
 * Otherwise the destination is looked up with unix_find_other(), both
 * sockets are locked, and the peer is installed once unix_may_send() and
 * security_unix_may_send() have accepted it.  On success the reference
 * returned by unix_find_other() is not dropped here; it stays with
 * unix_peer(sk).
 *
 * Returns 0 on success or a negative errno: -EINVAL if @alen is too short,
 * -EPERM if unix_may_send() refuses, or the error from unix_mkname(),
 * unix_autobind(), unix_find_other() or security_unix_may_send().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                /* On success unix_mkname() returns the length to use. */
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                /* With SOCK_PASSCRED set, an unbound socket is autobound. */
                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                /* With a NULL second socket only sk is locked. */
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
                unix_peer(sk) = other;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                /* Runs after the locks are dropped, and only for a new peer. */
                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                /* Drop the reference that was held for the previous peer. */
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
1181
1182 static long unix_wait_for_peer(struct sock *other, long timeo)
1183 {
1184         struct unix_sock *u = unix_sk(other);
1185         int sched;
1186         DEFINE_WAIT(wait);
1187
1188         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1189
1190         sched = !sock_flag(other, SOCK_DEAD) &&
1191                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1192                 unix_recvq_full(other);
1193
1194         unix_state_unlock(other);
1195
1196         if (sched)
1197                 timeo = schedule_timeout(timeo);
1198
1199         finish_wait(&u->peer_wait, &wait);
1200         return timeo;
1201 }
1202
/*
 * Connect @sock to a listening AF_UNIX socket.
 *
 * A new sock (newsk) and a one-byte skb charged to it are allocated up
 * front.  Once the listener has been found and both state locks are held,
 * newsk and sk are made each other's peer and the skb is queued on the
 * listener's receive queue; unix_accept() later recovers newsk from
 * skb->sk.
 *
 * Returns 0 on success or a negative errno: -ENOMEM, -ECONNREFUSED,
 * -EAGAIN, -EISCONN, -EINVAL, or the error from unix_mkname(),
 * unix_autobind(), unix_find_other(), sock_intr_errno() or
 * security_unix_stream_connect().
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        /* On success unix_mkname() returns the address length to use. */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /*
         * First of all allocate resources.  If we did this after the state
         * is locked, we would have to recheck everything again anyway.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* unix_wait_for_peer() releases other's state lock. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                /* Lock already dropped, hence "out" and not "out_unlock". */
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /*
         * Latch our state.
         *
         * This is a tricky place.  We need to grab our own state lock
         * without dropping the lock on the peer, which is dangerous
         * because a deadlock is possible.  The connect-to-self case and
         * simultaneous connect attempts are eliminated by checking the
         * socket state: other is TCP_LISTEN, and if sk is TCP_LISTEN too
         * we notice that before attempting to grab the lock.
         *
         * And we have to recheck the state after the socket is locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* The state changed while we were not holding the lock: start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        /* newsk holds a reference on sk as its peer. */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* Copy address information from the listening sock to the new sock. */
        if (otheru->addr) {
                refcount_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* sk holds a reference on newsk as its peer. */
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* Queue the skb on the listening sock and notify it. */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Common cleanup: free whatever had been set up so far. */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1377
1378 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1379 {
1380         struct sock *ska = socka->sk, *skb = sockb->sk;
1381
1382         /* Join our sockets back to back */
1383         sock_hold(ska);
1384         sock_hold(skb);
1385         unix_peer(ska) = skb;
1386         unix_peer(skb) = ska;
1387         init_peercred(ska);
1388         init_peercred(skb);
1389
1390         if (ska->sk_type != SOCK_DGRAM) {
1391                 ska->sk_state = TCP_ESTABLISHED;
1392                 skb->sk_state = TCP_ESTABLISHED;
1393                 socka->state  = SS_CONNECTED;
1394                 sockb->state  = SS_CONNECTED;
1395         }
1396         return 0;
1397 }
1398
1399 static void unix_sock_inherit_flags(const struct socket *old,
1400                                     struct socket *new)
1401 {
1402         if (test_bit(SOCK_PASSCRED, &old->flags))
1403                 set_bit(SOCK_PASSCRED, &new->flags);
1404         if (test_bit(SOCK_PASSSEC, &old->flags))
1405                 set_bit(SOCK_PASSSEC, &new->flags);
1406 }
1407
1408 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1409                        bool kern)
1410 {
1411         struct sock *sk = sock->sk;
1412         struct sock *tsk;
1413         struct sk_buff *skb;
1414         int err;
1415
1416         err = -EOPNOTSUPP;
1417         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1418                 goto out;
1419
1420         err = -EINVAL;
1421         if (sk->sk_state != TCP_LISTEN)
1422                 goto out;
1423
1424         /* If socket state is TCP_LISTEN it cannot change (for now...),
1425          * so that no locks are necessary.
1426          */
1427
1428         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1429         if (!skb) {
1430                 /* This means receive shutdown. */
1431                 if (err == 0)
1432                         err = -EINVAL;
1433                 goto out;
1434         }
1435
1436         tsk = skb->sk;
1437         skb_free_datagram(sk, skb);
1438         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1439
1440         /* attach accepted sock to socket */
1441         unix_state_lock(tsk);
1442         newsock->state = SS_CONNECTED;
1443         unix_sock_inherit_flags(sock, newsock);
1444         sock_graft(tsk, newsock);
1445         unix_state_unlock(tsk);
1446         return 0;
1447
1448 out:
1449         return err;
1450 }
1451
1452
1453 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1454 {
1455         struct sock *sk = sock->sk;
1456         struct unix_sock *u;
1457         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1458         int err = 0;
1459
1460         if (peer) {
1461                 sk = unix_peer_get(sk);
1462
1463                 err = -ENOTCONN;
1464                 if (!sk)
1465                         goto out;
1466                 err = 0;
1467         } else {
1468                 sock_hold(sk);
1469         }
1470
1471         u = unix_sk(sk);
1472         unix_state_lock(sk);
1473         if (!u->addr) {
1474                 sunaddr->sun_family = AF_UNIX;
1475                 sunaddr->sun_path[0] = 0;
1476                 err = sizeof(short);
1477         } else {
1478                 struct unix_address *addr = u->addr;
1479
1480                 err = addr->len;
1481                 memcpy(sunaddr, addr->name, addr->len);
1482         }
1483         unix_state_unlock(sk);
1484         sock_put(sk);
1485 out:
1486         return err;
1487 }
1488
1489 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1490 {
1491         int i;
1492
1493         scm->fp = UNIXCB(skb).fp;
1494         UNIXCB(skb).fp = NULL;
1495
1496         for (i = scm->fp->count-1; i >= 0; i--)
1497                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1498 }
1499
1500 static void unix_destruct_scm(struct sk_buff *skb)
1501 {
1502         struct scm_cookie scm;
1503         memset(&scm, 0, sizeof(scm));
1504         scm.pid  = UNIXCB(skb).pid;
1505         if (UNIXCB(skb).fp)
1506                 unix_detach_fds(&scm, skb);
1507
1508         /* Alas, it calls VFS */
1509         /* So fscking what? fput() had been SMP-safe since the last Summer */
1510         scm_destroy(&scm);
1511         sock_wfree(skb);
1512 }
1513
1514 /*
1515  * The "user->unix_inflight" variable is protected by the garbage
1516  * collection lock, and we just read it locklessly here. If you go
1517  * over the limit, there might be a tiny race in actually noticing
1518  * it across threads. Tough.
1519  */
1520 static inline bool too_many_unix_fds(struct task_struct *p)
1521 {
1522         struct user_struct *user = current_user();
1523
1524         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1525                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1526         return false;
1527 }
1528
1529 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1530 {
1531         int i;
1532
1533         if (too_many_unix_fds(current))
1534                 return -ETOOMANYREFS;
1535
1536         /*
1537          * Need to duplicate file references for the sake of garbage
1538          * collection.  Otherwise a socket in the fps might become a
1539          * candidate for GC while the skb is not yet queued.
1540          */
1541         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1542         if (!UNIXCB(skb).fp)
1543                 return -ENOMEM;
1544
1545         for (i = scm->fp->count - 1; i >= 0; i--)
1546                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1547         return 0;
1548 }
1549
1550 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1551 {
1552         int err = 0;
1553
1554         UNIXCB(skb).pid  = get_pid(scm->pid);
1555         UNIXCB(skb).uid = scm->creds.uid;
1556         UNIXCB(skb).gid = scm->creds.gid;
1557         UNIXCB(skb).fp = NULL;
1558         unix_get_secdata(scm, skb);
1559         if (scm->fp && send_fds)
1560                 err = unix_attach_fds(scm, skb);
1561
1562         skb->destructor = unix_destruct_scm;
1563         return err;
1564 }
1565
1566 static bool unix_passcred_enabled(const struct socket *sock,
1567                                   const struct sock *other)
1568 {
1569         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1570                !other->sk_socket ||
1571                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1572 }
1573
1574 /*
1575  * Some apps rely on write() giving SCM_CREDENTIALS
1576  * We include credentials if source or destination socket
1577  * asserted SOCK_PASSCRED.
1578  */
1579 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1580                             const struct sock *other)
1581 {
1582         if (UNIXCB(skb).pid)
1583                 return;
1584         if (unix_passcred_enabled(sock, other)) {
1585                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1586                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1587         }
1588 }
1589
1590 static int maybe_init_creds(struct scm_cookie *scm,
1591                             struct socket *socket,
1592                             const struct sock *other)
1593 {
1594         int err;
1595         struct msghdr msg = { .msg_controllen = 0 };
1596
1597         err = scm_send(socket, &msg, scm, false);
1598         if (err)
1599                 return err;
1600
1601         if (unix_passcred_enabled(socket, other)) {
1602                 scm->pid = get_pid(task_tgid(current));
1603                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1604         }
1605         return err;
1606 }
1607
1608 static bool unix_skb_scm_eq(struct sk_buff *skb,
1609                             struct scm_cookie *scm)
1610 {
1611         const struct unix_skb_parms *u = &UNIXCB(skb);
1612
1613         return u->pid == scm->pid &&
1614                uid_eq(u->uid, scm->creds.uid) &&
1615                gid_eq(u->gid, scm->creds.gid) &&
1616                unix_secdata_eq(scm, skb);
1617 }
1618
1619 /*
1620  *      Send AF_UNIX data.
1621  */
1622
1623 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1624                               size_t len)
1625 {
1626         struct sock *sk = sock->sk;
1627         struct net *net = sock_net(sk);
1628         struct unix_sock *u = unix_sk(sk);
1629         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1630         struct sock *other = NULL;
1631         int namelen = 0; /* fake GCC */
1632         int err;
1633         unsigned int hash;
1634         struct sk_buff *skb;
1635         long timeo;
1636         struct scm_cookie scm;
1637         int data_len = 0;
1638         int sk_locked;
1639
1640         wait_for_unix_gc();
1641         err = scm_send(sock, msg, &scm, false);
1642         if (err < 0)
1643                 return err;
1644
1645         err = -EOPNOTSUPP;
1646         if (msg->msg_flags&MSG_OOB)
1647                 goto out;
1648
1649         if (msg->msg_namelen) {
1650                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1651                 if (err < 0)
1652                         goto out;
1653                 namelen = err;
1654         } else {
1655                 sunaddr = NULL;
1656                 err = -ENOTCONN;
1657                 other = unix_peer_get(sk);
1658                 if (!other)
1659                         goto out;
1660         }
1661
1662         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1663             && (err = unix_autobind(sock)) != 0)
1664                 goto out;
1665
1666         err = -EMSGSIZE;
1667         if (len > sk->sk_sndbuf - 32)
1668                 goto out;
1669
1670         if (len > SKB_MAX_ALLOC) {
1671                 data_len = min_t(size_t,
1672                                  len - SKB_MAX_ALLOC,
1673                                  MAX_SKB_FRAGS * PAGE_SIZE);
1674                 data_len = PAGE_ALIGN(data_len);
1675
1676                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1677         }
1678
1679         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1680                                    msg->msg_flags & MSG_DONTWAIT, &err,
1681                                    PAGE_ALLOC_COSTLY_ORDER);
1682         if (skb == NULL)
1683                 goto out;
1684
1685         err = unix_scm_to_skb(&scm, skb, true);
1686         if (err < 0)
1687                 goto out_free;
1688
1689         skb_put(skb, len - data_len);
1690         skb->data_len = data_len;
1691         skb->len = len;
1692         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1693         if (err)
1694                 goto out_free;
1695
1696         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1697
1698 restart:
1699         if (!other) {
1700                 err = -ECONNRESET;
1701                 if (sunaddr == NULL)
1702                         goto out_free;
1703
1704                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1705                                         hash, &err);
1706                 if (other == NULL)
1707                         goto out_free;
1708         }
1709
1710         if (sk_filter(other, skb) < 0) {
1711                 /* Toss the packet but do not return any error to the sender */
1712                 err = len;
1713                 goto out_free;
1714         }
1715
1716         sk_locked = 0;
1717         unix_state_lock(other);
1718 restart_locked:
1719         err = -EPERM;
1720         if (!unix_may_send(sk, other))
1721                 goto out_unlock;
1722
1723         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1724                 /*
1725                  *      Check with 1003.1g - what should
1726                  *      datagram error
1727                  */
1728                 unix_state_unlock(other);
1729                 sock_put(other);
1730
1731                 if (!sk_locked)
1732                         unix_state_lock(sk);
1733
1734                 err = 0;
1735                 if (unix_peer(sk) == other) {
1736                         unix_peer(sk) = NULL;
1737                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1738
1739                         unix_state_unlock(sk);
1740
1741                         unix_dgram_disconnected(sk, other);
1742                         sock_put(other);
1743                         err = -ECONNREFUSED;
1744                 } else {
1745                         unix_state_unlock(sk);
1746                 }
1747
1748                 other = NULL;
1749                 if (err)
1750                         goto out_free;
1751                 goto restart;
1752         }
1753
1754         err = -EPIPE;
1755         if (other->sk_shutdown & RCV_SHUTDOWN)
1756                 goto out_unlock;
1757
1758         if (sk->sk_type != SOCK_SEQPACKET) {
1759                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1760                 if (err)
1761                         goto out_unlock;
1762         }
1763
1764         /* other == sk && unix_peer(other) != sk if
1765          * - unix_peer(sk) == NULL, destination address bound to sk
1766          * - unix_peer(sk) == sk by time of get but disconnected before lock
1767          */
1768         if (other != sk &&
1769             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1770                 if (timeo) {
1771                         timeo = unix_wait_for_peer(other, timeo);
1772
1773                         err = sock_intr_errno(timeo);
1774                         if (signal_pending(current))
1775                                 goto out_free;
1776
1777                         goto restart;
1778                 }
1779
1780                 if (!sk_locked) {
1781                         unix_state_unlock(other);
1782                         unix_state_double_lock(sk, other);
1783                 }
1784
1785                 if (unix_peer(sk) != other ||
1786                     unix_dgram_peer_wake_me(sk, other)) {
1787                         err = -EAGAIN;
1788                         sk_locked = 1;
1789                         goto out_unlock;
1790                 }
1791
1792                 if (!sk_locked) {
1793                         sk_locked = 1;
1794                         goto restart_locked;
1795                 }
1796         }
1797
1798         if (unlikely(sk_locked))
1799                 unix_state_unlock(sk);
1800
1801         if (sock_flag(other, SOCK_RCVTSTAMP))
1802                 __net_timestamp(skb);
1803         maybe_add_creds(skb, sock, other);
1804         skb_queue_tail(&other->sk_receive_queue, skb);
1805         unix_state_unlock(other);
1806         other->sk_data_ready(other);
1807         sock_put(other);
1808         scm_destroy(&scm);
1809         return len;
1810
1811 out_unlock:
1812         if (sk_locked)
1813                 unix_state_unlock(sk);
1814         unix_state_unlock(other);
1815 out_free:
1816         kfree_skb(skb);
1817 out:
1818         if (other)
1819                 sock_put(other);
1820         scm_destroy(&scm);
1821         return err;
1822 }
1823
1824 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1825  * bytes, and a minimum of a full page.
1826  */
1827 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1828
1829 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1830                                size_t len)
1831 {
1832         struct sock *sk = sock->sk;
1833         struct sock *other = NULL;
1834         int err, size;
1835         struct sk_buff *skb;
1836         int sent = 0;
1837         struct scm_cookie scm;
1838         bool fds_sent = false;
1839         int data_len;
1840
1841         wait_for_unix_gc();
1842         err = scm_send(sock, msg, &scm, false);
1843         if (err < 0)
1844                 return err;
1845
1846         err = -EOPNOTSUPP;
1847         if (msg->msg_flags&MSG_OOB)
1848                 goto out_err;
1849
1850         if (msg->msg_namelen) {
1851                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1852                 goto out_err;
1853         } else {
1854                 err = -ENOTCONN;
1855                 other = unix_peer(sk);
1856                 if (!other)
1857                         goto out_err;
1858         }
1859
1860         if (sk->sk_shutdown & SEND_SHUTDOWN)
1861                 goto pipe_err;
1862
1863         while (sent < len) {
1864                 size = len - sent;
1865
1866                 /* Keep two messages in the pipe so it schedules better */
1867                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1868
1869                 /* allow fallback to order-0 allocations */
1870                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1871
1872                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1873
1874                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1875
1876                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1877                                            msg->msg_flags & MSG_DONTWAIT, &err,
1878                                            get_order(UNIX_SKB_FRAGS_SZ));
1879                 if (!skb)
1880                         goto out_err;
1881
1882                 /* Only send the fds in the first buffer */
1883                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1884                 if (err < 0) {
1885                         kfree_skb(skb);
1886                         goto out_err;
1887                 }
1888                 fds_sent = true;
1889
1890                 skb_put(skb, size - data_len);
1891                 skb->data_len = data_len;
1892                 skb->len = size;
1893                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1894                 if (err) {
1895                         kfree_skb(skb);
1896                         goto out_err;
1897                 }
1898
1899                 unix_state_lock(other);
1900
1901                 if (sock_flag(other, SOCK_DEAD) ||
1902                     (other->sk_shutdown & RCV_SHUTDOWN))
1903                         goto pipe_err_free;
1904
1905                 maybe_add_creds(skb, sock, other);
1906                 skb_queue_tail(&other->sk_receive_queue, skb);
1907                 unix_state_unlock(other);
1908                 other->sk_data_ready(other);
1909                 sent += size;
1910         }
1911
1912         scm_destroy(&scm);
1913
1914         return sent;
1915
1916 pipe_err_free:
1917         unix_state_unlock(other);
1918         kfree_skb(skb);
1919 pipe_err:
1920         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1921                 send_sig(SIGPIPE, current, 0);
1922         err = -EPIPE;
1923 out_err:
1924         scm_destroy(&scm);
1925         return sent ? : err;
1926 }
1927
1928 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1929                                     int offset, size_t size, int flags)
1930 {
1931         int err;
1932         bool send_sigpipe = false;
1933         bool init_scm = true;
1934         struct scm_cookie scm;
1935         struct sock *other, *sk = socket->sk;
1936         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1937
1938         if (flags & MSG_OOB)
1939                 return -EOPNOTSUPP;
1940
1941         other = unix_peer(sk);
1942         if (!other || sk->sk_state != TCP_ESTABLISHED)
1943                 return -ENOTCONN;
1944
1945         if (false) {
1946 alloc_skb:
1947                 unix_state_unlock(other);
1948                 mutex_unlock(&unix_sk(other)->iolock);
1949                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1950                                               &err, 0);
1951                 if (!newskb)
1952                         goto err;
1953         }
1954
1955         /* we must acquire iolock as we modify already present
1956          * skbs in the sk_receive_queue and mess with skb->len
1957          */
1958         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1959         if (err) {
1960                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1961                 goto err;
1962         }
1963
1964         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1965                 err = -EPIPE;
1966                 send_sigpipe = true;
1967                 goto err_unlock;
1968         }
1969
1970         unix_state_lock(other);
1971
1972         if (sock_flag(other, SOCK_DEAD) ||
1973             other->sk_shutdown & RCV_SHUTDOWN) {
1974                 err = -EPIPE;
1975                 send_sigpipe = true;
1976                 goto err_state_unlock;
1977         }
1978
1979         if (init_scm) {
1980                 err = maybe_init_creds(&scm, socket, other);
1981                 if (err)
1982                         goto err_state_unlock;
1983                 init_scm = false;
1984         }
1985
1986         skb = skb_peek_tail(&other->sk_receive_queue);
1987         if (tail && tail == skb) {
1988                 skb = newskb;
1989         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1990                 if (newskb) {
1991                         skb = newskb;
1992                 } else {
1993                         tail = skb;
1994                         goto alloc_skb;
1995                 }
1996         } else if (newskb) {
1997                 /* this is fast path, we don't necessarily need to
1998                  * call to kfree_skb even though with newskb == NULL
1999                  * this - does no harm
2000                  */
2001                 consume_skb(newskb);
2002                 newskb = NULL;
2003         }
2004
2005         if (skb_append_pagefrags(skb, page, offset, size)) {
2006                 tail = skb;
2007                 goto alloc_skb;
2008         }
2009
2010         skb->len += size;
2011         skb->data_len += size;
2012         skb->truesize += size;
2013         refcount_add(size, &sk->sk_wmem_alloc);
2014
2015         if (newskb) {
2016                 err = unix_scm_to_skb(&scm, skb, false);
2017                 if (err)
2018                         goto err_state_unlock;
2019                 spin_lock(&other->sk_receive_queue.lock);
2020                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2021                 spin_unlock(&other->sk_receive_queue.lock);
2022         }
2023
2024         unix_state_unlock(other);
2025         mutex_unlock(&unix_sk(other)->iolock);
2026
2027         other->sk_data_ready(other);
2028         scm_destroy(&scm);
2029         return size;
2030
2031 err_state_unlock:
2032         unix_state_unlock(other);
2033 err_unlock:
2034         mutex_unlock(&unix_sk(other)->iolock);
2035 err:
2036         kfree_skb(newskb);
2037         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2038                 send_sig(SIGPIPE, current, 0);
2039         if (!init_scm)
2040                 scm_destroy(&scm);
2041         return err;
2042 }
2043
2044 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2045                                   size_t len)
2046 {
2047         int err;
2048         struct sock *sk = sock->sk;
2049
2050         err = sock_error(sk);
2051         if (err)
2052                 return err;
2053
2054         if (sk->sk_state != TCP_ESTABLISHED)
2055                 return -ENOTCONN;
2056
2057         if (msg->msg_namelen)
2058                 msg->msg_namelen = 0;
2059
2060         return unix_dgram_sendmsg(sock, msg, len);
2061 }
2062
2063 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2064                                   size_t size, int flags)
2065 {
2066         struct sock *sk = sock->sk;
2067
2068         if (sk->sk_state != TCP_ESTABLISHED)
2069                 return -ENOTCONN;
2070
2071         return unix_dgram_recvmsg(sock, msg, size, flags);
2072 }
2073
2074 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2075 {
2076         struct unix_sock *u = unix_sk(sk);
2077
2078         if (u->addr) {
2079                 msg->msg_namelen = u->addr->len;
2080                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2081         }
2082 }
2083
2084 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2085                               size_t size, int flags)
2086 {
2087         struct scm_cookie scm;
2088         struct sock *sk = sock->sk;
2089         struct unix_sock *u = unix_sk(sk);
2090         struct sk_buff *skb, *last;
2091         long timeo;
2092         int err;
2093         int peeked, skip;
2094
2095         err = -EOPNOTSUPP;
2096         if (flags&MSG_OOB)
2097                 goto out;
2098
2099         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2100
2101         do {
2102                 mutex_lock(&u->iolock);
2103
2104                 skip = sk_peek_offset(sk, flags);
2105                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2106                                               &err, &last);
2107                 if (skb)
2108                         break;
2109
2110                 mutex_unlock(&u->iolock);
2111
2112                 if (err != -EAGAIN)
2113                         break;
2114         } while (timeo &&
2115                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2116
2117         if (!skb) { /* implies iolock unlocked */
2118                 unix_state_lock(sk);
2119                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2120                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2121                     (sk->sk_shutdown & RCV_SHUTDOWN))
2122                         err = 0;
2123                 unix_state_unlock(sk);
2124                 goto out;
2125         }
2126
2127         if (wq_has_sleeper(&u->peer_wait))
2128                 wake_up_interruptible_sync_poll(&u->peer_wait,
2129                                                 EPOLLOUT | EPOLLWRNORM |
2130                                                 EPOLLWRBAND);
2131
2132         if (msg->msg_name)
2133                 unix_copy_addr(msg, skb->sk);
2134
2135         if (size > skb->len - skip)
2136                 size = skb->len - skip;
2137         else if (size < skb->len - skip)
2138                 msg->msg_flags |= MSG_TRUNC;
2139
2140         err = skb_copy_datagram_msg(skb, skip, msg, size);
2141         if (err)
2142                 goto out_free;
2143
2144         if (sock_flag(sk, SOCK_RCVTSTAMP))
2145                 __sock_recv_timestamp(msg, sk, skb);
2146
2147         memset(&scm, 0, sizeof(scm));
2148
2149         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2150         unix_set_secdata(&scm, skb);
2151
2152         if (!(flags & MSG_PEEK)) {
2153                 if (UNIXCB(skb).fp)
2154                         unix_detach_fds(&scm, skb);
2155
2156                 sk_peek_offset_bwd(sk, skb->len);
2157         } else {
2158                 /* It is questionable: on PEEK we could:
2159                    - do not return fds - good, but too simple 8)
2160                    - return fds, and do not return them on read (old strategy,
2161                      apparently wrong)
2162                    - clone fds (I chose it for now, it is the most universal
2163                      solution)
2164
2165                    POSIX 1003.1g does not actually define this clearly
2166                    at all. POSIX 1003.1g doesn't define a lot of things
2167                    clearly however!
2168
2169                 */
2170
2171                 sk_peek_offset_fwd(sk, size);
2172
2173                 if (UNIXCB(skb).fp)
2174                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2175         }
2176         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2177
2178         scm_recv(sock, msg, &scm, flags);
2179
2180 out_free:
2181         skb_free_datagram(sk, skb);
2182         mutex_unlock(&u->iolock);
2183 out:
2184         return err;
2185 }
2186
2187 /*
2188  *      Sleep until more data has arrived. But check for races..
2189  */
2190 static long unix_stream_data_wait(struct sock *sk, long timeo,
2191                                   struct sk_buff *last, unsigned int last_len,
2192                                   bool freezable)
2193 {
2194         struct sk_buff *tail;
2195         DEFINE_WAIT(wait);
2196
2197         unix_state_lock(sk);
2198
2199         for (;;) {
2200                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2201
2202                 tail = skb_peek_tail(&sk->sk_receive_queue);
2203                 if (tail != last ||
2204                     (tail && tail->len != last_len) ||
2205                     sk->sk_err ||
2206                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2207                     signal_pending(current) ||
2208                     !timeo)
2209                         break;
2210
2211                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2212                 unix_state_unlock(sk);
2213                 if (freezable)
2214                         timeo = freezable_schedule_timeout(timeo);
2215                 else
2216                         timeo = schedule_timeout(timeo);
2217                 unix_state_lock(sk);
2218
2219                 if (sock_flag(sk, SOCK_DEAD))
2220                         break;
2221
2222                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2223         }
2224
2225         finish_wait(sk_sleep(sk), &wait);
2226         unix_state_unlock(sk);
2227         return timeo;
2228 }
2229
2230 static unsigned int unix_skb_len(const struct sk_buff *skb)
2231 {
2232         return skb->len - UNIXCB(skb).consumed;
2233 }
2234
/*
 * Parameters of one stream read, shared by the recvmsg and splice paths and
 * handed to unix_stream_read_generic().
 */
2235 struct unix_stream_read_state {
     /* Called as recv_actor(skb, skip, chunk, state) for every skb read;
      * returns the number of bytes handled or a negative value on failure.
      */
2236         int (*recv_actor)(struct sk_buff *, int, int,
2237                           struct unix_stream_read_state *);
2238         struct socket *socket;          /* socket being read from */
2239         struct msghdr *msg;             /* recvmsg target; left unset by the splice path */
2240         struct pipe_inode_info *pipe;   /* splice target; left unset by the recvmsg path */
2241         size_t size;                    /* maximum number of bytes to read */
2242         int flags;                      /* MSG_* flags for the read */
2243         unsigned int splice_flags;      /* flags passed on to skb_splice_bits() */
2244 };
2245
/*
 * Common read loop for stream sockets, used by both recvmsg and splice.
 *
 * Walks the receive queue of state->socket and hands each skb (starting at
 * the current peek offset) to state->recv_actor until state->size bytes
 * were handled or one of the stop conditions in the loop is hit.  u->iolock
 * is held for the whole read except while sleeping in
 * unix_stream_data_wait(); @freezable is passed through to that wait.
 *
 * Returns the number of bytes handled if non-zero, otherwise err: -EINVAL
 * when the socket is not in TCP_ESTABLISHED state, -EOPNOTSUPP for MSG_OOB,
 * or whatever the loop recorded (0 on a plain end of data).
 */
2246 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2247                                     bool freezable)
2248 {
2249         struct scm_cookie scm;
2250         struct socket *sock = state->socket;
2251         struct sock *sk = sock->sk;
2252         struct unix_sock *u = unix_sk(sk);
2253         int copied = 0;
2254         int flags = state->flags;
2255         int noblock = flags & MSG_DONTWAIT;
2256         bool check_creds = false;
2257         int target;
2258         int err = 0;
2259         long timeo;
2260         int skip;
2261         size_t size = state->size;
2262         unsigned int last_len;
2263
2264         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2265                 err = -EINVAL;
2266                 goto out;
2267         }
2268
2269         if (unlikely(flags & MSG_OOB)) {
2270                 err = -EOPNOTSUPP;
2271                 goto out;
2272         }
2273
2274         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2275         timeo = sock_rcvtimeo(sk, noblock);
2276
2277         memset(&scm, 0, sizeof(scm));
2278
2279         /* Lock the socket to prevent queue disordering
2280          * while sleeps in memcpy_tomsg
2281          */
2282         mutex_lock(&u->iolock);
2283
             /* A negative peek offset is treated as "start at the head". */
2284         skip = max(sk_peek_offset(sk, flags), 0);
2285
2286         do {
2287                 int chunk;
2288                 bool drop_skb;
2289                 struct sk_buff *skb, *last;
2290
2291 redo:
2292                 unix_state_lock(sk);
2293                 if (sock_flag(sk, SOCK_DEAD)) {
2294                         err = -ECONNRESET;
2295                         goto unlock;
2296                 }
                     /* (last, last_len) is what unix_stream_data_wait()
                      * compares the queue tail against before sleeping.
                      */
2297                 last = skb = skb_peek(&sk->sk_receive_queue);
2298                 last_len = last ? last->len : 0;
2299 again:
2300                 if (skb == NULL) {
2301                         if (copied >= target)
2302                                 goto unlock;
2303
2304                         /*
2305                          *      POSIX 1003.1g mandates this order.
2306                          */
2307
2308                         err = sock_error(sk);
2309                         if (err)
2310                                 goto unlock;
2311                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2312                                 goto unlock;
2313
2314                         unix_state_unlock(sk);
2315                         if (!timeo) {
2316                                 err = -EAGAIN;
2317                                 break;
2318                         }
2319
                             /* Never sleep with iolock held. */
2320                         mutex_unlock(&u->iolock);
2321
2322                         timeo = unix_stream_data_wait(sk, timeo, last,
2323                                                       last_len, freezable);
2324
2325                         if (signal_pending(current)) {
2326                                 err = sock_intr_errno(timeo);
2327                                 scm_destroy(&scm);
2328                                 goto out;
2329                         }
2330
2331                         mutex_lock(&u->iolock);
2332                         goto redo;
                             /* Exit path for every "goto unlock" above: drop
                              * the state lock and leave the read loop.
                              */
2333 unlock:
2334                         unix_state_unlock(sk);
2335                         break;
2336                 }
2337
                     /* Step over skbs that lie entirely before the peek offset. */
2338                 while (skip >= unix_skb_len(skb)) {
2339                         skip -= unix_skb_len(skb);
2340                         last = skb;
2341                         last_len = skb->len;
2342                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2343                         if (!skb)
2344                                 goto again;
2345                 }
2346
2347                 unix_state_unlock(sk);
2348
2349                 if (check_creds) {
2350                         /* Never glue messages from different writers */
2351                         if (!unix_skb_scm_eq(skb, &scm))
2352                                 break;
2353                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2354                         /* Copy credentials */
2355                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2356                         unix_set_secdata(&scm, skb);
2357                         check_creds = true;
2358                 }
2359
2360                 /* Copy address just once */
2361                 if (state->msg && state->msg->msg_name) {
2362                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2363                                          state->msg->msg_name);
2364                         unix_copy_addr(state->msg, skb->sk);
                             /* NOTE(review): sunaddr is local to this block, so
                              * this assignment has no effect on later iterations.
                              */
2365                         sunaddr = NULL;
2366                 }
2367
2368                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* Hold an extra reference across the actor call. */
2369                 skb_get(skb);
2370                 chunk = state->recv_actor(skb, skip, chunk, state);
2371                 drop_skb = !unix_skb_len(skb);
2372                 /* skb is only safe to use if !drop_skb */
2373                 consume_skb(skb);
2374                 if (chunk < 0) {
2375                         if (copied == 0)
2376                                 copied = -EFAULT;
2377                         break;
2378                 }
2379                 copied += chunk;
2380                 size -= chunk;
2381
2382                 if (drop_skb) {
2383                         /* the skb was touched by a concurrent reader;
2384                          * we should not expect anything from this skb
2385                          * anymore and assume it invalid - we can be
2386                          * sure it was dropped from the socket queue
2387                          *
2388                          * let's report a short read
2389                          */
2390                         err = 0;
2391                         break;
2392                 }
2393
2394                 /* Mark read part of skb as used */
2395                 if (!(flags & MSG_PEEK)) {
2396                         UNIXCB(skb).consumed += chunk;
2397
2398                         sk_peek_offset_bwd(sk, chunk);
2399
2400                         if (UNIXCB(skb).fp)
2401                                 unix_detach_fds(&scm, skb);
2402
                             /* skb not fully read yet: stop here. */
2403                         if (unix_skb_len(skb))
2404                                 break;
2405
2406                         skb_unlink(skb, &sk->sk_receive_queue);
2407                         consume_skb(skb);
2408
                             /* Stop once file descriptors were picked up. */
2409                         if (scm.fp)
2410                                 break;
2411                 } else {
2412                         /* It is questionable, see note in unix_dgram_recvmsg.
2413                          */
2414                         if (UNIXCB(skb).fp)
2415                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2416
2417                         sk_peek_offset_fwd(sk, chunk);
2418
2419                         if (UNIXCB(skb).fp)
2420                                 break;
2421
                             /* Peeking: continue with the next queued skb, if any. */
2422                         skip = 0;
2423                         last = skb;
2424                         last_len = skb->len;
2425                         unix_state_lock(sk);
2426                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2427                         if (skb)
2428                                 goto again;
2429                         unix_state_unlock(sk);
2430                         break;
2431                 }
2432         } while (size);
2433
2434         mutex_unlock(&u->iolock);
             /* Without a msghdr there is nowhere to deliver the collected
              * ancillary data, so it is released instead.
              */
2435         if (state->msg)
2436                 scm_recv(sock, state->msg, &scm, flags);
2437         else
2438                 scm_destroy(&scm);
2439 out:
2440         return copied ? : err;
2441 }
2442
2443 static int unix_stream_read_actor(struct sk_buff *skb,
2444                                   int skip, int chunk,
2445                                   struct unix_stream_read_state *state)
2446 {
2447         int ret;
2448
2449         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2450                                     state->msg, chunk);
2451         return ret ?: chunk;
2452 }
2453
2454 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2455                                size_t size, int flags)
2456 {
2457         struct unix_stream_read_state state = {
2458                 .recv_actor = unix_stream_read_actor,
2459                 .socket = sock,
2460                 .msg = msg,
2461                 .size = size,
2462                 .flags = flags
2463         };
2464
2465         return unix_stream_read_generic(&state, true);
2466 }
2467
2468 static int unix_stream_splice_actor(struct sk_buff *skb,
2469                                     int skip, int chunk,
2470                                     struct unix_stream_read_state *state)
2471 {
2472         return skb_splice_bits(skb, state->socket->sk,
2473                                UNIXCB(skb).consumed + skip,
2474                                state->pipe, chunk, state->splice_flags);
2475 }
2476
2477 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2478                                        struct pipe_inode_info *pipe,
2479                                        size_t size, unsigned int flags)
2480 {
2481         struct unix_stream_read_state state = {
2482                 .recv_actor = unix_stream_splice_actor,
2483                 .socket = sock,
2484                 .pipe = pipe,
2485                 .size = size,
2486                 .splice_flags = flags,
2487         };
2488
2489         if (unlikely(*ppos))
2490                 return -ESPIPE;
2491
2492         if (sock->file->f_flags & O_NONBLOCK ||
2493             flags & SPLICE_F_NONBLOCK)
2494                 state.flags = MSG_DONTWAIT;
2495
2496         return unix_stream_read_generic(&state, false);
2497 }
2498
2499 static int unix_shutdown(struct socket *sock, int mode)
2500 {
2501         struct sock *sk = sock->sk;
2502         struct sock *other;
2503
2504         if (mode < SHUT_RD || mode > SHUT_RDWR)
2505                 return -EINVAL;
2506         /* This maps:
2507          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2508          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2509          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2510          */
2511         ++mode;
2512
2513         unix_state_lock(sk);
2514         sk->sk_shutdown |= mode;
2515         other = unix_peer(sk);
2516         if (other)
2517                 sock_hold(other);
2518         unix_state_unlock(sk);
2519         sk->sk_state_change(sk);
2520
2521         if (other &&
2522                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2523
2524                 int peer_mode = 0;
2525
2526                 if (mode&RCV_SHUTDOWN)
2527                         peer_mode |= SEND_SHUTDOWN;
2528                 if (mode&SEND_SHUTDOWN)
2529                         peer_mode |= RCV_SHUTDOWN;
2530                 unix_state_lock(other);
2531                 other->sk_shutdown |= peer_mode;
2532                 unix_state_unlock(other);
2533                 other->sk_state_change(other);
2534                 if (peer_mode == SHUTDOWN_MASK)
2535                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2536                 else if (peer_mode & RCV_SHUTDOWN)
2537                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2538         }
2539         if (other)
2540                 sock_put(other);
2541
2542         return 0;
2543 }
2544
2545 long unix_inq_len(struct sock *sk)
2546 {
2547         struct sk_buff *skb;
2548         long amount = 0;
2549
2550         if (sk->sk_state == TCP_LISTEN)
2551                 return -EINVAL;
2552
2553         spin_lock(&sk->sk_receive_queue.lock);
2554         if (sk->sk_type == SOCK_STREAM ||
2555             sk->sk_type == SOCK_SEQPACKET) {
2556                 skb_queue_walk(&sk->sk_receive_queue, skb)
2557                         amount += unix_skb_len(skb);
2558         } else {
2559                 skb = skb_peek(&sk->sk_receive_queue);
2560                 if (skb)
2561                         amount = skb->len;
2562         }
2563         spin_unlock(&sk->sk_receive_queue.lock);
2564
2565         return amount;
2566 }
2567 EXPORT_SYMBOL_GPL(unix_inq_len);
2568
/* Write memory currently accounted to @sk, as reported for SIOCOUTQ. */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2574
2575 static int unix_open_file(struct sock *sk)
2576 {
2577         struct path path;
2578         struct file *f;
2579         int fd;
2580
2581         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2582                 return -EPERM;
2583
2584         unix_state_lock(sk);
2585         path = unix_sk(sk)->path;
2586         if (!path.dentry) {
2587                 unix_state_unlock(sk);
2588                 return -ENOENT;
2589         }
2590
2591         path_get(&path);
2592         unix_state_unlock(sk);
2593
2594         fd = get_unused_fd_flags(O_CLOEXEC);
2595         if (fd < 0)
2596                 goto out;
2597
2598         f = dentry_open(&path, O_PATH, current_cred());
2599         if (IS_ERR(f)) {
2600                 put_unused_fd(fd);
2601                 fd = PTR_ERR(f);
2602                 goto out;
2603         }
2604
2605         fd_install(fd, f);
2606 out:
2607         path_put(&path);
2608
2609         return fd;
2610 }
2611
2612 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2613 {
2614         struct sock *sk = sock->sk;
2615         long amount = 0;
2616         int err;
2617
2618         switch (cmd) {
2619         case SIOCOUTQ:
2620                 amount = unix_outq_len(sk);
2621                 err = put_user(amount, (int __user *)arg);
2622                 break;
2623         case SIOCINQ:
2624                 amount = unix_inq_len(sk);
2625                 if (amount < 0)
2626                         err = amount;
2627                 else
2628                         err = put_user(amount, (int __user *)arg);
2629                 break;
2630         case SIOCUNIXFILE:
2631                 err = unix_open_file(sk);
2632                 break;
2633         default:
2634                 err = -ENOIOCTLCMD;
2635                 break;
2636         }
2637         return err;
2638 }
2639
2640 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2641 {
2642         struct sock *sk = sock->sk;
2643         __poll_t mask;
2644
2645         sock_poll_wait(file, sock, wait);
2646         mask = 0;
2647
2648         /* exceptional events? */
2649         if (sk->sk_err)
2650                 mask |= EPOLLERR;
2651         if (sk->sk_shutdown == SHUTDOWN_MASK)
2652                 mask |= EPOLLHUP;
2653         if (sk->sk_shutdown & RCV_SHUTDOWN)
2654                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2655
2656         /* readable? */
2657         if (!skb_queue_empty(&sk->sk_receive_queue))
2658                 mask |= EPOLLIN | EPOLLRDNORM;
2659
2660         /* Connection-based need to check for termination and startup */
2661         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2662             sk->sk_state == TCP_CLOSE)
2663                 mask |= EPOLLHUP;
2664
2665         /*
2666          * we set writable also when the other side has shut down the
2667          * connection. This prevents stuck sockets.
2668          */
2669         if (unix_writable(sk))
2670                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2671
2672         return mask;
2673 }
2674
2675 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2676                                     poll_table *wait)
2677 {
2678         struct sock *sk = sock->sk, *other;
2679         unsigned int writable;
2680         __poll_t mask;
2681
2682         sock_poll_wait(file, sock, wait);
2683         mask = 0;
2684
2685         /* exceptional events? */
2686         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2687                 mask |= EPOLLERR |
2688                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2689
2690         if (sk->sk_shutdown & RCV_SHUTDOWN)
2691                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2692         if (sk->sk_shutdown == SHUTDOWN_MASK)
2693                 mask |= EPOLLHUP;
2694
2695         /* readable? */
2696         if (!skb_queue_empty(&sk->sk_receive_queue))
2697                 mask |= EPOLLIN | EPOLLRDNORM;
2698
2699         /* Connection-based need to check for termination and startup */
2700         if (sk->sk_type == SOCK_SEQPACKET) {
2701                 if (sk->sk_state == TCP_CLOSE)
2702                         mask |= EPOLLHUP;
2703                 /* connection hasn't started yet? */
2704                 if (sk->sk_state == TCP_SYN_SENT)
2705                         return mask;
2706         }
2707
2708         /* No write status requested, avoid expensive OUT tests. */
2709         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2710                 return mask;
2711
2712         writable = unix_writable(sk);
2713         if (writable) {
2714                 unix_state_lock(sk);
2715
2716                 other = unix_peer(sk);
2717                 if (other && unix_peer(other) != sk &&
2718                     unix_recvq_full(other) &&
2719                     unix_dgram_peer_wake_me(sk, other))
2720                         writable = 0;
2721
2722                 unix_state_unlock(sk);
2723         }
2724
2725         if (writable)
2726                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2727         else
2728                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2729
2730         return mask;
2731 }
2732
2733 #ifdef CONFIG_PROC_FS
2734
/*
 * A seq_file position for the socket listing packs two values into one
 * integer: the hash-bucket index in the bits above BUCKET_SPACE and an
 * offset in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Bucket index stored in the high bits of a position. */
#define get_bucket(pos) ((pos) >> BUCKET_SPACE)
/* Offset stored in the low BUCKET_SPACE bits of a position. */
#define get_offset(pos) ((pos) & ((1L << BUCKET_SPACE) - 1))
/* Build a position from a bucket index and an offset. */
#define set_bucket_offset(bucket, offset) ((bucket) << BUCKET_SPACE | (offset))
2740
2741 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2742 {
2743         unsigned long offset = get_offset(*pos);
2744         unsigned long bucket = get_bucket(*pos);
2745         struct sock *sk;
2746         unsigned long count = 0;
2747
2748         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2749                 if (sock_net(sk) != seq_file_net(seq))
2750                         continue;
2751                 if (++count == offset)
2752                         break;
2753         }
2754
2755         return sk;
2756 }
2757
2758 static struct sock *unix_next_socket(struct seq_file *seq,
2759                                      struct sock *sk,
2760                                      loff_t *pos)
2761 {
2762         unsigned long bucket;
2763
2764         while (sk > (struct sock *)SEQ_START_TOKEN) {
2765                 sk = sk_next(sk);
2766                 if (!sk)
2767                         goto next_bucket;
2768                 if (sock_net(sk) == seq_file_net(seq))
2769                         return sk;
2770         }
2771
2772         do {
2773                 sk = unix_from_bucket(seq, pos);
2774                 if (sk)
2775                         return sk;
2776
2777 next_bucket:
2778                 bucket = get_bucket(*pos) + 1;
2779                 *pos = set_bucket_offset(bucket, 1);
2780         } while (bucket < ARRAY_SIZE(unix_socket_table));
2781
2782         return NULL;
2783 }
2784
2785 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2786         __acquires(unix_table_lock)
2787 {
2788         spin_lock(&unix_table_lock);
2789
2790         if (!*pos)
2791                 return SEQ_START_TOKEN;
2792
2793         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2794                 return NULL;
2795
2796         return unix_next_socket(seq, NULL, pos);
2797 }
2798
2799 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2800 {
2801         ++*pos;
2802         return unix_next_socket(seq, v, pos);
2803 }
2804
2805 static void unix_seq_stop(struct seq_file *seq, void *v)
2806         __releases(unix_table_lock)
2807 {
2808         spin_unlock(&unix_table_lock);
2809 }
2810
2811 static int unix_seq_show(struct seq_file *seq, void *v)
2812 {
2813
2814         if (v == SEQ_START_TOKEN)
2815                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2816                          "Inode Path\n");
2817         else {
2818                 struct sock *s = v;
2819                 struct unix_sock *u = unix_sk(s);
2820                 unix_state_lock(s);
2821
2822                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2823                         s,
2824                         refcount_read(&s->sk_refcnt),
2825                         0,
2826                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2827                         s->sk_type,
2828                         s->sk_socket ?
2829                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2830                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2831                         sock_i_ino(s));
2832
2833                 if (u->addr) {
2834                         int i, len;
2835                         seq_putc(seq, ' ');
2836
2837                         i = 0;
2838                         len = u->addr->len - sizeof(short);
2839                         if (!UNIX_ABSTRACT(s))
2840                                 len--;
2841                         else {
2842                                 seq_putc(seq, '@');
2843                                 i++;
2844                         }
2845                         for ( ; i < len; i++)
2846                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2847                                          '@');
2848                 }
2849                 unix_state_unlock(s);
2850                 seq_putc(seq, '\n');
2851         }
2852
2853         return 0;
2854 }
2855
2856 static const struct seq_operations unix_seq_ops = {
2857         .start  = unix_seq_start,
2858         .next   = unix_seq_next,
2859         .stop   = unix_seq_stop,
2860         .show   = unix_seq_show,
2861 };
2862 #endif
2863
2864 static const struct net_proto_family unix_family_ops = {
2865         .family = PF_UNIX,
2866         .create = unix_create,
2867         .owner  = THIS_MODULE,
2868 };
2869
2870
2871 static int __net_init unix_net_init(struct net *net)
2872 {
2873         int error = -ENOMEM;
2874
2875         net->unx.sysctl_max_dgram_qlen = 10;
2876         if (unix_sysctl_register(net))
2877                 goto out;
2878
2879 #ifdef CONFIG_PROC_FS
2880         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2881                         sizeof(struct seq_net_private))) {
2882                 unix_sysctl_unregister(net);
2883                 goto out;
2884         }
2885 #endif
2886         error = 0;
2887 out:
2888         return error;
2889 }
2890
2891 static void __net_exit unix_net_exit(struct net *net)
2892 {
2893         unix_sysctl_unregister(net);
2894         remove_proc_entry("unix", net->proc_net);
2895 }
2896
2897 static struct pernet_operations unix_net_ops = {
2898         .init = unix_net_init,
2899         .exit = unix_net_exit,
2900 };
2901
2902 static int __init af_unix_init(void)
2903 {
2904         int rc = -1;
2905
2906         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2907
2908         rc = proto_register(&unix_proto, 1);
2909         if (rc != 0) {
2910                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2911                 goto out;
2912         }
2913
2914         sock_register(&unix_family_ops);
2915         register_pernet_subsys(&unix_net_ops);
2916 out:
2917         return rc;
2918 }
2919
2920 static void __exit af_unix_exit(void)
2921 {
2922         sock_unregister(PF_UNIX);
2923         proto_unregister(&unix_proto);
2924         unregister_pernet_subsys(&unix_net_ops);
2925 }
2926
2927 /* Earlier than device_initcall() so that other drivers invoking
2928    request_module() don't end up in a loop when modprobe tries
2929    to use a UNIX socket. But later than subsys_initcall() because
2930    we depend on stuff initialised there */
2931 fs_initcall(af_unix_init);
2932 module_exit(af_unix_exit);
2933
2934 MODULE_LICENSE("GPL");
2935 MODULE_ALIAS_NETPROTO(PF_UNIX);