net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <[email protected]>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33  *                                      by above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid a huge number
  38  *                                      of socks hashed (this is for unix_gc()
  39  *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
 122 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 123 EXPORT_SYMBOL_GPL(unix_socket_table);
 124 DEFINE_SPINLOCK(unix_table_lock);
 125 EXPORT_SYMBOL_GPL(unix_table_lock);
 126 static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
 139 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
 183 #define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (refcount_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                 return -EINVAL;
 230         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                 return -EINVAL;
 232         if (sunaddr->sun_path[0]) {
 233                 /*
 234                  * This may look like an off by one error but it is a bit more
 235                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                  * sun_path[108] doesn't as such exist.  However in kernel space
 237                  * we are guaranteed that it is a valid memory location in our
 238                  * kernel address buffer.
 239                  */
 240                 ((char *)sunaddr)[len] = 0;
 241                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                 return len;
 243         }
 244
 245         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246         return len;
 247 }
 248
 249 static void __unix_remove_socket(struct sock *sk)
 250 {
 251         sk_del_node_init(sk);
 252 }
 253
 254 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 255 {
 256         WARN_ON(!sk_unhashed(sk));
 257         sk_add_node(sk, list);
 258 }
 259
 260 static inline void unix_remove_socket(struct sock *sk)
 261 {
 262         spin_lock(&unix_table_lock);
 263         __unix_remove_socket(sk);
 264         spin_unlock(&unix_table_lock);
 265 }
 266
 267 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268 {
 269         spin_lock(&unix_table_lock);
 270         __unix_insert_socket(list, sk);
 271         spin_unlock(&unix_table_lock);
 272 }
 273
 274 static struct sock *__unix_find_socket_byname(struct net *net,
 275                                               struct sockaddr_un *sunname,
 276                                               int len, int type, unsigned int hash)
 277 {
 278         struct sock *s;
 279
 280         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                 struct unix_sock *u = unix_sk(s);
 282
 283                 if (!net_eq(sock_net(s), net))
 284                         continue;
 285
 286                 if (u->addr->len == len &&
 287                     !memcmp(u->addr->name, sunname, len))
 288                         goto found;
 289         }
 290         s = NULL;
 291 found:
 292         return s;
 293 }
 294
 295 static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                    struct sockaddr_un *sunname,
 297                                                    int len, int type,
 298                                                    unsigned int hash)
 299 {
 300         struct sock *s;
 301
 302         spin_lock(&unix_table_lock);
 303         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304         if (s)
 305                 sock_hold(s);
 306         spin_unlock(&unix_table_lock);
 307         return s;
 308 }
 309
 310 static struct sock *unix_find_socket_byinode(struct inode *i)
 311 {
 312         struct sock *s;
 313
 314         spin_lock(&unix_table_lock);
 315         sk_for_each(s,
 316                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                 struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                 if (dentry && d_backing_inode(dentry) == i) {
 320                         sock_hold(s);
 321                         goto found;
 322                 }
 323         }
 324         s = NULL;
 325 found:
 326         spin_unlock(&unix_table_lock);
 327         return s;
 328 }
 329
 330 /* Support code for asymmetrically connected dgram sockets
 331  *
 332  * If a datagram socket is connected to a socket not itself connected
 333  * to the first socket (eg, /dev/log), clients may only enqueue more
 334  * messages if the present receive queue of the server socket is not
 335  * "too large". This means there's a second writeability condition
 336  * poll and sendmsg need to test. The dgram recv code will do a wake
 337  * up on the peer_wait wait queue of a socket upon reception of a
 338  * datagram which needs to be propagated to sleeping would-be writers
 339  * since these might not have sent anything so far. This can't be
 340  * accomplished via poll_wait because the lifetime of the server
 341  * socket might be less than that of its clients if these break their
 342  * association with it or if the server socket is closed while clients
 343  * are still connected to it and there's no way to inform "a polling
 344  * implementation" that it should let go of a certain wait queue
 345  *
 346  * In order to propagate a wake up, a wait_queue_entry_t of the client
 347  * socket is enqueued on the peer_wait queue of the server socket
 348  * whose wake function does a wake_up on the ordinary client socket
 349  * wait queue. This connection is established whenever a write (or
 350  * poll for write) hit the flow control condition and broken when the
 351  * association to the server socket is dissolved or after a wake up
 352  * was relayed.
 353  */
 354
 355 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                       void *key)
 357 {
 358         struct unix_sock *u;
 359         wait_queue_head_t *u_sleep;
 360
 361         u = container_of(q, struct unix_sock, peer_wake);
 362
 363         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                             q);
 365         u->peer_wake.private = NULL;
 366
 367         /* relaying can only happen while the wq still exists */
 368         u_sleep = sk_sleep(&u->sk);
 369         if (u_sleep)
 370                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 371
 372         return 0;
 373 }
 374
 375 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376 {
 377         struct unix_sock *u, *u_other;
 378         int rc;
 379
 380         u = unix_sk(sk);
 381         u_other = unix_sk(other);
 382         rc = 0;
 383         spin_lock(&u_other->peer_wait.lock);
 384
 385         if (!u->peer_wake.private) {
 386                 u->peer_wake.private = other;
 387                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                 rc = 1;
 390         }
 391
 392         spin_unlock(&u_other->peer_wait.lock);
 393         return rc;
 394 }
 395
 396 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                             struct sock *other)
 398 {
 399         struct unix_sock *u, *u_other;
 400
 401         u = unix_sk(sk);
 402         u_other = unix_sk(other);
 403         spin_lock(&u_other->peer_wait.lock);
 404
 405         if (u->peer_wake.private == other) {
 406                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                 u->peer_wake.private = NULL;
 408         }
 409
 410         spin_unlock(&u_other->peer_wait.lock);
 411 }
 412
 413 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                    struct sock *other)
 415 {
 416         unix_dgram_peer_wake_disconnect(sk, other);
 417         wake_up_interruptible_poll(sk_sleep(sk),
 418                                    EPOLLOUT |
 419                                    EPOLLWRNORM |
 420                                    EPOLLWRBAND);
 421 }
 422
 423 /* preconditions:
 424  *      - unix_peer(sk) == other
 425  *      - association is stable
 426  */
 427 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428 {
 429         int connected;
 430
 431         connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433         if (unix_recvq_full(other))
 434                 return 1;
 435
 436         if (connected)
 437                 unix_dgram_peer_wake_disconnect(sk, other);
 438
 439         return 0;
 440 }
 441
 442 static int unix_writable(const struct sock *sk)
 443 {
 444         return sk->sk_state != TCP_LISTEN &&
 445                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446 }
 447
 448 static void unix_write_space(struct sock *sk)
 449 {
 450         struct socket_wq *wq;
 451
 452         rcu_read_lock();
 453         if (unix_writable(sk)) {
 454                 wq = rcu_dereference(sk->sk_wq);
 455                 if (skwq_has_sleeper(wq))
 456                         wake_up_interruptible_sync_poll(&wq->wait,
 457                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 458                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459         }
 460         rcu_read_unlock();
 461 }
 462
 463 /* When dgram socket disconnects (or changes its peer), we clear its receive
 464  * queue of packets arrived from previous peer. First, it allows to do
 465  * flow control based only on wmem_alloc; second, sk connected to peer
 466  * may receive messages only from that peer. */
 467 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468 {
 469         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                 skb_queue_purge(&sk->sk_receive_queue);
 471                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                 /* If one link of bidirectional dgram pipe is disconnected,
 474                  * we signal error. Messages are lost. Do not make this,
 475                  * when peer was not connected to us.
 476                  */
 477                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                         other->sk_err = ECONNRESET;
 479                         other->sk_error_report(other);
 480                 }
 481         }
 482 }
 483
 484 static void unix_sock_destructor(struct sock *sk)
 485 {
 486         struct unix_sock *u = unix_sk(sk);
 487
 488         skb_queue_purge(&sk->sk_receive_queue);
 489
 490         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 491         WARN_ON(!sk_unhashed(sk));
 492         WARN_ON(sk->sk_socket);
 493         if (!sock_flag(sk, SOCK_DEAD)) {
 494                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                 return;
 496         }
 497
 498         if (u->addr)
 499                 unix_release_addr(u->addr);
 500
 501         atomic_long_dec(&unix_nr_socks);
 502         local_bh_disable();
 503         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504         local_bh_enable();
 505 #ifdef UNIX_REFCNT_DEBUG
 506         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                 atomic_long_read(&unix_nr_socks));
 508 #endif
 509 }
 510
 511 static void unix_release_sock(struct sock *sk, int embrion)
 512 {
 513         struct unix_sock *u = unix_sk(sk);
 514         struct path path;
 515         struct sock *skpair;
 516         struct sk_buff *skb;
 517         int state;
 518
 519         unix_remove_socket(sk);
 520
 521         /* Clear state */
 522         unix_state_lock(sk);
 523         sock_orphan(sk);
 524         sk->sk_shutdown = SHUTDOWN_MASK;
 525         path         = u->path;
 526         u->path.dentry = NULL;
 527         u->path.mnt = NULL;
 528         state = sk->sk_state;
 529         sk->sk_state = TCP_CLOSE;
 530         unix_state_unlock(sk);
 531
 532         wake_up_interruptible_all(&u->peer_wait);
 533
 534         skpair = unix_peer(sk);
 535
 536         if (skpair != NULL) {
 537                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 538                         unix_state_lock(skpair);
 539                         /* No more writes */
 540                         skpair->sk_shutdown = SHUTDOWN_MASK;
 541                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 542                                 skpair->sk_err = ECONNRESET;
 543                         unix_state_unlock(skpair);
 544                         skpair->sk_state_change(skpair);
 545                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 546                 }
 547
 548                 unix_dgram_peer_wake_disconnect(sk, skpair);
 549                 sock_put(skpair); /* It may now die */
 550                 unix_peer(sk) = NULL;
 551         }
 552
 553         /* Try to flush out this socket. Throw out buffers at least */
 554
 555         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 556                 if (state == TCP_LISTEN)
 557                         unix_release_sock(skb->sk, 1);
 558                 /* passed fds are erased in the kfree_skb hook        */
 559                 UNIXCB(skb).consumed = skb->len;
 560                 kfree_skb(skb);
 561         }
 562
 563         if (path.dentry)
 564                 path_put(&path);
 565
 566         sock_put(sk);
 567
 568         /* ---- Socket is dead now and most probably destroyed ---- */
 569
 570         /*
 571          * Fixme: BSD difference: In BSD all sockets connected to us get
 572          *        ECONNRESET and we die on the spot. In Linux we behave
 573          *        like files and pipes do and wait for the last
 574          *        dereference.
 575          *
 576          * Can't we simply set sock->err?
 577          *
 578          *        What the above comment does talk about? --ANK(980817)
 579          */
 580
 581         if (unix_tot_inflight)
 582                 unix_gc();              /* Garbage collect fds */
 583 }
 584
 585 static void init_peercred(struct sock *sk)
 586 {
 587         put_pid(sk->sk_peer_pid);
 588         if (sk->sk_peer_cred)
 589                 put_cred(sk->sk_peer_cred);
 590         sk->sk_peer_pid  = get_pid(task_tgid(current));
 591         sk->sk_peer_cred = get_current_cred();
 592 }
 593
 594 static void copy_peercred(struct sock *sk, struct sock *peersk)
 595 {
 596         put_pid(sk->sk_peer_pid);
 597         if (sk->sk_peer_cred)
 598                 put_cred(sk->sk_peer_cred);
 599         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601 }
 602
 603 static int unix_listen(struct socket *sock, int backlog)
 604 {
 605         int err;
 606         struct sock *sk = sock->sk;
 607         struct unix_sock *u = unix_sk(sk);
 608         struct pid *old_pid = NULL;
 609
 610         err = -EOPNOTSUPP;
 611         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                 goto out;       /* Only stream/seqpacket sockets accept */
 613         err = -EINVAL;
 614         if (!u->addr)
 615                 goto out;       /* No listens on an unbound socket */
 616         unix_state_lock(sk);
 617         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                 goto out_unlock;
 619         if (backlog > sk->sk_max_ack_backlog)
 620                 wake_up_interruptible_all(&u->peer_wait);
 621         sk->sk_max_ack_backlog  = backlog;
 622         sk->sk_state            = TCP_LISTEN;
 623         /* set credentials so connect can copy them */
 624         init_peercred(sk);
 625         err = 0;
 626
 627 out_unlock:
 628         unix_state_unlock(sk);
 629         put_pid(old_pid);
 630 out:
 631         return err;
 632 }
 633
 634 static int unix_release(struct socket *);
 635 static int unix_bind(struct socket *, struct sockaddr *, int);
 636 static int unix_stream_connect(struct socket *, struct sockaddr *,
 637                                int addr_len, int flags);
 638 static int unix_socketpair(struct socket *, struct socket *);
 639 static int unix_accept(struct socket *, struct socket *, int, bool);
 640 static int unix_getname(struct socket *, struct sockaddr *, int);
 641 static __poll_t unix_poll_mask(struct socket *, __poll_t);
 642 static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
 643 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 644 static int unix_shutdown(struct socket *, int);
 645 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 646 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 647 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 648                                     size_t size, int flags);
 649 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 650                                        struct pipe_inode_info *, size_t size,
 651                                        unsigned int flags);
 652 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 653 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 654 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 655                               int, int);
 656 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 657 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 658                                   int);
 659
 660 static int unix_set_peek_off(struct sock *sk, int val)
 661 {
 662         struct unix_sock *u = unix_sk(sk);
 663
 664         if (mutex_lock_interruptible(&u->iolock))
 665                 return -EINTR;
 666
 667         sk->sk_peek_off = val;
 668         mutex_unlock(&u->iolock);
 669
 670         return 0;
 671 }
 672
 673
 674 static const struct proto_ops unix_stream_ops = {
 675         .family =       PF_UNIX,
 676         .owner =        THIS_MODULE,
 677         .release =      unix_release,
 678         .bind =         unix_bind,
 679         .connect =      unix_stream_connect,
 680         .socketpair =   unix_socketpair,
 681         .accept =       unix_accept,
 682         .getname =      unix_getname,
 683         .poll_mask =    unix_poll_mask,
 684         .ioctl =        unix_ioctl,
 685         .listen =       unix_listen,
 686         .shutdown =     unix_shutdown,
 687         .setsockopt =   sock_no_setsockopt,
 688         .getsockopt =   sock_no_getsockopt,
 689         .sendmsg =      unix_stream_sendmsg,
 690         .recvmsg =      unix_stream_recvmsg,
 691         .mmap =         sock_no_mmap,
 692         .sendpage =     unix_stream_sendpage,
 693         .splice_read =  unix_stream_splice_read,
 694         .set_peek_off = unix_set_peek_off,
 695 };
 696
 697 static const struct proto_ops unix_dgram_ops = {
 698         .family =       PF_UNIX,
 699         .owner =        THIS_MODULE,
 700         .release =      unix_release,
 701         .bind =         unix_bind,
 702         .connect =      unix_dgram_connect,
 703         .socketpair =   unix_socketpair,
 704         .accept =       sock_no_accept,
 705         .getname =      unix_getname,
 706         .poll_mask =    unix_dgram_poll_mask,
 707         .ioctl =        unix_ioctl,
 708         .listen =       sock_no_listen,
 709         .shutdown =     unix_shutdown,
 710         .setsockopt =   sock_no_setsockopt,
 711         .getsockopt =   sock_no_getsockopt,
 712         .sendmsg =      unix_dgram_sendmsg,
 713         .recvmsg =      unix_dgram_recvmsg,
 714         .mmap =         sock_no_mmap,
 715         .sendpage =     sock_no_sendpage,
 716         .set_peek_off = unix_set_peek_off,
 717 };
 718
 719 static const struct proto_ops unix_seqpacket_ops = {
 720         .family =       PF_UNIX,
 721         .owner =        THIS_MODULE,
 722         .release =      unix_release,
 723         .bind =         unix_bind,
 724         .connect =      unix_stream_connect,
 725         .socketpair =   unix_socketpair,
 726         .accept =       unix_accept,
 727         .getname =      unix_getname,
 728         .poll_mask =    unix_dgram_poll_mask,
 729         .ioctl =        unix_ioctl,
 730         .listen =       unix_listen,
 731         .shutdown =     unix_shutdown,
 732         .setsockopt =   sock_no_setsockopt,
 733         .getsockopt =   sock_no_getsockopt,
 734         .sendmsg =      unix_seqpacket_sendmsg,
 735         .recvmsg =      unix_seqpacket_recvmsg,
 736         .mmap =         sock_no_mmap,
 737         .sendpage =     sock_no_sendpage,
 738         .set_peek_off = unix_set_peek_off,
 739 };
 740
 741 static struct proto unix_proto = {
 742         .name                   = "UNIX",
 743         .owner                  = THIS_MODULE,
 744         .obj_size               = sizeof(struct unix_sock),
 745 };
 746
 747 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 748 {
 749         struct sock *sk = NULL;
 750         struct unix_sock *u;
 751
 752         atomic_long_inc(&unix_nr_socks);
 753         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 754                 goto out;
 755
 756         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 757         if (!sk)
 758                 goto out;
 759
 760         sock_init_data(sock, sk);
 761
 762         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 763         sk->sk_write_space      = unix_write_space;
 764         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 765         sk->sk_destruct         = unix_sock_destructor;
 766         u         = unix_sk(sk);
 767         u->path.dentry = NULL;
 768         u->path.mnt = NULL;
 769         spin_lock_init(&u->lock);
 770         atomic_long_set(&u->inflight, 0);
 771         INIT_LIST_HEAD(&u->link);
 772         mutex_init(&u->iolock); /* single task reading lock */
 773         mutex_init(&u->bindlock); /* single task binding lock */
 774         init_waitqueue_head(&u->peer_wait);
 775         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 776         unix_insert_socket(unix_sockets_unbound(sk), sk);
 777 out:
 778         if (sk == NULL)
 779                 atomic_long_dec(&unix_nr_socks);
 780         else {
 781                 local_bh_disable();
 782                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 783                 local_bh_enable();
 784         }
 785         return sk;
 786 }
 787
 788 static int unix_create(struct net *net, struct socket *sock, int protocol,
 789                        int kern)
 790 {
 791         if (protocol && protocol != PF_UNIX)
 792                 return -EPROTONOSUPPORT;
 793
 794         sock->state = SS_UNCONNECTED;
 795
 796         switch (sock->type) {
 797         case SOCK_STREAM:
 798                 sock->ops = &unix_stream_ops;
 799                 break;
 800                 /*
 801                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 802                  *      nothing uses it.
 803                  */
 804         case SOCK_RAW:
 805                 sock->type = SOCK_DGRAM;
 806                 /* fall through */
 807         case SOCK_DGRAM:
 808                 sock->ops = &unix_dgram_ops;
 809                 break;
 810         case SOCK_SEQPACKET:
 811                 sock->ops = &unix_seqpacket_ops;
 812                 break;
 813         default:
 814                 return -ESOCKTNOSUPPORT;
 815         }
 816
 817         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 818 }
 819
 820 static int unix_release(struct socket *sock)
 821 {
 822         struct sock *sk = sock->sk;
 823
 824         if (!sk)
 825                 return 0;
 826
 827         unix_release_sock(sk, 0);
 828         sock->sk = NULL;
 829
 830         return 0;
 831 }
 832
 833 static int unix_autobind(struct socket *sock)
 834 {
 835         struct sock *sk = sock->sk;
 836         struct net *net = sock_net(sk);
 837         struct unix_sock *u = unix_sk(sk);
 838         static u32 ordernum = 1;
 839         struct unix_address *addr;
 840         int err;
 841         unsigned int retries = 0;
 842
 843         err = mutex_lock_interruptible(&u->bindlock);
 844         if (err)
 845                 return err;
 846
 847         err = 0;
 848         if (u->addr)
 849                 goto out;
 850
 851         err = -ENOMEM;
 852         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 853         if (!addr)
 854                 goto out;
 855
 856         addr->name->sun_family = AF_UNIX;
 857         refcount_set(&addr->refcnt, 1);
 858
 859 retry:
 860         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 861         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 862
 863         spin_lock(&unix_table_lock);
 864         ordernum = (ordernum+1)&0xFFFFF;
 865
 866         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 867                                       addr->hash)) {
 868                 spin_unlock(&unix_table_lock);
 869                 /*
 870                  * __unix_find_socket_byname() may take long time if many names
 871                  * are already in use.
 872                  */
 873                 cond_resched();
 874                 /* Give up if all names seems to be in use. */
 875                 if (retries++ == 0xFFFFF) {
 876                         err = -ENOSPC;
 877                         kfree(addr);
 878                         goto out;
 879                 }
 880                 goto retry;
 881         }
 882         addr->hash ^= sk->sk_type;
 883
 884         __unix_remove_socket(sk);
 885         u->addr = addr;
 886         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 887         spin_unlock(&unix_table_lock);
 888         err = 0;
 889
 890 out:    mutex_unlock(&u->bindlock);
 891         return err;
 892 }
 893
 894 static struct sock *unix_find_other(struct net *net,
 895                                     struct sockaddr_un *sunname, int len,
 896                                     int type, unsigned int hash, int *error)
 897 {
 898         struct sock *u;
 899         struct path path;
 900         int err = 0;
 901
 902         if (sunname->sun_path[0]) {
 903                 struct inode *inode;
 904                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 905                 if (err)
 906                         goto fail;
 907                 inode = d_backing_inode(path.dentry);
 908                 err = inode_permission(inode, MAY_WRITE);
 909                 if (err)
 910                         goto put_fail;
 911
 912                 err = -ECONNREFUSED;
 913                 if (!S_ISSOCK(inode->i_mode))
 914                         goto put_fail;
 915                 u = unix_find_socket_byinode(inode);
 916                 if (!u)
 917                         goto put_fail;
 918
 919                 if (u->sk_type == type)
 920                         touch_atime(&path);
 921
 922                 path_put(&path);
 923
 924                 err = -EPROTOTYPE;
 925                 if (u->sk_type != type) {
 926                         sock_put(u);
 927                         goto fail;
 928                 }
 929         } else {
 930                 err = -ECONNREFUSED;
 931                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 932                 if (u) {
 933                         struct dentry *dentry;
 934                         dentry = unix_sk(u)->path.dentry;
 935                         if (dentry)
 936                                 touch_atime(&unix_sk(u)->path);
 937                 } else
 938                         goto fail;
 939         }
 940         return u;
 941
 942 put_fail:
 943         path_put(&path);
 944 fail:
 945         *error = err;
 946         return NULL;
 947 }
 948
 949 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 950 {
 951         struct dentry *dentry;
 952         struct path path;
 953         int err = 0;
 954         /*
 955          * Get the parent directory, calculate the hash for last
 956          * component.
 957          */
 958         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 959         err = PTR_ERR(dentry);
 960         if (IS_ERR(dentry))
 961                 return err;
 962
 963         /*
 964          * All right, let's create it.
 965          */
 966         err = security_path_mknod(&path, dentry, mode, 0);
 967         if (!err) {
 968                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 969                 if (!err) {
 970                         res->mnt = mntget(path.mnt);
 971                         res->dentry = dget(dentry);
 972                 }
 973         }
 974         done_path_create(&path, dentry);
 975         return err;
 976 }
 977
 978 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 979 {
 980         struct sock *sk = sock->sk;
 981         struct net *net = sock_net(sk);
 982         struct unix_sock *u = unix_sk(sk);
 983         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 984         char *sun_path = sunaddr->sun_path;
 985         int err;
 986         unsigned int hash;
 987         struct unix_address *addr;
 988         struct hlist_head *list;
 989         struct path path = { };
 990
 991         err = -EINVAL;
 992         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 993             sunaddr->sun_family != AF_UNIX)
 994                 goto out;
 995
 996         if (addr_len == sizeof(short)) {
 997                 err = unix_autobind(sock);
 998                 goto out;
 999         }
1000
1001         err = unix_mkname(sunaddr, addr_len, &hash);
1002         if (err < 0)
1003                 goto out;
1004         addr_len = err;
1005
1006         if (sun_path[0]) {
1007                 umode_t mode = S_IFSOCK |
1008                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1009                 err = unix_mknod(sun_path, mode, &path);
1010                 if (err) {
1011                         if (err == -EEXIST)
1012                                 err = -EADDRINUSE;
1013                         goto out;
1014                 }
1015         }
1016
1017         err = mutex_lock_interruptible(&u->bindlock);
1018         if (err)
1019                 goto out_put;
1020
1021         err = -EINVAL;
1022         if (u->addr)
1023                 goto out_up;
1024
1025         err = -ENOMEM;
1026         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1027         if (!addr)
1028                 goto out_up;
1029
1030         memcpy(addr->name, sunaddr, addr_len);
1031         addr->len = addr_len;
1032         addr->hash = hash ^ sk->sk_type;
1033         refcount_set(&addr->refcnt, 1);
1034
1035         if (sun_path[0]) {
1036                 addr->hash = UNIX_HASH_SIZE;
1037                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1038                 spin_lock(&unix_table_lock);
1039                 u->path = path;
1040                 list = &unix_socket_table[hash];
1041         } else {
1042                 spin_lock(&unix_table_lock);
1043                 err = -EADDRINUSE;
1044                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1045                                               sk->sk_type, hash)) {
1046                         unix_release_addr(addr);
1047                         goto out_unlock;
1048                 }
1049
1050                 list = &unix_socket_table[addr->hash];
1051         }
1052
1053         err = 0;
1054         __unix_remove_socket(sk);
1055         u->addr = addr;
1056         __unix_insert_socket(list, sk);
1057
1058 out_unlock:
1059         spin_unlock(&unix_table_lock);
1060 out_up:
1061         mutex_unlock(&u->bindlock);
1062 out_put:
1063         if (err)
1064                 path_put(&path);
1065 out:
1066         return err;
1067 }
1068
/*
 * Take the state locks of two socks.  The sock with the lower address is
 * always locked first.  When @sk2 is NULL or equal to @sk1 only @sk1 is
 * locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1083
/*
 * Counterpart of unix_state_double_lock(): @sk1 is always unlocked, @sk2
 * only when it is non-NULL and different from @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (sk2 && sk2 != sk1)
                unix_state_unlock(sk2);
}
1093
1094 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1095                               int alen, int flags)
1096 {
1097         struct sock *sk = sock->sk;
1098         struct net *net = sock_net(sk);
1099         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1100         struct sock *other;
1101         unsigned int hash;
1102         int err;
1103
1104         err = -EINVAL;
1105         if (alen < offsetofend(struct sockaddr, sa_family))
1106                 goto out;
1107
1108         if (addr->sa_family != AF_UNSPEC) {
1109                 err = unix_mkname(sunaddr, alen, &hash);
1110                 if (err < 0)
1111                         goto out;
1112                 alen = err;
1113
1114                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1115                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1116                         goto out;
1117
1118 restart:
1119                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1120                 if (!other)
1121                         goto out;
1122
1123                 unix_state_double_lock(sk, other);
1124
1125                 /* Apparently VFS overslept socket death. Retry. */
1126                 if (sock_flag(other, SOCK_DEAD)) {
1127                         unix_state_double_unlock(sk, other);
1128                         sock_put(other);
1129                         goto restart;
1130                 }
1131
1132                 err = -EPERM;
1133                 if (!unix_may_send(sk, other))
1134                         goto out_unlock;
1135
1136                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1137                 if (err)
1138                         goto out_unlock;
1139
1140         } else {
1141                 /*
1142                  *      1003.1g breaking connected state with AF_UNSPEC
1143                  */
1144                 other = NULL;
1145                 unix_state_double_lock(sk, other);
1146         }
1147
1148         /*
1149          * If it was connected, reconnect.
1150          */
1151         if (unix_peer(sk)) {
1152                 struct sock *old_peer = unix_peer(sk);
1153                 unix_peer(sk) = other;
1154                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1155
1156                 unix_state_double_unlock(sk, other);
1157
1158                 if (other != old_peer)
1159                         unix_dgram_disconnected(sk, old_peer);
1160                 sock_put(old_peer);
1161         } else {
1162                 unix_peer(sk) = other;
1163                 unix_state_double_unlock(sk, other);
1164         }
1165         return 0;
1166
1167 out_unlock:
1168         unix_state_double_unlock(sk, other);
1169         sock_put(other);
1170 out:
1171         return err;
1172 }
1173
1174 static long unix_wait_for_peer(struct sock *other, long timeo)
1175 {
1176         struct unix_sock *u = unix_sk(other);
1177         int sched;
1178         DEFINE_WAIT(wait);
1179
1180         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1181
1182         sched = !sock_flag(other, SOCK_DEAD) &&
1183                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1184                 unix_recvq_full(other);
1185
1186         unix_state_unlock(other);
1187
1188         if (sched)
1189                 timeo = schedule_timeout(timeo);
1190
1191         finish_wait(&u->peer_wait, &wait);
1192         return timeo;
1193 }
1194
/*
 * connect() for connection-oriented AF_UNIX sockets.
 *
 * A new sock (newsk) and an skb charged to it are allocated before any
 * state lock is taken.  The listener is then looked up and locked, and both
 * it and the caller's own state are validated.  newsk becomes the peer of
 * @sock's sock, and the skb is queued on the listener's receive queue
 * (unix_accept() reads the sock back from skb->sk).
 *
 * Returns 0 on success.  Errors produced here: -ENOMEM, -ECONNREFUSED
 * (target not listening or shut down for receive), -EAGAIN (listener queue
 * full and no timeout left), -EISCONN, -EINVAL; errors from unix_mkname(),
 * unix_autobind(), unix_find_other(), sock_intr_errno() and
 * security_unix_stream_connect() are passed through.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct unix_sock *new_u, *listen_u;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int seen_state;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
                err = unix_autobind(sock);
                if (err != 0)
                        goto out;
        }

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /*
         * First of all allocate resources.  If we did it after the state
         * is locked, we would have to recheck everything again anyway.
         */
        err = -ENOMEM;

        /* The sock that will represent the completed connection. */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* The skb that is sent to the listening sock. */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /* Find the listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch the state of the peer. */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN ||
            (other->sk_shutdown & RCV_SHUTDOWN))
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* unix_wait_for_peer() releases other's state lock. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /*
         * Latch our own state.
         *
         * This is a tricky place: our state lock must be taken without
         * dropping the lock on the peer, which is dangerous because a
         * deadlock is possible.  Connecting to self and simultaneous
         * connect attempts are eliminated by checking the socket state:
         * other is TCP_LISTEN, and if sk is TCP_LISTEN that is noticed
         * before the attempt to grab the lock.
         *
         * The state has to be rechecked once the socket is locked.
         */
        seen_state = sk->sk_state;

        if (seen_state == TCP_ESTABLISHED) {
                /* Socket is already connected. */
                err = -EISCONN;
                goto out_unlock;
        }
        if (seen_state != TCP_CLOSE) {
                err = -EINVAL;
                goto out_unlock;
        }
        /* TCP_CLOSE: this is ok, continue with the connect. */

        unix_state_lock_nested(sk);

        if (sk->sk_state != seen_state) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open: set up all the necessary fields. */

        sock_hold(sk);
        unix_peer(newsk) = sk;
        newsk->sk_state = TCP_ESTABLISHED;
        newsk->sk_type = sk->sk_type;
        init_peercred(newsk);
        new_u = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &new_u->peer_wq);
        listen_u = unix_sk(other);

        /* Copy address information from the listening to the new sock. */
        if (listen_u->addr) {
                refcount_inc(&listen_u->addr->refcnt);
                new_u->addr = listen_u->addr;
        }
        if (listen_u->path.dentry) {
                path_get(&listen_u->path);
                new_u->path = listen_u->path;
        }

        /* Set credentials. */
        copy_peercred(sk, other);

        sock->state = SS_CONNECTED;
        sk->sk_state = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk) = newsk;

        unix_state_unlock(sk);

        /* Hand the request skb to the listening sock. */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1369
1370 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1371 {
1372         struct sock *ska = socka->sk, *skb = sockb->sk;
1373
1374         /* Join our sockets back to back */
1375         sock_hold(ska);
1376         sock_hold(skb);
1377         unix_peer(ska) = skb;
1378         unix_peer(skb) = ska;
1379         init_peercred(ska);
1380         init_peercred(skb);
1381
1382         if (ska->sk_type != SOCK_DGRAM) {
1383                 ska->sk_state = TCP_ESTABLISHED;
1384                 skb->sk_state = TCP_ESTABLISHED;
1385                 socka->state  = SS_CONNECTED;
1386                 sockb->state  = SS_CONNECTED;
1387         }
1388         return 0;
1389 }
1390
1391 static void unix_sock_inherit_flags(const struct socket *old,
1392                                     struct socket *new)
1393 {
1394         if (test_bit(SOCK_PASSCRED, &old->flags))
1395                 set_bit(SOCK_PASSCRED, &new->flags);
1396         if (test_bit(SOCK_PASSSEC, &old->flags))
1397                 set_bit(SOCK_PASSSEC, &new->flags);
1398 }
1399
1400 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1401                        bool kern)
1402 {
1403         struct sock *sk = sock->sk;
1404         struct sock *tsk;
1405         struct sk_buff *skb;
1406         int err;
1407
1408         err = -EOPNOTSUPP;
1409         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1410                 goto out;
1411
1412         err = -EINVAL;
1413         if (sk->sk_state != TCP_LISTEN)
1414                 goto out;
1415
1416         /* If socket state is TCP_LISTEN it cannot change (for now...),
1417          * so that no locks are necessary.
1418          */
1419
1420         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1421         if (!skb) {
1422                 /* This means receive shutdown. */
1423                 if (err == 0)
1424                         err = -EINVAL;
1425                 goto out;
1426         }
1427
1428         tsk = skb->sk;
1429         skb_free_datagram(sk, skb);
1430         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1431
1432         /* attach accepted sock to socket */
1433         unix_state_lock(tsk);
1434         newsock->state = SS_CONNECTED;
1435         unix_sock_inherit_flags(sock, newsock);
1436         sock_graft(tsk, newsock);
1437         unix_state_unlock(tsk);
1438         return 0;
1439
1440 out:
1441         return err;
1442 }
1443
1444
1445 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1446 {
1447         struct sock *sk = sock->sk;
1448         struct unix_sock *u;
1449         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1450         int err = 0;
1451
1452         if (peer) {
1453                 sk = unix_peer_get(sk);
1454
1455                 err = -ENOTCONN;
1456                 if (!sk)
1457                         goto out;
1458                 err = 0;
1459         } else {
1460                 sock_hold(sk);
1461         }
1462
1463         u = unix_sk(sk);
1464         unix_state_lock(sk);
1465         if (!u->addr) {
1466                 sunaddr->sun_family = AF_UNIX;
1467                 sunaddr->sun_path[0] = 0;
1468                 err = sizeof(short);
1469         } else {
1470                 struct unix_address *addr = u->addr;
1471
1472                 err = addr->len;
1473                 memcpy(sunaddr, addr->name, addr->len);
1474         }
1475         unix_state_unlock(sk);
1476         sock_put(sk);
1477 out:
1478         return err;
1479 }
1480
1481 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1482 {
1483         int i;
1484
1485         scm->fp = UNIXCB(skb).fp;
1486         UNIXCB(skb).fp = NULL;
1487
1488         for (i = scm->fp->count-1; i >= 0; i--)
1489                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1490 }
1491
1492 static void unix_destruct_scm(struct sk_buff *skb)
1493 {
1494         struct scm_cookie scm;
1495         memset(&scm, 0, sizeof(scm));
1496         scm.pid  = UNIXCB(skb).pid;
1497         if (UNIXCB(skb).fp)
1498                 unix_detach_fds(&scm, skb);
1499
1500         /* Alas, it calls VFS */
1501         /* So fscking what? fput() had been SMP-safe since the last Summer */
1502         scm_destroy(&scm);
1503         sock_wfree(skb);
1504 }
1505
1506 /*
1507  * The "user->unix_inflight" variable is protected by the garbage
1508  * collection lock, and we just read it locklessly here. If you go
1509  * over the limit, there might be a tiny race in actually noticing
1510  * it across threads. Tough.
1511  */
1512 static inline bool too_many_unix_fds(struct task_struct *p)
1513 {
1514         struct user_struct *user = current_user();
1515
1516         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1517                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1518         return false;
1519 }
1520
1521 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1522 {
1523         int i;
1524
1525         if (too_many_unix_fds(current))
1526                 return -ETOOMANYREFS;
1527
1528         /*
1529          * Need to duplicate file references for the sake of garbage
1530          * collection.  Otherwise a socket in the fps might become a
1531          * candidate for GC while the skb is not yet queued.
1532          */
1533         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1534         if (!UNIXCB(skb).fp)
1535                 return -ENOMEM;
1536
1537         for (i = scm->fp->count - 1; i >= 0; i--)
1538                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1539         return 0;
1540 }
1541
1542 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1543 {
1544         int err = 0;
1545
1546         UNIXCB(skb).pid  = get_pid(scm->pid);
1547         UNIXCB(skb).uid = scm->creds.uid;
1548         UNIXCB(skb).gid = scm->creds.gid;
1549         UNIXCB(skb).fp = NULL;
1550         unix_get_secdata(scm, skb);
1551         if (scm->fp && send_fds)
1552                 err = unix_attach_fds(scm, skb);
1553
1554         skb->destructor = unix_destruct_scm;
1555         return err;
1556 }
1557
1558 static bool unix_passcred_enabled(const struct socket *sock,
1559                                   const struct sock *other)
1560 {
1561         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1562                !other->sk_socket ||
1563                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1564 }
1565
1566 /*
1567  * Some apps rely on write() giving SCM_CREDENTIALS
1568  * We include credentials if source or destination socket
1569  * asserted SOCK_PASSCRED.
1570  */
1571 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1572                             const struct sock *other)
1573 {
1574         if (UNIXCB(skb).pid)
1575                 return;
1576         if (unix_passcred_enabled(sock, other)) {
1577                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1578                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1579         }
1580 }
1581
1582 static int maybe_init_creds(struct scm_cookie *scm,
1583                             struct socket *socket,
1584                             const struct sock *other)
1585 {
1586         int err;
1587         struct msghdr msg = { .msg_controllen = 0 };
1588
1589         err = scm_send(socket, &msg, scm, false);
1590         if (err)
1591                 return err;
1592
1593         if (unix_passcred_enabled(socket, other)) {
1594                 scm->pid = get_pid(task_tgid(current));
1595                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1596         }
1597         return err;
1598 }
1599
1600 static bool unix_skb_scm_eq(struct sk_buff *skb,
1601                             struct scm_cookie *scm)
1602 {
1603         const struct unix_skb_parms *u = &UNIXCB(skb);
1604
1605         return u->pid == scm->pid &&
1606                uid_eq(u->uid, scm->creds.uid) &&
1607                gid_eq(u->gid, scm->creds.gid) &&
1608                unix_secdata_eq(scm, skb);
1609 }
1610
1611 /*
1612  *      Send AF_UNIX data.
1613  */
1614
1615 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1616                               size_t len)
1617 {
1618         struct sock *sk = sock->sk;
1619         struct net *net = sock_net(sk);
1620         struct unix_sock *u = unix_sk(sk);
1621         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1622         struct sock *other = NULL;
1623         int namelen = 0; /* fake GCC */
1624         int err;
1625         unsigned int hash;
1626         struct sk_buff *skb;
1627         long timeo;
1628         struct scm_cookie scm;
1629         int data_len = 0;
1630         int sk_locked;
1631
1632         wait_for_unix_gc();
1633         err = scm_send(sock, msg, &scm, false);
1634         if (err < 0)
1635                 return err;
1636
1637         err = -EOPNOTSUPP;
1638         if (msg->msg_flags&MSG_OOB)
1639                 goto out;
1640
1641         if (msg->msg_namelen) {
1642                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1643                 if (err < 0)
1644                         goto out;
1645                 namelen = err;
1646         } else {
1647                 sunaddr = NULL;
1648                 err = -ENOTCONN;
1649                 other = unix_peer_get(sk);
1650                 if (!other)
1651                         goto out;
1652         }
1653
1654         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1655             && (err = unix_autobind(sock)) != 0)
1656                 goto out;
1657
1658         err = -EMSGSIZE;
1659         if (len > sk->sk_sndbuf - 32)
1660                 goto out;
1661
1662         if (len > SKB_MAX_ALLOC) {
1663                 data_len = min_t(size_t,
1664                                  len - SKB_MAX_ALLOC,
1665                                  MAX_SKB_FRAGS * PAGE_SIZE);
1666                 data_len = PAGE_ALIGN(data_len);
1667
1668                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1669         }
1670
1671         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1672                                    msg->msg_flags & MSG_DONTWAIT, &err,
1673                                    PAGE_ALLOC_COSTLY_ORDER);
1674         if (skb == NULL)
1675                 goto out;
1676
1677         err = unix_scm_to_skb(&scm, skb, true);
1678         if (err < 0)
1679                 goto out_free;
1680
1681         skb_put(skb, len - data_len);
1682         skb->data_len = data_len;
1683         skb->len = len;
1684         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1685         if (err)
1686                 goto out_free;
1687
1688         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1689
1690 restart:
1691         if (!other) {
1692                 err = -ECONNRESET;
1693                 if (sunaddr == NULL)
1694                         goto out_free;
1695
1696                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1697                                         hash, &err);
1698                 if (other == NULL)
1699                         goto out_free;
1700         }
1701
1702         if (sk_filter(other, skb) < 0) {
1703                 /* Toss the packet but do not return any error to the sender */
1704                 err = len;
1705                 goto out_free;
1706         }
1707
1708         sk_locked = 0;
1709         unix_state_lock(other);
1710 restart_locked:
1711         err = -EPERM;
1712         if (!unix_may_send(sk, other))
1713                 goto out_unlock;
1714
1715         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1716                 /*
1717                  *      Check with 1003.1g - what should
1718                  *      datagram error
1719                  */
1720                 unix_state_unlock(other);
1721                 sock_put(other);
1722
1723                 if (!sk_locked)
1724                         unix_state_lock(sk);
1725
1726                 err = 0;
1727                 if (unix_peer(sk) == other) {
1728                         unix_peer(sk) = NULL;
1729                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1730
1731                         unix_state_unlock(sk);
1732
1733                         unix_dgram_disconnected(sk, other);
1734                         sock_put(other);
1735                         err = -ECONNREFUSED;
1736                 } else {
1737                         unix_state_unlock(sk);
1738                 }
1739
1740                 other = NULL;
1741                 if (err)
1742                         goto out_free;
1743                 goto restart;
1744         }
1745
1746         err = -EPIPE;
1747         if (other->sk_shutdown & RCV_SHUTDOWN)
1748                 goto out_unlock;
1749
1750         if (sk->sk_type != SOCK_SEQPACKET) {
1751                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1752                 if (err)
1753                         goto out_unlock;
1754         }
1755
1756         /* other == sk && unix_peer(other) != sk if
1757          * - unix_peer(sk) == NULL, destination address bound to sk
1758          * - unix_peer(sk) == sk by time of get but disconnected before lock
1759          */
1760         if (other != sk &&
1761             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1762                 if (timeo) {
1763                         timeo = unix_wait_for_peer(other, timeo);
1764
1765                         err = sock_intr_errno(timeo);
1766                         if (signal_pending(current))
1767                                 goto out_free;
1768
1769                         goto restart;
1770                 }
1771
1772                 if (!sk_locked) {
1773                         unix_state_unlock(other);
1774                         unix_state_double_lock(sk, other);
1775                 }
1776
1777                 if (unix_peer(sk) != other ||
1778                     unix_dgram_peer_wake_me(sk, other)) {
1779                         err = -EAGAIN;
1780                         sk_locked = 1;
1781                         goto out_unlock;
1782                 }
1783
1784                 if (!sk_locked) {
1785                         sk_locked = 1;
1786                         goto restart_locked;
1787                 }
1788         }
1789
1790         if (unlikely(sk_locked))
1791                 unix_state_unlock(sk);
1792
1793         if (sock_flag(other, SOCK_RCVTSTAMP))
1794                 __net_timestamp(skb);
1795         maybe_add_creds(skb, sock, other);
1796         skb_queue_tail(&other->sk_receive_queue, skb);
1797         unix_state_unlock(other);
1798         other->sk_data_ready(other);
1799         sock_put(other);
1800         scm_destroy(&scm);
1801         return len;
1802
1803 out_unlock:
1804         if (sk_locked)
1805                 unix_state_unlock(sk);
1806         unix_state_unlock(other);
1807 out_free:
1808         kfree_skb(skb);
1809 out:
1810         if (other)
1811                 sock_put(other);
1812         scm_destroy(&scm);
1813         return err;
1814 }
1815
/* We use paged skbs for stream sockets. The paged part of each skb is
 * limited to 32768 bytes, rounded up to at least one full page.
 */
1819 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1820
1821 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1822                                size_t len)
1823 {
1824         struct sock *sk = sock->sk;
1825         struct sock *other = NULL;
1826         int err, size;
1827         struct sk_buff *skb;
1828         int sent = 0;
1829         struct scm_cookie scm;
1830         bool fds_sent = false;
1831         int data_len;
1832
1833         wait_for_unix_gc();
1834         err = scm_send(sock, msg, &scm, false);
1835         if (err < 0)
1836                 return err;
1837
1838         err = -EOPNOTSUPP;
1839         if (msg->msg_flags&MSG_OOB)
1840                 goto out_err;
1841
1842         if (msg->msg_namelen) {
1843                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1844                 goto out_err;
1845         } else {
1846                 err = -ENOTCONN;
1847                 other = unix_peer(sk);
1848                 if (!other)
1849                         goto out_err;
1850         }
1851
1852         if (sk->sk_shutdown & SEND_SHUTDOWN)
1853                 goto pipe_err;
1854
1855         while (sent < len) {
1856                 size = len - sent;
1857
1858                 /* Keep two messages in the pipe so it schedules better */
1859                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1860
1861                 /* allow fallback to order-0 allocations */
1862                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1863
1864                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1865
1866                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1867
1868                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1869                                            msg->msg_flags & MSG_DONTWAIT, &err,
1870                                            get_order(UNIX_SKB_FRAGS_SZ));
1871                 if (!skb)
1872                         goto out_err;
1873
1874                 /* Only send the fds in the first buffer */
1875                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1876                 if (err < 0) {
1877                         kfree_skb(skb);
1878                         goto out_err;
1879                 }
1880                 fds_sent = true;
1881
1882                 skb_put(skb, size - data_len);
1883                 skb->data_len = data_len;
1884                 skb->len = size;
1885                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1886                 if (err) {
1887                         kfree_skb(skb);
1888                         goto out_err;
1889                 }
1890
1891                 unix_state_lock(other);
1892
1893                 if (sock_flag(other, SOCK_DEAD) ||
1894                     (other->sk_shutdown & RCV_SHUTDOWN))
1895                         goto pipe_err_free;
1896
1897                 maybe_add_creds(skb, sock, other);
1898                 skb_queue_tail(&other->sk_receive_queue, skb);
1899                 unix_state_unlock(other);
1900                 other->sk_data_ready(other);
1901                 sent += size;
1902         }
1903
1904         scm_destroy(&scm);
1905
1906         return sent;
1907
1908 pipe_err_free:
1909         unix_state_unlock(other);
1910         kfree_skb(skb);
1911 pipe_err:
1912         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1913                 send_sig(SIGPIPE, current, 0);
1914         err = -EPIPE;
1915 out_err:
1916         scm_destroy(&scm);
1917         return sent ? : err;
1918 }
1919
1920 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1921                                     int offset, size_t size, int flags)
1922 {
1923         int err;
1924         bool send_sigpipe = false;
1925         bool init_scm = true;
1926         struct scm_cookie scm;
1927         struct sock *other, *sk = socket->sk;
1928         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1929
1930         if (flags & MSG_OOB)
1931                 return -EOPNOTSUPP;
1932
1933         other = unix_peer(sk);
1934         if (!other || sk->sk_state != TCP_ESTABLISHED)
1935                 return -ENOTCONN;
1936
1937         if (false) {
1938 alloc_skb:
1939                 unix_state_unlock(other);
1940                 mutex_unlock(&unix_sk(other)->iolock);
1941                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1942                                               &err, 0);
1943                 if (!newskb)
1944                         goto err;
1945         }
1946
1947         /* we must acquire iolock as we modify already present
1948          * skbs in the sk_receive_queue and mess with skb->len
1949          */
1950         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1951         if (err) {
1952                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1953                 goto err;
1954         }
1955
1956         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1957                 err = -EPIPE;
1958                 send_sigpipe = true;
1959                 goto err_unlock;
1960         }
1961
1962         unix_state_lock(other);
1963
1964         if (sock_flag(other, SOCK_DEAD) ||
1965             other->sk_shutdown & RCV_SHUTDOWN) {
1966                 err = -EPIPE;
1967                 send_sigpipe = true;
1968                 goto err_state_unlock;
1969         }
1970
1971         if (init_scm) {
1972                 err = maybe_init_creds(&scm, socket, other);
1973                 if (err)
1974                         goto err_state_unlock;
1975                 init_scm = false;
1976         }
1977
1978         skb = skb_peek_tail(&other->sk_receive_queue);
1979         if (tail && tail == skb) {
1980                 skb = newskb;
1981         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1982                 if (newskb) {
1983                         skb = newskb;
1984                 } else {
1985                         tail = skb;
1986                         goto alloc_skb;
1987                 }
1988         } else if (newskb) {
1989                 /* this is fast path, we don't necessarily need to
1990                  * call to kfree_skb even though with newskb == NULL
1991                  * this - does no harm
1992                  */
1993                 consume_skb(newskb);
1994                 newskb = NULL;
1995         }
1996
1997         if (skb_append_pagefrags(skb, page, offset, size)) {
1998                 tail = skb;
1999                 goto alloc_skb;
2000         }
2001
2002         skb->len += size;
2003         skb->data_len += size;
2004         skb->truesize += size;
2005         refcount_add(size, &sk->sk_wmem_alloc);
2006
2007         if (newskb) {
2008                 err = unix_scm_to_skb(&scm, skb, false);
2009                 if (err)
2010                         goto err_state_unlock;
2011                 spin_lock(&other->sk_receive_queue.lock);
2012                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2013                 spin_unlock(&other->sk_receive_queue.lock);
2014         }
2015
2016         unix_state_unlock(other);
2017         mutex_unlock(&unix_sk(other)->iolock);
2018
2019         other->sk_data_ready(other);
2020         scm_destroy(&scm);
2021         return size;
2022
2023 err_state_unlock:
2024         unix_state_unlock(other);
2025 err_unlock:
2026         mutex_unlock(&unix_sk(other)->iolock);
2027 err:
2028         kfree_skb(newskb);
2029         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2030                 send_sig(SIGPIPE, current, 0);
2031         if (!init_scm)
2032                 scm_destroy(&scm);
2033         return err;
2034 }
2035
2036 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2037                                   size_t len)
2038 {
2039         int err;
2040         struct sock *sk = sock->sk;
2041
2042         err = sock_error(sk);
2043         if (err)
2044                 return err;
2045
2046         if (sk->sk_state != TCP_ESTABLISHED)
2047                 return -ENOTCONN;
2048
2049         if (msg->msg_namelen)
2050                 msg->msg_namelen = 0;
2051
2052         return unix_dgram_sendmsg(sock, msg, len);
2053 }
2054
2055 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2056                                   size_t size, int flags)
2057 {
2058         struct sock *sk = sock->sk;
2059
2060         if (sk->sk_state != TCP_ESTABLISHED)
2061                 return -ENOTCONN;
2062
2063         return unix_dgram_recvmsg(sock, msg, size, flags);
2064 }
2065
2066 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2067 {
2068         struct unix_sock *u = unix_sk(sk);
2069
2070         if (u->addr) {
2071                 msg->msg_namelen = u->addr->len;
2072                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2073         }
2074 }
2075
2076 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2077                               size_t size, int flags)
2078 {
2079         struct scm_cookie scm;
2080         struct sock *sk = sock->sk;
2081         struct unix_sock *u = unix_sk(sk);
2082         struct sk_buff *skb, *last;
2083         long timeo;
2084         int err;
2085         int peeked, skip;
2086
2087         err = -EOPNOTSUPP;
2088         if (flags&MSG_OOB)
2089                 goto out;
2090
2091         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2092
2093         do {
2094                 mutex_lock(&u->iolock);
2095
2096                 skip = sk_peek_offset(sk, flags);
2097                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2098                                               &err, &last);
2099                 if (skb)
2100                         break;
2101
2102                 mutex_unlock(&u->iolock);
2103
2104                 if (err != -EAGAIN)
2105                         break;
2106         } while (timeo &&
2107                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2108
2109         if (!skb) { /* implies iolock unlocked */
2110                 unix_state_lock(sk);
2111                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2112                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2113                     (sk->sk_shutdown & RCV_SHUTDOWN))
2114                         err = 0;
2115                 unix_state_unlock(sk);
2116                 goto out;
2117         }
2118
2119         if (wq_has_sleeper(&u->peer_wait))
2120                 wake_up_interruptible_sync_poll(&u->peer_wait,
2121                                                 EPOLLOUT | EPOLLWRNORM |
2122                                                 EPOLLWRBAND);
2123
2124         if (msg->msg_name)
2125                 unix_copy_addr(msg, skb->sk);
2126
2127         if (size > skb->len - skip)
2128                 size = skb->len - skip;
2129         else if (size < skb->len - skip)
2130                 msg->msg_flags |= MSG_TRUNC;
2131
2132         err = skb_copy_datagram_msg(skb, skip, msg, size);
2133         if (err)
2134                 goto out_free;
2135
2136         if (sock_flag(sk, SOCK_RCVTSTAMP))
2137                 __sock_recv_timestamp(msg, sk, skb);
2138
2139         memset(&scm, 0, sizeof(scm));
2140
2141         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2142         unix_set_secdata(&scm, skb);
2143
2144         if (!(flags & MSG_PEEK)) {
2145                 if (UNIXCB(skb).fp)
2146                         unix_detach_fds(&scm, skb);
2147
2148                 sk_peek_offset_bwd(sk, skb->len);
2149         } else {
2150                 /* It is questionable: on PEEK we could:
2151                    - do not return fds - good, but too simple 8)
2152                    - return fds, and do not return them on read (old strategy,
2153                      apparently wrong)
2154                    - clone fds (I chose it for now, it is the most universal
2155                      solution)
2156
2157                    POSIX 1003.1g does not actually define this clearly
2158                    at all. POSIX 1003.1g doesn't define a lot of things
2159                    clearly however!
2160
2161                 */
2162
2163                 sk_peek_offset_fwd(sk, size);
2164
2165                 if (UNIXCB(skb).fp)
2166                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2167         }
2168         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2169
2170         scm_recv(sock, msg, &scm, flags);
2171
2172 out_free:
2173         skb_free_datagram(sk, skb);
2174         mutex_unlock(&u->iolock);
2175 out:
2176         return err;
2177 }
2178
2179 /*
2180  *      Sleep until more data has arrived. But check for races..
2181  */
2182 static long unix_stream_data_wait(struct sock *sk, long timeo,
2183                                   struct sk_buff *last, unsigned int last_len,
2184                                   bool freezable)
2185 {
2186         struct sk_buff *tail;
2187         DEFINE_WAIT(wait);
2188
2189         unix_state_lock(sk);
2190
2191         for (;;) {
2192                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2193
2194                 tail = skb_peek_tail(&sk->sk_receive_queue);
2195                 if (tail != last ||
2196                     (tail && tail->len != last_len) ||
2197                     sk->sk_err ||
2198                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2199                     signal_pending(current) ||
2200                     !timeo)
2201                         break;
2202
2203                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2204                 unix_state_unlock(sk);
2205                 if (freezable)
2206                         timeo = freezable_schedule_timeout(timeo);
2207                 else
2208                         timeo = schedule_timeout(timeo);
2209                 unix_state_lock(sk);
2210
2211                 if (sock_flag(sk, SOCK_DEAD))
2212                         break;
2213
2214                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2215         }
2216
2217         finish_wait(sk_sleep(sk), &wait);
2218         unix_state_unlock(sk);
2219         return timeo;
2220 }
2221
2222 static unsigned int unix_skb_len(const struct sk_buff *skb)
2223 {
2224         return skb->len - UNIXCB(skb).consumed;
2225 }
2226
2227 struct unix_stream_read_state {
2228         int (*recv_actor)(struct sk_buff *, int, int,
2229                           struct unix_stream_read_state *);
2230         struct socket *socket;
2231         struct msghdr *msg;
2232         struct pipe_inode_info *pipe;
2233         size_t size;
2234         int flags;
2235         unsigned int splice_flags;
2236 };
2237
/*
 *      Common read loop for stream sockets.
 *
 *      Walks the receive queue of state->socket and hands each skb to
 *      state->recv_actor until state->size bytes were transferred, the
 *      low-water target is met with an empty queue, or an error, shutdown
 *      or signal ends the read. @freezable is passed on to
 *      unix_stream_data_wait().
 *
 *      The whole operation runs under u->iolock, which is dropped only
 *      while sleeping for more data. The socket state lock is taken just
 *      for the periods in which the queue is examined.
 *
 *      Returns the number of bytes transferred if that is non-zero,
 *      otherwise err (0 or a negative errno).
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
{
        struct scm_cookie scm;
        struct socket *sock = state->socket;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        int copied = 0;
        int flags = state->flags;
        int noblock = flags & MSG_DONTWAIT;
        bool check_creds = false;
        int target;
        int err = 0;
        long timeo;
        int skip;
        size_t size = state->size;
        unsigned int last_len;

        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
                err = -EINVAL;
                goto out;
        }

        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
                goto out;
        }

        /* Minimum number of bytes to gather before returning, and how
         * long we may sleep for them (0 when MSG_DONTWAIT is set).
         */
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
        timeo = sock_rcvtimeo(sk, noblock);

        memset(&scm, 0, sizeof(scm));

        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */
        mutex_lock(&u->iolock);

        /* Bytes at the head of the queue to step over; never negative */
        skip = max(sk_peek_offset(sk, flags), 0);

        do {
                int chunk;
                bool drop_skb;
                struct sk_buff *skb, *last;

redo:
                /* (Re)start from the head of the queue, state lock held */
                unix_state_lock(sk);
                if (sock_flag(sk, SOCK_DEAD)) {
                        err = -ECONNRESET;
                        goto unlock;
                }
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
again:
                /* Reached with the state lock held and skb the candidate */
                if (skb == NULL) {
                        /* Queue exhausted: done if the target is met */
                        if (copied >= target)
                                goto unlock;

                        /*
                         *      POSIX 1003.1g mandates this order.
                         */

                        err = sock_error(sk);
                        if (err)
                                goto unlock;
                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                goto unlock;

                        unix_state_unlock(sk);
                        if (!timeo) {
                                err = -EAGAIN;
                                break;
                        }

                        /* Let other readers in while we sleep */
                        mutex_unlock(&u->iolock);

                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len, freezable);

                        if (signal_pending(current)) {
                                /* iolock is not held here, so skip the
                                 * common exit path and release scm by hand.
                                 */
                                err = sock_intr_errno(timeo);
                                scm_destroy(&scm);
                                goto out;
                        }

                        mutex_lock(&u->iolock);
                        goto redo;
unlock:
                        /* Every jump to this label holds the state lock */
                        unix_state_unlock(sk);
                        break;
                }

                /* Step over whole skbs covered by the peek offset,
                 * remembering the last one seen for the wait above.
                 */
                while (skip >= unix_skb_len(skb)) {
                        skip -= unix_skb_len(skb);
                        last = skb;
                        last_len = skb->len;
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (!skb)
                                goto again;
                }

                unix_state_unlock(sk);

                if (check_creds) {
                        /* Never glue messages from different writers */
                        if (!unix_skb_scm_eq(skb, &scm))
                                break;
                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
                        /* Copy credentials */
                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
                        unix_set_secdata(&scm, skb);
                        check_creds = true;
                }

                /* Copy address just once */
                /* NOTE(review): sunaddr looks like a block-local declared by
                 * DECLARE_SOCKADDR; if so, clearing it below does not stop
                 * the address being copied again on a later pass - confirm.
                 */
                if (state->msg && state->msg->msg_name) {
                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
                                         state->msg->msg_name);
                        unix_copy_addr(state->msg, skb->sk);
                        sunaddr = NULL;
                }

                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                /* Hold our own reference across the actor call, which runs
                 * without the state lock held.
                 */
                skb_get(skb);
                chunk = state->recv_actor(skb, skip, chunk, state);
                drop_skb = !unix_skb_len(skb);
                /* skb is only safe to use if !drop_skb */
                consume_skb(skb);
                if (chunk < 0) {
                        /* Report a fault only if nothing was copied yet */
                        if (copied == 0)
                                copied = -EFAULT;
                        break;
                }
                copied += chunk;
                size -= chunk;

                if (drop_skb) {
                        /* the skb was touched by a concurrent reader;
                         * we should not expect anything from this skb
                         * anymore and assume it invalid - we can be
                         * sure it was dropped from the socket queue
                         *
                         * let's report a short read
                         */
                        err = 0;
                        break;
                }

                /* Mark read part of skb as used */
                if (!(flags & MSG_PEEK)) {
                        UNIXCB(skb).consumed += chunk;

                        sk_peek_offset_bwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                unix_detach_fds(&scm, skb);

                        /* Partially read skb stays queued; stop here */
                        if (unix_skb_len(skb))
                                break;

                        skb_unlink(skb, &sk->sk_receive_queue);
                        consume_skb(skb);

                        /* Stop once file descriptors were picked up */
                        if (scm.fp)
                                break;
                } else {
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
                                scm.fp = scm_fp_dup(UNIXCB(skb).fp);

                        sk_peek_offset_fwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                break;

                        /* Peek on into the next skb, from its start */
                        skip = 0;
                        last = skb;
                        last_len = skb->len;
                        unix_state_lock(sk);
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (skb)
                                goto again;
                        unix_state_unlock(sk);
                        break;
                }
        } while (size);

        mutex_unlock(&u->iolock);
        /* Without a msghdr there is nowhere to deliver scm; drop it */
        if (state->msg)
                scm_recv(sock, state->msg, &scm, flags);
        else
                scm_destroy(&scm);
out:
        return copied ? : err;
}
2434
2435 static int unix_stream_read_actor(struct sk_buff *skb,
2436                                   int skip, int chunk,
2437                                   struct unix_stream_read_state *state)
2438 {
2439         int ret;
2440
2441         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2442                                     state->msg, chunk);
2443         return ret ?: chunk;
2444 }
2445
2446 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2447                                size_t size, int flags)
2448 {
2449         struct unix_stream_read_state state = {
2450                 .recv_actor = unix_stream_read_actor,
2451                 .socket = sock,
2452                 .msg = msg,
2453                 .size = size,
2454                 .flags = flags
2455         };
2456
2457         return unix_stream_read_generic(&state, true);
2458 }
2459
2460 static int unix_stream_splice_actor(struct sk_buff *skb,
2461                                     int skip, int chunk,
2462                                     struct unix_stream_read_state *state)
2463 {
2464         return skb_splice_bits(skb, state->socket->sk,
2465                                UNIXCB(skb).consumed + skip,
2466                                state->pipe, chunk, state->splice_flags);
2467 }
2468
2469 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2470                                        struct pipe_inode_info *pipe,
2471                                        size_t size, unsigned int flags)
2472 {
2473         struct unix_stream_read_state state = {
2474                 .recv_actor = unix_stream_splice_actor,
2475                 .socket = sock,
2476                 .pipe = pipe,
2477                 .size = size,
2478                 .splice_flags = flags,
2479         };
2480
2481         if (unlikely(*ppos))
2482                 return -ESPIPE;
2483
2484         if (sock->file->f_flags & O_NONBLOCK ||
2485             flags & SPLICE_F_NONBLOCK)
2486                 state.flags = MSG_DONTWAIT;
2487
2488         return unix_stream_read_generic(&state, false);
2489 }
2490
2491 static int unix_shutdown(struct socket *sock, int mode)
2492 {
2493         struct sock *sk = sock->sk;
2494         struct sock *other;
2495
2496         if (mode < SHUT_RD || mode > SHUT_RDWR)
2497                 return -EINVAL;
2498         /* This maps:
2499          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2500          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2501          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2502          */
2503         ++mode;
2504
2505         unix_state_lock(sk);
2506         sk->sk_shutdown |= mode;
2507         other = unix_peer(sk);
2508         if (other)
2509                 sock_hold(other);
2510         unix_state_unlock(sk);
2511         sk->sk_state_change(sk);
2512
2513         if (other &&
2514                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2515
2516                 int peer_mode = 0;
2517
2518                 if (mode&RCV_SHUTDOWN)
2519                         peer_mode |= SEND_SHUTDOWN;
2520                 if (mode&SEND_SHUTDOWN)
2521                         peer_mode |= RCV_SHUTDOWN;
2522                 unix_state_lock(other);
2523                 other->sk_shutdown |= peer_mode;
2524                 unix_state_unlock(other);
2525                 other->sk_state_change(other);
2526                 if (peer_mode == SHUTDOWN_MASK)
2527                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2528                 else if (peer_mode & RCV_SHUTDOWN)
2529                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2530         }
2531         if (other)
2532                 sock_put(other);
2533
2534         return 0;
2535 }
2536
2537 long unix_inq_len(struct sock *sk)
2538 {
2539         struct sk_buff *skb;
2540         long amount = 0;
2541
2542         if (sk->sk_state == TCP_LISTEN)
2543                 return -EINVAL;
2544
2545         spin_lock(&sk->sk_receive_queue.lock);
2546         if (sk->sk_type == SOCK_STREAM ||
2547             sk->sk_type == SOCK_SEQPACKET) {
2548                 skb_queue_walk(&sk->sk_receive_queue, skb)
2549                         amount += unix_skb_len(skb);
2550         } else {
2551                 skb = skb_peek(&sk->sk_receive_queue);
2552                 if (skb)
2553                         amount = skb->len;
2554         }
2555         spin_unlock(&sk->sk_receive_queue.lock);
2556
2557         return amount;
2558 }
2559 EXPORT_SYMBOL_GPL(unix_inq_len);
2560
/* Send-side memory currently charged to @sk, per sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2566
2567 static int unix_open_file(struct sock *sk)
2568 {
2569         struct path path;
2570         struct file *f;
2571         int fd;
2572
2573         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2574                 return -EPERM;
2575
2576         unix_state_lock(sk);
2577         path = unix_sk(sk)->path;
2578         if (!path.dentry) {
2579                 unix_state_unlock(sk);
2580                 return -ENOENT;
2581         }
2582
2583         path_get(&path);
2584         unix_state_unlock(sk);
2585
2586         fd = get_unused_fd_flags(O_CLOEXEC);
2587         if (fd < 0)
2588                 goto out;
2589
2590         f = dentry_open(&path, O_PATH, current_cred());
2591         if (IS_ERR(f)) {
2592                 put_unused_fd(fd);
2593                 fd = PTR_ERR(f);
2594                 goto out;
2595         }
2596
2597         fd_install(fd, f);
2598 out:
2599         path_put(&path);
2600
2601         return fd;
2602 }
2603
2604 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2605 {
2606         struct sock *sk = sock->sk;
2607         long amount = 0;
2608         int err;
2609
2610         switch (cmd) {
2611         case SIOCOUTQ:
2612                 amount = unix_outq_len(sk);
2613                 err = put_user(amount, (int __user *)arg);
2614                 break;
2615         case SIOCINQ:
2616                 amount = unix_inq_len(sk);
2617                 if (amount < 0)
2618                         err = amount;
2619                 else
2620                         err = put_user(amount, (int __user *)arg);
2621                 break;
2622         case SIOCUNIXFILE:
2623                 err = unix_open_file(sk);
2624                 break;
2625         default:
2626                 err = -ENOIOCTLCMD;
2627                 break;
2628         }
2629         return err;
2630 }
2631
2632 static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
2633 {
2634         struct sock *sk = sock->sk;
2635         __poll_t mask = 0;
2636
2637         /* exceptional events? */
2638         if (sk->sk_err)
2639                 mask |= EPOLLERR;
2640         if (sk->sk_shutdown == SHUTDOWN_MASK)
2641                 mask |= EPOLLHUP;
2642         if (sk->sk_shutdown & RCV_SHUTDOWN)
2643                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2644
2645         /* readable? */
2646         if (!skb_queue_empty(&sk->sk_receive_queue))
2647                 mask |= EPOLLIN | EPOLLRDNORM;
2648
2649         /* Connection-based need to check for termination and startup */
2650         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2651             sk->sk_state == TCP_CLOSE)
2652                 mask |= EPOLLHUP;
2653
2654         /*
2655          * we set writable also when the other side has shut down the
2656          * connection. This prevents stuck sockets.
2657          */
2658         if (unix_writable(sk))
2659                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2660
2661         return mask;
2662 }
2663
2664 static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
2665 {
2666         struct sock *sk = sock->sk, *other;
2667         int writable;
2668         __poll_t mask = 0;
2669
2670         /* exceptional events? */
2671         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2672                 mask |= EPOLLERR |
2673                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2674
2675         if (sk->sk_shutdown & RCV_SHUTDOWN)
2676                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2677         if (sk->sk_shutdown == SHUTDOWN_MASK)
2678                 mask |= EPOLLHUP;
2679
2680         /* readable? */
2681         if (!skb_queue_empty(&sk->sk_receive_queue))
2682                 mask |= EPOLLIN | EPOLLRDNORM;
2683
2684         /* Connection-based need to check for termination and startup */
2685         if (sk->sk_type == SOCK_SEQPACKET) {
2686                 if (sk->sk_state == TCP_CLOSE)
2687                         mask |= EPOLLHUP;
2688                 /* connection hasn't started yet? */
2689                 if (sk->sk_state == TCP_SYN_SENT)
2690                         return mask;
2691         }
2692
2693         /* No write status requested, avoid expensive OUT tests. */
2694         if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2695                 return mask;
2696
2697         writable = unix_writable(sk);
2698         if (writable) {
2699                 unix_state_lock(sk);
2700
2701                 other = unix_peer(sk);
2702                 if (other && unix_peer(other) != sk &&
2703                     unix_recvq_full(other) &&
2704                     unix_dgram_peer_wake_me(sk, other))
2705                         writable = 0;
2706
2707                 unix_state_unlock(sk);
2708         }
2709
2710         if (writable)
2711                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2712         else
2713                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2714
2715         return mask;
2716 }
2717
2718 #ifdef CONFIG_PROC_FS
2719
/*
 * The seq_file position packs a hash bucket index and a 1-based offset
 * within that bucket into one value: the low BUCKET_SPACE bits hold the
 * offset, the bits above them hold the bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index b and offset o. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2725
2726 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2727 {
2728         unsigned long offset = get_offset(*pos);
2729         unsigned long bucket = get_bucket(*pos);
2730         struct sock *sk;
2731         unsigned long count = 0;
2732
2733         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2734                 if (sock_net(sk) != seq_file_net(seq))
2735                         continue;
2736                 if (++count == offset)
2737                         break;
2738         }
2739
2740         return sk;
2741 }
2742
2743 static struct sock *unix_next_socket(struct seq_file *seq,
2744                                      struct sock *sk,
2745                                      loff_t *pos)
2746 {
2747         unsigned long bucket;
2748
2749         while (sk > (struct sock *)SEQ_START_TOKEN) {
2750                 sk = sk_next(sk);
2751                 if (!sk)
2752                         goto next_bucket;
2753                 if (sock_net(sk) == seq_file_net(seq))
2754                         return sk;
2755         }
2756
2757         do {
2758                 sk = unix_from_bucket(seq, pos);
2759                 if (sk)
2760                         return sk;
2761
2762 next_bucket:
2763                 bucket = get_bucket(*pos) + 1;
2764                 *pos = set_bucket_offset(bucket, 1);
2765         } while (bucket < ARRAY_SIZE(unix_socket_table));
2766
2767         return NULL;
2768 }
2769
2770 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2771         __acquires(unix_table_lock)
2772 {
2773         spin_lock(&unix_table_lock);
2774
2775         if (!*pos)
2776                 return SEQ_START_TOKEN;
2777
2778         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2779                 return NULL;
2780
2781         return unix_next_socket(seq, NULL, pos);
2782 }
2783
2784 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2785 {
2786         ++*pos;
2787         return unix_next_socket(seq, v, pos);
2788 }
2789
2790 static void unix_seq_stop(struct seq_file *seq, void *v)
2791         __releases(unix_table_lock)
2792 {
2793         spin_unlock(&unix_table_lock);
2794 }
2795
2796 static int unix_seq_show(struct seq_file *seq, void *v)
2797 {
2798
2799         if (v == SEQ_START_TOKEN)
2800                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2801                          "Inode Path\n");
2802         else {
2803                 struct sock *s = v;
2804                 struct unix_sock *u = unix_sk(s);
2805                 unix_state_lock(s);
2806
2807                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2808                         s,
2809                         refcount_read(&s->sk_refcnt),
2810                         0,
2811                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2812                         s->sk_type,
2813                         s->sk_socket ?
2814                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2815                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2816                         sock_i_ino(s));
2817
2818                 if (u->addr) {
2819                         int i, len;
2820                         seq_putc(seq, ' ');
2821
2822                         i = 0;
2823                         len = u->addr->len - sizeof(short);
2824                         if (!UNIX_ABSTRACT(s))
2825                                 len--;
2826                         else {
2827                                 seq_putc(seq, '@');
2828                                 i++;
2829                         }
2830                         for ( ; i < len; i++)
2831                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2832                                          '@');
2833                 }
2834                 unix_state_unlock(s);
2835                 seq_putc(seq, '\n');
2836         }
2837
2838         return 0;
2839 }
2840
2841 static const struct seq_operations unix_seq_ops = {
2842         .start  = unix_seq_start,
2843         .next   = unix_seq_next,
2844         .stop   = unix_seq_stop,
2845         .show   = unix_seq_show,
2846 };
2847 #endif
2848
2849 static const struct net_proto_family unix_family_ops = {
2850         .family = PF_UNIX,
2851         .create = unix_create,
2852         .owner  = THIS_MODULE,
2853 };
2854
2855
2856 static int __net_init unix_net_init(struct net *net)
2857 {
2858         int error = -ENOMEM;
2859
2860         net->unx.sysctl_max_dgram_qlen = 10;
2861         if (unix_sysctl_register(net))
2862                 goto out;
2863
2864 #ifdef CONFIG_PROC_FS
2865         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2866                         sizeof(struct seq_net_private))) {
2867                 unix_sysctl_unregister(net);
2868                 goto out;
2869         }
2870 #endif
2871         error = 0;
2872 out:
2873         return error;
2874 }
2875
2876 static void __net_exit unix_net_exit(struct net *net)
2877 {
2878         unix_sysctl_unregister(net);
2879         remove_proc_entry("unix", net->proc_net);
2880 }
2881
2882 static struct pernet_operations unix_net_ops = {
2883         .init = unix_net_init,
2884         .exit = unix_net_exit,
2885 };
2886
2887 static int __init af_unix_init(void)
2888 {
2889         int rc = -1;
2890
2891         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2892
2893         rc = proto_register(&unix_proto, 1);
2894         if (rc != 0) {
2895                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2896                 goto out;
2897         }
2898
2899         sock_register(&unix_family_ops);
2900         register_pernet_subsys(&unix_net_ops);
2901 out:
2902         return rc;
2903 }
2904
2905 static void __exit af_unix_exit(void)
2906 {
2907         sock_unregister(PF_UNIX);
2908         proto_unregister(&unix_proto);
2909         unregister_pernet_subsys(&unix_net_ops);
2910 }
2911
2912 /* Earlier than device_initcall() so that other drivers invoking
2913    request_module() don't end up in a loop when modprobe tries
2914    to use a UNIX socket. But later than subsys_initcall() because
2915    we depend on stuff initialised there */
2916 fs_initcall(af_unix_init);
2917 module_exit(af_unix_exit);
2918
2919 MODULE_LICENSE("GPL");
2920 MODULE_ALIAS_NETPROTO(PF_UNIX);