Git Repo - J-linux.git/commitdiff
Merge tag 'nfs-for-4.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <[email protected]>
Tue, 12 Jun 2018 17:09:03 +0000 (10:09 -0700)
committer Linus Torvalds <[email protected]>
Tue, 12 Jun 2018 17:09:03 +0000 (10:09 -0700)
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:

   - Fix a 1-byte stack overflow in nfs_idmap_read_and_verify_message

   - Fix a hang due to incorrect error returns in rpcrdma_convert_iovs()

   - Revert an incorrect change to the NFSv4.1 callback channel

   - Fix a bug in the NFSv4.1 sequence error handling

  Features and optimisations:

   - Support for piggybacking a LAYOUTGET operation to the OPEN compound

   - RDMA performance enhancements to deal with transport congestion

   - Add proper SPDX tags for NetApp-contributed RDMA source

   - Do not request delegated file attributes (size+change) from the
     server

   - Optimise away a GETATTR in the lookup revalidate code when doing
     NFSv4 OPEN

   - Optimise away unnecessary lookups for rename targets

   - Misc performance improvements when freeing NFSv4 delegations

  Bugfixes and cleanups:

   - Try to fail quickly if proto=rdma

   - Clean up RDMA receive trace points

   - Fix sillyrename to return the delegation when appropriate

   - Misc attribute revalidation fixes

   - Immediately clear the pNFS layout on a file when the server returns
     ESTALE

   - Return NFS4ERR_DELAY when delegation/layout recalls fail due to
     igrab()

   - Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY"

* tag 'nfs-for-4.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (80 commits)
  skip LAYOUTRETURN if layout is invalid
  NFSv4.1: Fix the client behaviour on NFS4ERR_SEQ_FALSE_RETRY
  NFSv4: Fix a typo in nfs41_sequence_process
  NFSv4: Revert commit 5f83d86cf531d ("NFSv4.x: Fix wraparound issues..")
  NFSv4: Return NFS4ERR_DELAY when a layout recall fails due to igrab()
  NFSv4: Return NFS4ERR_DELAY when a delegation recall fails due to igrab()
  NFSv4.0: Remove transport protocol name from non-UCS client ID
  NFSv4.0: Remove cl_ipaddr from non-UCS client ID
  NFSv4: Fix a compiler warning when CONFIG_NFS_V4_1 is undefined
  NFS: Filter cache invalidation when holding a delegation
  NFS: Ignore NFS_INO_REVAL_FORCED in nfs_check_inode_attributes()
  NFS: Improve caching while holding a delegation
  NFS: Fix attribute revalidation
  NFS: fix up nfs_setattr_update_inode
  NFSv4: Ensure the inode is clean when we set a delegation
  NFSv4: Ignore NFS_INO_REVAL_FORCED in nfs4_proc_access
  NFSv4: Don't ask for delegated attributes when adding a hard link
  NFSv4: Don't ask for delegated attributes when revalidating the inode
  NFS: Pass the inode down to the getattr() callback
  NFSv4: Don't request size+change attribute if they are delegated to us
  ...

fs/nfs/client.c
include/trace/events/rpcrdma.h
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/module.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

diff --combined fs/nfs/client.c
index bbc91d7ca1bd43db7335f1dd8afe4182dd319ecf,02e97c29af0c54400f473dfc9074a90e5a943413..377a61654a887401da3a97b942599eb494dbf3d6
@@@ -969,7 -969,8 +969,8 @@@ struct nfs_server *nfs_create_server(st
        }
  
        if (!(fattr->valid & NFS_ATTR_FATTR)) {
-               error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL);
+               error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh,
+                               fattr, NULL, NULL);
                if (error < 0) {
                        dprintk("nfs_create_server: getattr error = %d\n", -error);
                        goto error;
@@@ -1067,6 -1068,7 +1068,6 @@@ void nfs_clients_init(struct net *net
  }
  
  #ifdef CONFIG_PROC_FS
 -static int nfs_server_list_open(struct inode *inode, struct file *file);
  static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
  static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
  static void nfs_server_list_stop(struct seq_file *p, void *v);
@@@ -1079,6 -1081,14 +1080,6 @@@ static const struct seq_operations nfs_
        .show   = nfs_server_list_show,
  };
  
 -static const struct file_operations nfs_server_list_fops = {
 -      .open           = nfs_server_list_open,
 -      .read           = seq_read,
 -      .llseek         = seq_lseek,
 -      .release        = seq_release_net,
 -};
 -
 -static int nfs_volume_list_open(struct inode *inode, struct file *file);
  static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
  static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
  static void nfs_volume_list_stop(struct seq_file *p, void *v);
@@@ -1091,6 -1101,23 +1092,6 @@@ static const struct seq_operations nfs_
        .show   = nfs_volume_list_show,
  };
  
 -static const struct file_operations nfs_volume_list_fops = {
 -      .open           = nfs_volume_list_open,
 -      .read           = seq_read,
 -      .llseek         = seq_lseek,
 -      .release        = seq_release_net,
 -};
 -
 -/*
 - * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
 - * we're dealing
 - */
 -static int nfs_server_list_open(struct inode *inode, struct file *file)
 -{
 -      return seq_open_net(inode, file, &nfs_server_list_ops,
 -                         sizeof(struct seq_net_private));
 -}
 -
  /*
   * set up the iterator to start reading from the server list and return the first item
   */
@@@ -1158,6 -1185,15 +1159,6 @@@ static int nfs_server_list_show(struct 
        return 0;
  }
  
 -/*
 - * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
 - */
 -static int nfs_volume_list_open(struct inode *inode, struct file *file)
 -{
 -      return seq_open_net(inode, file, &nfs_volume_list_ops,
 -                         sizeof(struct seq_net_private));
 -}
 -
  /*
   * set up the iterator to start reading from the volume list and return the first item
   */
@@@ -1243,14 -1279,14 +1244,14 @@@ int nfs_fs_proc_net_init(struct net *ne
                goto error_0;
  
        /* a file of servers with which we're dealing */
 -      p = proc_create("servers", S_IFREG|S_IRUGO,
 -                      nn->proc_nfsfs, &nfs_server_list_fops);
 +      p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs,
 +                      &nfs_server_list_ops, sizeof(struct seq_net_private));
        if (!p)
                goto error_1;
  
        /* a file of volumes that we have mounted */
 -      p = proc_create("volumes", S_IFREG|S_IRUGO,
 -                      nn->proc_nfsfs, &nfs_volume_list_fops);
 +      p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs,
 +                      &nfs_volume_list_ops, sizeof(struct seq_net_private));
        if (!p)
                goto error_1;
        return 0;
diff --combined include/trace/events/rpcrdma.h
index 094a676d92a7ee7e01771dfedcdb5195184ff84f,c4494a2b3ecdc4f250fbe9a8aa178a9d7e51ac5e..53df203b8057afd417ebbcb93a298f51af791a49
@@@ -1,8 -1,6 +1,8 @@@
  /* SPDX-License-Identifier: GPL-2.0 */
  /*
 - * Copyright (c) 2017 Oracle.  All rights reserved.
 + * Copyright (c) 2017, 2018 Oracle.  All rights reserved.
 + *
 + * Trace point definitions for the "rpcrdma" subsystem.
   */
  #undef TRACE_SYSTEM
  #define TRACE_SYSTEM rpcrdma
@@@ -530,24 -528,54 +530,54 @@@ TRACE_EVENT(xprtrdma_post_send
  
  TRACE_EVENT(xprtrdma_post_recv,
        TP_PROTO(
-               const struct rpcrdma_rep *rep,
+               const struct ib_cqe *cqe
+       ),
+       TP_ARGS(cqe),
+       TP_STRUCT__entry(
+               __field(const void *, cqe)
+       ),
+       TP_fast_assign(
+               __entry->cqe = cqe;
+       ),
+       TP_printk("cqe=%p",
+               __entry->cqe
+       )
+ );
+ TRACE_EVENT(xprtrdma_post_recvs,
+       TP_PROTO(
+               const struct rpcrdma_xprt *r_xprt,
+               unsigned int count,
                int status
        ),
  
-       TP_ARGS(rep, status),
+       TP_ARGS(r_xprt, count, status),
  
        TP_STRUCT__entry(
-               __field(const void *, rep)
+               __field(const void *, r_xprt)
+               __field(unsigned int, count)
                __field(int, status)
+               __field(int, posted)
+               __string(addr, rpcrdma_addrstr(r_xprt))
+               __string(port, rpcrdma_portstr(r_xprt))
        ),
  
        TP_fast_assign(
-               __entry->rep = rep;
+               __entry->r_xprt = r_xprt;
+               __entry->count = count;
                __entry->status = status;
+               __entry->posted = r_xprt->rx_buf.rb_posted_receives;
+               __assign_str(addr, rpcrdma_addrstr(r_xprt));
+               __assign_str(port, rpcrdma_portstr(r_xprt));
        ),
  
-       TP_printk("rep=%p status=%d",
-               __entry->rep, __entry->status
+       TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
+               __get_str(addr), __get_str(port), __entry->r_xprt,
+               __entry->count, __entry->posted, __entry->status
        )
  );
  
@@@ -586,28 -614,32 +616,32 @@@ TRACE_EVENT(xprtrdma_wc_send
  
  TRACE_EVENT(xprtrdma_wc_receive,
        TP_PROTO(
-               const struct rpcrdma_rep *rep,
                const struct ib_wc *wc
        ),
  
-       TP_ARGS(rep, wc),
+       TP_ARGS(wc),
  
        TP_STRUCT__entry(
-               __field(const void *, rep)
-               __field(unsigned int, byte_len)
+               __field(const void *, cqe)
+               __field(u32, byte_len)
                __field(unsigned int, status)
-               __field(unsigned int, vendor_err)
+               __field(u32, vendor_err)
        ),
  
        TP_fast_assign(
-               __entry->rep = rep;
-               __entry->byte_len = wc->byte_len;
+               __entry->cqe = wc->wr_cqe;
                __entry->status = wc->status;
-               __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
+               if (wc->status) {
+                       __entry->byte_len = 0;
+                       __entry->vendor_err = wc->vendor_err;
+               } else {
+                       __entry->byte_len = wc->byte_len;
+                       __entry->vendor_err = 0;
+               }
        ),
  
-       TP_printk("rep=%p, %u bytes: %s (%u/0x%x)",
-               __entry->rep, __entry->byte_len,
+       TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
+               __entry->cqe, __entry->byte_len,
                rdma_show_wc_status(__entry->status),
                __entry->status, __entry->vendor_err
        )
@@@ -618,6 -650,7 +652,7 @@@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li)
  DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
  
  DEFINE_MR_EVENT(xprtrdma_localinv);
+ DEFINE_MR_EVENT(xprtrdma_dma_map);
  DEFINE_MR_EVENT(xprtrdma_dma_unmap);
  DEFINE_MR_EVENT(xprtrdma_remoteinv);
  DEFINE_MR_EVENT(xprtrdma_recover_mr);
@@@ -801,7 -834,6 +836,6 @@@ TRACE_EVENT(xprtrdma_allocate
                __field(unsigned int, task_id)
                __field(unsigned int, client_id)
                __field(const void *, req)
-               __field(const void *, rep)
                __field(size_t, callsize)
                __field(size_t, rcvsize)
        ),
                __entry->task_id = task->tk_pid;
                __entry->client_id = task->tk_client->cl_clid;
                __entry->req = req;
-               __entry->rep = req ? req->rl_reply : NULL;
                __entry->callsize = task->tk_rqstp->rq_callsize;
                __entry->rcvsize = task->tk_rqstp->rq_rcvsize;
        ),
  
-       TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
+       TP_printk("task:%u@%u req=%p (%zu, %zu)",
                __entry->task_id, __entry->client_id,
-               __entry->req, __entry->rep,
-               __entry->callsize, __entry->rcvsize
+               __entry->req, __entry->callsize, __entry->rcvsize
        )
  );
  
@@@ -850,8 -880,6 +882,6 @@@ TRACE_EVENT(xprtrdma_rpc_done
        )
  );
  
- DEFINE_RXPRT_EVENT(xprtrdma_noreps);
  /**
   ** Callback events
   **/
@@@ -887,586 -915,6 +917,586 @@@ TRACE_EVENT(xprtrdma_cb_setup
  DEFINE_CB_EVENT(xprtrdma_cb_call);
  DEFINE_CB_EVENT(xprtrdma_cb_reply);
  
 +/**
 + ** Server-side RPC/RDMA events
 + **/
 +
 +DECLARE_EVENT_CLASS(svcrdma_xprt_event,
 +      TP_PROTO(
 +              const struct svc_xprt *xprt
 +      ),
 +
 +      TP_ARGS(xprt),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, xprt)
 +              __string(addr, xprt->xpt_remotebuf)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->xprt = xprt;
 +              __assign_str(addr, xprt->xpt_remotebuf);
 +      ),
 +
 +      TP_printk("xprt=%p addr=%s",
 +              __entry->xprt, __get_str(addr)
 +      )
 +);
 +
 +#define DEFINE_XPRT_EVENT(name)                                               \
 +              DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name,   \
 +                              TP_PROTO(                               \
 +                                      const struct svc_xprt *xprt     \
 +                              ),                                      \
 +                              TP_ARGS(xprt))
 +
 +DEFINE_XPRT_EVENT(accept);
 +DEFINE_XPRT_EVENT(fail);
 +DEFINE_XPRT_EVENT(free);
 +
 +TRACE_DEFINE_ENUM(RDMA_MSG);
 +TRACE_DEFINE_ENUM(RDMA_NOMSG);
 +TRACE_DEFINE_ENUM(RDMA_MSGP);
 +TRACE_DEFINE_ENUM(RDMA_DONE);
 +TRACE_DEFINE_ENUM(RDMA_ERROR);
 +
 +#define show_rpcrdma_proc(x)                                          \
 +              __print_symbolic(x,                                     \
 +                              { RDMA_MSG, "RDMA_MSG" },               \
 +                              { RDMA_NOMSG, "RDMA_NOMSG" },           \
 +                              { RDMA_MSGP, "RDMA_MSGP" },             \
 +                              { RDMA_DONE, "RDMA_DONE" },             \
 +                              { RDMA_ERROR, "RDMA_ERROR" })
 +
 +TRACE_EVENT(svcrdma_decode_rqst,
 +      TP_PROTO(
 +              __be32 *p,
 +              unsigned int hdrlen
 +      ),
 +
 +      TP_ARGS(p, hdrlen),
 +
 +      TP_STRUCT__entry(
 +              __field(u32, xid)
 +              __field(u32, vers)
 +              __field(u32, proc)
 +              __field(u32, credits)
 +              __field(unsigned int, hdrlen)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->xid = be32_to_cpup(p++);
 +              __entry->vers = be32_to_cpup(p++);
 +              __entry->credits = be32_to_cpup(p++);
 +              __entry->proc = be32_to_cpup(p);
 +              __entry->hdrlen = hdrlen;
 +      ),
 +
 +      TP_printk("xid=0x%08x vers=%u credits=%u proc=%s hdrlen=%u",
 +              __entry->xid, __entry->vers, __entry->credits,
 +              show_rpcrdma_proc(__entry->proc), __entry->hdrlen)
 +);
 +
 +TRACE_EVENT(svcrdma_decode_short,
 +      TP_PROTO(
 +              unsigned int hdrlen
 +      ),
 +
 +      TP_ARGS(hdrlen),
 +
 +      TP_STRUCT__entry(
 +              __field(unsigned int, hdrlen)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->hdrlen = hdrlen;
 +      ),
 +
 +      TP_printk("hdrlen=%u", __entry->hdrlen)
 +);
 +
 +DECLARE_EVENT_CLASS(svcrdma_badreq_event,
 +      TP_PROTO(
 +              __be32 *p
 +      ),
 +
 +      TP_ARGS(p),
 +
 +      TP_STRUCT__entry(
 +              __field(u32, xid)
 +              __field(u32, vers)
 +              __field(u32, proc)
 +              __field(u32, credits)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->xid = be32_to_cpup(p++);
 +              __entry->vers = be32_to_cpup(p++);
 +              __entry->credits = be32_to_cpup(p++);
 +              __entry->proc = be32_to_cpup(p);
 +      ),
 +
 +      TP_printk("xid=0x%08x vers=%u credits=%u proc=%u",
 +              __entry->xid, __entry->vers, __entry->credits, __entry->proc)
 +);
 +
 +#define DEFINE_BADREQ_EVENT(name)                                     \
 +              DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\
 +                              TP_PROTO(                               \
 +                                      __be32 *p                       \
 +                              ),                                      \
 +                              TP_ARGS(p))
 +
 +DEFINE_BADREQ_EVENT(badvers);
 +DEFINE_BADREQ_EVENT(drop);
 +DEFINE_BADREQ_EVENT(badproc);
 +DEFINE_BADREQ_EVENT(parse);
 +
 +DECLARE_EVENT_CLASS(svcrdma_segment_event,
 +      TP_PROTO(
 +              u32 handle,
 +              u32 length,
 +              u64 offset
 +      ),
 +
 +      TP_ARGS(handle, length, offset),
 +
 +      TP_STRUCT__entry(
 +              __field(u32, handle)
 +              __field(u32, length)
 +              __field(u64, offset)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->handle = handle;
 +              __entry->length = length;
 +              __entry->offset = offset;
 +      ),
 +
 +      TP_printk("%u@0x%016llx:0x%08x",
 +              __entry->length, (unsigned long long)__entry->offset,
 +              __entry->handle
 +      )
 +);
 +
 +#define DEFINE_SEGMENT_EVENT(name)                                    \
 +              DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\
 +                              TP_PROTO(                               \
 +                                      u32 handle,                     \
 +                                      u32 length,                     \
 +                                      u64 offset                      \
 +                              ),                                      \
 +                              TP_ARGS(handle, length, offset))
 +
 +DEFINE_SEGMENT_EVENT(rseg);
 +DEFINE_SEGMENT_EVENT(wseg);
 +
 +DECLARE_EVENT_CLASS(svcrdma_chunk_event,
 +      TP_PROTO(
 +              u32 length
 +      ),
 +
 +      TP_ARGS(length),
 +
 +      TP_STRUCT__entry(
 +              __field(u32, length)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->length = length;
 +      ),
 +
 +      TP_printk("length=%u",
 +              __entry->length
 +      )
 +);
 +
 +#define DEFINE_CHUNK_EVENT(name)                                      \
 +              DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\
 +                              TP_PROTO(                               \
 +                                      u32 length                      \
 +                              ),                                      \
 +                              TP_ARGS(length))
 +
 +DEFINE_CHUNK_EVENT(pzr);
 +DEFINE_CHUNK_EVENT(write);
 +DEFINE_CHUNK_EVENT(reply);
 +
 +TRACE_EVENT(svcrdma_encode_read,
 +      TP_PROTO(
 +              u32 length,
 +              u32 position
 +      ),
 +
 +      TP_ARGS(length, position),
 +
 +      TP_STRUCT__entry(
 +              __field(u32, length)
 +              __field(u32, position)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->length = length;
 +              __entry->position = position;
 +      ),
 +
 +      TP_printk("length=%u position=%u",
 +              __entry->length, __entry->position
 +      )
 +);
 +
 +DECLARE_EVENT_CLASS(svcrdma_error_event,
 +      TP_PROTO(
 +              __be32 xid
 +      ),
 +
 +      TP_ARGS(xid),
 +
 +      TP_STRUCT__entry(
 +              __field(u32, xid)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->xid = be32_to_cpu(xid);
 +      ),
 +
 +      TP_printk("xid=0x%08x",
 +              __entry->xid
 +      )
 +);
 +
 +#define DEFINE_ERROR_EVENT(name)                                      \
 +              DEFINE_EVENT(svcrdma_error_event, svcrdma_err_##name,   \
 +                              TP_PROTO(                               \
 +                                      __be32 xid                      \
 +                              ),                                      \
 +                              TP_ARGS(xid))
 +
 +DEFINE_ERROR_EVENT(vers);
 +DEFINE_ERROR_EVENT(chunk);
 +
 +/**
 + ** Server-side RDMA API events
 + **/
 +
 +TRACE_EVENT(svcrdma_dma_map_page,
 +      TP_PROTO(
 +              const struct svcxprt_rdma *rdma,
 +              const void *page
 +      ),
 +
 +      TP_ARGS(rdma, page),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, page);
 +              __string(device, rdma->sc_cm_id->device->name)
 +              __string(addr, rdma->sc_xprt.xpt_remotebuf)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->page = page;
 +              __assign_str(device, rdma->sc_cm_id->device->name);
 +              __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
 +      ),
 +
 +      TP_printk("addr=%s device=%s page=%p",
 +              __get_str(addr), __get_str(device), __entry->page
 +      )
 +);
 +
 +TRACE_EVENT(svcrdma_dma_map_rwctx,
 +      TP_PROTO(
 +              const struct svcxprt_rdma *rdma,
 +              int status
 +      ),
 +
 +      TP_ARGS(rdma, status),
 +
 +      TP_STRUCT__entry(
 +              __field(int, status)
 +              __string(device, rdma->sc_cm_id->device->name)
 +              __string(addr, rdma->sc_xprt.xpt_remotebuf)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->status = status;
 +              __assign_str(device, rdma->sc_cm_id->device->name);
 +              __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
 +      ),
 +
 +      TP_printk("addr=%s device=%s status=%d",
 +              __get_str(addr), __get_str(device), __entry->status
 +      )
 +);
 +
 +TRACE_EVENT(svcrdma_send_failed,
 +      TP_PROTO(
 +              const struct svc_rqst *rqst,
 +              int status
 +      ),
 +
 +      TP_ARGS(rqst, status),
 +
 +      TP_STRUCT__entry(
 +              __field(int, status)
 +              __field(u32, xid)
 +              __field(const void *, xprt)
 +              __string(addr, rqst->rq_xprt->xpt_remotebuf)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->status = status;
 +              __entry->xid = __be32_to_cpu(rqst->rq_xid);
 +              __entry->xprt = rqst->rq_xprt;
 +              __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
 +      ),
 +
 +      TP_printk("xprt=%p addr=%s xid=0x%08x status=%d",
 +              __entry->xprt, __get_str(addr),
 +              __entry->xid, __entry->status
 +      )
 +);
 +
 +DECLARE_EVENT_CLASS(svcrdma_sendcomp_event,
 +      TP_PROTO(
 +              const struct ib_wc *wc
 +      ),
 +
 +      TP_ARGS(wc),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, cqe)
 +              __field(unsigned int, status)
 +              __field(unsigned int, vendor_err)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->cqe = wc->wr_cqe;
 +              __entry->status = wc->status;
 +              if (wc->status)
 +                      __entry->vendor_err = wc->vendor_err;
 +              else
 +                      __entry->vendor_err = 0;
 +      ),
 +
 +      TP_printk("cqe=%p status=%s (%u/0x%x)",
 +              __entry->cqe, rdma_show_wc_status(__entry->status),
 +              __entry->status, __entry->vendor_err
 +      )
 +);
 +
 +#define DEFINE_SENDCOMP_EVENT(name)                                   \
 +              DEFINE_EVENT(svcrdma_sendcomp_event, svcrdma_wc_##name, \
 +                              TP_PROTO(                               \
 +                                      const struct ib_wc *wc          \
 +                              ),                                      \
 +                              TP_ARGS(wc))
 +
 +TRACE_EVENT(svcrdma_post_send,
 +      TP_PROTO(
 +              const struct ib_send_wr *wr,
 +              int status
 +      ),
 +
 +      TP_ARGS(wr, status),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, cqe)
 +              __field(unsigned int, num_sge)
 +              __field(u32, inv_rkey)
 +              __field(int, status)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->cqe = wr->wr_cqe;
 +              __entry->num_sge = wr->num_sge;
 +              __entry->inv_rkey = (wr->opcode == IB_WR_SEND_WITH_INV) ?
 +                                      wr->ex.invalidate_rkey : 0;
 +              __entry->status = status;
 +      ),
 +
 +      TP_printk("cqe=%p num_sge=%u inv_rkey=0x%08x status=%d",
 +              __entry->cqe, __entry->num_sge,
 +              __entry->inv_rkey, __entry->status
 +      )
 +);
 +
 +DEFINE_SENDCOMP_EVENT(send);
 +
 +TRACE_EVENT(svcrdma_post_recv,
 +      TP_PROTO(
 +              const struct ib_recv_wr *wr,
 +              int status
 +      ),
 +
 +      TP_ARGS(wr, status),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, cqe)
 +              __field(int, status)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->cqe = wr->wr_cqe;
 +              __entry->status = status;
 +      ),
 +
 +      TP_printk("cqe=%p status=%d",
 +              __entry->cqe, __entry->status
 +      )
 +);
 +
 +TRACE_EVENT(svcrdma_wc_receive,
 +      TP_PROTO(
 +              const struct ib_wc *wc
 +      ),
 +
 +      TP_ARGS(wc),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, cqe)
 +              __field(u32, byte_len)
 +              __field(unsigned int, status)
 +              __field(u32, vendor_err)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->cqe = wc->wr_cqe;
 +              __entry->status = wc->status;
 +              if (wc->status) {
 +                      __entry->byte_len = 0;
 +                      __entry->vendor_err = wc->vendor_err;
 +              } else {
 +                      __entry->byte_len = wc->byte_len;
 +                      __entry->vendor_err = 0;
 +              }
 +      ),
 +
 +      TP_printk("cqe=%p byte_len=%u status=%s (%u/0x%x)",
 +              __entry->cqe, __entry->byte_len,
 +              rdma_show_wc_status(__entry->status),
 +              __entry->status, __entry->vendor_err
 +      )
 +);
 +
 +TRACE_EVENT(svcrdma_post_rw,
 +      TP_PROTO(
 +              const void *cqe,
 +              int sqecount,
 +              int status
 +      ),
 +
 +      TP_ARGS(cqe, sqecount, status),
 +
 +      TP_STRUCT__entry(
 +              __field(const void *, cqe)
 +              __field(int, sqecount)
 +              __field(int, status)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->cqe = cqe;
 +              __entry->sqecount = sqecount;
 +              __entry->status = status;
 +      ),
 +
 +      TP_printk("cqe=%p sqecount=%d status=%d",
 +              __entry->cqe, __entry->sqecount, __entry->status
 +      )
 +);
 +
 +DEFINE_SENDCOMP_EVENT(read);
 +DEFINE_SENDCOMP_EVENT(write);
 +
 +TRACE_EVENT(svcrdma_cm_event,
 +      TP_PROTO(
 +              const struct rdma_cm_event *event,
 +              const struct sockaddr *sap
 +      ),
 +
 +      TP_ARGS(event, sap),
 +
 +      TP_STRUCT__entry(
 +              __field(unsigned int, event)
 +              __field(int, status)
 +              __array(__u8, addr, INET6_ADDRSTRLEN + 10)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->event = event->event;
 +              __entry->status = event->status;
 +              snprintf(__entry->addr, sizeof(__entry->addr) - 1,
 +                       "%pISpc", sap);
 +      ),
 +
 +      TP_printk("addr=%s event=%s (%u/%d)",
 +              __entry->addr,
 +              rdma_show_cm_event(__entry->event),
 +              __entry->event, __entry->status
 +      )
 +);
 +
 +TRACE_EVENT(svcrdma_qp_error,
 +      TP_PROTO(
 +              const struct ib_event *event,
 +              const struct sockaddr *sap
 +      ),
 +
 +      TP_ARGS(event, sap),
 +
 +      TP_STRUCT__entry(
 +              __field(unsigned int, event)
 +              __string(device, event->device->name)
 +              __array(__u8, addr, INET6_ADDRSTRLEN + 10)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->event = event->event;
 +              __assign_str(device, event->device->name);
 +              snprintf(__entry->addr, sizeof(__entry->addr) - 1,
 +                       "%pISpc", sap);
 +      ),
 +
 +      TP_printk("addr=%s dev=%s event=%s (%u)",
 +              __entry->addr, __get_str(device),
 +              rdma_show_ib_event(__entry->event), __entry->event
 +      )
 +);
 +
 +DECLARE_EVENT_CLASS(svcrdma_sendqueue_event,
 +      TP_PROTO(
 +              const struct svcxprt_rdma *rdma
 +      ),
 +
 +      TP_ARGS(rdma),
 +
 +      TP_STRUCT__entry(
 +              __field(int, avail)
 +              __field(int, depth)
 +              __string(addr, rdma->sc_xprt.xpt_remotebuf)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->avail = atomic_read(&rdma->sc_sq_avail);
 +              __entry->depth = rdma->sc_sq_depth;
 +              __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
 +      ),
 +
 +      TP_printk("addr=%s sc_sq_avail=%d/%d",
 +              __get_str(addr), __entry->avail, __entry->depth
 +      )
 +);
 +
 +#define DEFINE_SQ_EVENT(name)                                         \
 +              DEFINE_EVENT(svcrdma_sendqueue_event, svcrdma_sq_##name,\
 +                              TP_PROTO(                               \
 +                                      const struct svcxprt_rdma *rdma \
 +                              ),                                      \
 +                              TP_ARGS(rdma))
 +
 +DEFINE_SQ_EVENT(full);
 +DEFINE_SQ_EVENT(retry);
 +
  #endif /* _TRACE_RPCRDMA_H */
  
  #include <trace/define_trace.h>
diff --combined net/sunrpc/xprtrdma/backchannel.c
index dbedc872ec10c5c3b739caa798e03133482b1e27,c8f1c2b89dadf7b6956803d7e14f0b9404d363ef..90adeff4c06b889391ae9d08e22ebe7d13dd9bbc
@@@ -9,10 -9,8 +9,10 @@@
  #include <linux/sunrpc/xprt.h>
  #include <linux/sunrpc/svc.h>
  #include <linux/sunrpc/svc_xprt.h>
 +#include <linux/sunrpc/svc_rdma.h>
  
  #include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
  
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  # define RPCDBG_FACILITY      RPCDBG_TRANS
@@@ -31,29 -29,41 +31,41 @@@ static void rpcrdma_bc_free_rqst(struc
        spin_unlock(&buf->rb_reqslock);
  
        rpcrdma_destroy_req(req);
-       kfree(rqst);
  }
  
- static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
-                                struct rpc_rqst *rqst)
+ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
+                                unsigned int count)
  {
-       struct rpcrdma_regbuf *rb;
-       struct rpcrdma_req *req;
-       size_t size;
-       req = rpcrdma_create_req(r_xprt);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-       size = r_xprt->rx_data.inline_rsize;
-       rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
-       if (IS_ERR(rb))
-               goto out_fail;
-       req->rl_sendbuf = rb;
-       xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
-                    min_t(size_t, size, PAGE_SIZE));
-       rpcrdma_set_xprtdata(rqst, req);
+       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+       struct rpc_rqst *rqst;
+       unsigned int i;
+       for (i = 0; i < (count << 1); i++) {
+               struct rpcrdma_regbuf *rb;
+               struct rpcrdma_req *req;
+               size_t size;
+               req = rpcrdma_create_req(r_xprt);
+               if (IS_ERR(req))
+                       return PTR_ERR(req);
+               rqst = &req->rl_slot;
+               rqst->rq_xprt = xprt;
+               INIT_LIST_HEAD(&rqst->rq_list);
+               INIT_LIST_HEAD(&rqst->rq_bc_list);
+               __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+               spin_lock_bh(&xprt->bc_pa_lock);
+               list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+               spin_unlock_bh(&xprt->bc_pa_lock);
+               size = r_xprt->rx_data.inline_rsize;
+               rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
+               if (IS_ERR(rb))
+                       goto out_fail;
+               req->rl_sendbuf = rb;
+               xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
+                            min_t(size_t, size, PAGE_SIZE));
+       }
        return 0;
  
  out_fail:
        return -ENOMEM;
  }
  
- /* Allocate and add receive buffers to the rpcrdma_buffer's
-  * existing list of rep's. These are released when the
-  * transport is destroyed.
-  */
- static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
-                                unsigned int count)
- {
-       int rc = 0;
-       while (count--) {
-               rc = rpcrdma_create_rep(r_xprt);
-               if (rc)
-                       break;
-       }
-       return rc;
- }
  /**
   * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
   * @xprt: transport associated with these backchannel resources
@@@ -88,9 -81,6 +83,6 @@@
  int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
  {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-       struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
-       struct rpc_rqst *rqst;
-       unsigned int i;
        int rc;
  
        /* The backchannel reply path returns each rpc_rqst to the
        if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
                goto out_err;
  
-       for (i = 0; i < (reqs << 1); i++) {
-               rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
-               if (!rqst)
-                       goto out_free;
-               dprintk("RPC:       %s: new rqst %p\n", __func__, rqst);
-               rqst->rq_xprt = &r_xprt->rx_xprt;
-               INIT_LIST_HEAD(&rqst->rq_list);
-               INIT_LIST_HEAD(&rqst->rq_bc_list);
-               __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-               if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
-                       goto out_free;
-               spin_lock_bh(&xprt->bc_pa_lock);
-               list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-               spin_unlock_bh(&xprt->bc_pa_lock);
-       }
-       rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
+       rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
        if (rc)
                goto out_free;
  
-       rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
-       if (rc)
-               goto out_free;
-       buffer->rb_bc_srv_max_requests = reqs;
+       r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
        request_module("svcrdma");
        trace_xprtrdma_cb_setup(r_xprt, reqs);
        return 0;
@@@ -237,6 -203,7 +205,7 @@@ int xprt_rdma_bc_send_reply(struct rpc_
        if (rc < 0)
                goto failed_marshal;
  
+       rpcrdma_post_recvs(r_xprt, true);
        if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
                goto drop_connection;
        return 0;
@@@ -277,10 -244,14 +246,14 @@@ void xprt_rdma_bc_destroy(struct rpc_xp
   */
  void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
  {
+       struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
        struct rpc_xprt *xprt = rqst->rq_xprt;
  
        dprintk("RPC:       %s: freeing rqst %p (req %p)\n",
-               __func__, rqst, rpcr_to_rdmar(rqst));
+               __func__, rqst, req);
+       rpcrdma_recv_buffer_put(req->rl_reply);
+       req->rl_reply = NULL;
  
        spin_lock_bh(&xprt->bc_pa_lock);
        list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
diff --combined net/sunrpc/xprtrdma/fmr_ops.c
index 3c8d19f6e3205fb6a65a0ac2f037ddb07f0c72b9,58b472666255398864ae291320f2e5f47ee1c063..17fb1e02565466462e95d479b6c1cc9d5f853f20
   * verb (fmr_op_unmap).
   */
  
 +#include <linux/sunrpc/svc_rdma.h>
 +
  #include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
  
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  # define RPCDBG_FACILITY      RPCDBG_TRANS
@@@ -159,10 -156,32 +159,32 @@@ out_release
        fmr_op_release_mr(mr);
  }
  
+ /* On success, sets:
+  *    ep->rep_attr.cap.max_send_wr
+  *    ep->rep_attr.cap.max_recv_wr
+  *    cdata->max_requests
+  *    ia->ri_max_segs
+  */
  static int
  fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
            struct rpcrdma_create_data_internal *cdata)
  {
+       int max_qp_wr;
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+       max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+       max_qp_wr -= 1;
+       if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+               return -ENOMEM;
+       if (cdata->max_requests > max_qp_wr)
+               cdata->max_requests = max_qp_wr;
+       ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
        ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                RPCRDMA_MAX_FMR_SGES);
        return 0;
@@@ -222,6 -241,7 +244,7 @@@ fmr_op_map(struct rpcrdma_xprt *r_xprt
                                     mr->mr_sg, i, mr->mr_dir);
        if (!mr->mr_nents)
                goto out_dmamap_err;
+       trace_xprtrdma_dma_map(mr);
  
        for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
                dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
diff --combined net/sunrpc/xprtrdma/frwr_ops.c
index 2d2fefbb810b5d9150e9929ebe1dfd660b946743,d46dc7e6e30afe736000763cf489aaf90c3e9a6b..c040de196e13a0b2fb7d71478500a34f3fcd6169
   */
  
  #include <linux/sunrpc/rpc_rdma.h>
 +#include <linux/sunrpc/svc_rdma.h>
  
  #include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
  
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  # define RPCDBG_FACILITY      RPCDBG_TRANS
@@@ -204,12 -202,22 +204,22 @@@ out_release
        frwr_op_release_mr(mr);
  }
  
+ /* On success, sets:
+  *    ep->rep_attr.cap.max_send_wr
+  *    ep->rep_attr.cap.max_recv_wr
+  *    cdata->max_requests
+  *    ia->ri_max_segs
+  *
+  * And these FRWR-related fields:
+  *    ia->ri_max_frwr_depth
+  *    ia->ri_mrtype
+  */
  static int
  frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
             struct rpcrdma_create_data_internal *cdata)
  {
        struct ib_device_attr *attrs = &ia->ri_device->attrs;
-       int depth, delta;
+       int max_qp_wr, depth, delta;
  
        ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
        if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
                } while (delta > 0);
        }
  
-       ep->rep_attr.cap.max_send_wr *= depth;
-       if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
-               cdata->max_requests = attrs->max_qp_wr / depth;
+       max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+       max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+       max_qp_wr -= 1;
+       if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+               return -ENOMEM;
+       if (cdata->max_requests > max_qp_wr)
+               cdata->max_requests = max_qp_wr;
+       ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+       if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
+               cdata->max_requests = max_qp_wr / depth;
                if (!cdata->max_requests)
                        return -EINVAL;
                ep->rep_attr.cap.max_send_wr = cdata->max_requests *
                                               depth;
        }
+       ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+       ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
  
        ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                ia->ri_max_frwr_depth);
@@@ -395,6 -415,7 +417,7 @@@ frwr_op_map(struct rpcrdma_xprt *r_xprt
        mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
        if (!mr->mr_nents)
                goto out_dmamap_err;
+       trace_xprtrdma_dma_map(mr);
  
        ibmr = frwr->fr_mr;
        n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
diff --combined net/sunrpc/xprtrdma/module.c
index d95ac0736b7fc218febc1b759751f755b5e596d8,f338065121f26dfca097671e95e4f5d035f26c72..620327c01302ce8702062bd48f7545f9882cea24
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  /*
   * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
   */
  
  #include <asm/swab.h>
  
 -#define CREATE_TRACE_POINTS
  #include "xprt_rdma.h"
  
 +#define CREATE_TRACE_POINTS
 +#include <trace/events/rpcrdma.h>
 +
  MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
  MODULE_DESCRIPTION("RPC/RDMA Transport");
  MODULE_LICENSE("Dual BSD/GPL");
diff --combined net/sunrpc/xprtrdma/rpc_rdma.c
index b942d7e0aef51be71c67b8ab35276aefb607f99e,1c78516aa6f2abaab96d77c14ba865412482aaa3..c8ae983c6cc017ae342ac71604625e25bb7fe777
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  /*
   * Copyright (c) 2014-2017 Oracle.  All rights reserved.
   * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
   * to the Linux RPC framework lives.
   */
  
 -#include "xprt_rdma.h"
 -
  #include <linux/highmem.h>
  
 +#include <linux/sunrpc/svc_rdma.h>
 +
 +#include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
 +
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  # define RPCDBG_FACILITY      RPCDBG_TRANS
  #endif
  
- static const char transfertypes[][12] = {
-       "inline",       /* no chunks */
-       "read list",    /* some argument via rdma read */
-       "*read list",   /* entire request via rdma read */
-       "write list",   /* some result via rdma write */
-       "reply chunk"   /* entire reply via rdma write */
- };
  /* Returns size of largest RPC-over-RDMA header in a Call message
   *
   * The largest Call header contains a full-size Read list and a
@@@ -233,7 -223,7 +226,7 @@@ rpcrdma_convert_iovs(struct rpcrdma_xpr
                         */
                        *ppages = alloc_page(GFP_ATOMIC);
                        if (!*ppages)
-                               return -EAGAIN;
+                               return -ENOBUFS;
                }
                seg->mr_page = *ppages;
                seg->mr_offset = (char *)page_base;
@@@ -368,7 -358,7 +361,7 @@@ rpcrdma_encode_read_list(struct rpcrdma
                seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                                   false, &mr);
                if (IS_ERR(seg))
-                       goto out_maperr;
+                       return PTR_ERR(seg);
                rpcrdma_mr_push(mr, &req->rl_registered);
  
                if (encode_read_segment(xdr, mr, pos) < 0)
        } while (nsegs);
  
        return 0;
- out_maperr:
-       if (PTR_ERR(seg) == -EAGAIN)
-               xprt_wait_for_buffer_space(rqst->rq_task, NULL);
-       return PTR_ERR(seg);
  }
  
  /* Register and XDR encode the Write list. Supports encoding a list
@@@ -431,7 -416,7 +419,7 @@@ rpcrdma_encode_write_list(struct rpcrdm
                seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                                   true, &mr);
                if (IS_ERR(seg))
-                       goto out_maperr;
+                       return PTR_ERR(seg);
                rpcrdma_mr_push(mr, &req->rl_registered);
  
                if (encode_rdma_segment(xdr, mr) < 0)
        *segcount = cpu_to_be32(nchunks);
  
        return 0;
- out_maperr:
-       if (PTR_ERR(seg) == -EAGAIN)
-               xprt_wait_for_buffer_space(rqst->rq_task, NULL);
-       return PTR_ERR(seg);
  }
  
  /* Register and XDR encode the Reply chunk. Supports encoding an array
@@@ -494,7 -474,7 +477,7 @@@ rpcrdma_encode_reply_chunk(struct rpcrd
                seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
                                                   true, &mr);
                if (IS_ERR(seg))
-                       goto out_maperr;
+                       return PTR_ERR(seg);
                rpcrdma_mr_push(mr, &req->rl_registered);
  
                if (encode_rdma_segment(xdr, mr) < 0)
        *segcount = cpu_to_be32(nchunks);
  
        return 0;
- out_maperr:
-       if (PTR_ERR(seg) == -EAGAIN)
-               xprt_wait_for_buffer_space(rqst->rq_task, NULL);
-       return PTR_ERR(seg);
  }
  
  /**
@@@ -712,7 -687,7 +690,7 @@@ rpcrdma_prepare_send_sges(struct rpcrdm
  {
        req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
        if (!req->rl_sendctx)
-               return -ENOBUFS;
+               return -EAGAIN;
        req->rl_sendctx->sc_wr.num_sge = 0;
        req->rl_sendctx->sc_unmap_count = 0;
        req->rl_sendctx->sc_req = req;
@@@ -886,7 -861,15 +864,15 @@@ rpcrdma_marshal_req(struct rpcrdma_xpr
        return 0;
  
  out_err:
-       r_xprt->rx_stats.failed_marshal_count++;
+       switch (ret) {
+       case -EAGAIN:
+               xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+               break;
+       case -ENOBUFS:
+               break;
+       default:
+               r_xprt->rx_stats.failed_marshal_count++;
+       }
        return ret;
  }
  
@@@ -1029,8 -1012,6 +1015,6 @@@ rpcrdma_is_bcall(struct rpcrdma_xprt *r
  
  out_short:
        pr_warn("RPC/RDMA short backward direction call\n");
-       if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
-               xprt_disconnect_done(&r_xprt->rx_xprt);
        return true;
  }
  #else /* CONFIG_SUNRPC_BACKCHANNEL */
@@@ -1336,13 -1317,14 +1320,14 @@@ void rpcrdma_reply_handler(struct rpcrd
        u32 credits;
        __be32 *p;
  
+       --buf->rb_posted_receives;
        if (rep->rr_hdrbuf.head[0].iov_len == 0)
                goto out_badstatus;
  
+       /* Fixed transport header fields */
        xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
                        rep->rr_hdrbuf.head[0].iov_base);
-       /* Fixed transport header fields */
        p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
        if (unlikely(!p))
                goto out_shortreply;
  
        trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
  
+       rpcrdma_post_recvs(r_xprt, false);
        queue_work(rpcrdma_receive_wq, &rep->rr_work);
        return;
  
- out_badstatus:
-       rpcrdma_recv_buffer_put(rep);
-       if (r_xprt->rx_ep.rep_connected == 1) {
-               r_xprt->rx_ep.rep_connected = -EIO;
-               rpcrdma_conn_func(&r_xprt->rx_ep);
-       }
-       return;
  out_badversion:
        trace_xprtrdma_reply_vers(rep);
        goto repost;
@@@ -1411,7 -1386,7 +1389,7 @@@ out_shortreply
   * receive buffer before returning.
   */
  repost:
-       r_xprt->rx_stats.bad_reply_count++;
-       if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
-               rpcrdma_recv_buffer_put(rep);
+       rpcrdma_post_recvs(r_xprt, false);
+ out_badstatus:
+       rpcrdma_recv_buffer_put(rep);
  }
diff --combined net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 343e7add672cdfd68b4aa36bb3cae8393f08b263,1035516d54e28079e233a4fb5f3ae5bdd4ffe76f..a68180090554f2f40ebb9ed5cd72a70cd9639541
@@@ -1,16 -1,13 +1,16 @@@
  // SPDX-License-Identifier: GPL-2.0
  /*
 - * Copyright (c) 2015 Oracle.  All rights reserved.
 + * Copyright (c) 2015-2018 Oracle.  All rights reserved.
   *
   * Support for backward direction RPCs on RPC/RDMA (server-side).
   */
  
  #include <linux/module.h>
 +
  #include <linux/sunrpc/svc_rdma.h>
 +
  #include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
  
  #define RPCDBG_FACILITY       RPCDBG_SVCXPRT
  
@@@ -115,21 -112,39 +115,21 @@@ out_notfound
   * the adapter has a small maximum SQ depth.
   */
  static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 -                            struct rpc_rqst *rqst)
 +                            struct rpc_rqst *rqst,
 +                            struct svc_rdma_send_ctxt *ctxt)
  {
 -      struct svc_rdma_op_ctxt *ctxt;
        int ret;
  
 -      ctxt = svc_rdma_get_context(rdma);
 -
 -      /* rpcrdma_bc_send_request builds the transport header and
 -       * the backchannel RPC message in the same buffer. Thus only
 -       * one SGE is needed to send both.
 -       */
 -      ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
 -                                   rqst->rq_snd_buf.len);
 +      ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL);
        if (ret < 0)
 -              goto out_err;
 +              return -EIO;
  
        /* Bump page refcnt so Send completion doesn't release
         * the rq_buffer before all retransmits are complete.
         */
        get_page(virt_to_page(rqst->rq_buffer));
 -      ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
 -      if (ret)
 -              goto out_unmap;
 -
 -out_err:
 -      dprintk("svcrdma: %s returns %d\n", __func__, ret);
 -      return ret;
 -
 -out_unmap:
 -      svc_rdma_unmap_dma(ctxt);
 -      svc_rdma_put_context(ctxt, 1);
 -      ret = -EIO;
 -      goto out_err;
 +      ctxt->sc_send_wr.opcode = IB_WR_SEND;
 +      return svc_rdma_send(rdma, &ctxt->sc_send_wr);
  }
  
  /* Server-side transport endpoint wants a whole page for its send
@@@ -176,15 -191,13 +176,15 @@@ rpcrdma_bc_send_request(struct svcxprt_
  {
        struct rpc_xprt *xprt = rqst->rq_xprt;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 +      struct svc_rdma_send_ctxt *ctxt;
        __be32 *p;
        int rc;
  
 -      /* Space in the send buffer for an RPC/RDMA header is reserved
 -       * via xprt->tsh_size.
 -       */
 -      p = rqst->rq_buffer;
 +      ctxt = svc_rdma_send_ctxt_get(rdma);
 +      if (!ctxt)
 +              goto drop_connection;
 +
 +      p = ctxt->sc_xprt_buf;
        *p++ = rqst->rq_xid;
        *p++ = rpcrdma_version;
        *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
        *p++ = xdr_zero;
        *p++ = xdr_zero;
        *p   = xdr_zero;
 +      svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);
  
  #ifdef SVCRDMA_BACKCHANNEL_DEBUG
        pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
  #endif
  
 -      rc = svc_rdma_bc_sendto(rdma, rqst);
 -      if (rc)
 +      rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
 +      if (rc) {
 +              svc_rdma_send_ctxt_put(rdma, ctxt);
                goto drop_connection;
 +      }
        return rc;
  
  drop_connection:
@@@ -263,6 -273,7 +263,7 @@@ static const struct rpc_xprt_ops xprt_r
        .reserve_xprt           = xprt_reserve_xprt_cong,
        .release_xprt           = xprt_release_xprt_cong,
        .alloc_slot             = xprt_alloc_slot,
+       .free_slot              = xprt_free_slot,
        .release_request        = xprt_release_rqst_cong,
        .buf_alloc              = xprt_rdma_bc_allocate,
        .buf_free               = xprt_rdma_bc_free,
@@@ -310,7 -321,7 +311,7 @@@ xprt_setup_rdma_bc(struct xprt_create *
        xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
  
        xprt->prot = XPRT_TRANSPORT_BC_RDMA;
 -      xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
 +      xprt->tsh_size = 0;
        xprt->ops = &xprt_rdma_bc_procs;
  
        memcpy(&xprt->addr, args->dstaddr, args->addrlen);
diff --combined net/sunrpc/xprtrdma/transport.c
index caca977e375538d7780e04461369c5da42a499d6,0c775f05123c5a45abd82861fcb2add73fd54dde..143ce2579ba90cca47a87b5413ba35caf9d10792
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  /*
   * Copyright (c) 2014-2017 Oracle.  All rights reserved.
   * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  #include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/seq_file.h>
 +#include <linux/smp.h>
 +
  #include <linux/sunrpc/addr.h>
 +#include <linux/sunrpc/svc_rdma.h>
  
  #include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
  
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  # define RPCDBG_FACILITY      RPCDBG_TRANS
@@@ -334,9 -331,7 +335,7 @@@ xprt_setup_rdma(struct xprt_create *arg
                return ERR_PTR(-EBADF);
        }
  
-       xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
-                       xprt_rdma_slot_table_entries,
-                       xprt_rdma_slot_table_entries);
+       xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
        if (xprt == NULL) {
                dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
                        __func__);
                xprt_set_bound(xprt);
        xprt_rdma_format_addresses(xprt, sap);
  
-       cdata.max_requests = xprt->max_reqs;
+       cdata.max_requests = xprt_rdma_slot_table_entries;
  
        cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
        cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
@@@ -541,6 -536,47 +540,47 @@@ xprt_rdma_connect(struct rpc_xprt *xprt
        }
  }
  
+ /**
+  * xprt_rdma_alloc_slot - allocate an rpc_rqst
+  * @xprt: controlling RPC transport
+  * @task: RPC task requesting a fresh rpc_rqst
+  *
+  * tk_status values:
+  *    %0 if task->tk_rqstp points to a fresh rpc_rqst
+  *    %-EAGAIN if no rpc_rqst is available; queued on backlog
+  */
+ static void
+ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
+ {
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+       struct rpcrdma_req *req;
+       req = rpcrdma_buffer_get(&r_xprt->rx_buf);
+       if (!req)
+               goto out_sleep;
+       task->tk_rqstp = &req->rl_slot;
+       task->tk_status = 0;
+       return;
+ out_sleep:
+       rpc_sleep_on(&xprt->backlog, task, NULL);
+       task->tk_status = -EAGAIN;
+ }
+ /**
+  * xprt_rdma_free_slot - release an rpc_rqst
+  * @xprt: controlling RPC transport
+  * @rqst: rpc_rqst to release
+  *
+  */
+ static void
+ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
+ {
+       memset(rqst, 0, sizeof(*rqst));
+       rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
+       rpc_wake_up_next(&xprt->backlog);
+ }
  static bool
  rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
                    size_t size, gfp_t flags)
@@@ -611,13 -647,9 +651,9 @@@ xprt_rdma_allocate(struct rpc_task *tas
  {
        struct rpc_rqst *rqst = task->tk_rqstp;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
-       struct rpcrdma_req *req;
+       struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
        gfp_t flags;
  
-       req = rpcrdma_buffer_get(&r_xprt->rx_buf);
-       if (req == NULL)
-               goto out_get;
        flags = RPCRDMA_DEF_GFP;
        if (RPC_IS_SWAPPER(task))
                flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
        if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
                goto out_fail;
  
-       rpcrdma_set_xprtdata(rqst, req);
        rqst->rq_buffer = req->rl_sendbuf->rg_base;
        rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
        trace_xprtrdma_allocate(task, req);
        return 0;
  
  out_fail:
-       rpcrdma_buffer_put(req);
- out_get:
        trace_xprtrdma_allocate(task, NULL);
        return -ENOMEM;
  }
@@@ -656,7 -685,6 +689,6 @@@ xprt_rdma_free(struct rpc_task *task
        if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
                rpcrdma_release_rqst(r_xprt, req);
        trace_xprtrdma_rpc_done(task, req);
-       rpcrdma_buffer_put(req);
  }
  
  /**
@@@ -694,9 -722,6 +726,6 @@@ xprt_rdma_send_request(struct rpc_task 
        if (rc < 0)
                goto failed_marshal;
  
-       if (req->rl_reply == NULL)              /* e.g. reconnection */
-               rpcrdma_recv_buffer_get(req);
        /* Must suppress retransmit to maintain credits */
        if (rqst->rq_connect_cookie == xprt->connect_cookie)
                goto drop_connection;
@@@ -783,7 -808,8 +812,8 @@@ xprt_rdma_disable_swap(struct rpc_xprt 
  static const struct rpc_xprt_ops xprt_rdma_procs = {
        .reserve_xprt           = xprt_reserve_xprt_cong,
        .release_xprt           = xprt_release_xprt_cong, /* sunrpc/xprt.c */
-       .alloc_slot             = xprt_alloc_slot,
+       .alloc_slot             = xprt_rdma_alloc_slot,
+       .free_slot              = xprt_rdma_free_slot,
        .release_request        = xprt_release_rqst_cong,       /* ditto */
        .set_retrans_timeout    = xprt_set_retrans_timeout_def, /* ditto */
        .timer                  = xprt_rdma_timer,
diff --combined net/sunrpc/xprtrdma/verbs.c
index 7f913ece5038078a338ae4cbb8e4063fe7e5a69a,042bb24114b34e8344862367e414634350743298..16161a36dc739896b6712f14ccdbf5cc6106ec8a
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  /*
   * Copyright (c) 2014-2017 Oracle.  All rights reserved.
   * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@@ -59,7 -60,6 +60,7 @@@
  #include <rdma/ib_cm.h>
  
  #include "xprt_rdma.h"
 +#include <trace/events/rpcrdma.h>
  
  /*
   * Globals/Macros
  /*
   * internal functions
   */
+ static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
  static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
  static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
+ static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
  static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
  
  struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@@ -160,7 -162,7 +163,7 @@@ rpcrdma_wc_receive(struct ib_cq *cq, st
                                               rr_cqe);
  
        /* WARNING: Only wr_id and status are reliable at this point */
-       trace_xprtrdma_wc_receive(rep, wc);
+       trace_xprtrdma_wc_receive(wc);
        if (wc->status != IB_WC_SUCCESS)
                goto out_fail;
  
@@@ -232,7 -234,7 +235,7 @@@ rpcrdma_conn_upcall(struct rdma_cm_id *
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
-               ia->ri_async_rc = -EHOSTUNREACH;
+               ia->ri_async_rc = -EPROTO;
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
                connstate = -ENOTCONN;
                goto connected;
        case RDMA_CM_EVENT_UNREACHABLE:
-               connstate = -ENETDOWN;
+               connstate = -ENETUNREACH;
                goto connected;
        case RDMA_CM_EVENT_REJECTED:
                dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
@@@ -306,8 -308,8 +309,8 @@@ rpcrdma_create_id(struct rpcrdma_xprt *
        init_completion(&ia->ri_done);
        init_completion(&ia->ri_remove_done);
  
-       id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
-                           IB_QPT_RC);
+       id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+                           xprt, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(id)) {
                rc = PTR_ERR(id);
                dprintk("RPC:       %s: rdma_create_id() failed %i\n",
@@@ -501,8 -503,8 +504,8 @@@ rpcrdma_ep_create(struct rpcrdma_ep *ep
                  struct rpcrdma_create_data_internal *cdata)
  {
        struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
-       unsigned int max_qp_wr, max_sge;
        struct ib_cq *sendcq, *recvcq;
+       unsigned int max_sge;
        int rc;
  
        max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
        }
        ia->ri_max_send_sges = max_sge;
  
-       if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
-               dprintk("RPC:       %s: insufficient wqe's available\n",
-                       __func__);
-               return -ENOMEM;
-       }
-       max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;
-       /* check provider's send/recv wr limits */
-       if (cdata->max_requests > max_qp_wr)
-               cdata->max_requests = max_qp_wr;
+       rc = ia->ri_ops->ro_open(ia, ep, cdata);
+       if (rc)
+               return rc;
  
        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
        ep->rep_attr.srq = NULL;
-       ep->rep_attr.cap.max_send_wr = cdata->max_requests;
-       ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
-       ep->rep_attr.cap.max_send_wr += 1;      /* drain cqe */
-       rc = ia->ri_ops->ro_open(ia, ep, cdata);
-       if (rc)
-               return rc;
-       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
-       ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
-       ep->rep_attr.cap.max_recv_wr += 1;      /* drain cqe */
        ep->rep_attr.cap.max_send_sge = max_sge;
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;
@@@ -742,7 -728,6 +729,6 @@@ rpcrdma_ep_connect(struct rpcrdma_ep *e
  {
        struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
                                                   rx_ia);
-       unsigned int extras;
        int rc;
  
  retry:
        }
  
        dprintk("RPC:       %s: connected\n", __func__);
-       extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
-       if (extras)
-               rpcrdma_ep_post_extra_recv(r_xprt, extras);
+       rpcrdma_post_recvs(r_xprt, true);
  
  out:
        if (rc)
@@@ -894,6 -878,7 +879,7 @@@ static int rpcrdma_sendctxs_create(stru
                sc->sc_xprt = r_xprt;
                buf->rb_sc_ctxs[i] = sc;
        }
+       buf->rb_flags = 0;
  
        return 0;
  
@@@ -951,7 -936,7 +937,7 @@@ out_emptyq
         * completions recently. This is a sign the Send Queue is
         * backing up. Cause the caller to pause and try again.
         */
-       dprintk("RPC:       %s: empty sendctx queue\n", __func__);
+       set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
        r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
        r_xprt->rx_stats.empty_sendctx_q++;
        return NULL;
   *
   * The caller serializes calls to this function (per rpcrdma_buffer).
   */
- void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
+ static void
+ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
  {
        struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
        unsigned long next_tail;
  
        /* Paired with READ_ONCE */
        smp_store_release(&buf->rb_sc_tail, next_tail);
+       if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
+               smp_mb__after_atomic();
+               xprt_write_space(&sc->sc_xprt->rx_xprt);
+       }
  }
  
  static void
@@@ -1098,14 -1089,8 +1090,8 @@@ rpcrdma_create_req(struct rpcrdma_xprt 
        return req;
  }
  
- /**
-  * rpcrdma_create_rep - Allocate an rpcrdma_rep object
-  * @r_xprt: controlling transport
-  *
-  * Returns 0 on success or a negative errno on failure.
-  */
- int
- rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
+ static int
+ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
  {
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
        rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        rep->rr_recv_wr.num_sge = 1;
+       rep->rr_temp = temp;
  
        spin_lock(&buf->rb_lock);
        list_add(&rep->rr_list, &buf->rb_recv_bufs);
@@@ -1184,12 -1170,8 +1171,8 @@@ rpcrdma_buffer_create(struct rpcrdma_xp
                list_add(&req->rl_list, &buf->rb_send_bufs);
        }
  
+       buf->rb_posted_receives = 0;
        INIT_LIST_HEAD(&buf->rb_recv_bufs);
-       for (i = 0; i <= buf->rb_max_requests; i++) {
-               rc = rpcrdma_create_rep(r_xprt);
-               if (rc)
-                       goto out;
-       }
  
        rc = rpcrdma_sendctxs_create(r_xprt);
        if (rc)
@@@ -1201,28 -1183,6 +1184,6 @@@ out
        return rc;
  }
  
- static struct rpcrdma_req *
- rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
- {
-       struct rpcrdma_req *req;
-       req = list_first_entry(&buf->rb_send_bufs,
-                              struct rpcrdma_req, rl_list);
-       list_del_init(&req->rl_list);
-       return req;
- }
- static struct rpcrdma_rep *
- rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
- {
-       struct rpcrdma_rep *rep;
-       rep = list_first_entry(&buf->rb_recv_bufs,
-                              struct rpcrdma_rep, rr_list);
-       list_del(&rep->rr_list);
-       return rep;
- }
  static void
  rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
  {
@@@ -1281,10 -1241,11 +1242,11 @@@ rpcrdma_buffer_destroy(struct rpcrdma_b
        while (!list_empty(&buf->rb_recv_bufs)) {
                struct rpcrdma_rep *rep;
  
-               rep = rpcrdma_buffer_get_rep_locked(buf);
+               rep = list_first_entry(&buf->rb_recv_bufs,
+                                      struct rpcrdma_rep, rr_list);
+               list_del(&rep->rr_list);
                rpcrdma_destroy_rep(rep);
        }
-       buf->rb_send_count = 0;
  
        spin_lock(&buf->rb_reqslock);
        while (!list_empty(&buf->rb_allreqs)) {
                spin_lock(&buf->rb_reqslock);
        }
        spin_unlock(&buf->rb_reqslock);
-       buf->rb_recv_count = 0;
  
        rpcrdma_mrs_destroy(buf);
  }
@@@ -1372,27 -1332,11 +1333,11 @@@ rpcrdma_mr_unmap_and_put(struct rpcrdma
        __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
  }
  
- static struct rpcrdma_rep *
- rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
- {
-       /* If an RPC previously completed without a reply (say, a
-        * credential problem or a soft timeout occurs) then hold off
-        * on supplying more Receive buffers until the number of new
-        * pending RPCs catches up to the number of posted Receives.
-        */
-       if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
-               return NULL;
-       if (unlikely(list_empty(&buffers->rb_recv_bufs)))
-               return NULL;
-       buffers->rb_recv_count++;
-       return rpcrdma_buffer_get_rep_locked(buffers);
- }
- /*
-  * Get a set of request/reply buffers.
+ /**
+  * rpcrdma_buffer_get - Get a request buffer
+  * @buffers: Buffer pool from which to obtain a buffer
   *
-  * Reply buffer (if available) is attached to send buffer upon return.
+  * Returns a fresh rpcrdma_req, or NULL if none are available.
   */
  struct rpcrdma_req *
  rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        struct rpcrdma_req *req;
  
        spin_lock(&buffers->rb_lock);
-       if (list_empty(&buffers->rb_send_bufs))
-               goto out_reqbuf;
-       buffers->rb_send_count++;
-       req = rpcrdma_buffer_get_req_locked(buffers);
-       req->rl_reply = rpcrdma_buffer_get_rep(buffers);
+       req = list_first_entry_or_null(&buffers->rb_send_bufs,
+                                      struct rpcrdma_req, rl_list);
+       if (req)
+               list_del_init(&req->rl_list);
        spin_unlock(&buffers->rb_lock);
        return req;
- out_reqbuf:
-       spin_unlock(&buffers->rb_lock);
-       return NULL;
  }
  
- /*
-  * Put request/reply buffers back into pool.
-  * Pre-decrement counter/array index.
+ /**
+  * rpcrdma_buffer_put - Put request/reply buffers back into pool
+  * @req: object to return
+  *
   */
  void
  rpcrdma_buffer_put(struct rpcrdma_req *req)
        req->rl_reply = NULL;
  
        spin_lock(&buffers->rb_lock);
-       buffers->rb_send_count--;
-       list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
+       list_add(&req->rl_list, &buffers->rb_send_bufs);
        if (rep) {
-               buffers->rb_recv_count--;
-               list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+               if (!rep->rr_temp) {
+                       list_add(&rep->rr_list, &buffers->rb_recv_bufs);
+                       rep = NULL;
+               }
        }
        spin_unlock(&buffers->rb_lock);
- }
- /*
-  * Recover reply buffers from pool.
-  * This happens when recovering from disconnect.
-  */
- void
- rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
- {
-       struct rpcrdma_buffer *buffers = req->rl_buffer;
-       spin_lock(&buffers->rb_lock);
-       req->rl_reply = rpcrdma_buffer_get_rep(buffers);
-       spin_unlock(&buffers->rb_lock);
+       if (rep)
+               rpcrdma_destroy_rep(rep);
  }
  
  /*
@@@ -1459,10 -1387,13 +1388,13 @@@ rpcrdma_recv_buffer_put(struct rpcrdma_
  {
        struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
  
-       spin_lock(&buffers->rb_lock);
-       buffers->rb_recv_count--;
-       list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
-       spin_unlock(&buffers->rb_lock);
+       if (!rep->rr_temp) {
+               spin_lock(&buffers->rb_lock);
+               list_add(&rep->rr_list, &buffers->rb_recv_bufs);
+               spin_unlock(&buffers->rb_lock);
+       } else {
+               rpcrdma_destroy_rep(rep);
+       }
  }
  
  /**
@@@ -1558,13 -1489,6 +1490,6 @@@ rpcrdma_ep_post(struct rpcrdma_ia *ia
        struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
        int rc;
  
-       if (req->rl_reply) {
-               rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
-               if (rc)
-                       return rc;
-               req->rl_reply = NULL;
-       }
        if (!ep->rep_send_count ||
            test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
                send_wr->send_flags |= IB_SEND_SIGNALED;
        return 0;
  }
  
- int
- rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
-                    struct rpcrdma_rep *rep)
- {
-       struct ib_recv_wr *recv_wr_fail;
-       int rc;
-       if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
-               goto out_map;
-       rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
-       trace_xprtrdma_post_recv(rep, rc);
-       if (rc)
-               return -ENOTCONN;
-       return 0;
- out_map:
-       pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
-       return -EIO;
- }
  /**
-  * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
-  * @r_xprt: transport associated with these backchannel resources
-  * @count: minimum number of incoming requests expected
+  * rpcrdma_post_recvs - Maybe post some Receive buffers
+  * @r_xprt: controlling transport
+  * @temp: when true, allocate temp rpcrdma_rep objects
   *
-  * Returns zero if all requested buffers were posted, or a negative errno.
   */
- int
- rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
+ void
+ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
  {
-       struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       struct rpcrdma_rep *rep;
-       int rc;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct ib_recv_wr *wr, *bad_wr;
+       int needed, count, rc;
  
-       while (count--) {
-               spin_lock(&buffers->rb_lock);
-               if (list_empty(&buffers->rb_recv_bufs))
-                       goto out_reqbuf;
-               rep = rpcrdma_buffer_get_rep_locked(buffers);
-               spin_unlock(&buffers->rb_lock);
+       needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
+       if (buf->rb_posted_receives > needed)
+               return;
+       needed -= buf->rb_posted_receives;
  
-               rc = rpcrdma_ep_post_recv(ia, rep);
-               if (rc)
-                       goto out_rc;
-       }
+       count = 0;
+       wr = NULL;
+       while (needed) {
+               struct rpcrdma_regbuf *rb;
+               struct rpcrdma_rep *rep;
  
-       return 0;
+               spin_lock(&buf->rb_lock);
+               rep = list_first_entry_or_null(&buf->rb_recv_bufs,
+                                              struct rpcrdma_rep, rr_list);
+               if (likely(rep))
+                       list_del(&rep->rr_list);
+               spin_unlock(&buf->rb_lock);
+               if (!rep) {
+                       if (rpcrdma_create_rep(r_xprt, temp))
+                               break;
+                       continue;
+               }
  
- out_reqbuf:
-       spin_unlock(&buffers->rb_lock);
-       trace_xprtrdma_noreps(r_xprt);
-       return -ENOMEM;
+               rb = rep->rr_rdmabuf;
+               if (!rpcrdma_regbuf_is_mapped(rb)) {
+                       if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
+                               rpcrdma_recv_buffer_put(rep);
+                               break;
+                       }
+               }
  
- out_rc:
-       rpcrdma_recv_buffer_put(rep);
-       return rc;
+               trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+               rep->rr_recv_wr.next = wr;
+               wr = &rep->rr_recv_wr;
+               ++count;
+               --needed;
+       }
+       if (!count)
+               return;
+       rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr);
+       if (rc) {
+               for (wr = bad_wr; wr; wr = wr->next) {
+                       struct rpcrdma_rep *rep;
+                       rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
+                       rpcrdma_recv_buffer_put(rep);
+                       --count;
+               }
+       }
+       buf->rb_posted_receives += count;
+       trace_xprtrdma_post_recvs(r_xprt, count, rc);
  }
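
Note on rpcrdma_post_recvs() above: rather than pre-posting one Receive per request buffer, the transport now keeps roughly one posted Receive per granted credit plus two per backchannel request, and chains any shortfall into a single ib_post_recv() call. A standalone sketch of just the budget arithmetic follows; the field names mirror the kernel structure, but the struct here is a stub and recvs_needed() is a hypothetical helper for illustration.

/* Sketch of the Receive-posting budget used in the hunk above. */
#include <stdio.h>

struct buf_stub {
        int rb_credits;              /* most recent credit grant   */
        int rb_bc_srv_max_requests;  /* backchannel request limit  */
        int rb_posted_receives;      /* Receives currently posted  */
};

/* Returns how many new Receive WRs should be chained and posted. */
static int recvs_needed(const struct buf_stub *buf)
{
        int needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);

        if (buf->rb_posted_receives > needed)
                return 0;            /* already at or above target */
        return needed - buf->rb_posted_receives;
}

int main(void)
{
        struct buf_stub buf = { .rb_credits = 32,
                                .rb_bc_srv_max_requests = 2,
                                .rb_posted_receives = 20 };

        printf("post %d more Receives\n", recvs_needed(&buf)); /* 16 */
        return 0;
}
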
index f845b71793e2af036a1ff3cc1107ee4bf1ffa277,38973a97579eb5bfb8fe8e9ef6c8d4463b228da2..2ca14f7c2d51adb89d8df5f313fc2d30c1e9c16c
@@@ -1,3 -1,4 +1,4 @@@
+ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
  /*
   * Copyright (c) 2014-2017 Oracle.  All rights reserved.
   * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@@ -196,6 -197,7 +197,7 @@@ struct rpcrdma_rep 
        __be32                  rr_proc;
        int                     rr_wc_flags;
        u32                     rr_inv_rkey;
+       bool                    rr_temp;
        struct rpcrdma_regbuf   *rr_rdmabuf;
        struct rpcrdma_xprt     *rr_rxprt;
        struct work_struct      rr_work;
@@@ -334,6 -336,7 +336,7 @@@ enum 
  struct rpcrdma_buffer;
  struct rpcrdma_req {
        struct list_head        rl_list;
+       struct rpc_rqst         rl_slot;
        struct rpcrdma_buffer   *rl_buffer;
        struct rpcrdma_rep      *rl_reply;
        struct xdr_stream       rl_stream;
@@@ -356,16 -359,10 +359,10 @@@ enum 
        RPCRDMA_REQ_F_TX_RESOURCES,
  };
  
- static inline void
- rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
- {
-       rqst->rq_xprtdata = req;
- }
  static inline struct rpcrdma_req *
  rpcr_to_rdmar(const struct rpc_rqst *rqst)
  {
-       return rqst->rq_xprtdata;
+       return container_of(rqst, struct rpcrdma_req, rl_slot);
  }
  
  static inline void
@@@ -401,11 -398,12 +398,12 @@@ struct rpcrdma_buffer 
        struct rpcrdma_sendctx  **rb_sc_ctxs;
  
        spinlock_t              rb_lock;        /* protect buf lists */
-       int                     rb_send_count, rb_recv_count;
        struct list_head        rb_send_bufs;
        struct list_head        rb_recv_bufs;
+       unsigned long           rb_flags;
        u32                     rb_max_requests;
        u32                     rb_credits;     /* most recent credit grant */
+       int                     rb_posted_receives;
  
        u32                     rb_bc_srv_max_requests;
        spinlock_t              rb_reqslock;    /* protect rb_allreqs */
  };
  #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
  
+ /* rb_flags */
+ enum {
+       RPCRDMA_BUF_F_EMPTY_SCQ = 0,
+ };
  /*
   * Internal structure for transport instance creation. This
   * exists primarily for modularity.
@@@ -561,18 -564,16 +564,16 @@@ void rpcrdma_ep_disconnect(struct rpcrd
  
  int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
                                struct rpcrdma_req *);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
  
  /*
   * Buffer calls - xprtrdma/verbs.c
   */
  struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
  void rpcrdma_destroy_req(struct rpcrdma_req *);
- int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
  int rpcrdma_buffer_create(struct rpcrdma_xprt *);
  void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
  struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
- void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
  
  struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
  void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@@@ -581,7 -582,6 +582,6 @@@ void rpcrdma_mr_defer_recovery(struct r
  
  struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
  void rpcrdma_buffer_put(struct rpcrdma_req *);
- void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
  void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
  
  struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
@@@ -603,8 -603,6 +603,6 @@@ rpcrdma_dma_map_regbuf(struct rpcrdma_i
        return __rpcrdma_dma_map_regbuf(ia, rb);
  }
  
- int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
  int rpcrdma_alloc_wq(void);
  void rpcrdma_destroy_wq(void);
  
@@@ -675,3 -673,5 +673,3 @@@ void xprt_rdma_bc_destroy(struct rpc_xp
  extern struct xprt_class xprt_rdma_bc;
  
  #endif                                /* _LINUX_SUNRPC_XPRT_RDMA_H */
 -
 -#include <trace/events/rpcrdma.h>
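
Note on the new RPCRDMA_BUF_F_EMPTY_SCQ flag declared above: rpcrdma_sendctx_get_locked() sets the bit when the sendctx queue runs dry, and rpcrdma_sendctx_put_locked() clears it and calls xprt_write_space() so queued senders retry. A simplified userspace sketch of that handshake, assuming C11 atomics in place of the kernel's bitops and a notify_write_space() placeholder for xprt_write_space():

/* Minimal sketch, not kernel code: flag-and-wake handshake between the
 * sendctx get path (queue empty) and the put path (context returned). */
#include <stdatomic.h>
#include <stdio.h>

#define BUF_F_EMPTY_SCQ (1u << 0)

static atomic_uint rb_flags;

static void notify_write_space(void)
{
        printf("waking queued senders\n");
}

/* get path: queue was empty, remember that senders are now blocked. */
static void mark_sendctx_queue_empty(void)
{
        atomic_fetch_or(&rb_flags, BUF_F_EMPTY_SCQ);
}

/* put path: a sendctx came back; if anyone saw the queue empty, wake them. */
static void sendctx_put(void)
{
        unsigned int old = atomic_fetch_and(&rb_flags, ~BUF_F_EMPTY_SCQ);

        if (old & BUF_F_EMPTY_SCQ)
                notify_write_space();
}

int main(void)
{
        mark_sendctx_queue_empty();
        sendctx_put();          /* prints the wakeup */
        sendctx_put();          /* flag already clear, no wakeup */
        return 0;
}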