return false;
}
- static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr)
- {
- if (hdr->res.scratch) {
- kfree(hdr->res.scratch);
- hdr->res.scratch = NULL;
- }
- }
-
static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- nfs4_read_plus_scratch_free(hdr);
-
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
/* Note: We don't use READ_PLUS with pNFS yet */
if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) {
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
- hdr->res.scratch = kmalloc(32, GFP_KERNEL);
- return hdr->res.scratch != NULL;
+ return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE);
}
return false;
}
return generic_setlease(file, F_UNLCK, NULL, priv);
}
-static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
void **priv)
{
struct inode *inode = file_inode(file);
return -EAGAIN;
}
-int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease,
void **priv)
{
switch (arg) {
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
+ if (test_bit(NFS_CS_DS, &clp->cl_flags))
+ calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
/* Save the EXCHANGE_ID verifier session trunk tests */
memcpy(clp->cl_confirm.data, argp->verifier.data,
sizeof(clp->cl_confirm.data));
+ if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS)
+ set_bit(NFS_CS_DS, &clp->cl_flags);
out:
trace_nfs4_exchange_id(clp, status);
rpc_put_task(task);
size_t xdr_buf_pagecount(const struct xdr_buf *buf);
int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
void xdr_free_bvec(struct xdr_buf *buf);
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr);
static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
{
struct kvec *iov; /* pointer to the current kvec */
struct kvec scratch; /* Scratch buffer */
struct page **page_ptr; /* pointer to the current page */
+ void *page_kaddr; /* kmapped address of the current page */
unsigned int nwords; /* Remaining decode buffer length */
struct rpc_rqst *rqst; /* For debugging */
__be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, unsigned int len);
+ extern void xdr_finish_decode(struct xdr_stream *xdr);
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
return -EBADMSG;
- if (len > SIZE_MAX / sizeof(*p))
- return -EBADMSG;
- p = xdr_inline_decode(xdr, len * sizeof(*p));
+ p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p)));
if (unlikely(!p))
return -EBADMSG;
if (array == NULL)
INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
+ percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
- percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
}
return serv;
for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_pool *pool = &serv->sv_pools[i];
+ percpu_counter_destroy(&pool->sp_messages_arrived);
percpu_counter_destroy(&pool->sp_sockets_queued);
percpu_counter_destroy(&pool->sp_threads_woken);
- percpu_counter_destroy(&pool->sp_threads_timedout);
}
kfree(serv->sv_pools);
kfree(serv);
return rqstp;
}
-/*
- * Choose a pool in which to create a new thread, for svc_set_num_threads
+/**
+ * svc_pool_wake_idle_thread - Awaken an idle thread in @pool
+ * @pool: service thread pool
+ *
+ * Can be called from soft IRQ or process context. Finding an idle
+ * service thread and marking it BUSY is atomic with respect to
+ * other calls to svc_pool_wake_idle_thread().
+ *
*/
-static inline struct svc_pool *
-choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+void svc_pool_wake_idle_thread(struct svc_pool *pool)
{
- if (pool != NULL)
- return pool;
+ struct svc_rqst *rqstp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ WRITE_ONCE(rqstp->rq_qtime, ktime_get());
+ wake_up_process(rqstp->rq_task);
+ rcu_read_unlock();
+ percpu_counter_inc(&pool->sp_threads_woken);
+ trace_svc_wake_up(rqstp->rq_task->pid);
+ return;
+ }
+ rcu_read_unlock();
- return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+ set_bit(SP_CONGESTED, &pool->sp_flags);
}
-/*
- * Choose a thread to kill, for svc_set_num_threads
- */
-static inline struct task_struct *
-choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+static struct svc_pool *
+svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+}
+
+static struct task_struct *
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
unsigned int i;
struct task_struct *task = NULL;
if (pool != NULL) {
spin_lock_bh(&pool->sp_lock);
} else {
- /* choose a pool in round-robin fashion */
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
spin_lock_bh(&pool->sp_lock);
if (!list_empty(&pool->sp_all_threads)) {
struct svc_rqst *rqstp;
- /*
- * Remove from the pool->sp_all_threads list
- * so we don't try to kill it again.
- */
rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
set_bit(RQ_VICTIM, &rqstp->rq_flags);
list_del_rcu(&rqstp->rq_all);
task = rqstp->rq_task;
}
spin_unlock_bh(&pool->sp_lock);
-
return task;
}
-/* create new threads */
static int
svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
do {
nrservs--;
- chosen_pool = choose_pool(serv, pool, &state);
-
+ chosen_pool = svc_pool_next(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
+
rqstp = svc_prepare_thread(serv, chosen_pool, node);
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
-
task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
return 0;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- */
-
-/* destroy old threads */
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
- /* destroy old threads */
do {
- task = choose_victim(serv, pool, &state);
+ task = svc_pool_victim(serv, pool, &state);
if (task == NULL)
break;
rqstp = kthread_data(task);
return 0;
}
+/**
+ * svc_set_num_threads - adjust number of threads per RPC service
+ * @serv: RPC service to adjust
+ * @pool: Specific pool from which to choose threads, or NULL
+ * @nrservs: New number of threads for @serv (0 or less means kill all threads)
+ *
+ * Create or destroy threads to make the number of threads for @serv the
+ * given number. If @pool is non-NULL, change only threads in that pool;
+ * otherwise, round-robin between all pools for @serv. @serv's
+ * sv_nrthreads is adjusted for each thread created or destroyed.
+ *
+ * Caller must ensure mutual exclusion between this and server startup or
+ * shutdown.
+ *
+ * Returns zero on success or a negative errno if an error occurred while
+ * starting a thread.
+ */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
- int auth_res, rc;
+ enum svc_auth_status auth_res;
unsigned int aoffset;
+ int rc;
__be32 *p;
/* Will be turned off by GSS integrity and privacy services */
goto dropit;
case SVC_COMPLETE:
goto sendit;
+ default:
+ pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
+ goto err_system_err;
}
if (progp == NULL)
rc = process.dispatch(rqstp);
if (procp->pc_release)
procp->pc_release(rqstp);
+ xdr_finish_decode(xdr);
+
if (!rc)
goto dropit;
if (rqstp->rq_auth_stat != rpc_auth_ok)
out_drop:
svc_drop(rqstp);
}
-EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
buf->bvec = NULL;
}
+/**
+ * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array
+ * @bvec: bio_vec array to populate
+ * @bvec_size: element count of @bio_vec
+ * @xdr: xdr_buf to be copied
+ *
+ * Returns the number of entries consumed in @bvec.
+ */
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ unsigned int count = 0;
+
+ if (head->iov_len) {
+ bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+ ++count;
+ }
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct page **pages = xdr->pages;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min_t(unsigned int, remaining,
+ PAGE_SIZE - offset);
+ bvec_set_page(bvec++, *pages++, len, offset);
+ remaining -= len;
+ offset = 0;
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+ }
+
+ if (tail->iov_len) {
+ bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+
+ return count;
+
+bvec_overflow:
+ pr_warn_once("%s: bio_vec array overflow\n", __func__);
+ return count - 1;
+}
+
/**
* xdr_inline_pages - Prepare receive buffer for a large reply
* @xdr: xdr_buf into which reply will be placed
return xdr_set_iov(xdr, buf->tail, base, len);
}
+ static void xdr_stream_unmap_current_page(struct xdr_stream *xdr)
+ {
+ if (xdr->page_kaddr) {
+ kunmap_local(xdr->page_kaddr);
+ xdr->page_kaddr = NULL;
+ }
+ }
+
static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
unsigned int base, unsigned int len)
{
if (len > maxlen)
len = maxlen;
+ xdr_stream_unmap_current_page(xdr);
xdr_stream_page_set_pos(xdr, base);
base += xdr->buf->page_base;
pgnr = base >> PAGE_SHIFT;
xdr->page_ptr = &xdr->buf->pages[pgnr];
- kaddr = page_address(*xdr->page_ptr);
+
+ if (PageHighMem(*xdr->page_ptr)) {
+ xdr->page_kaddr = kmap_local_page(*xdr->page_ptr);
+ kaddr = xdr->page_kaddr;
+ } else
+ kaddr = page_address(*xdr->page_ptr);
pgoff = base & ~PAGE_MASK;
xdr->p = (__be32*)(kaddr + pgoff);
struct rpc_rqst *rqst)
{
xdr->buf = buf;
+ xdr->page_kaddr = NULL;
xdr_reset_scratch_buffer(xdr);
xdr->nwords = XDR_QUADLEN(buf->len);
if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 &&
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
+ /**
+ * xdr_finish_decode - Clean up the xdr_stream after decoding data.
+ * @xdr: pointer to xdr_stream struct
+ */
+ void xdr_finish_decode(struct xdr_stream *xdr)
+ {
+ xdr_stream_unmap_current_page(xdr);
+ }
+ EXPORT_SYMBOL(xdr_finish_decode);
+
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
unsigned int nwords = XDR_QUADLEN(nbytes);
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/bvec.h>
xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -EACCES : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
}
if (ret == -ESHUTDOWN)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else if (ret == -EACCES)
+ xprt_wake_pending_tasks(&transport->xprt, -EACCES);
else
xs_poll_check_readable(transport);
out:
dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (transport->sock)
+ tls_handshake_close(transport->sock);
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
}
struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct net *net = sock_net(sock->sk);
+ unsigned long connect_timeout;
+ unsigned long syn_retries;
unsigned int keepidle;
unsigned int keepcnt;
unsigned int timeo;
+ unsigned long t;
spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
/* TCP user timeout (see RFC5482) */
tcp_sock_set_user_timeout(sock->sk, timeo);
+
+ /* Connect timeout */
+ connect_timeout = max_t(unsigned long,
+ DIV_ROUND_UP(xprt->connect_timeout, HZ), 1);
+ syn_retries = max_t(unsigned long,
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1);
+ for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++)
+ ;
+ if (t <= syn_retries)
+ tcp_sock_set_syncnt(sock->sk, t - 1);
+ }
+
+ static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout)
+ {
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ memcpy(&to, xprt->timeout, sizeof(to));
+ /* Arbitrary lower limit */
+ initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO);
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ to.to_retries = 0;
+ memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout));
+ xprt->timeout = &transport->tcp_timeout;
+ xprt->connect_timeout = connect_timeout;
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
unsigned long reconnect_timeout)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct rpc_timeout to;
- unsigned long initval;
spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
- if (connect_timeout < xprt->connect_timeout) {
- memcpy(&to, xprt->timeout, sizeof(to));
- initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
- /* Arbitrary lower limit */
- if (initval < XS_TCP_INIT_REEST_TO << 1)
- initval = XS_TCP_INIT_REEST_TO << 1;
- to.to_initval = initval;
- to.to_maxval = initval;
- memcpy(&transport->tcp_timeout, &to,
- sizeof(transport->tcp_timeout));
- xprt->timeout = &transport->tcp_timeout;
- xprt->connect_timeout = connect_timeout;
- }
+ if (connect_timeout < xprt->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, connect_timeout);
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
spin_unlock(&xprt->transport_lock);
}
xprt->timeout = &xs_tcp_default_timeout;
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ if (args->reconnect_timeout)
+ xprt->max_reconnect_timeout = args->reconnect_timeout;
+
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
+ if (args->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_WORK(&transport->error_worker, xs_error_handle);