From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 22 Jun 2009 19:55:50 +0000 (-0700)
Subject: Merge branch 'for-2.6.31' of git://fieldses.org/git/linux-nfsd
X-Git-Tag: v2.6.31-rc1~57
X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/7e0338c0de18c50f09aea1fbef45110cf7d64a3c?hp=-c

Merge branch 'for-2.6.31' of git://fieldses.org/git/linux-nfsd

* 'for-2.6.31' of git://fieldses.org/git/linux-nfsd: (60 commits)
  SUNRPC: Fix the TCP server's send buffer accounting
  nfsd41: Backchannel: minorversion support for the back channel
  nfsd41: Backchannel: cleanup nfs4.0 callback encode routines
  nfsd41: Remove ip address collision detection case
  nfsd: optimise the starting of zero threads when none are running.
  nfsd: don't take nfsd_mutex twice when setting number of threads.
  nfsd41: sanity check client drc maxreqs
  nfsd41: move channel attributes from nfsd4_session to a nfsd4_channel_attr struct
  NFS: kill off complicated macro 'PROC'
  sunrpc: potential memory leak in function rdma_read_xdr
  nfsd: minor nfsd_vfs_write cleanup
  nfsd: Pull write-gathering code out of nfsd_vfs_write
  nfsd: track last inode only in use_wgather case
  sunrpc: align cache_clean work's timer
  nfsd: Use write gathering only with NFSv2
  NFSv4: kill off complicated macro 'PROC'
  NFSv4: do exact check about attribute specified
  knfsd: remove unreported filehandle stats counters
  knfsd: fix reply cache memory corruption
  knfsd: reply cache cleanups
  ...
---

7e0338c0de18c50f09aea1fbef45110cf7d64a3c
diff --combined fs/Kconfig
index d78e950402c1,44ab328ceb2a..a97263be6a91
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@@ -39,13 -39,6 +39,13 @@@ config FS_POSIX_AC
  	bool
  	default n
  
 +source "fs/xfs/Kconfig"
 +source "fs/gfs2/Kconfig"
 +source "fs/ocfs2/Kconfig"
 +source "fs/btrfs/Kconfig"
 +
 +endif # BLOCK
 +
  config FILE_LOCKING
  	bool "Enable POSIX file locking API" if EMBEDDED
  	default y
@@@ -54,6 -47,13 +54,6 @@@
            for filesystems like NFS and for the flock() system
            call. Disabling this option saves about 11k.
  
 -source "fs/xfs/Kconfig"
 -source "fs/gfs2/Kconfig"
 -source "fs/ocfs2/Kconfig"
 -source "fs/btrfs/Kconfig"
 -
 -endif # BLOCK
 -
  source "fs/notify/Kconfig"
  
  source "fs/quota/Kconfig"
@@@ -62,16 -62,6 +62,16 @@@ source "fs/autofs/Kconfig
  source "fs/autofs4/Kconfig"
  source "fs/fuse/Kconfig"
  
 +config CUSE
 +	tristate "Character device in Userspace support"
 +	depends on FUSE_FS
 +	help
 +	  This FUSE extension allows character devices to be
 +	  implemented in userspace.
 +
 +	  If you want to develop or use userspace character device
 +	  based on CUSE, answer Y or M.
 +
  config GENERIC_ACL
  	bool
  	select FS_POSIX_ACL
@@@ -134,7 -124,7 +134,7 @@@ config TMPFS_POSIX_AC
  config HUGETLBFS
  	bool "HugeTLB file system support"
  	depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
 -		   (S390 && 64BIT) || BROKEN
 +		   (S390 && 64BIT) || SYS_SUPPORTS_HUGETLBFS || BROKEN
  	help
  	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
  	  ramfs. For architectures that support it, say Y here and read
@@@ -236,10 -226,12 +236,12 @@@ source "fs/nfsd/Kconfig
  
  config LOCKD
  	tristate
+ 	depends on FILE_LOCKING
  
  config LOCKD_V4
  	bool
  	depends on NFSD_V3 || NFS_V3
+ 	depends on FILE_LOCKING
  	default y
  
  config EXPORTFS
diff --combined fs/nfs/Kconfig
index 5d6d6f415935,7dbb8f27b9d6..2a77bc25d5af
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@@ -1,6 -1,6 +1,6 @@@
  config NFS_FS
  	tristate "NFS client support"
- 	depends on INET
+ 	depends on INET && FILE_LOCKING
  	select LOCKD
  	select SUNRPC
  	select NFS_ACL_SUPPORT if NFS_V3_ACL
@@@ -74,15 -74,6 +74,15 @@@ config NFS_V
  
  	  If unsure, say N.
  
 +config NFS_V4_1
 +	bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
 +	depends on NFS_V4 && EXPERIMENTAL
 +	help
 +	  This option enables support for minor version 1 of the NFSv4 protocol
 +	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
 +
 +	  Unless you're an NFS developer, say N.
 +
  config ROOT_NFS
  	bool "Root file system on NFS"
  	depends on NFS_FS=y && IP_PNP
diff --combined fs/nfsd/export.c
index 8b1f8efb4690,6eb918153fd4..b92a27629fb7
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@@ -464,16 -464,11 +464,11 @@@ static int secinfo_parse(char **mesg, c
  		if (err)
  			return err;
  		/*
- 		 * Just a quick sanity check; we could also try to check
- 		 * whether this pseudoflavor is supported, but at worst
- 		 * an unsupported pseudoflavor on the export would just
- 		 * be a pseudoflavor that won't match the flavor of any
- 		 * authenticated request.  The administrator will
- 		 * probably discover the problem when someone fails to
- 		 * authenticate.
+ 		 * XXX: It would be nice to also check whether this
+ 		 * pseudoflavor is supported, so we can discover the
+ 		 * problem at export time instead of when a client fails
+ 		 * to authenticate.
  		 */
- 		if (f->pseudoflavor < 0)
- 			return -EINVAL;
  		err = get_int(mesg, &f->flags);
  		if (err)
  			return err;
@@@ -847,8 -842,9 +842,8 @@@ exp_get_fsid_key(svc_client *clp, int f
  	return exp_find_key(clp, FSID_NUM, fsidv, NULL);
  }
  
 -static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
 -				   struct dentry *dentry,
 -				   struct cache_req *reqp)
 +static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
 +				     struct cache_req *reqp)
  {
  	struct svc_export *exp, key;
  	int err;
@@@ -857,7 -853,8 +852,7 @@@
  		return ERR_PTR(-ENOENT);
  
  	key.ex_client = clp;
 -	key.ex_path.mnt = mnt;
 -	key.ex_path.dentry = dentry;
 +	key.ex_path = *path;
  
  	exp = svc_export_lookup(&key);
  	if (exp == NULL)
@@@ -871,19 -868,24 +866,19 @@@
  /*
   * Find the export entry for a given dentry.
   */
 -static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
 -				     struct dentry *dentry,
 -				     struct cache_req *reqp)
 +static struct svc_export *exp_parent(svc_client *clp, struct path *path)
  {
 -	svc_export *exp;
 -
 -	dget(dentry);
 -	exp = exp_get_by_name(clp, mnt, dentry, reqp);
 -
 -	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
 -		struct dentry *parent;
 -
 -		parent = dget_parent(dentry);
 -		dput(dentry);
 -		dentry = parent;
 -		exp = exp_get_by_name(clp, mnt, dentry, reqp);
 +	struct dentry *saved = dget(path->dentry);
 +	svc_export *exp = exp_get_by_name(clp, path, NULL);
 +
 +	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
 +		struct dentry *parent = dget_parent(path->dentry);
 +		dput(path->dentry);
 +		path->dentry = parent;
 +		exp = exp_get_by_name(clp, path, NULL);
  	}
 -	dput(dentry);
 +	dput(path->dentry);
 +	path->dentry = saved;
  	return exp;
  }
  
@@@ -1011,7 -1013,7 +1006,7 @@@ exp_export(struct nfsctl_export *nxp
  		goto out_put_clp;
  	err = -EINVAL;
  
 -	exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
 +	exp = exp_get_by_name(clp, &path, NULL);
  
  	memset(&new, 0, sizeof(new));
  
@@@ -1128,7 -1130,7 +1123,7 @@@ exp_unexport(struct nfsctl_export *nxp
  		goto out_domain;
  
  	err = -EINVAL;
 -	exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
 +	exp = exp_get_by_name(dom, &path, NULL);
  	path_put(&path);
  	if (IS_ERR(exp))
  		goto out_domain;
@@@ -1170,7 -1172,7 +1165,7 @@@ exp_rootfh(svc_client *clp, char *name
  	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
  		 name, path.dentry, clp->name,
  		 inode->i_sb->s_id, inode->i_ino);
 -	exp = exp_parent(clp, path.mnt, path.dentry, NULL);
 +	exp = exp_parent(clp, &path);
  	if (IS_ERR(exp)) {
  		err = PTR_ERR(exp);
  		goto out;
@@@ -1200,7 -1202,7 +1195,7 @@@ static struct svc_export *exp_find(stru
  	if (IS_ERR(ek))
  		return ERR_CAST(ek);
  
 -	exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
 +	exp = exp_get_by_name(clp, &ek->ek_path, reqp);
  	cache_put(&ek->h, &svc_expkey_cache);
  
  	if (IS_ERR(exp))
@@@ -1240,7 -1242,8 +1235,7 @@@ __be32 check_nfsd_access(struct svc_exp
   * use exp_get_by_name() or exp_find().
   */
  struct svc_export *
 -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
 -		struct dentry *dentry)
 +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
  {
  	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
  
@@@ -1248,7 -1251,8 +1243,7 @@@
  		goto gss;
  
  	/* First try the auth_unix client: */
 -	exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
 -						&rqstp->rq_chandle);
 +	exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
  	if (PTR_ERR(exp) == -ENOENT)
  		goto gss;
  	if (IS_ERR(exp))
@@@ -1260,7 -1264,8 +1255,7 @@@ gss
  	/* Otherwise, try falling back on gss client */
  	if (rqstp->rq_gssclient == NULL)
  		return exp;
 -	gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
 -						&rqstp->rq_chandle);
 +	gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
  	if (PTR_ERR(gssexp) == -ENOENT)
  		return exp;
  	if (!IS_ERR(exp))
@@@ -1299,19 -1304,23 +1294,19 @@@ gss
  }
  
  struct svc_export *
 -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
 -		struct dentry *dentry)
 +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
  {
 -	struct svc_export *exp;
 -
 -	dget(dentry);
 -	exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
 -
 -	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
 -		struct dentry *parent;
 -
 -		parent = dget_parent(dentry);
 -		dput(dentry);
 -		dentry = parent;
 -		exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
 +	struct dentry *saved = dget(path->dentry);
 +	struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
 +
 +	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
 +		struct dentry *parent = dget_parent(path->dentry);
 +		dput(path->dentry);
 +		path->dentry = parent;
 +		exp = rqst_exp_get_by_name(rqstp, path);
  	}
 -	dput(dentry);
 +	dput(path->dentry);
 +	path->dentry = saved;
  	return exp;
  }
  
diff --combined fs/nfsd/vfs.c
index 99f835753596,1cf70616a11e..4145083dcf88
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@@ -55,7 -55,6 +55,7 @@@
  #include <linux/security.h>
  #endif /* CONFIG_NFSD_V4 */
  #include <linux/jhash.h>
 +#include <linux/ima.h>
  
  #include <asm/uaccess.h>
  
@@@ -101,35 -100,36 +101,35 @@@ nfsd_cross_mnt(struct svc_rqst *rqstp, 
  {
  	struct svc_export *exp = *expp, *exp2 = NULL;
  	struct dentry *dentry = *dpp;
 -	struct vfsmount *mnt = mntget(exp->ex_path.mnt);
 -	struct dentry *mounts = dget(dentry);
 +	struct path path = {.mnt = mntget(exp->ex_path.mnt),
 +			    .dentry = dget(dentry)};
  	int err = 0;
  
 -	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
 +	while (d_mountpoint(path.dentry) && follow_down(&path))
 +		;
  
 -	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
 +	exp2 = rqst_exp_get_by_name(rqstp, &path);
  	if (IS_ERR(exp2)) {
  		if (PTR_ERR(exp2) != -ENOENT)
  			err = PTR_ERR(exp2);
 -		dput(mounts);
 -		mntput(mnt);
 +		path_put(&path);
  		goto out;
  	}
  	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
  		/* successfully crossed mount point */
  		/*
 -		 * This is subtle: dentry is *not* under mnt at this point.
 -		 * The only reason we are safe is that original mnt is pinned
 -		 * down by exp, so we should dput before putting exp.
 +		 * This is subtle: path.dentry is *not* on path.mnt
 +		 * at this point.  The only reason we are safe is that
 +		 * original mnt is pinned down by exp, so we should
 +		 * put path *before* putting exp
  		 */
 -		dput(dentry);
 -		*dpp = mounts;
 -		exp_put(exp);
 +		*dpp = path.dentry;
 +		path.dentry = dentry;
  		*expp = exp2;
 -	} else {
 -		exp_put(exp2);
 -		dput(mounts);
 +		exp2 = exp;
  	}
 -	mntput(mnt);
 +	path_put(&path);
 +	exp_put(exp2);
  out:
  	return err;
  }
@@@ -168,29 -168,28 +168,29 @@@ nfsd_lookup_dentry(struct svc_rqst *rqs
  			/* checking mountpoint crossing is very different when stepping up */
  			struct svc_export *exp2 = NULL;
  			struct dentry *dp;
 -			struct vfsmount *mnt = mntget(exp->ex_path.mnt);
 -			dentry = dget(dparent);
 -			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
 +			struct path path = {.mnt = mntget(exp->ex_path.mnt),
 +					    .dentry = dget(dparent)};
 +
 +			while (path.dentry == path.mnt->mnt_root &&
 +			       follow_up(&path))
  				;
 -			dp = dget_parent(dentry);
 -			dput(dentry);
 -			dentry = dp;
 +			dp = dget_parent(path.dentry);
 +			dput(path.dentry);
 +			path.dentry = dp;
  
 -			exp2 = rqst_exp_parent(rqstp, mnt, dentry);
 +			exp2 = rqst_exp_parent(rqstp, &path);
  			if (PTR_ERR(exp2) == -ENOENT) {
 -				dput(dentry);
  				dentry = dget(dparent);
  			} else if (IS_ERR(exp2)) {
  				host_err = PTR_ERR(exp2);
 -				dput(dentry);
 -				mntput(mnt);
 +				path_put(&path);
  				goto out_nfserr;
  			} else {
 +				dentry = dget(path.dentry);
  				exp_put(exp);
  				exp = exp2;
  			}
 -			mntput(mnt);
 +			path_put(&path);
  		}
  	} else {
  		fh_lock(fhp);
@@@ -736,8 -735,6 +736,8 @@@ nfsd_open(struct svc_rqst *rqstp, struc
  			    flags, cred);
  	if (IS_ERR(*filp))
  		host_err = PTR_ERR(*filp);
 +	else
 +		ima_counts_get(*filp);
  out_nfserr:
  	err = nfserrno(host_err);
  out:
@@@ -966,6 -963,43 +966,43 @@@ static void kill_suid(struct dentry *de
  	mutex_unlock(&dentry->d_inode->i_mutex);
  }
  
+ /*
+  * Gathered writes: If another process is currently writing to the file,
+  * there's a high chance this is another nfsd (triggered by a bulk write
+  * from a client's biod). Rather than syncing the file with each write
+  * request, we sleep for 10 msec.
+  *
+  * I don't know if this roughly approximates C. Juszak's idea of
+  * gathered writes, but it's a nice and simple solution (IMHO), and it
+  * seems to work:-)
+  *
+  * Note: we do this only in the NFSv2 case, since v3 and higher have a
+  * better tool (separate unstable writes and commits) for solving this
+  * problem.
+  */
+ static int wait_for_concurrent_writes(struct file *file)
+ {
+ 	struct inode *inode = file->f_path.dentry->d_inode;
+ 	static ino_t last_ino;
+ 	static dev_t last_dev;
+ 	int err = 0;
+ 
+ 	if (atomic_read(&inode->i_writecount) > 1
+ 	    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+ 		dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+ 		msleep(10);
+ 		dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+ 	}
+ 
+ 	if (inode->i_state & I_DIRTY) {
+ 		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+ 		err = nfsd_sync(file);
+ 	}
+ 	last_ino = inode->i_ino;
+ 	last_dev = inode->i_sb->s_dev;
+ 	return err;
+ }
+ 
  static __be32
  nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
  				loff_t offset, struct kvec *vec, int vlen,
@@@ -978,6 -1012,7 +1015,7 @@@
  	__be32			err = 0;
  	int			host_err;
  	int			stable = *stablep;
+ 	int			use_wgather;
  
  #ifdef MSNFS
  	err = nfserr_perm;
@@@ -996,9 -1031,10 +1034,10 @@@
  	 *  -	the sync export option has been set, or
  	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
  	 *  -   The file system doesn't support fsync().
- 	 * When gathered writes have been configured for this volume,
+ 	 * When NFSv2 gathered writes have been configured for this volume,
  	 * flushing the data to disk is handled separately below.
  	 */
+ 	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
  
  	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
  	       stable = 2;
@@@ -1007,7 -1043,7 +1046,7 @@@
  
  	if (!EX_ISSYNC(exp))
  		stable = 0;
- 	if (stable && !EX_WGATHER(exp)) {
+ 	if (stable && !use_wgather) {
  		spin_lock(&file->f_lock);
  		file->f_flags |= O_SYNC;
  		spin_unlock(&file->f_lock);
@@@ -1017,52 -1053,20 +1056,20 @@@
  	oldfs = get_fs(); set_fs(KERNEL_DS);
  	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
  	set_fs(oldfs);
- 	if (host_err >= 0) {
- 		*cnt = host_err;
- 		nfsdstats.io_write += host_err;
- 		fsnotify_modify(file->f_path.dentry);
- 	}
+ 	if (host_err < 0)
+ 		goto out_nfserr;
+ 	*cnt = host_err;
+ 	nfsdstats.io_write += host_err;
+ 	fsnotify_modify(file->f_path.dentry);
  
  	/* clear setuid/setgid flag after write */
- 	if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+ 	if (inode->i_mode & (S_ISUID | S_ISGID))
  		kill_suid(dentry);
  
- 	if (host_err >= 0 && stable) {
- 		static ino_t	last_ino;
- 		static dev_t	last_dev;
- 
- 		/*
- 		 * Gathered writes: If another process is currently
- 		 * writing to the file, there's a high chance
- 		 * this is another nfsd (triggered by a bulk write
- 		 * from a client's biod). Rather than syncing the
- 		 * file with each write request, we sleep for 10 msec.
- 		 *
- 		 * I don't know if this roughly approximates
- 		 * C. Juszak's idea of gathered writes, but it's a
- 		 * nice and simple solution (IMHO), and it seems to
- 		 * work:-)
- 		 */
- 		if (EX_WGATHER(exp)) {
- 			if (atomic_read(&inode->i_writecount) > 1
- 			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
- 				dprintk("nfsd: write defer %d\n", task_pid_nr(current));
- 				msleep(10);
- 				dprintk("nfsd: write resume %d\n", task_pid_nr(current));
- 			}
- 
- 			if (inode->i_state & I_DIRTY) {
- 				dprintk("nfsd: write sync %d\n", task_pid_nr(current));
- 				host_err=nfsd_sync(file);
- 			}
- #if 0
- 			wake_up(&inode->i_wait);
- #endif
- 		}
- 		last_ino = inode->i_ino;
- 		last_dev = inode->i_sb->s_dev;
- 	}
+ 	if (stable && use_wgather)
+ 		host_err = wait_for_concurrent_writes(file);
  
+ out_nfserr:
  	dprintk("nfsd: write complete host_err=%d\n", host_err);
  	if (host_err >= 0)
  		err = 0;
@@@ -2027,7 -2031,6 +2034,7 @@@ nfsd_permission(struct svc_rqst *rqstp
  					struct dentry *dentry, int acc)
  {
  	struct inode	*inode = dentry->d_inode;
 +	struct path	path;
  	int		err;
  
  	if (acc == NFSD_MAY_NOP)
@@@ -2100,17 -2103,7 +2107,17 @@@
  	if (err == -EACCES && S_ISREG(inode->i_mode) &&
  	    acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
  		err = inode_permission(inode, MAY_EXEC);
 +	if (err)
 +		goto nfsd_out;
  
 +	/* Do integrity (permission) checking now, but defer incrementing
 +	 * IMA counts to the actual file open.
 +	 */
 +	path.mnt = exp->ex_path.mnt;
 +	path.dentry = dentry;
 +	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
 +			     IMA_COUNT_LEAVE);
 +nfsd_out:
  	return err? nfserrno(err) : 0;
  }
  
diff --combined include/linux/fs.h
index 74a57938c880,58e843b26b98..1ff5e4e01952
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -729,8 -729,8 +729,8 @@@ struct inode 
  	struct timespec		i_atime;
  	struct timespec		i_mtime;
  	struct timespec		i_ctime;
 -	unsigned int		i_blkbits;
  	blkcnt_t		i_blocks;
 +	unsigned int		i_blkbits;
  	unsigned short          i_bytes;
  	umode_t			i_mode;
  	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
@@@ -751,12 -751,13 +751,12 @@@
  		struct block_device	*i_bdev;
  		struct cdev		*i_cdev;
  	};
 -	int			i_cindex;
  
  	__u32			i_generation;
  
 -#ifdef CONFIG_DNOTIFY
 -	unsigned long		i_dnotify_mask; /* Directory notify events */
 -	struct dnotify_struct	*i_dnotify; /* for directory notifications */
 +#ifdef CONFIG_FSNOTIFY
 +	__u32			i_fsnotify_mask; /* all events this inode cares about */
 +	struct hlist_head	i_fsnotify_mark_entries; /* fsnotify mark entries */
  #endif
  
  #ifdef CONFIG_INOTIFY
@@@ -879,7 -880,7 +879,7 @@@ struct file_ra_state 
  					   there are only # of pages ahead */
  
  	unsigned int ra_pages;		/* Maximum readahead window */
 -	int mmap_miss;			/* Cache miss stat for mmap accesses */
 +	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
  	loff_t prev_pos;		/* Cache last read() position */
  };
  
@@@ -1107,6 -1108,7 +1107,7 @@@ extern void locks_copy_lock(struct file
  extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
  extern void locks_remove_posix(struct file *, fl_owner_t);
  extern void locks_remove_flock(struct file *);
+ extern void locks_release_private(struct file_lock *);
  extern void posix_test_lock(struct file *, struct file_lock *);
  extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
  extern int posix_lock_file_wait(struct file *, struct file_lock *);
@@@ -1320,7 -1322,7 +1321,7 @@@ struct super_block 
  	struct rw_semaphore	s_umount;
  	struct mutex		s_lock;
  	int			s_count;
 -	int			s_need_sync_fs;
 +	int			s_need_sync;
  	atomic_t		s_active;
  #ifdef CONFIG_SECURITY
  	void                    *s_security;
@@@ -1371,6 -1373,11 +1372,6 @@@
  	 * generic_show_options()
  	 */
  	char *s_options;
 -
 -	/*
 -	 * storage for asynchronous operations
 -	 */
 -	struct list_head s_async_list;
  };
  
  extern struct timespec current_fs_time(struct super_block *sb);
@@@ -1794,7 -1801,7 +1795,7 @@@ extern struct vfsmount *kern_mount_data
  extern int may_umount_tree(struct vfsmount *);
  extern int may_umount(struct vfsmount *);
  extern long do_mount(char *, char *, char *, unsigned long, void *);
 -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
 +extern struct vfsmount *collect_mounts(struct path *);
  extern void drop_collected_mounts(struct vfsmount *);
  
  extern int vfs_statfs(struct dentry *, struct kstatfs *);
@@@ -1919,9 -1926,8 +1920,9 @@@ extern void __init vfs_caches_init(unsi
  
  extern struct kmem_cache *names_cachep;
  
 -#define __getname()	kmem_cache_alloc(names_cachep, GFP_KERNEL)
 -#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
 +#define __getname_gfp(gfp)	kmem_cache_alloc(names_cachep, (gfp))
 +#define __getname()		__getname_gfp(GFP_KERNEL)
 +#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
  #ifndef CONFIG_AUDITSYSCALL
  #define putname(name)   __putname(name)
  #else
@@@ -1942,6 -1948,8 +1943,6 @@@ extern struct super_block *freeze_bdev(
  extern void emergency_thaw_all(void);
  extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
  extern int fsync_bdev(struct block_device *);
 -extern int fsync_super(struct super_block *);
 -extern int fsync_no_super(struct block_device *);
  #else
  static inline void bd_forget(struct inode *inode) {}
  static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@@ -1957,7 -1965,6 +1958,7 @@@ static inline int thaw_bdev(struct bloc
  	return 0;
  }
  #endif
 +extern int sync_filesystem(struct super_block *);
  extern const struct file_operations def_blk_fops;
  extern const struct file_operations def_chr_fops;
  extern const struct file_operations bad_sock_fops;
@@@ -2037,6 -2044,9 +2038,6 @@@ extern int __invalidate_device(struct b
  extern int invalidate_partition(struct gendisk *, int);
  #endif
  extern int invalidate_inodes(struct super_block *);
 -unsigned long __invalidate_mapping_pages(struct address_space *mapping,
 -					pgoff_t start, pgoff_t end,
 -					bool be_atomic);
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
  					pgoff_t start, pgoff_t end);
  
@@@ -2073,8 -2083,12 +2074,8 @@@ extern int filemap_fdatawrite_range(str
  
  extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
  extern void sync_supers(void);
 -extern void sync_filesystems(int wait);
 -extern void __fsync_super(struct super_block *sb);
  extern void emergency_sync(void);
  extern void emergency_remount(void);
 -extern int do_remount_sb(struct super_block *sb, int flags,
 -			 void *data, int force);
  #ifdef CONFIG_BLOCK
  extern sector_t bmap(struct inode *, sector_t);
  #endif
@@@ -2192,8 -2206,6 +2193,8 @@@ extern int generic_segment_checks(cons
  /* fs/splice.c */
  extern ssize_t generic_file_splice_read(struct file *, loff_t *,
  		struct pipe_inode_info *, size_t, unsigned int);
 +extern ssize_t default_file_splice_read(struct file *, loff_t *,
 +		struct pipe_inode_info *, size_t, unsigned int);
  extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
  		struct file *, loff_t *, size_t, unsigned int);
  extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
@@@ -2343,8 -2355,6 +2344,8 @@@ extern void simple_release_fs(struct vf
  extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
  			loff_t *ppos, const void *from, size_t available);
  
 +extern int simple_fsync(struct file *, struct dentry *, int);
 +
  #ifdef CONFIG_MIGRATION
  extern int buffer_migrate_page(struct address_space *,
  				struct page *, struct page *);
diff --combined include/linux/nfsd/state.h
index 7ef4b7ad1214,f5a95fd34312..57ab2ed08459
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@@ -41,6 -41,7 +41,6 @@@
  #include <linux/kref.h>
  #include <linux/sunrpc/clnt.h>
  
 -#define NFS4_OPAQUE_LIMIT 1024
  typedef struct {
  	u32             cl_boot;
  	u32             cl_id;
@@@ -60,15 -61,6 +60,6 @@@ typedef struct 
  #define si_stateownerid   si_opaque.so_stateownerid
  #define si_fileid         si_opaque.so_fileid
  
- 
- struct nfs4_cb_recall {
- 	u32			cbr_ident;
- 	int			cbr_trunc;
- 	stateid_t		cbr_stateid;
- 	struct knfsd_fh		cbr_fh;
- 	struct nfs4_delegation	*cbr_dp;
- };
- 
  struct nfs4_delegation {
  	struct list_head	dl_perfile;
  	struct list_head	dl_perclnt;
@@@ -80,22 -72,25 +71,25 @@@
  	struct file		*dl_vfs_file;
  	u32			dl_type;
  	time_t			dl_time;
- 	struct nfs4_cb_recall	dl_recall;
+ /* For recall: */
+ 	u32			dl_ident;
+ 	stateid_t		dl_stateid;
+ 	struct knfsd_fh		dl_fh;
+ 	int			dl_retries;
  };
  
- #define dl_stateid      dl_recall.cbr_stateid
- #define dl_fh           dl_recall.cbr_fh
- 
  /* client delegation callback info */
- struct nfs4_callback {
+ struct nfs4_cb_conn {
  	/* SETCLIENTID info */
  	u32                     cb_addr;
  	unsigned short          cb_port;
  	u32                     cb_prog;
- 	u32                     cb_ident;
+ 	u32			cb_minorversion;
+ 	u32                     cb_ident;	/* minorversion 0 only */
  	/* RPC client info */
  	atomic_t		cb_set;     /* successful CB_NULL call */
  	struct rpc_clnt *       cb_client;
+ 	struct rpc_cred	*	cb_cred;
  };
  
  /* Maximum number of slots per session. 128 is useful for long haul TCP */
@@@ -121,6 -116,17 +115,17 @@@ struct nfsd4_slot 
  	struct nfsd4_cache_entry	sl_cache_entry;
  };
  
+ struct nfsd4_channel_attrs {
+ 	u32		headerpadsz;
+ 	u32		maxreq_sz;
+ 	u32		maxresp_sz;
+ 	u32		maxresp_cached;
+ 	u32		maxops;
+ 	u32		maxreqs;
+ 	u32		nr_rdma_attrs;
+ 	u32		rdma_attrs;
+ };
+ 
  struct nfsd4_session {
  	struct kref		se_ref;
  	struct list_head	se_hash;	/* hash by sessionid */
@@@ -128,11 -134,8 +133,8 @@@
  	u32			se_flags;
  	struct nfs4_client	*se_client;	/* for expire_client */
  	struct nfs4_sessionid	se_sessionid;
- 	u32			se_fmaxreq_sz;
- 	u32			se_fmaxresp_sz;
- 	u32			se_fmaxresp_cached;
- 	u32			se_fmaxops;
- 	u32			se_fnumslots;
+ 	struct nfsd4_channel_attrs se_fchannel;
+ 	struct nfsd4_channel_attrs se_bchannel;
  	struct nfsd4_slot	se_slots[];	/* forward channel slots */
  };
  
@@@ -184,7 -187,7 +186,7 @@@ struct nfs4_client 
  	struct svc_cred		cl_cred; 	/* setclientid principal */
  	clientid_t		cl_clientid;	/* generated by server */
  	nfs4_verifier		cl_confirm;	/* generated by server */
- 	struct nfs4_callback	cl_callback;    /* callback info */
+ 	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
  	atomic_t		cl_count;	/* ref count */
  	u32			cl_firststate;	/* recovery dir creation */
  
diff --combined include/linux/sunrpc/svcsock.h
index 6bb1ec4ae310,827163138949..04dba23c59f2
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@@ -38,12 -38,13 +38,15 @@@ int		svc_recv(struct svc_rqst *, long)
  int		svc_send(struct svc_rqst *);
  void		svc_drop(struct svc_rqst *);
  void		svc_sock_update_bufs(struct svc_serv *serv);
- int		svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
- int		svc_addsock(struct svc_serv *serv, int fd, char *name_return);
+ int		svc_sock_names(struct svc_serv *serv, char *buf,
+ 					const size_t buflen,
+ 					const char *toclose);
+ int		svc_addsock(struct svc_serv *serv, const int fd,
+ 					char *name_return, const size_t len);
  void		svc_init_xprt_sock(void);
  void		svc_cleanup_xprt_sock(void);
 +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
 +void		svc_sock_destroy(struct svc_xprt *);
  
  /*
   * svc_makesock socket characteristics
diff --combined net/sunrpc/svcsock.c
index a2a03e500533,b09c80c56ee3..23128ee191ae
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@@ -240,42 -240,76 +240,76 @@@ out
  /*
   * Report socket names for nfsdfs
   */
- static int one_sock_name(char *buf, struct svc_sock *svsk)
+ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
  {
+ 	const struct sock *sk = svsk->sk_sk;
+ 	const char *proto_name = sk->sk_protocol == IPPROTO_UDP ?
+ 							"udp" : "tcp";
  	int len;
  
- 	switch(svsk->sk_sk->sk_family) {
- 	case AF_INET:
- 		len = sprintf(buf, "ipv4 %s %pI4 %d\n",
- 			      svsk->sk_sk->sk_protocol == IPPROTO_UDP ?
- 			      "udp" : "tcp",
- 			      &inet_sk(svsk->sk_sk)->rcv_saddr,
- 			      inet_sk(svsk->sk_sk)->num);
+ 	switch (sk->sk_family) {
+ 	case PF_INET:
+ 		len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n",
+ 				proto_name,
+ 				&inet_sk(sk)->rcv_saddr,
+ 				inet_sk(sk)->num);
+ 		break;
+ 	case PF_INET6:
+ 		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
+ 				proto_name,
+ 				&inet6_sk(sk)->rcv_saddr,
+ 				inet_sk(sk)->num);
  		break;
  	default:
- 		len = sprintf(buf, "*unknown-%d*\n",
- 			       svsk->sk_sk->sk_family);
+ 		len = snprintf(buf, remaining, "*unknown-%d*\n",
+ 				sk->sk_family);
+ 	}
+ 
+ 	if (len >= remaining) {
+ 		*buf = '\0';
+ 		return -ENAMETOOLONG;
  	}
  	return len;
  }
  
- int
- svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
+ /**
+  * svc_sock_names - construct a list of listener names in a string
+  * @serv: pointer to RPC service
+  * @buf: pointer to a buffer to fill in with socket names
+  * @buflen: size of the buffer to be filled
+  * @toclose: pointer to '\0'-terminated C string containing the name
+  *		of a listener to be closed
+  *
+  * Fills in @buf with a '\n'-separated list of names of listener
+  * sockets.  If @toclose is not NULL, the socket named by @toclose
+  * is closed, and is not included in the output list.
+  *
+  * Returns positive length of the socket name string, or a negative
+  * errno value on error.
+  */
+ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
+ 		   const char *toclose)
  {
  	struct svc_sock *svsk, *closesk = NULL;
  	int len = 0;
  
  	if (!serv)
  		return 0;
+ 
  	spin_lock_bh(&serv->sv_lock);
  	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
- 		int onelen = one_sock_name(buf+len, svsk);
- 		if (toclose && strcmp(toclose, buf+len) == 0)
+ 		int onelen = svc_one_sock_name(svsk, buf + len, buflen - len);
+ 		if (onelen < 0) {
+ 			len = onelen;
+ 			break;
+ 		}
+ 		if (toclose && strcmp(toclose, buf + len) == 0)
  			closesk = svsk;
  		else
  			len += onelen;
  	}
  	spin_unlock_bh(&serv->sv_lock);
+ 
  	if (closesk)
  		/* Should unregister with portmap, but you cannot
  		 * unregister just one protocol...
@@@ -346,6 -380,7 +380,7 @@@ static void svc_sock_setbufsize(struct 
  	sock->sk->sk_sndbuf = snd * 2;
  	sock->sk->sk_rcvbuf = rcv * 2;
  	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
+ 	sock->sk->sk_write_space(sock->sk);
  	release_sock(sock->sk);
  #endif
  }
@@@ -387,6 -422,15 +422,15 @@@ static void svc_write_space(struct soc
  	}
  }
  
+ static void svc_tcp_write_space(struct sock *sk)
+ {
+ 	struct socket *sock = sk->sk_socket;
+ 
+ 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock)
+ 		clear_bit(SOCK_NOSPACE, &sock->flags);
+ 	svc_write_space(sk);
+ }
+ 
  /*
   * Copy the UDP datagram's destination address to the rqstp structure.
   * The 'destination' address in this case is the address to which the
@@@ -427,13 -471,14 +471,14 @@@ static int svc_udp_recvfrom(struct svc_
  		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
  	} buffer;
  	struct cmsghdr *cmh = &buffer.hdr;
- 	int		err, len;
  	struct msghdr msg = {
  		.msg_name = svc_addr(rqstp),
  		.msg_control = cmh,
  		.msg_controllen = sizeof(buffer),
  		.msg_flags = MSG_DONTWAIT,
  	};
+ 	size_t len;
+ 	int err;
  
  	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
  	    /* udp sockets need large rcvbuf as all pending
@@@ -465,8 -510,8 +510,8 @@@
  		return -EAGAIN;
  	}
  	len = svc_addr_len(svc_addr(rqstp));
- 	if (len < 0)
- 		return len;
+ 	if (len == 0)
+ 		return -EAFNOSUPPORT;
  	rqstp->rq_addrlen = len;
  	if (skb->tstamp.tv64 == 0) {
  		skb->tstamp = ktime_get_real();
@@@ -980,25 -1025,16 +1025,16 @@@ static void svc_tcp_prep_reply_hdr(stru
  static int svc_tcp_has_wspace(struct svc_xprt *xprt)
  {
  	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
- 	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;
+ 	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
  	int required;
- 	int wspace;
  
- 	/*
- 	 * Set the SOCK_NOSPACE flag before checking the available
- 	 * sock space.
- 	 */
+ 	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
+ 		return 1;
+ 	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
+ 	if (sk_stream_wspace(svsk->sk_sk) >= required)
+ 		return 1;
  	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- 	required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg;
- 	wspace = sk_stream_wspace(svsk->sk_sk);
- 
- 	if (wspace < sk_stream_min_wspace(svsk->sk_sk))
- 		return 0;
- 	if (required * 2 > wspace)
- 		return 0;
- 
- 	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- 	return 1;
+ 	return 0;
  }
  
  static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
@@@ -1054,7 -1090,7 +1090,7 @@@ static void svc_tcp_init(struct svc_soc
  		dprintk("setting up TCP socket for reading\n");
  		sk->sk_state_change = svc_tcp_state_change;
  		sk->sk_data_ready = svc_tcp_data_ready;
- 		sk->sk_write_space = svc_write_space;
+ 		sk->sk_write_space = svc_tcp_write_space;
  
  		svsk->sk_reclen = 0;
  		svsk->sk_tcplen = 0;
@@@ -1148,9 -1184,19 +1184,19 @@@ static struct svc_sock *svc_setup_socke
  	return svsk;
  }
  
- int svc_addsock(struct svc_serv *serv,
- 		int fd,
- 		char *name_return)
+ /**
+  * svc_addsock - add a listener socket to an RPC service
+  * @serv: pointer to RPC service to which to add a new listener
+  * @fd: file descriptor of the new listener
+  * @name_return: pointer to buffer to fill in with name of listener
+  * @len: size of the buffer
+  *
+  * Fills in socket name and returns positive length of name if successful.
+  * Name is terminated with '\n'.  On error, returns a negative errno
+  * value.
+  */
+ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
+ 		const size_t len)
  {
  	int err = 0;
  	struct socket *so = sockfd_lookup(fd, &err);
@@@ -1190,7 -1236,7 +1236,7 @@@
  		sockfd_put(so);
  		return err;
  	}
- 	return one_sock_name(name_return, svsk);
+ 	return svc_one_sock_name(svsk, name_return, len);
  }
  EXPORT_SYMBOL_GPL(svc_addsock);
  
@@@ -1327,42 -1373,3 +1373,42 @@@ static void svc_sock_free(struct svc_xp
  		sock_release(svsk->sk_sock);
  	kfree(svsk);
  }
 +
 +/*
 + * Create a svc_xprt.
 + *
 + * For internal use only (e.g. nfsv4.1 backchannel).
 + * Callers should typically use the xpo_create() method.
 + */
 +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
 +{
 +	struct svc_sock *svsk;
 +	struct svc_xprt *xprt = NULL;
 +
 +	dprintk("svc: %s\n", __func__);
 +	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
 +	if (!svsk)
 +		goto out;
 +
 +	xprt = &svsk->sk_xprt;
 +	if (prot == IPPROTO_TCP)
 +		svc_xprt_init(&svc_tcp_class, xprt, serv);
 +	else if (prot == IPPROTO_UDP)
 +		svc_xprt_init(&svc_udp_class, xprt, serv);
 +	else
 +		BUG();
 +out:
 +	dprintk("svc: %s return %p\n", __func__, xprt);
 +	return xprt;
 +}
 +EXPORT_SYMBOL_GPL(svc_sock_create);
 +
 +/*
 + * Destroy a svc_sock.
 + */
 +void svc_sock_destroy(struct svc_xprt *xprt)
 +{
 +	if (xprt)
 +		kfree(container_of(xprt, struct svc_sock, sk_xprt));
 +}
 +EXPORT_SYMBOL_GPL(svc_sock_destroy);