From: Linus Torvalds Date: Mon, 22 Jun 2009 19:55:50 +0000 (-0700) Subject: Merge branch 'for-2.6.31' of git://fieldses.org/git/linux-nfsd X-Git-Tag: v2.6.31-rc1~57 X-Git-Url: https://repo.jachan.dev/J-linux.git/commitdiff_plain/7e0338c0de18c50f09aea1fbef45110cf7d64a3c?hp=-c Merge branch 'for-2.6.31' of git://fieldses.org/git/linux-nfsd * 'for-2.6.31' of git://fieldses.org/git/linux-nfsd: (60 commits) SUNRPC: Fix the TCP server's send buffer accounting nfsd41: Backchannel: minorversion support for the back channel nfsd41: Backchannel: cleanup nfs4.0 callback encode routines nfsd41: Remove ip address collision detection case nfsd: optimise the starting of zero threads when none are running. nfsd: don't take nfsd_mutex twice when setting number of threads. nfsd41: sanity check client drc maxreqs nfsd41: move channel attributes from nfsd4_session to a nfsd4_channel_attr struct NFS: kill off complicated macro 'PROC' sunrpc: potential memory leak in function rdma_read_xdr nfsd: minor nfsd_vfs_write cleanup nfsd: Pull write-gathering code out of nfsd_vfs_write nfsd: track last inode only in use_wgather case sunrpc: align cache_clean work's timer nfsd: Use write gathering only with NFSv2 NFSv4: kill off complicated macro 'PROC' NFSv4: do exact check about attribute specified knfsd: remove unreported filehandle stats counters knfsd: fix reply cache memory corruption knfsd: reply cache cleanups ... --- 7e0338c0de18c50f09aea1fbef45110cf7d64a3c diff --combined fs/Kconfig index d78e950402c1,44ab328ceb2a..a97263be6a91 --- a/fs/Kconfig +++ b/fs/Kconfig @@@ -39,13 -39,6 +39,13 @@@ config FS_POSIX_AC bool default n +source "fs/xfs/Kconfig" +source "fs/gfs2/Kconfig" +source "fs/ocfs2/Kconfig" +source "fs/btrfs/Kconfig" + +endif # BLOCK + config FILE_LOCKING bool "Enable POSIX file locking API" if EMBEDDED default y @@@ -54,6 -47,13 +54,6 @@@ for filesystems like NFS and for the flock() system call. Disabling this option saves about 11k. -source "fs/xfs/Kconfig" -source "fs/gfs2/Kconfig" -source "fs/ocfs2/Kconfig" -source "fs/btrfs/Kconfig" - -endif # BLOCK - source "fs/notify/Kconfig" source "fs/quota/Kconfig" @@@ -62,16 -62,6 +62,16 @@@ source "fs/autofs/Kconfig source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" +config CUSE + tristate "Character device in Userpace support" + depends on FUSE_FS + help + This FUSE extension allows character devices to be + implemented in userspace. + + If you want to develop or use userspace character device + based on CUSE, answer Y or M. + config GENERIC_ACL bool select FS_POSIX_ACL @@@ -134,7 -124,7 +134,7 @@@ config TMPFS_POSIX_AC config HUGETLBFS bool "HugeTLB file system support" depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ - (S390 && 64BIT) || BROKEN + (S390 && 64BIT) || SYS_SUPPORTS_HUGETLBFS || BROKEN help hugetlbfs is a filesystem backing for HugeTLB pages, based on ramfs. For architectures that support it, say Y here and read @@@ -236,10 -226,12 +236,12 @@@ source "fs/nfsd/Kconfig config LOCKD tristate + depends on FILE_LOCKING config LOCKD_V4 bool depends on NFSD_V3 || NFS_V3 + depends on FILE_LOCKING default y config EXPORTFS diff --combined fs/nfs/Kconfig index 5d6d6f415935,7dbb8f27b9d6..2a77bc25d5af --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@@ -1,6 -1,6 +1,6 @@@ config NFS_FS tristate "NFS client support" - depends on INET + depends on INET && FILE_LOCKING select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL @@@ -74,15 -74,6 +74,15 @@@ config NFS_V If unsure, say N. +config NFS_V4_1 + bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)" + depends on NFS_V4 && EXPERIMENTAL + help + This option enables support for minor version 1 of the NFSv4 protocol + (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. + + Unless you're an NFS developer, say N. + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --combined fs/nfsd/export.c index 8b1f8efb4690,6eb918153fd4..b92a27629fb7 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@@ -464,16 -464,11 +464,11 @@@ static int secinfo_parse(char **mesg, c if (err) return err; /* - * Just a quick sanity check; we could also try to check - * whether this pseudoflavor is supported, but at worst - * an unsupported pseudoflavor on the export would just - * be a pseudoflavor that won't match the flavor of any - * authenticated request. The administrator will - * probably discover the problem when someone fails to - * authenticate. + * XXX: It would be nice to also check whether this + * pseudoflavor is supported, so we can discover the + * problem at export time instead of when a client fails + * to authenticate. */ - if (f->pseudoflavor < 0) - return -EINVAL; err = get_int(mesg, &f->flags); if (err) return err; @@@ -847,8 -842,9 +842,8 @@@ exp_get_fsid_key(svc_client *clp, int f return exp_find_key(clp, FSID_NUM, fsidv, NULL); } -static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, + struct cache_req *reqp) { struct svc_export *exp, key; int err; @@@ -857,7 -853,8 +852,7 @@@ return ERR_PTR(-ENOENT); key.ex_client = clp; - key.ex_path.mnt = mnt; - key.ex_path.dentry = dentry; + key.ex_path = *path; exp = svc_export_lookup(&key); if (exp == NULL) @@@ -871,19 -868,24 +866,19 @@@ /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static struct svc_export *exp_parent(svc_client *clp, struct path *path) { - svc_export *exp; - - dget(dentry); - exp = exp_get_by_name(clp, mnt, dentry, reqp); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = exp_get_by_name(clp, mnt, dentry, reqp); + struct dentry *saved = dget(path->dentry); + svc_export *exp = exp_get_by_name(clp, path, NULL); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = exp_get_by_name(clp, path, NULL); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } @@@ -1011,7 -1013,7 +1006,7 @@@ exp_export(struct nfsctl_export *nxp goto out_put_clp; err = -EINVAL; - exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL); + exp = exp_get_by_name(clp, &path, NULL); memset(&new, 0, sizeof(new)); @@@ -1128,7 -1130,7 +1123,7 @@@ exp_unexport(struct nfsctl_export *nxp goto out_domain; err = -EINVAL; - exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL); + exp = exp_get_by_name(dom, &path, NULL); path_put(&path); if (IS_ERR(exp)) goto out_domain; @@@ -1170,7 -1172,7 +1165,7 @@@ exp_rootfh(svc_client *clp, char *name dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", name, path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, path.mnt, path.dentry, NULL); + exp = exp_parent(clp, &path); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@@ -1200,7 -1202,7 +1195,7 @@@ static struct svc_export *exp_find(stru if (IS_ERR(ek)) return ERR_CAST(ek); - exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp); + exp = exp_get_by_name(clp, &ek->ek_path, reqp); cache_put(&ek->h, &svc_expkey_cache); if (IS_ERR(exp)) @@@ -1240,7 -1242,8 +1235,7 @@@ __be32 check_nfsd_access(struct svc_exp * use exp_get_by_name() or exp_find(). */ struct svc_export * -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); @@@ -1248,7 -1251,8 +1243,7 @@@ goto gss; /* First try the auth_unix client: */ - exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, - &rqstp->rq_chandle); + exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@@ -1260,7 -1264,8 +1255,7 @@@ gss /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, - &rqstp->rq_chandle); + gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@@ -1299,19 -1304,23 +1294,19 @@@ gss } struct svc_export * -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) { - struct svc_export *exp; - - dget(dentry); - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = rqst_exp_get_by_name(rqstp, path); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } diff --combined fs/nfsd/vfs.c index 99f835753596,1cf70616a11e..4145083dcf88 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@@ -55,7 -55,6 +55,7 @@@ #include #endif /* CONFIG_NFSD_V4 */ #include +#include #include @@@ -101,35 -100,36 +101,35 @@@ nfsd_cross_mnt(struct svc_rqst *rqstp, { struct svc_export *exp = *expp, *exp2 = NULL; struct dentry *dentry = *dpp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - struct dentry *mounts = dget(dentry); + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dentry)}; int err = 0; - while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); + while (d_mountpoint(path.dentry) && follow_down(&path)) + ; - exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); + exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { if (PTR_ERR(exp2) != -ENOENT) err = PTR_ERR(exp2); - dput(mounts); - mntput(mnt); + path_put(&path); goto out; } if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ /* - * This is subtle: dentry is *not* under mnt at this point. - * The only reason we are safe is that original mnt is pinned - * down by exp, so we should dput before putting exp. + * This is subtle: path.dentry is *not* on path.mnt + * at this point. The only reason we are safe is that + * original mnt is pinned down by exp, so we should + * put path *before* putting exp */ - dput(dentry); - *dpp = mounts; - exp_put(exp); + *dpp = path.dentry; + path.dentry = dentry; *expp = exp2; - } else { - exp_put(exp2); - dput(mounts); + exp2 = exp; } - mntput(mnt); + path_put(&path); + exp_put(exp2); out: return err; } @@@ -168,29 -168,28 +168,29 @@@ nfsd_lookup_dentry(struct svc_rqst *rqs /* checking mountpoint crossing is very different when stepping up */ struct svc_export *exp2 = NULL; struct dentry *dp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - dentry = dget(dparent); - while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dparent)}; + + while (path.dentry == path.mnt->mnt_root && + follow_up(&path)) ; - dp = dget_parent(dentry); - dput(dentry); - dentry = dp; + dp = dget_parent(path.dentry); + dput(path.dentry); + path.dentry = dp; - exp2 = rqst_exp_parent(rqstp, mnt, dentry); + exp2 = rqst_exp_parent(rqstp, &path); if (PTR_ERR(exp2) == -ENOENT) { - dput(dentry); dentry = dget(dparent); } else if (IS_ERR(exp2)) { host_err = PTR_ERR(exp2); - dput(dentry); - mntput(mnt); + path_put(&path); goto out_nfserr; } else { + dentry = dget(path.dentry); exp_put(exp); exp = exp2; } - mntput(mnt); + path_put(&path); } } else { fh_lock(fhp); @@@ -736,8 -735,6 +736,8 @@@ nfsd_open(struct svc_rqst *rqstp, struc flags, cred); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); + else + ima_counts_get(*filp); out_nfserr: err = nfserrno(host_err); out: @@@ -966,6 -963,43 +966,43 @@@ static void kill_suid(struct dentry *de mutex_unlock(&dentry->d_inode->i_mutex); } + /* + * Gathered writes: If another process is currently writing to the file, + * there's a high chance this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the file with each write + * request, we sleep for 10 msec. + * + * I don't know if this roughly approximates C. Juszak's idea of + * gathered writes, but it's a nice and simple solution (IMHO), and it + * seems to work:-) + * + * Note: we do this only in the NFSv2 case, since v3 and higher have a + * better tool (separate unstable writes and commits) for solving this + * problem. + */ + static int wait_for_concurrent_writes(struct file *file) + { + struct inode *inode = file->f_path.dentry->d_inode; + static ino_t last_ino; + static dev_t last_dev; + int err = 0; + + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); + msleep(10); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); + } + + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); + err = nfsd_sync(file); + } + last_ino = inode->i_ino; + last_dev = inode->i_sb->s_dev; + return err; + } + static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, @@@ -978,6 -1012,7 +1015,7 @@@ __be32 err = 0; int host_err; int stable = *stablep; + int use_wgather; #ifdef MSNFS err = nfserr_perm; @@@ -996,9 -1031,10 +1034,10 @@@ * - the sync export option has been set, or * - the client requested O_SYNC behavior (NFSv3 feature). * - The file system doesn't support fsync(). - * When gathered writes have been configured for this volume, + * When NFSv2 gathered writes have been configured for this volume, * flushing the data to disk is handled separately below. */ + use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!file->f_op->fsync) {/* COMMIT3 cannot work */ stable = 2; @@@ -1007,7 -1043,7 +1046,7 @@@ if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) { + if (stable && !use_wgather) { spin_lock(&file->f_lock); file->f_flags |= O_SYNC; spin_unlock(&file->f_lock); @@@ -1017,52 -1053,20 +1056,20 @@@ oldfs = get_fs(); set_fs(KERNEL_DS); host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); - if (host_err >= 0) { - *cnt = host_err; - nfsdstats.io_write += host_err; - fsnotify_modify(file->f_path.dentry); - } + if (host_err < 0) + goto out_nfserr; + *cnt = host_err; + nfsdstats.io_write += host_err; + fsnotify_modify(file->f_path.dentry); /* clear setuid/setgid flag after write */ - if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + if (inode->i_mode & (S_ISUID | S_ISGID)) kill_suid(dentry); - if (host_err >= 0 && stable) { - static ino_t last_ino; - static dev_t last_dev; - - /* - * Gathered writes: If another process is currently - * writing to the file, there's a high chance - * this is another nfsd (triggered by a bulk write - * from a client's biod). Rather than syncing the - * file with each write request, we sleep for 10 msec. - * - * I don't know if this roughly approximates - * C. Juszak's idea of gathered writes, but it's a - * nice and simple solution (IMHO), and it seems to - * work:-) - */ - if (EX_WGATHER(exp)) { - if (atomic_read(&inode->i_writecount) > 1 - || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", task_pid_nr(current)); - msleep(10); - dprintk("nfsd: write resume %d\n", task_pid_nr(current)); - } - - if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - host_err=nfsd_sync(file); - } - #if 0 - wake_up(&inode->i_wait); - #endif - } - last_ino = inode->i_ino; - last_dev = inode->i_sb->s_dev; - } + if (stable && use_wgather) + host_err = wait_for_concurrent_writes(file); + out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) err = 0; @@@ -2027,7 -2031,6 +2034,7 @@@ nfsd_permission(struct svc_rqst *rqstp struct dentry *dentry, int acc) { struct inode *inode = dentry->d_inode; + struct path path; int err; if (acc == NFSD_MAY_NOP) @@@ -2100,17 -2103,7 +2107,17 @@@ if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) err = inode_permission(inode, MAY_EXEC); + if (err) + goto nfsd_out; + /* Do integrity (permission) checking now, but defer incrementing + * IMA counts to the actual file open. + */ + path.mnt = exp->ex_path.mnt; + path.dentry = dentry; + err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), + IMA_COUNT_LEAVE); +nfsd_out: return err? nfserrno(err) : 0; } diff --combined include/linux/fs.h index 74a57938c880,58e843b26b98..1ff5e4e01952 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@@ -729,8 -729,8 +729,8 @@@ struct inode struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; blkcnt_t i_blocks; + unsigned int i_blkbits; unsigned short i_bytes; umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ @@@ -751,12 -751,13 +751,12 @@@ struct block_device *i_bdev; struct cdev *i_cdev; }; - int i_cindex; __u32 i_generation; -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ +#ifdef CONFIG_FSNOTIFY + __u32 i_fsnotify_mask; /* all events this inode cares about */ + struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ #endif #ifdef CONFIG_INOTIFY @@@ -879,7 -880,7 +879,7 @@@ struct file_ra_state there are only # of pages ahead */ unsigned int ra_pages; /* Maximum readahead window */ - int mmap_miss; /* Cache miss stat for mmap accesses */ + unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ loff_t prev_pos; /* Cache last read() position */ }; @@@ -1107,6 -1108,7 +1107,7 @@@ extern void locks_copy_lock(struct file extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); + extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); @@@ -1320,7 -1322,7 +1321,7 @@@ struct super_block struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_need_sync_fs; + int s_need_sync; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; @@@ -1371,6 -1373,11 +1372,6 @@@ * generic_show_options() */ char *s_options; - - /* - * storage for asynchronous operations - */ - struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); @@@ -1794,7 -1801,7 +1795,7 @@@ extern struct vfsmount *kern_mount_data extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *); +extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); @@@ -1919,9 -1926,8 +1920,9 @@@ extern void __init vfs_caches_init(unsi extern struct kmem_cache *names_cachep; -#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) -#define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) +#define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) +#define __getname() __getname_gfp(GFP_KERNEL) +#define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) #ifndef CONFIG_AUDITSYSCALL #define putname(name) __putname(name) #else @@@ -1942,6 -1948,8 +1943,6 @@@ extern struct super_block *freeze_bdev( extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_super(struct super_block *); -extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@@ -1957,7 -1965,6 +1958,7 @@@ static inline int thaw_bdev(struct bloc return 0; } #endif +extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; @@@ -2037,6 -2044,9 +2038,6 @@@ extern int __invalidate_device(struct b extern int invalidate_partition(struct gendisk *, int); #endif extern int invalidate_inodes(struct super_block *); -unsigned long __invalidate_mapping_pages(struct address_space *mapping, - pgoff_t start, pgoff_t end, - bool be_atomic); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); @@@ -2073,8 -2083,12 +2074,8 @@@ extern int filemap_fdatawrite_range(str extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); -extern void sync_filesystems(int wait); -extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); #ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); #endif @@@ -2192,8 -2206,6 +2193,8 @@@ extern int generic_segment_checks(cons /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t default_file_splice_read(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, @@@ -2343,8 -2355,6 +2344,8 @@@ extern void simple_release_fs(struct vf extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); +extern int simple_fsync(struct file *, struct dentry *, int); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); diff --combined include/linux/nfsd/state.h index 7ef4b7ad1214,f5a95fd34312..57ab2ed08459 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@@ -41,6 -41,7 +41,6 @@@ #include #include -#define NFS4_OPAQUE_LIMIT 1024 typedef struct { u32 cl_boot; u32 cl_id; @@@ -60,15 -61,6 +60,6 @@@ typedef struct #define si_stateownerid si_opaque.so_stateownerid #define si_fileid si_opaque.so_fileid - - struct nfs4_cb_recall { - u32 cbr_ident; - int cbr_trunc; - stateid_t cbr_stateid; - struct knfsd_fh cbr_fh; - struct nfs4_delegation *cbr_dp; - }; - struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; @@@ -80,22 -72,25 +71,25 @@@ struct file *dl_vfs_file; u32 dl_type; time_t dl_time; - struct nfs4_cb_recall dl_recall; + /* For recall: */ + u32 dl_ident; + stateid_t dl_stateid; + struct knfsd_fh dl_fh; + int dl_retries; }; - #define dl_stateid dl_recall.cbr_stateid - #define dl_fh dl_recall.cbr_fh - /* client delegation callback info */ - struct nfs4_callback { + struct nfs4_cb_conn { /* SETCLIENTID info */ u32 cb_addr; unsigned short cb_port; u32 cb_prog; - u32 cb_ident; + u32 cb_minorversion; + u32 cb_ident; /* minorversion 0 only */ /* RPC client info */ atomic_t cb_set; /* successful CB_NULL call */ struct rpc_clnt * cb_client; + struct rpc_cred * cb_cred; }; /* Maximum number of slots per session. 128 is useful for long haul TCP */ @@@ -121,6 -116,17 +115,17 @@@ struct nfsd4_slot struct nfsd4_cache_entry sl_cache_entry; }; + struct nfsd4_channel_attrs { + u32 headerpadsz; + u32 maxreq_sz; + u32 maxresp_sz; + u32 maxresp_cached; + u32 maxops; + u32 maxreqs; + u32 nr_rdma_attrs; + u32 rdma_attrs; + }; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@@ -128,11 -134,8 +133,8 @@@ u32 se_flags; struct nfs4_client *se_client; /* for expire_client */ struct nfs4_sessionid se_sessionid; - u32 se_fmaxreq_sz; - u32 se_fmaxresp_sz; - u32 se_fmaxresp_cached; - u32 se_fmaxops; - u32 se_fnumslots; + struct nfsd4_channel_attrs se_fchannel; + struct nfsd4_channel_attrs se_bchannel; struct nfsd4_slot se_slots[]; /* forward channel slots */ }; @@@ -184,7 -187,7 +186,7 @@@ struct nfs4_client struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ - struct nfs4_callback cl_callback; /* callback info */ + struct nfs4_cb_conn cl_cb_conn; /* callback info */ atomic_t cl_count; /* ref count */ u32 cl_firststate; /* recovery dir creation */ diff --combined include/linux/sunrpc/svcsock.h index 6bb1ec4ae310,827163138949..04dba23c59f2 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@@ -38,12 -38,13 +38,15 @@@ int svc_recv(struct svc_rqst *, long) int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); - int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); - int svc_addsock(struct svc_serv *serv, int fd, char *name_return); + int svc_sock_names(struct svc_serv *serv, char *buf, + const size_t buflen, + const char *toclose); + int svc_addsock(struct svc_serv *serv, const int fd, + char *name_return, const size_t len); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); +void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --combined net/sunrpc/svcsock.c index a2a03e500533,b09c80c56ee3..23128ee191ae --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@@ -240,42 -240,76 +240,76 @@@ out /* * Report socket names for nfsdfs */ - static int one_sock_name(char *buf, struct svc_sock *svsk) + static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { + const struct sock *sk = svsk->sk_sk; + const char *proto_name = sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp"; int len; - switch(svsk->sk_sk->sk_family) { - case AF_INET: - len = sprintf(buf, "ipv4 %s %pI4 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + switch (sk->sk_family) { + case PF_INET: + len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", + proto_name, + &inet_sk(sk)->rcv_saddr, + inet_sk(sk)->num); + break; + case PF_INET6: + len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", + proto_name, + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; default: - len = sprintf(buf, "*unknown-%d*\n", - svsk->sk_sk->sk_family); + len = snprintf(buf, remaining, "*unknown-%d*\n", + sk->sk_family); + } + + if (len >= remaining) { + *buf = '\0'; + return -ENAMETOOLONG; } return len; } - int - svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) + /** + * svc_sock_names - construct a list of listener names in a string + * @serv: pointer to RPC service + * @buf: pointer to a buffer to fill in with socket names + * @buflen: size of the buffer to be filled + * @toclose: pointer to '\0'-terminated C string containing the name + * of a listener to be closed + * + * Fills in @buf with a '\n'-separated list of names of listener + * sockets. If @toclose is not NULL, the socket named by @toclose + * is closed, and is not included in the output list. + * + * Returns positive length of the socket name string, or a negative + * errno value on error. + */ + int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, + const char *toclose) { struct svc_sock *svsk, *closesk = NULL; int len = 0; if (!serv) return 0; + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { - int onelen = one_sock_name(buf+len, svsk); - if (toclose && strcmp(toclose, buf+len) == 0) + int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); + if (onelen < 0) { + len = onelen; + break; + } + if (toclose && strcmp(toclose, buf + len) == 0) closesk = svsk; else len += onelen; } spin_unlock_bh(&serv->sv_lock); + if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... @@@ -346,6 -380,7 +380,7 @@@ static void svc_sock_setbufsize(struct sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif } @@@ -387,6 -422,15 +422,15 @@@ static void svc_write_space(struct soc } } + static void svc_tcp_write_space(struct sock *sk) + { + struct socket *sock = sk->sk_socket; + + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) + clear_bit(SOCK_NOSPACE, &sock->flags); + svc_write_space(sk); + } + /* * Copy the UDP datagram's destination address to the rqstp structure. * The 'destination' address in this case is the address to which the @@@ -427,13 -471,14 +471,14 @@@ static int svc_udp_recvfrom(struct svc_ long all[SVC_PKTINFO_SPACE / sizeof(long)]; } buffer; struct cmsghdr *cmh = &buffer.hdr; - int err, len; struct msghdr msg = { .msg_name = svc_addr(rqstp), .msg_control = cmh, .msg_controllen = sizeof(buffer), .msg_flags = MSG_DONTWAIT, }; + size_t len; + int err; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* udp sockets need large rcvbuf as all pending @@@ -465,8 -510,8 +510,8 @@@ return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); - if (len < 0) - return len; + if (len == 0) + return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); @@@ -980,25 -1025,16 +1025,16 @@@ static void svc_tcp_prep_reply_hdr(stru static int svc_tcp_has_wspace(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; + struct svc_serv *serv = svsk->sk_xprt.xpt_server; int required; - int wspace; - /* - * Set the SOCK_NOSPACE flag before checking the available - * sock space. - */ + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required) + return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; - wspace = sk_stream_wspace(svsk->sk_sk); - - if (wspace < sk_stream_min_wspace(svsk->sk_sk)) - return 0; - if (required * 2 > wspace) - return 0; - - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 1; + return 0; } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, @@@ -1054,7 -1090,7 +1090,7 @@@ static void svc_tcp_init(struct svc_soc dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_tcp_data_ready; - sk->sk_write_space = svc_write_space; + sk->sk_write_space = svc_tcp_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; @@@ -1148,9 -1184,19 +1184,19 @@@ static struct svc_sock *svc_setup_socke return svsk; } - int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return) + /** + * svc_addsock - add a listener socket to an RPC service + * @serv: pointer to RPC service to which to add a new listener + * @fd: file descriptor of the new listener + * @name_return: pointer to buffer to fill in with name of listener + * @len: size of the buffer + * + * Fills in socket name and returns positive length of name if successful. + * Name is terminated with '\n'. On error, returns a negative errno + * value. + */ + int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@@ -1190,7 -1236,7 +1236,7 @@@ sockfd_put(so); return err; } - return one_sock_name(name_return, svsk); + return svc_one_sock_name(svsk, name_return, len); } EXPORT_SYMBOL_GPL(svc_addsock); @@@ -1327,42 -1373,3 +1373,42 @@@ static void svc_sock_free(struct svc_xp sock_release(svsk->sk_sock); kfree(svsk); } + +/* + * Create a svc_xprt. + * + * For internal use only (e.g. nfsv4.1 backchannel). + * Callers should typically use the xpo_create() method. + */ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +{ + struct svc_sock *svsk; + struct svc_xprt *xprt = NULL; + + dprintk("svc: %s\n", __func__); + svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); + if (!svsk) + goto out; + + xprt = &svsk->sk_xprt; + if (prot == IPPROTO_TCP) + svc_xprt_init(&svc_tcp_class, xprt, serv); + else if (prot == IPPROTO_UDP) + svc_xprt_init(&svc_udp_class, xprt, serv); + else + BUG(); +out: + dprintk("svc: %s return %p\n", __func__, xprt); + return xprt; +} +EXPORT_SYMBOL_GPL(svc_sock_create); + +/* + * Destroy a svc_sock. + */ +void svc_sock_destroy(struct svc_xprt *xprt) +{ + if (xprt) + kfree(container_of(xprt, struct svc_sock, sk_xprt)); +} +EXPORT_SYMBOL_GPL(svc_sock_destroy);