2 FUSE: Filesystem in Userspace
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 #include <linux/xattr.h>
17 #include <linux/iversion.h>
18 #include <linux/posix_acl.h>
20 static void fuse_advise_use_readdirplus(struct inode *dir)
22 struct fuse_inode *fi = get_fuse_inode(dir);
24 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
/*
 * Storage helpers for the dentry validity time.  Two variants of
 * __fuse_dentry_settime()/fuse_dentry_time() appear below: the first pair,
 * under BITS_PER_LONG >= 64, keeps the u64 directly in the d_fsdata pointer;
 * the second pair reads/writes the time member of a union fuse_dentry that
 * d_fsdata points to.
 * NOTE(review): the embedded numbering skips 36-43, so the directive that
 * separates the two variants and the union definition are not visible in
 * this listing - confirm against the full file.
 */
27 #if BITS_PER_LONG >= 64
/* Variant 1: the time is cast into the d_fsdata pointer itself. */
28 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
30 entry->d_fsdata = (void *) time;
/* Variant 1 reader: cast the d_fsdata pointer back to a u64. */
33 static inline u64 fuse_dentry_time(const struct dentry *entry)
35 return (u64)entry->d_fsdata;
/* Variant 2: d_fsdata points to a union fuse_dentry holding the time. */
44 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
46 ((union fuse_dentry *) dentry->d_fsdata)->time = time;
/* Variant 2 reader: fetch the time member through d_fsdata. */
49 static inline u64 fuse_dentry_time(const struct dentry *entry)
51 return ((union fuse_dentry *) entry->d_fsdata)->time;
/*
 * Store a new validity time on a dentry and keep the DCACHE_OP_DELETE flag
 * in step with the connection delete_stale setting.
 *
 * @dentry: dentry whose validity time is updated
 * @time:   new validity time; a zero time on a delete_stale connection
 *          selects the delete state
 */
55 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
57 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
/* delete is true only for a zero time when fc->delete_stale is set */
58 bool delete = !time && fc->delete_stale;
60 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
61 * Don't care about races, either way it's just an optimization
/* Take d_lock only when the current flag disagrees with the wanted state */
63 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
64 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
65 spin_lock(&dentry->d_lock);
/*
 * NOTE(review): embedded numbers skip 66 and 68, so the condition that
 * chooses between the clear and the set below is not visible here.
 */
67 dentry->d_flags &= ~DCACHE_OP_DELETE;
69 dentry->d_flags |= DCACHE_OP_DELETE;
70 spin_unlock(&dentry->d_lock);
/* Record the time through the width-specific helper */
73 __fuse_dentry_settime(dentry, time);
77 * FUSE caches dentries and attributes with separate timeout. The
78 * time in jiffies until the dentry/attributes are valid is stored in
79 * dentry->d_fsdata and fuse_inode->i_time respectively.
83 * Calculate the time in jiffies until a dentry/attributes are valid
/*
 * Convert a (seconds, nanoseconds) validity interval into a jiffies value
 * relative to the current time.
 *
 * @sec:  seconds part of the interval
 * @nsec: nanoseconds part; capped at NSEC_PER_SEC - 1 before conversion
 *
 * The visible return is get_jiffies_64() plus the interval in jiffies.
 * NOTE(review): embedded numbers skip 86-87, 89, 91-92 and 94-96, so any
 * special-casing around the initializer and the return is not shown here.
 */
85 static u64 time_to_jiffies(u64 sec, u32 nsec)
88 struct timespec64 ts = {
/* Cap the nanoseconds just below one full second */
90 min_t(u32, nsec, NSEC_PER_SEC - 1)
93 return get_jiffies_64() + timespec64_to_jiffies(&ts);
99 * Set dentry and possibly attribute timeouts from the lookup/mk*
102 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
104 fuse_dentry_settime(entry,
105 time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
108 static u64 attr_timeout(struct fuse_attr_out *o)
110 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
113 u64 entry_attr_timeout(struct fuse_entry_out *o)
115 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
118 static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
124 * Mark the attributes as stale, so that at the next call to
125 * ->getattr() they will be fetched from userspace
127 void fuse_invalidate_attr(struct inode *inode)
129 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
132 static void fuse_dir_changed(struct inode *dir)
134 fuse_invalidate_attr(dir);
135 inode_maybe_inc_iversion(dir, false);
139 * Mark the attributes as stale due to an atime change. Avoid the invalidate if
142 void fuse_invalidate_atime(struct inode *inode)
144 if (!IS_RDONLY(inode))
145 fuse_invalidate_attr_mask(inode, STATX_ATIME);
149 * Just mark the entry as stale, so that a next attempt to look it up
150 * will result in a new lookup call to userspace
152 * This is called when a dentry is about to become negative and the
153 * timeout is unknown (unlink, rmdir, rename and in some cases
156 void fuse_invalidate_entry_cache(struct dentry *entry)
158 fuse_dentry_settime(entry, 0);
162 * Same as fuse_invalidate_entry_cache(), but also try to remove the
163 * dentry from the hash
/*
 * Invalidate a dentry.  The visible call resets the cached validity time
 * through fuse_invalidate_entry_cache().
 * NOTE(review): embedded numbers skip 167, so a statement that precedes
 * this call is not shown in the listing - confirm against the full file.
 */
165 static void fuse_invalidate_entry(struct dentry *entry)
168 fuse_invalidate_entry_cache(entry);
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 u64 nodeid, const struct qstr *name,
173 struct fuse_entry_out *outarg)
175 memset(outarg, 0, sizeof(struct fuse_entry_out));
176 args->opcode = FUSE_LOOKUP;
177 args->nodeid = nodeid;
178 args->in_numargs = 1;
179 args->in_args[0].size = name->len + 1;
180 args->in_args[0].value = name->name;
181 args->out_numargs = 1;
182 args->out_args[0].size = sizeof(struct fuse_entry_out);
183 args->out_args[0].value = outarg;
187 * Check whether the dentry is still valid
189 * If the entry validity timeout has expired and the dentry is
190 * positive, try to redo the lookup. If the lookup results in a
191 * different inode, then let the VFS invalidate the dentry and redo
192 * the lookup once more. If the lookup results in the same inode,
193 * then refresh the attributes, timeouts and mark the dentry valid.
/*
 * ->d_revalidate callback for FUSE dentries (see fuse_dentry_operations).
 *
 * @entry: dentry being revalidated
 * @flags: lookup flags; LOOKUP_REVAL and LOOKUP_RCU are examined below
 *
 * When the stored validity time has passed, or LOOKUP_REVAL is set, a
 * FUSE_LOOKUP is sent for the name under its parent and the reply is
 * compared with the cached inode (node id and file type bits).  Otherwise
 * only the FUSE_I_INIT_RDPLUS handling at the end is visible.
 * NOTE(review): many short lines (returns, gotos, braces) are missing from
 * this listing, so the exact return values are not shown here.
 */
195 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 struct dentry *parent;
199 struct fuse_conn *fc;
200 struct fuse_inode *fi;
203 inode = d_inode_rcu(entry);
204 if (inode && is_bad_inode(inode))
/* Expired entry, or the caller explicitly asked for revalidation */
206 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
207 (flags & LOOKUP_REVAL)) {
208 struct fuse_entry_out outarg;
210 struct fuse_forget_link *forget;
213 /* For negative dentries, always do a fresh lookup */
/* NOTE(review): the action taken for LOOKUP_RCU is on a line not shown */
218 if (flags & LOOKUP_RCU)
221 fc = get_fuse_conn(inode);
/* Forget link, handed to fuse_queue_forget() if the node id differs */
223 forget = fuse_alloc_forget();
/* Sample the attribute version before issuing the request */
228 attr_version = fuse_get_attr_version(fc);
/* Redo the lookup of this name under the current parent */
230 parent = dget_parent(entry);
231 fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
232 &entry->d_name, &outarg);
233 ret = fuse_simple_request(fc, &args);
235 /* Zero nodeid is same as -ENOENT */
236 if (!ret && !outarg.nodeid)
239 fi = get_fuse_inode(inode);
/* Different node id: queue a forget for the node named in the reply */
240 if (outarg.nodeid != get_node_id(inode)) {
241 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
/* NOTE(review): the statement executed under fi->lock is not shown */
244 spin_lock(&fi->lock);
246 spin_unlock(&fi->lock);
/* A request error or a change of the S_IFMT type bits is caught here */
251 if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
/* Drop cached ACLs, then refresh attributes and the entry timeout */
254 forget_all_cached_acls(inode);
255 fuse_change_attributes(inode, &outarg.attr,
256 entry_attr_timeout(&outarg),
258 fuse_change_entry_timeout(entry, &outarg);
/* FUSE_I_INIT_RDPLUS handling; the RCU branch only tests the bit */
260 fi = get_fuse_inode(inode);
261 if (flags & LOOKUP_RCU) {
262 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
/* Non-RCU: consume the bit and advise readdirplus on the parent */
264 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
265 parent = dget_parent(entry);
266 fuse_advise_use_readdirplus(d_inode(parent));
/*
 * d_fsdata allocation hooks, compiled only when BITS_PER_LONG < 64.
 * NOTE(review): embedded numbers skip 290-293, so the body of
 * fuse_dentry_release() after its local declaration and the closing
 * preprocessor directive are not visible in this listing.
 */
279 #if BITS_PER_LONG < 64
/*
 * Allocate a zeroed union fuse_dentry for the dentry.
 * Returns 0 on success, -ENOMEM when the allocation fails.
 */
280 static int fuse_dentry_init(struct dentry *dentry)
282 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
283 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
285 return dentry->d_fsdata ? 0 : -ENOMEM;
/* Release hook; fd is the union fuse_dentry stored in d_fsdata. */
287 static void fuse_dentry_release(struct dentry *dentry)
289 union fuse_dentry *fd = dentry->d_fsdata;
295 static int fuse_dentry_delete(const struct dentry *dentry)
297 return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
/*
 * Dentry operations for FUSE dentries: the revalidate and delete hooks,
 * plus the d_fsdata init/release hooks when BITS_PER_LONG < 64.
 * NOTE(review): the closing directive and brace are not shown here.
 */
300 const struct dentry_operations fuse_dentry_operations = {
301 .d_revalidate = fuse_dentry_revalidate,
302 .d_delete = fuse_dentry_delete,
303 #if BITS_PER_LONG < 64
304 .d_init = fuse_dentry_init,
305 .d_release = fuse_dentry_release,
/*
 * Dentry operations table for the root dentry (going by its name): the
 * only members visible are the d_fsdata init/release hooks, present when
 * BITS_PER_LONG < 64.
 * NOTE(review): the closing directive and brace are not shown here.
 */
309 const struct dentry_operations fuse_root_dentry_operations = {
310 #if BITS_PER_LONG < 64
311 .d_init = fuse_dentry_init,
312 .d_release = fuse_dentry_release,
/*
 * Tell whether the file type encoded in mode @m is one of: regular file,
 * directory, symlink, character device, block device, FIFO or socket.
 *
 * Returns 1 for those types and 0 for anything else.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;

	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
/*
 * Send FUSE_LOOKUP for @name under node @nodeid and obtain the matching
 * inode through fuse_iget().
 *
 * @sb:     superblock of the FUSE mount
 * @nodeid: node id of the directory being searched
 * @name:   name to look up; its length is checked against FUSE_NAME_MAX
 * @outarg: receives the fuse_entry_out reply
 * @inode:  receives the result of fuse_iget()
 *
 * NOTE(review): the error codes, goto targets and return statement are on
 * lines missing from this listing.
 */
322 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
323 struct fuse_entry_out *outarg, struct inode **inode)
325 struct fuse_conn *fc = get_fuse_conn_super(sb);
327 struct fuse_forget_link *forget;
/* The name length is checked before any request is built */
333 if (name->len > FUSE_NAME_MAX)
/* Forget link, needed if the looked-up node must be forgotten again */
337 forget = fuse_alloc_forget();
/* Sample the attribute version before issuing the request */
342 attr_version = fuse_get_attr_version(fc);
344 fuse_lookup_init(fc, &args, nodeid, name, outarg);
345 err = fuse_simple_request(fc, &args);
346 /* Zero nodeid is same as -ENOENT, but with valid timeout */
347 if (err || !outarg->nodeid)
/* The reply must carry a file type that fuse_valid_type() accepts */
353 if (!fuse_valid_type(outarg->attr.mode))
356 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
357 &outarg->attr, entry_attr_timeout(outarg),
/*
 * NOTE(review): the condition guarding this forget is not shown;
 * presumably it runs only when fuse_iget() failed.
 */
361 fuse_queue_forget(fc, forget, outarg->nodeid, 1);
/*
 * Directory lookup: resolve @entry in @dir via fuse_lookup_name() and
 * splice the resulting inode into the dcache.
 * NOTE(review): the third parameter, the error paths and the return
 * statements are on lines missing from this listing.
 */
372 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
376 struct fuse_entry_out outarg;
378 struct dentry *newent;
379 bool outarg_valid = true;
/* The lookup request is issued between fuse_lock_inode() and the unlock */
382 locked = fuse_lock_inode(dir);
383 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
385 fuse_unlock_inode(dir, locked);
/* -ENOENT: remember that outarg does not hold a valid reply */
386 if (err == -ENOENT) {
387 outarg_valid = false;
/* A child resolving to FUSE_ROOT_ID is singled out (handling not shown) */
394 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
397 newent = d_splice_alias(inode, entry);
398 err = PTR_ERR(newent);
/* Continue with the dentry returned by d_splice_alias() when there is one */
402 entry = newent ? newent : entry;
/*
 * NOTE(review): the condition choosing between the next two calls is not
 * shown; presumably it is outarg_valid.
 */
404 fuse_change_entry_timeout(entry, &outarg);
406 fuse_invalidate_entry_cache(entry);
408 fuse_advise_use_readdirplus(dir);
418 * Atomic create+open operation
420 * If the filesystem doesn't support this, then fall back to separate
421 * 'mknod' + 'open' requests.
/*
 * Create and open a regular file with a single FUSE_CREATE request.
 *
 * @dir:   directory in which the file is created
 * @entry: dentry carrying the new name
 * @file:  struct file being opened
 * @flags: open flags
 *
 * The request carries the create parameters and the name, and expects an
 * entry reply plus an open reply.
 * NOTE(review): the rest of the parameter list, several declarations, the
 * error labels and the return statement are on lines missing from this
 * listing.
 */
423 static int fuse_create_open(struct inode *dir, struct dentry *entry,
424 struct file *file, unsigned flags,
429 struct fuse_conn *fc = get_fuse_conn(dir);
431 struct fuse_forget_link *forget;
432 struct fuse_create_in inarg;
433 struct fuse_open_out outopen;
434 struct fuse_entry_out outentry;
435 struct fuse_inode *fi;
436 struct fuse_file *ff;
438 /* Userspace expects S_IFREG in create mode */
439 BUG_ON((mode & S_IFMT) != S_IFREG);
/* Allocate the forget link and the fuse_file before sending anything */
441 forget = fuse_alloc_forget();
447 ff = fuse_file_alloc(fc);
449 goto out_put_forget_req;
/* NOTE(review): line 451 is not shown; this masking may be conditional */
452 mode &= ~current_umask();
455 memset(&inarg, 0, sizeof(inarg));
456 memset(&outentry, 0, sizeof(outentry));
459 inarg.umask = current_umask();
/* Inputs: create parameters + name.  Outputs: entry reply + open reply */
460 args.opcode = FUSE_CREATE;
461 args.nodeid = get_node_id(dir);
463 args.in_args[0].size = sizeof(inarg);
464 args.in_args[0].value = &inarg;
465 args.in_args[1].size = entry->d_name.len + 1;
466 args.in_args[1].value = entry->d_name.name;
467 args.out_numargs = 2;
468 args.out_args[0].size = sizeof(outentry);
469 args.out_args[0].value = &outentry;
470 args.out_args[1].size = sizeof(outopen);
471 args.out_args[1].value = &outopen;
472 err = fuse_simple_request(fc, &args);
/* The reply must describe a regular file with a valid node id */
477 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
/* Record the reply on the fuse_file, then get the inode */
481 ff->nodeid = outentry.nodeid;
482 ff->open_flags = outopen.open_flags;
483 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
484 &outentry.attr, entry_attr_timeout(&outentry), 0);
/*
 * Undo sequence: release the file on the server and queue a forget.
 * NOTE(review): the guarding condition is on a missing line; presumably
 * it is a fuse_iget() failure.
 */
486 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
487 fuse_sync_release(NULL, ff, flags);
488 fuse_queue_forget(fc, forget, outentry.nodeid, 1);
/* Instantiate the dentry, set its timeout, mark the directory changed */
493 d_instantiate(entry, inode);
494 fuse_change_entry_timeout(entry, &outentry);
495 fuse_dir_changed(dir);
496 err = finish_open(file, entry, generic_file_open);
/*
 * NOTE(review): the branch condition is not shown; presumably the release
 * below runs when finish_open() failed and the two lines after it run on
 * success.
 */
498 fi = get_fuse_inode(inode);
499 fuse_sync_release(fi, ff, flags);
501 file->private_data = ff;
502 fuse_finish_open(inode, file);
/* Forward declaration: fuse_mknod() is called below before its definition. */
514 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
/*
 * Atomic open entry point for directory @dir.
 *
 * Performs the lookup first when the dentry is still in lookup, then
 * tries fuse_create_open() and, on -ENOSYS, falls back to fuse_mknod().
 * NOTE(review): the rest of the parameter list, the labels and several
 * branch bodies are on lines missing from this listing.
 */
515 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
516 struct file *file, unsigned flags,
520 struct fuse_conn *fc = get_fuse_conn(dir);
521 struct dentry *res = NULL;
/* Dentry not yet looked up: do the lookup here */
523 if (d_in_lookup(entry)) {
524 res = fuse_lookup(dir, entry, 0);
/* No creation requested, or the name already exists (action not shown) */
532 if (!(flags & O_CREAT) || d_really_is_positive(entry))
/* From here on a new file is being created */
536 file->f_mode |= FMODE_CREATED;
541 err = fuse_create_open(dir, entry, file, flags, mode);
/* Server lacks the combined request: fall back to a plain mknod */
542 if (err == -ENOSYS) {
551 err = fuse_mknod(dir, entry, mode, 0);
/* Hand the (possibly replaced) dentry back without opening the file */
555 return finish_no_open(file, res);
559 * Code shared between mknod, mkdir, symlink and link
/*
 * Send a prepared creation request and hook the resulting inode into the
 * dcache.  Callers visible in this file: fuse_mknod(), fuse_mkdir(),
 * fuse_symlink() and fuse_link().
 *
 * @fc:    connection
 * @args:  request with opcode and input arguments already set by the caller
 * @dir:   parent directory; supplies the request node id
 * @entry: dentry for the new name
 *
 * NOTE(review): the last parameter (used below as mode), several
 * declarations, the error codes and labels are on lines missing from this
 * listing.
 */
561 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
562 struct inode *dir, struct dentry *entry,
565 struct fuse_entry_out outarg;
569 struct fuse_forget_link *forget;
571 forget = fuse_alloc_forget();
/* Complete the request: target node and a single entry reply */
575 memset(&outarg, 0, sizeof(outarg));
576 args->nodeid = get_node_id(dir);
577 args->out_numargs = 1;
578 args->out_args[0].size = sizeof(outarg);
579 args->out_args[0].value = &outarg;
580 err = fuse_simple_request(fc, args);
582 goto out_put_forget_req;
/* Reject replies with an invalid node id */
585 if (invalid_nodeid(outarg.nodeid))
586 goto out_put_forget_req;
/* Reject replies whose file type bits differ from the requested mode */
588 if ((outarg.attr.mode ^ mode) & S_IFMT)
589 goto out_put_forget_req;
591 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
592 &outarg.attr, entry_attr_timeout(&outarg), 0);
/* NOTE(review): guard not shown; presumably runs when fuse_iget() failed */
594 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
600 d = d_splice_alias(inode, entry);
/*
 * The entry timeout is applied either to the dentry returned by
 * d_splice_alias() or to the original one; the selecting condition is
 * not shown.
 */
605 fuse_change_entry_timeout(d, &outarg);
608 fuse_change_entry_timeout(entry, &outarg);
610 fuse_dir_changed(dir);
/*
 * Create a filesystem node with FUSE_MKNOD.
 *
 * @dir:   parent directory
 * @entry: dentry carrying the new name
 * @mode:  mode of the new node
 *
 * Fills a fuse_mknod_in (device number, umask) and passes the request to
 * create_new_entry() with the name as second input argument.
 * NOTE(review): the rdev parameter line, the args declaration and the
 * lines 625, 629 and 633 are missing from this listing; the umask masking
 * below may be conditional.
 */
618 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
621 struct fuse_mknod_in inarg;
622 struct fuse_conn *fc = get_fuse_conn(dir);
626 mode &= ~current_umask();
628 memset(&inarg, 0, sizeof(inarg));
/* Device number in the encoding produced by new_encode_dev() */
630 inarg.rdev = new_encode_dev(rdev);
631 inarg.umask = current_umask();
632 args.opcode = FUSE_MKNOD;
634 args.in_args[0].size = sizeof(inarg);
635 args.in_args[0].value = &inarg;
636 args.in_args[1].size = entry->d_name.len + 1;
637 args.in_args[1].value = entry->d_name.name;
638 return create_new_entry(fc, &args, dir, entry, mode);
/*
 * Create a file by delegating to fuse_mknod() with a device number of 0.
 * NOTE(review): the remainder of the parameter list (line 642) is missing
 * from this listing.
 */
641 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
644 return fuse_mknod(dir, entry, mode, 0);
/*
 * Create a directory with FUSE_MKDIR.
 *
 * @dir:   parent directory
 * @entry: dentry carrying the new name
 * @mode:  mode of the new directory
 *
 * The request is passed to create_new_entry(), which is told to expect an
 * S_IFDIR node.
 * NOTE(review): the args declaration and lines 653, 657 and 660 are
 * missing from this listing; the umask masking below may be conditional.
 */
647 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
649 struct fuse_mkdir_in inarg;
650 struct fuse_conn *fc = get_fuse_conn(dir);
654 mode &= ~current_umask();
656 memset(&inarg, 0, sizeof(inarg));
658 inarg.umask = current_umask();
659 args.opcode = FUSE_MKDIR;
/* First input: mkdir parameters; second input: the name */
661 args.in_args[0].size = sizeof(inarg);
662 args.in_args[0].value = &inarg;
663 args.in_args[1].size = entry->d_name.len + 1;
664 args.in_args[1].value = entry->d_name.name;
665 return create_new_entry(fc, &args, dir, entry, S_IFDIR);
/*
 * Create a symbolic link with FUSE_SYMLINK.
 *
 * @dir:   parent directory
 * @entry: dentry carrying the new name
 *
 * The request carries the new name first and the link target second, each
 * including its terminating NUL; create_new_entry() is told to expect an
 * S_IFLNK node.
 * NOTE(review): the parameter that supplies link, the args declaration
 * and line 676 are missing from this listing.
 */
668 static int fuse_symlink(struct inode *dir, struct dentry *entry,
671 struct fuse_conn *fc = get_fuse_conn(dir);
/* Target length including the terminating NUL */
672 unsigned len = strlen(link) + 1;
675 args.opcode = FUSE_SYMLINK;
677 args.in_args[0].size = entry->d_name.len + 1;
678 args.in_args[0].value = entry->d_name.name;
679 args.in_args[1].size = len;
680 args.in_args[1].value = link;
681 return create_new_entry(fc, &args, dir, entry, S_IFLNK);
684 void fuse_update_ctime(struct inode *inode)
686 if (!IS_NOCMTIME(inode)) {
687 inode->i_ctime = current_time(inode);
688 mark_inode_dirty_sync(inode);
/*
 * Remove a name with FUSE_UNLINK.
 *
 * @dir:   parent directory; supplies the request node id
 * @entry: dentry of the name being removed
 *
 * After the request, the success branch bumps the attribute version of
 * the inode under fi->lock, invalidates its attributes, marks the
 * directory changed, expires the dentry and updates the ctime.  An -EINTR
 * result instead invalidates the entry.
 * NOTE(review): the declarations of err and args, the branch condition
 * (line 704), the statement under the i_nlink test (line 717) and the
 * return are missing from this listing.
 */
692 static int fuse_unlink(struct inode *dir, struct dentry *entry)
695 struct fuse_conn *fc = get_fuse_conn(dir);
698 args.opcode = FUSE_UNLINK;
699 args.nodeid = get_node_id(dir);
/* Single visible input: the name including its terminating NUL */
701 args.in_args[0].size = entry->d_name.len + 1;
702 args.in_args[0].value = entry->d_name.name;
703 err = fuse_simple_request(fc, &args);
705 struct inode *inode = d_inode(entry);
706 struct fuse_inode *fi = get_fuse_inode(inode);
/* Take a fresh attribute version for the inode while holding fi->lock */
708 spin_lock(&fi->lock);
709 fi->attr_version = atomic64_inc_return(&fc->attr_version);
711 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712 * happen if userspace filesystem is careless. It would be
713 * difficult to enforce correct nlink usage so just ignore this
716 if (inode->i_nlink > 0)
718 spin_unlock(&fi->lock);
719 fuse_invalidate_attr(inode);
720 fuse_dir_changed(dir);
721 fuse_invalidate_entry_cache(entry);
722 fuse_update_ctime(inode);
723 } else if (err == -EINTR)
724 fuse_invalidate_entry(entry);
/*
 * Remove a directory with FUSE_RMDIR.
 *
 * @dir:   parent directory; supplies the request node id
 * @entry: dentry of the directory being removed
 *
 * NOTE(review): the declarations of err and args, the branch condition
 * (line 740) and the return are missing from this listing.
 */
728 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
731 struct fuse_conn *fc = get_fuse_conn(dir);
734 args.opcode = FUSE_RMDIR;
735 args.nodeid = get_node_id(dir);
/* Single visible input: the name including its terminating NUL */
737 args.in_args[0].size = entry->d_name.len + 1;
738 args.in_args[0].value = entry->d_name.name;
739 err = fuse_simple_request(fc, &args);
/* Link count cleared, parent marked changed, dentry expired */
741 clear_nlink(d_inode(entry));
742 fuse_dir_changed(dir);
743 fuse_invalidate_entry_cache(entry);
/* Interrupted request: invalidate the entry */
744 } else if (err == -EINTR)
745 fuse_invalidate_entry(entry);
/*
 * Shared implementation of rename; fuse_rename2() calls it with two
 * different argument sizes.
 *
 * @olddir:  source directory; supplies the request node id
 * @oldent:  dentry being renamed
 * @newdir:  destination directory
 * @newent:  destination dentry
 * @flags:   rename flags; RENAME_EXCHANGE is examined below
 * @opcode:  request opcode to send
 * @argsize: number of bytes of the fuse_rename2_in that are zeroed and sent
 *
 * NOTE(review): the declarations of err and args, lines 760 and 763, the
 * success condition (line 771) and the return are missing from this
 * listing.
 */
749 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
750 struct inode *newdir, struct dentry *newent,
751 unsigned int flags, int opcode, size_t argsize)
754 struct fuse_rename2_in inarg;
755 struct fuse_conn *fc = get_fuse_conn(olddir);
/* Only the first argsize bytes of inarg are cleared and transmitted */
758 memset(&inarg, 0, argsize);
759 inarg.newdir = get_node_id(newdir);
761 args.opcode = opcode;
762 args.nodeid = get_node_id(olddir);
/* Inputs: rename parameters, old name, new name */
764 args.in_args[0].size = argsize;
765 args.in_args[0].value = &inarg;
766 args.in_args[1].size = oldent->d_name.len + 1;
767 args.in_args[1].value = oldent->d_name.name;
768 args.in_args[2].size = newent->d_name.len + 1;
769 args.in_args[2].value = newent->d_name.name;
770 err = fuse_simple_request(fc, &args);
/* Refresh bookkeeping for the renamed inode */
773 fuse_invalidate_attr(d_inode(oldent));
774 fuse_update_ctime(d_inode(oldent));
/* An exchange also touches the inode behind the destination name */
776 if (flags & RENAME_EXCHANGE) {
777 fuse_invalidate_attr(d_inode(newent));
778 fuse_update_ctime(d_inode(newent));
/* Both directories changed; the second call is skipped when they match */
781 fuse_dir_changed(olddir);
782 if (olddir != newdir)
783 fuse_dir_changed(newdir);
785 /* newent will end up negative */
786 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
787 fuse_invalidate_attr(d_inode(newent));
788 fuse_invalidate_entry_cache(newent);
789 fuse_update_ctime(d_inode(newent));
791 } else if (err == -EINTR) {
792 /* If request was interrupted, DEITY only knows if the
793 rename actually took place. If the invalidation
794 fails (e.g. some process has CWD under the renamed
795 directory), then there can be inconsistency between
796 the dcache and the real filesystem. Tough luck. */
797 fuse_invalidate_entry(oldent);
798 if (d_really_is_positive(newent))
799 fuse_invalidate_entry(newent);
/*
 * Rename entry point taking rename flags.
 *
 * Flags other than RENAME_NOREPLACE and RENAME_EXCHANGE are checked for
 * first.  The visible code calls fuse_rename_common() once with the
 * fuse_rename2_in size and the caller flags, and once with the
 * fuse_rename_in size and flags 0.
 * NOTE(review): the flags parameter line, the opcode arguments, the
 * conditions selecting between the two calls, the -ENOSYS handling body
 * and the return values are on lines missing from this listing.
 */
805 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
806 struct inode *newdir, struct dentry *newent,
809 struct fuse_conn *fc = get_fuse_conn(olddir);
/* Any flag outside the two supported ones is caught here */
812 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
/* Server marked as lacking rename2, or protocol minor below 23 */
816 if (fc->no_rename2 || fc->minor < 23)
819 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
821 sizeof(struct fuse_rename2_in));
822 if (err == -ENOSYS) {
/* Call using the smaller fuse_rename_in layout and no flags */
827 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
829 sizeof(struct fuse_rename_in));
/*
 * Create a hard link with FUSE_LINK.
 *
 * @entry:  existing dentry whose inode is being linked
 * @newdir: directory receiving the new name
 * @newent: dentry carrying the new name
 *
 * The request goes through create_new_entry() with the mode of the
 * existing inode.  Afterwards the visible code bumps the attribute
 * version of the inode under fi->lock, invalidates its attributes and
 * updates the ctime; an -EINTR result only invalidates the attributes.
 * NOTE(review): the declarations of err and args, line 847, the success
 * condition (line 859), the statement at line 864 and the return are
 * missing from this listing.
 */
835 static int fuse_link(struct dentry *entry, struct inode *newdir,
836 struct dentry *newent)
839 struct fuse_link_in inarg;
840 struct inode *inode = d_inode(entry);
841 struct fuse_conn *fc = get_fuse_conn(inode);
844 memset(&inarg, 0, sizeof(inarg));
/* The request names the existing inode by its node id */
845 inarg.oldnodeid = get_node_id(inode);
846 args.opcode = FUSE_LINK;
848 args.in_args[0].size = sizeof(inarg);
849 args.in_args[0].value = &inarg;
850 args.in_args[1].size = newent->d_name.len + 1;
851 args.in_args[1].value = newent->d_name.name;
852 err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
853 /* Contrary to "normal" filesystems it can happen that link
854 makes two "logical" inodes point to the same "physical"
855 inode. We invalidate the attributes of the old one, so it
856 will reflect changes in the backing inode (link count,
860 struct fuse_inode *fi = get_fuse_inode(inode);
862 spin_lock(&fi->lock);
863 fi->attr_version = atomic64_inc_return(&fc->attr_version);
865 spin_unlock(&fi->lock);
866 fuse_invalidate_attr(inode);
867 fuse_update_ctime(inode);
868 } else if (err == -EINTR) {
869 fuse_invalidate_attr(inode);
874 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
877 unsigned int blkbits;
878 struct fuse_conn *fc = get_fuse_conn(inode);
880 /* see the comment in fuse_change_attributes() */
881 if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
882 attr->size = i_size_read(inode);
883 attr->mtime = inode->i_mtime.tv_sec;
884 attr->mtimensec = inode->i_mtime.tv_nsec;
885 attr->ctime = inode->i_ctime.tv_sec;
886 attr->ctimensec = inode->i_ctime.tv_nsec;
889 stat->dev = inode->i_sb->s_dev;
890 stat->ino = attr->ino;
891 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
892 stat->nlink = attr->nlink;
893 stat->uid = make_kuid(fc->user_ns, attr->uid);
894 stat->gid = make_kgid(fc->user_ns, attr->gid);
895 stat->rdev = inode->i_rdev;
896 stat->atime.tv_sec = attr->atime;
897 stat->atime.tv_nsec = attr->atimensec;
898 stat->mtime.tv_sec = attr->mtime;
899 stat->mtime.tv_nsec = attr->mtimensec;
900 stat->ctime.tv_sec = attr->ctime;
901 stat->ctime.tv_nsec = attr->ctimensec;
902 stat->size = attr->size;
903 stat->blocks = attr->blocks;
905 if (attr->blksize != 0)
906 blkbits = ilog2(attr->blksize);
908 blkbits = inode->i_sb->s_blocksize_bits;
910 stat->blksize = 1 << blkbits;
/*
 * Fetch attributes from the server with FUSE_GETATTR.
 *
 * @inode: inode to query
 * @stat:  kstat handed on to fuse_fillattr()
 *
 * For regular files with an open file, the request is flagged with
 * FUSE_GETATTR_FH.  A reply whose file type bits differ from the inode
 * marks the inode bad; otherwise the inode attributes are updated and
 * fuse_fillattr() is called.
 * NOTE(review): the file parameter line, the declarations of err, args
 * and attr_version, lines 930 and 936, the branch conditions and the
 * return are missing from this listing; callers pass a NULL stat, so the
 * fuse_fillattr() call is presumably guarded.
 */
913 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
917 struct fuse_getattr_in inarg;
918 struct fuse_attr_out outarg;
919 struct fuse_conn *fc = get_fuse_conn(inode);
/* Sample the attribute version before issuing the request */
923 attr_version = fuse_get_attr_version(fc);
925 memset(&inarg, 0, sizeof(inarg));
926 memset(&outarg, 0, sizeof(outarg));
927 /* Directories have separate file-handle space */
928 if (file && S_ISREG(inode->i_mode)) {
929 struct fuse_file *ff = file->private_data;
931 inarg.getattr_flags |= FUSE_GETATTR_FH;
934 args.opcode = FUSE_GETATTR;
935 args.nodeid = get_node_id(inode);
937 args.in_args[0].size = sizeof(inarg);
938 args.in_args[0].value = &inarg;
939 args.out_numargs = 1;
940 args.out_args[0].size = sizeof(outarg);
941 args.out_args[0].value = &outarg;
942 err = fuse_simple_request(fc, &args);
/* The file type must not have changed behind the inode */
944 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
945 make_bad_inode(inode);
/* Same type: take over the new attributes with their timeout */
948 fuse_change_attributes(inode, &outarg.attr,
949 attr_timeout(&outarg),
952 fuse_fillattr(inode, &outarg.attr, stat);
/*
 * Return attributes for an inode, asking the server only when needed.
 *
 * @inode:        inode to query
 * @file:         open file handed on to fuse_do_getattr()
 * @stat:         kstat to fill
 * @request_mask: attribute bits the caller is interested in
 *
 * The decision considers, in order: AT_STATX_FORCE_SYNC,
 * AT_STATX_DONT_SYNC, whether any requested bit is set in fi->inval_mask,
 * and finally whether fi->i_time has passed.
 * NOTE(review): the flags parameter line, the values assigned to sync in
 * the first three branches, the conditions around the two fill paths and
 * the return are missing from this listing.
 */
958 static int fuse_update_get_attr(struct inode *inode, struct file *file,
959 struct kstat *stat, u32 request_mask,
962 struct fuse_inode *fi = get_fuse_inode(inode);
966 if (flags & AT_STATX_FORCE_SYNC)
968 else if (flags & AT_STATX_DONT_SYNC)
970 else if (request_mask & READ_ONCE(fi->inval_mask))
/* Fallback: sync when the attribute timeout has expired */
973 sync = time_before64(fi->i_time, get_jiffies_64());
/* Refresh path: drop cached ACLs and query the server */
976 forget_all_cached_acls(inode);
977 err = fuse_do_getattr(inode, stat, file);
/* Cached path: generic fill, then the original mode and ino from fi */
979 generic_fillattr(inode, stat);
980 stat->mode = fi->orig_i_mode;
981 stat->ino = fi->orig_ino;
987 int fuse_update_attributes(struct inode *inode, struct file *file)
989 /* Do *not* need to get atime for internal purposes */
990 return fuse_update_get_attr(inode, file, NULL,
991 STATX_BASIC_STATS & ~STATX_ATIME, 0);
/*
 * Invalidate one directory entry, identified by parent node id and name.
 *
 * @sb:            superblock of the FUSE mount
 * @parent_nodeid: node id of the directory holding the entry
 * @child_nodeid:  when non-zero, the entry inode is additionally checked
 *                 against this id and torn down as shown below
 * @name:          entry name; its hash field is computed here
 *
 * NOTE(review): error codes, goto targets, the locking of the parent
 * (only the unlock is visible), reference drops and the return are on
 * lines missing from this listing.
 */
994 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
995 u64 child_nodeid, struct qstr *name)
998 struct inode *parent;
1000 struct dentry *entry;
/* Find the parent inode by node id */
1002 parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
1007 if (!S_ISDIR(parent->i_mode))
/* Get a dentry for the parent and look the name up in the dcache */
1011 dir = d_find_alias(parent);
1015 name->hash = full_name_hash(dir, name->name, name->len);
1016 entry = d_lookup(dir, name);
1021 fuse_dir_changed(parent);
1022 fuse_invalidate_entry(entry);
/* Extra teardown when a specific child node id was given */
1024 if (child_nodeid != 0 && d_really_is_positive(entry)) {
1025 inode_lock(d_inode(entry));
/* The entry must still refer to the expected node */
1026 if (get_node_id(d_inode(entry)) != child_nodeid) {
1030 if (d_mountpoint(entry)) {
/* Directories: shrink the subtree and test that it is empty */
1034 if (d_is_dir(entry)) {
1035 shrink_dcache_parent(entry);
1036 if (!simple_empty(entry)) {
1040 d_inode(entry)->i_flags |= S_DEAD;
1043 clear_nlink(d_inode(entry));
1046 inode_unlock(d_inode(entry));
1055 inode_unlock(parent);
1061 * Calling into a user-controlled filesystem gives the filesystem
1062 * daemon ptrace-like capabilities over the current process. This
1063 * means, that the filesystem daemon is able to record the exact
1064 * filesystem operations performed, and can also control the behavior
1065 * of the requester process in otherwise impossible ways. For example
1066 * it can delay the operation for arbitrary length of time allowing
1067 * DoS against the requester.
1069 * For this reason only those processes can call into the filesystem,
1070 * for which the owner of the mount has ptrace privilege. This
1071 * excludes processes started by other users, suid or sgid processes.
/*
 * Decide whether the current process may call into this filesystem.
 *
 * @fc: connection being accessed
 *
 * With allow_other set, the answer is whether the current process is in
 * the user namespace of the connection.  Otherwise the effective, saved
 * and real uid must all equal fc->user_id and the effective, saved and
 * real gid must all equal fc->group_id.
 * NOTE(review): the return statements following the credential test are
 * on lines missing from this listing.
 */
1073 int fuse_allow_current_process(struct fuse_conn *fc)
1075 const struct cred *cred;
1077 if (fc->allow_other)
1078 return current_in_userns(fc->user_ns);
1080 cred = current_cred();
/* All six ids have to match the ids recorded on the connection */
1081 if (uid_eq(cred->euid, fc->user_id) &&
1082 uid_eq(cred->suid, fc->user_id) &&
1083 uid_eq(cred->uid, fc->user_id) &&
1084 gid_eq(cred->egid, fc->group_id) &&
1085 gid_eq(cred->sgid, fc->group_id) &&
1086 gid_eq(cred->gid, fc->group_id))
/*
 * Ask the server for an access decision with FUSE_ACCESS.
 *
 * @inode: inode being checked
 * @mask:  requested access; only MAY_READ, MAY_WRITE and MAY_EXEC are
 *         forwarded, and MAY_NOT_BLOCK must not be set
 *
 * NOTE(review): the declarations of err and args, the check at lines
 * 1101-1102, the -ENOSYS handling body and the return are missing from
 * this listing.
 */
1092 static int fuse_access(struct inode *inode, int mask)
1094 struct fuse_conn *fc = get_fuse_conn(inode);
1096 struct fuse_access_in inarg;
/* This path sends a request, so non-blocking callers are a bug */
1099 BUG_ON(mask & MAY_NOT_BLOCK);
1104 memset(&inarg, 0, sizeof(inarg));
1105 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1106 args.opcode = FUSE_ACCESS;
1107 args.nodeid = get_node_id(inode);
1108 args.in_numargs = 1;
1109 args.in_args[0].size = sizeof(inarg);
1110 args.in_args[0].value = &inarg;
1111 err = fuse_simple_request(fc, &args);
1112 if (err == -ENOSYS) {
/*
 * Refresh attributes on behalf of a permission check: drop cached ACLs
 * and run fuse_do_getattr() without a kstat or file.
 *
 * @inode: inode to refresh
 * @mask:  permission mask; MAY_NOT_BLOCK is tested first
 *
 * NOTE(review): the statement executed when MAY_NOT_BLOCK is set (line
 * 1122) is missing from this listing.
 */
1119 static int fuse_perm_getattr(struct inode *inode, int mask)
1121 if (mask & MAY_NOT_BLOCK)
1124 forget_all_cached_acls(inode);
1125 return fuse_do_getattr(inode, NULL, NULL);
1129 * Check permission. The two basic access models of FUSE are:
1131 * 1) Local access checking ('default_permissions' mount option) based
1132 * on file mode. This is the plain old disk filesystem permission
1135 * 2) "Remote" access checking, where server is responsible for
1136 * checking permission in each inode operation. An exception to this
1137 * is if ->permission() was invoked from sys_access() in which case an
1138 * access request is sent. Execute permission is still checked
1139 * locally based on file mode.
/*
 * Permission check for FUSE inodes.
 *
 * @inode: inode being accessed
 * @mask:  requested access (MAY_* bits)
 *
 * Order of the visible checks: fuse_allow_current_process(); an attribute
 * refresh when mode/uid/gid are invalidated or timed out; then one of
 * generic_permission() (default_permissions), fuse_access() (MAY_ACCESS
 * or MAY_CHDIR), or a local execute-bit test for regular files.
 * NOTE(review): the declaration and initial value of err, the error codes
 * returned, the places where refreshed is set and the return are on lines
 * missing from this listing.
 */
1141 static int fuse_permission(struct inode *inode, int mask)
1143 struct fuse_conn *fc = get_fuse_conn(inode);
1144 bool refreshed = false;
1147 if (!fuse_allow_current_process(fc))
1151 * If attributes are needed, refresh them before proceeding
1153 if (fc->default_permissions ||
1154 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1155 struct fuse_inode *fi = get_fuse_inode(inode);
/* Only mode, uid and gid matter for the checks below */
1156 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1158 if (perm_mask & READ_ONCE(fi->inval_mask) ||
1159 time_before64(fi->i_time, get_jiffies_64())) {
1162 err = fuse_perm_getattr(inode, mask);
/* Local checking based on the cached mode and ownership */
1168 if (fc->default_permissions) {
1169 err = generic_permission(inode, mask);
1171 /* If permission is denied, try to refresh file
1172 attributes. This is also needed, because the root
1173 node will at first have no permissions */
1174 if (err == -EACCES && !refreshed) {
1175 err = fuse_perm_getattr(inode, mask);
1177 err = generic_permission(inode, mask);
1180 /* Note: the opposite of the above test does not
1181 exist. So if permissions are revoked this won't be
1182 noticed immediately, only after the attribute
1183 timeout has expired */
1184 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1185 err = fuse_access(inode, mask);
/* Execute on a regular file: test the execute bits locally */
1186 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1187 if (!(inode->i_mode & S_IXUGO)) {
/* No execute bit cached: refresh once and test again */
1191 err = fuse_perm_getattr(inode, mask);
1192 if (!err && !(inode->i_mode & S_IXUGO))
/*
 * Read a symlink target into a page with FUSE_READLINK.
 *
 * @inode: symlink inode
 * @page:  page receiving the target
 *
 * At most PAGE_SIZE - 1 bytes are requested; the reply is variable-sized
 * and delivered into pages with page zeroing enabled.
 * NOTE(review): the initializer of ap, the declarations of res and link,
 * the error handling, the use made of link after page_address() and the
 * return are on lines missing from this listing.
 */
1199 static int fuse_readlink_page(struct inode *inode, struct page *page)
1201 struct fuse_conn *fc = get_fuse_conn(inode);
/* One byte of the page is left out of the request length */
1202 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1203 struct fuse_args_pages ap = {
1211 ap.args.opcode = FUSE_READLINK;
1212 ap.args.nodeid = get_node_id(inode);
1213 ap.args.out_pages = true;
1214 ap.args.out_argvar = true;
1215 ap.args.page_zeroing = true;
1216 ap.args.out_numargs = 1;
1217 ap.args.out_args[0].size = desc.length;
1218 res = fuse_simple_request(fc, &ap.args);
/* The atime is invalidated after the request */
1220 fuse_invalidate_atime(inode);
/* A result of PAGE_SIZE or more is not expected given the request size */
1225 if (WARN_ON(res >= PAGE_SIZE))
1228 link = page_address(page);
/*
 * Return the target of a symlink.
 *
 * @dentry:   dentry of the symlink
 * @inode:    symlink inode
 * @callback: delayed call that will release the page
 *
 * With cache_symlinks set on the connection, page_get_link() is used.
 * Otherwise a page is allocated, filled by fuse_readlink_page() and
 * returned with page_put_link() registered as cleanup.  Failures are
 * returned as ERR_PTR(err).
 * NOTE(review): the declarations of page and err, the error codes, the
 * check at lines 1248-1250 and the failure cleanup are on lines missing
 * from this listing.
 */
1234 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1235 struct delayed_call *callback)
1237 struct fuse_conn *fc = get_fuse_conn(inode);
1242 if (is_bad_inode(inode))
/* Cached symlinks are served by the generic page helper */
1245 if (fc->cache_symlinks)
1246 return page_get_link(dentry, inode, callback);
/* Uncached: read the target into a freshly allocated page */
1252 page = alloc_page(GFP_KERNEL);
1257 err = fuse_readlink_page(inode, page);
/* The page is released later through the delayed call */
1263 set_delayed_call(callback, page_put_link, page);
1265 return page_address(page);
1268 return ERR_PTR(err);
1271 static int fuse_dir_open(struct inode *inode, struct file *file)
1273 return fuse_open_common(inode, file, true);
/*
 * Release handler for directories: defer to fuse_release_common() with
 * its second argument set to true.
 * NOTE(review): the return statement (line 1280) is missing from this
 * listing.
 */
1276 static int fuse_dir_release(struct inode *inode, struct file *file)
1278 fuse_release_common(file, true);
/*
 * fsync handler for directories, implemented with FUSE_FSYNCDIR through
 * fuse_fsync_common().
 *
 * @file:  open directory
 * @start: start of the range passed on to fuse_fsync_common()
 * @end:   end of the range passed on to fuse_fsync_common()
 *
 * An -ENOSYS reply is remembered in fc->no_fsyncdir.
 * NOTE(review): the datasync parameter line, the declaration of err, the
 * return values of the two early checks, the inode locking (only the
 * unlock is visible) and the return are missing from this listing.
 */
1283 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1286 struct inode *inode = file->f_mapping->host;
1287 struct fuse_conn *fc = get_fuse_conn(inode);
1290 if (is_bad_inode(inode))
/* The server already answered -ENOSYS once */
1293 if (fc->no_fsyncdir)
1297 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1298 if (err == -ENOSYS) {
1299 fc->no_fsyncdir = 1;
1302 inode_unlock(inode);
/*
 * ioctl handler for directories: forwards to fuse_ioctl_common() with
 * FUSE_IOCTL_DIR.
 * NOTE(review): the arg parameter line and the API version check
 * announced by the comment below (lines 1313-1314) are missing from this
 * listing.
 */
1307 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1310 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1312 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1316 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
/*
 * Compat ioctl handler for directories: forwards to fuse_ioctl_common()
 * with both FUSE_IOCTL_COMPAT and FUSE_IOCTL_DIR set.
 * NOTE(review): the arg parameter line and lines 1323-1326 (where fc is
 * presumably used) are missing from this listing.
 */
1319 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1322 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1327 return fuse_ioctl_common(file, cmd, arg,
1328 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
/*
 * Decide whether a setattr should carry an mtime update.
 *
 * @ivalid:            ia_valid bits of the request
 * @trust_local_mtime: true when the kernel i_mtime is the official one
 *
 * NOTE(review): every return statement is on a line missing from this
 * listing; going by the comments, the first two tests and the final
 * default answer "update" and the third test answers "do not update".
 */
1331 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1333 /* Always update if mtime is explicitly set */
1334 if (ivalid & ATTR_MTIME_SET)
1337 /* Or if kernel i_mtime is the official one */
1338 if (trust_local_mtime)
1341 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1342 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1345 /* In all other cases update */
1349 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1350 struct fuse_setattr_in *arg, bool trust_local_cmtime)
1352 unsigned ivalid = iattr->ia_valid;
1354 if (ivalid & ATTR_MODE)
1355 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1356 if (ivalid & ATTR_UID)
1357 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1358 if (ivalid & ATTR_GID)
1359 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1360 if (ivalid & ATTR_SIZE)
1361 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1362 if (ivalid & ATTR_ATIME) {
1363 arg->valid |= FATTR_ATIME;
1364 arg->atime = iattr->ia_atime.tv_sec;
1365 arg->atimensec = iattr->ia_atime.tv_nsec;
1366 if (!(ivalid & ATTR_ATIME_SET))
1367 arg->valid |= FATTR_ATIME_NOW;
1369 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1370 arg->valid |= FATTR_MTIME;
1371 arg->mtime = iattr->ia_mtime.tv_sec;
1372 arg->mtimensec = iattr->ia_mtime.tv_nsec;
1373 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1374 arg->valid |= FATTR_MTIME_NOW;
1376 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1377 arg->valid |= FATTR_CTIME;
1378 arg->ctime = iattr->ia_ctime.tv_sec;
1379 arg->ctimensec = iattr->ia_ctime.tv_nsec;
1384 * Prevent concurrent writepages on inode
1386 * This is done by adding a negative bias to the inode write counter
1387 * and waiting for all pending writes to finish.
1389 void fuse_set_nowrite(struct inode *inode)
1391 struct fuse_inode *fi = get_fuse_inode(inode);
1393 BUG_ON(!inode_is_locked(inode));
1395 spin_lock(&fi->lock);
1396 BUG_ON(fi->writectr < 0);
1397 fi->writectr += FUSE_NOWRITE;
1398 spin_unlock(&fi->lock);
1399 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1403 * Allow writepages on inode
1405 * Remove the bias from the writecounter and send any queued
/*
 * Lock-free half of fuse_release_nowrite(), which calls it with fi->lock
 * held.  The write counter must equal FUSE_NOWRITE on entry; queued
 * writepages are flushed at the end.
 * NOTE(review): the statement at line 1413, between the check and the
 * flush, is missing from this listing.
 */
1408 static void __fuse_release_nowrite(struct inode *inode)
1410 struct fuse_inode *fi = get_fuse_inode(inode);
1412 BUG_ON(fi->writectr != FUSE_NOWRITE);
1414 fuse_flush_writepages(inode);
1417 void fuse_release_nowrite(struct inode *inode)
1419 struct fuse_inode *fi = get_fuse_inode(inode);
1421 spin_lock(&fi->lock);
1422 __fuse_release_nowrite(inode);
1423 spin_unlock(&fi->lock);
1426 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1427 struct inode *inode,
1428 struct fuse_setattr_in *inarg_p,
1429 struct fuse_attr_out *outarg_p)
1431 args->opcode = FUSE_SETATTR;
1432 args->nodeid = get_node_id(inode);
1433 args->in_numargs = 1;
1434 args->in_args[0].size = sizeof(*inarg_p);
1435 args->in_args[0].value = inarg_p;
1436 args->out_numargs = 1;
1437 args->out_args[0].size = sizeof(*outarg_p);
1438 args->out_args[0].value = outarg_p;
1442 * Flush inode->i_mtime to the server
/*
 * Send the kernel-held mtime (and, from protocol minor 23 on, ctime) of
 * an inode to the server with a setattr request.
 *
 * @inode: inode whose times are flushed
 * @ff:    fuse_file associated with the request
 *
 * Returns the result of fuse_simple_request().
 * NOTE(review): the args declaration and lines 1461-1465 are missing from
 * this listing; the FATTR_FH bit below is presumably set only when ff is
 * non-NULL, together with a file handle.
 */
1444 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1446 struct fuse_conn *fc = get_fuse_conn(inode);
1448 struct fuse_setattr_in inarg;
1449 struct fuse_attr_out outarg;
1451 memset(&inarg, 0, sizeof(inarg));
1452 memset(&outarg, 0, sizeof(outarg));
/* mtime is always sent */
1454 inarg.valid = FATTR_MTIME;
1455 inarg.mtime = inode->i_mtime.tv_sec;
1456 inarg.mtimensec = inode->i_mtime.tv_nsec;
/* ctime is added only for protocol minor 23 and later */
1457 if (fc->minor >= 23) {
1458 inarg.valid |= FATTR_CTIME;
1459 inarg.ctime = inode->i_ctime.tv_sec;
1460 inarg.ctimensec = inode->i_ctime.tv_nsec;
1463 inarg.valid |= FATTR_FH;
1466 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1468 return fuse_simple_request(fc, &args);
1472 * Set attributes, and at the same time refresh them.
1474 * Truncation is slightly complicated, because the 'truncate' request
1475 * may fail, in which case we don't want to touch the mapping.
1476 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1477 * and the actual truncation by hand.
/*
 * fuse_do_setattr - apply an attribute change by sending FUSE_SETATTR and
 * then updating the cached inode attributes from the reply.
 *
 * @dentry: dentry of the inode being changed
 * @attr:   requested changes; ia_valid may be extended here (ATTR_FORCE,
 *          ATTR_MTIME | ATTR_CTIME)
 * @file:   third argument as passed by fuse_setattr(); where it is used, its
 *          private_data is read as a struct fuse_file
 *
 * Returns an int status; the visible code stores the results of
 * setattr_prepare() and fuse_simple_request() in 'err'.
 *
 * NOTE(review): this listing omits short lines (braces, some declarations,
 * early returns, labels), so several guarding conditions are not visible.
 * Comments below describe only the statements that are shown.
 */
1479 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1482 struct inode *inode = d_inode(dentry);
1483 struct fuse_conn *fc = get_fuse_conn(inode);
1484 struct fuse_inode *fi = get_fuse_inode(inode);
1486 struct fuse_setattr_in inarg;
1487 struct fuse_attr_out outarg;
1488 bool is_truncate = false;
1489 bool is_wb = fc->writeback_cache;
/* Local mtime/ctime are trusted only for regular files with writeback cache. */
1492 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
/* Without default_permissions, ATTR_FORCE is set before setattr_prepare(). */
1494 if (!fc->default_permissions)
1495 attr->ia_valid |= ATTR_FORCE;
1497 err = setattr_prepare(dentry, attr);
1501 if (attr->ia_valid & ATTR_OPEN) {
1502 /* This is coming from open(..., ... | O_TRUNC); */
1503 WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1504 WARN_ON(attr->ia_size != 0);
1505 if (fc->atomic_o_trunc) {
1507 * No need to send request to userspace, since actual
1508 * truncation has already been done by OPEN. But still
1509 * need to truncate page cache.
1511 i_size_write(inode, 0);
1512 truncate_pagecache(inode, 0);
/* A size change on anything but a regular file triggers a warning. */
1518 if (attr->ia_valid & ATTR_SIZE) {
1519 if (WARN_ON(!S_ISREG(inode->i_mode)))
/*
 * Block writepages and flag the size as unstable while the request is in
 * flight.  NOTE(review): the condition guarding this section is not visible.
 */
1525 fuse_set_nowrite(inode);
1526 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
/* With trusted local times, an actual size change also updates mtime/ctime. */
1527 if (trust_local_cmtime && attr->ia_size != inode->i_size)
1528 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
/* Build the request payload from the iattr, starting from zeroed buffers. */
1531 memset(&inarg, 0, sizeof(inarg));
1532 memset(&outarg, 0, sizeof(outarg));
1533 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1535 struct fuse_file *ff = file->private_data;
1536 inarg.valid |= FATTR_FH;
1539 if (attr->ia_valid & ATTR_SIZE) {
1540 /* For mandatory locking in truncate */
1541 inarg.valid |= FATTR_LOCKOWNER;
1542 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1544 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1545 err = fuse_simple_request(fc, &args);
/* NOTE(review): the condition under which attributes are invalidated is not visible. */
1548 fuse_invalidate_attr(inode);
/* The reply reports a different file type than the cached inode: mark it bad. */
1552 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1553 make_bad_inode(inode);
1558 spin_lock(&fi->lock);
1559 /* the kernel maintains i_mtime locally */
1560 if (trust_local_cmtime) {
1561 if (attr->ia_valid & ATTR_MTIME)
1562 inode->i_mtime = attr->ia_mtime;
1563 if (attr->ia_valid & ATTR_CTIME)
1564 inode->i_ctime = attr->ia_ctime;
1565 /* FIXME: clear I_DIRTY_SYNC? */
1568 fuse_change_attributes_common(inode, &outarg.attr,
1569 attr_timeout(&outarg));
/* Remember the size before it is possibly overwritten from the reply. */
1570 oldsize = inode->i_size;
1571 /* see the comment in fuse_change_attributes() */
1572 if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1573 i_size_write(inode, outarg.attr.size);
1576 /* NOTE: this may release/reacquire fi->lock */
1577 __fuse_release_nowrite(inode);
1579 spin_unlock(&fi->lock);
1582 * Only call invalidate_inode_pages2() after removing
1583 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
/* Drop cached pages when a regular file's size differs from the old size. */
1585 if ((is_truncate || !is_wb) &&
1586 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1587 truncate_pagecache(inode, outarg.attr.size);
1588 invalidate_inode_pages2(inode->i_mapping);
1591 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
/*
 * NOTE(review): presumably the error exit path (its label and guard are not
 * visible): undo the nowrite bias and clear the size-unstable flag.
 */
1596 fuse_release_nowrite(inode);
1598 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
/*
 * fuse_setattr - setattr handler installed in the inode_operations tables of
 * this file.
 *
 * @entry: dentry of the inode being changed
 * @attr:  requested changes; ia_valid and ia_mode may be rewritten here
 *
 * Checks that the current process may access the mount, handles the
 * suid/sgid kill flags, delegates to fuse_do_setattr() and afterwards drops
 * cached ACLs and, for directory mode changes, the cached dentry validity.
 *
 * NOTE(review): early returns, the declaration of 'ret' and some guarding
 * conditions are not visible in this listing.
 */
1602 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1604 struct inode *inode = d_inode(entry);
1605 struct fuse_conn *fc = get_fuse_conn(inode);
/* Use the file supplied by the caller only when ATTR_FILE says it is valid. */
1606 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1609 if (!fuse_allow_current_process(get_fuse_conn(inode)))
/* Strip the kill flags from the request; they are handled explicitly below. */
1612 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1613 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1617 * The only sane way to reliably kill suid/sgid is to do it in
1618 * the userspace filesystem
1620 * This should be done on write(), truncate() and chown().
1622 if (!fc->handle_killpriv) {
1624 * ia_mode calculation may have used stale i_mode.
1625 * Refresh and recalculate.
1627 ret = fuse_do_getattr(inode, NULL, file);
/* Recompute the new mode from the refreshed i_mode. */
1631 attr->ia_mode = inode->i_mode;
1632 if (inode->i_mode & S_ISUID) {
1633 attr->ia_valid |= ATTR_MODE;
1634 attr->ia_mode &= ~S_ISUID;
/* sgid is cleared only when group-execute is also set. */
1636 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1637 attr->ia_valid |= ATTR_MODE;
1638 attr->ia_mode &= ~S_ISGID;
/* Nothing left to change after the adjustments above. */
1642 if (!attr->ia_valid)
1645 ret = fuse_do_setattr(entry, attr, file);
1648 * If filesystem supports acls it may have updated acl xattrs in
1649 * the filesystem, so forget cached acls for the inode.
1652 forget_all_cached_acls(inode);
1654 /* Directory mode changed, may need to revalidate access */
1655 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1656 fuse_invalidate_entry_cache(entry);
/*
 * fuse_getattr - getattr handler installed in the inode_operations tables of
 * this file.
 *
 * @path:         path whose dentry's inode is queried
 * @stat:         output buffer, passed through to fuse_update_get_attr()
 * @request_mask: passed through unchanged
 * @flags:        passed through unchanged
 *
 * After the access check, returns the result of fuse_update_get_attr()
 * called with a NULL file.  The value returned when the access check fails
 * is not visible in this listing.
 */
1661 static int fuse_getattr(const struct path *path, struct kstat *stat,
1662 u32 request_mask, unsigned int flags)
1664 struct inode *inode = d_inode(path->dentry);
1665 struct fuse_conn *fc = get_fuse_conn(inode);
1667 if (!fuse_allow_current_process(fc))
1670 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
/*
 * Inode operations for FUSE directories; installed by fuse_init_dir().
 * NOTE(review): short initializer lines may be missing from this listing.
 */
1673 static const struct inode_operations fuse_dir_inode_operations = {
1674 .lookup = fuse_lookup,
1675 .mkdir = fuse_mkdir,
1676 .symlink = fuse_symlink,
1677 .unlink = fuse_unlink,
1678 .rmdir = fuse_rmdir,
1679 .rename = fuse_rename2,
1681 .setattr = fuse_setattr,
1682 .create = fuse_create,
1683 .atomic_open = fuse_atomic_open,
1684 .mknod = fuse_mknod,
1685 .permission = fuse_permission,
1686 .getattr = fuse_getattr,
1687 .listxattr = fuse_listxattr,
1688 .get_acl = fuse_get_acl,
1689 .set_acl = fuse_set_acl,
/*
 * File operations for open FUSE directories; installed by fuse_init_dir().
 * Seeking and read() use the generic helpers, the rest is FUSE-specific.
 */
1692 static const struct file_operations fuse_dir_operations = {
1693 .llseek = generic_file_llseek,
1694 .read = generic_read_dir,
1695 .iterate_shared = fuse_readdir,
1696 .open = fuse_dir_open,
1697 .release = fuse_dir_release,
1698 .fsync = fuse_dir_fsync,
1699 .unlocked_ioctl = fuse_dir_ioctl,
1700 .compat_ioctl = fuse_dir_compat_ioctl,
/*
 * Inode operations installed by fuse_init_common(): attribute, permission,
 * xattr-listing and ACL handlers only.
 */
1703 static const struct inode_operations fuse_common_inode_operations = {
1704 .setattr = fuse_setattr,
1705 .permission = fuse_permission,
1706 .getattr = fuse_getattr,
1707 .listxattr = fuse_listxattr,
1708 .get_acl = fuse_get_acl,
1709 .set_acl = fuse_set_acl,
/*
 * Inode operations for FUSE symlinks; installed by fuse_init_symlink().
 * Adds get_link to the attribute and xattr-listing handlers.
 */
1712 static const struct inode_operations fuse_symlink_inode_operations = {
1713 .setattr = fuse_setattr,
1714 .get_link = fuse_get_link,
1715 .getattr = fuse_getattr,
1716 .listxattr = fuse_listxattr,
/*
 * fuse_init_common - install the common FUSE inode operations on an inode.
 *
 * @inode: inode to set up; only i_op is written.
 */
1719 void fuse_init_common(struct inode *inode)
1721 inode->i_op = &fuse_common_inode_operations;
/*
 * fuse_init_dir - set up an inode as a FUSE directory.
 *
 * @inode: inode to set up
 *
 * Installs the directory inode and file operations and resets the fi->rdc
 * state (lock, 'cached' flag, version).
 * NOTE(review): further rdc field resets may be missing from this listing.
 */
1724 void fuse_init_dir(struct inode *inode)
1726 struct fuse_inode *fi = get_fuse_inode(inode);
1728 inode->i_op = &fuse_dir_inode_operations;
1729 inode->i_fop = &fuse_dir_operations;
/* Start with an initialised lock and nothing marked as cached. */
1731 spin_lock_init(&fi->rdc.lock);
1732 fi->rdc.cached = false;
1735 fi->rdc.version = 0;
/*
 * fuse_symlink_readpage - readpage handler of fuse_symlink_aops.
 *
 * @null: file pointer; not referenced by the statements visible here
 * @page: page to fill; its mapping's host inode is passed to
 *        fuse_readlink_page()
 *
 * NOTE(review): the condition under which the page is marked up to date and
 * the remainder of the function (unlock/return) are not visible in this
 * listing; presumably SetPageUptodate() runs only when err is 0 - confirm.
 */
1738 static int fuse_symlink_readpage(struct file *null, struct page *page)
1740 int err = fuse_readlink_page(page->mapping->host, page);
1743 SetPageUptodate(page);
/*
 * Address space operations for FUSE symlinks; installed on i_data.a_ops by
 * fuse_init_symlink().  Only readpage is provided.
 */
1750 static const struct address_space_operations fuse_symlink_aops = {
1751 .readpage = fuse_symlink_readpage,
/*
 * fuse_init_symlink - set up an inode as a FUSE symlink.
 *
 * @inode: inode to set up
 *
 * Installs the symlink inode operations and address space operations, then
 * calls inode_nohighmem() on the inode.
 */
1754 void fuse_init_symlink(struct inode *inode)
1756 inode->i_op = &fuse_symlink_inode_operations;
1757 inode->i_data.a_ops = &fuse_symlink_aops;
1758 inode_nohighmem(inode);