2 FUSE: Filesystem in Userspace
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/sched.h>
14 #include <linux/namei.h>
15 #include <linux/slab.h>
16 #include <linux/xattr.h>
17 #include <linux/iversion.h>
18 #include <linux/posix_acl.h>
20 static void fuse_advise_use_readdirplus(struct inode *dir)
22 struct fuse_inode *fi = get_fuse_inode(dir);
24 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
27 #if BITS_PER_LONG >= 64
28 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
30 entry->d_fsdata = (void *) time;
33 static inline u64 fuse_dentry_time(const struct dentry *entry)
35 return (u64)entry->d_fsdata;
44 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
46 ((union fuse_dentry *) dentry->d_fsdata)->time = time;
49 static inline u64 fuse_dentry_time(const struct dentry *entry)
51 return ((union fuse_dentry *) entry->d_fsdata)->time;
/*
 * Record a new timeout on @dentry and keep the DCACHE_OP_DELETE flag in
 * step with it.  "delete" is true only when @time is zero and the
 * connection has delete_stale set.
 */
55 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
57 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
58 bool delete = !time && fc->delete_stale;
60 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
61 * Don't care about races, either way it's just an optimization
/* d_lock is taken only when the flag currently disagrees with "delete". */
63 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
64 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
65 spin_lock(&dentry->d_lock);
/* NOTE(review): presumably the flag is cleared when !delete and set otherwise -- confirm the selecting branches. */
67 dentry->d_flags &= ~DCACHE_OP_DELETE;
69 dentry->d_flags |= DCACHE_OP_DELETE;
70 spin_unlock(&dentry->d_lock);
/* Finally store the timeout value itself. */
73 __fuse_dentry_settime(dentry, time);
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until the dentry/attributes are valid is stored in
 * dentry->d_fsdata and fuse_inode->i_time respectively.
 */
83 * Calculate the time in jiffies until a dentry/attributes are valid
/*
 * Turn a (sec, nsec) validity period into an absolute 64-bit jiffies
 * deadline.  The nanosecond part is clamped to NSEC_PER_SEC - 1.
 */
85 static u64 time_to_jiffies(u64 sec, u32 nsec)
88 struct timespec64 ts = {
90 min_t(u32, nsec, NSEC_PER_SEC - 1)
/* Deadline = current 64-bit jiffies + the period converted to jiffies. */
93 return get_jiffies_64() + timespec64_to_jiffies(&ts);
99 * Set dentry and possibly attribute timeouts from the lookup/mk*
102 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
104 fuse_dentry_settime(entry,
105 time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
108 static u64 attr_timeout(struct fuse_attr_out *o)
110 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
113 u64 entry_attr_timeout(struct fuse_entry_out *o)
115 return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
118 static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
124 * Mark the attributes as stale, so that at the next call to
125 * ->getattr() they will be fetched from userspace
127 void fuse_invalidate_attr(struct inode *inode)
129 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
132 static void fuse_dir_changed(struct inode *dir)
134 fuse_invalidate_attr(dir);
135 inode_maybe_inc_iversion(dir, false);
139 * Mark the attributes as stale due to an atime change. Avoid the invalidate if
142 void fuse_invalidate_atime(struct inode *inode)
144 if (!IS_RDONLY(inode))
145 fuse_invalidate_attr_mask(inode, STATX_ATIME);
149 * Just mark the entry as stale, so that a next attempt to look it up
150 * will result in a new lookup call to userspace
152 * This is called when a dentry is about to become negative and the
153 * timeout is unknown (unlink, rmdir, rename and in some cases
156 void fuse_invalidate_entry_cache(struct dentry *entry)
158 fuse_dentry_settime(entry, 0);
162 * Same as fuse_invalidate_entry_cache(), but also try to remove the
163 * dentry from the hash
/* Invalidate the cached state FUSE keeps for @entry. */
165 static void fuse_invalidate_entry(struct dentry *entry)
/* Zero the entry timeout so the next lookup is sent to userspace. */
168 fuse_invalidate_entry_cache(entry);
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 u64 nodeid, const struct qstr *name,
173 struct fuse_entry_out *outarg)
175 memset(outarg, 0, sizeof(struct fuse_entry_out));
176 args->opcode = FUSE_LOOKUP;
177 args->nodeid = nodeid;
178 args->in_numargs = 1;
179 args->in_args[0].size = name->len + 1;
180 args->in_args[0].value = name->name;
181 args->out_numargs = 1;
182 args->out_args[0].size = sizeof(struct fuse_entry_out);
183 args->out_args[0].value = outarg;
187 * Check whether the dentry is still valid
189 * If the entry validity timeout has expired and the dentry is
190 * positive, try to redo the lookup. If the lookup results in a
191 * different inode, then let the VFS invalidate the dentry and redo
192 * the lookup once more. If the lookup results in the same inode,
193 * then refresh the attributes, timeouts and mark the dentry valid.
/*
 * ->d_revalidate() hook.  When the dentry timeout has passed, or
 * LOOKUP_REVAL is requested, a fresh FUSE_LOOKUP is issued for the name
 * under its parent and the result is compared with the cached inode.
 */
195 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 struct dentry *parent;
199 struct fuse_conn *fc;
200 struct fuse_inode *fi;
203 inode = d_inode_rcu(entry);
204 if (inode && is_bad_inode(inode))
/* Expired timeout or forced revalidation: redo the lookup. */
206 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
207 (flags & LOOKUP_REVAL)) {
208 struct fuse_entry_out outarg;
210 struct fuse_forget_link *forget;
213 /* For negative dentries, always do a fresh lookup */
218 if (flags & LOOKUP_RCU)
221 fc = get_fuse_conn(inode);
223 forget = fuse_alloc_forget();
/* Sample the attribute version before the request is sent. */
228 attr_version = fuse_get_attr_version(fc);
/* Look the name up again under the current parent. */
230 parent = dget_parent(entry);
231 fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
232 &entry->d_name, &outarg);
233 ret = fuse_simple_request(fc, &args);
235 /* Zero nodeid is same as -ENOENT */
236 if (!ret && !outarg.nodeid)
239 fi = get_fuse_inode(inode);
/* A different nodeid came back: queue a forget for it. */
240 if (outarg.nodeid != get_node_id(inode)) {
241 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
244 spin_lock(&fi->lock);
246 spin_unlock(&fi->lock);
/* A request error or a changed file type is tested here. */
251 if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
/* Same inode: refresh ACL cache, attributes and the entry timeout. */
254 forget_all_cached_acls(inode);
255 fuse_change_attributes(inode, &outarg.attr,
256 entry_attr_timeout(&outarg),
258 fuse_change_entry_timeout(entry, &outarg);
/* The FUSE_I_INIT_RDPLUS bit is only tested under LOOKUP_RCU; otherwise it is test-and-cleared and the parent is advised to use readdirplus. */
260 fi = get_fuse_inode(inode);
261 if (flags & LOOKUP_RCU) {
262 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
264 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
265 parent = dget_parent(entry);
266 fuse_advise_use_readdirplus(d_inode(parent));
279 #if BITS_PER_LONG < 64
280 static int fuse_dentry_init(struct dentry *dentry)
282 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
283 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
285 return dentry->d_fsdata ? 0 : -ENOMEM;
/* ->d_release() hook operating on the union fuse_dentry kept in d_fsdata. */
287 static void fuse_dentry_release(struct dentry *dentry)
/* NOTE(review): presumably fd is freed after this point -- confirm how. */
289 union fuse_dentry *fd = dentry->d_fsdata;
295 static int fuse_dentry_delete(const struct dentry *dentry)
297 return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
/* Dentry operations table with revalidate and delete hooks. */
300 const struct dentry_operations fuse_dentry_operations = {
301 .d_revalidate = fuse_dentry_revalidate,
302 .d_delete = fuse_dentry_delete,
/* init/release hooks are installed only when long is narrower than 64 bits. */
303 #if BITS_PER_LONG < 64
304 .d_init = fuse_dentry_init,
305 .d_release = fuse_dentry_release,
/*
 * Dentry operations table without revalidate/delete hooks; presumably used
 * for the root dentry, going by the name -- confirm against the user.
 */
309 const struct dentry_operations fuse_root_dentry_operations = {
/* init/release hooks are installed only when long is narrower than 64 bits. */
310 #if BITS_PER_LONG < 64
311 .d_init = fuse_dentry_init,
312 .d_release = fuse_dentry_release,
316 int fuse_valid_type(int m)
318 return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
319 S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
/*
 * Send a FUSE_LOOKUP for @name under @nodeid and, on a usable reply,
 * obtain the matching inode through fuse_iget() into *@inode.
 */
322 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
323 struct fuse_entry_out *outarg, struct inode **inode)
325 struct fuse_conn *fc = get_fuse_conn_super(sb);
327 struct fuse_forget_link *forget;
/* Names longer than FUSE_NAME_MAX are checked for up front. */
333 if (name->len > FUSE_NAME_MAX)
337 forget = fuse_alloc_forget();
/* Sample the attribute version before the request is sent. */
342 attr_version = fuse_get_attr_version(fc);
344 fuse_lookup_init(fc, &args, nodeid, name, outarg);
345 err = fuse_simple_request(fc, &args);
346 /* Zero nodeid is same as -ENOENT, but with valid timeout */
347 if (err || !outarg->nodeid)
/* The reply's file type must pass fuse_valid_type(). */
353 if (!fuse_valid_type(outarg->attr.mode))
356 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
357 &outarg->attr, entry_attr_timeout(outarg),
/* NOTE(review): presumably reached when fuse_iget() yields no inode -- confirm. */
361 fuse_queue_forget(fc, forget, outarg->nodeid, 1);
/*
 * Directory lookup: resolve the entry name through fuse_lookup_name()
 * while holding the fuse inode lock, then splice the result into the
 * dcache and set or invalidate the entry timeout.
 */
372 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
376 struct fuse_entry_out outarg;
378 struct dentry *newent;
379 bool outarg_valid = true;
382 locked = fuse_lock_inode(dir);
383 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
385 fuse_unlock_inode(dir, locked);
/* On -ENOENT the reply buffer is marked as not valid. */
386 if (err == -ENOENT) {
387 outarg_valid = false;
/* A child that resolves to FUSE_ROOT_ID is tested for here. */
394 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
397 newent = d_splice_alias(inode, entry);
398 err = PTR_ERR(newent);
/* Continue with the spliced dentry when d_splice_alias() returned one. */
402 entry = newent ? newent : entry;
404 fuse_change_entry_timeout(entry, &outarg);
406 fuse_invalidate_entry_cache(entry);
409 fuse_advise_use_readdirplus(dir);
419 * Atomic create+open operation
421 * If the filesystem doesn't support this, then fall back to separate
422 * 'mknod' + 'open' requests.
/*
 * Send a FUSE_CREATE request for @entry in @dir.  The request carries a
 * fuse_create_in plus the entry name; the reply carries an entry
 * (outentry) and an open result (outopen).
 */
424 static int fuse_create_open(struct inode *dir, struct dentry *entry,
425 struct file *file, unsigned flags,
430 struct fuse_conn *fc = get_fuse_conn(dir);
432 struct fuse_forget_link *forget;
433 struct fuse_create_in inarg;
434 struct fuse_open_out outopen;
435 struct fuse_entry_out outentry;
436 struct fuse_inode *fi;
437 struct fuse_file *ff;
439 /* Userspace expects S_IFREG in create mode */
440 BUG_ON((mode & S_IFMT) != S_IFREG);
442 forget = fuse_alloc_forget();
448 ff = fuse_file_alloc(fc);
450 goto out_put_forget_req;
453 mode &= ~current_umask();
/* Zero both structures before they are filled in. */
456 memset(&inarg, 0, sizeof(inarg));
457 memset(&outentry, 0, sizeof(outentry));
460 inarg.umask = current_umask();
461 args.opcode = FUSE_CREATE;
462 args.nodeid = get_node_id(dir);
/* Inputs: create parameters, then the name (len + 1 bytes). */
464 args.in_args[0].size = sizeof(inarg);
465 args.in_args[0].value = &inarg;
466 args.in_args[1].size = entry->d_name.len + 1;
467 args.in_args[1].value = entry->d_name.name;
/* Outputs: entry reply, then open reply. */
468 args.out_numargs = 2;
469 args.out_args[0].size = sizeof(outentry);
470 args.out_args[0].value = &outentry;
471 args.out_args[1].size = sizeof(outopen);
472 args.out_args[1].value = &outopen;
473 err = fuse_simple_request(fc, &args);
/* The reply must describe a regular file with a valid nodeid. */
478 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
482 ff->nodeid = outentry.nodeid;
483 ff->open_flags = outopen.open_flags;
484 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
485 &outentry.attr, entry_attr_timeout(&outentry), 0);
/* NOTE(review): presumably the no-inode path -- release the file and queue a forget; confirm the guarding condition. */
487 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
488 fuse_sync_release(NULL, ff, flags);
489 fuse_queue_forget(fc, forget, outentry.nodeid, 1);
/* Bind the inode to the dentry, set its timeout, mark the directory changed. */
494 d_instantiate(entry, inode);
495 fuse_change_entry_timeout(entry, &outentry);
496 fuse_dir_changed(dir);
497 err = finish_open(file, entry, generic_file_open);
499 fi = get_fuse_inode(inode);
500 fuse_sync_release(fi, ff, flags);
502 file->private_data = ff;
503 fuse_finish_open(inode, file);
/* Forward declaration: fuse_mknod() is called below before it is defined. */
515 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
/*
 * Atomic open: look the entry up if it is still in lookup, then try
 * fuse_create_open(); fuse_mknod() is also called from this function and
 * finish_no_open() is one of its return paths.
 */
516 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
517 struct file *file, unsigned flags,
521 struct fuse_conn *fc = get_fuse_conn(dir);
522 struct dentry *res = NULL;
524 if (d_in_lookup(entry)) {
525 res = fuse_lookup(dir, entry, 0);
/* Tested here: no O_CREAT requested, or the dentry is already positive. */
533 if (!(flags & O_CREAT) || d_really_is_positive(entry))
537 file->f_mode |= FMODE_CREATED;
542 err = fuse_create_open(dir, entry, file, flags, mode);
/* -ENOSYS from the create+open attempt is handled specially. */
543 if (err == -ENOSYS) {
552 err = fuse_mknod(dir, entry, mode, 0);
556 return finish_no_open(file, res);
560 * Code shared between mknod, mkdir, symlink and link
/*
 * Common tail of the entry-creating operations: the caller prepares the
 * opcode and input arguments in @args; this function adds the parent
 * nodeid and the reply buffer, sends the request, validates the reply,
 * and splices the resulting inode into the dcache.
 */
562 static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
563 struct inode *dir, struct dentry *entry,
566 struct fuse_entry_out outarg;
570 struct fuse_forget_link *forget;
572 forget = fuse_alloc_forget();
576 memset(&outarg, 0, sizeof(outarg));
577 args->nodeid = get_node_id(dir);
578 args->out_numargs = 1;
579 args->out_args[0].size = sizeof(outarg);
580 args->out_args[0].value = &outarg;
581 err = fuse_simple_request(fc, args);
583 goto out_put_forget_req;
/* Bail out on an invalid nodeid in the reply. */
586 if (invalid_nodeid(outarg.nodeid))
587 goto out_put_forget_req;
/* Bail out when the reply's file type differs from the requested mode. */
589 if ((outarg.attr.mode ^ mode) & S_IFMT)
590 goto out_put_forget_req;
592 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
593 &outarg.attr, entry_attr_timeout(&outarg), 0);
595 fuse_queue_forget(fc, forget, outarg.nodeid, 1);
601 d = d_splice_alias(inode, entry);
/* The entry timeout is set on either the spliced dentry or the original. */
606 fuse_change_entry_timeout(d, &outarg);
609 fuse_change_entry_timeout(entry, &outarg);
611 fuse_dir_changed(dir);
/*
 * Build a FUSE_MKNOD request (fuse_mknod_in plus the entry name) and hand
 * it to create_new_entry().
 */
619 static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
622 struct fuse_mknod_in inarg;
623 struct fuse_conn *fc = get_fuse_conn(dir);
627 mode &= ~current_umask();
629 memset(&inarg, 0, sizeof(inarg));
/* Device number in the new_encode_dev() encoding, plus the caller's umask. */
631 inarg.rdev = new_encode_dev(rdev);
632 inarg.umask = current_umask();
633 args.opcode = FUSE_MKNOD;
635 args.in_args[0].size = sizeof(inarg);
636 args.in_args[0].value = &inarg;
637 args.in_args[1].size = entry->d_name.len + 1;
638 args.in_args[1].value = entry->d_name.name;
639 return create_new_entry(fc, &args, dir, entry, mode);
/* Create a file by delegating to fuse_mknod() with a device number of 0. */
642 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
645 return fuse_mknod(dir, entry, mode, 0);
/*
 * Build a FUSE_MKDIR request (fuse_mkdir_in plus the entry name) and hand
 * it to create_new_entry(), expecting an S_IFDIR result.
 */
648 static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
650 struct fuse_mkdir_in inarg;
651 struct fuse_conn *fc = get_fuse_conn(dir);
655 mode &= ~current_umask();
657 memset(&inarg, 0, sizeof(inarg));
659 inarg.umask = current_umask();
660 args.opcode = FUSE_MKDIR;
662 args.in_args[0].size = sizeof(inarg);
663 args.in_args[0].value = &inarg;
664 args.in_args[1].size = entry->d_name.len + 1;
665 args.in_args[1].value = entry->d_name.name;
666 return create_new_entry(fc, &args, dir, entry, S_IFDIR);
/*
 * Build a FUSE_SYMLINK request: first argument is the entry name, second
 * is the link target including its terminating NUL.  Expects an S_IFLNK
 * result from create_new_entry().
 */
669 static int fuse_symlink(struct inode *dir, struct dentry *entry,
672 struct fuse_conn *fc = get_fuse_conn(dir);
/* Length of the target string plus the terminating NUL. */
673 unsigned len = strlen(link) + 1;
676 args.opcode = FUSE_SYMLINK;
678 args.in_args[0].size = entry->d_name.len + 1;
679 args.in_args[0].value = entry->d_name.name;
680 args.in_args[1].size = len;
681 args.in_args[1].value = link;
682 return create_new_entry(fc, &args, dir, entry, S_IFLNK);
685 void fuse_update_ctime(struct inode *inode)
687 if (!IS_NOCMTIME(inode)) {
688 inode->i_ctime = current_time(inode);
689 mark_inode_dirty_sync(inode);
/*
 * Send FUSE_UNLINK for @entry in @dir and update the cached state of the
 * inode, the directory and the dentry afterwards.
 */
693 static int fuse_unlink(struct inode *dir, struct dentry *entry)
696 struct fuse_conn *fc = get_fuse_conn(dir);
699 args.opcode = FUSE_UNLINK;
700 args.nodeid = get_node_id(dir);
702 args.in_args[0].size = entry->d_name.len + 1;
703 args.in_args[0].value = entry->d_name.name;
704 err = fuse_simple_request(fc, &args);
706 struct inode *inode = d_inode(entry);
707 struct fuse_inode *fi = get_fuse_inode(inode);
/* Under fi->lock: take a fresh attribute version from the connection counter. */
709 spin_lock(&fi->lock);
710 fi->attr_version = atomic64_inc_return(&fc->attr_version);
712 * If i_nlink == 0 then unlink doesn't make sense, yet this can
713 * happen if userspace filesystem is careless. It would be
714 * difficult to enforce correct nlink usage so just ignore this
/* NOTE(review): presumably the link count is dropped under this guard -- confirm. */
717 if (inode->i_nlink > 0)
719 spin_unlock(&fi->lock);
/* Invalidate cached attributes and the dentry, then update ctime. */
720 fuse_invalidate_attr(inode);
721 fuse_dir_changed(dir);
722 fuse_invalidate_entry_cache(entry);
723 fuse_update_ctime(inode);
/* Interrupted request: invalidate the entry. */
724 } else if (err == -EINTR)
725 fuse_invalidate_entry(entry);
/*
 * Send FUSE_RMDIR for @entry in @dir; afterwards clear the removed
 * directory's link count and invalidate the cached directory/dentry state.
 */
729 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
732 struct fuse_conn *fc = get_fuse_conn(dir);
735 args.opcode = FUSE_RMDIR;
736 args.nodeid = get_node_id(dir);
738 args.in_args[0].size = entry->d_name.len + 1;
739 args.in_args[0].value = entry->d_name.name;
740 err = fuse_simple_request(fc, &args);
742 clear_nlink(d_inode(entry));
743 fuse_dir_changed(dir);
744 fuse_invalidate_entry_cache(entry);
/* Interrupted request: invalidate the entry. */
745 } else if (err == -EINTR)
746 fuse_invalidate_entry(entry);
/*
 * Shared implementation of the rename requests.  @opcode selects the
 * request type and @argsize the size of the fixed input structure that
 * is sent; the old and new names follow as second and third arguments.
 */
750 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
751 struct inode *newdir, struct dentry *newent,
752 unsigned int flags, int opcode, size_t argsize)
755 struct fuse_rename2_in inarg;
756 struct fuse_conn *fc = get_fuse_conn(olddir);
/* Only the first argsize bytes of inarg are zeroed and sent. */
759 memset(&inarg, 0, argsize);
760 inarg.newdir = get_node_id(newdir);
762 args.opcode = opcode;
763 args.nodeid = get_node_id(olddir);
765 args.in_args[0].size = argsize;
766 args.in_args[0].value = &inarg;
767 args.in_args[1].size = oldent->d_name.len + 1;
768 args.in_args[1].value = oldent->d_name.name;
769 args.in_args[2].size = newent->d_name.len + 1;
770 args.in_args[2].value = newent->d_name.name;
771 err = fuse_simple_request(fc, &args);
/* The moved inode: invalidate attributes and update ctime. */
774 fuse_invalidate_attr(d_inode(oldent));
775 fuse_update_ctime(d_inode(oldent));
/* For an exchange the target inode gets the same treatment. */
777 if (flags & RENAME_EXCHANGE) {
778 fuse_invalidate_attr(d_inode(newent));
779 fuse_update_ctime(d_inode(newent));
/* Both directories changed; the second call is skipped when they are the same. */
782 fuse_dir_changed(olddir);
783 if (olddir != newdir)
784 fuse_dir_changed(newdir);
786 /* newent will end up negative */
787 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
788 fuse_invalidate_attr(d_inode(newent));
789 fuse_invalidate_entry_cache(newent);
790 fuse_update_ctime(d_inode(newent));
792 } else if (err == -EINTR) {
793 /* If request was interrupted, DEITY only knows if the
794 rename actually took place. If the invalidation
795 fails (e.g. some process has CWD under the renamed
796 directory), then there can be inconsistency between
797 the dcache and the real filesystem. Tough luck. */
798 fuse_invalidate_entry(oldent);
799 if (d_really_is_positive(newent))
800 fuse_invalidate_entry(newent);
/*
 * Rename entry point.  Flags other than RENAME_NOREPLACE and
 * RENAME_EXCHANGE are tested for first; the work is done by
 * fuse_rename_common() using either the fuse_rename2_in or the
 * fuse_rename_in layout.
 */
806 static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
807 struct inode *newdir, struct dentry *newent,
810 struct fuse_conn *fc = get_fuse_conn(olddir);
/* Any flag outside the two supported ones is tested for here. */
813 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
/* Tested here: rename2 marked unavailable, or protocol minor below 23. */
817 if (fc->no_rename2 || fc->minor < 23)
820 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
822 sizeof(struct fuse_rename2_in));
/* -ENOSYS from the rename2-layout request is handled specially. */
823 if (err == -ENOSYS) {
/* Flag-less variant using the smaller fuse_rename_in layout. */
828 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
830 sizeof(struct fuse_rename_in));
/*
 * Create a hard link @newent in @newdir to the inode behind @entry by
 * sending FUSE_LINK through create_new_entry(), then refresh the cached
 * state of the linked inode.
 */
836 static int fuse_link(struct dentry *entry, struct inode *newdir,
837 struct dentry *newent)
840 struct fuse_link_in inarg;
841 struct inode *inode = d_inode(entry);
842 struct fuse_conn *fc = get_fuse_conn(inode);
845 memset(&inarg, 0, sizeof(inarg));
846 inarg.oldnodeid = get_node_id(inode);
847 args.opcode = FUSE_LINK;
849 args.in_args[0].size = sizeof(inarg);
850 args.in_args[0].value = &inarg;
851 args.in_args[1].size = newent->d_name.len + 1;
852 args.in_args[1].value = newent->d_name.name;
/* The expected type of the new entry is the existing inode's mode. */
853 err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
854 /* Contrary to "normal" filesystems it can happen that link
855 makes two "logical" inodes point to the same "physical"
856 inode. We invalidate the attributes of the old one, so it
857 will reflect changes in the backing inode (link count,
861 struct fuse_inode *fi = get_fuse_inode(inode);
/* Under fi->lock: take a fresh attribute version from the connection counter. */
863 spin_lock(&fi->lock);
864 fi->attr_version = atomic64_inc_return(&fc->attr_version);
866 spin_unlock(&fi->lock);
867 fuse_invalidate_attr(inode);
868 fuse_update_ctime(inode);
/* Interrupted request: only invalidate the attributes. */
869 } else if (err == -EINTR) {
870 fuse_invalidate_attr(inode);
/*
 * Fill *stat from a fuse_attr.  With writeback caching on a regular file
 * the size, mtime and ctime in @attr are first overwritten with the
 * kernel's own values from the inode.
 */
875 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
878 unsigned int blkbits;
879 struct fuse_conn *fc = get_fuse_conn(inode);
881 /* see the comment in fuse_change_attributes() */
882 if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
883 attr->size = i_size_read(inode);
884 attr->mtime = inode->i_mtime.tv_sec;
885 attr->mtimensec = inode->i_mtime.tv_nsec;
886 attr->ctime = inode->i_ctime.tv_sec;
887 attr->ctimensec = inode->i_ctime.tv_nsec;
890 stat->dev = inode->i_sb->s_dev;
891 stat->ino = attr->ino;
/* File type comes from the inode, permission bits from the reply. */
892 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
893 stat->nlink = attr->nlink;
/* uid/gid are mapped through the connection's user namespace. */
894 stat->uid = make_kuid(fc->user_ns, attr->uid);
895 stat->gid = make_kgid(fc->user_ns, attr->gid);
896 stat->rdev = inode->i_rdev;
897 stat->atime.tv_sec = attr->atime;
898 stat->atime.tv_nsec = attr->atimensec;
899 stat->mtime.tv_sec = attr->mtime;
900 stat->mtime.tv_nsec = attr->mtimensec;
901 stat->ctime.tv_sec = attr->ctime;
902 stat->ctime.tv_nsec = attr->ctimensec;
903 stat->size = attr->size;
904 stat->blocks = attr->blocks;
/* Block size: log2 of the reply's blksize when non-zero; the superblock's block-size bits are the other source. */
906 if (attr->blksize != 0)
907 blkbits = ilog2(attr->blksize);
909 blkbits = inode->i_sb->s_blocksize_bits;
911 stat->blksize = 1 << blkbits;
/*
 * Send FUSE_GETATTR for @inode and apply the reply: the inode is marked
 * bad if the file type changed; fuse_change_attributes() and
 * fuse_fillattr() consume the returned attributes.
 */
914 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
918 struct fuse_getattr_in inarg;
919 struct fuse_attr_out outarg;
920 struct fuse_conn *fc = get_fuse_conn(inode);
/* Sample the attribute version before the request is sent. */
924 attr_version = fuse_get_attr_version(fc);
926 memset(&inarg, 0, sizeof(inarg));
927 memset(&outarg, 0, sizeof(outarg));
928 /* Directories have separate file-handle space */
929 if (file && S_ISREG(inode->i_mode)) {
930 struct fuse_file *ff = file->private_data;
932 inarg.getattr_flags |= FUSE_GETATTR_FH;
935 args.opcode = FUSE_GETATTR;
936 args.nodeid = get_node_id(inode);
938 args.in_args[0].size = sizeof(inarg);
939 args.in_args[0].value = &inarg;
940 args.out_numargs = 1;
941 args.out_args[0].size = sizeof(outarg);
942 args.out_args[0].value = &outarg;
943 err = fuse_simple_request(fc, &args);
/* A file-type mismatch between inode and reply marks the inode bad. */
945 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
946 make_bad_inode(inode);
949 fuse_change_attributes(inode, &outarg.attr,
950 attr_timeout(&outarg),
953 fuse_fillattr(inode, &outarg.attr, stat);
/*
 * Decide whether attributes must be re-fetched (based on the AT_STATX_*
 * flags, the inode's inval_mask and the attribute timeout) and either
 * call fuse_do_getattr() or fill *stat from the cached inode.
 */
959 static int fuse_update_get_attr(struct inode *inode, struct file *file,
960 struct kstat *stat, u32 request_mask,
963 struct fuse_inode *fi = get_fuse_inode(inode);
/* Decision inputs, in order: force-sync flag, dont-sync flag, invalidated request bits, expired timeout. */
967 if (flags & AT_STATX_FORCE_SYNC)
969 else if (flags & AT_STATX_DONT_SYNC)
971 else if (request_mask & READ_ONCE(fi->inval_mask))
974 sync = time_before64(fi->i_time, get_jiffies_64());
977 forget_all_cached_acls(inode);
978 err = fuse_do_getattr(inode, stat, file);
/* Cached path: generic fill, then restore the original mode and ino. */
980 generic_fillattr(inode, stat);
981 stat->mode = fi->orig_i_mode;
982 stat->ino = fi->orig_ino;
988 int fuse_update_attributes(struct inode *inode, struct file *file)
990 /* Do *not* need to get atime for internal purposes */
991 return fuse_update_get_attr(inode, file, NULL,
992 STATX_BASIC_STATS & ~STATX_ATIME, 0);
/*
 * Invalidate the dentry called @name under the directory with node id
 * @parent_nodeid.  When @child_nodeid is non-zero and the dentry is
 * positive, further checks and updates are made on the child inode under
 * its inode lock.
 */
995 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
996 u64 child_nodeid, struct qstr *name)
999 struct inode *parent;
1001 struct dentry *entry;
/* Find the parent inode by node id. */
1003 parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
1008 if (!S_ISDIR(parent->i_mode))
1012 dir = d_find_alias(parent);
/* Hash the name and look it up in the dcache under that directory. */
1016 name->hash = full_name_hash(dir, name->name, name->len);
1017 entry = d_lookup(dir, name);
1022 fuse_dir_changed(parent);
1023 fuse_invalidate_entry(entry);
1025 if (child_nodeid != 0 && d_really_is_positive(entry)) {
1026 inode_lock(d_inode(entry));
/* Checks made on the child: node id mismatch, mountpoint, and for directories emptiness after shrinking the dcache. */
1027 if (get_node_id(d_inode(entry)) != child_nodeid) {
1031 if (d_mountpoint(entry)) {
1035 if (d_is_dir(entry)) {
1036 shrink_dcache_parent(entry);
1037 if (!simple_empty(entry)) {
1041 d_inode(entry)->i_flags |= S_DEAD;
1044 clear_nlink(d_inode(entry));
1047 inode_unlock(d_inode(entry));
1056 inode_unlock(parent);
1062 * Calling into a user-controlled filesystem gives the filesystem
1063 * daemon ptrace-like capabilities over the current process. This
1064 * means, that the filesystem daemon is able to record the exact
1065 * filesystem operations performed, and can also control the behavior
1066 * of the requester process in otherwise impossible ways. For example
1067 * it can delay the operation for arbitrary length of time allowing
1068 * DoS against the requester.
1070 * For this reason only those processes can call into the filesystem,
1071 * for which the owner of the mount has ptrace privilege. This
1072 * excludes processes started by other users, suid or sgid processes.
/*
 * Decide whether the current process may call into this filesystem.
 * With allow_other the answer is whether the process is in the
 * connection's user namespace; otherwise all of its effective, saved and
 * real uid/gid values are compared with the connection's user_id/group_id.
 */
1074 int fuse_allow_current_process(struct fuse_conn *fc)
1076 const struct cred *cred;
1078 if (fc->allow_other)
1079 return current_in_userns(fc->user_ns);
1081 cred = current_cred();
/* All six ids must match the ids recorded on the connection. */
1082 if (uid_eq(cred->euid, fc->user_id) &&
1083 uid_eq(cred->suid, fc->user_id) &&
1084 uid_eq(cred->uid, fc->user_id) &&
1085 gid_eq(cred->egid, fc->group_id) &&
1086 gid_eq(cred->sgid, fc->group_id) &&
1087 gid_eq(cred->gid, fc->group_id))
/*
 * Send a FUSE_ACCESS request carrying the read/write/exec bits of @mask.
 * Must not be called with MAY_NOT_BLOCK set (enforced by BUG_ON).
 */
1093 static int fuse_access(struct inode *inode, int mask)
1095 struct fuse_conn *fc = get_fuse_conn(inode);
1097 struct fuse_access_in inarg;
1100 BUG_ON(mask & MAY_NOT_BLOCK);
1105 memset(&inarg, 0, sizeof(inarg));
/* Only MAY_READ, MAY_WRITE and MAY_EXEC are forwarded. */
1106 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1107 args.opcode = FUSE_ACCESS;
1108 args.nodeid = get_node_id(inode);
1109 args.in_numargs = 1;
1110 args.in_args[0].size = sizeof(inarg);
1111 args.in_args[0].value = &inarg;
1112 err = fuse_simple_request(fc, &args);
/* -ENOSYS from the server is handled specially. */
1113 if (err == -ENOSYS) {
/*
 * Refresh attributes for a permission check: drop cached ACLs and issue
 * a getattr without a kstat or file.  MAY_NOT_BLOCK is tested first.
 */
1120 static int fuse_perm_getattr(struct inode *inode, int mask)
1122 if (mask & MAY_NOT_BLOCK)
1125 forget_all_cached_acls(inode);
1126 return fuse_do_getattr(inode, NULL, NULL);
1130 * Check permission. The two basic access models of FUSE are:
1132 * 1) Local access checking ('default_permissions' mount option) based
1133 * on file mode. This is the plain old disk filesystem permission
1136 * 2) "Remote" access checking, where server is responsible for
1137 * checking permission in each inode operation. An exception to this
1138 * is if ->permission() was invoked from sys_access() in which case an
1139 * access request is sent. Execute permission is still checked
1140 * locally based on file mode.
/*
 * ->permission() hook.  With default_permissions the generic mode-based
 * check is used, with one attribute refresh retried on -EACCES.
 * Otherwise an access request is sent for MAY_ACCESS/MAY_CHDIR, and
 * execute permission on regular files is checked against the mode bits.
 */
1142 static int fuse_permission(struct inode *inode, int mask)
1144 struct fuse_conn *fc = get_fuse_conn(inode);
1145 bool refreshed = false;
1148 if (!fuse_allow_current_process(fc))
1152 * If attributes are needed, refresh them before proceeding
1154 if (fc->default_permissions ||
1155 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1156 struct fuse_inode *fi = get_fuse_inode(inode);
1157 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
/* Refresh when mode/uid/gid were invalidated or the attribute timeout passed. */
1159 if (perm_mask & READ_ONCE(fi->inval_mask) ||
1160 time_before64(fi->i_time, get_jiffies_64())) {
1163 err = fuse_perm_getattr(inode, mask);
1169 if (fc->default_permissions) {
1170 err = generic_permission(inode, mask);
1172 /* If permission is denied, try to refresh file
1173 attributes. This is also needed, because the root
1174 node will at first have no permissions */
1175 if (err == -EACCES && !refreshed) {
1176 err = fuse_perm_getattr(inode, mask);
1178 err = generic_permission(inode, mask);
1181 /* Note: the opposite of the above test does not
1182 exist. So if permissions are revoked this won't be
1183 noticed immediately, only after the attribute
1184 timeout has expired */
1185 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1186 err = fuse_access(inode, mask);
1187 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
/* No execute bit is set at all: refresh and test the mode again. */
1188 if (!(inode->i_mode & S_IXUGO)) {
1192 err = fuse_perm_getattr(inode, mask);
1193 if (!err && !(inode->i_mode & S_IXUGO))
/*
 * Read a symlink target into @page with a FUSE_READLINK request.  The
 * reply is variable-length, limited to PAGE_SIZE - 1 bytes, and delivered
 * through page arguments with page zeroing enabled.
 */
1200 static int fuse_readlink_page(struct inode *inode, struct page *page)
1202 struct fuse_conn *fc = get_fuse_conn(inode);
/* Leave one byte of the page unused by the reply. */
1203 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1204 struct fuse_args_pages ap = {
1212 ap.args.opcode = FUSE_READLINK;
1213 ap.args.nodeid = get_node_id(inode);
1214 ap.args.out_pages = true;
1215 ap.args.out_argvar = true;
1216 ap.args.page_zeroing = true;
1217 ap.args.out_numargs = 1;
1218 ap.args.out_args[0].size = desc.length;
1219 res = fuse_simple_request(fc, &ap.args);
1221 fuse_invalidate_atime(inode);
/* A result of PAGE_SIZE or more is unexpected and triggers a warning. */
1226 if (WARN_ON(res >= PAGE_SIZE))
1229 link = page_address(page);
/*
 * ->get_link() hook.  With cache_symlinks the generic page-cache based
 * page_get_link() is used; otherwise a page is allocated, filled by
 * fuse_readlink_page(), and released later through the delayed call.
 */
1235 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1236 struct delayed_call *callback)
1238 struct fuse_conn *fc = get_fuse_conn(inode);
1243 if (is_bad_inode(inode))
1246 if (fc->cache_symlinks)
1247 return page_get_link(dentry, inode, callback);
1253 page = alloc_page(GFP_KERNEL);
1258 err = fuse_readlink_page(inode, page);
/* page_put_link() is registered to release the page when the caller is done. */
1264 set_delayed_call(callback, page_put_link, page);
1266 return page_address(page);
/* Error exit: hand the error code back as an ERR_PTR. */
1269 return ERR_PTR(err);
1272 static int fuse_dir_open(struct inode *inode, struct file *file)
1274 return fuse_open_common(inode, file, true);
/* Release a directory file: fuse_release_common() with its second argument set to true. */
1277 static int fuse_dir_release(struct inode *inode, struct file *file)
1279 fuse_release_common(file, true);
/*
 * fsync for directories: sends FUSE_FSYNCDIR via fuse_fsync_common() and
 * remembers on the connection when the server answers -ENOSYS.
 */
1284 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1287 struct inode *inode = file->f_mapping->host;
1288 struct fuse_conn *fc = get_fuse_conn(inode);
1291 if (is_bad_inode(inode))
/* Tested first: the server was already found not to implement FSYNCDIR. */
1294 if (fc->no_fsyncdir)
1298 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1299 if (err == -ENOSYS) {
1300 fc->no_fsyncdir = 1;
1303 inode_unlock(inode);
/* ioctl on a directory: forwarded to fuse_ioctl_common() with FUSE_IOCTL_DIR. */
1308 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1311 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1313 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1317 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
/*
 * Compat ioctl on a directory: forwarded to fuse_ioctl_common() with both
 * FUSE_IOCTL_COMPAT and FUSE_IOCTL_DIR set.
 */
1320 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1323 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1328 return fuse_ioctl_common(file, cmd, arg,
1329 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
/*
 * Decide whether a setattr should carry mtime to the server, based on
 * the iattr valid bits and on whether the kernel's mtime is trusted.
 * The existing per-case comments below describe the outcome of each test.
 */
1332 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1334 /* Always update if mtime is explicitly set */
1335 if (ivalid & ATTR_MTIME_SET)
1338 /* Or if kernel i_mtime is the official one */
1339 if (trust_local_mtime)
1342 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1343 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1346 /* In all other cases update */
1350 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1351 struct fuse_setattr_in *arg, bool trust_local_cmtime)
1353 unsigned ivalid = iattr->ia_valid;
1355 if (ivalid & ATTR_MODE)
1356 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1357 if (ivalid & ATTR_UID)
1358 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1359 if (ivalid & ATTR_GID)
1360 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1361 if (ivalid & ATTR_SIZE)
1362 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1363 if (ivalid & ATTR_ATIME) {
1364 arg->valid |= FATTR_ATIME;
1365 arg->atime = iattr->ia_atime.tv_sec;
1366 arg->atimensec = iattr->ia_atime.tv_nsec;
1367 if (!(ivalid & ATTR_ATIME_SET))
1368 arg->valid |= FATTR_ATIME_NOW;
1370 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1371 arg->valid |= FATTR_MTIME;
1372 arg->mtime = iattr->ia_mtime.tv_sec;
1373 arg->mtimensec = iattr->ia_mtime.tv_nsec;
1374 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1375 arg->valid |= FATTR_MTIME_NOW;
1377 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1378 arg->valid |= FATTR_CTIME;
1379 arg->ctime = iattr->ia_ctime.tv_sec;
1380 arg->ctimensec = iattr->ia_ctime.tv_nsec;
1385 * Prevent concurrent writepages on inode
1387 * This is done by adding a negative bias to the inode write counter
1388 * and waiting for all pending writes to finish.
1390 void fuse_set_nowrite(struct inode *inode)
1392 struct fuse_inode *fi = get_fuse_inode(inode);
1394 BUG_ON(!inode_is_locked(inode));
1396 spin_lock(&fi->lock);
1397 BUG_ON(fi->writectr < 0);
1398 fi->writectr += FUSE_NOWRITE;
1399 spin_unlock(&fi->lock);
1400 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1404 * Allow writepages on inode
1406 * Remove the bias from the writecounter and send any queued
/*
 * Lock-free half of fuse_release_nowrite(): the write counter must equal
 * FUSE_NOWRITE on entry; queued writepages are then flushed.
 */
1409 static void __fuse_release_nowrite(struct inode *inode)
1411 struct fuse_inode *fi = get_fuse_inode(inode);
/* Only the bias may be left on the counter at this point. */
1413 BUG_ON(fi->writectr != FUSE_NOWRITE);
1415 fuse_flush_writepages(inode);
1418 void fuse_release_nowrite(struct inode *inode)
1420 struct fuse_inode *fi = get_fuse_inode(inode);
1422 spin_lock(&fi->lock);
1423 __fuse_release_nowrite(inode);
1424 spin_unlock(&fi->lock);
1427 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1428 struct inode *inode,
1429 struct fuse_setattr_in *inarg_p,
1430 struct fuse_attr_out *outarg_p)
1432 args->opcode = FUSE_SETATTR;
1433 args->nodeid = get_node_id(inode);
1434 args->in_numargs = 1;
1435 args->in_args[0].size = sizeof(*inarg_p);
1436 args->in_args[0].value = inarg_p;
1437 args->out_numargs = 1;
1438 args->out_args[0].size = sizeof(*outarg_p);
1439 args->out_args[0].value = outarg_p;
1443 * Flush inode->i_mtime to the server
/*
 * Send the inode's mtime (and, for protocol minor >= 23, ctime) to the
 * server in a FUSE_SETATTR request.  Returns the request's result.
 */
1445 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1447 struct fuse_conn *fc = get_fuse_conn(inode);
1449 struct fuse_setattr_in inarg;
1450 struct fuse_attr_out outarg;
1452 memset(&inarg, 0, sizeof(inarg));
1453 memset(&outarg, 0, sizeof(outarg));
1455 inarg.valid = FATTR_MTIME;
1456 inarg.mtime = inode->i_mtime.tv_sec;
1457 inarg.mtimensec = inode->i_mtime.tv_nsec;
/* ctime is included only from protocol minor version 23 on. */
1458 if (fc->minor >= 23) {
1459 inarg.valid |= FATTR_CTIME;
1460 inarg.ctime = inode->i_ctime.tv_sec;
1461 inarg.ctimensec = inode->i_ctime.tv_nsec;
/* NOTE(review): presumably set only when a file handle (ff) is supplied -- confirm. */
1464 inarg.valid |= FATTR_FH;
1467 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1469 return fuse_simple_request(fc, &args);
1473 * Set attributes, and at the same time refresh them.
1475 * Truncation is slightly complicated, because the 'truncate' request
1476 * may fail, in which case we don't want to touch the mapping.
1477 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1478 * and the actual truncation by hand.
1480 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1483 struct inode *inode = d_inode(dentry);
1484 struct fuse_conn *fc = get_fuse_conn(inode);
1485 struct fuse_inode *fi = get_fuse_inode(inode);
1487 struct fuse_setattr_in inarg;
1488 struct fuse_attr_out outarg;
1489 bool is_truncate = false;
1490 bool is_wb = fc->writeback_cache;
1493 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1495 if (!fc->default_permissions)
1496 attr->ia_valid |= ATTR_FORCE;
1498 err = setattr_prepare(dentry, attr);
1502 if (attr->ia_valid & ATTR_OPEN) {
1503 /* This is coming from open(..., ... | O_TRUNC); */
1504 WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1505 WARN_ON(attr->ia_size != 0);
1506 if (fc->atomic_o_trunc) {
1508 * No need to send request to userspace, since actual
1509 * truncation has already been done by OPEN. But still
1510 * need to truncate page cache.
1512 i_size_write(inode, 0);
1513 truncate_pagecache(inode, 0);
1519 if (attr->ia_valid & ATTR_SIZE) {
1520 if (WARN_ON(!S_ISREG(inode->i_mode)))
1525 /* Flush dirty data/metadata before non-truncate SETATTR */
1526 if (is_wb && S_ISREG(inode->i_mode) &&
1528 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1530 err = write_inode_now(inode, true);
1534 fuse_set_nowrite(inode);
1535 fuse_release_nowrite(inode);
1539 fuse_set_nowrite(inode);
1540 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1541 if (trust_local_cmtime && attr->ia_size != inode->i_size)
1542 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1545 memset(&inarg, 0, sizeof(inarg));
1546 memset(&outarg, 0, sizeof(outarg));
1547 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1549 struct fuse_file *ff = file->private_data;
1550 inarg.valid |= FATTR_FH;
1553 if (attr->ia_valid & ATTR_SIZE) {
1554 /* For mandatory locking in truncate */
1555 inarg.valid |= FATTR_LOCKOWNER;
1556 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1558 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1559 err = fuse_simple_request(fc, &args);
1562 fuse_invalidate_attr(inode);
1566 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1567 make_bad_inode(inode);
1572 spin_lock(&fi->lock);
1573 /* the kernel maintains i_mtime locally */
1574 if (trust_local_cmtime) {
1575 if (attr->ia_valid & ATTR_MTIME)
1576 inode->i_mtime = attr->ia_mtime;
1577 if (attr->ia_valid & ATTR_CTIME)
1578 inode->i_ctime = attr->ia_ctime;
1579 /* FIXME: clear I_DIRTY_SYNC? */
1582 fuse_change_attributes_common(inode, &outarg.attr,
1583 attr_timeout(&outarg));
1584 oldsize = inode->i_size;
1585 /* see the comment in fuse_change_attributes() */
1586 if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1587 i_size_write(inode, outarg.attr.size);
1590 /* NOTE: this may release/reacquire fi->lock */
1591 __fuse_release_nowrite(inode);
1593 spin_unlock(&fi->lock);
1596 * Only call invalidate_inode_pages2() after removing
1597 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1599 if ((is_truncate || !is_wb) &&
1600 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1601 truncate_pagecache(inode, outarg.attr.size);
1602 invalidate_inode_pages2(inode->i_mapping);
1605 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1610 fuse_release_nowrite(inode);
1612 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1616 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1618 struct inode *inode = d_inode(entry);
1619 struct fuse_conn *fc = get_fuse_conn(inode);
1620 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1623 if (!fuse_allow_current_process(get_fuse_conn(inode)))
1626 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1627 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1631 * The only sane way to reliably kill suid/sgid is to do it in
1632 * the userspace filesystem
1634 * This should be done on write(), truncate() and chown().
1636 if (!fc->handle_killpriv) {
1638 * ia_mode calculation may have used stale i_mode.
1639 * Refresh and recalculate.
1641 ret = fuse_do_getattr(inode, NULL, file);
1645 attr->ia_mode = inode->i_mode;
1646 if (inode->i_mode & S_ISUID) {
1647 attr->ia_valid |= ATTR_MODE;
1648 attr->ia_mode &= ~S_ISUID;
1650 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1651 attr->ia_valid |= ATTR_MODE;
1652 attr->ia_mode &= ~S_ISGID;
1656 if (!attr->ia_valid)
1659 ret = fuse_do_setattr(entry, attr, file);
1662 * If filesystem supports acls it may have updated acl xattrs in
1663 * the filesystem, so forget cached acls for the inode.
1666 forget_all_cached_acls(inode);
1668 /* Directory mode changed, may need to revalidate access */
1669 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1670 fuse_invalidate_entry_cache(entry);
1675 static int fuse_getattr(const struct path *path, struct kstat *stat,
1676 u32 request_mask, unsigned int flags)
1678 struct inode *inode = d_inode(path->dentry);
1679 struct fuse_conn *fc = get_fuse_conn(inode);
1681 if (!fuse_allow_current_process(fc))
1684 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
1687 static const struct inode_operations fuse_dir_inode_operations = {
1688 .lookup = fuse_lookup,
1689 .mkdir = fuse_mkdir,
1690 .symlink = fuse_symlink,
1691 .unlink = fuse_unlink,
1692 .rmdir = fuse_rmdir,
1693 .rename = fuse_rename2,
1695 .setattr = fuse_setattr,
1696 .create = fuse_create,
1697 .atomic_open = fuse_atomic_open,
1698 .mknod = fuse_mknod,
1699 .permission = fuse_permission,
1700 .getattr = fuse_getattr,
1701 .listxattr = fuse_listxattr,
1702 .get_acl = fuse_get_acl,
1703 .set_acl = fuse_set_acl,
1706 static const struct file_operations fuse_dir_operations = {
1707 .llseek = generic_file_llseek,
1708 .read = generic_read_dir,
1709 .iterate_shared = fuse_readdir,
1710 .open = fuse_dir_open,
1711 .release = fuse_dir_release,
1712 .fsync = fuse_dir_fsync,
1713 .unlocked_ioctl = fuse_dir_ioctl,
1714 .compat_ioctl = fuse_dir_compat_ioctl,
1717 static const struct inode_operations fuse_common_inode_operations = {
1718 .setattr = fuse_setattr,
1719 .permission = fuse_permission,
1720 .getattr = fuse_getattr,
1721 .listxattr = fuse_listxattr,
1722 .get_acl = fuse_get_acl,
1723 .set_acl = fuse_set_acl,
1726 static const struct inode_operations fuse_symlink_inode_operations = {
1727 .setattr = fuse_setattr,
1728 .get_link = fuse_get_link,
1729 .getattr = fuse_getattr,
1730 .listxattr = fuse_listxattr,
1733 void fuse_init_common(struct inode *inode)
1735 inode->i_op = &fuse_common_inode_operations;
1738 void fuse_init_dir(struct inode *inode)
1740 struct fuse_inode *fi = get_fuse_inode(inode);
1742 inode->i_op = &fuse_dir_inode_operations;
1743 inode->i_fop = &fuse_dir_operations;
1745 spin_lock_init(&fi->rdc.lock);
1746 fi->rdc.cached = false;
1749 fi->rdc.version = 0;
1752 static int fuse_symlink_readpage(struct file *null, struct page *page)
1754 int err = fuse_readlink_page(page->mapping->host, page);
1757 SetPageUptodate(page);
1764 static const struct address_space_operations fuse_symlink_aops = {
1765 .readpage = fuse_symlink_readpage,
1768 void fuse_init_symlink(struct inode *inode)
1770 inode->i_op = &fuse_symlink_inode_operations;
1771 inode->i_data.a_ops = &fuse_symlink_aops;
1772 inode_nohighmem(inode);