2 FUSE: Filesystem in Userspace
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
/*
 * Boolean module parameter (sysfs mode 0644).  Within this file it is read
 * by fuse_allow_current_process(), in combination with capable(CAP_SYS_ADMIN).
 */
25 static bool __read_mostly allow_sys_admin_access;
26 module_param(allow_sys_admin_access, bool, 0644);
27 MODULE_PARM_DESC(allow_sys_admin_access,
28 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
30 static void fuse_advise_use_readdirplus(struct inode *dir)
32 struct fuse_inode *fi = get_fuse_inode(dir);
34 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
/*
 * Accessors for the per-dentry expiry time kept behind d_fsdata.  Under
 * BITS_PER_LONG >= 64 the u64 value is stored directly in the d_fsdata
 * pointer; the second pair of definitions instead reads and writes the
 * time member of a union fuse_dentry that d_fsdata points to.
 * NOTE(review): the preprocessor lines separating the two variants and the
 * declaration of union fuse_dentry are not visible in this listing.
 */
37 #if BITS_PER_LONG >= 64
38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
40 entry->d_fsdata = (void *) time;
43 static inline u64 fuse_dentry_time(const struct dentry *entry)
45 return (u64)entry->d_fsdata;
54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
56 ((union fuse_dentry *) dentry->d_fsdata)->time = time;
59 static inline u64 fuse_dentry_time(const struct dentry *entry)
61 return ((union fuse_dentry *) entry->d_fsdata)->time;
/*
 * Store @time as the expiry of @dentry and keep DCACHE_OP_DELETE in step
 * with it.  The local flag is true only for a zero @time on a connection
 * that has delete_stale set; d_flags is modified, under d_lock, only when
 * the DCACHE_OP_DELETE bit currently disagrees with that flag.
 * NOTE(review): the lines that choose between clearing and setting the bit
 * are missing from this listing; presumably it is cleared when the flag is
 * false and set otherwise - confirm against the full file.
 */
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
67 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 bool delete = !time && fc->delete_stale;
70 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 * Don't care about races, either way it's just an optimization
73 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 spin_lock(&dentry->d_lock);
77 dentry->d_flags &= ~DCACHE_OP_DELETE;
79 dentry->d_flags |= DCACHE_OP_DELETE;
80 spin_unlock(&dentry->d_lock);
83 __fuse_dentry_settime(dentry, time);
87 * FUSE caches dentries and attributes with separate timeout. The
88 * time in jiffies until the dentry/attributes are valid is stored in
89 * dentry->d_fsdata and fuse_inode->i_time respectively.
93 * Calculate the time in jiffies until a dentry/attributes are valid
/*
 * Convert a (sec, nsec) timeout into a jiffies deadline.  The visible return
 * adds the converted timespec to get_jiffies_64(); nsec is clamped to
 * NSEC_PER_SEC - 1 in the initializer.
 * NOTE(review): lines are missing around the initializer and after the
 * return, so any special-casing of the inputs cannot be seen here.
 */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
98 struct timespec64 ts = {
100 min_t(u32, nsec, NSEC_PER_SEC - 1)
103 return get_jiffies_64() + timespec64_to_jiffies(&ts);
109 * Set dentry and possibly attribute timeouts from the lookup/mk*
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
114 fuse_dentry_settime(entry,
115 fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
124 * Mark the attributes as stale, so that at the next call to
125 * ->getattr() they will be fetched from userspace
127 void fuse_invalidate_attr(struct inode *inode)
129 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
132 static void fuse_dir_changed(struct inode *dir)
134 fuse_invalidate_attr(dir);
135 inode_maybe_inc_iversion(dir, false);
139 * Mark the attributes as stale due to an atime change. Avoid the invalidate if
142 void fuse_invalidate_atime(struct inode *inode)
144 if (!IS_RDONLY(inode))
145 fuse_invalidate_attr_mask(inode, STATX_ATIME);
/*
 * Just mark the entry as stale, so that a next attempt to look it up
 * will result in a new lookup call to userspace.
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (e.g. unlink, rmdir, rename).  The dentry time is
 * simply reset to 0.
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}
162 * Same as fuse_invalidate_entry_cache(), but also try to remove the
163 * dentry from the hash
/*
 * Invalidate @entry; the visible part resets its cached timeout through
 * fuse_invalidate_entry_cache().
 * NOTE(review): one body line ahead of that call is missing from this
 * listing, so the complete behaviour cannot be confirmed from here.
 */
165 static void fuse_invalidate_entry(struct dentry *entry)
168 fuse_invalidate_entry_cache(entry);
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 u64 nodeid, const struct qstr *name,
173 struct fuse_entry_out *outarg)
175 memset(outarg, 0, sizeof(struct fuse_entry_out));
176 args->opcode = FUSE_LOOKUP;
177 args->nodeid = nodeid;
178 args->in_numargs = 1;
179 args->in_args[0].size = name->len + 1;
180 args->in_args[0].value = name->name;
181 args->out_numargs = 1;
182 args->out_args[0].size = sizeof(struct fuse_entry_out);
183 args->out_args[0].value = outarg;
187 * Check whether the dentry is still valid
189 * If the entry validity timeout has expired and the dentry is
190 * positive, try to redo the lookup. If the lookup results in a
191 * different inode, then let the VFS invalidate the dentry and redo
192 * the lookup once more. If the lookup results in the same inode,
193 * then refresh the attributes, timeouts and mark the dentry valid.
/*
 * d_revalidate callback (wired up in fuse_dentry_operations).
 *
 * A fresh FUSE_LOOKUP is sent when the stored dentry time lies before the
 * current jiffies, or when any of LOOKUP_EXCL, LOOKUP_REVAL or
 * LOOKUP_RENAME_TARGET is set in @flags.  The reply is checked against the
 * cached inode (node id, automount state, attribute validity, staleness)
 * before attributes and the entry timeout are refreshed.
 * NOTE(review): many lines (declarations, returns, gotos and labels) are
 * missing from this listing, so the result of each branch is not visible.
 */
195 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 struct dentry *parent;
199 struct fuse_mount *fm;
200 struct fuse_inode *fi;
203 inode = d_inode_rcu(entry);
204 if (inode && fuse_is_bad(inode))
206 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
207 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
208 struct fuse_entry_out outarg;
210 struct fuse_forget_link *forget;
213 /* For negative dentries, always do a fresh lookup */
218 if (flags & LOOKUP_RCU)
221 fm = get_fuse_mount(inode);
223 forget = fuse_alloc_forget();
/* sampled before the request and handed to fuse_change_attributes() presumably */
228 attr_version = fuse_get_attr_version(fm->fc);
/* the lookup is addressed to the parent directory node */
230 parent = dget_parent(entry);
231 fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
232 &entry->d_name, &outarg);
233 ret = fuse_simple_request(fm, &args);
235 /* Zero nodeid is same as -ENOENT */
236 if (!ret && !outarg.nodeid)
239 fi = get_fuse_inode(inode);
/* reply names a different node, or disagrees on the submount flag */
240 if (outarg.nodeid != get_node_id(inode) ||
241 (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
242 fuse_queue_forget(fm->fc, forget,
246 spin_lock(&fi->lock);
248 spin_unlock(&fi->lock);
251 if (ret == -ENOMEM || ret == -EINTR)
253 if (ret || fuse_invalid_attr(&outarg.attr) ||
254 fuse_stale_inode(inode, outarg.generation, &outarg.attr))
257 forget_all_cached_acls(inode);
258 fuse_change_attributes(inode, &outarg.attr, NULL,
259 ATTR_TIMEOUT(&outarg),
261 fuse_change_entry_timeout(entry, &outarg);
263 fi = get_fuse_inode(inode);
264 if (flags & LOOKUP_RCU) {
265 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
267 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
268 parent = dget_parent(entry);
269 fuse_advise_use_readdirplus(d_inode(parent));
/*
 * Only built when BITS_PER_LONG < 64.
 * fuse_dentry_init() allocates a zeroed union fuse_dentry into d_fsdata and
 * returns 0, or -ENOMEM when the allocation fails.
 * fuse_dentry_release() starts from that same d_fsdata pointer.
 * NOTE(review): the statement that releases the union and the closing
 * preprocessor line are not visible in this listing.
 */
282 #if BITS_PER_LONG < 64
283 static int fuse_dentry_init(struct dentry *dentry)
285 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
286 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
288 return dentry->d_fsdata ? 0 : -ENOMEM;
290 static void fuse_dentry_release(struct dentry *dentry)
292 union fuse_dentry *fd = dentry->d_fsdata;
298 static int fuse_dentry_delete(const struct dentry *dentry)
300 return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
304 * Create a fuse_mount object with a new superblock (with path->dentry
305 * as the root), and return that mount so it can be auto-mounted on
/*
 * d_automount callback.  Creates a submount fs_context for path->dentry,
 * using the filesystem type of the superblock that path->mnt belongs to,
 * and stores the fuse inode of the mount point in fs_private.
 * NOTE(review): the error test guarding the ERR_CAST return and everything
 * after the final comment (mount creation, context release, return value)
 * are missing from this listing.
 */
308 static struct vfsmount *fuse_dentry_automount(struct path *path)
310 struct fs_context *fsc;
311 struct vfsmount *mnt;
312 struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
314 fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
316 return ERR_CAST(fsc);
318 /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
319 fsc->fs_private = mp_fi;
321 /* Create the submount */
/*
 * Dentry operations table: revalidate, delete and automount callbacks, plus
 * d_init / d_release when BITS_PER_LONG < 64.
 * NOTE(review): the line closing the conditional and the closing brace of
 * the initializer are not visible in this listing.
 */
330 const struct dentry_operations fuse_dentry_operations = {
331 .d_revalidate = fuse_dentry_revalidate,
332 .d_delete = fuse_dentry_delete,
333 #if BITS_PER_LONG < 64
334 .d_init = fuse_dentry_init,
335 .d_release = fuse_dentry_release,
337 .d_automount = fuse_dentry_automount,
/*
 * Reduced dentry operations table: the only visible members are d_init and
 * d_release, and only when BITS_PER_LONG < 64.
 * NOTE(review): the name suggests it is meant for the root dentry; the user
 * of this table is not visible here.
 */
340 const struct dentry_operations fuse_root_dentry_operations = {
341 #if BITS_PER_LONG < 64
342 .d_init = fuse_dentry_init,
343 .d_release = fuse_dentry_release,
/*
 * Return non-zero when the file-type bits of mode @m denote one of: regular
 * file, directory, symlink, character device, block device, FIFO or socket.
 * Any other value yields 0.
 */
int fuse_valid_type(int m)
{
	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
	       S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
/*
 * A size reported by the server is accepted only if it does not exceed
 * LLONG_MAX.
 */
static bool fuse_valid_size(u64 size)
{
	return size <= LLONG_MAX;
}
358 bool fuse_invalid_attr(struct fuse_attr *attr)
360 return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
/*
 * Send a FUSE_LOOKUP for @name under node @nodeid on superblock @sb, fill
 * @outarg with the reply and, on the visible success path, store the inode
 * obtained from fuse_iget() in *@inode.
 *
 * Names longer than FUSE_NAME_MAX are checked for before any request is
 * built.  A reply naming FUSE_ROOT_ID with a non-zero generation is
 * tolerated: a one-time warning is printed and the generation is forced
 * to 0.
 * NOTE(review): the error codes, gotos and labels are missing from this
 * listing, so the exact return value of each path is not visible.
 */
363 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
364 struct fuse_entry_out *outarg, struct inode **inode)
366 struct fuse_mount *fm = get_fuse_mount_super(sb);
368 struct fuse_forget_link *forget;
374 if (name->len > FUSE_NAME_MAX)
378 forget = fuse_alloc_forget();
383 attr_version = fuse_get_attr_version(fm->fc);
385 fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
386 err = fuse_simple_request(fm, &args);
387 /* Zero nodeid is same as -ENOENT, but with valid timeout */
388 if (err || !outarg->nodeid)
392 if (fuse_invalid_attr(&outarg->attr))
394 if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
395 pr_warn_once("root generation should be zero\n");
396 outarg->generation = 0;
399 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
400 &outarg->attr, ATTR_TIMEOUT(outarg),
/* NOTE(review): presumably only reached when fuse_iget() fails - confirm */
404 fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
/*
 * Directory lookup: resolve @entry in @dir through fuse_lookup_name(), with
 * the directory held via fuse_lock_inode() for the duration of the request,
 * then splice the resulting inode into the dcache.
 *
 * A bad @dir yields ERR_PTR(-EIO) straight away.  -ENOENT from the lookup
 * clears outarg_valid.  Afterwards the entry timeout is either taken from
 * the reply or reset through fuse_invalidate_entry_cache().
 * NOTE(review): the branch lines that select between those two calls, the
 * remaining declarations and the return paths are missing from this listing.
 */
415 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
419 struct fuse_entry_out outarg;
421 struct dentry *newent;
422 bool outarg_valid = true;
425 if (fuse_is_bad(dir))
426 return ERR_PTR(-EIO);
428 locked = fuse_lock_inode(dir);
429 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
431 fuse_unlock_inode(dir, locked);
432 if (err == -ENOENT) {
433 outarg_valid = false;
/* a child lookup must never resolve to the root node id */
440 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
443 newent = d_splice_alias(inode, entry);
444 err = PTR_ERR(newent);
/* continue with the spliced alias when d_splice_alias() returned one */
448 entry = newent ? newent : entry;
450 fuse_change_entry_timeout(entry, &outarg);
452 fuse_invalidate_entry_cache(entry);
455 fuse_advise_use_readdirplus(dir);
/*
 * Build the security-context extension for a create-type request.
 *
 * Asks the LSM layer for the initial security context of @entry, then lays
 * out one buffer: a struct fuse_secctx_header, followed by a struct
 * fuse_secctx, followed by the context bytes (ctxlen of them).  The total,
 * rounded with FUSE_REC_ALIGN, is recorded in both header->size and
 * ext->size.
 * NOTE(review): several lines are missing from this listing (remaining
 * declarations, the handling of -EOPNOTSUPP, the copy of the xattr name,
 * the assignment of ext->value and all cleanup), so buffer ownership and
 * return values cannot be confirmed from here.
 */
464 static int get_security_context(struct dentry *entry, umode_t mode,
465 struct fuse_in_arg *ext)
467 struct fuse_secctx *fctx;
468 struct fuse_secctx_header *header;
469 void *ctx = NULL, *ptr;
470 u32 ctxlen, total_len = sizeof(*header);
475 err = security_dentry_init_security(entry, mode, &entry->d_name,
476 &name, &ctx, &ctxlen);
478 if (err != -EOPNOTSUPP)
480 /* No LSM is supporting this security hook. Ignore error */
/* namelen counts the terminating NUL of the xattr name */
487 namelen = strlen(name) + 1;
489 if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
491 total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
495 header = ptr = kzalloc(total_len, GFP_KERNEL);
499 header->nr_secctx = nr_ctx;
500 header->size = total_len;
501 ptr += sizeof(*header);
505 ptr += sizeof(*fctx);
510 memcpy(ptr, ctx, ctxlen);
512 ext->size = total_len;
/*
 * Grow the buffer behind @buf by @bytes using krealloc(), zero the newly
 * added tail and return a pointer to the start of that tail.
 * NOTE(review): the declaration of p, the handling of a failed krealloc()
 * and the update of buf->value / buf->size are missing from this listing.
 * Also note that newlen is a u32 sum with no visible overflow check.
 */
520 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
523 u32 newlen = buf->size + bytes;
525 p = krealloc(buf->value, newlen, GFP_KERNEL);
533 memset(p + buf->size, 0, bytes);
537 return p + newlen - bytes;
540 static u32 fuse_ext_size(size_t size)
542 return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
546 * This adds just a single supplementary group that matches the parent's group.
/*
 * Append a FUSE_EXT_GROUPS extension to @ext that carries exactly one
 * group: the gid of @dir translated into the user namespace of the
 * connection.
 *
 * sg_len is the size of a header plus a fuse_supp_groups with one group
 * slot.  The record is appended with extend_arg(), and the fuse_supp_groups
 * payload sits directly after the header (address of xh[1]).
 * NOTE(review): the tail of the early-exit condition, the NULL check on xh
 * and the assignments of xh->size and sg->nr_groups are missing from this
 * listing.
 */
548 static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext)
550 struct fuse_conn *fc = get_fuse_conn(dir);
551 struct fuse_ext_header *xh;
552 struct fuse_supp_groups *sg;
553 kgid_t kgid = dir->i_gid;
554 gid_t parent_gid = from_kgid(fc->user_ns, kgid);
555 u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
557 if (parent_gid == (gid_t) -1 || gid_eq(kgid, current_fsgid()) ||
561 xh = extend_arg(ext, sg_len);
566 xh->type = FUSE_EXT_GROUPS;
568 sg = (struct fuse_supp_groups *) &xh[1];
570 sg->groups[0] = parent_gid;
/*
 * Collect the optional request extensions for a create-type operation and
 * attach them to @args as one extra input argument.
 *
 * The security context is gathered when fc->init_security is set, and the
 * supplementary group when fc->create_supp_group is set.  If both steps
 * succeeded and produced data, the buffer is stored in the next free
 * in_args slot, whose index is remembered in args->ext_idx.
 * NOTE(review): the last parameter, the declaration of err and the
 * failure path are missing from this listing.  The WARN_ON only reports a
 * full in_args array; no visible line prevents the store that follows.
 */
575 static int get_create_ext(struct fuse_args *args,
576 struct inode *dir, struct dentry *dentry,
579 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
580 struct fuse_in_arg ext = { .size = 0, .value = NULL };
583 if (fc->init_security)
584 err = get_security_context(dentry, mode, &ext);
585 if (!err && fc->create_supp_group)
586 err = get_create_supp_group(dir, &ext);
588 if (!err && ext.size) {
589 WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
591 args->ext_idx = args->in_numargs++;
592 args->in_args[args->ext_idx] = ext;
/*
 * Free the extension buffer stored in the in_args slot named by
 * args->ext_idx.
 * NOTE(review): one line ahead of the kfree() is missing from this listing,
 * presumably a guard on ext_idx - confirm against the full file.
 */
600 static void free_ext_value(struct fuse_args *args)
603 kfree(args->in_args[args->ext_idx].value);
607 * Atomic create+open operation
609 * If the filesystem doesn't support this, then fall back to separate
610 * 'mknod' + 'open' requests.
/*
 * Send a combined create + open request (@opcode) for @entry in @dir and,
 * on success, instantiate the dentry and finish opening @file.
 *
 * The request carries a struct fuse_create_in plus the entry name and
 * expects two outputs: a struct fuse_entry_out and a struct fuse_open_out.
 * The latter is written straight into ff->args so that fuse_finish_open()
 * can use it later.  Optional extensions are attached with get_create_ext()
 * and freed right after the request returns.
 * NOTE(review): many lines are missing from this listing (NULL and error
 * checks, several inarg fields, in_numargs, labels and returns), so the
 * error paths cannot be followed here.
 */
612 static int fuse_create_open(struct inode *dir, struct dentry *entry,
613 struct file *file, unsigned int flags,
614 umode_t mode, u32 opcode)
618 struct fuse_mount *fm = get_fuse_mount(dir);
620 struct fuse_forget_link *forget;
621 struct fuse_create_in inarg;
622 struct fuse_open_out *outopenp;
623 struct fuse_entry_out outentry;
624 struct fuse_inode *fi;
625 struct fuse_file *ff;
626 bool trunc = flags & O_TRUNC;
628 /* Userspace expects S_IFREG in create mode */
629 BUG_ON((mode & S_IFMT) != S_IFREG);
631 forget = fuse_alloc_forget();
637 ff = fuse_file_alloc(fm, true);
639 goto out_put_forget_req;
/* the umask is applied here unless the connection has dont_mask set */
641 if (!fm->fc->dont_mask)
642 mode &= ~current_umask();
645 memset(&inarg, 0, sizeof(inarg));
646 memset(&outentry, 0, sizeof(outentry));
649 inarg.umask = current_umask();
/* truncating, non-exclusive create by a task lacking CAP_FSETID */
651 if (fm->fc->handle_killpriv_v2 && trunc &&
652 !(flags & O_EXCL) && !capable(CAP_FSETID)) {
653 inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
656 args.opcode = opcode;
657 args.nodeid = get_node_id(dir);
659 args.in_args[0].size = sizeof(inarg);
660 args.in_args[0].value = &inarg;
661 args.in_args[1].size = entry->d_name.len + 1;
662 args.in_args[1].value = entry->d_name.name;
663 args.out_numargs = 2;
664 args.out_args[0].size = sizeof(outentry);
665 args.out_args[0].value = &outentry;
666 /* Store outarg for fuse_finish_open() */
667 outopenp = &ff->args->open_outarg;
668 args.out_args[1].size = sizeof(*outopenp);
669 args.out_args[1].value = outopenp;
671 err = get_create_ext(&args, dir, entry, mode);
673 goto out_put_forget_req;
675 err = fuse_simple_request(fm, &args);
676 free_ext_value(&args);
/* the reply must describe a regular file with a valid node id and attributes */
681 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
682 fuse_invalid_attr(&outentry.attr))
685 ff->fh = outopenp->fh;
686 ff->nodeid = outentry.nodeid;
687 ff->open_flags = outopenp->open_flags;
688 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
689 &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
/* NOTE(review): the next three lines presumably form the fuse_iget() failure path */
691 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
692 fuse_sync_release(NULL, ff, flags);
693 fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
698 d_instantiate(entry, inode);
699 fuse_change_entry_timeout(entry, &outentry);
700 fuse_dir_changed(dir);
701 err = generic_file_open(inode, file);
703 file->private_data = ff;
704 err = finish_open(file, entry, fuse_finish_open);
707 fi = get_fuse_inode(inode);
708 fuse_sync_release(fi, ff, flags);
/* page cache handling after the open */
710 if (fm->fc->atomic_o_trunc && trunc)
711 truncate_pagecache(inode, 0);
712 else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
713 invalidate_inode_pages2(inode->i_mapping);
/*
 * Forward declaration: fuse_atomic_open() calls fuse_mknod() before its
 * definition.
 * NOTE(review): the continuation line with the remaining parameter types is
 * not visible in this listing.
 */
725 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
/*
 * Atomic-open entry point: look the entry up first if it is still in
 * lookup, then try an atomic FUSE_CREATE through fuse_create_open() for
 * O_CREAT on a negative dentry.
 *
 * -ENOSYS and -EEXIST from fuse_create_open() are treated specially;
 * -EEXIST invalidates the entry.  A separate path issues a plain
 * fuse_mknod() and ends in finish_no_open().
 * NOTE(review): the last parameter, the labels and gotos, the body of the
 * -ENOSYS branch and most returns are missing from this listing.
 */
727 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
728 struct file *file, unsigned flags,
732 struct fuse_conn *fc = get_fuse_conn(dir);
733 struct dentry *res = NULL;
735 if (fuse_is_bad(dir))
738 if (d_in_lookup(entry)) {
739 res = fuse_lookup(dir, entry, 0);
747 if (!(flags & O_CREAT) || d_really_is_positive(entry))
751 file->f_mode |= FMODE_CREATED;
756 err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE);
757 if (err == -ENOSYS) {
760 } else if (err == -EEXIST)
761 fuse_invalidate_entry(entry);
767 err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
771 return finish_no_open(file, res);
775 * Code shared between mknod, mkdir, symlink and link
/*
 * Common tail of the entry-creating operations: the caller has filled in
 * the opcode and input arguments of @args; this function adds the node id
 * of @dir and a struct fuse_entry_out output, sends the request, validates
 * the reply and splices the new inode onto @entry.
 *
 * Extensions are attached through get_create_ext() for every opcode except
 * FUSE_LINK.  The reply is rejected when the node id or the attributes are
 * invalid, or when the file type in the reply differs from the type bits of
 * the requested mode.
 * NOTE(review): the last parameter, the err conditions ahead of the gotos,
 * the labels and the returns are missing from this listing.
 */
777 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
778 struct inode *dir, struct dentry *entry,
781 struct fuse_entry_out outarg;
785 struct fuse_forget_link *forget;
787 if (fuse_is_bad(dir))
790 forget = fuse_alloc_forget();
794 memset(&outarg, 0, sizeof(outarg));
795 args->nodeid = get_node_id(dir);
796 args->out_numargs = 1;
797 args->out_args[0].size = sizeof(outarg);
798 args->out_args[0].value = &outarg;
800 if (args->opcode != FUSE_LINK) {
801 err = get_create_ext(args, dir, entry, mode);
803 goto out_put_forget_req;
806 err = fuse_simple_request(fm, args);
807 free_ext_value(args);
809 goto out_put_forget_req;
812 if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
813 goto out_put_forget_req;
/* XOR leaves only the differing bits; S_IFMT keeps the file-type ones */
815 if ((outarg.attr.mode ^ mode) & S_IFMT)
816 goto out_put_forget_req;
818 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
819 &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
821 fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
827 d = d_splice_alias(inode, entry);
/* the timeout goes on the alias d or on entry; the selecting lines are not visible */
832 fuse_change_entry_timeout(d, &outarg);
835 fuse_change_entry_timeout(entry, &outarg);
837 fuse_dir_changed(dir);
842 fuse_invalidate_entry(entry);
/*
 * Build a FUSE_MKNOD request (a struct fuse_mknod_in followed by the entry
 * name) and hand it to create_new_entry().  Unless the connection has
 * dont_mask set, the umask is applied to @mode before it is used.  @idmap
 * is not referenced in the visible lines.
 * NOTE(review): the args declaration, the inarg.mode assignment and the
 * in_numargs assignment are missing from this listing.
 */
847 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
848 struct dentry *entry, umode_t mode, dev_t rdev)
850 struct fuse_mknod_in inarg;
851 struct fuse_mount *fm = get_fuse_mount(dir);
854 if (!fm->fc->dont_mask)
855 mode &= ~current_umask();
857 memset(&inarg, 0, sizeof(inarg));
859 inarg.rdev = new_encode_dev(rdev);
860 inarg.umask = current_umask();
861 args.opcode = FUSE_MKNOD;
863 args.in_args[0].size = sizeof(inarg);
864 args.in_args[0].value = &inarg;
865 args.in_args[1].size = entry->d_name.len + 1;
866 args.in_args[1].value = entry->d_name.name;
867 return create_new_entry(fm, &args, dir, entry, mode);
870 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
871 struct dentry *entry, umode_t mode, bool excl)
873 return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
/*
 * Temporary-file creation: reuses fuse_create_open() with the FUSE_TMPFILE
 * opcode on the dentry and flags already attached to @file.  A result of
 * -ENOSYS is handled separately.
 * NOTE(review): the lines ahead of the request, the body of the -ENOSYS
 * branch and the return are missing from this listing.
 */
876 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
877 struct file *file, umode_t mode)
879 struct fuse_conn *fc = get_fuse_conn(dir);
885 err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE);
886 if (err == -ENOSYS) {
/*
 * Build a FUSE_MKDIR request (a struct fuse_mkdir_in followed by the entry
 * name) and hand it to create_new_entry(), which is told to expect a
 * directory (S_IFDIR) in the reply.  The umask is applied to @mode unless
 * the connection has dont_mask set.
 * NOTE(review): the args declaration, the inarg.mode assignment and the
 * in_numargs assignment are missing from this listing.
 */
893 static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
894 struct dentry *entry, umode_t mode)
896 struct fuse_mkdir_in inarg;
897 struct fuse_mount *fm = get_fuse_mount(dir);
900 if (!fm->fc->dont_mask)
901 mode &= ~current_umask();
903 memset(&inarg, 0, sizeof(inarg));
905 inarg.umask = current_umask();
906 args.opcode = FUSE_MKDIR;
908 args.in_args[0].size = sizeof(inarg);
909 args.in_args[0].value = &inarg;
910 args.in_args[1].size = entry->d_name.len + 1;
911 args.in_args[1].value = entry->d_name.name;
912 return create_new_entry(fm, &args, dir, entry, S_IFDIR);
/*
 * Build a FUSE_SYMLINK request and hand it to create_new_entry(), which is
 * told to expect a symlink (S_IFLNK) in the reply.  Argument 0 is the entry
 * name; argument 1 is the link target including its terminating NUL
 * (strlen + 1 bytes).
 * NOTE(review): the args declaration and the in_numargs assignment are
 * missing from this listing.
 */
915 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
916 struct dentry *entry, const char *link)
918 struct fuse_mount *fm = get_fuse_mount(dir);
919 unsigned len = strlen(link) + 1;
922 args.opcode = FUSE_SYMLINK;
924 args.in_args[0].size = entry->d_name.len + 1;
925 args.in_args[0].value = entry->d_name.name;
926 args.in_args[1].size = len;
927 args.in_args[1].value = link;
928 return create_new_entry(fm, &args, dir, entry, S_IFLNK);
931 void fuse_flush_time_update(struct inode *inode)
933 int err = sync_inode_metadata(inode, 1);
935 mapping_set_error(inode->i_mapping, err);
/*
 * Set the cached ctime of @inode to the current time, mark the inode dirty
 * and flush the time update.  Does nothing when IS_NOCMTIME() is true for
 * @inode.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (IS_NOCMTIME(inode))
		return;

	inode_set_ctime_current(inode);
	mark_inode_dirty_sync(inode);
	fuse_flush_time_update(inode);
}
947 void fuse_update_ctime(struct inode *inode)
949 fuse_invalidate_attr_mask(inode, STATX_CTIME);
950 fuse_update_ctime_in_cache(inode);
/*
 * Bookkeeping after @entry has been unlinked on the server side: bump the
 * attribute version of the inode under fi->lock, adjust the link count,
 * then invalidate the cached entry and update ctime.
 * NOTE(review): the statements under the S_ISDIR test and under the
 * i_nlink > 0 test are missing from this listing; presumably they adjust
 * i_nlink - confirm against the full file.
 */
953 static void fuse_entry_unlinked(struct dentry *entry)
955 struct inode *inode = d_inode(entry);
956 struct fuse_conn *fc = get_fuse_conn(inode);
957 struct fuse_inode *fi = get_fuse_inode(inode);
959 spin_lock(&fi->lock);
960 fi->attr_version = atomic64_inc_return(&fc->attr_version);
962 * If i_nlink == 0 then unlink doesn't make sense, yet this can
963 * happen if userspace filesystem is careless. It would be
964 * difficult to enforce correct nlink usage so just ignore this
967 if (S_ISDIR(inode->i_mode))
969 else if (inode->i_nlink > 0)
971 spin_unlock(&fi->lock);
972 fuse_invalidate_entry_cache(entry);
973 fuse_update_ctime(inode);
/*
 * Send FUSE_UNLINK for @entry in @dir.  One branch marks the directory as
 * changed and does the unlink bookkeeping; -EINTR and -ENOENT instead
 * invalidate the entry.
 * NOTE(review): the args declaration, the in_numargs assignment, the
 * condition that opens the first branch (presumably success) and the
 * returns are missing from this listing.
 */
976 static int fuse_unlink(struct inode *dir, struct dentry *entry)
979 struct fuse_mount *fm = get_fuse_mount(dir);
982 if (fuse_is_bad(dir))
985 args.opcode = FUSE_UNLINK;
986 args.nodeid = get_node_id(dir);
988 args.in_args[0].size = entry->d_name.len + 1;
989 args.in_args[0].value = entry->d_name.name;
990 err = fuse_simple_request(fm, &args);
992 fuse_dir_changed(dir);
993 fuse_entry_unlinked(entry);
994 } else if (err == -EINTR || err == -ENOENT)
995 fuse_invalidate_entry(entry);
/*
 * Send FUSE_RMDIR for @entry in @dir; same shape as fuse_unlink().  One
 * branch marks the directory as changed and does the unlink bookkeeping;
 * -EINTR and -ENOENT instead invalidate the entry.
 * NOTE(review): the args declaration, the condition that opens the first
 * branch (presumably success) and the returns are missing from this
 * listing.
 */
999 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1002 struct fuse_mount *fm = get_fuse_mount(dir);
1005 if (fuse_is_bad(dir))
1008 args.opcode = FUSE_RMDIR;
1009 args.nodeid = get_node_id(dir);
1010 args.in_numargs = 1;
1011 args.in_args[0].size = entry->d_name.len + 1;
1012 args.in_args[0].value = entry->d_name.name;
1013 err = fuse_simple_request(fm, &args);
1015 fuse_dir_changed(dir);
1016 fuse_entry_unlinked(entry);
1017 } else if (err == -EINTR || err == -ENOENT)
1018 fuse_invalidate_entry(entry);
/*
 * Shared implementation for both rename opcodes.
 *
 * The request has three inputs: the rename header (@argsize bytes of a
 * struct fuse_rename2_in), the old name and the new name.  Only @argsize
 * bytes of inarg are zeroed and sent, which lets the caller pick the
 * smaller struct fuse_rename_in layout for the plain rename opcode.
 *
 * One branch updates ctime on the moved inode (and on the target for
 * RENAME_EXCHANGE), marks both directories as changed and does unlink
 * bookkeeping for a replaced positive target.  -EINTR and -ENOENT
 * invalidate both entries.
 * NOTE(review): inarg.flags is assigned after the partial memset for every
 * argsize; whether that is safe depends on the layout of fuse_rename2_in
 * versus fuse_rename_in, which is not visible here.  The args declaration,
 * the condition that opens the first branch and the return are also
 * missing from this listing.
 */
1022 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
1023 struct inode *newdir, struct dentry *newent,
1024 unsigned int flags, int opcode, size_t argsize)
1027 struct fuse_rename2_in inarg;
1028 struct fuse_mount *fm = get_fuse_mount(olddir);
1031 memset(&inarg, 0, argsize);
1032 inarg.newdir = get_node_id(newdir);
1033 inarg.flags = flags;
1034 args.opcode = opcode;
1035 args.nodeid = get_node_id(olddir);
1036 args.in_numargs = 3;
1037 args.in_args[0].size = argsize;
1038 args.in_args[0].value = &inarg;
1039 args.in_args[1].size = oldent->d_name.len + 1;
1040 args.in_args[1].value = oldent->d_name.name;
1041 args.in_args[2].size = newent->d_name.len + 1;
1042 args.in_args[2].value = newent->d_name.name;
1043 err = fuse_simple_request(fm, &args);
1046 fuse_update_ctime(d_inode(oldent));
1048 if (flags & RENAME_EXCHANGE)
1049 fuse_update_ctime(d_inode(newent));
1051 fuse_dir_changed(olddir);
1052 if (olddir != newdir)
1053 fuse_dir_changed(newdir);
1055 /* newent will end up negative */
1056 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1057 fuse_entry_unlinked(newent);
1058 } else if (err == -EINTR || err == -ENOENT) {
1059 /* If request was interrupted, DEITY only knows if the
1060 rename actually took place. If the invalidation
1061 fails (e.g. some process has CWD under the renamed
1062 directory), then there can be inconsistency between
1063 the dcache and the real filesystem. Tough luck. */
1064 fuse_invalidate_entry(oldent);
1065 if (d_really_is_positive(newent))
1066 fuse_invalidate_entry(newent);
/*
 * Rename entry point.  Flags other than RENAME_NOREPLACE, RENAME_EXCHANGE
 * and RENAME_WHITEOUT are checked for first.  The first visible
 * fuse_rename_common() call passes @flags with the fuse_rename2_in layout;
 * the second passes flags of 0 with the smaller fuse_rename_in layout.
 * The connection fields no_rename2 and minor (compared against 23) take
 * part in selecting between them.
 * NOTE(review): the opcode arguments, the branch structure, the error
 * returns and the -ENOSYS handling are missing from this listing.
 */
1072 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1073 struct dentry *oldent, struct inode *newdir,
1074 struct dentry *newent, unsigned int flags)
1076 struct fuse_conn *fc = get_fuse_conn(olddir);
1079 if (fuse_is_bad(olddir))
1082 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1086 if (fc->no_rename2 || fc->minor < 23)
1089 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
1091 sizeof(struct fuse_rename2_in));
1092 if (err == -ENOSYS) {
1097 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
1099 sizeof(struct fuse_rename_in));
/*
 * Create a hard link: send FUSE_LINK through create_new_entry() with the
 * node id of the existing inode and the new name.  The mode of the
 * existing inode is passed as the expected type of the reply.
 * Afterwards one branch updates ctime in the cache, and -EINTR invalidates
 * the cached attributes of the inode.
 * NOTE(review): the args declaration, the condition ahead of the ctime
 * update (presumably success) and the return are missing from this listing.
 */
1105 static int fuse_link(struct dentry *entry, struct inode *newdir,
1106 struct dentry *newent)
1109 struct fuse_link_in inarg;
1110 struct inode *inode = d_inode(entry);
1111 struct fuse_mount *fm = get_fuse_mount(inode);
1114 memset(&inarg, 0, sizeof(inarg));
1115 inarg.oldnodeid = get_node_id(inode);
1116 args.opcode = FUSE_LINK;
1117 args.in_numargs = 2;
1118 args.in_args[0].size = sizeof(inarg);
1119 args.in_args[0].value = &inarg;
1120 args.in_args[1].size = newent->d_name.len + 1;
1121 args.in_args[1].value = newent->d_name.name;
1122 err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
1124 fuse_update_ctime_in_cache(inode);
1125 else if (err == -EINTR)
1126 fuse_invalidate_attr(inode);
/*
 * Fill a kstat from a server-supplied struct fuse_attr.
 *
 * The file-type bits and rdev come from the cached @inode; the permission
 * bits (07777) and all other fields come from @attr.  uid and gid are
 * mapped through the user namespace of the connection.  The block size is
 * rounded down to a power of two via ilog2(); the superblock value is the
 * other source for blkbits.
 * NOTE(review): the second line of the signature (the stat parameter) and
 * the line between the two blkbits assignments are missing from this
 * listing.
 * NOTE(review): 1 << blkbits is an int shift; if blksize can be 2^31 or
 * larger, blkbits becomes 31 and the shift overflows a signed int.
 * Consider 1U << blkbits.
 */
1131 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
1134 unsigned int blkbits;
1135 struct fuse_conn *fc = get_fuse_conn(inode);
1137 stat->dev = inode->i_sb->s_dev;
1138 stat->ino = attr->ino;
1139 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1140 stat->nlink = attr->nlink;
1141 stat->uid = make_kuid(fc->user_ns, attr->uid);
1142 stat->gid = make_kgid(fc->user_ns, attr->gid);
1143 stat->rdev = inode->i_rdev;
1144 stat->atime.tv_sec = attr->atime;
1145 stat->atime.tv_nsec = attr->atimensec;
1146 stat->mtime.tv_sec = attr->mtime;
1147 stat->mtime.tv_nsec = attr->mtimensec;
1148 stat->ctime.tv_sec = attr->ctime;
1149 stat->ctime.tv_nsec = attr->ctimensec;
1150 stat->size = attr->size;
1151 stat->blocks = attr->blocks;
1153 if (attr->blksize != 0)
1154 blkbits = ilog2(attr->blksize);
1156 blkbits = inode->i_sb->s_blocksize_bits;
1158 stat->blksize = 1 << blkbits;
1161 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1163 memset(attr, 0, sizeof(*attr));
1164 attr->ino = sx->ino;
1165 attr->size = sx->size;
1166 attr->blocks = sx->blocks;
1167 attr->atime = sx->atime.tv_sec;
1168 attr->mtime = sx->mtime.tv_sec;
1169 attr->ctime = sx->ctime.tv_sec;
1170 attr->atimensec = sx->atime.tv_nsec;
1171 attr->mtimensec = sx->mtime.tv_nsec;
1172 attr->ctimensec = sx->ctime.tv_nsec;
1173 attr->mode = sx->mode;
1174 attr->nlink = sx->nlink;
1175 attr->uid = sx->uid;
1176 attr->gid = sx->gid;
1177 attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1178 attr->blksize = sx->blksize;
/*
 * Fetch attributes with a FUSE_STATX request, asking for
 * STATX_BASIC_STATS | STATX_BTIME.
 *
 * For a regular file with an open @file, FUSE_GETATTR_FH is set in the
 * request flags.  A reply whose size or type is invalid, or whose type does
 * not match the cached inode, marks the inode bad.  The cached attributes
 * are refreshed only when the reply covers all of STATX_BASIC_STATS.  The
 * btime nanoseconds are clamped to NSEC_PER_SEC - 1 before being stored.
 * NOTE(review): the last parameter, the assignment of sx (presumably to
 * outarg.stat), the file-handle assignment, the error returns and the
 * guard around the stat writes are missing from this listing.
 */
1181 static int fuse_do_statx(struct inode *inode, struct file *file,
1185 struct fuse_attr attr;
1186 struct fuse_statx *sx;
1187 struct fuse_statx_in inarg;
1188 struct fuse_statx_out outarg;
1189 struct fuse_mount *fm = get_fuse_mount(inode);
1190 u64 attr_version = fuse_get_attr_version(fm->fc);
1193 memset(&inarg, 0, sizeof(inarg));
1194 memset(&outarg, 0, sizeof(outarg));
1195 /* Directories have separate file-handle space */
1196 if (file && S_ISREG(inode->i_mode)) {
1197 struct fuse_file *ff = file->private_data;
1199 inarg.getattr_flags |= FUSE_GETATTR_FH;
1202 /* For now leave sync hints as the default, request all stats. */
1204 inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1205 args.opcode = FUSE_STATX;
1206 args.nodeid = get_node_id(inode);
1207 args.in_numargs = 1;
1208 args.in_args[0].size = sizeof(inarg);
1209 args.in_args[0].value = &inarg;
1210 args.out_numargs = 1;
1211 args.out_args[0].size = sizeof(outarg);
1212 args.out_args[0].value = &outarg;
1213 err = fuse_simple_request(fm, &args);
/* size and type are validated only when the reply mask says they are present */
1218 if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1219 ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1220 inode_wrong_type(inode, sx->mode)))) {
1221 fuse_make_bad(inode);
1225 fuse_statx_to_attr(&outarg.stat, &attr);
1226 if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1227 fuse_change_attributes(inode, &attr, &outarg.stat,
1228 ATTR_TIMEOUT(&outarg), attr_version);
1232 stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1233 stat->btime.tv_sec = sx->btime.tv_sec;
1234 stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1235 fuse_fillattr(inode, &attr, stat);
1236 stat->result_mask |= STATX_TYPE;
/*
 * Fetch attributes with a FUSE_GETATTR request.
 *
 * For a regular file with an open file, FUSE_GETATTR_FH is set in the
 * request flags.  A reply with invalid attributes, or with a type that does
 * not match the cached inode, marks the inode bad.  Otherwise the cached
 * attributes are refreshed and the kstat is filled.
 * NOTE(review): the last parameter, the file-handle assignment, the
 * success test, the error value of the bad-inode branch, the test guarding
 * the fillattr call and the return are missing from this listing.
 * fuse_perm_getattr() calls this with a NULL stat.
 */
1242 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1246 struct fuse_getattr_in inarg;
1247 struct fuse_attr_out outarg;
1248 struct fuse_mount *fm = get_fuse_mount(inode);
1252 attr_version = fuse_get_attr_version(fm->fc);
1254 memset(&inarg, 0, sizeof(inarg));
1255 memset(&outarg, 0, sizeof(outarg));
1256 /* Directories have separate file-handle space */
1257 if (file && S_ISREG(inode->i_mode)) {
1258 struct fuse_file *ff = file->private_data;
1260 inarg.getattr_flags |= FUSE_GETATTR_FH;
1263 args.opcode = FUSE_GETATTR;
1264 args.nodeid = get_node_id(inode);
1265 args.in_numargs = 1;
1266 args.in_args[0].size = sizeof(inarg);
1267 args.in_args[0].value = &inarg;
1268 args.out_numargs = 1;
1269 args.out_args[0].size = sizeof(outarg);
1270 args.out_args[0].value = &outarg;
1271 err = fuse_simple_request(fm, &args);
1273 if (fuse_invalid_attr(&outarg.attr) ||
1274 inode_wrong_type(inode, outarg.attr.mode)) {
1275 fuse_make_bad(inode);
1278 fuse_change_attributes(inode, &outarg.attr, NULL,
1279 ATTR_TIMEOUT(&outarg),
1282 fuse_fillattr(inode, &outarg.attr, stat);
/*
 * Decide whether the cached attributes can answer a request or whether the
 * server must be asked, and fill the kstat accordingly.
 *
 * request_mask is first limited to what FUSE supports.  The else-if chain
 * then looks at AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC, whether a
 * requested attribute is invalidated and not covered by the cache mask,
 * and finally whether fi->i_time has passed.  When the server is asked,
 * statx is tried first if anything beyond STATX_BASIC_STATS is requested
 * and the connection has not set no_statx; fuse_do_getattr() is the other
 * request path.  When serving from the cache, mode and ino are replaced by
 * the original values remembered in the fuse inode, and btime is added if
 * FUSE_I_BTIME is set.
 * NOTE(review): the last parameter, the head of the else-if chain, the
 * values assigned to sync in each arm and the branch structure around the
 * two request paths are missing from this listing.
 * fuse_update_attributes() passes a NULL stat, so the stat writes are
 * presumably guarded.
 */
1288 static int fuse_update_get_attr(struct inode *inode, struct file *file,
1289 struct kstat *stat, u32 request_mask,
1292 struct fuse_inode *fi = get_fuse_inode(inode);
1293 struct fuse_conn *fc = get_fuse_conn(inode);
1296 u32 inval_mask = READ_ONCE(fi->inval_mask);
1297 u32 cache_mask = fuse_get_cache_mask(inode);
1300 /* FUSE only supports basic stats and possibly btime */
1301 request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1304 request_mask &= STATX_BASIC_STATS;
1308 else if (flags & AT_STATX_FORCE_SYNC)
1310 else if (flags & AT_STATX_DONT_SYNC)
1312 else if (request_mask & inval_mask & ~cache_mask)
1315 sync = time_before64(fi->i_time, get_jiffies_64());
1318 forget_all_cached_acls(inode);
1319 /* Try statx if BTIME is requested */
1320 if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1321 err = fuse_do_statx(inode, file, stat);
1322 if (err == -ENOSYS) {
1328 err = fuse_do_getattr(inode, stat, file);
1331 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
1332 stat->mode = fi->orig_i_mode;
1333 stat->ino = fi->orig_ino;
1334 if (test_bit(FUSE_I_BTIME, &fi->state)) {
1335 stat->btime = fi->i_btime;
1336 stat->result_mask |= STATX_BTIME;
1343 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1345 return fuse_update_get_attr(inode, file, NULL, mask, 0);
/*
 * Invalidate the dentry called @name under the directory with node id
 * @parent_nodeid.
 *
 * The parent inode is looked up and locked (I_MUTEX_PARENT class), one of
 * its aliases is used to find the child dentry, the directory is marked as
 * changed and the cached timeout of the child is reset.  d_invalidate() is
 * skipped when FUSE_EXPIRE_ONLY is set in @flags.
 *
 * With a non-zero @child_nodeid and a positive child dentry, further checks
 * run under the lock of the child inode: node id match, not a mount point
 * and, for directories, emptiness after shrink_dcache_parent().  Further
 * down, a directory gets S_DEAD and the link count is cleared.
 * NOTE(review): the error codes, gotos and labels, the dput / iput calls
 * and the return are missing from this listing, so cleanup and the result
 * of each failed check cannot be confirmed from here.
 */
1348 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1349 u64 child_nodeid, struct qstr *name, u32 flags)
1352 struct inode *parent;
1354 struct dentry *entry;
1356 parent = fuse_ilookup(fc, parent_nodeid, NULL);
1360 inode_lock_nested(parent, I_MUTEX_PARENT);
1361 if (!S_ISDIR(parent->i_mode))
1365 dir = d_find_alias(parent);
/* d_lookup() needs the hash of the name to be filled in */
1369 name->hash = full_name_hash(dir, name->name, name->len);
1370 entry = d_lookup(dir, name);
1375 fuse_dir_changed(parent);
1376 if (!(flags & FUSE_EXPIRE_ONLY))
1377 d_invalidate(entry);
1378 fuse_invalidate_entry_cache(entry);
1380 if (child_nodeid != 0 && d_really_is_positive(entry)) {
1381 inode_lock(d_inode(entry));
1382 if (get_node_id(d_inode(entry)) != child_nodeid) {
1386 if (d_mountpoint(entry)) {
1390 if (d_is_dir(entry)) {
1391 shrink_dcache_parent(entry);
1392 if (!simple_empty(entry)) {
1396 d_inode(entry)->i_flags |= S_DEAD;
1399 clear_nlink(d_inode(entry));
1402 inode_unlock(d_inode(entry));
1411 inode_unlock(parent);
1416 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1418 const struct cred *cred = current_cred();
1420 return (uid_eq(cred->euid, fc->user_id) &&
1421 uid_eq(cred->suid, fc->user_id) &&
1422 uid_eq(cred->uid, fc->user_id) &&
1423 gid_eq(cred->egid, fc->group_id) &&
1424 gid_eq(cred->sgid, fc->group_id) &&
1425 gid_eq(cred->gid, fc->group_id));
1429 * Calling into a user-controlled filesystem gives the filesystem
1430 * daemon ptrace-like capabilities over the current process. This
1431 * means, that the filesystem daemon is able to record the exact
1432 * filesystem operations performed, and can also control the behavior
1433 * of the requester process in otherwise impossible ways. For example
1434 * it can delay the operation for arbitrary length of time allowing
1435 * DoS against the requester.
1437 * For this reason only those processes can call into the filesystem,
1438 * for which the owner of the mount has ptrace privilege. This
1439 * excludes processes started by other users, suid or sgid processes.
/*
 * Decide whether the current task may issue requests on connection @fc.
 *
 * Two checks are visible: membership of the current task in fc->user_ns
 * (used when fc->allow_other is set) and the strict uid/gid match done by
 * fuse_permissible_uidgid().  A negative result can be overridden for tasks
 * with CAP_SYS_ADMIN when the allow_sys_admin_access module parameter is on.
 */
1441 bool fuse_allow_current_process(struct fuse_conn *fc)
/* allow_other: being inside the user namespace of the connection is enough */
1445 if (fc->allow_other)
1446 allow = current_in_userns(fc->user_ns);
/* NOTE(review): presumably the alternative branch of the test above - confirm */
1448 allow = fuse_permissible_uidgid(fc);
/* Administrator override, only consulted when the checks above said no */
1450 if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
/*
 * Ask the server whether @inode may be accessed with @mask by sending a
 * FUSE_ACCESS request.
 *
 * Only the MAY_READ, MAY_WRITE and MAY_EXEC bits of @mask are forwarded.
 * The function must not be called with MAY_NOT_BLOCK set (BUG_ON).  The
 * request is skipped once fc->no_access has been set, which happens the
 * first time the server answers -ENOSYS.
 */
1456 static int fuse_access(struct inode *inode, int mask)
1458 struct fuse_mount *fm = get_fuse_mount(inode);
1460 struct fuse_access_in inarg;
1463 BUG_ON(mask & MAY_NOT_BLOCK);
/* Server is already known not to implement FUSE_ACCESS */
1465 if (fm->fc->no_access)
1468 memset(&inarg, 0, sizeof(inarg));
1469 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1470 args.opcode = FUSE_ACCESS;
1471 args.nodeid = get_node_id(inode);
1472 args.in_numargs = 1;
1473 args.in_args[0].size = sizeof(inarg);
1474 args.in_args[0].value = &inarg;
1475 err = fuse_simple_request(fm, &args);
/* Remember the missing implementation so later calls can skip the request */
1476 if (err == -ENOSYS) {
1477 fm->fc->no_access = 1;
/*
 * Refresh the attributes of @inode on behalf of a permission check.
 *
 * Cached ACLs are forgotten first, then fuse_do_getattr() is called without
 * a stat buffer and without a file.  @mask is only inspected for
 * MAY_NOT_BLOCK.
 * NOTE(review): presumably an early error return for that case - confirm.
 */
1483 static int fuse_perm_getattr(struct inode *inode, int mask)
1485 if (mask & MAY_NOT_BLOCK)
1488 forget_all_cached_acls(inode);
1489 return fuse_do_getattr(inode, NULL, NULL);
1493 * Check permission. The two basic access models of FUSE are:
1495 * 1) Local access checking ('default_permissions' mount option) based
1496 * on file mode. This is the plain old disk filesystem permission
1499 * 2) "Remote" access checking, where server is responsible for
1500 * checking permission in each inode operation. An exception to this
1501 * is if ->permission() was invoked from sys_access() in which case an
1502 * access request is sent. Execute permission is still checked
1503 * locally based on file mode.
/*
 * ->permission() inode operation (see the inode_operations tables below).
 *
 * @idmap is not consulted by the visible code: both generic_permission()
 * calls pass &nop_mnt_idmap.
 */
1505 static int fuse_permission(struct mnt_idmap *idmap,
1506 struct inode *inode, int mask)
1508 struct fuse_conn *fc = get_fuse_conn(inode);
1509 bool refreshed = false;
1512 if (fuse_is_bad(inode))
1515 if (!fuse_allow_current_process(fc))
1519 * If attributes are needed, refresh them before proceeding
1521 if (fc->default_permissions ||
1522 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1523 struct fuse_inode *fi = get_fuse_inode(inode);
1524 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
/*
 * Refresh when mode, uid or gid is flagged in fi->inval_mask, or when
 * fi->i_time lies before the current jiffies value.
 */
1526 if (perm_mask & READ_ONCE(fi->inval_mask) ||
1527 time_before64(fi->i_time, get_jiffies_64())) {
1530 err = fuse_perm_getattr(inode, mask);
/* Local access checking: generic_permission() against the cached attributes */
1536 if (fc->default_permissions) {
1537 err = generic_permission(&nop_mnt_idmap, inode, mask);
1539 /* If permission is denied, try to refresh file
1540 attributes. This is also needed, because the root
1541 node will at first have no permissions */
1542 if (err == -EACCES && !refreshed) {
1543 err = fuse_perm_getattr(inode, mask);
1545 err = generic_permission(&nop_mnt_idmap,
1549 /* Note: the opposite of the above test does not
1550 exist. So if permissions are revoked this won't be
1551 noticed immediately, only after the attribute
1552 timeout has expired */
/* access()/chdir() style checks are forwarded to the server */
1553 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1554 err = fuse_access(inode, mask);
/*
 * Execute on a regular file: tested locally against the S_IXUGO bits of
 * the cached mode, with an attribute refresh and a second test when no
 * execute bit is set.
 */
1555 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1556 if (!(inode->i_mode & S_IXUGO)) {
1560 err = fuse_perm_getattr(inode, mask);
1561 if (!err && !(inode->i_mode & S_IXUGO))
/*
 * Read the target of symlink @inode into @page with a FUSE_READLINK
 * request.
 *
 * The reply is a single variable-sized output argument delivered into the
 * page (out_pages, out_argvar), limited to PAGE_SIZE - 1 bytes, with
 * page_zeroing requested.  The atime of the inode is invalidated after the
 * request, and a result of PAGE_SIZE or more triggers WARN_ON().
 */
1568 static int fuse_readlink_page(struct inode *inode, struct page *page)
1570 struct fuse_mount *fm = get_fuse_mount(inode);
/* NOTE(review): one byte short of a page, presumably room for a NUL - confirm */
1571 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1572 struct fuse_args_pages ap = {
1580 ap.args.opcode = FUSE_READLINK;
1581 ap.args.nodeid = get_node_id(inode);
1582 ap.args.out_pages = true;
1583 ap.args.out_argvar = true;
1584 ap.args.page_zeroing = true;
1585 ap.args.out_numargs = 1;
1586 ap.args.out_args[0].size = desc.length;
1587 res = fuse_simple_request(fm, &ap.args);
1589 fuse_invalidate_atime(inode);
1594 if (WARN_ON(res >= PAGE_SIZE))
1597 link = page_address(page);
/*
 * ->get_link() for FUSE symlinks.
 *
 * With fc->cache_symlinks the generic page_get_link() is used.  Otherwise a
 * page is allocated, filled by fuse_readlink_page() and returned through
 * page_address(); @callback is armed with page_put_link() for that page.
 * Failures are reported as ERR_PTR(err).
 */
1603 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1604 struct delayed_call *callback)
1606 struct fuse_conn *fc = get_fuse_conn(inode);
1611 if (fuse_is_bad(inode))
/* Cached symlinks go through the generic page-based helper */
1614 if (fc->cache_symlinks)
1615 return page_get_link(dentry, inode, callback);
/* Uncached: read the target into a freshly allocated page */
1621 page = alloc_page(GFP_KERNEL);
1626 err = fuse_readlink_page(inode, page);
/* Hand the page to page_put_link() via the delayed call */
1632 set_delayed_call(callback, page_put_link, page);
1634 return page_address(page);
1637 return ERR_PTR(err);
/*
 * ->open() for directories.
 *
 * After the fuse_is_bad() test and generic_file_open(), the directory is
 * opened through fuse_do_open() with its last argument set to true.  If the
 * resulting fuse_file carries FOPEN_STREAM or FOPEN_NONSEEKABLE in
 * open_flags, the file is switched to non-seekable with nonseekable_open().
 */
1640 static int fuse_dir_open(struct inode *inode, struct file *file)
1642 struct fuse_mount *fm = get_fuse_mount(inode);
1645 if (fuse_is_bad(inode))
1648 err = generic_file_open(inode, file);
1652 err = fuse_do_open(fm, get_node_id(inode), file, true);
1654 struct fuse_file *ff = file->private_data;
1657 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1658 * directories for backward compatibility, though it's unlikely
1661 if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1662 nonseekable_open(inode, file);
/*
 * ->release() for directories: forwards @file to fuse_release_common() with
 * its second argument set to true.  @inode is not used by the visible code.
 */
1668 static int fuse_dir_release(struct inode *inode, struct file *file)
1670 fuse_release_common(file, true);
/*
 * ->fsync() for directories.
 *
 * The sync is delegated to fuse_fsync_common() with the FUSE_FSYNCDIR
 * opcode.  The request is skipped once fc->no_fsyncdir has been set, which
 * happens the first time the server answers -ENOSYS.
 */
1675 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1678 struct inode *inode = file->f_mapping->host;
1679 struct fuse_conn *fc = get_fuse_conn(inode);
1682 if (fuse_is_bad(inode))
/* Server is already known not to implement FUSE_FSYNCDIR */
1685 if (fc->no_fsyncdir)
1689 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
/* Remember the missing implementation for later calls */
1690 if (err == -ENOSYS) {
1691 fc->no_fsyncdir = 1;
1694 inode_unlock(inode);
/*
 * ->unlocked_ioctl() for directories: forwards the call to
 * fuse_ioctl_common() with the FUSE_IOCTL_DIR flag.
 */
1699 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1702 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1704 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1708 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
/*
 * ->compat_ioctl() for directories: same as fuse_dir_ioctl() but with
 * FUSE_IOCTL_COMPAT added to the flags passed to fuse_ioctl_common().
 */
1711 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1714 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1719 return fuse_ioctl_common(file, cmd, arg,
1720 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
/*
 * Decide whether a setattr with the ia_valid bits @ivalid should carry
 * mtime to the server.
 *
 * The tests are tried in this order: ATTR_MTIME_SET, @trust_local_mtime,
 * then a size change that comes from open(O_TRUNC) or ftruncate()
 * (ATTR_SIZE together with ATTR_OPEN or ATTR_FILE).  The inline comments
 * state the outcome of each test.
 */
1723 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1725 /* Always update if mtime is explicitly set */
1726 if (ivalid & ATTR_MTIME_SET)
1729 /* Or if kernel i_mtime is the official one */
1730 if (trust_local_mtime)
1733 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1734 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1737 /* In all other cases update */
/*
 * Translate the VFS attribute change @iattr into the FUSE_SETATTR input
 * structure @arg.
 *
 * For every ATTR_* bit set in iattr->ia_valid that is handled here, the
 * value is copied and the matching FATTR_* bit is OR-ed into arg->valid.
 * @arg is only added to, never cleared, so the caller has to zero it.
 *
 * @fc:                 supplies the user namespace for uid/gid conversion
 * @trust_local_cmtime: passed to update_mtime(); also decides whether
 *                      FATTR_MTIME_NOW may be set and whether ctime is sent
 */
1741 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1742 struct fuse_setattr_in *arg, bool trust_local_cmtime)
1744 unsigned ivalid = iattr->ia_valid;
1746 if (ivalid & ATTR_MODE)
1747 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
/* uid and gid are converted to their values in fc->user_ns */
1748 if (ivalid & ATTR_UID)
1749 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1750 if (ivalid & ATTR_GID)
1751 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1752 if (ivalid & ATTR_SIZE)
1753 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1754 if (ivalid & ATTR_ATIME) {
1755 arg->valid |= FATTR_ATIME;
1756 arg->atime = iattr->ia_atime.tv_sec;
1757 arg->atimensec = iattr->ia_atime.tv_nsec;
/* ATTR_ATIME without ATTR_ATIME_SET is flagged as FATTR_ATIME_NOW */
1758 if (!(ivalid & ATTR_ATIME_SET))
1759 arg->valid |= FATTR_ATIME_NOW;
1761 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1762 arg->valid |= FATTR_MTIME;
1763 arg->mtime = iattr->ia_mtime.tv_sec;
1764 arg->mtimensec = iattr->ia_mtime.tv_nsec;
/* FATTR_MTIME_NOW is never used when the local mtime is trusted */
1765 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1766 arg->valid |= FATTR_MTIME_NOW;
/* ctime is only sent when the local ctime is trusted */
1768 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1769 arg->valid |= FATTR_CTIME;
1770 arg->ctime = iattr->ia_ctime.tv_sec;
1771 arg->ctimensec = iattr->ia_ctime.tv_nsec;
1776 * Prevent concurrent writepages on inode
1778 * This is done by adding a negative bias to the inode write counter
1779 * and waiting for all pending writes to finish.
/*
 * The caller must hold the inode lock (BUG_ON otherwise) and fi->writectr
 * must not already be negative.  FUSE_NOWRITE is added to fi->writectr
 * under fi->lock; the function then sleeps on fi->page_waitq until the
 * counter equals FUSE_NOWRITE.
 */
1781 void fuse_set_nowrite(struct inode *inode)
1783 struct fuse_inode *fi = get_fuse_inode(inode);
1785 BUG_ON(!inode_is_locked(inode));
1787 spin_lock(&fi->lock);
1788 BUG_ON(fi->writectr < 0);
1789 fi->writectr += FUSE_NOWRITE;
1790 spin_unlock(&fi->lock);
/* Waits without fi->lock held */
1791 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1795 * Allow writepages on inode
1797 * Remove the bias from the write counter and send any queued
/*
 * Expects fi->writectr to be exactly FUSE_NOWRITE (BUG_ON otherwise) and
 * then calls fuse_flush_writepages().  Both callers in this file invoke it
 * with fi->lock held.
 */
1800 static void __fuse_release_nowrite(struct inode *inode)
1802 struct fuse_inode *fi = get_fuse_inode(inode);
1804 BUG_ON(fi->writectr != FUSE_NOWRITE);
1806 fuse_flush_writepages(inode);
/*
 * Locked wrapper: takes fi->lock around __fuse_release_nowrite().
 */
1809 void fuse_release_nowrite(struct inode *inode)
1811 struct fuse_inode *fi = get_fuse_inode(inode);
1813 spin_lock(&fi->lock);
1814 __fuse_release_nowrite(inode);
1815 spin_unlock(&fi->lock);
/*
 * Prepare @args for a FUSE_SETATTR request on @inode.
 *
 * The request has one input argument (@inarg_p) and one output argument
 * (@outarg_p); only pointers and sizes are stored, so both structures must
 * stay valid until the request has completed.  @fc is not referenced by the
 * visible assignments.
 */
1818 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1819 struct inode *inode,
1820 struct fuse_setattr_in *inarg_p,
1821 struct fuse_attr_out *outarg_p)
1823 args->opcode = FUSE_SETATTR;
1824 args->nodeid = get_node_id(inode);
1825 args->in_numargs = 1;
1826 args->in_args[0].size = sizeof(*inarg_p);
1827 args->in_args[0].value = inarg_p;
1828 args->out_numargs = 1;
1829 args->out_args[0].size = sizeof(*outarg_p);
1830 args->out_args[0].value = outarg_p;
1834 * Flush inode->i_mtime to the server
/*
 * Sends a FUSE_SETATTR request carrying the mtime of the inode and, when
 * the protocol minor version of the connection is at least 23, its ctime
 * as well.  Returns the result of fuse_simple_request().
 */
1836 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1838 struct fuse_mount *fm = get_fuse_mount(inode);
1840 struct fuse_setattr_in inarg;
1841 struct fuse_attr_out outarg;
1843 memset(&inarg, 0, sizeof(inarg));
1844 memset(&outarg, 0, sizeof(outarg));
1846 inarg.valid = FATTR_MTIME;
1847 inarg.mtime = inode_get_mtime_sec(inode);
1848 inarg.mtimensec = inode_get_mtime_nsec(inode);
/* ctime is only sent to servers speaking minor version 23 or later */
1849 if (fm->fc->minor >= 23) {
1850 inarg.valid |= FATTR_CTIME;
1851 inarg.ctime = inode_get_ctime_sec(inode);
1852 inarg.ctimensec = inode_get_ctime_nsec(inode);
/* NOTE(review): presumably set only when ff is non-NULL - confirm */
1855 inarg.valid |= FATTR_FH;
1858 fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1860 return fuse_simple_request(fm, &args);
1864 * Set attributes, and at the same time refresh them.
1866 * Truncation is slightly complicated, because the 'truncate' request
1867 * may fail, in which case we don't want to touch the mapping.
1868 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1869 * and the actual truncation by hand.
/*
 * Outline of the visible steps:
 *  - setattr_prepare() validates @attr (with ATTR_FORCE added when the
 *    connection does not use default_permissions);
 *  - for DAX inodes being truncated, the invalidate lock is taken and
 *    layouts are broken;
 *  - open(O_TRUNC) on a connection with atomic_o_trunc only truncates the
 *    page cache locally;
 *  - dirty data is written back and writes are fenced off with
 *    fuse_set_nowrite();
 *  - a FUSE_SETATTR request is built with iattr_to_fattr() and sent;
 *  - the reply is validated and applied to the inode under fi->lock, and
 *    the page cache is truncated/invalidated when the size changed.
 */
1871 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1874 struct inode *inode = d_inode(dentry);
1875 struct fuse_mount *fm = get_fuse_mount(inode);
1876 struct fuse_conn *fc = fm->fc;
1877 struct fuse_inode *fi = get_fuse_inode(inode);
1878 struct address_space *mapping = inode->i_mapping;
1880 struct fuse_setattr_in inarg;
1881 struct fuse_attr_out outarg;
1882 bool is_truncate = false;
/* Writeback caching only applies to regular files */
1883 bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1886 bool trust_local_cmtime = is_wb;
1887 bool fault_blocked = false;
/* Without default_permissions, ATTR_FORCE is set before setattr_prepare() */
1889 if (!fc->default_permissions)
1890 attr->ia_valid |= ATTR_FORCE;
1892 err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
1896 if (attr->ia_valid & ATTR_SIZE) {
1897 if (WARN_ON(!S_ISREG(inode->i_mode)))
/* DAX truncate: hold the invalidate lock while layouts are broken */
1902 if (FUSE_IS_DAX(inode) && is_truncate) {
1903 filemap_invalidate_lock(mapping);
1904 fault_blocked = true;
1905 err = fuse_dax_break_layouts(inode, 0, 0);
1907 filemap_invalidate_unlock(mapping);
1912 if (attr->ia_valid & ATTR_OPEN) {
1913 /* This is coming from open(..., ... | O_TRUNC); */
1914 WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1915 WARN_ON(attr->ia_size != 0);
1916 if (fc->atomic_o_trunc) {
1918 * No need to send request to userspace, since actual
1919 * truncation has already been done by OPEN. But still
1920 * need to truncate page cache.
1922 i_size_write(inode, 0);
1923 truncate_pagecache(inode, 0);
1929 /* Flush dirty data/metadata before non-truncate SETATTR */
1932 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1934 err = write_inode_now(inode, true);
/* Set and immediately release nowrite: waits for pending writes to drain */
1938 fuse_set_nowrite(inode);
1939 fuse_release_nowrite(inode);
1943 fuse_set_nowrite(inode);
1944 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
/* With locally trusted times, a size change also updates mtime and ctime */
1945 if (trust_local_cmtime && attr->ia_size != inode->i_size)
1946 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1949 memset(&inarg, 0, sizeof(inarg));
1950 memset(&outarg, 0, sizeof(outarg));
1951 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1953 struct fuse_file *ff = file->private_data;
1954 inarg.valid |= FATTR_FH;
1958 /* Kill suid/sgid for non-directory chown unconditionally */
1959 if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1960 attr->ia_valid & (ATTR_UID | ATTR_GID))
1961 inarg.valid |= FATTR_KILL_SUIDGID;
1963 if (attr->ia_valid & ATTR_SIZE) {
1964 /* For mandatory locking in truncate */
1965 inarg.valid |= FATTR_LOCKOWNER;
1966 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1968 /* Kill suid/sgid for truncate only if no CAP_FSETID */
1969 if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1970 inarg.valid |= FATTR_KILL_SUIDGID;
1972 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1973 err = fuse_simple_request(fm, &args);
1976 fuse_invalidate_attr(inode);
/* A reply with invalid attributes or a changed file type marks the inode bad */
1980 if (fuse_invalid_attr(&outarg.attr) ||
1981 inode_wrong_type(inode, outarg.attr.mode)) {
1982 fuse_make_bad(inode);
/* Apply the reply to the in-core inode under fi->lock */
1987 spin_lock(&fi->lock);
1988 /* the kernel maintains i_mtime locally */
1989 if (trust_local_cmtime) {
1990 if (attr->ia_valid & ATTR_MTIME)
1991 inode_set_mtime_to_ts(inode, attr->ia_mtime);
1992 if (attr->ia_valid & ATTR_CTIME)
1993 inode_set_ctime_to_ts(inode, attr->ia_ctime);
1994 /* FIXME: clear I_DIRTY_SYNC? */
1997 fuse_change_attributes_common(inode, &outarg.attr, NULL,
1998 ATTR_TIMEOUT(&outarg),
1999 fuse_get_cache_mask(inode));
/* Remember the old size for the page cache decision made after unlocking */
2000 oldsize = inode->i_size;
2001 /* see the comment in fuse_change_attributes() */
2002 if (!is_wb || is_truncate)
2003 i_size_write(inode, outarg.attr.size);
2006 /* NOTE: this may release/reacquire fi->lock */
2007 __fuse_release_nowrite(inode);
2009 spin_unlock(&fi->lock);
2012 * Only call invalidate_inode_pages2() after removing
2013 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
2015 if ((is_truncate || !is_wb) &&
2016 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2017 truncate_pagecache(inode, outarg.attr.size);
2018 invalidate_inode_pages2(mapping);
2021 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2024 filemap_invalidate_unlock(mapping);
/*
 * The same teardown sequence (release nowrite, clear
 * FUSE_I_SIZE_UNSTABLE, drop the invalidate lock) appears a second time.
 * NOTE(review): presumably the error exit - confirm against the labels.
 */
2030 fuse_release_nowrite(inode);
2032 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2035 filemap_invalidate_unlock(mapping);
/*
 * ->setattr() inode operation.
 *
 * After the bad-inode and fuse_allow_current_process() checks, any
 * ATTR_KILL_SUID/ATTR_KILL_SGID request is stripped from attr->ia_valid.
 * If the server handles neither handle_killpriv nor handle_killpriv_v2,
 * the attributes are refreshed and the mode change that clears the
 * setuid/setgid bits is computed here.  The change is then applied by
 * fuse_do_setattr(), followed by dropping cached ACLs and, for a directory
 * mode change, invalidating the entry cache of the dentry.
 *
 * NOTE(review): fuse_allow_current_process() is given a second
 * get_fuse_conn(inode) although the local fc already holds that value.
 */
2039 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2042 struct inode *inode = d_inode(entry);
2043 struct fuse_conn *fc = get_fuse_conn(inode);
/* Only use the file pointer when ATTR_FILE says ia_file is valid */
2044 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2047 if (fuse_is_bad(inode))
2050 if (!fuse_allow_current_process(get_fuse_conn(inode)))
2053 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2054 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2058 * The only sane way to reliably kill suid/sgid is to do it in
2059 * the userspace filesystem
2061 * This should be done on write(), truncate() and chown().
2063 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2065 * ia_mode calculation may have used stale i_mode.
2066 * Refresh and recalculate.
2068 ret = fuse_do_getattr(inode, NULL, file);
2072 attr->ia_mode = inode->i_mode;
2073 if (inode->i_mode & S_ISUID) {
2074 attr->ia_valid |= ATTR_MODE;
2075 attr->ia_mode &= ~S_ISUID;
/* setgid is cleared only when group-execute is set as well */
2077 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2078 attr->ia_valid |= ATTR_MODE;
2079 attr->ia_mode &= ~S_ISGID;
/* Nothing may be left to change once the kill bits have been stripped */
2083 if (!attr->ia_valid)
2086 ret = fuse_do_setattr(entry, attr, file);
2089 * If filesystem supports acls it may have updated acl xattrs in
2090 * the filesystem, so forget cached acls for the inode.
2093 forget_all_cached_acls(inode);
2095 /* Directory mode changed, may need to revalidate access */
2096 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2097 fuse_invalidate_entry_cache(entry);
/*
 * ->getattr() inode operation.
 *
 * The normal path returns the result of fuse_update_get_attr() for the
 * inode behind @path.  A caller that fails fuse_allow_current_process() and
 * passed an empty @request_mask gets result_mask 0 and only stat->dev
 * filled in.  @idmap is not used by the visible code.
 */
2102 static int fuse_getattr(struct mnt_idmap *idmap,
2103 const struct path *path, struct kstat *stat,
2104 u32 request_mask, unsigned int flags)
2106 struct inode *inode = d_inode(path->dentry);
2107 struct fuse_conn *fc = get_fuse_conn(inode);
2109 if (fuse_is_bad(inode))
2112 if (!fuse_allow_current_process(fc)) {
2113 if (!request_mask) {
2115 * If user explicitly requested *nothing* then don't
2116 * error out, but return st_dev only.
2118 stat->result_mask = 0;
2119 stat->dev = inode->i_sb->s_dev;
2125 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
/*
 * Inode operations installed on directories by fuse_init_dir(): name-space
 * operations (lookup, create, mkdir, unlink, rename, ...) plus the
 * attribute, permission, xattr-list, ACL and fileattr handlers.
 */
2128 static const struct inode_operations fuse_dir_inode_operations = {
2129 .lookup = fuse_lookup,
2130 .mkdir = fuse_mkdir,
2131 .symlink = fuse_symlink,
2132 .unlink = fuse_unlink,
2133 .rmdir = fuse_rmdir,
2134 .rename = fuse_rename2,
2136 .setattr = fuse_setattr,
2137 .create = fuse_create,
2138 .atomic_open = fuse_atomic_open,
2139 .tmpfile = fuse_tmpfile,
2140 .mknod = fuse_mknod,
2141 .permission = fuse_permission,
2142 .getattr = fuse_getattr,
2143 .listxattr = fuse_listxattr,
2144 .get_inode_acl = fuse_get_inode_acl,
2145 .get_acl = fuse_get_acl,
2146 .set_acl = fuse_set_acl,
2147 .fileattr_get = fuse_fileattr_get,
2148 .fileattr_set = fuse_fileattr_set,
/*
 * File operations installed on directories by fuse_init_dir().  Seeking and
 * read() use the generic helpers; iteration, open, release, fsync and the
 * ioctl entry points are the FUSE directory handlers.
 */
2151 static const struct file_operations fuse_dir_operations = {
2152 .llseek = generic_file_llseek,
2153 .read = generic_read_dir,
2154 .iterate_shared = fuse_readdir,
2155 .open = fuse_dir_open,
2156 .release = fuse_dir_release,
2157 .fsync = fuse_dir_fsync,
2158 .unlocked_ioctl = fuse_dir_ioctl,
2159 .compat_ioctl = fuse_dir_compat_ioctl,
/*
 * Inode operations installed by fuse_init_common(): attribute, permission,
 * xattr-list, ACL and fileattr handlers, without any name-space operations.
 */
2162 static const struct inode_operations fuse_common_inode_operations = {
2163 .setattr = fuse_setattr,
2164 .permission = fuse_permission,
2165 .getattr = fuse_getattr,
2166 .listxattr = fuse_listxattr,
2167 .get_inode_acl = fuse_get_inode_acl,
2168 .get_acl = fuse_get_acl,
2169 .set_acl = fuse_set_acl,
2170 .fileattr_get = fuse_fileattr_get,
2171 .fileattr_set = fuse_fileattr_set,
/*
 * Inode operations installed on symlinks by fuse_init_symlink(): link
 * resolution through fuse_get_link() plus setattr, getattr and listxattr.
 */
2174 static const struct inode_operations fuse_symlink_inode_operations = {
2175 .setattr = fuse_setattr,
2176 .get_link = fuse_get_link,
2177 .getattr = fuse_getattr,
2178 .listxattr = fuse_listxattr,
/*
 * Install fuse_common_inode_operations as the inode operations of @inode.
 */
2181 void fuse_init_common(struct inode *inode)
2183 inode->i_op = &fuse_common_inode_operations;
/*
 * Set up @inode as a directory: install the directory inode and file
 * operation tables and initialise the fi->rdc state (lock, cached flag,
 * version).
 * NOTE(review): rdc is presumably the readdir cache - confirm.
 */
2186 void fuse_init_dir(struct inode *inode)
2188 struct fuse_inode *fi = get_fuse_inode(inode);
2190 inode->i_op = &fuse_dir_inode_operations;
2191 inode->i_fop = &fuse_dir_operations;
2193 spin_lock_init(&fi->rdc.lock);
2194 fi->rdc.cached = false;
2197 fi->rdc.version = 0;
/*
 * ->read_folio() for symlink inodes: fills the page of @folio with the link
 * target via fuse_readlink_page(), marks the folio up to date and unlocks
 * it.  The file argument is unused (hence its name).
 * NOTE(review): the up-to-date marking is presumably limited to the success
 * case - confirm.
 */
2200 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2202 int err = fuse_readlink_page(folio->mapping->host, &folio->page);
2205 folio_mark_uptodate(folio);
2207 folio_unlock(folio);
/*
 * Address space operations for symlink inodes (installed by
 * fuse_init_symlink()); only ->read_folio is provided here.
 */
2212 static const struct address_space_operations fuse_symlink_aops = {
2213 .read_folio = fuse_symlink_read_folio,
/*
 * Set up @inode as a symlink: install the symlink inode operations and the
 * symlink address space operations, then call inode_nohighmem() on it.
 */
2216 void fuse_init_symlink(struct inode *inode)
2218 inode->i_op = &fuse_symlink_inode_operations;
2219 inode->i_data.a_ops = &fuse_symlink_aops;
2220 inode_nohighmem(inode);