2 FUSE: Filesystem in Userspace
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/moduleparam.h>
15 #include <linux/sched.h>
16 #include <linux/namei.h>
17 #include <linux/slab.h>
18 #include <linux/xattr.h>
19 #include <linux/iversion.h>
20 #include <linux/posix_acl.h>
21 #include <linux/security.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
25 static bool __read_mostly allow_sys_admin_access;
26 module_param(allow_sys_admin_access, bool, 0644);
27 MODULE_PARM_DESC(allow_sys_admin_access,
28 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");
30 static void fuse_advise_use_readdirplus(struct inode *dir)
32 struct fuse_inode *fi = get_fuse_inode(dir);
34 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
37 #if BITS_PER_LONG >= 64
38 static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
40 entry->d_fsdata = (void *) time;
43 static inline u64 fuse_dentry_time(const struct dentry *entry)
45 return (u64)entry->d_fsdata;
54 static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
56 ((union fuse_dentry *) dentry->d_fsdata)->time = time;
59 static inline u64 fuse_dentry_time(const struct dentry *entry)
61 return ((union fuse_dentry *) entry->d_fsdata)->time;
65 static void fuse_dentry_settime(struct dentry *dentry, u64 time)
67 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
68 bool delete = !time && fc->delete_stale;
70 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
71 * Don't care about races, either way it's just an optimization
73 if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
74 (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
75 spin_lock(&dentry->d_lock);
77 dentry->d_flags &= ~DCACHE_OP_DELETE;
79 dentry->d_flags |= DCACHE_OP_DELETE;
80 spin_unlock(&dentry->d_lock);
83 __fuse_dentry_settime(dentry, time);
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until the dentry/attributes are valid is stored in
 * dentry->d_fsdata and fuse_inode->i_time respectively.
 */

/*
 * Calculate the time in jiffies until a dentry/attributes are valid
 */
/*
 * Convert a validity period (@sec seconds, @nsec nanoseconds) into an
 * absolute deadline in 64-bit jiffies.
 *
 * Visible behaviour: a timespec64 is built with @nsec clamped to
 * NSEC_PER_SEC - 1, converted with timespec64_to_jiffies() and added to
 * get_jiffies_64().
 *
 * NOTE(review): the embedded numbering shows dropped lines (96-97, 99,
 * 101-102, 104-106), so the seconds initialiser and any other return path
 * are not visible in this listing.
 */
95 u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
98 struct timespec64 ts = {
100 min_t(u32, nsec, NSEC_PER_SEC - 1)
103 return get_jiffies_64() + timespec64_to_jiffies(&ts);
109 * Set dentry and possibly attribute timeouts from the lookup/mk*
112 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
114 fuse_dentry_settime(entry,
115 fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
118 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
120 set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
124 * Mark the attributes as stale, so that at the next call to
125 * ->getattr() they will be fetched from userspace
127 void fuse_invalidate_attr(struct inode *inode)
129 fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
132 static void fuse_dir_changed(struct inode *dir)
134 fuse_invalidate_attr(dir);
135 inode_maybe_inc_iversion(dir, false);
139 * Mark the attributes as stale due to an atime change. Avoid the invalidate if
142 void fuse_invalidate_atime(struct inode *inode)
144 if (!IS_RDONLY(inode))
145 fuse_invalidate_attr_mask(inode, STATX_ATIME);
/*
 * Just mark the entry as stale, so that a next attempt to look it up
 * will result in a new lookup call to userspace.
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (unlink, rmdir, rename, ...).  The stored dentry
 * time is simply reset to 0.
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash
 */
/*
 * Invalidate @entry.  The one surviving statement resets the cached entry
 * timeout through fuse_invalidate_entry_cache().
 *
 * NOTE(review): original line 167 is missing from this listing; going by the
 * comment that precedes this function it is presumably the call that drops
 * the dentry from the hash -- restore and confirm.
 */
165 static void fuse_invalidate_entry(struct dentry *entry)
168 fuse_invalidate_entry_cache(entry);
171 static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
172 u64 nodeid, const struct qstr *name,
173 struct fuse_entry_out *outarg)
175 memset(outarg, 0, sizeof(struct fuse_entry_out));
176 args->opcode = FUSE_LOOKUP;
177 args->nodeid = nodeid;
178 args->in_numargs = 1;
179 args->in_args[0].size = name->len + 1;
180 args->in_args[0].value = name->name;
181 args->out_numargs = 1;
182 args->out_args[0].size = sizeof(struct fuse_entry_out);
183 args->out_args[0].value = outarg;
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 */
/*
 * ->d_revalidate callback (see fuse_dentry_operations): decide whether the
 * cached dentry @entry may still be used for a walk with lookup @flags.
 *
 * What the surviving lines show:
 *  - the inode is read with d_inode_rcu() and tested with fuse_is_bad();
 *  - when the stored dentry time is before get_jiffies_64(), or @flags has
 *    LOOKUP_EXCL, LOOKUP_REVAL or LOOKUP_RENAME_TARGET set, a FUSE_LOOKUP
 *    for the entry name is sent against the parent directory node;
 *  - a reply whose nodeid differs from the cached inode, or whose
 *    FUSE_ATTR_SUBMOUNT flag disagrees with IS_AUTOMOUNT(inode), leads to a
 *    fuse_queue_forget() call;
 *  - after further checks (fuse_invalid_attr(), fuse_stale_inode()) cached
 *    ACLs are dropped and the attributes and entry timeout are refreshed
 *    from the reply;
 *  - in the other branch FUSE_I_INIT_RDPLUS is tested (LOOKUP_RCU) or
 *    test-and-cleared, in which case readdirplus is advised on the parent.
 *
 * NOTE(review): the embedded numbering shows many dropped lines (several
 * local declarations, every return/goto and the labels), so the control
 * flow between the statements below cannot be verified from this listing.
 */
195 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 struct dentry *parent;
199 struct fuse_mount *fm;
200 struct fuse_inode *fi;
203 inode = d_inode_rcu(entry);
204 if (inode && fuse_is_bad(inode))
206 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
207 (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
208 struct fuse_entry_out outarg;
210 struct fuse_forget_link *forget;
213 /* For negative dentries, always do a fresh lookup */
218 if (flags & LOOKUP_RCU)
221 fm = get_fuse_mount(inode);
223 forget = fuse_alloc_forget();
228 attr_version = fuse_get_attr_version(fm->fc);
230 parent = dget_parent(entry);
231 fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
232 &entry->d_name, &outarg);
233 ret = fuse_simple_request(fm, &args);
235 /* Zero nodeid is same as -ENOENT */
236 if (!ret && !outarg.nodeid)
239 fi = get_fuse_inode(inode);
240 if (outarg.nodeid != get_node_id(inode) ||
241 (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
242 fuse_queue_forget(fm->fc, forget,
246 spin_lock(&fi->lock);
248 spin_unlock(&fi->lock);
251 if (ret == -ENOMEM || ret == -EINTR)
253 if (ret || fuse_invalid_attr(&outarg.attr) ||
254 fuse_stale_inode(inode, outarg.generation, &outarg.attr))
257 forget_all_cached_acls(inode);
258 fuse_change_attributes(inode, &outarg.attr, NULL,
259 ATTR_TIMEOUT(&outarg),
261 fuse_change_entry_timeout(entry, &outarg);
263 fi = get_fuse_inode(inode);
264 if (flags & LOOKUP_RCU) {
265 if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
267 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
268 parent = dget_parent(entry);
269 fuse_advise_use_readdirplus(d_inode(parent));
282 #if BITS_PER_LONG < 64
283 static int fuse_dentry_init(struct dentry *dentry)
285 dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
286 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
288 return dentry->d_fsdata ? 0 : -ENOMEM;
/*
 * ->d_release (BITS_PER_LONG < 64 only): counterpart of fuse_dentry_init().
 * The surviving line fetches the union fuse_dentry kept in d_fsdata.
 *
 * NOTE(review): the statement that disposes of fd (original line 294) and
 * the #endif closing this conditional section are missing from this
 * listing -- restore before building.
 */
290 static void fuse_dentry_release(struct dentry *dentry)
292 union fuse_dentry *fd = dentry->d_fsdata;
/*
 * ->d_delete: returns nonzero once the stored dentry time lies before the
 * current 64-bit jiffies value, i.e. when the entry has timed out.
 */
static int fuse_dentry_delete(const struct dentry *dentry)
{
	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}
/*
 * Create a fuse_mount object with a new superblock (with path->dentry
 * as the root), and return that mount so it can be auto-mounted on
 * @path.
 */
/*
 * ->d_automount callback (see fuse_dentry_operations).
 *
 * Visible steps: a submount fs_context is created for path->dentry using
 * the filesystem type of the parent mount; an error from that call is
 * returned via ERR_CAST(); the fuse inode of the mount point is stored in
 * fsc->fs_private for fuse_get_tree_submount().
 *
 * NOTE(review): the error test before ERR_CAST() and everything after the
 * "Create the submount" comment (original lines 322-328) are missing from
 * this listing.
 */
308 static struct vfsmount *fuse_dentry_automount(struct path *path)
310 struct fs_context *fsc;
311 struct vfsmount *mnt;
312 struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
314 fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
316 return ERR_CAST(fsc);
318 /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
319 fsc->fs_private = mp_fi;
321 /* Create the submount */
330 const struct dentry_operations fuse_dentry_operations = {
331 .d_revalidate = fuse_dentry_revalidate,
332 .d_delete = fuse_dentry_delete,
333 #if BITS_PER_LONG < 64
334 .d_init = fuse_dentry_init,
335 .d_release = fuse_dentry_release,
337 .d_automount = fuse_dentry_automount,
340 const struct dentry_operations fuse_root_dentry_operations = {
341 #if BITS_PER_LONG < 64
342 .d_init = fuse_dentry_init,
343 .d_release = fuse_dentry_release,
/*
 * Return nonzero if mode @m names one of the seven accepted file types:
 * regular file, directory, symlink, character device, block device, FIFO
 * or socket.
 */
int fuse_valid_type(int m)
{
	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
353 static bool fuse_valid_size(u64 size)
355 return size <= LLONG_MAX;
358 bool fuse_invalid_attr(struct fuse_attr *attr)
360 return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
/*
 * Look up @name under the directory node @nodeid of superblock @sb with a
 * FUSE_LOOKUP request; the reply is stored in @outarg and the resulting
 * inode, if any, in @inode.
 *
 * Visible steps: names longer than FUSE_NAME_MAX are rejected; a forget
 * link is allocated up front; the attr version is sampled before the
 * request; an error or a zero nodeid ends the lookup early; replies with
 * invalid attributes are rejected; a non-zero generation reported for
 * FUSE_ROOT_ID is warned about once and forced to 0; the inode is obtained
 * with fuse_iget(); fuse_queue_forget() is called with count 1 on a path
 * whose condition is not visible here.
 *
 * NOTE(review): the embedded numbering shows dropped lines (declarations,
 * error codes, gotos/labels, the final return), so the exact return values
 * cannot be confirmed from this listing.
 */
363 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
364 struct fuse_entry_out *outarg, struct inode **inode)
366 struct fuse_mount *fm = get_fuse_mount_super(sb);
368 struct fuse_forget_link *forget;
374 if (name->len > FUSE_NAME_MAX)
378 forget = fuse_alloc_forget();
383 attr_version = fuse_get_attr_version(fm->fc);
385 fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
386 err = fuse_simple_request(fm, &args);
387 /* Zero nodeid is same as -ENOENT, but with valid timeout */
388 if (err || !outarg->nodeid)
392 if (fuse_invalid_attr(&outarg->attr))
394 if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
395 pr_warn_once("root generation should be zero\n");
396 outarg->generation = 0;
399 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
400 &outarg->attr, ATTR_TIMEOUT(outarg),
404 fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
/*
 * Directory lookup of @entry in @dir.
 *
 * Visible steps: a bad directory inode yields ERR_PTR(-EIO); the lookup
 * itself is done by fuse_lookup_name() between fuse_lock_inode() and
 * fuse_unlock_inode(); -ENOENT clears outarg_valid; a result inode that
 * carries FUSE_ROOT_ID is special-cased; the inode is attached with
 * d_splice_alias(); the entry timeout is either taken from the reply or
 * reset with fuse_invalidate_entry_cache(); readdirplus is advised on @dir.
 *
 * NOTE(review): the rest of the signature, several declarations and all
 * gotos/labels/returns after line 426 are missing from this listing, so
 * the branch structure (in particular which timeout call runs when) is not
 * verifiable here.
 */
415 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
419 struct fuse_entry_out outarg;
421 struct dentry *newent;
422 bool outarg_valid = true;
425 if (fuse_is_bad(dir))
426 return ERR_PTR(-EIO);
428 locked = fuse_lock_inode(dir);
429 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
431 fuse_unlock_inode(dir, locked);
432 if (err == -ENOENT) {
433 outarg_valid = false;
440 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
443 newent = d_splice_alias(inode, entry);
444 err = PTR_ERR(newent);
448 entry = newent ? newent : entry;
450 fuse_change_entry_timeout(entry, &outarg);
452 fuse_invalidate_entry_cache(entry);
455 fuse_advise_use_readdirplus(dir);
/*
 * Build the security-context extension for a create-type request on @entry
 * with file mode @mode, and describe it in @ext.
 *
 * Visible steps: the context is requested from
 * security_dentry_init_security(); errors other than -EOPNOTSUPP are
 * treated differently from -EOPNOTSUPP (which is ignored, per the inline
 * comment); the name length is bounded by XATTR_NAME_MAX + 1 and the
 * context length by S32_MAX under a WARN_ON; a zeroed buffer of total_len
 * bytes is allocated and filled with a fuse_secctx_header, followed by the
 * per-context record and the raw context bytes; ext->size is set to
 * total_len.
 *
 * NOTE(review): lines assigning ext->value, releasing ctx and returning
 * are not visible in this listing (gaps in the embedded numbering), so
 * ownership of the buffers cannot be confirmed here.
 */
464 static int get_security_context(struct dentry *entry, umode_t mode,
465 struct fuse_in_arg *ext)
467 struct fuse_secctx *fctx;
468 struct fuse_secctx_header *header;
469 void *ctx = NULL, *ptr;
470 u32 ctxlen, total_len = sizeof(*header);
475 err = security_dentry_init_security(entry, mode, &entry->d_name,
476 &name, &ctx, &ctxlen);
478 if (err != -EOPNOTSUPP)
480 /* No LSM is supporting this security hook. Ignore error */
487 namelen = strlen(name) + 1;
489 if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
491 total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
495 header = ptr = kzalloc(total_len, GFP_KERNEL);
499 header->nr_secctx = nr_ctx;
500 header->size = total_len;
501 ptr += sizeof(*header);
505 ptr += sizeof(*fctx);
510 memcpy(ptr, ctx, ctxlen);
512 ext->size = total_len;
/*
 * Grow the buffer described by @buf by @bytes and return a pointer to the
 * newly added tail.
 *
 * Visible steps: the buffer is resized with krealloc() to buf->size +
 * @bytes, the added region is zeroed, and the returned pointer is the
 * start of that region (p + newlen - bytes).
 *
 * NOTE(review): the allocation-failure handling and the lines updating
 * buf->value / buf->size (original lines 526-532, 534-536) are missing
 * from this listing.
 */
520 static void *extend_arg(struct fuse_in_arg *buf, u32 bytes)
523 u32 newlen = buf->size + bytes;
525 p = krealloc(buf->value, newlen, GFP_KERNEL);
533 memset(p + buf->size, 0, bytes);
537 return p + newlen - bytes;
540 static u32 fuse_ext_size(size_t size)
542 return FUSE_REC_ALIGN(sizeof(struct fuse_ext_header) + size);
/*
 * This adds just a single supplementary group that matches the parent's group.
 */
/*
 * Append a FUSE_EXT_GROUPS extension record to @ext that carries one
 * supplementary group: the gid of the parent directory @dir, translated
 * into the connection's user namespace.
 *
 * Visible steps: the record size covers the ext header, the
 * fuse_supp_groups header and one group slot; the record is skipped when
 * the gid does not map (parent_gid == -1), when it equals the caller's
 * fsgid, or on a further condition that is cut off in this listing; space
 * is obtained with extend_arg().
 *
 * NOTE(review): the tail of the skip condition, the extend_arg() failure
 * check, the xh->size / sg->nr_groups assignments and the returns are
 * missing from this listing.
 */
548 static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext)
550 struct fuse_conn *fc = get_fuse_conn(dir);
551 struct fuse_ext_header *xh;
552 struct fuse_supp_groups *sg;
553 kgid_t kgid = dir->i_gid;
554 gid_t parent_gid = from_kgid(fc->user_ns, kgid);
555 u32 sg_len = fuse_ext_size(sizeof(*sg) + sizeof(sg->groups[0]));
557 if (parent_gid == (gid_t) -1 || gid_eq(kgid, current_fsgid()) ||
561 xh = extend_arg(ext, sg_len);
566 xh->type = FUSE_EXT_GROUPS;
568 sg = (struct fuse_supp_groups *) &xh[1];
570 sg->groups[0] = parent_gid;
/*
 * Collect the optional request extensions for creating @dentry in @dir and
 * attach them to @args as one extra input argument.
 *
 * Visible steps: a security context is added when fc->init_security is
 * set; the parent-group record is added when fc->create_supp_group is set
 * and no error occurred; if anything was collected, the next free in_args
 * slot becomes args->ext_idx and receives the extension buffer (with a
 * WARN_ON if in_args is already full).
 *
 * NOTE(review): the end of the signature, the err declaration, the line
 * at 590 and the tail of the function (cleanup/return) are missing from
 * this listing.
 */
575 static int get_create_ext(struct fuse_args *args,
576 struct inode *dir, struct dentry *dentry,
579 struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
580 struct fuse_in_arg ext = { .size = 0, .value = NULL };
583 if (fc->init_security)
584 err = get_security_context(dentry, mode, &ext);
585 if (!err && fc->create_supp_group)
586 err = get_create_supp_group(dir, &ext);
588 if (!err && ext.size) {
589 WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args));
591 args->ext_idx = args->in_numargs++;
592 args->in_args[args->ext_idx] = ext;
/*
 * Release the extension buffer that get_create_ext() stored in
 * args->in_args[args->ext_idx].
 *
 * NOTE(review): original line 602 is missing from this listing; it
 * presumably guards the kfree() so that it only runs when an extension was
 * actually attached -- restore and confirm.
 */
600 static void free_ext_value(struct fuse_args *args)
603 kfree(args->in_args[args->ext_idx].value);
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 */
/*
 * Create and open a regular file in one request.  @opcode selects the
 * request type (the callers in this file pass FUSE_CREATE and
 * FUSE_TMPFILE).
 *
 * Visible steps:
 *  - @mode must be S_IFREG (BUG_ON otherwise); unless fc->dont_mask is
 *    set, the umask is applied locally, and the umask is also sent;
 *  - FUSE_OPEN_KILL_SUIDGID is requested for O_TRUNC without O_EXCL when
 *    handle_killpriv_v2 is enabled and the caller lacks CAP_FSETID;
 *  - the request carries the create argument and the name, and returns an
 *    entry reply plus an open reply stored in ff->args->open_outarg;
 *  - optional extensions come from get_create_ext() and are freed after
 *    the request;
 *  - the reply must describe a regular file with a valid nodeid and
 *    attributes;
 *  - the inode is obtained with fuse_iget(); on a failure path the file is
 *    released and a forget queued;
 *  - on success the dentry is instantiated, its timeout set, the directory
 *    marked changed and the open finished via finish_open(); the page
 *    cache is truncated or invalidated depending on atomic_o_trunc and
 *    FOPEN_KEEP_CACHE.
 *
 * NOTE(review): many lines are missing from this listing (declarations,
 * error tests, gotos/labels, some inarg assignments, returns), so the exact
 * error paths cannot be confirmed here.
 */
612 static int fuse_create_open(struct inode *dir, struct dentry *entry,
613 struct file *file, unsigned int flags,
614 umode_t mode, u32 opcode)
618 struct fuse_mount *fm = get_fuse_mount(dir);
620 struct fuse_forget_link *forget;
621 struct fuse_create_in inarg;
622 struct fuse_open_out *outopenp;
623 struct fuse_entry_out outentry;
624 struct fuse_inode *fi;
625 struct fuse_file *ff;
626 bool trunc = flags & O_TRUNC;
628 /* Userspace expects S_IFREG in create mode */
629 BUG_ON((mode & S_IFMT) != S_IFREG);
631 forget = fuse_alloc_forget();
637 ff = fuse_file_alloc(fm, true);
639 goto out_put_forget_req;
641 if (!fm->fc->dont_mask)
642 mode &= ~current_umask();
645 memset(&inarg, 0, sizeof(inarg));
646 memset(&outentry, 0, sizeof(outentry));
649 inarg.umask = current_umask();
651 if (fm->fc->handle_killpriv_v2 && trunc &&
652 !(flags & O_EXCL) && !capable(CAP_FSETID)) {
653 inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
656 args.opcode = opcode;
657 args.nodeid = get_node_id(dir);
659 args.in_args[0].size = sizeof(inarg);
660 args.in_args[0].value = &inarg;
661 args.in_args[1].size = entry->d_name.len + 1;
662 args.in_args[1].value = entry->d_name.name;
663 args.out_numargs = 2;
664 args.out_args[0].size = sizeof(outentry);
665 args.out_args[0].value = &outentry;
666 /* Store outarg for fuse_finish_open() */
667 outopenp = &ff->args->open_outarg;
668 args.out_args[1].size = sizeof(*outopenp);
669 args.out_args[1].value = outopenp;
671 err = get_create_ext(&args, dir, entry, mode);
673 goto out_put_forget_req;
675 err = fuse_simple_request(fm, &args);
676 free_ext_value(&args);
681 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
682 fuse_invalid_attr(&outentry.attr))
685 ff->fh = outopenp->fh;
686 ff->nodeid = outentry.nodeid;
687 ff->open_flags = outopenp->open_flags;
688 inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
689 &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
691 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
692 fuse_sync_release(NULL, ff, flags);
693 fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
698 d_instantiate(entry, inode);
699 fuse_change_entry_timeout(entry, &outentry);
700 fuse_dir_changed(dir);
701 err = generic_file_open(inode, file);
703 file->private_data = ff;
704 err = finish_open(file, entry, fuse_finish_open);
707 fi = get_fuse_inode(inode);
708 fuse_sync_release(fi, ff, flags);
710 if (fm->fc->atomic_o_trunc && trunc)
711 truncate_pagecache(inode, 0);
712 else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
713 invalidate_inode_pages2(inode->i_mapping);
725 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
/*
 * Atomic open of @entry in @dir.
 *
 * Visible steps: a bad directory is rejected; an entry still in lookup is
 * first resolved through fuse_lookup(); when O_CREAT is absent or the
 * entry is already positive, a different path is taken (target not
 * visible); otherwise FMODE_CREATED is set and fuse_create_open() is tried
 * with FUSE_CREATE; -ENOSYS from it is special-cased and -EEXIST
 * invalidates the entry; a fuse_mknod() call followed by finish_no_open()
 * is also present.
 *
 * NOTE(review): the end of the signature, the error returns, the body of
 * the -ENOSYS branch and all labels/gotos are missing from this listing, so
 * the link between these steps cannot be verified here.
 */
727 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
728 struct file *file, unsigned flags,
732 struct fuse_conn *fc = get_fuse_conn(dir);
733 struct dentry *res = NULL;
735 if (fuse_is_bad(dir))
738 if (d_in_lookup(entry)) {
739 res = fuse_lookup(dir, entry, 0);
747 if (!(flags & O_CREAT) || d_really_is_positive(entry))
751 file->f_mode |= FMODE_CREATED;
756 err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE);
757 if (err == -ENOSYS) {
760 } else if (err == -EEXIST)
761 fuse_invalidate_entry(entry);
767 err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
771 return finish_no_open(file, res);
/*
 * Code shared between mknod, mkdir, symlink and link
 */
/*
 * Send the prepared creation request @args for @entry in @dir and
 * instantiate the resulting inode.
 *
 * Visible steps: a bad directory is rejected; a forget link is allocated;
 * the node id and a single fuse_entry_out output are filled in; unless the
 * opcode is FUSE_LINK, request extensions are added with get_create_ext()
 * and freed after the request; the reply must carry a valid nodeid and
 * valid attributes, and its file type must match @mode (S_IFMT bits); the
 * inode is obtained with fuse_iget(), with a forget queued on a path whose
 * condition is not visible; the inode is attached via d_splice_alias() and
 * the entry timeout set on whichever dentry results; the directory is
 * marked changed; fuse_invalidate_entry() is called on another path.
 *
 * NOTE(review): the end of the signature, the error tests, error codes,
 * labels and returns are missing from this listing.
 */
777 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
778 struct inode *dir, struct dentry *entry,
781 struct fuse_entry_out outarg;
785 struct fuse_forget_link *forget;
787 if (fuse_is_bad(dir))
790 forget = fuse_alloc_forget();
794 memset(&outarg, 0, sizeof(outarg));
795 args->nodeid = get_node_id(dir);
796 args->out_numargs = 1;
797 args->out_args[0].size = sizeof(outarg);
798 args->out_args[0].value = &outarg;
800 if (args->opcode != FUSE_LINK) {
801 err = get_create_ext(args, dir, entry, mode);
803 goto out_put_forget_req;
806 err = fuse_simple_request(fm, args);
807 free_ext_value(args);
809 goto out_put_forget_req;
812 if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
813 goto out_put_forget_req;
815 if ((outarg.attr.mode ^ mode) & S_IFMT)
816 goto out_put_forget_req;
818 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
819 &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
821 fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
827 d = d_splice_alias(inode, entry);
832 fuse_change_entry_timeout(d, &outarg);
835 fuse_change_entry_timeout(entry, &outarg);
837 fuse_dir_changed(dir);
842 fuse_invalidate_entry(entry);
/*
 * Create a node named by @entry in @dir with a FUSE_MKNOD request.
 *
 * Visible steps: unless fc->dont_mask is set, the umask is applied to
 * @mode locally; the request carries the encoded @rdev and the umask in a
 * zeroed fuse_mknod_in, followed by the entry name; the request is
 * completed by create_new_entry().  @idmap is not used by the visible
 * lines.
 *
 * NOTE(review): the args declaration and the lines at 858 and 862 are
 * missing from this listing.
 */
847 static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir,
848 struct dentry *entry, umode_t mode, dev_t rdev)
850 struct fuse_mknod_in inarg;
851 struct fuse_mount *fm = get_fuse_mount(dir);
854 if (!fm->fc->dont_mask)
855 mode &= ~current_umask();
857 memset(&inarg, 0, sizeof(inarg));
859 inarg.rdev = new_encode_dev(rdev);
860 inarg.umask = current_umask();
861 args.opcode = FUSE_MKNOD;
863 args.in_args[0].size = sizeof(inarg);
864 args.in_args[0].value = &inarg;
865 args.in_args[1].size = entry->d_name.len + 1;
866 args.in_args[1].value = entry->d_name.name;
867 return create_new_entry(fm, &args, dir, entry, mode);
870 static int fuse_create(struct mnt_idmap *idmap, struct inode *dir,
871 struct dentry *entry, umode_t mode, bool excl)
873 return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0);
/*
 * Create an unnamed temporary file in @dir for @file.
 *
 * Visible steps: fuse_create_open() is called with FUSE_TMPFILE on the
 * dentry and flags of @file; an -ENOSYS result is special-cased.
 *
 * NOTE(review): the lines before the call (original 880-884), the body of
 * the -ENOSYS branch and the return are missing from this listing.
 */
876 static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
877 struct file *file, umode_t mode)
879 struct fuse_conn *fc = get_fuse_conn(dir);
885 err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE);
886 if (err == -ENOSYS) {
/*
 * Create the directory @entry in @dir with a FUSE_MKDIR request.
 *
 * Visible steps: unless fc->dont_mask is set, the umask is applied to
 * @mode locally; the umask is sent in a zeroed fuse_mkdir_in followed by
 * the entry name; the request is completed by create_new_entry(), which is
 * told to expect S_IFDIR.
 *
 * NOTE(review): the args declaration and the lines at 904 and 907 are
 * missing from this listing.
 */
893 static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir,
894 struct dentry *entry, umode_t mode)
896 struct fuse_mkdir_in inarg;
897 struct fuse_mount *fm = get_fuse_mount(dir);
900 if (!fm->fc->dont_mask)
901 mode &= ~current_umask();
903 memset(&inarg, 0, sizeof(inarg));
905 inarg.umask = current_umask();
906 args.opcode = FUSE_MKDIR;
908 args.in_args[0].size = sizeof(inarg);
909 args.in_args[0].value = &inarg;
910 args.in_args[1].size = entry->d_name.len + 1;
911 args.in_args[1].value = entry->d_name.name;
912 return create_new_entry(fm, &args, dir, entry, S_IFDIR);
/*
 * Create the symlink @entry in @dir pointing at @link with a FUSE_SYMLINK
 * request.
 *
 * Visible steps: the first input argument is the entry name, the second is
 * the link target including its terminating NUL (strlen + 1); the request
 * is completed by create_new_entry(), which is told to expect S_IFLNK.
 *
 * NOTE(review): the args declaration and the line at 923 are missing from
 * this listing.
 */
915 static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
916 struct dentry *entry, const char *link)
918 struct fuse_mount *fm = get_fuse_mount(dir);
919 unsigned len = strlen(link) + 1;
922 args.opcode = FUSE_SYMLINK;
924 args.in_args[0].size = entry->d_name.len + 1;
925 args.in_args[0].value = entry->d_name.name;
926 args.in_args[1].size = len;
927 args.in_args[1].value = link;
928 return create_new_entry(fm, &args, dir, entry, S_IFLNK);
931 void fuse_flush_time_update(struct inode *inode)
933 int err = sync_inode_metadata(inode, 1);
935 mapping_set_error(inode->i_mapping, err);
/*
 * Update the cached ctime of @inode to the current time, mark the inode
 * dirty and flush the change.  Nothing is done when IS_NOCMTIME() holds.
 */
static void fuse_update_ctime_in_cache(struct inode *inode)
{
	if (!IS_NOCMTIME(inode)) {
		inode_set_ctime_current(inode);
		mark_inode_dirty_sync(inode);
		fuse_flush_time_update(inode);
	}
}
947 void fuse_update_ctime(struct inode *inode)
949 fuse_invalidate_attr_mask(inode, STATX_CTIME);
950 fuse_update_ctime_in_cache(inode);
/*
 * Bookkeeping after @entry has been unlinked on the server.
 *
 * Visible steps: under fi->lock the inode gets a fresh attr_version from
 * the connection counter, and the link count is adjusted separately for
 * directories and for non-directories with i_nlink > 0; afterwards the
 * entry cache is invalidated and the ctime updated.
 *
 * NOTE(review): the two statements that adjust the link count (original
 * lines 968 and 970) are missing from this listing.
 */
953 static void fuse_entry_unlinked(struct dentry *entry)
955 struct inode *inode = d_inode(entry);
956 struct fuse_conn *fc = get_fuse_conn(inode);
957 struct fuse_inode *fi = get_fuse_inode(inode);
959 spin_lock(&fi->lock);
960 fi->attr_version = atomic64_inc_return(&fc->attr_version);
962 * If i_nlink == 0 then unlink doesn't make sense, yet this can
963 * happen if userspace filesystem is careless. It would be
964 * difficult to enforce correct nlink usage so just ignore this
967 if (S_ISDIR(inode->i_mode))
969 else if (inode->i_nlink > 0)
971 spin_unlock(&fi->lock);
972 fuse_invalidate_entry_cache(entry);
973 fuse_update_ctime(inode);
/*
 * Remove the file @entry from @dir with a FUSE_UNLINK request.
 *
 * Visible steps: a bad directory is rejected; the entry name is the
 * request input; on the success path the directory is marked changed and
 * fuse_entry_unlinked() runs; on -EINTR or -ENOENT the entry is
 * invalidated.
 *
 * NOTE(review): declarations, the error return for a bad directory, the
 * in_numargs assignment, the success test and the final return are missing
 * from this listing.
 */
976 static int fuse_unlink(struct inode *dir, struct dentry *entry)
979 struct fuse_mount *fm = get_fuse_mount(dir);
982 if (fuse_is_bad(dir))
985 args.opcode = FUSE_UNLINK;
986 args.nodeid = get_node_id(dir);
988 args.in_args[0].size = entry->d_name.len + 1;
989 args.in_args[0].value = entry->d_name.name;
990 err = fuse_simple_request(fm, &args);
992 fuse_dir_changed(dir);
993 fuse_entry_unlinked(entry);
994 } else if (err == -EINTR || err == -ENOENT)
995 fuse_invalidate_entry(entry);
/*
 * Remove the directory @entry from @dir with a FUSE_RMDIR request.
 *
 * Visible steps: a bad directory is rejected; the entry name is the single
 * request input; on the success path the directory is marked changed and
 * fuse_entry_unlinked() runs; on -EINTR or -ENOENT the entry is
 * invalidated.
 *
 * NOTE(review): declarations, the error return for a bad directory, the
 * success test and the final return are missing from this listing.
 */
999 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1002 struct fuse_mount *fm = get_fuse_mount(dir);
1005 if (fuse_is_bad(dir))
1008 args.opcode = FUSE_RMDIR;
1009 args.nodeid = get_node_id(dir);
1010 args.in_numargs = 1;
1011 args.in_args[0].size = entry->d_name.len + 1;
1012 args.in_args[0].value = entry->d_name.name;
1013 err = fuse_simple_request(fm, &args);
1015 fuse_dir_changed(dir);
1016 fuse_entry_unlinked(entry);
1017 } else if (err == -EINTR || err == -ENOENT)
1018 fuse_invalidate_entry(entry);
/*
 * Shared implementation of the rename requests.  @opcode selects the
 * request and @argsize is the number of bytes of the fuse_rename2_in
 * argument that are zeroed and sent (the caller in this file passes the
 * size of either fuse_rename2_in or fuse_rename_in).
 *
 * Visible steps: the request carries the argument header, the old name and
 * the new name; on the success path the ctime of the moved inode is
 * updated (and of the target for RENAME_EXCHANGE), both directories are
 * marked changed, and a positive target that was replaced is treated as
 * unlinked; on -EINTR or -ENOENT both entries are invalidated.
 *
 * NOTE(review): declarations, the success test and the final return are
 * missing from this listing.
 */
1022 static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
1023 struct inode *newdir, struct dentry *newent,
1024 unsigned int flags, int opcode, size_t argsize)
1027 struct fuse_rename2_in inarg;
1028 struct fuse_mount *fm = get_fuse_mount(olddir);
1031 memset(&inarg, 0, argsize);
1032 inarg.newdir = get_node_id(newdir);
1033 inarg.flags = flags;
1034 args.opcode = opcode;
1035 args.nodeid = get_node_id(olddir);
1036 args.in_numargs = 3;
1037 args.in_args[0].size = argsize;
1038 args.in_args[0].value = &inarg;
1039 args.in_args[1].size = oldent->d_name.len + 1;
1040 args.in_args[1].value = oldent->d_name.name;
1041 args.in_args[2].size = newent->d_name.len + 1;
1042 args.in_args[2].value = newent->d_name.name;
1043 err = fuse_simple_request(fm, &args);
1046 fuse_update_ctime(d_inode(oldent));
1048 if (flags & RENAME_EXCHANGE)
1049 fuse_update_ctime(d_inode(newent));
1051 fuse_dir_changed(olddir);
1052 if (olddir != newdir)
1053 fuse_dir_changed(newdir);
1055 /* newent will end up negative */
1056 if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
1057 fuse_entry_unlinked(newent);
1058 } else if (err == -EINTR || err == -ENOENT) {
1059 /* If request was interrupted, DEITY only knows if the
1060 rename actually took place. If the invalidation
1061 fails (e.g. some process has CWD under the renamed
1062 directory), then there can be inconsistency between
1063 the dcache and the real filesystem. Tough luck. */
1064 fuse_invalidate_entry(oldent);
1065 if (d_really_is_positive(newent))
1066 fuse_invalidate_entry(newent);
/*
 * Rename entry point: choose between the two rename request formats.
 *
 * Visible steps: a bad source directory is rejected; flags other than
 * RENAME_NOREPLACE, RENAME_EXCHANGE and RENAME_WHITEOUT are rejected; a
 * test on fc->no_rename2 and fc->minor < 23 guards the flagged variant;
 * fuse_rename_common() is called once with @flags and a fuse_rename2_in
 * sized argument, with -ENOSYS special-cased, and once with flags 0 and a
 * fuse_rename_in sized argument.
 *
 * NOTE(review): the opcode arguments (original lines 1090, 1098), the
 * error codes, the branch structure and the return are missing from this
 * listing.
 */
1072 static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir,
1073 struct dentry *oldent, struct inode *newdir,
1074 struct dentry *newent, unsigned int flags)
1076 struct fuse_conn *fc = get_fuse_conn(olddir);
1079 if (fuse_is_bad(olddir))
1082 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
1086 if (fc->no_rename2 || fc->minor < 23)
1089 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
1091 sizeof(struct fuse_rename2_in));
1092 if (err == -ENOSYS) {
1097 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
1099 sizeof(struct fuse_rename_in));
/*
 * Create the hard link @newent in @newdir to the inode behind @entry with
 * a FUSE_LINK request.
 *
 * Visible steps: the request carries the node id of the existing inode and
 * the new name; it is completed by create_new_entry(), which is told to
 * expect the existing inode's mode; afterwards the cached ctime is updated
 * on one path and, on -EINTR, the attributes are invalidated instead.
 *
 * NOTE(review): declarations, the success test before
 * fuse_update_ctime_in_cache() and the final return are missing from this
 * listing.
 */
1105 static int fuse_link(struct dentry *entry, struct inode *newdir,
1106 struct dentry *newent)
1109 struct fuse_link_in inarg;
1110 struct inode *inode = d_inode(entry);
1111 struct fuse_mount *fm = get_fuse_mount(inode);
1114 memset(&inarg, 0, sizeof(inarg));
1115 inarg.oldnodeid = get_node_id(inode);
1116 args.opcode = FUSE_LINK;
1117 args.in_numargs = 2;
1118 args.in_args[0].size = sizeof(inarg);
1119 args.in_args[0].value = &inarg;
1120 args.in_args[1].size = newent->d_name.len + 1;
1121 args.in_args[1].value = newent->d_name.name;
1122 err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
1124 fuse_update_ctime_in_cache(inode);
1125 else if (err == -EINTR)
1126 fuse_invalidate_attr(inode);
1131 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
1134 unsigned int blkbits;
1135 struct fuse_conn *fc = get_fuse_conn(inode);
1137 stat->dev = inode->i_sb->s_dev;
1138 stat->ino = attr->ino;
1139 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1140 stat->nlink = attr->nlink;
1141 stat->uid = make_kuid(fc->user_ns, attr->uid);
1142 stat->gid = make_kgid(fc->user_ns, attr->gid);
1143 stat->rdev = inode->i_rdev;
1144 stat->atime.tv_sec = attr->atime;
1145 stat->atime.tv_nsec = attr->atimensec;
1146 stat->mtime.tv_sec = attr->mtime;
1147 stat->mtime.tv_nsec = attr->mtimensec;
1148 stat->ctime.tv_sec = attr->ctime;
1149 stat->ctime.tv_nsec = attr->ctimensec;
1150 stat->size = attr->size;
1151 stat->blocks = attr->blocks;
1153 if (attr->blksize != 0)
1154 blkbits = ilog2(attr->blksize);
1156 blkbits = inode->i_sb->s_blocksize_bits;
1158 stat->blksize = 1 << blkbits;
1161 static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
1163 memset(attr, 0, sizeof(*attr));
1164 attr->ino = sx->ino;
1165 attr->size = sx->size;
1166 attr->blocks = sx->blocks;
1167 attr->atime = sx->atime.tv_sec;
1168 attr->mtime = sx->mtime.tv_sec;
1169 attr->ctime = sx->ctime.tv_sec;
1170 attr->atimensec = sx->atime.tv_nsec;
1171 attr->mtimensec = sx->mtime.tv_nsec;
1172 attr->ctimensec = sx->ctime.tv_nsec;
1173 attr->mode = sx->mode;
1174 attr->nlink = sx->nlink;
1175 attr->uid = sx->uid;
1176 attr->gid = sx->gid;
1177 attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
1178 attr->blksize = sx->blksize;
/*
 * Fetch attributes of @inode from the server with a FUSE_STATX request.
 *
 * Visible steps: the attr version is sampled up front; for a regular file
 * with an open @file the FUSE_GETATTR_FH flag is set; STATX_BASIC_STATS
 * and STATX_BTIME are requested; a reply with an invalid size, an invalid
 * type or a type that no longer matches the inode marks the inode bad; the
 * reply is converted with fuse_statx_to_attr(); the cached attributes are
 * refreshed only when the reply mask covers all of STATX_BASIC_STATS; the
 * kstat gets the result mask, the birth time (nsec clamped to
 * NSEC_PER_SEC - 1), the converted attributes and STATX_TYPE.
 *
 * NOTE(review): the end of the signature, several declarations, the file
 * handle assignment, the error tests, the guard around the kstat block and
 * the returns are missing from this listing.
 */
1181 static int fuse_do_statx(struct inode *inode, struct file *file,
1185 struct fuse_attr attr;
1186 struct fuse_statx *sx;
1187 struct fuse_statx_in inarg;
1188 struct fuse_statx_out outarg;
1189 struct fuse_mount *fm = get_fuse_mount(inode);
1190 u64 attr_version = fuse_get_attr_version(fm->fc);
1193 memset(&inarg, 0, sizeof(inarg));
1194 memset(&outarg, 0, sizeof(outarg));
1195 /* Directories have separate file-handle space */
1196 if (file && S_ISREG(inode->i_mode)) {
1197 struct fuse_file *ff = file->private_data;
1199 inarg.getattr_flags |= FUSE_GETATTR_FH;
1202 /* For now leave sync hints as the default, request all stats. */
1204 inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
1205 args.opcode = FUSE_STATX;
1206 args.nodeid = get_node_id(inode);
1207 args.in_numargs = 1;
1208 args.in_args[0].size = sizeof(inarg);
1209 args.in_args[0].value = &inarg;
1210 args.out_numargs = 1;
1211 args.out_args[0].size = sizeof(outarg);
1212 args.out_args[0].value = &outarg;
1213 err = fuse_simple_request(fm, &args);
1218 if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
1219 ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
1220 inode_wrong_type(inode, sx->mode)))) {
1221 fuse_make_bad(inode);
1225 fuse_statx_to_attr(&outarg.stat, &attr);
1226 if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
1227 fuse_change_attributes(inode, &attr, &outarg.stat,
1228 ATTR_TIMEOUT(&outarg), attr_version);
1232 stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
1233 stat->btime.tv_sec = sx->btime.tv_sec;
1234 stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
1235 fuse_fillattr(inode, &attr, stat);
1236 stat->result_mask |= STATX_TYPE;
/*
 * Fetch attributes of @inode from the server with a FUSE_GETATTR request.
 *
 * Visible steps: the attr version is sampled before the request; for a
 * regular file with an open file the FUSE_GETATTR_FH flag is set; a reply
 * with invalid attributes or a type that no longer matches the inode marks
 * the inode bad; otherwise the cached attributes are refreshed with
 * fuse_change_attributes() and @stat is filled with fuse_fillattr().
 *
 * NOTE(review): the end of the signature, several declarations, the file
 * handle assignment, the error test after the request, any guard around
 * fuse_fillattr() and the return are missing from this listing.
 */
1242 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1246 struct fuse_getattr_in inarg;
1247 struct fuse_attr_out outarg;
1248 struct fuse_mount *fm = get_fuse_mount(inode);
1252 attr_version = fuse_get_attr_version(fm->fc);
1254 memset(&inarg, 0, sizeof(inarg));
1255 memset(&outarg, 0, sizeof(outarg));
1256 /* Directories have separate file-handle space */
1257 if (file && S_ISREG(inode->i_mode)) {
1258 struct fuse_file *ff = file->private_data;
1260 inarg.getattr_flags |= FUSE_GETATTR_FH;
1263 args.opcode = FUSE_GETATTR;
1264 args.nodeid = get_node_id(inode);
1265 args.in_numargs = 1;
1266 args.in_args[0].size = sizeof(inarg);
1267 args.in_args[0].value = &inarg;
1268 args.out_numargs = 1;
1269 args.out_args[0].size = sizeof(outarg);
1270 args.out_args[0].value = &outarg;
1271 err = fuse_simple_request(fm, &args);
1273 if (fuse_invalid_attr(&outarg.attr) ||
1274 inode_wrong_type(inode, outarg.attr.mode)) {
1275 fuse_make_bad(inode);
1278 fuse_change_attributes(inode, &outarg.attr, NULL,
1279 ATTR_TIMEOUT(&outarg),
1282 fuse_fillattr(inode, &outarg.attr, stat);
/*
 * Return attributes of @inode in @stat, refreshing them from the server
 * only when needed.
 *
 * Visible steps: @request_mask is limited to STATX_BASIC_STATS plus
 * STATX_BTIME (and to STATX_BASIC_STATS alone on a path whose condition is
 * not visible); the decision to sync considers AT_STATX_FORCE_SYNC,
 * AT_STATX_DONT_SYNC, requested bits that are invalidated and not covered
 * by the cache mask, and otherwise whether fi->i_time has passed; a
 * refresh drops cached ACLs and uses fuse_do_statx() when statx is
 * available and something beyond the basic stats is requested (with
 * -ENOSYS special-cased), else fuse_do_getattr(); the cached path fills
 * @stat with generic_fillattr(), then overrides mode and ino with the
 * original values kept in the fuse inode and adds btime when FUSE_I_BTIME
 * is set.
 *
 * NOTE(review): the end of the signature, the assignments to sync, the
 * first condition of the if/else chain, the -ENOSYS handling and the
 * returns are missing from this listing.
 */
1288 static int fuse_update_get_attr(struct inode *inode, struct file *file,
1289 struct kstat *stat, u32 request_mask,
1292 struct fuse_inode *fi = get_fuse_inode(inode);
1293 struct fuse_conn *fc = get_fuse_conn(inode);
1296 u32 inval_mask = READ_ONCE(fi->inval_mask);
1297 u32 cache_mask = fuse_get_cache_mask(inode);
1300 /* FUSE only supports basic stats and possibly btime */
1301 request_mask &= STATX_BASIC_STATS | STATX_BTIME;
1304 request_mask &= STATX_BASIC_STATS;
1308 else if (flags & AT_STATX_FORCE_SYNC)
1310 else if (flags & AT_STATX_DONT_SYNC)
1312 else if (request_mask & inval_mask & ~cache_mask)
1315 sync = time_before64(fi->i_time, get_jiffies_64());
1318 forget_all_cached_acls(inode);
1319 /* Try statx if BTIME is requested */
1320 if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
1321 err = fuse_do_statx(inode, file, stat);
1322 if (err == -ENOSYS) {
1327 err = fuse_do_getattr(inode, stat, file);
1330 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
1331 stat->mode = fi->orig_i_mode;
1332 stat->ino = fi->orig_ino;
1333 if (test_bit(FUSE_I_BTIME, &fi->state)) {
1334 stat->btime = fi->i_btime;
1335 stat->result_mask |= STATX_BTIME;
1342 int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
1344 return fuse_update_get_attr(inode, file, NULL, mask, 0);
/*
 * Invalidate (or expire) the dentry called @name under the directory node
 * @parent_nodeid on connection @fc.  When @child_nodeid is non-zero the
 * entry is additionally handled as deleted if it still refers to that
 * node.
 *
 * Visible steps: the parent inode is looked up with fuse_ilookup() and
 * locked; it must be a directory; an alias dentry of the parent is found
 * and the name is hashed and looked up with d_lookup(); the parent is
 * marked changed; unless FUSE_EXPIRE_ONLY is set the entry is
 * d_invalidate()d; the entry cache is invalidated; for a non-zero
 * @child_nodeid and a positive entry, the child inode is locked, its node
 * id compared, mount points and non-empty directories are special-cased,
 * directories get S_DEAD, and the link count is cleared.
 *
 * NOTE(review): declarations, every error code, goto/label and dput/iput
 * style cleanup and the return are missing from this listing, so the
 * reference handling cannot be confirmed here.
 */
1347 int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1348 u64 child_nodeid, struct qstr *name, u32 flags)
1351 struct inode *parent;
1353 struct dentry *entry;
1355 parent = fuse_ilookup(fc, parent_nodeid, NULL);
1359 inode_lock_nested(parent, I_MUTEX_PARENT);
1360 if (!S_ISDIR(parent->i_mode))
1364 dir = d_find_alias(parent);
1368 name->hash = full_name_hash(dir, name->name, name->len);
1369 entry = d_lookup(dir, name);
1374 fuse_dir_changed(parent);
1375 if (!(flags & FUSE_EXPIRE_ONLY))
1376 d_invalidate(entry);
1377 fuse_invalidate_entry_cache(entry);
1379 if (child_nodeid != 0 && d_really_is_positive(entry)) {
1380 inode_lock(d_inode(entry));
1381 if (get_node_id(d_inode(entry)) != child_nodeid) {
1385 if (d_mountpoint(entry)) {
1389 if (d_is_dir(entry)) {
1390 shrink_dcache_parent(entry);
1391 if (!simple_empty(entry)) {
1395 d_inode(entry)->i_flags |= S_DEAD;
1398 clear_nlink(d_inode(entry));
1401 inode_unlock(d_inode(entry));
1410 inode_unlock(parent);
1415 static inline bool fuse_permissible_uidgid(struct fuse_conn *fc)
1417 const struct cred *cred = current_cred();
1419 return (uid_eq(cred->euid, fc->user_id) &&
1420 uid_eq(cred->suid, fc->user_id) &&
1421 uid_eq(cred->uid, fc->user_id) &&
1422 gid_eq(cred->egid, fc->group_id) &&
1423 gid_eq(cred->sgid, fc->group_id) &&
1424 gid_eq(cred->gid, fc->group_id));
/*
 * Calling into a user-controlled filesystem gives the filesystem
 * daemon ptrace-like capabilities over the current process.  This
 * means that the filesystem daemon is able to record the exact
 * filesystem operations performed, and can also control the behavior
 * of the requester process in otherwise impossible ways.  For example
 * it can delay the operation for an arbitrary length of time, allowing
 * DoS against the requester.
 *
 * For this reason only those processes can call into the filesystem
 * for which the owner of the mount has ptrace privilege.  This
 * excludes processes started by other users, suid or sgid processes.
 */
/*
 * Decide whether the calling task may issue requests on connection @fc.
 *
 * NOTE(review): the declaration of "allow", the "else" between the two
 * assignments, the statement under the final "if" and the return are elided
 * in this excerpt; the notes below cover the visible conditions only.
 */
1440 bool fuse_allow_current_process(struct fuse_conn *fc)
/* allow_other mounts: the test is current_in_userns() on fc->user_ns. */
1444 if (fc->allow_other)
1445 allow = current_in_userns(fc->user_ns);
/* Strict comparison of the caller's uids/gids with the mount owner's. */
1447 allow = fuse_permissible_uidgid(fc);
/*
 * A refusal is revisited when the allow_sys_admin_access module parameter
 * is set and the caller has CAP_SYS_ADMIN.
 */
1449 if (!allow && allow_sys_admin_access && capable(CAP_SYS_ADMIN))
/*
 * Send a FUSE_ACCESS request for @inode.
 *
 * @inode: inode whose access is being checked
 * @mask:  MAY_* bits; only MAY_READ, MAY_WRITE and MAY_EXEC are forwarded
 *
 * Must not be called with MAY_NOT_BLOCK in @mask (BUG_ON below).
 * NOTE(review): the return statements are elided in this excerpt.
 */
1455 static int fuse_access(struct inode *inode, int mask)
1457 struct fuse_mount *fm = get_fuse_mount(inode);
1459 struct fuse_access_in inarg;
1462 BUG_ON(mask & MAY_NOT_BLOCK);
/* no_access is latched further down once a request returns -ENOSYS. */
1464 if (fm->fc->no_access)
/* Build the request: a single input argument carrying the filtered mask. */
1467 memset(&inarg, 0, sizeof(inarg));
1468 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1469 args.opcode = FUSE_ACCESS;
1470 args.nodeid = get_node_id(inode);
1471 args.in_numargs = 1;
1472 args.in_args[0].size = sizeof(inarg);
1473 args.in_args[0].value = &inarg;
1474 err = fuse_simple_request(fm, &args);
/* -ENOSYS from the request: remember it on the connection. */
1475 if (err == -ENOSYS) {
1476 fm->fc->no_access = 1;
/*
 * Refresh @inode's attributes on behalf of a permission check: drop the
 * cached ACLs, then call fuse_do_getattr() with no stat buffer and no file.
 *
 * MAY_NOT_BLOCK in @mask is tested first; the statement taken in that case
 * is elided in this excerpt.
 */
1482 static int fuse_perm_getattr(struct inode *inode, int mask)
1484 if (mask & MAY_NOT_BLOCK)
1487 forget_all_cached_acls(inode);
1488 return fuse_do_getattr(inode, NULL, NULL);
1492 * Check permission. The two basic access models of FUSE are:
1494 * 1) Local access checking ('default_permissions' mount option) based
1495 * on file mode. This is the plain old disk filesystem permission
1498 * 2) "Remote" access checking, where server is responsible for
1499 * checking permission in each inode operation. An exception to this
1500 * is if ->permission() was invoked from sys_access() in which case an
1501 * access request is sent. Execute permission is still checked
1502 * locally based on file mode.
/*
 * ->permission() handler (installed in the directory and common inode
 * operation tables of this file).
 *
 * @idmap: mount idmapping; the visible generic_permission() call passes
 *         &nop_mnt_idmap rather than this argument
 * @inode: inode being checked
 * @mask:  MAY_* bits of the requested access
 *
 * NOTE(review): the early returns, some assignments and the final return
 * are elided in this excerpt.
 */
1504 static int fuse_permission(struct mnt_idmap *idmap,
1505 struct inode *inode, int mask)
1507 struct fuse_conn *fc = get_fuse_conn(inode);
1508 bool refreshed = false;
/* Bad inodes and callers refused by fuse_allow_current_process() are
   handled first. */
1511 if (fuse_is_bad(inode))
1514 if (!fuse_allow_current_process(fc))
1518 * If attributes are needed, refresh them before proceeding
/*
 * Attributes matter for default_permissions mounts and for exec checks on
 * regular files. They are refetched when mode/uid/gid are flagged in
 * fi->inval_mask or when fi->i_time lies before the current jiffies.
 */
1520 if (fc->default_permissions ||
1521 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1522 struct fuse_inode *fi = get_fuse_inode(inode);
1523 u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1525 if (perm_mask & READ_ONCE(fi->inval_mask) ||
1526 time_before64(fi->i_time, get_jiffies_64())) {
1529 err = fuse_perm_getattr(inode, mask);
/* Local model: the generic mode-based check decides. */
1535 if (fc->default_permissions) {
1536 err = generic_permission(&nop_mnt_idmap, inode, mask);
1538 /* If permission is denied, try to refresh file
1539 attributes. This is also needed, because the root
1540 node will at first have no permissions */
1541 if (err == -EACCES && !refreshed) {
1542 err = fuse_perm_getattr(inode, mask);
1544 err = generic_permission(&nop_mnt_idmap,
1548 /* Note: the opposite of the above test does not
1549 exist. So if permissions are revoked this won't be
1550 noticed immediately, only after the attribute
1551 timeout has expired */
/* Remote model: access()/chdir() style checks are sent to the server. */
1552 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1553 err = fuse_access(inode, mask);
/*
 * Remote model, exec on a regular file: the check is on the S_IXUGO bits of
 * the cached mode, with one attribute refresh if none of them is set.
 */
1554 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1555 if (!(inode->i_mode & S_IXUGO)) {
1559 err = fuse_perm_getattr(inode, mask);
1560 if (!err && !(inode->i_mode & S_IXUGO))
/*
 * Issue a FUSE_READLINK request for @inode, with the reply delivered as
 * page data of variable length, capped at PAGE_SIZE - 1 bytes.
 *
 * NOTE(review): the fuse_args_pages initializer, the result handling and the
 * return are elided in this excerpt. Presumably @page is the reply page and
 * the spare byte holds a terminating NUL — confirm against the full source.
 */
1567 static int fuse_readlink_page(struct inode *inode, struct page *page)
1569 struct fuse_mount *fm = get_fuse_mount(inode);
1570 struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1571 struct fuse_args_pages ap = {
/* One variable-sized, page-backed output argument; page_zeroing is on. */
1579 ap.args.opcode = FUSE_READLINK;
1580 ap.args.nodeid = get_node_id(inode);
1581 ap.args.out_pages = true;
1582 ap.args.out_argvar = true;
1583 ap.args.page_zeroing = true;
1584 ap.args.out_numargs = 1;
1585 ap.args.out_args[0].size = desc.length;
1586 res = fuse_simple_request(fm, &ap.args);
1588 fuse_invalidate_atime(inode);
/* A result of PAGE_SIZE or more exceeds the requested size; warn. */
1593 if (WARN_ON(res >= PAGE_SIZE))
1596 link = page_address(page);
/*
 * ->get_link() handler for FUSE symlinks.
 *
 * With fc->cache_symlinks set, page_get_link() is used. Otherwise a page is
 * allocated and filled by fuse_readlink_page(); page_put_link is registered
 * on @callback for that page, and the page's address is returned as the
 * link body. Failures are returned as ERR_PTR(err).
 *
 * NOTE(review): the error checks and labels between the visible statements
 * are elided in this excerpt.
 */
1602 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1603 struct delayed_call *callback)
1605 struct fuse_conn *fc = get_fuse_conn(inode);
1610 if (fuse_is_bad(inode))
/* Cached-symlink path. */
1613 if (fc->cache_symlinks)
1614 return page_get_link(dentry, inode, callback);
/* Uncached path: a private page allocated with GFP_KERNEL. */
1620 page = alloc_page(GFP_KERNEL);
1625 err = fuse_readlink_page(inode, page);
/* Register page_put_link for this page on the delayed call. */
1631 set_delayed_call(callback, page_put_link, page);
1633 return page_address(page);
1636 return ERR_PTR(err);
/*
 * ->open() for FUSE directories: runs generic_file_open(), then
 * fuse_do_open() with its last argument set to true, and finally applies
 * nonseekable_open() when the open flags contain FOPEN_STREAM or
 * FOPEN_NONSEEKABLE.
 *
 * NOTE(review): the error checks and the return are elided in this excerpt.
 */
1639 static int fuse_dir_open(struct inode *inode, struct file *file)
1641 struct fuse_mount *fm = get_fuse_mount(inode);
1644 if (fuse_is_bad(inode))
1647 err = generic_file_open(inode, file);
1651 err = fuse_do_open(fm, get_node_id(inode), file, true);
/* The fuse_file is read back from file->private_data. */
1653 struct fuse_file *ff = file->private_data;
1656 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
1657 * directories for backward compatibility, though it's unlikely
1660 if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
1661 nonseekable_open(inode, file);
/*
 * ->release() for FUSE directories: hands the file to fuse_release_common()
 * with its second argument set to true. @inode is not used in the visible
 * lines.
 */
1667 static int fuse_dir_release(struct inode *inode, struct file *file)
1669 fuse_release_common(file, true);
/*
 * ->fsync() for FUSE directories: forwards to fuse_fsync_common() with the
 * FUSE_FSYNCDIR opcode.
 *
 * @file:  open directory; its inode is taken from file->f_mapping->host
 * @start: passed through to fuse_fsync_common()
 * @end:   passed through to fuse_fsync_common()
 *
 * NOTE(review): the remaining parameter, the early returns and the matching
 * inode_lock() for the visible inode_unlock() are elided in this excerpt.
 */
1674 static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1677 struct inode *inode = file->f_mapping->host;
1678 struct fuse_conn *fc = get_fuse_conn(inode);
1681 if (fuse_is_bad(inode))
/* no_fsyncdir is latched below once a request returns -ENOSYS. */
1684 if (fc->no_fsyncdir)
1688 err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1689 if (err == -ENOSYS) {
1690 fc->no_fsyncdir = 1;
1693 inode_unlock(inode);
/*
 * ->unlocked_ioctl() for FUSE directories: forwards to fuse_ioctl_common()
 * with FUSE_IOCTL_DIR set.
 *
 * NOTE(review): the version test announced by the comment below, and the
 * rest of the parameter list, are elided in this excerpt.
 */
1698 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1701 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1703 /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1707 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
/*
 * ->compat_ioctl() for FUSE directories: same forwarding as fuse_dir_ioctl()
 * but with FUSE_IOCTL_COMPAT added to the flags.
 *
 * NOTE(review): the statements between the fc lookup and the return are
 * elided in this excerpt.
 */
1710 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1713 struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1718 return fuse_ioctl_common(file, cmd, arg,
1719 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
/*
 * Decide whether a setattr request should carry an mtime update.
 *
 * @ivalid:            ATTR_* bits of the request
 * @trust_local_mtime: true when the kernel's i_mtime is the official one
 *
 * NOTE(review): the return statements are elided in this excerpt; the
 * outcome of each test is the one stated by the comment above it.
 */
1722 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1724 /* Always update if mtime is explicitly set */
1725 if (ivalid & ATTR_MTIME_SET)
1728 /* Or if kernel i_mtime is the official one */
1729 if (trust_local_mtime)
1732 /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1733 if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1736 /* In all other cases update */
/*
 * Translate the VFS attribute change @iattr into the FUSE_SETATTR input
 * structure @arg.
 *
 * @fc:                 connection; fc->user_ns is used to convert uid/gid
 * @iattr:              requested change; ia_valid selects the fields
 * @arg:                output; bits are only OR-ed into arg->valid (the
 *                      visible caller zeroes the structure beforehand)
 * @trust_local_cmtime: true when the kernel's mtime/ctime are authoritative
 */
1740 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1741 struct fuse_setattr_in *arg, bool trust_local_cmtime)
1743 unsigned ivalid = iattr->ia_valid;
/* Mode, owner, group and size map one to one. */
1745 if (ivalid & ATTR_MODE)
1746 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1747 if (ivalid & ATTR_UID)
1748 arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1749 if (ivalid & ATTR_GID)
1750 arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1751 if (ivalid & ATTR_SIZE)
1752 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
/* atime: FATTR_ATIME_NOW is added when no explicit time was given. */
1753 if (ivalid & ATTR_ATIME) {
1754 arg->valid |= FATTR_ATIME;
1755 arg->atime = iattr->ia_atime.tv_sec;
1756 arg->atimensec = iattr->ia_atime.tv_nsec;
1757 if (!(ivalid & ATTR_ATIME_SET))
1758 arg->valid |= FATTR_ATIME_NOW;
/*
 * mtime: sent only if update_mtime() agrees. FATTR_MTIME_NOW is added only
 * when no explicit time was given and local times are not trusted.
 */
1760 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1761 arg->valid |= FATTR_MTIME;
1762 arg->mtime = iattr->ia_mtime.tv_sec;
1763 arg->mtimensec = iattr->ia_mtime.tv_nsec;
1764 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1765 arg->valid |= FATTR_MTIME_NOW;
/* ctime is sent only when the kernel's local times are trusted. */
1767 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1768 arg->valid |= FATTR_CTIME;
1769 arg->ctime = iattr->ia_ctime.tv_sec;
1770 arg->ctimensec = iattr->ia_ctime.tv_nsec;
1775 * Prevent concurrent writepages on inode
1777 * This is done by adding a negative bias to the inode write counter
1778 * and waiting for all pending writes to finish.
/*
 * @inode: inode to block; the caller must hold the inode lock (BUG_ON).
 *
 * Adds FUSE_NOWRITE to fi->writectr under fi->lock, then sleeps on
 * fi->page_waitq until writectr equals FUSE_NOWRITE exactly. Calling this
 * while writectr is already negative is a bug.
 */
1780 void fuse_set_nowrite(struct inode *inode)
1782 struct fuse_inode *fi = get_fuse_inode(inode);
1784 BUG_ON(!inode_is_locked(inode));
1786 spin_lock(&fi->lock);
1787 BUG_ON(fi->writectr < 0);
1788 fi->writectr += FUSE_NOWRITE;
1789 spin_unlock(&fi->lock);
/* The wait condition is evaluated without taking fi->lock. */
1790 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1794 * Allow writepages on inode
1796 * Remove the bias from the writecounter and send any queued
/*
 * Both visible callers invoke this with fi->lock held. fi->writectr must be
 * exactly FUSE_NOWRITE on entry (BUG_ON).
 *
 * NOTE(review): the statement that resets the counter is elided in this
 * excerpt.
 */
1799 static void __fuse_release_nowrite(struct inode *inode)
1801 struct fuse_inode *fi = get_fuse_inode(inode);
1803 BUG_ON(fi->writectr != FUSE_NOWRITE);
1805 fuse_flush_writepages(inode);
/*
 * Locked wrapper: takes fi->lock around __fuse_release_nowrite() for @inode.
 */
1808 void fuse_release_nowrite(struct inode *inode)
1810 struct fuse_inode *fi = get_fuse_inode(inode);
1812 spin_lock(&fi->lock);
1813 __fuse_release_nowrite(inode);
1814 spin_unlock(&fi->lock);
/*
 * Fill @args for a FUSE_SETATTR request on @inode with one input argument
 * (@inarg_p) and one output argument (@outarg_p).
 *
 * @fc:       not referenced in the visible lines
 * @args:     request descriptor to fill in
 * @inode:    target; supplies the node ID
 * @inarg_p:  setattr input; stored by pointer, so it must stay valid until
 *            the request has been sent
 * @outarg_p: reply buffer; stored by pointer as well
 */
1817 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1818 struct inode *inode,
1819 struct fuse_setattr_in *inarg_p,
1820 struct fuse_attr_out *outarg_p)
1822 args->opcode = FUSE_SETATTR;
1823 args->nodeid = get_node_id(inode);
1824 args->in_numargs = 1;
1825 args->in_args[0].size = sizeof(*inarg_p);
1826 args->in_args[0].value = inarg_p;
1827 args->out_numargs = 1;
1828 args->out_args[0].size = sizeof(*outarg_p);
1829 args->out_args[0].value = outarg_p;
1833 * Flush inode->i_mtime (and i_ctime when fc->minor >= 23) to the server
/*
 * @inode: inode whose locally held times are sent
 * @ff:    open file; NOTE(review): the lines that use it are elided in
 *         this excerpt — presumably it guards the FATTR_FH bit and supplies
 *         the file handle; confirm against the full source
 *
 * Returns the result of fuse_simple_request() for the FUSE_SETATTR request.
 */
1835 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1837 struct fuse_mount *fm = get_fuse_mount(inode);
1839 struct fuse_setattr_in inarg;
1840 struct fuse_attr_out outarg;
1842 memset(&inarg, 0, sizeof(inarg));
1843 memset(&outarg, 0, sizeof(outarg));
/* mtime is always sent. */
1845 inarg.valid = FATTR_MTIME;
1846 inarg.mtime = inode_get_mtime_sec(inode);
1847 inarg.mtimensec = inode_get_mtime_nsec(inode);
/* ctime is added only when the connection's minor version is >= 23. */
1848 if (fm->fc->minor >= 23) {
1849 inarg.valid |= FATTR_CTIME;
1850 inarg.ctime = inode_get_ctime_sec(inode);
1851 inarg.ctimensec = inode_get_ctime_nsec(inode);
1854 inarg.valid |= FATTR_FH;
1857 fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1859 return fuse_simple_request(fm, &args);
1863 * Set attributes, and at the same time refresh them.
1865 * Truncation is slightly complicated, because the 'truncate' request
1866 * may fail, in which case we don't want to touch the mapping.
1867 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1868 * and the actual truncation by hand.
/*
 * @dentry: dentry whose inode is changed
 * @attr:   requested change; ia_valid may be extended here (ATTR_FORCE, and
 *          ATTR_MTIME | ATTR_CTIME for a size change with trusted local
 *          times)
 *
 * NOTE(review): many lines of this function are elided in this excerpt,
 * including the end of the parameter list, several guarding conditions, the
 * error labels and the returns. The notes below describe the visible
 * statements only.
 */
1870 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1873 struct inode *inode = d_inode(dentry);
1874 struct fuse_mount *fm = get_fuse_mount(inode);
1875 struct fuse_conn *fc = fm->fc;
1876 struct fuse_inode *fi = get_fuse_inode(inode);
1877 struct address_space *mapping = inode->i_mapping;
1879 struct fuse_setattr_in inarg;
1880 struct fuse_attr_out outarg;
1881 bool is_truncate = false;
/* Writeback-cache handling applies to regular files only. */
1882 bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
/* Local mtime/ctime are trusted exactly when is_wb is true. */
1885 bool trust_local_cmtime = is_wb;
1886 bool fault_blocked = false;
/*
 * Without default_permissions, ATTR_FORCE is added before
 * setattr_prepare() runs.
 * NOTE(review): presumably this makes setattr_prepare() skip its permission
 * checks — confirm against the VFS documentation.
 */
1888 if (!fc->default_permissions)
1889 attr->ia_valid |= ATTR_FORCE;
1891 err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
/* A size change on anything but a regular file is unexpected (WARN_ON). */
1895 if (attr->ia_valid & ATTR_SIZE) {
1896 if (WARN_ON(!S_ISREG(inode->i_mode)))
/*
 * DAX truncate: take the mapping's invalidate lock, record that in
 * fault_blocked, and break DAX layouts. The lock is dropped again at the
 * visible filemap_invalidate_unlock() calls.
 */
1901 if (FUSE_IS_DAX(inode) && is_truncate) {
1902 filemap_invalidate_lock(mapping);
1903 fault_blocked = true;
1904 err = fuse_dax_break_layouts(inode, 0, 0);
1906 filemap_invalidate_unlock(mapping);
1911 if (attr->ia_valid & ATTR_OPEN) {
1912 /* This is coming from open(..., ... | O_TRUNC); */
1913 WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1914 WARN_ON(attr->ia_size != 0);
1915 if (fc->atomic_o_trunc) {
1917 * No need to send request to userspace, since actual
1918 * truncation has already been done by OPEN. But still
1919 * need to truncate page cache.
1921 i_size_write(inode, 0);
1922 truncate_pagecache(inode, 0);
1928 /* Flush dirty data/metadata before non-truncate SETATTR */
1931 (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1933 err = write_inode_now(inode, true);
/*
 * Setting and at once releasing the nowrite bias: fuse_set_nowrite() waits
 * until fi->writectr equals FUSE_NOWRITE before it returns.
 */
1937 fuse_set_nowrite(inode);
1938 fuse_release_nowrite(inode);
/*
 * The guarding condition for the next group is elided here. The visible
 * code sets the nowrite bias, flags i_size as unstable, and for a real size
 * change with trusted local times also requests mtime/ctime updates.
 */
1942 fuse_set_nowrite(inode);
1943 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1944 if (trust_local_cmtime && attr->ia_size != inode->i_size)
1945 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
/* Build the request from zeroed in/out structures. */
1948 memset(&inarg, 0, sizeof(inarg));
1949 memset(&outarg, 0, sizeof(outarg));
1950 iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1952 struct fuse_file *ff = file->private_data;
1953 inarg.valid |= FATTR_FH;
1957 /* Kill suid/sgid for non-directory chown unconditionally */
1958 if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1959 attr->ia_valid & (ATTR_UID | ATTR_GID))
1960 inarg.valid |= FATTR_KILL_SUIDGID;
1962 if (attr->ia_valid & ATTR_SIZE) {
1963 /* For mandatory locking in truncate */
1964 inarg.valid |= FATTR_LOCKOWNER;
1965 inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1967 /* Kill suid/sgid for truncate only if no CAP_FSETID */
1968 if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1969 inarg.valid |= FATTR_KILL_SUIDGID;
1971 fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1972 err = fuse_simple_request(fm, &args);
1975 fuse_invalidate_attr(inode);
/*
 * The reply is rejected if its attributes are invalid or its mode implies a
 * different file type; the inode is then marked bad.
 */
1979 if (fuse_invalid_attr(&outarg.attr) ||
1980 inode_wrong_type(inode, outarg.attr.mode)) {
1981 fuse_make_bad(inode);
/* From here on the inode is updated under fi->lock. */
1986 spin_lock(&fi->lock);
1987 /* the kernel maintains i_mtime locally */
1988 if (trust_local_cmtime) {
1989 if (attr->ia_valid & ATTR_MTIME)
1990 inode_set_mtime_to_ts(inode, attr->ia_mtime);
1991 if (attr->ia_valid & ATTR_CTIME)
1992 inode_set_ctime_to_ts(inode, attr->ia_ctime);
1993 /* FIXME: clear I_DIRTY_SYNC? */
1996 fuse_change_attributes_common(inode, &outarg.attr, NULL,
1997 ATTR_TIMEOUT(&outarg),
1998 fuse_get_cache_mask(inode));
/* Keep the pre-update size for the page-cache decision further down. */
1999 oldsize = inode->i_size;
2000 /* see the comment in fuse_change_attributes() */
2001 if (!is_wb || is_truncate)
2002 i_size_write(inode, outarg.attr.size);
2005 /* NOTE: this may release/reacquire fi->lock */
2006 __fuse_release_nowrite(inode);
2008 spin_unlock(&fi->lock);
2011 * Only call invalidate_inode_pages2() after removing
2012 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
/*
 * The page cache is trimmed and invalidated only for regular files whose
 * size actually changed, and only on truncate or without writeback cache.
 */
2014 if ((is_truncate || !is_wb) &&
2015 S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
2016 truncate_pagecache(inode, outarg.attr.size);
2017 invalidate_inode_pages2(mapping);
2020 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2023 filemap_invalidate_unlock(mapping);
/*
 * The remaining statements repeat the cleanup (nowrite release, clearing
 * FUSE_I_SIZE_UNSTABLE, invalidate unlock).
 * NOTE(review): presumably this is the error path — its label and guarding
 * conditions are elided in this excerpt.
 */
2029 fuse_release_nowrite(inode);
2031 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
2034 filemap_invalidate_unlock(mapping);
/*
 * ->setattr() handler (installed in the directory, common and symlink
 * inode operation tables of this file).
 *
 * @idmap: not referenced in the visible lines
 * @entry: dentry whose inode is changed
 *
 * The open file, if any, comes from attr->ia_file when ATTR_FILE is set.
 * The actual work is done by fuse_do_setattr().
 *
 * NOTE(review): the end of the parameter list, the early returns and the
 * final return are elided in this excerpt.
 */
2038 static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry,
2041 struct inode *inode = d_inode(entry);
2042 struct fuse_conn *fc = get_fuse_conn(inode);
2043 struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
2046 if (fuse_is_bad(inode))
/* The connection is looked up again here, although fc already holds it. */
2049 if (!fuse_allow_current_process(get_fuse_conn(inode)))
/* The VFS kill-suid/sgid request bits are stripped from ia_valid. */
2052 if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
2053 attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
2057 * The only sane way to reliably kill suid/sgid is to do it in
2058 * the userspace filesystem
2060 * This should be done on write(), truncate() and chown().
/*
 * Neither killpriv variant is handled by the server: refresh the attributes
 * and recompute ia_mode from the fresh i_mode. S_ISUID is always cleared;
 * S_ISGID only when S_IXGRP is set too.
 */
2062 if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
2064 * ia_mode calculation may have used stale i_mode.
2065 * Refresh and recalculate.
2067 ret = fuse_do_getattr(inode, NULL, file);
2071 attr->ia_mode = inode->i_mode;
2072 if (inode->i_mode & S_ISUID) {
2073 attr->ia_valid |= ATTR_MODE;
2074 attr->ia_mode &= ~S_ISUID;
2076 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
2077 attr->ia_valid |= ATTR_MODE;
2078 attr->ia_mode &= ~S_ISGID;
/* ia_valid is tested for being empty before the request is made. */
2082 if (!attr->ia_valid)
2085 ret = fuse_do_setattr(entry, attr, file);
2088 * If filesystem supports acls it may have updated acl xattrs in
2089 * the filesystem, so forget cached acls for the inode.
2092 forget_all_cached_acls(inode);
2094 /* Directory mode changed, may need to revalidate access */
2095 if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
2096 fuse_invalidate_entry_cache(entry);
/*
 * ->getattr() handler (installed in the directory, common and symlink
 * inode operation tables of this file).
 *
 * @idmap:        not referenced in the visible lines
 * @path:         path whose dentry supplies the inode
 * @stat:         result buffer
 * @request_mask: attributes requested by the caller
 * @flags:        passed through to fuse_update_get_attr()
 *
 * NOTE(review): the return statements of the early exits are elided in this
 * excerpt.
 */
2101 static int fuse_getattr(struct mnt_idmap *idmap,
2102 const struct path *path, struct kstat *stat,
2103 u32 request_mask, unsigned int flags)
2105 struct inode *inode = d_inode(path->dentry);
2106 struct fuse_conn *fc = get_fuse_conn(inode);
2108 if (fuse_is_bad(inode))
/*
 * A caller that may not use this mount, and that requested nothing, gets
 * an empty result mask plus the superblock's device number.
 */
2111 if (!fuse_allow_current_process(fc)) {
2112 if (!request_mask) {
2114 * If user explicitly requested *nothing* then don't
2115 * error out, but return st_dev only.
2117 stat->result_mask = 0;
2118 stat->dev = inode->i_sb->s_dev;
2124 return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
/*
 * Inode operations for FUSE directories; installed as inode->i_op by
 * fuse_init_dir().
 */
2127 static const struct inode_operations fuse_dir_inode_operations = {
2128 .lookup = fuse_lookup,
2129 .mkdir = fuse_mkdir,
2130 .symlink = fuse_symlink,
2131 .unlink = fuse_unlink,
2132 .rmdir = fuse_rmdir,
2133 .rename = fuse_rename2,
2135 .setattr = fuse_setattr,
2136 .create = fuse_create,
2137 .atomic_open = fuse_atomic_open,
2138 .tmpfile = fuse_tmpfile,
2139 .mknod = fuse_mknod,
2140 .permission = fuse_permission,
2141 .getattr = fuse_getattr,
2142 .listxattr = fuse_listxattr,
2143 .get_inode_acl = fuse_get_inode_acl,
2144 .get_acl = fuse_get_acl,
2145 .set_acl = fuse_set_acl,
2146 .fileattr_get = fuse_fileattr_get,
2147 .fileattr_set = fuse_fileattr_set,
/*
 * File operations for open FUSE directories; installed as inode->i_fop by
 * fuse_init_dir(). llseek and read use the generic helpers, the rest are
 * FUSE-specific handlers.
 */
2150 static const struct file_operations fuse_dir_operations = {
2151 .llseek = generic_file_llseek,
2152 .read = generic_read_dir,
2153 .iterate_shared = fuse_readdir,
2154 .open = fuse_dir_open,
2155 .release = fuse_dir_release,
2156 .fsync = fuse_dir_fsync,
2157 .unlocked_ioctl = fuse_dir_ioctl,
2158 .compat_ioctl = fuse_dir_compat_ioctl,
/*
 * Inode operations installed by fuse_init_common(). The visible entries are
 * the attribute, permission, xattr-list, ACL and fileattr handlers also
 * present in the directory table, without the namespace operations.
 */
2161 static const struct inode_operations fuse_common_inode_operations = {
2162 .setattr = fuse_setattr,
2163 .permission = fuse_permission,
2164 .getattr = fuse_getattr,
2165 .listxattr = fuse_listxattr,
2166 .get_inode_acl = fuse_get_inode_acl,
2167 .get_acl = fuse_get_acl,
2168 .set_acl = fuse_set_acl,
2169 .fileattr_get = fuse_fileattr_get,
2170 .fileattr_set = fuse_fileattr_set,
/*
 * Inode operations for FUSE symlinks; installed by fuse_init_symlink().
 * No ->permission handler appears among the visible entries.
 */
2173 static const struct inode_operations fuse_symlink_inode_operations = {
2174 .setattr = fuse_setattr,
2175 .get_link = fuse_get_link,
2176 .getattr = fuse_getattr,
2177 .listxattr = fuse_listxattr,
/* Install the common inode operations table on @inode. */
2180 void fuse_init_common(struct inode *inode)
2182 inode->i_op = &fuse_common_inode_operations;
/*
 * Set up @inode as a FUSE directory: install the directory inode and file
 * operation tables and initialise the fi->rdc state (lock, cached flag and
 * version).
 *
 * NOTE(review): some rdc field initialisations are elided in this excerpt.
 */
2185 void fuse_init_dir(struct inode *inode)
2187 struct fuse_inode *fi = get_fuse_inode(inode);
2189 inode->i_op = &fuse_dir_inode_operations;
2190 inode->i_fop = &fuse_dir_operations;
/* fi->rdc starts out not cached, with version 0. */
2192 spin_lock_init(&fi->rdc.lock);
2193 fi->rdc.cached = false;
2196 fi->rdc.version = 0;
/*
 * ->read_folio() for the symlink address space: fills the folio's page via
 * fuse_readlink_page() on the owning inode (folio->mapping->host), then
 * unlocks the folio. The file argument is named "null" and is not used in
 * the visible lines.
 *
 * NOTE(review): the condition guarding folio_mark_uptodate() and the return
 * are elided in this excerpt.
 */
2199 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
2201 int err = fuse_readlink_page(folio->mapping->host, &folio->page);
2204 folio_mark_uptodate(folio);
2206 folio_unlock(folio);
/*
 * Address-space operations for symlink inodes; installed on
 * inode->i_data.a_ops by fuse_init_symlink().
 */
2211 static const struct address_space_operations fuse_symlink_aops = {
2212 .read_folio = fuse_symlink_read_folio,
/*
 * Set up @inode as a FUSE symlink: install the symlink inode operations and
 * address-space operations, then call inode_nohighmem().
 * NOTE(review): presumably needed because fuse_readlink_page() reads the
 * page through page_address() — confirm.
 */
2215 void fuse_init_symlink(struct inode *inode)
2217 inode->i_op = &fuse_symlink_inode_operations;
2218 inode->i_data.a_ops = &fuse_symlink_aops;
2219 inode_nohighmem(inode);