3 * Copyright (C) 2011 Novell Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/posix_acl.h>
16 #include <linux/posix_acl_xattr.h>
17 #include <linux/atomic.h>
18 #include "overlayfs.h"
/*
 * ovl_cleanup - remove @wdentry from the directory inode @wdir.
 *
 * A directory is removed with ovl_do_rmdir(); the ovl_do_unlink() call is
 * presumably the else branch for everything else (the else keyword is not
 * visible in this listing).  The function returns void, so the only
 * visible failure handling is the pr_err below.
 * NOTE(review): this listing omits lines (braces, the err declaration and
 * the check guarding pr_err); only the visible statements are described.
 */
20 void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
25 if (d_is_dir(wdentry))
26 err = ovl_do_rmdir(wdir, wdentry);
28 err = ovl_do_unlink(wdir, wdentry);
/* Log the entry that could not be removed together with the error code. */
32 pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
/*
 * ovl_lookup_temp - look up a temporary name inside @workdir.
 *
 * The name is a hash sign followed by the hex value of a function-local
 * atomic counter, so every call tries a different name.  @dentry is not
 * referenced by any visible line.
 * NOTE(review): the declarations of name and temp, the rest of the
 * already-exists branch and the return statement are not visible here.
 */
37 struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
/* static: one counter shared by all callers, kept across calls */
41 static atomic_t temp_id = ATOMIC_INIT(0);
43 /* counter is allowed to wrap, since temp dentries are ephemeral */
44 snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
46 temp = lookup_one_len(name, workdir, strlen(name));
/* A successful lookup that yields a positive dentry means the name is taken. */
47 if (!IS_ERR(temp) && temp->d_inode) {
48 pr_err("overlayfs: workdir/%s already exists\n", name);
/*
 * ovl_whiteout - create a whiteout under a temporary name in @workdir.
 *
 * Obtains a temporary dentry with ovl_lookup_temp() and turns it into a
 * whiteout with ovl_do_whiteout().  The ERR_PTR(err) assignment is
 * presumably the failure path of ovl_do_whiteout(); the guarding checks
 * and the return statement are not visible in this listing.
 */
56 /* caller holds i_mutex on workdir */
57 static struct dentry *ovl_whiteout(struct dentry *workdir,
58 struct dentry *dentry)
61 struct dentry *whiteout;
62 struct inode *wdir = workdir->d_inode;
64 whiteout = ovl_lookup_temp(workdir, dentry);
68 err = ovl_do_whiteout(wdir, whiteout);
/* Convert the error code into an error pointer for the caller. */
71 whiteout = ERR_PTR(err);
/*
 * ovl_create_real - create the object described by @stat as @newdentry
 * in the directory inode @dir.
 *
 * @hardlink: passed to ovl_do_link() as the link source
 * @link:     symlink target handed to ovl_do_symlink()
 * @debug:    forwarded unchanged to every ovl_do_* helper
 *
 * The file type bits of stat->mode (S_IFMT) select between
 * ovl_do_create(), ovl_do_mkdir(), ovl_do_mknod() and ovl_do_symlink().
 * NOTE(review): the case labels, the condition selecting the hardlink
 * path, the action taken when @newdentry is already positive and the
 * return statement are not visible in this listing.
 */
77 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
78 struct kstat *stat, const char *link,
79 struct dentry *hardlink, bool debug)
/* Check for a target dentry that already has an inode. */
83 if (newdentry->d_inode)
87 err = ovl_do_link(hardlink, dir, newdentry, debug);
89 switch (stat->mode & S_IFMT) {
91 err = ovl_do_create(dir, newdentry, stat->mode, debug);
95 err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
102 err = ovl_do_mknod(dir, newdentry,
103 stat->mode, stat->rdev, debug);
107 err = ovl_do_symlink(dir, newdentry, link, debug);
/* Success without an attached inode is unexpected: warn about it. */
114 if (!err && WARN_ON(!newdentry->d_inode)) {
116 * Not quite sure if non-instantiated dentry is legal or not.
117 * VFS doesn't seem to care so check and warn here.
/*
 * ovl_set_opaque - set the OVL_XATTR_OPAQUE xattr on @upperdentry.
 *
 * The value written is the single byte y (length 1) with xattr flags 0.
 * Returns whatever ovl_do_setxattr() returns.
 */
124 static int ovl_set_opaque(struct dentry *upperdentry)
126 return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
/*
 * ovl_remove_opaque - remove the OVL_XATTR_OPAQUE xattr from @upperdentry.
 *
 * Returns void; the only visible error handling is the pr_warn below,
 * which prints the dentry name and the error code.  The check guarding
 * the warning is not visible in this listing (presumably if (err)).
 */
129 static void ovl_remove_opaque(struct dentry *upperdentry)
133 err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE);
135 pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
136 upperdentry->d_name.name, err);
/*
 * ovl_dir_getattr - getattr handler for overlay directories.
 *
 * Fetches the attributes of the real path behind @dentry with
 * vfs_getattr() while running under the credentials installed by
 * ovl_override_creds(), then overwrites the device and inode number in
 * the result with the values of the overlay superblock and overlay inode.
 * The final visible statement tests OVL_TYPE_MERGE(type); per the
 * neighbouring comment fragment this is where the link count is adjusted,
 * but the assignment itself is not visible in this listing.
 * NOTE(review): the third parameter, the error check after vfs_getattr()
 * and the return statement are not visible here.
 */
140 static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
144 enum ovl_path_type type;
145 struct path realpath;
146 const struct cred *old_cred;
148 type = ovl_path_real(dentry, &realpath);
/* Query the real filesystem under the overridden credentials. */
149 old_cred = ovl_override_creds(dentry->d_sb);
150 err = vfs_getattr(&realpath, stat);
151 revert_creds(old_cred);
/* Report the overlay identity rather than that of the real inode. */
155 stat->dev = dentry->d_sb->s_dev;
156 stat->ino = dentry->d_inode->i_ino;
159 * It's probably not worth it to count subdirs to get the
160 * correct link count. nlink=1 seems to pacify 'find' and
163 if (OVL_TYPE_MERGE(type))
/*
 * ovl_instantiate - attach a freshly created upper object to the overlay.
 *
 * @dentry:    overlay dentry being created
 * @inode:     overlay inode to attach to @dentry
 * @newdentry: dentry of the object created on the upper layer
 * @hardlink:  not referenced by any visible line; presumably selects
 *             between the update path and the WARN_ON consistency check
 *             below (the if/else lines are missing from this listing)
 */
169 /* Common operations required to be done after creation of file on upper */
170 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
171 struct dentry *newdentry, bool hardlink)
/* Bump the version of the parent directory. */
173 ovl_dentry_version_inc(dentry->d_parent);
174 ovl_dentry_update(dentry, newdentry);
/* Point the overlay inode at the new real inode and copy its attributes. */
176 ovl_inode_update(inode, d_inode(newdentry));
177 ovl_copyattr(newdentry->d_inode, inode);
/* Sanity check: the overlay inode must already map to the real inode. */
179 WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
182 d_instantiate(dentry, inode);
/*
 * ovl_create_upper - create @dentry directly in the upper directory of
 * its parent.
 *
 * Looks the name up in the upper parent directory while holding that
 * directory inode lock, creates the object with ovl_create_real() and
 * attaches it with ovl_instantiate().
 * NOTE(review): the error checks between the calls, the unlock and the
 * dput/return paths are not visible in this listing.
 */
185 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
186 struct kstat *stat, const char *link,
187 struct dentry *hardlink)
189 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
190 struct inode *udir = upperdir->d_inode;
191 struct dentry *newdentry;
/*
 * Apply the umask by hand for new objects when the upper directory
 * is not POSIX-ACL enabled; hard links create no new mode.
 */
194 if (!hardlink && !IS_POSIXACL(udir))
195 stat->mode &= ~current_umask();
197 inode_lock_nested(udir, I_MUTEX_PARENT);
198 newdentry = lookup_one_len(dentry->d_name.name, upperdir,
200 err = PTR_ERR(newdentry);
201 if (IS_ERR(newdentry))
/* false: debug flag forwarded to the ovl_do_* helpers */
203 err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
207 ovl_instantiate(dentry, inode, newdentry, !!hardlink);
/*
 * ovl_lock_rename_workdir - take the rename locks on @workdir and
 * @upperdir after checking that the two directories are unrelated.
 *
 * Rejects the case where both arguments are the same dentry and the case
 * where lock_rename() returns a non-NULL dentry, which the existing
 * comment ties to one directory being a subdirectory of the other.
 * The unlock_rename() and pr_err() lines are presumably the failure
 * path; the labels, gotos and return values are not visible here.
 */
216 static int ovl_lock_rename_workdir(struct dentry *workdir,
217 struct dentry *upperdir)
219 /* Workdir should not be the same as upperdir */
220 if (workdir == upperdir)
223 /* Workdir should not be subdir of upperdir and vice versa */
224 if (lock_rename(workdir, upperdir) != NULL)
230 unlock_rename(workdir, upperdir);
232 pr_err("overlayfs: failed to lock workdir+upperdir\n");
/*
 * ovl_clear_empty - replace the upper directory behind @dentry by a
 * fresh opaque directory.
 *
 * Visible sequence: lock workdir and upperdir, stat the current upper
 * directory, create a temporary directory in the workdir, copy xattrs
 * and attributes onto it, mark it opaque, exchange it with the old upper
 * directory via RENAME_EXCHANGE, then clean the whiteouts in @list out
 * of the old directory and remove it.
 * NOTE(review): workdir->d_inode is read in the initializer of wdir
 * before the WARN_ON(!workdir) test, so a NULL workdir would be
 * dereferenced before the check can fire - confirm and reorder.
 * NOTE(review): the error checks, goto labels and return statements are
 * not visible in this listing.
 */
236 static struct dentry *ovl_clear_empty(struct dentry *dentry,
237 struct list_head *list)
239 struct dentry *workdir = ovl_workdir(dentry);
240 struct inode *wdir = workdir->d_inode;
241 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
242 struct inode *udir = upperdir->d_inode;
243 struct path upperpath;
244 struct dentry *upper;
245 struct dentry *opaquedir;
249 if (WARN_ON(!workdir))
250 return ERR_PTR(-EROFS);
252 err = ovl_lock_rename_workdir(workdir, upperdir);
256 ovl_path_upper(dentry, &upperpath);
257 err = vfs_getattr(&upperpath, &stat);
/* The current upper object must still be a directory ... */
262 if (!S_ISDIR(stat.mode))
264 upper = upperpath.dentry;
/* ... and must still live in the expected upper parent directory. */
265 if (upper->d_parent->d_inode != udir)
268 opaquedir = ovl_lookup_temp(workdir, dentry);
269 err = PTR_ERR(opaquedir);
270 if (IS_ERR(opaquedir))
/* Build the replacement directory in the workdir (debug flag set). */
273 err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
277 err = ovl_copy_xattr(upper, opaquedir);
281 err = ovl_set_opaque(opaquedir);
/* ovl_set_attr() is called with the inode lock of the new directory held. */
285 inode_lock(opaquedir->d_inode);
286 err = ovl_set_attr(opaquedir, &stat);
287 inode_unlock(opaquedir->d_inode);
/* Swap the prepared directory with the old upper directory. */
291 err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
/* After the exchange, upper names the old directory inside the workdir. */
295 ovl_cleanup_whiteouts(upper, list);
296 ovl_cleanup(wdir, upper);
297 unlock_rename(workdir, upperdir);
299 /* dentry's upper doesn't match now, get rid of it */
/* Presumably the error path: discard the temporary directory and unlock. */
305 ovl_cleanup(wdir, opaquedir);
309 unlock_rename(workdir, upperdir);
/*
 * ovl_check_empty_and_clear - run ovl_check_empty_dir() on @dentry and,
 * for a directory that is both upper and merge type, replace its upper
 * directory through ovl_clear_empty().
 *
 * ret starts as NULL and is only assigned from ovl_clear_empty() in the
 * visible code.  The list filled by ovl_check_empty_dir() is released
 * with ovl_cache_free().
 * NOTE(review): the declaration of list, the error check after
 * ovl_check_empty_dir() and the return statement are not visible here.
 */
314 static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
317 struct dentry *ret = NULL;
318 enum ovl_path_type type = ovl_path_type(dentry);
321 err = ovl_check_empty_dir(dentry, &list);
328 * When removing an empty opaque directory, then it makes no sense to
329 * replace it with an exact replica of itself.
331 * If no upperdentry then skip clearing whiteouts.
333 * Can race with copy-up, since we don't hold the upperdir mutex.
334 * Doesn't matter, since copy-up can't create a non-empty directory
337 if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
338 ret = ovl_clear_empty(dentry, &list);
341 ovl_cache_free(&list);
/*
 * ovl_set_upper_acl - store @acl on @upperdentry as the xattr @name.
 *
 * The first visible statement tests for ACL support being compiled out
 * or @acl being NULL; its outcome (presumably an early return) is not
 * visible.  The ACL is then serialized with posix_acl_to_xattr() and
 * written with vfs_setxattr() using XATTR_CREATE.
 * NOTE(review): the allocation failure check, the size check after the
 * second conversion, the release of buffer and the return statement
 * are not visible in this listing.
 */
346 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
347 const struct posix_acl *acl)
353 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
/* Sizing call: no output buffer is passed, only the length is taken. */
356 size = posix_acl_to_xattr(NULL, acl, NULL, 0);
357 buffer = kmalloc(size, GFP_KERNEL);
/* Real conversion into the allocated buffer, using init_user_ns. */
361 size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
366 err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
/*
 * ovl_create_over_whiteout - create @dentry by building the object in
 * the workdir and renaming it over the existing upper entry.
 *
 * Visible sequence: compute the ACLs to inherit with posix_acl_create(),
 * lock workdir and upperdir, look up a temporary name and the target
 * name, create the object in the workdir, fix up its mode and ACLs,
 * mark new directories opaque, rename it into the upper directory and
 * attach it with ovl_instantiate().
 * NOTE(review): workdir->d_inode is read in the initializer of wdir
 * before the WARN_ON(!workdir) test, so a NULL workdir would be
 * dereferenced before the check can fire - confirm and reorder.
 * NOTE(review): the error checks, goto labels, the conditions choosing
 * between the two rename calls and the return statement are not
 * visible in this listing.
 */
372 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
373 struct kstat *stat, const char *link,
374 struct dentry *hardlink)
376 struct dentry *workdir = ovl_workdir(dentry);
377 struct inode *wdir = workdir->d_inode;
378 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
379 struct inode *udir = upperdir->d_inode;
380 struct dentry *upper;
381 struct dentry *newdentry;
383 struct posix_acl *acl, *default_acl;
385 if (WARN_ON(!workdir))
/* May modify stat->mode and fills in acl and default_acl. */
389 err = posix_acl_create(dentry->d_parent->d_inode,
390 &stat->mode, &default_acl, &acl);
395 err = ovl_lock_rename_workdir(workdir, upperdir);
399 newdentry = ovl_lookup_temp(workdir, dentry);
400 err = PTR_ERR(newdentry);
401 if (IS_ERR(newdentry))
/* Current entry at the target name in the upper directory. */
404 upper = lookup_one_len(dentry->d_name.name, upperdir,
406 err = PTR_ERR(upper);
410 err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
415 * mode could have been mutilated due to umask (e.g. sgid directory)
/* Force the requested mode when the created inode ended up with another. */
418 !S_ISLNK(stat->mode) && newdentry->d_inode->i_mode != stat->mode) {
419 struct iattr attr = {
420 .ia_valid = ATTR_MODE,
421 .ia_mode = stat->mode,
423 inode_lock(newdentry->d_inode);
424 err = notify_change(newdentry, &attr, NULL);
425 inode_unlock(newdentry->d_inode);
430 err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
435 err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
/* A newly created directory is marked opaque before the rename. */
441 if (!hardlink && S_ISDIR(stat->mode)) {
442 err = ovl_set_opaque(newdentry);
446 err = ovl_do_rename(wdir, newdentry, udir, upper,
451 ovl_cleanup(wdir, upper);
453 err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
457 ovl_instantiate(dentry, inode, newdentry, !!hardlink);
464 unlock_rename(workdir, upperdir);
/* Drop the ACL references obtained from posix_acl_create(). */
467 posix_acl_release(acl);
468 posix_acl_release(default_acl);
/* Presumably the error path: remove the temporary object from the workdir. */
473 ovl_cleanup(wdir, newdentry);
/*
 * ovl_create_or_link - common back end for object creation and linking.
 *
 * Copies up the parent directory, switches to the overlay mounter
 * credentials, then installs a second credential set whose fsuid and
 * fsgid are taken from @inode before calling ovl_create_upper() or
 * ovl_create_over_whiteout().
 * NOTE(review): ovl_link() calls this function with @stat set to NULL,
 * while the security_dentry_create_files_as() call below reads
 * stat->mode; the guard that skips it for hard links is not visible
 * in this listing - confirm it exists.
 * NOTE(review): the error checks, the else keyword between the two
 * create calls and the return statement are not visible here.
 */
477 static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
478 struct kstat *stat, const char *link,
479 struct dentry *hardlink)
482 const struct cred *old_cred;
483 struct cred *override_cred;
485 err = ovl_copy_up(dentry->d_parent);
489 old_cred = ovl_override_creds(dentry->d_sb);
491 override_cred = prepare_creds();
/* The new object is created with the owner of the overlay inode. */
493 override_cred->fsuid = inode->i_uid;
494 override_cred->fsgid = inode->i_gid;
496 err = security_dentry_create_files_as(dentry,
497 stat->mode, &dentry->d_name, old_cred,
500 put_cred(override_cred);
501 goto out_revert_creds;
/*
 * override_creds() returns the credentials that were installed before;
 * that reference is dropped at once, followed by the local reference
 * on override_cred.
 */
504 put_cred(override_creds(override_cred));
505 put_cred(override_cred);
/* Non-opaque dentry: create directly in upper; otherwise go via workdir. */
507 if (!ovl_dentry_is_opaque(dentry))
508 err = ovl_create_upper(dentry, inode, stat, link,
511 err = ovl_create_over_whiteout(dentry, inode, stat,
515 revert_creds(old_cred);
/* Consistency checks between the overlay inode and the real upper inode. */
517 struct inode *realinode = d_inode(ovl_dentry_upper(dentry));
519 WARN_ON(inode->i_mode != realinode->i_mode);
520 WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
521 WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
/*
 * ovl_create_object - shared implementation behind the create, mkdir,
 * mknod and symlink entry points in this file.
 *
 * Takes write access with ovl_want_write(), allocates an overlay inode
 * for @mode, lets inode_init_owner() derive ownership and mode from the
 * parent directory, copies the resulting mode into the kstat and hands
 * over to ovl_create_or_link() with no hardlink source.
 * NOTE(review): the last parameter line, the kstat initializer fields,
 * the error checks and the return statement are not visible here.
 */
526 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
531 struct kstat stat = {
535 err = ovl_want_write(dentry);
540 inode = ovl_new_inode(dentry->d_sb, mode);
544 inode_init_owner(inode, dentry->d_parent->d_inode, mode);
/* Use the mode as adjusted by inode_init_owner(). */
545 stat.mode = inode->i_mode;
547 err = ovl_create_or_link(dentry, inode, &stat, link, NULL);
/* Pairs with the ovl_want_write() call above. */
552 ovl_drop_write(dentry);
/*
 * ovl_create - create a regular file.
 *
 * Keeps only the low twelve permission bits (07777) of @mode, forces the
 * type to S_IFREG and delegates to ovl_create_object() with no device
 * number and no symlink target.  @dir is not used by the visible line.
 */
557 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
560 return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
/*
 * ovl_mkdir - create a directory.
 *
 * Keeps only the low twelve permission bits (07777) of @mode, forces the
 * type to S_IFDIR and delegates to ovl_create_object().  @dir is not
 * used by the visible line.
 */
563 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
565 return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
/*
 * ovl_mknod - create a device node or other special file.
 *
 * A character device whose number equals WHITEOUT_DEV is singled out
 * before the creation; the value returned in that case is not visible
 * in this listing (presumably an error).  Everything else is passed to
 * ovl_create_object() with @mode and rdev unchanged.
 */
568 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
571 /* Don't allow creation of "whiteout" on overlay */
572 if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
575 return ovl_create_object(dentry, mode, rdev, NULL);
/*
 * ovl_symlink - create a symbolic link.
 *
 * Delegates to ovl_create_object() with mode S_IFLNK (no permission
 * bits), device number 0 and link as the symlink target.
 */
578 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
581 return ovl_create_object(dentry, S_IFLNK, 0, link);
/*
 * ovl_link - create a hard link to @old.
 *
 * Takes write access, copies @old up, then calls ovl_create_or_link()
 * for the new dentry with the overlay inode of @old, no kstat, no
 * symlink target and the upper dentry of @old as the link source.
 * NOTE(review): the second parameter line, the error checks, the
 * matching drop of write access and the return are not visible here.
 */
584 static int ovl_link(struct dentry *old, struct inode *newdir,
590 err = ovl_want_write(old);
594 err = ovl_copy_up(old);
598 inode = d_inode(old);
601 err = ovl_create_or_link(new, inode, NULL, NULL, ovl_dentry_upper(old));
/*
 * ovl_remove_and_whiteout - remove @dentry and leave a whiteout in its
 * place in the upper directory.
 *
 * Visible sequence: optionally clear an empty directory through
 * ovl_check_empty_and_clear(), lock workdir and upperdir, look up the
 * current upper entry and verify it is the expected one, create a
 * whiteout in the workdir, rename it over the upper entry and bump the
 * parent version.
 * NOTE(review): workdir->d_inode is read in the initializer of wdir
 * before the WARN_ON(!workdir) test, so a NULL workdir would be
 * dereferenced before the check can fire - confirm and reorder.
 * NOTE(review): the use of @is_dir, the declaration of flags, the error
 * checks, goto labels and the return statement are not visible here.
 */
611 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
613 struct dentry *workdir = ovl_workdir(dentry);
614 struct inode *wdir = workdir->d_inode;
615 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
616 struct inode *udir = upperdir->d_inode;
617 struct dentry *whiteout;
618 struct dentry *upper;
619 struct dentry *opaquedir = NULL;
623 if (WARN_ON(!workdir))
627 opaquedir = ovl_check_empty_and_clear(dentry);
628 err = PTR_ERR(opaquedir);
629 if (IS_ERR(opaquedir))
633 err = ovl_lock_rename_workdir(workdir, upperdir);
637 upper = lookup_one_len(dentry->d_name.name, upperdir,
639 err = PTR_ERR(upper);
/*
 * The looked-up entry must be the replacement directory when one was
 * created, or otherwise the recorded upper dentry if there is one.
 */
644 if ((opaquedir && upper != opaquedir) ||
645 (!opaquedir && ovl_dentry_upper(dentry) &&
646 upper != ovl_dentry_upper(dentry))) {
650 whiteout = ovl_whiteout(workdir, dentry);
651 err = PTR_ERR(whiteout);
652 if (IS_ERR(whiteout))
656 flags = RENAME_EXCHANGE;
658 err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
/* Remove the entry that the rename moved into the workdir. */
662 ovl_cleanup(wdir, upper);
664 ovl_dentry_version_inc(dentry->d_parent);
671 unlock_rename(workdir, upperdir);
/* Presumably the error path: discard the unused whiteout. */
678 ovl_cleanup(wdir, whiteout);
/*
 * ovl_remove_upper - remove @dentry straight from the upper directory,
 * without leaving a whiteout.
 *
 * Locks the upper parent directory, looks the name up again and only
 * removes the entry when it is still the recorded upper dentry, using
 * vfs_rmdir() or vfs_unlink().  The parent version is bumped afterwards.
 * NOTE(review): the test of @is_dir selecting between rmdir and unlink,
 * the error checks, the unlock and the return are not visible here.
 */
682 static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
684 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
685 struct inode *dir = upperdir->d_inode;
686 struct dentry *upper;
689 inode_lock_nested(dir, I_MUTEX_PARENT);
690 upper = lookup_one_len(dentry->d_name.name, upperdir,
692 err = PTR_ERR(upper);
/* Only act when the name still resolves to the dentry we expect. */
697 if (upper == ovl_dentry_upper(dentry)) {
699 err = vfs_rmdir(dir, upper);
701 err = vfs_unlink(dir, upper, NULL);
702 ovl_dentry_version_inc(dentry->d_parent);
707 * Keeping this dentry hashed would mean having to release
708 * upperpath/lowerpath, which could only be done if we are the
709 * sole user of this dentry. Too tricky... Just unhash for
/*
 * ovl_check_sticky - run the check_sticky() test on the real inodes.
 *
 * Uses the real inode of the parent of @dentry as the directory and the
 * real inode of @dentry as the object.  The values returned for the two
 * outcomes are not visible in this listing.
 */
720 static inline int ovl_check_sticky(struct dentry *dentry)
722 struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
723 struct inode *inode = ovl_dentry_real(dentry)->d_inode;
725 if (check_sticky(dir, inode))
/*
 * ovl_do_remove - common implementation of unlink and rmdir.
 *
 * Visible sequence: sticky check, take write access, copy up the parent,
 * then under the overridden credentials remove the entry either
 * directly (pure upper) or via a whiteout, update the link count of the
 * overlay inode and drop write access.
 * NOTE(review): the error checks, the else keyword between the two
 * removal calls, the conditions selecting clear_nlink() versus
 * drop_nlink() and the return statement are not visible here.
 */
731 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
733 enum ovl_path_type type;
735 const struct cred *old_cred;
738 err = ovl_check_sticky(dentry);
742 err = ovl_want_write(dentry);
746 err = ovl_copy_up(dentry->d_parent);
750 type = ovl_path_type(dentry);
752 old_cred = ovl_override_creds(dentry->d_sb);
/* Pure upper entries hide nothing below, so no whiteout is needed. */
753 if (OVL_TYPE_PURE_UPPER(type))
754 err = ovl_remove_upper(dentry, is_dir);
756 err = ovl_remove_and_whiteout(dentry, is_dir);
757 revert_creds(old_cred);
760 clear_nlink(dentry->d_inode);
762 drop_nlink(dentry->d_inode);
/* Pairs with the ovl_want_write() call above. */
765 ovl_drop_write(dentry);
/*
 * ovl_unlink - unlink entry point; calls ovl_do_remove() with is_dir
 * set to false.  @dir is not used.
 */
770 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
772 return ovl_do_remove(dentry, false);
/*
 * ovl_rmdir - rmdir entry point; calls ovl_do_remove() with is_dir set
 * to true.  @dir is not used.
 */
775 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
777 return ovl_do_remove(dentry, true);
/*
 * ovl_rename2 - rename entry point of the overlay directory operations.
 *
 * Only RENAME_EXCHANGE and RENAME_NOREPLACE are accepted in flags, and
 * RENAME_NOREPLACE is cleared before further processing.  The visible
 * phases are:
 *   1. sticky checks and path type classification of @old and @new;
 *   2. copy-up of @old, of the parent of @new and of @new;
 *   3. computation of old_opaque/new_opaque and of the extra rename
 *      flags (RENAME_WHITEOUT, RENAME_EXCHANGE);
 *   4. lock_rename() on the two upper parents followed by fresh lookups
 *      that are validated against the recorded upper dentries;
 *   5. opaque xattr updates around the rename itself;
 *   6. bookkeeping (opaque flags, versions), unlock, credential revert
 *      and release of write access.
 * NOTE(review): this listing omits many lines (flags parameter,
 * declarations of err, trap, old_opaque and new_opaque, error checks,
 * else keywords, goto labels and return statements), so the guards
 * around several statements below cannot be confirmed from here.
 */
780 static int ovl_rename2(struct inode *olddir, struct dentry *old,
781 struct inode *newdir, struct dentry *new,
785 enum ovl_path_type old_type;
786 enum ovl_path_type new_type;
787 struct dentry *old_upperdir;
788 struct dentry *new_upperdir;
789 struct dentry *olddentry;
790 struct dentry *newdentry;
794 bool cleanup_whiteout = false;
/* Every rename except an exchange may overwrite the target. */
795 bool overwrite = !(flags & RENAME_EXCHANGE);
796 bool is_dir = d_is_dir(old);
797 bool new_is_dir = false;
798 struct dentry *opaquedir = NULL;
799 const struct cred *old_cred = NULL;
/* Test for any flag outside the two supported ones. */
802 if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
805 flags &= ~RENAME_NOREPLACE;
807 err = ovl_check_sticky(old);
811 /* Don't copy up directory trees */
812 old_type = ovl_path_type(old);
814 if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
818 err = ovl_check_sticky(new);
825 new_type = ovl_path_type(new);
827 if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
/* Compare the lower inodes when neither side has an upper dentry ... */
831 if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
832 if (ovl_dentry_lower(old)->d_inode ==
833 ovl_dentry_lower(new)->d_inode)
/* ... and the upper inodes when both sides have one. */
836 if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
837 if (ovl_dentry_upper(old)->d_inode ==
838 ovl_dentry_upper(new)->d_inode)
842 if (ovl_dentry_is_opaque(new))
843 new_type = __OVL_PATH_UPPER;
845 new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
848 err = ovl_want_write(old);
852 err = ovl_copy_up(old);
856 err = ovl_copy_up(new->d_parent);
860 err = ovl_copy_up(new);
/* Here opaque is simply the negation of the pure upper type test. */
865 old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
866 new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
868 old_cred = ovl_override_creds(old->d_sb);
/* Overwriting a merged or lower directory: it has to be empty first. */
870 if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
871 opaquedir = ovl_check_empty_and_clear(new);
872 err = PTR_ERR(opaquedir);
873 if (IS_ERR(opaquedir)) {
875 goto out_revert_creds;
881 if (new->d_inode || !new_opaque) {
882 /* Whiteout source */
883 flags |= RENAME_WHITEOUT;
885 /* Switch whiteouts */
886 flags |= RENAME_EXCHANGE;
888 } else if (is_dir && !new->d_inode && new_opaque) {
889 flags |= RENAME_EXCHANGE;
890 cleanup_whiteout = true;
894 old_upperdir = ovl_dentry_upper(old->d_parent);
895 new_upperdir = ovl_dentry_upper(new->d_parent);
/* trap is compared against both looked-up dentries further down. */
897 trap = lock_rename(new_upperdir, old_upperdir);
900 olddentry = lookup_one_len(old->d_name.name, old_upperdir,
902 err = PTR_ERR(olddentry);
903 if (IS_ERR(olddentry))
/* The source name must still resolve to the recorded upper dentry. */
907 if (olddentry != ovl_dentry_upper(old))
910 newdentry = lookup_one_len(new->d_name.name, new_upperdir,
912 err = PTR_ERR(newdentry);
913 if (IS_ERR(newdentry))
/* Validate the target lookup against what this function expects. */
917 if (ovl_dentry_upper(new)) {
919 if (newdentry != opaquedir)
922 if (newdentry != ovl_dentry_upper(new))
926 if (!d_is_negative(newdentry) &&
927 (!new_opaque || !ovl_is_whiteout(newdentry)))
931 if (olddentry == trap)
933 if (newdentry == trap)
/* Set the opaque xattr up front on directories that will need it. */
936 if (is_dir && !old_opaque && new_opaque) {
937 err = ovl_set_opaque(olddentry);
941 if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
942 err = ovl_set_opaque(newdentry);
947 if (old_opaque || new_opaque) {
948 err = ovl_do_rename(old_upperdir->d_inode, olddentry,
949 new_upperdir->d_inode, newdentry,
952 /* No debug for the plain case */
953 BUG_ON(flags & ~RENAME_EXCHANGE);
954 err = vfs_rename(old_upperdir->d_inode, olddentry,
955 new_upperdir->d_inode, newdentry,
/*
 * The next two removals mirror the conditions of the xattr setup above;
 * presumably they undo it when the rename failed.
 */
960 if (is_dir && !old_opaque && new_opaque)
961 ovl_remove_opaque(olddentry);
962 if (!overwrite && new_is_dir && old_opaque && !new_opaque)
963 ovl_remove_opaque(newdentry);
/* These removals use the opposite conditions: opaque going to non-opaque. */
967 if (is_dir && old_opaque && !new_opaque)
968 ovl_remove_opaque(olddentry);
969 if (!overwrite && new_is_dir && !old_opaque && new_opaque)
970 ovl_remove_opaque(newdentry);
973 * Old dentry now lives in different location. Dentries in
974 * lowerstack are stale. We cannot drop them here because
975 * access to them is lockless. This could be only pure upper
976 * or opaque directory - numlower is zero. Or upper non-dir
977 * entry - its pureness is tracked by flag opaque.
979 if (old_opaque != new_opaque) {
980 ovl_dentry_set_opaque(old, new_opaque);
982 ovl_dentry_set_opaque(new, old_opaque);
/* Remove the whiteout that the exchange moved into the old parent. */
985 if (cleanup_whiteout)
986 ovl_cleanup(old_upperdir->d_inode, newdentry);
988 ovl_dentry_version_inc(old->d_parent);
989 ovl_dentry_version_inc(new->d_parent);
996 unlock_rename(new_upperdir, old_upperdir);
998 revert_creds(old_cred);
1000 ovl_drop_write(old);
/*
 * Inode operations table for overlay directories.
 *
 * The visible entries wire up handlers defined in this file
 * (ovl_symlink, ovl_unlink, ovl_rename2, ovl_create, ovl_dir_getattr)
 * together with handlers that are not defined in the visible part of
 * this file (ovl_lookup, ovl_setattr, ovl_permission, ovl_listxattr,
 * ovl_get_acl, ovl_update_time).
 * NOTE(review): some initializer lines and the closing brace are not
 * visible in this listing.
 */
1006 const struct inode_operations ovl_dir_inode_operations = {
1007 .lookup = ovl_lookup,
1009 .symlink = ovl_symlink,
1010 .unlink = ovl_unlink,
1012 .rename = ovl_rename2,
1014 .setattr = ovl_setattr,
1015 .create = ovl_create,
1017 .permission = ovl_permission,
1018 .getattr = ovl_dir_getattr,
1019 .listxattr = ovl_listxattr,
1020 .get_acl = ovl_get_acl,
1021 .update_time = ovl_update_time,