]> Git Repo - linux.git/blame - fs/namei.c
fs: port i_{g,u}id_{needs_}update() to mnt_idmap
[linux.git] / fs / namei.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
1da177e4
LT
2/*
3 * linux/fs/namei.c
4 *
5 * Copyright (C) 1991, 1992 Linus Torvalds
6 */
7
8/*
9 * Some corrections by tytso.
10 */
11
12/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
13 * lookup logic.
14 */
15/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
16 */
17
18#include <linux/init.h>
630d9c47 19#include <linux/export.h>
44696908 20#include <linux/kernel.h>
1da177e4
LT
21#include <linux/slab.h>
22#include <linux/fs.h>
23#include <linux/namei.h>
1da177e4 24#include <linux/pagemap.h>
2d878178 25#include <linux/sched/mm.h>
0eeca283 26#include <linux/fsnotify.h>
1da177e4
LT
27#include <linux/personality.h>
28#include <linux/security.h>
6146f0d5 29#include <linux/ima.h>
1da177e4
LT
30#include <linux/syscalls.h>
31#include <linux/mount.h>
32#include <linux/audit.h>
16f7e0fe 33#include <linux/capability.h>
834f2a4a 34#include <linux/file.h>
5590ff0d 35#include <linux/fcntl.h>
08ce5f16 36#include <linux/device_cgroup.h>
5ad4e53b 37#include <linux/fs_struct.h>
e77819e5 38#include <linux/posix_acl.h>
99d263d4 39#include <linux/hash.h>
2a18da7a 40#include <linux/bitops.h>
aeaa4a79 41#include <linux/init_task.h>
7c0f6ba6 42#include <linux/uaccess.h>
1da177e4 43
e81e3f4d 44#include "internal.h"
c7105365 45#include "mount.h"
e81e3f4d 46
1da177e4
LT
47/* [Feb-1997 T. Schoebel-Theuer]
48 * Fundamental changes in the pathname lookup mechanisms (namei)
49 * were necessary because of omirr. The reason is that omirr needs
50 * to know the _real_ pathname, not the user-supplied one, in case
51 * of symlinks (and also when transname replacements occur).
52 *
53 * The new code replaces the old recursive symlink resolution with
54 * an iterative one (in case of non-nested symlink chains). It does
55 * this with calls to <fs>_follow_link().
56 * As a side effect, dir_namei(), _namei() and follow_link() are now
57 * replaced with a single function lookup_dentry() that can handle all
58 * the special cases of the former code.
59 *
60 * With the new dcache, the pathname is stored at each inode, at least as
61 * long as the refcount of the inode is positive. As a side effect, the
62 * size of the dcache depends on the inode cache and thus is dynamic.
63 *
64 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
65 * resolution to correspond with current state of the code.
66 *
67 * Note that the symlink resolution is not *completely* iterative.
68 * There is still a significant amount of tail- and mid- recursion in
69 * the algorithm. Also, note that <fs>_readlink() is not used in
70 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
71 * may return different results than <fs>_follow_link(). Many virtual
72 * filesystems (including /proc) exhibit this behavior.
73 */
74
75/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
76 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
77 * and the name already exists in form of a symlink, try to create the new
78 * name indicated by the symlink. The old code always complained that the
79 * name already exists, due to not following the symlink even if its target
80 * is nonexistent. The new semantics affects also mknod() and link() when
25985edc 81 * the name is a symlink pointing to a non-existent name.
1da177e4
LT
82 *
83 * I don't know which semantics is the right one, since I have no access
84 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
85 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
86 * "old" one. Personally, I think the new semantics is much more logical.
87 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
88 * file does succeed in both HP-UX and SunOs, but not in Solaris
89 * and in the old Linux semantics.
90 */
91
92/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
93 * semantics. See the comments in "open_namei" and "do_link" below.
94 *
95 * [10-Sep-98 Alan Modra] Another symlink change.
96 */
97
98/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
99 * inside the path - always follow.
100 * in the last component in creation/removal/renaming - never follow.
101 * if LOOKUP_FOLLOW passed - follow.
102 * if the pathname has trailing slashes - follow.
103 * otherwise - don't follow.
104 * (applied in that order).
105 *
106 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
107 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
108 * During the 2.4 we need to fix the userland stuff depending on it -
109 * hopefully we will be able to get rid of that wart in 2.5. So far only
110 * XEmacs seems to be relying on it...
111 */
112/*
113 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
a11f3a05 114 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
1da177e4
LT
115 * any extra contention...
116 */
117
118/* In order to reduce some races, while at the same time doing additional
119 * checking and hopefully speeding things up, we copy filenames to the
120 * kernel data space before using them.
121 *
122 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
123 * PATH_MAX includes the nul terminator --RR.
124 */
91a27b2a 125
fd2f7cb5 126#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
7950e385 127
51f39a1f 128struct filename *
91a27b2a
JL
129getname_flags(const char __user *filename, int flags, int *empty)
130{
94b5d262 131 struct filename *result;
7950e385 132 char *kname;
94b5d262 133 int len;
4043cde8 134
7ac86265
JL
135 result = audit_reusename(filename);
136 if (result)
137 return result;
138
7950e385 139 result = __getname();
3f9f0aa6 140 if (unlikely(!result))
4043cde8
EP
141 return ERR_PTR(-ENOMEM);
142
7950e385
JL
143 /*
144 * First, try to embed the struct filename inside the names_cache
145 * allocation
146 */
fd2f7cb5 147 kname = (char *)result->iname;
91a27b2a 148 result->name = kname;
7950e385 149
94b5d262 150 len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
91a27b2a 151 if (unlikely(len < 0)) {
94b5d262
AV
152 __putname(result);
153 return ERR_PTR(len);
91a27b2a 154 }
3f9f0aa6 155
7950e385
JL
156 /*
157 * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
158 * separate struct filename so we can dedicate the entire
159 * names_cache allocation for the pathname, and re-do the copy from
160 * userland.
161 */
94b5d262 162 if (unlikely(len == EMBEDDED_NAME_MAX)) {
fd2f7cb5 163 const size_t size = offsetof(struct filename, iname[1]);
7950e385
JL
164 kname = (char *)result;
165
fd2f7cb5
AV
166 /*
167 * size is chosen that way we to guarantee that
168 * result->iname[0] is within the same object and that
169 * kname can't be equal to result->iname, no matter what.
170 */
171 result = kzalloc(size, GFP_KERNEL);
94b5d262
AV
172 if (unlikely(!result)) {
173 __putname(kname);
174 return ERR_PTR(-ENOMEM);
7950e385
JL
175 }
176 result->name = kname;
94b5d262
AV
177 len = strncpy_from_user(kname, filename, PATH_MAX);
178 if (unlikely(len < 0)) {
179 __putname(kname);
180 kfree(result);
181 return ERR_PTR(len);
182 }
183 if (unlikely(len == PATH_MAX)) {
184 __putname(kname);
185 kfree(result);
186 return ERR_PTR(-ENAMETOOLONG);
187 }
7950e385
JL
188 }
189
94b5d262 190 result->refcnt = 1;
3f9f0aa6
LT
191 /* The empty path is special. */
192 if (unlikely(!len)) {
193 if (empty)
4043cde8 194 *empty = 1;
94b5d262
AV
195 if (!(flags & LOOKUP_EMPTY)) {
196 putname(result);
197 return ERR_PTR(-ENOENT);
198 }
1da177e4 199 }
3f9f0aa6 200
7950e385 201 result->uptr = filename;
c4ad8f98 202 result->aname = NULL;
7950e385
JL
203 audit_getname(result);
204 return result;
1da177e4
LT
205}
206
8228e2c3
DK
207struct filename *
208getname_uflags(const char __user *filename, int uflags)
209{
210 int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
211
212 return getname_flags(filename, flags, NULL);
213}
214
91a27b2a
JL
215struct filename *
216getname(const char __user * filename)
f52e0c11 217{
f7493e5d 218 return getname_flags(filename, 0, NULL);
f52e0c11
AV
219}
220
c4ad8f98
LT
221struct filename *
222getname_kernel(const char * filename)
223{
224 struct filename *result;
08518549 225 int len = strlen(filename) + 1;
c4ad8f98
LT
226
227 result = __getname();
228 if (unlikely(!result))
229 return ERR_PTR(-ENOMEM);
230
08518549 231 if (len <= EMBEDDED_NAME_MAX) {
fd2f7cb5 232 result->name = (char *)result->iname;
08518549 233 } else if (len <= PATH_MAX) {
30ce4d19 234 const size_t size = offsetof(struct filename, iname[1]);
08518549
PM
235 struct filename *tmp;
236
30ce4d19 237 tmp = kmalloc(size, GFP_KERNEL);
08518549
PM
238 if (unlikely(!tmp)) {
239 __putname(result);
240 return ERR_PTR(-ENOMEM);
241 }
242 tmp->name = (char *)result;
08518549
PM
243 result = tmp;
244 } else {
245 __putname(result);
246 return ERR_PTR(-ENAMETOOLONG);
247 }
248 memcpy((char *)result->name, filename, len);
c4ad8f98
LT
249 result->uptr = NULL;
250 result->aname = NULL;
55422d0b 251 result->refcnt = 1;
fd3522fd 252 audit_getname(result);
c4ad8f98 253
c4ad8f98
LT
254 return result;
255}
256
91a27b2a 257void putname(struct filename *name)
1da177e4 258{
ea47ab11 259 if (IS_ERR(name))
91ef658f
DK
260 return;
261
55422d0b
PM
262 BUG_ON(name->refcnt <= 0);
263
264 if (--name->refcnt > 0)
265 return;
266
fd2f7cb5 267 if (name->name != name->iname) {
55422d0b
PM
268 __putname(name->name);
269 kfree(name);
270 } else
271 __putname(name);
1da177e4 272}
1da177e4 273
47291baa
CB
274/**
275 * check_acl - perform ACL permission checking
700b7940 276 * @idmap: idmap of the mount the inode was found from
47291baa
CB
277 * @inode: inode to check permissions on
278 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
279 *
280 * This function performs the ACL permission checking. Since this function
281 * retrieve POSIX acls it needs to know whether it is called from a blocking or
282 * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
283 *
700b7940
CB
284 * If the inode has been found through an idmapped mount the idmap of
285 * the vfsmount must be passed through @idmap. This function will then take
286 * care to map the inode according to @idmap before checking permissions.
47291baa 287 * On non-idmapped mounts or if permission checking is to be performed on the
700b7940 288 * raw inode simply passs @nop_mnt_idmap.
47291baa 289 */
700b7940 290static int check_acl(struct mnt_idmap *idmap,
47291baa 291 struct inode *inode, int mask)
e77819e5 292{
84635d68 293#ifdef CONFIG_FS_POSIX_ACL
e77819e5
LT
294 struct posix_acl *acl;
295
e77819e5 296 if (mask & MAY_NOT_BLOCK) {
3567866b
AV
297 acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
298 if (!acl)
e77819e5 299 return -EAGAIN;
cac2f8b8 300 /* no ->get_inode_acl() calls in RCU mode... */
b8a7a3a6 301 if (is_uncached_acl(acl))
3567866b 302 return -ECHILD;
700b7940 303 return posix_acl_permission(idmap, inode, acl, mask);
e77819e5
LT
304 }
305
cac2f8b8 306 acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
2982baa2
CH
307 if (IS_ERR(acl))
308 return PTR_ERR(acl);
e77819e5 309 if (acl) {
700b7940 310 int error = posix_acl_permission(idmap, inode, acl, mask);
e77819e5
LT
311 posix_acl_release(acl);
312 return error;
313 }
84635d68 314#endif
e77819e5
LT
315
316 return -EAGAIN;
317}
318
47291baa
CB
319/**
320 * acl_permission_check - perform basic UNIX permission checking
700b7940 321 * @idmap: idmap of the mount the inode was found from
47291baa
CB
322 * @inode: inode to check permissions on
323 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
324 *
325 * This function performs the basic UNIX permission checking. Since this
326 * function may retrieve POSIX acls it needs to know whether it is called from a
327 * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
5fc475b7 328 *
700b7940
CB
329 * If the inode has been found through an idmapped mount the idmap of
330 * the vfsmount must be passed through @idmap. This function will then take
331 * care to map the inode according to @idmap before checking permissions.
47291baa 332 * On non-idmapped mounts or if permission checking is to be performed on the
700b7940 333 * raw inode simply passs @nop_mnt_idmap.
1da177e4 334 */
700b7940 335static int acl_permission_check(struct mnt_idmap *idmap,
47291baa 336 struct inode *inode, int mask)
1da177e4 337{
700b7940 338 struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
26cf46be 339 unsigned int mode = inode->i_mode;
a2bd096f 340 vfsuid_t vfsuid;
1da177e4 341
5fc475b7 342 /* Are we the owner? If so, ACL's don't matter */
a2bd096f
CB
343 vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
344 if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) {
5fc475b7 345 mask &= 7;
1da177e4 346 mode >>= 6;
5fc475b7
LT
347 return (mask & ~mode) ? -EACCES : 0;
348 }
1da177e4 349
5fc475b7
LT
350 /* Do we have ACL's? */
351 if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
700b7940 352 int error = check_acl(idmap, inode, mask);
5fc475b7
LT
353 if (error != -EAGAIN)
354 return error;
1da177e4
LT
355 }
356
5fc475b7
LT
357 /* Only RWX matters for group/other mode bits */
358 mask &= 7;
359
1da177e4 360 /*
5fc475b7
LT
361 * Are the group permissions different from
362 * the other permissions in the bits we care
363 * about? Need to check group ownership if so.
1da177e4 364 */
5fc475b7 365 if (mask & (mode ^ (mode >> 3))) {
a2bd096f
CB
366 vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
367 if (vfsgid_in_group_p(vfsgid))
5fc475b7
LT
368 mode >>= 3;
369 }
370
371 /* Bits in 'mode' clear that we require? */
372 return (mask & ~mode) ? -EACCES : 0;
5909ccaa
LT
373}
374
375/**
b74c79e9 376 * generic_permission - check for access rights on a Posix-like filesystem
4609e1f1 377 * @idmap: idmap of the mount the inode was found from
5909ccaa 378 * @inode: inode to check access rights for
5fc475b7
LT
379 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
380 * %MAY_NOT_BLOCK ...)
5909ccaa
LT
381 *
382 * Used to check for read/write/execute permissions on a file.
383 * We use "fsuid" for this, letting us set arbitrary permissions
384 * for filesystem access without changing the "normal" uids which
b74c79e9
NP
385 * are used for other things.
386 *
387 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
388 * request cannot be satisfied (eg. requires blocking or too much complexity).
389 * It would then be called again in ref-walk mode.
47291baa 390 *
4609e1f1
CB
391 * If the inode has been found through an idmapped mount the idmap of
392 * the vfsmount must be passed through @idmap. This function will then take
393 * care to map the inode according to @idmap before checking permissions.
47291baa 394 * On non-idmapped mounts or if permission checking is to be performed on the
4609e1f1 395 * raw inode simply passs @nop_mnt_idmap.
5909ccaa 396 */
4609e1f1 397int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
47291baa 398 int mask)
5909ccaa
LT
399{
400 int ret;
401
402 /*
948409c7 403 * Do the basic permission checks.
5909ccaa 404 */
700b7940 405 ret = acl_permission_check(idmap, inode, mask);
5909ccaa
LT
406 if (ret != -EACCES)
407 return ret;
1da177e4 408
d594e7ec
AV
409 if (S_ISDIR(inode->i_mode)) {
410 /* DACs are overridable for directories */
d594e7ec 411 if (!(mask & MAY_WRITE))
9452e93e 412 if (capable_wrt_inode_uidgid(idmap, inode,
23adbe12 413 CAP_DAC_READ_SEARCH))
d594e7ec 414 return 0;
9452e93e 415 if (capable_wrt_inode_uidgid(idmap, inode,
0558c1bf 416 CAP_DAC_OVERRIDE))
1da177e4 417 return 0;
2a4c2242
SS
418 return -EACCES;
419 }
1da177e4
LT
420
421 /*
422 * Searching includes executable on directories, else just read.
423 */
7ea66001 424 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
d594e7ec 425 if (mask == MAY_READ)
9452e93e 426 if (capable_wrt_inode_uidgid(idmap, inode,
0558c1bf 427 CAP_DAC_READ_SEARCH))
1da177e4 428 return 0;
2a4c2242
SS
429 /*
430 * Read/write DACs are always overridable.
431 * Executable DACs are overridable when there is
432 * at least one exec bit set.
433 */
434 if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
9452e93e 435 if (capable_wrt_inode_uidgid(idmap, inode,
0558c1bf 436 CAP_DAC_OVERRIDE))
2a4c2242 437 return 0;
1da177e4
LT
438
439 return -EACCES;
440}
4d359507 441EXPORT_SYMBOL(generic_permission);
1da177e4 442
47291baa
CB
443/**
444 * do_inode_permission - UNIX permission checking
4609e1f1 445 * @idmap: idmap of the mount the inode was found from
47291baa
CB
446 * @inode: inode to check permissions on
447 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
448 *
3ddcd056
LT
449 * We _really_ want to just do "generic_permission()" without
450 * even looking at the inode->i_op values. So we keep a cache
451 * flag in inode->i_opflags, that says "this has not special
452 * permission function, use the fast case".
453 */
4609e1f1 454static inline int do_inode_permission(struct mnt_idmap *idmap,
47291baa 455 struct inode *inode, int mask)
3ddcd056
LT
456{
457 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
458 if (likely(inode->i_op->permission))
4609e1f1 459 return inode->i_op->permission(idmap, inode, mask);
3ddcd056
LT
460
461 /* This gets set once for the inode lifetime */
462 spin_lock(&inode->i_lock);
463 inode->i_opflags |= IOP_FASTPERM;
464 spin_unlock(&inode->i_lock);
465 }
4609e1f1 466 return generic_permission(idmap, inode, mask);
3ddcd056
LT
467}
468
0bdaea90
DH
469/**
470 * sb_permission - Check superblock-level permissions
471 * @sb: Superblock of inode to check permission on
55852635 472 * @inode: Inode to check permission on
0bdaea90
DH
473 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
474 *
475 * Separate out file-system wide checks from inode-specific permission checks.
476 */
477static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
478{
479 if (unlikely(mask & MAY_WRITE)) {
480 umode_t mode = inode->i_mode;
481
482 /* Nobody gets write access to a read-only fs. */
bc98a42c 483 if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
0bdaea90
DH
484 return -EROFS;
485 }
486 return 0;
487}
488
489/**
490 * inode_permission - Check for access rights to a given inode
4609e1f1 491 * @idmap: idmap of the mount the inode was found from
47291baa
CB
492 * @inode: Inode to check permission on
493 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
0bdaea90
DH
494 *
495 * Check for read/write/execute permissions on an inode. We use fs[ug]id for
496 * this, letting us set arbitrary permissions for filesystem access without
497 * changing the "normal" UIDs which are used for other things.
498 *
499 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
500 */
4609e1f1 501int inode_permission(struct mnt_idmap *idmap,
47291baa 502 struct inode *inode, int mask)
0bdaea90
DH
503{
504 int retval;
505
506 retval = sb_permission(inode->i_sb, inode, mask);
507 if (retval)
508 return retval;
4bfd054a
EB
509
510 if (unlikely(mask & MAY_WRITE)) {
511 /*
512 * Nobody gets write access to an immutable file.
513 */
514 if (IS_IMMUTABLE(inode))
515 return -EPERM;
516
517 /*
518 * Updating mtime will likely cause i_uid and i_gid to be
519 * written back improperly if their true value is unknown
520 * to the vfs.
521 */
4609e1f1 522 if (HAS_UNMAPPED_ID(idmap, inode))
4bfd054a
EB
523 return -EACCES;
524 }
525
4609e1f1 526 retval = do_inode_permission(idmap, inode, mask);
4bfd054a
EB
527 if (retval)
528 return retval;
529
530 retval = devcgroup_inode_permission(inode, mask);
531 if (retval)
532 return retval;
533
534 return security_inode_permission(inode, mask);
0bdaea90 535}
4d359507 536EXPORT_SYMBOL(inode_permission);
0bdaea90 537
5dd784d0
JB
538/**
539 * path_get - get a reference to a path
540 * @path: path to get the reference to
541 *
542 * Given a path increment the reference count to the dentry and the vfsmount.
543 */
dcf787f3 544void path_get(const struct path *path)
5dd784d0
JB
545{
546 mntget(path->mnt);
547 dget(path->dentry);
548}
549EXPORT_SYMBOL(path_get);
550
1d957f9b
JB
551/**
552 * path_put - put a reference to a path
553 * @path: path to put the reference to
554 *
555 * Given a path decrement the reference count to the dentry and the vfsmount.
556 */
dcf787f3 557void path_put(const struct path *path)
1da177e4 558{
1d957f9b
JB
559 dput(path->dentry);
560 mntput(path->mnt);
1da177e4 561}
1d957f9b 562EXPORT_SYMBOL(path_put);
1da177e4 563
894bc8c4 564#define EMBEDDED_LEVELS 2
1f55a6ec
AV
565struct nameidata {
566 struct path path;
1cf2665b 567 struct qstr last;
1f55a6ec
AV
568 struct path root;
569 struct inode *inode; /* path.dentry.d_inode */
bcba1e7d 570 unsigned int flags, state;
03fa86e9 571 unsigned seq, next_seq, m_seq, r_seq;
1f55a6ec
AV
572 int last_type;
573 unsigned depth;
756daf26 574 int total_link_count;
697fc6ca
AV
575 struct saved {
576 struct path link;
fceef393 577 struct delayed_call done;
697fc6ca 578 const char *name;
0450b2d1 579 unsigned seq;
894bc8c4 580 } *stack, internal[EMBEDDED_LEVELS];
9883d185
AV
581 struct filename *name;
582 struct nameidata *saved;
583 unsigned root_seq;
584 int dfd;
a2bd096f 585 vfsuid_t dir_vfsuid;
0f705953 586 umode_t dir_mode;
3859a271 587} __randomize_layout;
1f55a6ec 588
bcba1e7d
AV
589#define ND_ROOT_PRESET 1
590#define ND_ROOT_GRABBED 2
591#define ND_JUMPED 4
592
06422964 593static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name)
894bc8c4 594{
756daf26
N
595 struct nameidata *old = current->nameidata;
596 p->stack = p->internal;
7962c7d1 597 p->depth = 0;
c8a53ee5
AV
598 p->dfd = dfd;
599 p->name = name;
7d01ef75
AV
600 p->path.mnt = NULL;
601 p->path.dentry = NULL;
756daf26 602 p->total_link_count = old ? old->total_link_count : 0;
9883d185 603 p->saved = old;
756daf26 604 current->nameidata = p;
894bc8c4
AV
605}
606
06422964
AV
607static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name,
608 const struct path *root)
609{
610 __set_nameidata(p, dfd, name);
611 p->state = 0;
612 if (unlikely(root)) {
613 p->state = ND_ROOT_PRESET;
614 p->root = *root;
615 }
616}
617
9883d185 618static void restore_nameidata(void)
894bc8c4 619{
9883d185 620 struct nameidata *now = current->nameidata, *old = now->saved;
756daf26
N
621
622 current->nameidata = old;
623 if (old)
624 old->total_link_count = now->total_link_count;
e1a63bbc 625 if (now->stack != now->internal)
756daf26 626 kfree(now->stack);
894bc8c4
AV
627}
628
60ef60c7 629static bool nd_alloc_stack(struct nameidata *nd)
894bc8c4 630{
bc40aee0
AV
631 struct saved *p;
632
60ef60c7
AV
633 p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
634 nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
635 if (unlikely(!p))
636 return false;
894bc8c4
AV
637 memcpy(p, nd->internal, sizeof(nd->internal));
638 nd->stack = p;
60ef60c7 639 return true;
894bc8c4
AV
640}
641
397d425d 642/**
6b03f7ed 643 * path_connected - Verify that a dentry is below mnt.mnt_root
397d425d
EB
644 *
645 * Rename can sometimes move a file or directory outside of a bind
646 * mount, path_connected allows those cases to be detected.
647 */
6b03f7ed 648static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
397d425d 649{
95dd7758 650 struct super_block *sb = mnt->mnt_sb;
397d425d 651
402dd2cf
CH
652 /* Bind mounts can have disconnected paths */
653 if (mnt->mnt_root == sb->s_root)
397d425d
EB
654 return true;
655
6b03f7ed 656 return is_subdir(dentry, mnt->mnt_root);
397d425d
EB
657}
658
7973387a
AV
659static void drop_links(struct nameidata *nd)
660{
661 int i = nd->depth;
662 while (i--) {
663 struct saved *last = nd->stack + i;
fceef393
AV
664 do_delayed_call(&last->done);
665 clear_delayed_call(&last->done);
7973387a
AV
666 }
667}
668
6e180327
AV
669static void leave_rcu(struct nameidata *nd)
670{
671 nd->flags &= ~LOOKUP_RCU;
03fa86e9 672 nd->seq = nd->next_seq = 0;
6e180327
AV
673 rcu_read_unlock();
674}
675
7973387a
AV
676static void terminate_walk(struct nameidata *nd)
677{
678 drop_links(nd);
679 if (!(nd->flags & LOOKUP_RCU)) {
680 int i;
681 path_put(&nd->path);
682 for (i = 0; i < nd->depth; i++)
683 path_put(&nd->stack[i].link);
bcba1e7d 684 if (nd->state & ND_ROOT_GRABBED) {
102b8af2 685 path_put(&nd->root);
bcba1e7d 686 nd->state &= ~ND_ROOT_GRABBED;
102b8af2 687 }
7973387a 688 } else {
6e180327 689 leave_rcu(nd);
7973387a
AV
690 }
691 nd->depth = 0;
7d01ef75
AV
692 nd->path.mnt = NULL;
693 nd->path.dentry = NULL;
7973387a
AV
694}
695
696/* path_put is needed afterwards regardless of success or failure */
2aa38470 697static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
7973387a 698{
2aa38470 699 int res = __legitimize_mnt(path->mnt, mseq);
7973387a
AV
700 if (unlikely(res)) {
701 if (res > 0)
702 path->mnt = NULL;
703 path->dentry = NULL;
704 return false;
705 }
706 if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
707 path->dentry = NULL;
708 return false;
709 }
710 return !read_seqcount_retry(&path->dentry->d_seq, seq);
711}
712
2aa38470
AV
713static inline bool legitimize_path(struct nameidata *nd,
714 struct path *path, unsigned seq)
715{
5bd73286 716 return __legitimize_path(path, seq, nd->m_seq);
2aa38470
AV
717}
718
7973387a
AV
719static bool legitimize_links(struct nameidata *nd)
720{
721 int i;
eacd9aa8
AV
722 if (unlikely(nd->flags & LOOKUP_CACHED)) {
723 drop_links(nd);
724 nd->depth = 0;
725 return false;
726 }
7973387a
AV
727 for (i = 0; i < nd->depth; i++) {
728 struct saved *last = nd->stack + i;
729 if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
730 drop_links(nd);
731 nd->depth = i + 1;
732 return false;
733 }
734 }
735 return true;
736}
737
ee594bff
AV
738static bool legitimize_root(struct nameidata *nd)
739{
adb21d2b 740 /* Nothing to do if nd->root is zero or is managed by the VFS user. */
bcba1e7d 741 if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET))
ee594bff 742 return true;
bcba1e7d 743 nd->state |= ND_ROOT_GRABBED;
ee594bff
AV
744 return legitimize_path(nd, &nd->root, nd->root_seq);
745}
746
19660af7 747/*
31e6b01f 748 * Path walking has 2 modes, rcu-walk and ref-walk (see
19660af7
AV
749 * Documentation/filesystems/path-lookup.txt). In situations when we can't
750 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
57e3715c 751 * normal reference counts on dentries and vfsmounts to transition to ref-walk
19660af7
AV
752 * mode. Refcounts are grabbed at the last known good point before rcu-walk
753 * got stuck, so ref-walk may continue from there. If this is not successful
754 * (eg. a seqcount has changed), then failure is returned and it's up to caller
755 * to restart the path walk from the beginning in ref-walk mode.
31e6b01f 756 */
31e6b01f
NP
757
758/**
e36cffed 759 * try_to_unlazy - try to switch to ref-walk mode.
19660af7 760 * @nd: nameidata pathwalk data
e36cffed 761 * Returns: true on success, false on failure
31e6b01f 762 *
e36cffed 763 * try_to_unlazy attempts to legitimize the current nd->path and nd->root
4675ac39
AV
764 * for ref-walk mode.
765 * Must be called from rcu-walk context.
e36cffed 766 * Nothing should touch nameidata between try_to_unlazy() failure and
7973387a 767 * terminate_walk().
31e6b01f 768 */
e36cffed 769static bool try_to_unlazy(struct nameidata *nd)
31e6b01f 770{
31e6b01f
NP
771 struct dentry *parent = nd->path.dentry;
772
773 BUG_ON(!(nd->flags & LOOKUP_RCU));
e5c832d5 774
4675ac39 775 if (unlikely(!legitimize_links(nd)))
4675ac39 776 goto out1;
84a2bd39
AV
777 if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
778 goto out;
ee594bff
AV
779 if (unlikely(!legitimize_root(nd)))
780 goto out;
6e180327 781 leave_rcu(nd);
4675ac39 782 BUG_ON(nd->inode != parent->d_inode);
e36cffed 783 return true;
4675ac39 784
84a2bd39 785out1:
4675ac39
AV
786 nd->path.mnt = NULL;
787 nd->path.dentry = NULL;
4675ac39 788out:
6e180327 789 leave_rcu(nd);
e36cffed 790 return false;
4675ac39
AV
791}
792
793/**
ae66db45 794 * try_to_unlazy_next - try to switch to ref-walk mode.
4675ac39 795 * @nd: nameidata pathwalk data
ae66db45 796 * @dentry: next dentry to step into
ae66db45 797 * Returns: true on success, false on failure
4675ac39 798 *
30476f7e 799 * Similar to try_to_unlazy(), but here we have the next dentry already
ae66db45
AV
800 * picked by rcu-walk and want to legitimize that in addition to the current
801 * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context.
802 * Nothing should touch nameidata between try_to_unlazy_next() failure and
4675ac39
AV
803 * terminate_walk().
804 */
03fa86e9 805static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
4675ac39 806{
7e4745a0 807 int res;
4675ac39
AV
808 BUG_ON(!(nd->flags & LOOKUP_RCU));
809
7973387a
AV
810 if (unlikely(!legitimize_links(nd)))
811 goto out2;
7e4745a0
AV
812 res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
813 if (unlikely(res)) {
814 if (res > 0)
815 goto out2;
816 goto out1;
817 }
4675ac39 818 if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
7973387a 819 goto out1;
48a066e7 820
15570086 821 /*
4675ac39
AV
822 * We need to move both the parent and the dentry from the RCU domain
823 * to be properly refcounted. And the sequence number in the dentry
824 * validates *both* dentry counters, since we checked the sequence
825 * number of the parent after we got the child sequence number. So we
826 * know the parent must still be valid if the child sequence number is
15570086 827 */
4675ac39
AV
828 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
829 goto out;
03fa86e9 830 if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
84a2bd39 831 goto out_dput;
e5c832d5
LT
832 /*
833 * Sequence counts matched. Now make sure that the root is
834 * still valid and get it if required.
835 */
84a2bd39
AV
836 if (unlikely(!legitimize_root(nd)))
837 goto out_dput;
6e180327 838 leave_rcu(nd);
ae66db45 839 return true;
19660af7 840
7973387a
AV
841out2:
842 nd->path.mnt = NULL;
843out1:
844 nd->path.dentry = NULL;
e5c832d5 845out:
6e180327 846 leave_rcu(nd);
ae66db45 847 return false;
84a2bd39 848out_dput:
6e180327 849 leave_rcu(nd);
84a2bd39 850 dput(dentry);
ae66db45 851 return false;
31e6b01f
NP
852}
853
4ce16ef3 854static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
34286d66 855{
a89f8337
AV
856 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
857 return dentry->d_op->d_revalidate(dentry, flags);
858 else
859 return 1;
34286d66
NP
860}
861
9f1fafee
AV
862/**
863 * complete_walk - successful completion of path walk
864 * @nd: pointer nameidata
39159de2 865 *
9f1fafee
AV
866 * If we had been in RCU mode, drop out of it and legitimize nd->path.
867 * Revalidate the final result, unless we'd already done that during
868 * the path walk or the filesystem doesn't ask for it. Return 0 on
869 * success, -error on failure. In case of failure caller does not
870 * need to drop nd->path.
39159de2 871 */
9f1fafee 872static int complete_walk(struct nameidata *nd)
39159de2 873{
16c2cd71 874 struct dentry *dentry = nd->path.dentry;
39159de2 875 int status;
39159de2 876
9f1fafee 877 if (nd->flags & LOOKUP_RCU) {
adb21d2b
AS
878 /*
879 * We don't want to zero nd->root for scoped-lookups or
880 * externally-managed nd->root.
881 */
bcba1e7d
AV
882 if (!(nd->state & ND_ROOT_PRESET))
883 if (!(nd->flags & LOOKUP_IS_SCOPED))
884 nd->root.mnt = NULL;
6c6ec2b0 885 nd->flags &= ~LOOKUP_CACHED;
e36cffed 886 if (!try_to_unlazy(nd))
9f1fafee 887 return -ECHILD;
9f1fafee
AV
888 }
889
adb21d2b
AS
890 if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
891 /*
892 * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
893 * ever step outside the root during lookup" and should already
894 * be guaranteed by the rest of namei, we want to avoid a namei
895 * BUG resulting in userspace being given a path that was not
896 * scoped within the root at some point during the lookup.
897 *
898 * So, do a final sanity-check to make sure that in the
899 * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
900 * we won't silently return an fd completely outside of the
901 * requested root to userspace.
902 *
903 * Userspace could move the path outside the root after this
904 * check, but as discussed elsewhere this is not a concern (the
905 * resolved file was inside the root at some point).
906 */
907 if (!path_is_under(&nd->path, &nd->root))
908 return -EXDEV;
909 }
910
bcba1e7d 911 if (likely(!(nd->state & ND_JUMPED)))
16c2cd71
AV
912 return 0;
913
ecf3d1f1 914 if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
39159de2
JL
915 return 0;
916
ecf3d1f1 917 status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
39159de2
JL
918 if (status > 0)
919 return 0;
920
16c2cd71 921 if (!status)
39159de2 922 status = -ESTALE;
16c2cd71 923
39159de2
JL
924 return status;
925}
926
740a1678 927static int set_root(struct nameidata *nd)
31e6b01f 928{
7bd88377 929 struct fs_struct *fs = current->fs;
c28cc364 930
adb21d2b
AS
931 /*
932 * Jumping to the real root in a scoped-lookup is a BUG in namei, but we
933 * still have to ensure it doesn't happen because it will cause a breakout
934 * from the dirfd.
935 */
936 if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
937 return -ENOTRECOVERABLE;
938
9e6697e2
AV
939 if (nd->flags & LOOKUP_RCU) {
940 unsigned seq;
941
942 do {
943 seq = read_seqcount_begin(&fs->seq);
944 nd->root = fs->root;
945 nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
946 } while (read_seqcount_retry(&fs->seq, seq));
947 } else {
948 get_fs_root(fs, &nd->root);
bcba1e7d 949 nd->state |= ND_ROOT_GRABBED;
9e6697e2 950 }
740a1678 951 return 0;
31e6b01f
NP
952}
953
248fb5b9
AV
954static int nd_jump_root(struct nameidata *nd)
955{
adb21d2b
AS
956 if (unlikely(nd->flags & LOOKUP_BENEATH))
957 return -EXDEV;
72ba2929
AS
958 if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
959 /* Absolute path arguments to path_init() are allowed. */
960 if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
961 return -EXDEV;
962 }
740a1678
AS
963 if (!nd->root.mnt) {
964 int error = set_root(nd);
965 if (error)
966 return error;
967 }
248fb5b9
AV
968 if (nd->flags & LOOKUP_RCU) {
969 struct dentry *d;
970 nd->path = nd->root;
971 d = nd->path.dentry;
972 nd->inode = d->d_inode;
973 nd->seq = nd->root_seq;
82ef0698 974 if (read_seqcount_retry(&d->d_seq, nd->seq))
248fb5b9
AV
975 return -ECHILD;
976 } else {
977 path_put(&nd->path);
978 nd->path = nd->root;
979 path_get(&nd->path);
980 nd->inode = nd->path.dentry->d_inode;
981 }
bcba1e7d 982 nd->state |= ND_JUMPED;
248fb5b9
AV
983 return 0;
984}
985
b5fb63c1 986/*
6b255391 987 * Helper to directly jump to a known parsed path from ->get_link,
b5fb63c1
CH
988 * caller must have taken a reference to path beforehand.
989 */
ea4af4aa 990int nd_jump_link(const struct path *path)
b5fb63c1 991{
4b99d499 992 int error = -ELOOP;
6e77137b 993 struct nameidata *nd = current->nameidata;
b5fb63c1 994
4b99d499
AS
995 if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
996 goto err;
997
72ba2929
AS
998 error = -EXDEV;
999 if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
1000 if (nd->path.mnt != path->mnt)
1001 goto err;
1002 }
adb21d2b
AS
1003 /* Not currently safe for scoped-lookups. */
1004 if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
1005 goto err;
72ba2929 1006
4b99d499 1007 path_put(&nd->path);
b5fb63c1
CH
1008 nd->path = *path;
1009 nd->inode = nd->path.dentry->d_inode;
bcba1e7d 1010 nd->state |= ND_JUMPED;
1bc82070 1011 return 0;
4b99d499
AS
1012
1013err:
1014 path_put(path);
1015 return error;
b5fb63c1
CH
1016}
1017
b9ff4429 1018static inline void put_link(struct nameidata *nd)
574197e0 1019{
21c3003d 1020 struct saved *last = nd->stack + --nd->depth;
fceef393 1021 do_delayed_call(&last->done);
6548fae2
AV
1022 if (!(nd->flags & LOOKUP_RCU))
1023 path_put(&last->link);
574197e0
AV
1024}
1025
9c011be1
LC
1026static int sysctl_protected_symlinks __read_mostly;
1027static int sysctl_protected_hardlinks __read_mostly;
1028static int sysctl_protected_fifos __read_mostly;
1029static int sysctl_protected_regular __read_mostly;
1030
1031#ifdef CONFIG_SYSCTL
1032static struct ctl_table namei_sysctls[] = {
1033 {
1034 .procname = "protected_symlinks",
1035 .data = &sysctl_protected_symlinks,
1036 .maxlen = sizeof(int),
c7031c14 1037 .mode = 0644,
9c011be1
LC
1038 .proc_handler = proc_dointvec_minmax,
1039 .extra1 = SYSCTL_ZERO,
1040 .extra2 = SYSCTL_ONE,
1041 },
1042 {
1043 .procname = "protected_hardlinks",
1044 .data = &sysctl_protected_hardlinks,
1045 .maxlen = sizeof(int),
c7031c14 1046 .mode = 0644,
9c011be1
LC
1047 .proc_handler = proc_dointvec_minmax,
1048 .extra1 = SYSCTL_ZERO,
1049 .extra2 = SYSCTL_ONE,
1050 },
1051 {
1052 .procname = "protected_fifos",
1053 .data = &sysctl_protected_fifos,
1054 .maxlen = sizeof(int),
c7031c14 1055 .mode = 0644,
9c011be1
LC
1056 .proc_handler = proc_dointvec_minmax,
1057 .extra1 = SYSCTL_ZERO,
1058 .extra2 = SYSCTL_TWO,
1059 },
1060 {
1061 .procname = "protected_regular",
1062 .data = &sysctl_protected_regular,
1063 .maxlen = sizeof(int),
c7031c14 1064 .mode = 0644,
9c011be1
LC
1065 .proc_handler = proc_dointvec_minmax,
1066 .extra1 = SYSCTL_ZERO,
1067 .extra2 = SYSCTL_TWO,
1068 },
1069 { }
1070};
1071
1072static int __init init_fs_namei_sysctls(void)
1073{
1074 register_sysctl_init("fs", namei_sysctls);
1075 return 0;
1076}
1077fs_initcall(init_fs_namei_sysctls);
1078
1079#endif /* CONFIG_SYSCTL */
800179c9
KC
1080
1081/**
1082 * may_follow_link - Check symlink following for unsafe situations
55852635 1083 * @nd: nameidata pathwalk data
800179c9
KC
1084 *
1085 * In the case of the sysctl_protected_symlinks sysctl being enabled,
1086 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
1087 * in a sticky world-writable directory. This is to protect privileged
1088 * processes from failing races against path names that may change out
1089 * from under them by way of other users creating malicious symlinks.
1090 * It will permit symlinks to be followed only when outside a sticky
1091 * world-writable directory, or when the uid of the symlink and follower
1092 * match, or when the directory owner matches the symlink's owner.
1093 *
1094 * Returns 0 if following the symlink is allowed, -ve on error.
1095 */
ad6cc4c3 1096static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
800179c9 1097{
ba73d987 1098 struct user_namespace *mnt_userns;
a2bd096f 1099 vfsuid_t vfsuid;
ba73d987 1100
800179c9
KC
1101 if (!sysctl_protected_symlinks)
1102 return 0;
1103
ba73d987 1104 mnt_userns = mnt_user_ns(nd->path.mnt);
a2bd096f 1105 vfsuid = i_uid_into_vfsuid(mnt_userns, inode);
800179c9 1106 /* Allowed if owner and follower match. */
a2bd096f 1107 if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
800179c9
KC
1108 return 0;
1109
1110 /* Allowed if parent directory not sticky and world-writable. */
0f705953 1111 if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
800179c9
KC
1112 return 0;
1113
1114 /* Allowed if parent directory and link owner match. */
a2bd096f 1115 if (vfsuid_valid(nd->dir_vfsuid) && vfsuid_eq(nd->dir_vfsuid, vfsuid))
800179c9
KC
1116 return 0;
1117
31956502
AV
1118 if (nd->flags & LOOKUP_RCU)
1119 return -ECHILD;
1120
ea841baf 1121 audit_inode(nd->name, nd->stack[0].link.dentry, 0);
245d7369 1122 audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link");
800179c9
KC
1123 return -EACCES;
1124}
1125
1126/**
1127 * safe_hardlink_source - Check for safe hardlink conditions
4609e1f1 1128 * @idmap: idmap of the mount the inode was found from
800179c9
KC
1129 * @inode: the source inode to hardlink from
1130 *
1131 * Return false if at least one of the following conditions:
1132 * - inode is not a regular file
1133 * - inode is setuid
1134 * - inode is setgid and group-exec
1135 * - access failure for read and write
1136 *
1137 * Otherwise returns true.
1138 */
4609e1f1 1139static bool safe_hardlink_source(struct mnt_idmap *idmap,
ba73d987 1140 struct inode *inode)
800179c9
KC
1141{
1142 umode_t mode = inode->i_mode;
1143
1144 /* Special files should not get pinned to the filesystem. */
1145 if (!S_ISREG(mode))
1146 return false;
1147
1148 /* Setuid files should not get pinned to the filesystem. */
1149 if (mode & S_ISUID)
1150 return false;
1151
1152 /* Executable setgid files should not get pinned to the filesystem. */
1153 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
1154 return false;
1155
1156 /* Hardlinking to unreadable or unwritable sources is dangerous. */
4609e1f1 1157 if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE))
800179c9
KC
1158 return false;
1159
1160 return true;
1161}
1162
1163/**
1164 * may_linkat - Check permissions for creating a hardlink
4609e1f1
CB
1165 * @idmap: idmap of the mount the inode was found from
1166 * @link: the source to hardlink from
800179c9
KC
1167 *
1168 * Block hardlink when all of:
1169 * - sysctl_protected_hardlinks enabled
1170 * - fsuid does not match inode
1171 * - hardlink source is unsafe (see safe_hardlink_source() above)
f2ca3796 1172 * - not CAP_FOWNER in a namespace with the inode owner uid mapped
800179c9 1173 *
4609e1f1
CB
1174 * If the inode has been found through an idmapped mount the idmap of
1175 * the vfsmount must be passed through @idmap. This function will then take
1176 * care to map the inode according to @idmap before checking permissions.
ba73d987 1177 * On non-idmapped mounts or if permission checking is to be performed on the
4609e1f1 1178 * raw inode simply pass @nop_mnt_idmap.
ba73d987 1179 *
800179c9
KC
1180 * Returns 0 if successful, -ve on error.
1181 */
4609e1f1 1182int may_linkat(struct mnt_idmap *idmap, const struct path *link)
800179c9 1183{
4609e1f1 1184 struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
593d1ce8
EB
1185 struct inode *inode = link->dentry->d_inode;
1186
1187 /* Inode writeback is not safe when the uid or gid are invalid. */
a2bd096f
CB
1188 if (!vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
1189 !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
593d1ce8 1190 return -EOVERFLOW;
800179c9
KC
1191
1192 if (!sysctl_protected_hardlinks)
1193 return 0;
1194
800179c9
KC
1195 /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
1196 * otherwise, it must be a safe source.
1197 */
4609e1f1 1198 if (safe_hardlink_source(idmap, inode) ||
01beba79 1199 inode_owner_or_capable(idmap, inode))
800179c9
KC
1200 return 0;
1201
245d7369 1202 audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
800179c9
KC
1203 return -EPERM;
1204}
1205
30aba665
SM
1206/**
1207 * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
1208 * should be allowed, or not, on files that already
1209 * exist.
ba73d987 1210 * @mnt_userns: user namespace of the mount the inode was found from
2111c3c0 1211 * @nd: nameidata pathwalk data
30aba665
SM
1212 * @inode: the inode of the file to open
1213 *
1214 * Block an O_CREAT open of a FIFO (or a regular file) when:
1215 * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
1216 * - the file already exists
1217 * - we are in a sticky directory
1218 * - we don't own the file
1219 * - the owner of the directory doesn't own the file
1220 * - the directory is world writable
1221 * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
1222 * the directory doesn't have to be world writable: being group writable will
1223 * be enough.
1224 *
ba73d987
CB
1225 * If the inode has been found through an idmapped mount the user namespace of
1226 * the vfsmount must be passed through @mnt_userns. This function will then take
1227 * care to map the inode according to @mnt_userns before checking permissions.
1228 * On non-idmapped mounts or if permission checking is to be performed on the
1229 * raw inode simply passs init_user_ns.
1230 *
30aba665
SM
1231 * Returns 0 if the open is allowed, -ve on error.
1232 */
ba73d987
CB
1233static int may_create_in_sticky(struct user_namespace *mnt_userns,
1234 struct nameidata *nd, struct inode *const inode)
30aba665 1235{
ba73d987 1236 umode_t dir_mode = nd->dir_mode;
a2bd096f 1237 vfsuid_t dir_vfsuid = nd->dir_vfsuid;
ba73d987 1238
30aba665
SM
1239 if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
1240 (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
d0cb5018 1241 likely(!(dir_mode & S_ISVTX)) ||
a2bd096f
CB
1242 vfsuid_eq(i_uid_into_vfsuid(mnt_userns, inode), dir_vfsuid) ||
1243 vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), current_fsuid()))
30aba665
SM
1244 return 0;
1245
d0cb5018
AV
1246 if (likely(dir_mode & 0002) ||
1247 (dir_mode & 0020 &&
30aba665
SM
1248 ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
1249 (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
245d7369
KC
1250 const char *operation = S_ISFIFO(inode->i_mode) ?
1251 "sticky_create_fifo" :
1252 "sticky_create_regular";
1253 audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
30aba665
SM
1254 return -EACCES;
1255 }
1256 return 0;
1257}
1258
f015f126
DH
1259/*
1260 * follow_up - Find the mountpoint of path's vfsmount
1261 *
1262 * Given a path, find the mountpoint of its source file system.
1263 * Replace @path with the path of the mountpoint in the parent mount.
1264 * Up is towards /.
1265 *
1266 * Return 1 if we went up a level and 0 if we were already at the
1267 * root.
1268 */
bab77ebf 1269int follow_up(struct path *path)
1da177e4 1270{
0714a533
AV
1271 struct mount *mnt = real_mount(path->mnt);
1272 struct mount *parent;
1da177e4 1273 struct dentry *mountpoint;
99b7db7b 1274
48a066e7 1275 read_seqlock_excl(&mount_lock);
0714a533 1276 parent = mnt->mnt_parent;
3c0a6163 1277 if (parent == mnt) {
48a066e7 1278 read_sequnlock_excl(&mount_lock);
1da177e4
LT
1279 return 0;
1280 }
0714a533 1281 mntget(&parent->mnt);
a73324da 1282 mountpoint = dget(mnt->mnt_mountpoint);
48a066e7 1283 read_sequnlock_excl(&mount_lock);
bab77ebf
AV
1284 dput(path->dentry);
1285 path->dentry = mountpoint;
1286 mntput(path->mnt);
0714a533 1287 path->mnt = &parent->mnt;
1da177e4
LT
1288 return 1;
1289}
4d359507 1290EXPORT_SYMBOL(follow_up);
1da177e4 1291
7ef482fa
AV
1292static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
1293 struct path *path, unsigned *seqp)
1294{
1295 while (mnt_has_parent(m)) {
1296 struct dentry *mountpoint = m->mnt_mountpoint;
1297
1298 m = m->mnt_parent;
1299 if (unlikely(root->dentry == mountpoint &&
1300 root->mnt == &m->mnt))
1301 break;
1302 if (mountpoint != m->mnt.mnt_root) {
1303 path->mnt = &m->mnt;
1304 path->dentry = mountpoint;
1305 *seqp = read_seqcount_begin(&mountpoint->d_seq);
1306 return true;
1307 }
1308 }
1309 return false;
1310}
1311
2aa38470
AV
1312static bool choose_mountpoint(struct mount *m, const struct path *root,
1313 struct path *path)
1314{
1315 bool found;
1316
1317 rcu_read_lock();
1318 while (1) {
1319 unsigned seq, mseq = read_seqbegin(&mount_lock);
1320
1321 found = choose_mountpoint_rcu(m, root, path, &seq);
1322 if (unlikely(!found)) {
1323 if (!read_seqretry(&mount_lock, mseq))
1324 break;
1325 } else {
1326 if (likely(__legitimize_path(path, seq, mseq)))
1327 break;
1328 rcu_read_unlock();
1329 path_put(path);
1330 rcu_read_lock();
1331 }
1332 }
1333 rcu_read_unlock();
1334 return found;
1335}
1336
b5c84bf6 1337/*
9875cf80
DH
1338 * Perform an automount
1339 * - return -EISDIR to tell follow_managed() to stop and return the path we
1340 * were called with.
1da177e4 1341 */
1c9f5e06 1342static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
31e6b01f 1343{
25e195aa 1344 struct dentry *dentry = path->dentry;
9875cf80 1345
0ec26fd0
MS
1346 /* We don't want to mount if someone's just doing a stat -
1347 * unless they're stat'ing a directory and appended a '/' to
1348 * the name.
1349 *
1350 * We do, however, want to mount if someone wants to open or
1351 * create a file of any type under the mountpoint, wants to
1352 * traverse through the mountpoint or wants to open the
1353 * mounted directory. Also, autofs may mark negative dentries
1354 * as being automount points. These will need the attentions
1355 * of the daemon to instantiate them before they can be used.
9875cf80 1356 */
1c9f5e06 1357 if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
5d38f049 1358 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
25e195aa 1359 dentry->d_inode)
5d38f049 1360 return -EISDIR;
0ec26fd0 1361
1c9f5e06 1362 if (count && (*count)++ >= MAXSYMLINKS)
9875cf80
DH
1363 return -ELOOP;
1364
25e195aa 1365 return finish_automount(dentry->d_op->d_automount(path), path);
463ffb2e
AV
1366}
1367
9875cf80 1368/*
9deed3eb
AV
1369 * mount traversal - out-of-line part. One note on ->d_flags accesses -
1370 * dentries are pinned but not locked here, so negative dentry can go
1371 * positive right under us. Use of smp_load_acquire() provides a barrier
1372 * sufficient for ->d_inode and ->d_flags consistency.
9875cf80 1373 */
9deed3eb
AV
1374static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
1375 int *count, unsigned lookup_flags)
1da177e4 1376{
9deed3eb 1377 struct vfsmount *mnt = path->mnt;
9875cf80 1378 bool need_mntput = false;
8aef1884 1379 int ret = 0;
9875cf80 1380
9deed3eb 1381 while (flags & DCACHE_MANAGED_DENTRY) {
cc53ce53
DH
1382 /* Allow the filesystem to manage the transit without i_mutex
1383 * being held. */
d41efb52 1384 if (flags & DCACHE_MANAGE_TRANSIT) {
fb5f51c7 1385 ret = path->dentry->d_op->d_manage(path, false);
508c8772 1386 flags = smp_load_acquire(&path->dentry->d_flags);
cc53ce53 1387 if (ret < 0)
8aef1884 1388 break;
cc53ce53
DH
1389 }
1390
9deed3eb 1391 if (flags & DCACHE_MOUNTED) { // something's mounted on it..
9875cf80 1392 struct vfsmount *mounted = lookup_mnt(path);
9deed3eb 1393 if (mounted) { // ... in our namespace
9875cf80
DH
1394 dput(path->dentry);
1395 if (need_mntput)
1396 mntput(path->mnt);
1397 path->mnt = mounted;
1398 path->dentry = dget(mounted->mnt_root);
9deed3eb
AV
1399 // here we know it's positive
1400 flags = path->dentry->d_flags;
9875cf80
DH
1401 need_mntput = true;
1402 continue;
1403 }
9875cf80
DH
1404 }
1405
9deed3eb
AV
1406 if (!(flags & DCACHE_NEED_AUTOMOUNT))
1407 break;
9875cf80 1408
9deed3eb
AV
1409 // uncovered automount point
1410 ret = follow_automount(path, count, lookup_flags);
1411 flags = smp_load_acquire(&path->dentry->d_flags);
1412 if (ret < 0)
1413 break;
1da177e4 1414 }
8aef1884 1415
9deed3eb
AV
1416 if (ret == -EISDIR)
1417 ret = 0;
1418 // possible if you race with several mount --move
1419 if (need_mntput && path->mnt == mnt)
1420 mntput(path->mnt);
1421 if (!ret && unlikely(d_flags_negative(flags)))
d41efb52 1422 ret = -ENOENT;
9deed3eb 1423 *jumped = need_mntput;
8402752e 1424 return ret;
1da177e4
LT
1425}
1426
9deed3eb
AV
1427static inline int traverse_mounts(struct path *path, bool *jumped,
1428 int *count, unsigned lookup_flags)
1429{
1430 unsigned flags = smp_load_acquire(&path->dentry->d_flags);
1431
1432 /* fastpath */
1433 if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
1434 *jumped = false;
1435 if (unlikely(d_flags_negative(flags)))
1436 return -ENOENT;
1437 return 0;
1438 }
1439 return __traverse_mounts(path, flags, jumped, count, lookup_flags);
1440}
1441
cc53ce53 1442int follow_down_one(struct path *path)
1da177e4
LT
1443{
1444 struct vfsmount *mounted;
1445
1c755af4 1446 mounted = lookup_mnt(path);
1da177e4 1447 if (mounted) {
9393bd07
AV
1448 dput(path->dentry);
1449 mntput(path->mnt);
1450 path->mnt = mounted;
1451 path->dentry = dget(mounted->mnt_root);
1da177e4
LT
1452 return 1;
1453 }
1454 return 0;
1455}
4d359507 1456EXPORT_SYMBOL(follow_down_one);
1da177e4 1457
9deed3eb
AV
1458/*
1459 * Follow down to the covering mount currently visible to userspace. At each
1460 * point, the filesystem owning that dentry may be queried as to whether the
1461 * caller is permitted to proceed or not.
1462 */
1463int follow_down(struct path *path)
1464{
1465 struct vfsmount *mnt = path->mnt;
1466 bool jumped;
1467 int ret = traverse_mounts(path, &jumped, NULL, 0);
1468
1469 if (path->mnt != mnt)
1470 mntput(mnt);
1471 return ret;
1472}
1473EXPORT_SYMBOL(follow_down);
1474
9875cf80 1475/*
287548e4
AV
1476 * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
1477 * we meet a managed dentry that would need blocking.
9875cf80 1478 */
3bd8bc89 1479static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
9875cf80 1480{
ea936aeb
AV
1481 struct dentry *dentry = path->dentry;
1482 unsigned int flags = dentry->d_flags;
1483
1484 if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
1485 return true;
1486
1487 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1488 return false;
1489
62a7375e 1490 for (;;) {
62a7375e
IK
1491 /*
1492 * Don't forget we might have a non-mountpoint managed dentry
1493 * that wants to block transit.
1494 */
ea936aeb
AV
1495 if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
1496 int res = dentry->d_op->d_manage(path, true);
1497 if (res)
1498 return res == -EISDIR;
1499 flags = dentry->d_flags;
b8faf035 1500 }
62a7375e 1501
ea936aeb
AV
1502 if (flags & DCACHE_MOUNTED) {
1503 struct mount *mounted = __lookup_mnt(path->mnt, dentry);
1504 if (mounted) {
1505 path->mnt = &mounted->mnt;
1506 dentry = path->dentry = mounted->mnt.mnt_root;
bcba1e7d 1507 nd->state |= ND_JUMPED;
03fa86e9 1508 nd->next_seq = read_seqcount_begin(&dentry->d_seq);
ea936aeb 1509 flags = dentry->d_flags;
03fa86e9
AV
1510 // makes sure that non-RCU pathwalk could reach
1511 // this state.
20aac6c6
AV
1512 if (read_seqretry(&mount_lock, nd->m_seq))
1513 return false;
ea936aeb
AV
1514 continue;
1515 }
1516 if (read_seqretry(&mount_lock, nd->m_seq))
1517 return false;
1518 }
1519 return !(flags & DCACHE_NEED_AUTOMOUNT);
9875cf80 1520 }
287548e4
AV
1521}
1522
db3c9ade 1523static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
3bd8bc89 1524 struct path *path)
bd7c4b50 1525{
9deed3eb 1526 bool jumped;
db3c9ade 1527 int ret;
bd7c4b50 1528
db3c9ade
AV
1529 path->mnt = nd->path.mnt;
1530 path->dentry = dentry;
c153007b 1531 if (nd->flags & LOOKUP_RCU) {
03fa86e9 1532 unsigned int seq = nd->next_seq;
3bd8bc89 1533 if (likely(__follow_mount_rcu(nd, path)))
9deed3eb 1534 return 0;
03fa86e9 1535 // *path and nd->next_seq might've been clobbered
c153007b
AV
1536 path->mnt = nd->path.mnt;
1537 path->dentry = dentry;
03fa86e9
AV
1538 nd->next_seq = seq;
1539 if (!try_to_unlazy_next(nd, dentry))
1540 return -ECHILD;
c153007b 1541 }
9deed3eb
AV
1542 ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
1543 if (jumped) {
1544 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1545 ret = -EXDEV;
1546 else
bcba1e7d 1547 nd->state |= ND_JUMPED;
9deed3eb
AV
1548 }
1549 if (unlikely(ret)) {
1550 dput(path->dentry);
1551 if (path->mnt != nd->path.mnt)
1552 mntput(path->mnt);
bd7c4b50
AV
1553 }
1554 return ret;
1555}
1556
baa03890 1557/*
f4fdace9
OD
1558 * This looks up the name in dcache and possibly revalidates the found dentry.
1559 * NULL is returned if the dentry does not exist in the cache.
baa03890 1560 */
e3c13928
AV
1561static struct dentry *lookup_dcache(const struct qstr *name,
1562 struct dentry *dir,
6c51e513 1563 unsigned int flags)
baa03890 1564{
a89f8337 1565 struct dentry *dentry = d_lookup(dir, name);
bad61189 1566 if (dentry) {
a89f8337
AV
1567 int error = d_revalidate(dentry, flags);
1568 if (unlikely(error <= 0)) {
1569 if (!error)
1570 d_invalidate(dentry);
1571 dput(dentry);
1572 return ERR_PTR(error);
bad61189
MS
1573 }
1574 }
baa03890
NP
1575 return dentry;
1576}
1577
44396f4b 1578/*
a03ece5f
AV
1579 * Parent directory has inode locked exclusive. This is one
1580 * and only case when ->lookup() gets called on non in-lookup
1581 * dentries - as the matter of fact, this only gets called
1582 * when directory is guaranteed to have no in-lookup children
1583 * at all.
44396f4b 1584 */
e3c13928 1585static struct dentry *__lookup_hash(const struct qstr *name,
72bd866a 1586 struct dentry *base, unsigned int flags)
a3255546 1587{
6c51e513 1588 struct dentry *dentry = lookup_dcache(name, base, flags);
a03ece5f
AV
1589 struct dentry *old;
1590 struct inode *dir = base->d_inode;
a3255546 1591
6c51e513 1592 if (dentry)
bad61189 1593 return dentry;
a3255546 1594
a03ece5f
AV
1595 /* Don't create child dentry for a dead directory. */
1596 if (unlikely(IS_DEADDIR(dir)))
1597 return ERR_PTR(-ENOENT);
1598
6c51e513
AV
1599 dentry = d_alloc(base, name);
1600 if (unlikely(!dentry))
1601 return ERR_PTR(-ENOMEM);
1602
a03ece5f
AV
1603 old = dir->i_op->lookup(dir, dentry, flags);
1604 if (unlikely(old)) {
1605 dput(dentry);
1606 dentry = old;
1607 }
1608 return dentry;
a3255546
AV
1609}
1610
4cb64024 1611static struct dentry *lookup_fast(struct nameidata *nd)
1da177e4 1612{
31e6b01f 1613 struct dentry *dentry, *parent = nd->path.dentry;
5a18fff2 1614 int status = 1;
9875cf80 1615
b04f784e
NP
1616 /*
1617 * Rename seqlock is not required here because in the off chance
5d0f49c1
AV
1618 * of a false negative due to a concurrent rename, the caller is
1619 * going to fall back to non-racy lookup.
b04f784e 1620 */
31e6b01f 1621 if (nd->flags & LOOKUP_RCU) {
03fa86e9 1622 dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
5d0f49c1 1623 if (unlikely(!dentry)) {
e36cffed 1624 if (!try_to_unlazy(nd))
20e34357
AV
1625 return ERR_PTR(-ECHILD);
1626 return NULL;
5d0f49c1 1627 }
5a18fff2 1628
12f8ad4b
LT
1629 /*
1630 * This sequence count validates that the parent had no
1631 * changes while we did the lookup of the dentry above.
12f8ad4b 1632 */
4cb64024 1633 if (read_seqcount_retry(&parent->d_seq, nd->seq))
20e34357 1634 return ERR_PTR(-ECHILD);
5a18fff2 1635
a89f8337 1636 status = d_revalidate(dentry, nd->flags);
c153007b 1637 if (likely(status > 0))
20e34357 1638 return dentry;
03fa86e9 1639 if (!try_to_unlazy_next(nd, dentry))
20e34357 1640 return ERR_PTR(-ECHILD);
26ddb45e 1641 if (status == -ECHILD)
209a7fb2
AV
1642 /* we'd been told to redo it in non-rcu mode */
1643 status = d_revalidate(dentry, nd->flags);
5a18fff2 1644 } else {
e97cdc87 1645 dentry = __d_lookup(parent, &nd->last);
5d0f49c1 1646 if (unlikely(!dentry))
20e34357 1647 return NULL;
a89f8337 1648 status = d_revalidate(dentry, nd->flags);
9875cf80 1649 }
5a18fff2 1650 if (unlikely(status <= 0)) {
e9742b53 1651 if (!status)
5d0f49c1 1652 d_invalidate(dentry);
5542aa2f 1653 dput(dentry);
20e34357 1654 return ERR_PTR(status);
24643087 1655 }
20e34357 1656 return dentry;
697f514d
MS
1657}
1658
1659/* Fast lookup failed, do it the slow way */
88d8331a
AV
1660static struct dentry *__lookup_slow(const struct qstr *name,
1661 struct dentry *dir,
1662 unsigned int flags)
697f514d 1663{
88d8331a 1664 struct dentry *dentry, *old;
1936386e 1665 struct inode *inode = dir->d_inode;
d9171b93 1666 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1936386e 1667
1936386e 1668 /* Don't go there if it's already dead */
94bdd655 1669 if (unlikely(IS_DEADDIR(inode)))
88d8331a 1670 return ERR_PTR(-ENOENT);
94bdd655 1671again:
d9171b93 1672 dentry = d_alloc_parallel(dir, name, &wq);
94bdd655 1673 if (IS_ERR(dentry))
88d8331a 1674 return dentry;
94bdd655 1675 if (unlikely(!d_in_lookup(dentry))) {
c64cd6e3
AV
1676 int error = d_revalidate(dentry, flags);
1677 if (unlikely(error <= 0)) {
1678 if (!error) {
1679 d_invalidate(dentry);
949a852e 1680 dput(dentry);
c64cd6e3 1681 goto again;
949a852e 1682 }
c64cd6e3
AV
1683 dput(dentry);
1684 dentry = ERR_PTR(error);
949a852e 1685 }
94bdd655
AV
1686 } else {
1687 old = inode->i_op->lookup(inode, dentry, flags);
1688 d_lookup_done(dentry);
1689 if (unlikely(old)) {
1690 dput(dentry);
1691 dentry = old;
949a852e
AV
1692 }
1693 }
e3c13928 1694 return dentry;
1da177e4
LT
1695}
1696
88d8331a
AV
1697static struct dentry *lookup_slow(const struct qstr *name,
1698 struct dentry *dir,
1699 unsigned int flags)
1700{
1701 struct inode *inode = dir->d_inode;
1702 struct dentry *res;
1703 inode_lock_shared(inode);
1704 res = __lookup_slow(name, dir, flags);
1705 inode_unlock_shared(inode);
1706 return res;
1707}
1708
4609e1f1 1709static inline int may_lookup(struct mnt_idmap *idmap,
ba73d987 1710 struct nameidata *nd)
52094c8a
AV
1711{
1712 if (nd->flags & LOOKUP_RCU) {
4609e1f1 1713 int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
e36cffed 1714 if (err != -ECHILD || !try_to_unlazy(nd))
52094c8a 1715 return err;
52094c8a 1716 }
4609e1f1 1717 return inode_permission(idmap, nd->inode, MAY_EXEC);
52094c8a
AV
1718}
1719
03fa86e9 1720static int reserve_stack(struct nameidata *nd, struct path *link)
49055906 1721{
49055906
AV
1722 if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
1723 return -ELOOP;
4542576b
AV
1724
1725 if (likely(nd->depth != EMBEDDED_LEVELS))
1726 return 0;
1727 if (likely(nd->stack != nd->internal))
1728 return 0;
60ef60c7 1729 if (likely(nd_alloc_stack(nd)))
49055906 1730 return 0;
60ef60c7
AV
1731
1732 if (nd->flags & LOOKUP_RCU) {
1733 // we need to grab link before we do unlazy. And we can't skip
1734 // unlazy even if we fail to grab the link - cleanup needs it
03fa86e9 1735 bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
60ef60c7 1736
e5ca024e 1737 if (!try_to_unlazy(nd) || !grabbed_link)
60ef60c7
AV
1738 return -ECHILD;
1739
1740 if (nd_alloc_stack(nd))
1741 return 0;
49055906 1742 }
60ef60c7 1743 return -ENOMEM;
49055906
AV
1744}
1745
/* Flag bits for the "flags" argument of step_into() and pick_link(). */
enum {
	/*
	 * A symlink is followed only if LOOKUP_FOLLOW is set, and
	 * may_follow_link() is consulted before following it.
	 */
	WALK_TRAILING = 1,
	/* NOTE(review): not consumed in this part of the file */
	WALK_MORE = 2,
	/* never follow a symlink at this step */
	WALK_NOFOLLOW = 4
};
1747
06708adb 1748static const char *pick_link(struct nameidata *nd, struct path *link,
03fa86e9 1749 struct inode *inode, int flags)
d63ff28f 1750{
1cf2665b 1751 struct saved *last;
ad6cc4c3 1752 const char *res;
03fa86e9 1753 int error = reserve_stack(nd, link);
ad6cc4c3 1754
626de996 1755 if (unlikely(error)) {
49055906 1756 if (!(nd->flags & LOOKUP_RCU))
bc40aee0 1757 path_put(link);
49055906 1758 return ERR_PTR(error);
626de996 1759 }
ab104923 1760 last = nd->stack + nd->depth++;
1cf2665b 1761 last->link = *link;
fceef393 1762 clear_delayed_call(&last->done);
03fa86e9 1763 last->seq = nd->next_seq;
ad6cc4c3 1764
b1a81972 1765 if (flags & WALK_TRAILING) {
ad6cc4c3
AV
1766 error = may_follow_link(nd, inode);
1767 if (unlikely(error))
1768 return ERR_PTR(error);
1769 }
1770
dab741e0
MN
1771 if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) ||
1772 unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
ad6cc4c3
AV
1773 return ERR_PTR(-ELOOP);
1774
1775 if (!(nd->flags & LOOKUP_RCU)) {
1776 touch_atime(&last->link);
1777 cond_resched();
1778 } else if (atime_needs_update(&last->link, inode)) {
e36cffed 1779 if (!try_to_unlazy(nd))
ad6cc4c3
AV
1780 return ERR_PTR(-ECHILD);
1781 touch_atime(&last->link);
1782 }
1783
1784 error = security_inode_follow_link(link->dentry, inode,
1785 nd->flags & LOOKUP_RCU);
1786 if (unlikely(error))
1787 return ERR_PTR(error);
1788
ad6cc4c3
AV
1789 res = READ_ONCE(inode->i_link);
1790 if (!res) {
1791 const char * (*get)(struct dentry *, struct inode *,
1792 struct delayed_call *);
1793 get = inode->i_op->get_link;
1794 if (nd->flags & LOOKUP_RCU) {
1795 res = get(NULL, inode, &last->done);
e36cffed 1796 if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))
ad6cc4c3 1797 res = get(link->dentry, inode, &last->done);
ad6cc4c3
AV
1798 } else {
1799 res = get(link->dentry, inode, &last->done);
1800 }
1801 if (!res)
1802 goto all_done;
1803 if (IS_ERR(res))
1804 return res;
1805 }
1806 if (*res == '/') {
1807 error = nd_jump_root(nd);
1808 if (unlikely(error))
1809 return ERR_PTR(error);
1810 while (unlikely(*++res == '/'))
1811 ;
1812 }
1813 if (*res)
1814 return res;
1815all_done: // pure jump
1816 put_link(nd);
1817 return NULL;
d63ff28f
AV
1818}
1819
3ddcd056
LT
1820/*
1821 * Do we need to follow links? We _really_ want to be able
1822 * to do this check without having to look at inode->i_op,
1823 * so we keep a cache of "no, this doesn't need follow_link"
1824 * for the common case.
03fa86e9
AV
1825 *
1826 * NOTE: dentry must be what nd->next_seq had been sampled from.
3ddcd056 1827 */
b0417d2c 1828static const char *step_into(struct nameidata *nd, int flags,
a4f5b521 1829 struct dentry *dentry)
3ddcd056 1830{
cbae4d12 1831 struct path path;
a4f5b521 1832 struct inode *inode;
3bd8bc89 1833 int err = handle_mounts(nd, dentry, &path);
cbae4d12
AV
1834
1835 if (err < 0)
b0417d2c 1836 return ERR_PTR(err);
3bd8bc89 1837 inode = path.dentry->d_inode;
cbae4d12 1838 if (likely(!d_is_symlink(path.dentry)) ||
8c4efe22 1839 ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
aca2903e 1840 (flags & WALK_NOFOLLOW)) {
8f64fb1c 1841 /* not a symlink or should not follow */
3bd8bc89
AV
1842 if (nd->flags & LOOKUP_RCU) {
1843 if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
1844 return ERR_PTR(-ECHILD);
1845 if (unlikely(!inode))
1846 return ERR_PTR(-ENOENT);
1847 } else {
c99687a0
AV
1848 dput(nd->path.dentry);
1849 if (nd->path.mnt != path.mnt)
1850 mntput(nd->path.mnt);
1851 }
1852 nd->path = path;
8f64fb1c 1853 nd->inode = inode;
03fa86e9 1854 nd->seq = nd->next_seq;
b0417d2c 1855 return NULL;
8f64fb1c 1856 }
a7f77542 1857 if (nd->flags & LOOKUP_RCU) {
84f0cd9e 1858 /* make sure that d_is_symlink above matches inode */
03fa86e9 1859 if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
b0417d2c 1860 return ERR_PTR(-ECHILD);
84f0cd9e
AV
1861 } else {
1862 if (path.mnt == nd->path.mnt)
1863 mntget(path.mnt);
a7f77542 1864 }
03fa86e9 1865 return pick_link(nd, &path, inode, flags);
3ddcd056
LT
1866}
1867
b16c001d 1868static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
957dd41d 1869{
12487f30 1870 struct dentry *parent, *old;
957dd41d 1871
12487f30
AV
1872 if (path_equal(&nd->path, &nd->root))
1873 goto in_root;
1874 if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
7ef482fa 1875 struct path path;
efe772d6 1876 unsigned seq;
7ef482fa
AV
1877 if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
1878 &nd->root, &path, &seq))
1879 goto in_root;
efe772d6
AV
1880 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1881 return ERR_PTR(-ECHILD);
1882 nd->path = path;
1883 nd->inode = path.dentry->d_inode;
1884 nd->seq = seq;
03fa86e9 1885 // makes sure that non-RCU pathwalk could reach this state
82ef0698 1886 if (read_seqretry(&mount_lock, nd->m_seq))
efe772d6
AV
1887 return ERR_PTR(-ECHILD);
1888 /* we know that mountpoint was pinned */
957dd41d 1889 }
12487f30
AV
1890 old = nd->path.dentry;
1891 parent = old->d_parent;
03fa86e9
AV
1892 nd->next_seq = read_seqcount_begin(&parent->d_seq);
1893 // makes sure that non-RCU pathwalk could reach this state
82ef0698 1894 if (read_seqcount_retry(&old->d_seq, nd->seq))
12487f30
AV
1895 return ERR_PTR(-ECHILD);
1896 if (unlikely(!path_connected(nd->path.mnt, parent)))
1897 return ERR_PTR(-ECHILD);
1898 return parent;
1899in_root:
82ef0698 1900 if (read_seqretry(&mount_lock, nd->m_seq))
efe772d6 1901 return ERR_PTR(-ECHILD);
c2df1968
AV
1902 if (unlikely(nd->flags & LOOKUP_BENEATH))
1903 return ERR_PTR(-ECHILD);
03fa86e9 1904 nd->next_seq = nd->seq;
51c6546c 1905 return nd->path.dentry;
957dd41d
AV
1906}
1907
b16c001d 1908static struct dentry *follow_dotdot(struct nameidata *nd)
957dd41d 1909{
12487f30
AV
1910 struct dentry *parent;
1911
1912 if (path_equal(&nd->path, &nd->root))
1913 goto in_root;
1914 if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
2aa38470
AV
1915 struct path path;
1916
1917 if (!choose_mountpoint(real_mount(nd->path.mnt),
1918 &nd->root, &path))
1919 goto in_root;
165200d6
AV
1920 path_put(&nd->path);
1921 nd->path = path;
2aa38470 1922 nd->inode = path.dentry->d_inode;
165200d6
AV
1923 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1924 return ERR_PTR(-EXDEV);
957dd41d 1925 }
12487f30
AV
1926 /* rare case of legitimate dget_parent()... */
1927 parent = dget_parent(nd->path.dentry);
1928 if (unlikely(!path_connected(nd->path.mnt, parent))) {
1929 dput(parent);
1930 return ERR_PTR(-ENOENT);
1931 }
12487f30
AV
1932 return parent;
1933
1934in_root:
c2df1968
AV
1935 if (unlikely(nd->flags & LOOKUP_BENEATH))
1936 return ERR_PTR(-EXDEV);
51c6546c 1937 return dget(nd->path.dentry);
957dd41d
AV
1938}
1939
7521f22b 1940static const char *handle_dots(struct nameidata *nd, int type)
957dd41d
AV
1941{
1942 if (type == LAST_DOTDOT) {
7521f22b 1943 const char *error = NULL;
c2df1968 1944 struct dentry *parent;
957dd41d
AV
1945
1946 if (!nd->root.mnt) {
7521f22b 1947 error = ERR_PTR(set_root(nd));
957dd41d
AV
1948 if (error)
1949 return error;
1950 }
1951 if (nd->flags & LOOKUP_RCU)
b16c001d 1952 parent = follow_dotdot_rcu(nd);
957dd41d 1953 else
b16c001d 1954 parent = follow_dotdot(nd);
c2df1968
AV
1955 if (IS_ERR(parent))
1956 return ERR_CAST(parent);
a4f5b521 1957 error = step_into(nd, WALK_NOFOLLOW, parent);
c2df1968 1958 if (unlikely(error))
957dd41d
AV
1959 return error;
1960
1961 if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
1962 /*
1963 * If there was a racing rename or mount along our
1964 * path, then we can't be sure that ".." hasn't jumped
1965 * above nd->root (and so userspace should retry or use
1966 * some fallback).
1967 */
1968 smp_rmb();
82ef0698 1969 if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))
7521f22b 1970 return ERR_PTR(-EAGAIN);
82ef0698 1971 if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))
7521f22b 1972 return ERR_PTR(-EAGAIN);
957dd41d
AV
1973 }
1974 }
7521f22b 1975 return NULL;
957dd41d
AV
1976}
1977
92d27016 1978static const char *walk_component(struct nameidata *nd, int flags)
ce57dfc1 1979{
db3c9ade 1980 struct dentry *dentry;
ce57dfc1
AV
1981 /*
1982 * "." and ".." are special - ".." especially so because it has
1983 * to be able to know about the current root directory and
1984 * parent relationships.
1985 */
4693a547 1986 if (unlikely(nd->last_type != LAST_NORM)) {
1c4ff1a8 1987 if (!(flags & WALK_MORE) && nd->depth)
4693a547 1988 put_link(nd);
7521f22b 1989 return handle_dots(nd, nd->last_type);
4693a547 1990 }
4cb64024 1991 dentry = lookup_fast(nd);
20e34357 1992 if (IS_ERR(dentry))
92d27016 1993 return ERR_CAST(dentry);
20e34357 1994 if (unlikely(!dentry)) {
db3c9ade
AV
1995 dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
1996 if (IS_ERR(dentry))
92d27016 1997 return ERR_CAST(dentry);
ce57dfc1 1998 }
56676ec3
AV
1999 if (!(flags & WALK_MORE) && nd->depth)
2000 put_link(nd);
a4f5b521 2001 return step_into(nd, flags, dentry);
ce57dfc1
AV
2002}
2003
bfcfaa77
LT
/*
 * We can do the critical dentry name comparison and hashing
 * operations one word at a time, but we are limited to:
 *
 * - Architectures with fast unaligned word accesses. We could
 *   do a "get_unaligned()" if this helps and is sufficiently
 *   fast.
 *
 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
 *   do not trap on the (extremely unlikely) case of a page
 *   crossing operation).
 *
 * - Furthermore, we need an efficient 64-bit compile for the
 *   64-bit case in order to generate the "number of bytes in
 *   the final mask". Again, that could be replaced with an
 *   efficient population count instruction or similar.
 */
2021#ifdef CONFIG_DCACHE_WORD_ACCESS
2022
f68e556e 2023#include <asm/word-at-a-time.h>
bfcfaa77 2024
468a9428 2025#ifdef HASH_MIX
bfcfaa77 2026
468a9428 2027/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
bfcfaa77 2028
468a9428 2029#elif defined(CONFIG_64BIT)
/*
 * Mixing step for the 64-bit case.
 *
 * Register pressure in the mixing function is an issue, particularly
 * on 32-bit x86, but almost any function requires one state value and
 * one temporary.  This one is instead built around two state values
 * (x, y) and no temporaries.
 *
 * It cannot create a collision in only two iterations, so we have two
 * iterations to achieve avalanche.  Those two iterations contain six
 * layers of mixing, which is enough to spread one bit's influence out
 * to 2^6 = 64 state bits.
 *
 * Rotate constants are scored by considering either 64 one-bit input
 * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
 * probability of that delta causing a change to each of the 128 output
 * bits, using a sample of random initial states.
 *
 * The Shannon entropy of the computed probabilities is then summed
 * to produce a score.  Ideally, any input change has a 50% chance of
 * toggling any given output bit.
 *
 * Mixing scores (in bits) for (12,45):
 * Input delta: 1-bit      2-bit
 * 1 round:     713.3    42542.6
 * 2 rounds:   2753.7   140389.8
 * 3 rounds:   5954.1   233458.2
 * 4 rounds:   7862.6   256672.2
 * Perfect:    8192     258048
 *            (64*128) (64*63/2 * 128)
 */
#define HASH_MIX(x, y, a)		\
	(	x ^= (a),		\
		y ^= x,			\
		x = rol64(x, 12),	\
		x += y,			\
		y = rol64(y, 45),	\
		y *= 9			)
bfcfaa77 2064
0fed3ac8 2065/*
2a18da7a
GS
2066 * Fold two longs into one 32-bit hash value. This must be fast, but
2067 * latency isn't quite as critical, as there is a fair bit of additional
2068 * work done before the hash value is used.
0fed3ac8 2069 */
2a18da7a 2070static inline unsigned int fold_hash(unsigned long x, unsigned long y)
0fed3ac8 2071{
2a18da7a
GS
2072 y ^= x * GOLDEN_RATIO_64;
2073 y *= GOLDEN_RATIO_64;
2074 return y >> 32;
0fed3ac8
GS
2075}
2076
bfcfaa77
LT
2077#else /* 32-bit case */
2078
2a18da7a
GS
/*
 * Mixing step for the 32-bit case.
 *
 * Mixing scores (in bits) for (7,20):
 * Input delta: 1-bit      2-bit
 * 1 round:     330.3     9201.6
 * 2 rounds:   1246.4    25475.4
 * 3 rounds:   1907.1    31295.1
 * 4 rounds:   2042.3    31718.6
 * Perfect:    2048      31744
 *            (32*64)   (32*31/2 * 64)
 */
#define HASH_MIX(x, y, a)		\
	(	x ^= (a),		\
		y ^= x,			\
		x = rol32(x, 7),	\
		x += y,			\
		y = rol32(y, 20),	\
		y *= 9			)
bfcfaa77 2094
/* Collapse the two longs of hash state into one 32-bit value. */
static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
	/* Use arch-optimized multiply if one exists */
	unsigned long mixed = y ^ __hash_32(x);

	return __hash_32(mixed);
}
2100
bfcfaa77
LT
2101#endif
2102
2a18da7a
GS
/*
 * Return the hash of a string of known length.  This is carefully
 * designed to match hash_name(), which is the more critical function:
 * whole words are fed through HASH_MIX(), and a trailing partial word
 * (if any) is masked down to its payload bytes and folded into x, the
 * same way hash_name() treats the word in which it finds the delimiter.
 */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
	unsigned long word, x = 0, y = (unsigned long)salt;

	/* full words */
	while (len >= sizeof(unsigned long)) {
		word = load_unaligned_zeropad(name);
		HASH_MIX(x, y, word);
		name += sizeof(unsigned long);
		len -= sizeof(unsigned long);
	}

	/* leftover bytes, fewer than one word */
	if (len) {
		word = load_unaligned_zeropad(name);
		x ^= word & bytemask_from_count(len);
	}

	return fold_hash(x, y);
}
EXPORT_SYMBOL(full_name_hash);
2129
fcfd2fbf 2130/* Return the "hash_len" (hash and length) of a null-terminated string */
8387ff25 2131u64 hashlen_string(const void *salt, const char *name)
fcfd2fbf 2132{
8387ff25
LT
2133 unsigned long a = 0, x = 0, y = (unsigned long)salt;
2134 unsigned long adata, mask, len;
fcfd2fbf
GS
2135 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
2136
8387ff25
LT
2137 len = 0;
2138 goto inside;
2139
fcfd2fbf 2140 do {
2a18da7a 2141 HASH_MIX(x, y, a);
fcfd2fbf 2142 len += sizeof(unsigned long);
8387ff25 2143inside:
fcfd2fbf
GS
2144 a = load_unaligned_zeropad(name+len);
2145 } while (!has_zero(a, &adata, &constants));
2146
2147 adata = prep_zero_mask(a, adata, &constants);
2148 mask = create_zero_mask(adata);
2a18da7a 2149 x ^= a & zero_bytemask(mask);
fcfd2fbf 2150
2a18da7a 2151 return hashlen_create(fold_hash(x, y), len + find_zero(mask));
fcfd2fbf
GS
2152}
2153EXPORT_SYMBOL(hashlen_string);
2154
bfcfaa77
LT
2155/*
2156 * Calculate the length and hash of the path component, and
d6bb3e90 2157 * return the "hash_len" as the result.
bfcfaa77 2158 */
8387ff25 2159static inline u64 hash_name(const void *salt, const char *name)
bfcfaa77 2160{
8387ff25
LT
2161 unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
2162 unsigned long adata, bdata, mask, len;
36126f8f 2163 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
bfcfaa77 2164
8387ff25
LT
2165 len = 0;
2166 goto inside;
2167
bfcfaa77 2168 do {
2a18da7a 2169 HASH_MIX(x, y, a);
bfcfaa77 2170 len += sizeof(unsigned long);
8387ff25 2171inside:
e419b4cc 2172 a = load_unaligned_zeropad(name+len);
36126f8f
LT
2173 b = a ^ REPEAT_BYTE('/');
2174 } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
2175
2176 adata = prep_zero_mask(a, adata, &constants);
2177 bdata = prep_zero_mask(b, bdata, &constants);
36126f8f 2178 mask = create_zero_mask(adata | bdata);
2a18da7a 2179 x ^= a & zero_bytemask(mask);
36126f8f 2180
2a18da7a 2181 return hashlen_create(fold_hash(x, y), len + find_zero(mask));
bfcfaa77
LT
2182}
2183
2a18da7a 2184#else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
bfcfaa77 2185
/* Return the hash of a string of known length, one byte at a time */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
	unsigned long hash = init_name_hash(salt);
	unsigned int i;

	for (i = 0; i < len; i++)
		hash = partial_name_hash((unsigned char)name[i], hash);

	return end_name_hash(hash);
}
EXPORT_SYMBOL(full_name_hash);
0145acc2 2195
fcfd2fbf 2196/* Return the "hash_len" (hash and length) of a null-terminated string */
8387ff25 2197u64 hashlen_string(const void *salt, const char *name)
fcfd2fbf 2198{
8387ff25 2199 unsigned long hash = init_name_hash(salt);
fcfd2fbf
GS
2200 unsigned long len = 0, c;
2201
2202 c = (unsigned char)*name;
e0ab7af9 2203 while (c) {
fcfd2fbf
GS
2204 len++;
2205 hash = partial_name_hash(c, hash);
2206 c = (unsigned char)name[len];
e0ab7af9 2207 }
fcfd2fbf
GS
2208 return hashlen_create(end_name_hash(hash), len);
2209}
f2a031b6 2210EXPORT_SYMBOL(hashlen_string);
fcfd2fbf 2211
200e9ef7
LT
2212/*
2213 * We know there's a real path component here of at least
2214 * one character.
2215 */
8387ff25 2216static inline u64 hash_name(const void *salt, const char *name)
200e9ef7 2217{
8387ff25 2218 unsigned long hash = init_name_hash(salt);
200e9ef7
LT
2219 unsigned long len = 0, c;
2220
2221 c = (unsigned char)*name;
2222 do {
2223 len++;
2224 hash = partial_name_hash(c, hash);
2225 c = (unsigned char)name[len];
2226 } while (c && c != '/');
d6bb3e90 2227 return hashlen_create(end_name_hash(hash), len);
200e9ef7
LT
2228}
2229
bfcfaa77
LT
2230#endif
2231
1da177e4
LT
2232/*
2233 * Name resolution.
ea3834d9
PM
2234 * This is the basic name resolution function, turning a pathname into
2235 * the final dentry. We expect 'base' to be positive and a directory.
1da177e4 2236 *
ea3834d9
PM
2237 * Returns 0 and nd will have valid dentry and mnt on success.
2238 * Returns error and drops reference to input namei data on failure.
1da177e4 2239 */
6de88d72 2240static int link_path_walk(const char *name, struct nameidata *nd)
1da177e4 2241{
d8d4611a 2242 int depth = 0; // depth <= nd->depth
1da177e4 2243 int err;
32cd7468 2244
b4c03536 2245 nd->last_type = LAST_ROOT;
c108837e 2246 nd->flags |= LOOKUP_PARENT;
9b5858e9
AV
2247 if (IS_ERR(name))
2248 return PTR_ERR(name);
1da177e4
LT
2249 while (*name=='/')
2250 name++;
1a97d899
AV
2251 if (!*name) {
2252 nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
9e18f10a 2253 return 0;
1a97d899 2254 }
1da177e4 2255
1da177e4
LT
2256 /* At this point we know we have a real path component. */
2257 for(;;) {
4609e1f1 2258 struct mnt_idmap *idmap;
549c7297 2259 struct user_namespace *mnt_userns;
92d27016 2260 const char *link;
d6bb3e90 2261 u64 hash_len;
fe479a58 2262 int type;
1da177e4 2263
4609e1f1
CB
2264 idmap = mnt_idmap(nd->path.mnt);
2265 mnt_userns = mnt_idmap_owner(idmap);
2266 err = may_lookup(idmap, nd);
2a18da7a 2267 if (err)
3595e234 2268 return err;
1da177e4 2269
8387ff25 2270 hash_len = hash_name(nd->path.dentry, name);
1da177e4 2271
fe479a58 2272 type = LAST_NORM;
d6bb3e90 2273 if (name[0] == '.') switch (hashlen_len(hash_len)) {
fe479a58 2274 case 2:
200e9ef7 2275 if (name[1] == '.') {
fe479a58 2276 type = LAST_DOTDOT;
bcba1e7d 2277 nd->state |= ND_JUMPED;
16c2cd71 2278 }
fe479a58
AV
2279 break;
2280 case 1:
2281 type = LAST_DOT;
2282 }
5a202bcd
AV
2283 if (likely(type == LAST_NORM)) {
2284 struct dentry *parent = nd->path.dentry;
bcba1e7d 2285 nd->state &= ~ND_JUMPED;
5a202bcd 2286 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
a060dc50 2287 struct qstr this = { { .hash_len = hash_len }, .name = name };
da53be12 2288 err = parent->d_op->d_hash(parent, &this);
5a202bcd 2289 if (err < 0)
3595e234 2290 return err;
d6bb3e90
LT
2291 hash_len = this.hash_len;
2292 name = this.name;
5a202bcd
AV
2293 }
2294 }
fe479a58 2295
d6bb3e90
LT
2296 nd->last.hash_len = hash_len;
2297 nd->last.name = name;
5f4a6a69
AV
2298 nd->last_type = type;
2299
d6bb3e90
LT
2300 name += hashlen_len(hash_len);
2301 if (!*name)
bdf6cbf1 2302 goto OK;
200e9ef7
LT
2303 /*
2304 * If it wasn't NUL, we know it was '/'. Skip that
2305 * slash, and continue until no more slashes.
2306 */
2307 do {
d6bb3e90
LT
2308 name++;
2309 } while (unlikely(*name == '/'));
8620c238
AV
2310 if (unlikely(!*name)) {
2311OK:
d8d4611a 2312 /* pathname or trailing symlink, done */
c108837e 2313 if (!depth) {
a2bd096f 2314 nd->dir_vfsuid = i_uid_into_vfsuid(mnt_userns, nd->inode);
0f705953 2315 nd->dir_mode = nd->inode->i_mode;
c108837e 2316 nd->flags &= ~LOOKUP_PARENT;
8620c238 2317 return 0;
c108837e 2318 }
8620c238 2319 /* last component of nested symlink */
d8d4611a 2320 name = nd->stack[--depth].name;
8c4efe22 2321 link = walk_component(nd, 0);
1c4ff1a8
AV
2322 } else {
2323 /* not the last component */
8c4efe22 2324 link = walk_component(nd, WALK_MORE);
8620c238 2325 }
92d27016
AV
2326 if (unlikely(link)) {
2327 if (IS_ERR(link))
2328 return PTR_ERR(link);
2329 /* a symlink to follow */
d8d4611a 2330 nd->stack[depth++].name = name;
92d27016
AV
2331 name = link;
2332 continue;
31e6b01f 2333 }
97242f99
AV
2334 if (unlikely(!d_can_lookup(nd->path.dentry))) {
2335 if (nd->flags & LOOKUP_RCU) {
e36cffed 2336 if (!try_to_unlazy(nd))
97242f99
AV
2337 return -ECHILD;
2338 }
3595e234 2339 return -ENOTDIR;
97242f99 2340 }
1da177e4 2341 }
1da177e4
LT
2342}
2343
edc2b1da 2344/* must be paired with terminate_walk() */
c8a53ee5 2345static const char *path_init(struct nameidata *nd, unsigned flags)
31e6b01f 2346{
740a1678 2347 int error;
c8a53ee5 2348 const char *s = nd->name->name;
31e6b01f 2349
6c6ec2b0
JA
2350 /* LOOKUP_CACHED requires RCU, ask caller to retry */
2351 if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
2352 return ERR_PTR(-EAGAIN);
2353
c0eb027e
LT
2354 if (!*s)
2355 flags &= ~LOOKUP_RCU;
edc2b1da
AV
2356 if (flags & LOOKUP_RCU)
2357 rcu_read_lock();
03fa86e9
AV
2358 else
2359 nd->seq = nd->next_seq = 0;
c0eb027e 2360
bcba1e7d
AV
2361 nd->flags = flags;
2362 nd->state |= ND_JUMPED;
ab87f9a5
AS
2363
2364 nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
2365 nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
2366 smp_rmb();
2367
bcba1e7d 2368 if (nd->state & ND_ROOT_PRESET) {
b18825a7
DH
2369 struct dentry *root = nd->root.dentry;
2370 struct inode *inode = root->d_inode;
93893862
AV
2371 if (*s && unlikely(!d_can_lookup(root)))
2372 return ERR_PTR(-ENOTDIR);
5b6ca027
AV
2373 nd->path = nd->root;
2374 nd->inode = inode;
2375 if (flags & LOOKUP_RCU) {
ab87f9a5 2376 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
8f47a016 2377 nd->root_seq = nd->seq;
5b6ca027
AV
2378 } else {
2379 path_get(&nd->path);
2380 }
368ee9ba 2381 return s;
5b6ca027
AV
2382 }
2383
31e6b01f 2384 nd->root.mnt = NULL;
31e6b01f 2385
8db52c7e
AS
2386 /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
2387 if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
740a1678
AS
2388 error = nd_jump_root(nd);
2389 if (unlikely(error))
2390 return ERR_PTR(error);
2391 return s;
8db52c7e
AS
2392 }
2393
2394 /* Relative pathname -- get the starting-point it is relative to. */
2395 if (nd->dfd == AT_FDCWD) {
e41f7d4e
AV
2396 if (flags & LOOKUP_RCU) {
2397 struct fs_struct *fs = current->fs;
2398 unsigned seq;
31e6b01f 2399
e41f7d4e
AV
2400 do {
2401 seq = read_seqcount_begin(&fs->seq);
2402 nd->path = fs->pwd;
ef55d917 2403 nd->inode = nd->path.dentry->d_inode;
e41f7d4e
AV
2404 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
2405 } while (read_seqcount_retry(&fs->seq, seq));
2406 } else {
2407 get_fs_pwd(current->fs, &nd->path);
ef55d917 2408 nd->inode = nd->path.dentry->d_inode;
e41f7d4e 2409 }
31e6b01f 2410 } else {
582aa64a 2411 /* Caller must check execute permissions on the starting path component */
c8a53ee5 2412 struct fd f = fdget_raw(nd->dfd);
31e6b01f
NP
2413 struct dentry *dentry;
2414
2903ff01 2415 if (!f.file)
368ee9ba 2416 return ERR_PTR(-EBADF);
31e6b01f 2417
2903ff01 2418 dentry = f.file->f_path.dentry;
31e6b01f 2419
edc2b1da
AV
2420 if (*s && unlikely(!d_can_lookup(dentry))) {
2421 fdput(f);
2422 return ERR_PTR(-ENOTDIR);
f52e0c11 2423 }
31e6b01f 2424
2903ff01 2425 nd->path = f.file->f_path;
e41f7d4e 2426 if (flags & LOOKUP_RCU) {
34a26b99
AV
2427 nd->inode = nd->path.dentry->d_inode;
2428 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
e41f7d4e 2429 } else {
2903ff01 2430 path_get(&nd->path);
34a26b99 2431 nd->inode = nd->path.dentry->d_inode;
e41f7d4e 2432 }
34a26b99 2433 fdput(f);
31e6b01f 2434 }
8db52c7e 2435
adb21d2b
AS
2436 /* For scoped-lookups we need to set the root to the dirfd as well. */
2437 if (flags & LOOKUP_IS_SCOPED) {
2438 nd->root = nd->path;
2439 if (flags & LOOKUP_RCU) {
2440 nd->root_seq = nd->seq;
2441 } else {
2442 path_get(&nd->root);
bcba1e7d 2443 nd->state |= ND_ROOT_GRABBED;
adb21d2b
AS
2444 }
2445 }
2446 return s;
9b4a9b14
AV
2447}
2448
1ccac622 2449static inline const char *lookup_last(struct nameidata *nd)
bd92d7fe
AV
2450{
2451 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
2452 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2453
c108837e 2454 return walk_component(nd, WALK_TRAILING);
bd92d7fe
AV
2455}
2456
4f757f3c
AV
2457static int handle_lookup_down(struct nameidata *nd)
2458{
c153007b 2459 if (!(nd->flags & LOOKUP_RCU))
db3c9ade 2460 dget(nd->path.dentry);
03fa86e9 2461 nd->next_seq = nd->seq;
a4f5b521 2462 return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
4f757f3c
AV
2463}
2464
9b4a9b14 2465/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
c8a53ee5 2466static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
9b4a9b14 2467{
c8a53ee5 2468 const char *s = path_init(nd, flags);
bd92d7fe 2469 int err;
31e6b01f 2470
9b5858e9 2471 if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
4f757f3c 2472 err = handle_lookup_down(nd);
5f336e72
AV
2473 if (unlikely(err < 0))
2474 s = ERR_PTR(err);
4f757f3c
AV
2475 }
2476
1ccac622
AV
2477 while (!(err = link_path_walk(s, nd)) &&
2478 (s = lookup_last(nd)) != NULL)
2479 ;
4f0ed93f
AV
2480 if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
2481 err = handle_lookup_down(nd);
bcba1e7d 2482 nd->state &= ~ND_JUMPED; // no d_weak_revalidate(), please...
4f0ed93f 2483 }
9f1fafee
AV
2484 if (!err)
2485 err = complete_walk(nd);
bd92d7fe 2486
deb106c6
AV
2487 if (!err && nd->flags & LOOKUP_DIRECTORY)
2488 if (!d_can_lookup(nd->path.dentry))
bd23a539 2489 err = -ENOTDIR;
625b6d10
AV
2490 if (!err) {
2491 *path = nd->path;
2492 nd->path.mnt = NULL;
2493 nd->path.dentry = NULL;
2494 }
2495 terminate_walk(nd);
bd92d7fe 2496 return err;
ee0827cd 2497}
31e6b01f 2498
794ebcea 2499int filename_lookup(int dfd, struct filename *name, unsigned flags,
31d921c7 2500 struct path *path, struct path *root)
ee0827cd 2501{
894bc8c4 2502 int retval;
9883d185 2503 struct nameidata nd;
abc9f5be
AV
2504 if (IS_ERR(name))
2505 return PTR_ERR(name);
06422964 2506 set_nameidata(&nd, dfd, name, root);
c8a53ee5 2507 retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
ee0827cd 2508 if (unlikely(retval == -ECHILD))
c8a53ee5 2509 retval = path_lookupat(&nd, flags, path);
ee0827cd 2510 if (unlikely(retval == -ESTALE))
c8a53ee5 2511 retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
31e6b01f 2512
f78570dd 2513 if (likely(!retval))
161aff1d
AV
2514 audit_inode(name, path->dentry,
2515 flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
9883d185 2516 restore_nameidata();
020250f3
DK
2517 return retval;
2518}
2519
8bcb77fa 2520/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
c8a53ee5 2521static int path_parentat(struct nameidata *nd, unsigned flags,
391172c4 2522 struct path *parent)
8bcb77fa 2523{
c8a53ee5 2524 const char *s = path_init(nd, flags);
9b5858e9 2525 int err = link_path_walk(s, nd);
8bcb77fa
AV
2526 if (!err)
2527 err = complete_walk(nd);
391172c4
AV
2528 if (!err) {
2529 *parent = nd->path;
2530 nd->path.mnt = NULL;
2531 nd->path.dentry = NULL;
2532 }
2533 terminate_walk(nd);
8bcb77fa
AV
2534 return err;
2535}
2536
0766ec82 2537/* Note: this does not consume "name" */
c5f563f9 2538static int filename_parentat(int dfd, struct filename *name,
0766ec82
SB
2539 unsigned int flags, struct path *parent,
2540 struct qstr *last, int *type)
8bcb77fa
AV
2541{
2542 int retval;
9883d185 2543 struct nameidata nd;
8bcb77fa 2544
5c31b6ce 2545 if (IS_ERR(name))
0ee50b47 2546 return PTR_ERR(name);
06422964 2547 set_nameidata(&nd, dfd, name, NULL);
c8a53ee5 2548 retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
8bcb77fa 2549 if (unlikely(retval == -ECHILD))
c8a53ee5 2550 retval = path_parentat(&nd, flags, parent);
8bcb77fa 2551 if (unlikely(retval == -ESTALE))
c8a53ee5 2552 retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
391172c4
AV
2553 if (likely(!retval)) {
2554 *last = nd.last;
2555 *type = nd.last_type;
c9b07eab 2556 audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
391172c4 2557 }
9883d185 2558 restore_nameidata();
0ee50b47
DK
2559 return retval;
2560}
2561
79714f72 2562/* does lookup, returns the object with parent locked */
0766ec82 2563static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
5590ff0d 2564{
5c31b6ce 2565 struct dentry *d;
391172c4 2566 struct qstr last;
0ee50b47 2567 int type, error;
51689104 2568
c5f563f9 2569 error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type);
0ee50b47
DK
2570 if (error)
2571 return ERR_PTR(error);
5c31b6ce 2572 if (unlikely(type != LAST_NORM)) {
391172c4 2573 path_put(path);
5c31b6ce 2574 return ERR_PTR(-EINVAL);
79714f72 2575 }
5955102c 2576 inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
391172c4 2577 d = __lookup_hash(&last, path->dentry, 0);
79714f72 2578 if (IS_ERR(d)) {
5955102c 2579 inode_unlock(path->dentry->d_inode);
391172c4 2580 path_put(path);
79714f72 2581 }
79714f72 2582 return d;
5590ff0d
UD
2583}
2584
0766ec82
SB
/*
 * Wrapper around __kern_path_locked() for a kernel-space pathname
 * string; the temporary struct filename is released before returning.
 */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
	struct filename *fname;
	struct dentry *dentry;

	fname = getname_kernel(name);
	dentry = __kern_path_locked(fname, path);
	putname(fname);

	return dentry;
}
2593
d1811465
AV
2594int kern_path(const char *name, unsigned int flags, struct path *path)
2595{
794ebcea
SB
2596 struct filename *filename = getname_kernel(name);
2597 int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL);
2598
2599 putname(filename);
2600 return ret;
2601
d1811465 2602}
4d359507 2603EXPORT_SYMBOL(kern_path);
d1811465 2604
16f18200
JJS
2605/**
2606 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
2607 * @dentry: pointer to dentry of the base directory
2608 * @mnt: pointer to vfs mount of the base directory
2609 * @name: pointer to file name
2610 * @flags: lookup flags
e0a01249 2611 * @path: pointer to struct path to fill
16f18200
JJS
2612 */
2613int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2614 const char *name, unsigned int flags,
e0a01249 2615 struct path *path)
16f18200 2616{
794ebcea 2617 struct filename *filename;
9ad1aaa6 2618 struct path root = {.mnt = mnt, .dentry = dentry};
794ebcea
SB
2619 int ret;
2620
2621 filename = getname_kernel(name);
9ad1aaa6 2622 /* the first argument of filename_lookup() is ignored with root */
794ebcea
SB
2623 ret = filename_lookup(AT_FDCWD, filename, flags, path, &root);
2624 putname(filename);
2625 return ret;
16f18200 2626}
4d359507 2627EXPORT_SYMBOL(vfs_path_lookup);
16f18200 2628
4609e1f1 2629static int lookup_one_common(struct mnt_idmap *idmap,
c2fd68b6
CB
2630 const char *name, struct dentry *base, int len,
2631 struct qstr *this)
057f6c01 2632{
3c95f0dc
AV
2633 this->name = name;
2634 this->len = len;
2635 this->hash = full_name_hash(base, name, len);
6a96ba54 2636 if (!len)
3c95f0dc 2637 return -EACCES;
6a96ba54 2638
21d8a15a
AV
2639 if (unlikely(name[0] == '.')) {
2640 if (len < 2 || (len == 2 && name[1] == '.'))
3c95f0dc 2641 return -EACCES;
21d8a15a
AV
2642 }
2643
6a96ba54 2644 while (len--) {
3c95f0dc 2645 unsigned int c = *(const unsigned char *)name++;
6a96ba54 2646 if (c == '/' || c == '\0')
3c95f0dc 2647 return -EACCES;
6a96ba54 2648 }
5a202bcd
AV
2649 /*
2650 * See if the low-level filesystem might want
2651 * to use its own hash..
2652 */
2653 if (base->d_flags & DCACHE_OP_HASH) {
3c95f0dc 2654 int err = base->d_op->d_hash(base, this);
5a202bcd 2655 if (err < 0)
3c95f0dc 2656 return err;
5a202bcd 2657 }
eead1911 2658
4609e1f1 2659 return inode_permission(idmap, base->d_inode, MAY_EXEC);
3c95f0dc
AV
2660}
2661
0da0b7fd
DH
2662/**
2663 * try_lookup_one_len - filesystem helper to lookup single pathname component
2664 * @name: pathname component to lookup
2665 * @base: base directory to lookup from
2666 * @len: maximum length @len should be interpreted to
2667 *
2668 * Look up a dentry by name in the dcache, returning NULL if it does not
2669 * currently exist. The function does not try to create a dentry.
2670 *
2671 * Note that this routine is purely a helper for filesystem usage and should
2672 * not be called by generic code.
2673 *
2674 * The caller must hold base->i_mutex.
2675 */
2676struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
2677{
2678 struct qstr this;
2679 int err;
2680
2681 WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2682
4609e1f1 2683 err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
0da0b7fd
DH
2684 if (err)
2685 return ERR_PTR(err);
2686
2687 return lookup_dcache(&this, base, 0);
2688}
2689EXPORT_SYMBOL(try_lookup_one_len);
2690
3c95f0dc
AV
2691/**
2692 * lookup_one_len - filesystem helper to lookup single pathname component
2693 * @name: pathname component to lookup
2694 * @base: base directory to lookup from
2695 * @len: maximum length @len should be interpreted to
2696 *
2697 * Note that this routine is purely a helper for filesystem usage and should
2698 * not be called by generic code.
2699 *
2700 * The caller must hold base->i_mutex.
2701 */
2702struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2703{
8613a209 2704 struct dentry *dentry;
3c95f0dc
AV
2705 struct qstr this;
2706 int err;
2707
2708 WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2709
4609e1f1 2710 err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
cda309de
MS
2711 if (err)
2712 return ERR_PTR(err);
2713
8613a209
AV
2714 dentry = lookup_dcache(&this, base, 0);
2715 return dentry ? dentry : __lookup_slow(&this, base, 0);
057f6c01 2716}
4d359507 2717EXPORT_SYMBOL(lookup_one_len);
057f6c01 2718
c2fd68b6
CB
2719/**
2720 * lookup_one - filesystem helper to lookup single pathname component
4609e1f1 2721 * @idmap: idmap of the mount the lookup is performed from
c2fd68b6
CB
2722 * @name: pathname component to lookup
2723 * @base: base directory to lookup from
2724 * @len: maximum length @len should be interpreted to
2725 *
2726 * Note that this routine is purely a helper for filesystem usage and should
2727 * not be called by generic code.
2728 *
2729 * The caller must hold base->i_mutex.
2730 */
4609e1f1 2731struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name,
c2fd68b6
CB
2732 struct dentry *base, int len)
2733{
2734 struct dentry *dentry;
2735 struct qstr this;
2736 int err;
2737
2738 WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2739
4609e1f1 2740 err = lookup_one_common(idmap, name, base, len, &this);
c2fd68b6
CB
2741 if (err)
2742 return ERR_PTR(err);
2743
2744 dentry = lookup_dcache(&this, base, 0);
2745 return dentry ? dentry : __lookup_slow(&this, base, 0);
2746}
2747EXPORT_SYMBOL(lookup_one);
2748
bbddca8e 2749/**
00675017 2750 * lookup_one_unlocked - filesystem helper to lookup single pathname component
4609e1f1 2751 * @idmap: idmap of the mount the lookup is performed from
bbddca8e
N
2752 * @name: pathname component to lookup
2753 * @base: base directory to lookup from
2754 * @len: maximum length @len should be interpreted to
2755 *
2756 * Note that this routine is purely a helper for filesystem usage and should
2757 * not be called by generic code.
2758 *
2759 * Unlike lookup_one_len, it should be called without the parent
2760 * i_mutex held, and will take the i_mutex itself if necessary.
2761 */
4609e1f1 2762struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
00675017
CB
2763 const char *name, struct dentry *base,
2764 int len)
bbddca8e
N
2765{
2766 struct qstr this;
bbddca8e 2767 int err;
20d00ee8 2768 struct dentry *ret;
bbddca8e 2769
4609e1f1 2770 err = lookup_one_common(idmap, name, base, len, &this);
bbddca8e
N
2771 if (err)
2772 return ERR_PTR(err);
2773
20d00ee8
LT
2774 ret = lookup_dcache(&this, base, 0);
2775 if (!ret)
2776 ret = lookup_slow(&this, base, 0);
2777 return ret;
bbddca8e 2778}
00675017
CB
2779EXPORT_SYMBOL(lookup_one_unlocked);
2780
2781/**
2782 * lookup_one_positive_unlocked - filesystem helper to lookup single
2783 * pathname component
4609e1f1 2784 * @idmap: idmap of the mount the lookup is performed from
00675017
CB
2785 * @name: pathname component to lookup
2786 * @base: base directory to lookup from
2787 * @len: maximum length @len should be interpreted to
2788 *
2789 * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns
2790 * known positive or ERR_PTR(). This is what most of the users want.
2791 *
2792 * Note that pinned negative with unlocked parent _can_ become positive at any
2793 * time, so callers of lookup_one_unlocked() need to be very careful; pinned
2794 * positives have >d_inode stable, so this one avoids such problems.
2795 *
2796 * Note that this routine is purely a helper for filesystem usage and should
2797 * not be called by generic code.
2798 *
2799 * The helper should be called without i_mutex held.
2800 */
4609e1f1 2801struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
00675017
CB
2802 const char *name,
2803 struct dentry *base, int len)
2804{
4609e1f1 2805 struct dentry *ret = lookup_one_unlocked(idmap, name, base, len);
00675017
CB
2806
2807 if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
2808 dput(ret);
2809 ret = ERR_PTR(-ENOENT);
2810 }
2811 return ret;
2812}
2813EXPORT_SYMBOL(lookup_one_positive_unlocked);
2814
2815/**
2816 * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
2817 * @name: pathname component to lookup
2818 * @base: base directory to lookup from
2819 * @len: maximum length @len should be interpreted to
2820 *
2821 * Note that this routine is purely a helper for filesystem usage and should
2822 * not be called by generic code.
2823 *
2824 * Unlike lookup_one_len, it should be called without the parent
2825 * i_mutex held, and will take the i_mutex itself if necessary.
2826 */
2827struct dentry *lookup_one_len_unlocked(const char *name,
2828 struct dentry *base, int len)
2829{
4609e1f1 2830 return lookup_one_unlocked(&nop_mnt_idmap, name, base, len);
00675017 2831}
bbddca8e
N
2832EXPORT_SYMBOL(lookup_one_len_unlocked);
2833
6c2d4798
AV
2834/*
2835 * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
2836 * on negatives. Returns known positive or ERR_PTR(); that's what
2837 * most of the users want. Note that pinned negative with unlocked parent
2838 * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
2839 * need to be very careful; pinned positives have ->d_inode stable, so
2840 * this one avoids such problems.
2841 */
2842struct dentry *lookup_positive_unlocked(const char *name,
2843 struct dentry *base, int len)
2844{
4609e1f1 2845 return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len);
6c2d4798
AV
2846}
2847EXPORT_SYMBOL(lookup_positive_unlocked);
2848
eedf265a
EB
#ifdef CONFIG_UNIX98_PTYS
/*
 * path_pts - move @path to whatever is mounted on the sibling "pts" entry
 * @path: path to start from; updated in place
 *
 * Returns 0 on success.  Returns -ENOENT if the parent of @path->dentry is
 * not connected to @path->mnt (in which case @path is left untouched), or
 * if no usable "pts" child could be found in the parent (in which case
 * @path->dentry has already been switched to the parent).
 */
int path_pts(struct path *path)
{
	/* Find something mounted on "pts" in the same directory as
	 * the input path.
	 */
	struct dentry *parent = dget_parent(path->dentry);
	struct dentry *child;
	struct qstr this = QSTR_INIT("pts", 3);

	if (unlikely(!path_connected(path->mnt, parent))) {
		dput(parent);
		return -ENOENT;
	}
	/* @path takes over the reference obtained by dget_parent(). */
	dput(path->dentry);
	path->dentry = parent;
	child = d_hash_and_lookup(parent, &this);
	/*
	 * d_hash_and_lookup() yields NULL on a miss but ERR_PTR() when
	 * hashing the name fails; neither may be stored in @path or handed
	 * to follow_down(), so treat both as "not found".
	 */
	if (IS_ERR_OR_NULL(child))
		return -ENOENT;

	path->dentry = child;
	dput(parent);
	follow_down(path);
	return 0;
}
#endif
2875
1fa1e7f6
AW
2876int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
2877 struct path *path, int *empty)
1da177e4 2878{
794ebcea
SB
2879 struct filename *filename = getname_flags(name, flags, empty);
2880 int ret = filename_lookup(dfd, filename, flags, path, NULL);
2881
2882 putname(filename);
2883 return ret;
1da177e4 2884}
b853a161 2885EXPORT_SYMBOL(user_path_at_empty);
1fa1e7f6 2886
9452e93e 2887int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
ba73d987 2888 struct inode *inode)
1da177e4 2889{
8e96e3b7 2890 kuid_t fsuid = current_fsuid();
9452e93e 2891 struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
da9592ed 2892
a2bd096f 2893 if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, inode), fsuid))
1da177e4 2894 return 0;
a2bd096f 2895 if (vfsuid_eq_kuid(i_uid_into_vfsuid(mnt_userns, dir), fsuid))
1da177e4 2896 return 0;
9452e93e 2897 return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER);
1da177e4 2898}
cbdf35bc 2899EXPORT_SYMBOL(__check_sticky);
1da177e4
LT
2900
2901/*
2902 * Check whether we can remove a link victim from directory dir, check
2903 * whether the type of victim is right.
2904 * 1. We can't do it if dir is read-only (done in permission())
2905 * 2. We should have write and exec permissions on dir
2906 * 3. We can't remove anything from append-only dir
2907 * 4. We can't do anything with immutable dir (done in permission())
2908 * 5. If the sticky bit on dir is set we should either
2909 * a. be owner of dir, or
2910 * b. be owner of victim, or
2911 * c. have CAP_FOWNER capability
2912 * 6. If the victim is append-only or immutable we can't do antyhing with
2913 * links pointing to it.
0bd23d09
EB
2914 * 7. If the victim has an unknown uid or gid we can't change the inode.
2915 * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
2916 * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
2917 * 10. We can't remove a root or mountpoint.
2918 * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
1da177e4
LT
2919 * nfs_async_unlink().
2920 */
4609e1f1 2921static int may_delete(struct mnt_idmap *idmap, struct inode *dir,
ba73d987 2922 struct dentry *victim, bool isdir)
1da177e4 2923{
4609e1f1 2924 struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
63afdfc7 2925 struct inode *inode = d_backing_inode(victim);
1da177e4
LT
2926 int error;
2927
b18825a7 2928 if (d_is_negative(victim))
1da177e4 2929 return -ENOENT;
b18825a7 2930 BUG_ON(!inode);
1da177e4
LT
2931
2932 BUG_ON(victim->d_parent->d_inode != dir);
593d1ce8
EB
2933
2934 /* Inode writeback is not safe when the uid or gid are invalid. */
a2bd096f
CB
2935 if (!vfsuid_valid(i_uid_into_vfsuid(mnt_userns, inode)) ||
2936 !vfsgid_valid(i_gid_into_vfsgid(mnt_userns, inode)))
593d1ce8
EB
2937 return -EOVERFLOW;
2938
4fa6b5ec 2939 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
1da177e4 2940
4609e1f1 2941 error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
1da177e4
LT
2942 if (error)
2943 return error;
2944 if (IS_APPEND(dir))
2945 return -EPERM;
b18825a7 2946
9452e93e 2947 if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) ||
ba73d987 2948 IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
4609e1f1 2949 HAS_UNMAPPED_ID(idmap, inode))
1da177e4
LT
2950 return -EPERM;
2951 if (isdir) {
44b1d530 2952 if (!d_is_dir(victim))
1da177e4
LT
2953 return -ENOTDIR;
2954 if (IS_ROOT(victim))
2955 return -EBUSY;
44b1d530 2956 } else if (d_is_dir(victim))
1da177e4
LT
2957 return -EISDIR;
2958 if (IS_DEADDIR(dir))
2959 return -ENOENT;
2960 if (victim->d_flags & DCACHE_NFSFS_RENAMED)
2961 return -EBUSY;
2962 return 0;
2963}
2964
2965/* Check whether we can create an object with dentry child in directory
2966 * dir.
2967 * 1. We can't do it if child already exists (open has special treatment for
2968 * this case, but since we are inlined it's OK)
2969 * 2. We can't do it if dir is read-only (done in permission())
036d5236
EB
2970 * 3. We can't do it if the fs can't represent the fsuid or fsgid.
2971 * 4. We should have write and exec permissions on dir
2972 * 5. We can't do it if dir is immutable (done in permission())
1da177e4 2973 */
4609e1f1 2974static inline int may_create(struct mnt_idmap *idmap,
ba73d987 2975 struct inode *dir, struct dentry *child)
1da177e4 2976{
14e972b4 2977 audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
1da177e4
LT
2978 if (child->d_inode)
2979 return -EEXIST;
2980 if (IS_DEADDIR(dir))
2981 return -ENOENT;
4609e1f1 2982 if (!fsuidgid_has_mapping(dir->i_sb, idmap))
036d5236 2983 return -EOVERFLOW;
8e538913 2984
4609e1f1 2985 return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
1da177e4
LT
2986}
2987
1da177e4
LT
2988/*
2989 * p1 and p2 should be directories on the same fs.
2990 */
2991struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
2992{
2993 struct dentry *p;
2994
2995 if (p1 == p2) {
5955102c 2996 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
1da177e4
LT
2997 return NULL;
2998 }
2999
fc64005c 3000 mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
1da177e4 3001
e2761a11
OH
3002 p = d_ancestor(p2, p1);
3003 if (p) {
5955102c
AV
3004 inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
3005 inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
e2761a11 3006 return p;
1da177e4
LT
3007 }
3008
e2761a11
OH
3009 p = d_ancestor(p1, p2);
3010 if (p) {
5955102c
AV
3011 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
3012 inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
e2761a11 3013 return p;
1da177e4
LT
3014 }
3015
5955102c
AV
3016 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
3017 inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
1da177e4
LT
3018 return NULL;
3019}
4d359507 3020EXPORT_SYMBOL(lock_rename);
1da177e4
LT
3021
3022void unlock_rename(struct dentry *p1, struct dentry *p2)
3023{
5955102c 3024 inode_unlock(p1->d_inode);
1da177e4 3025 if (p1 != p2) {
5955102c 3026 inode_unlock(p2->d_inode);
fc64005c 3027 mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
1da177e4
LT
3028 }
3029}
4d359507 3030EXPORT_SYMBOL(unlock_rename);
1da177e4 3031
1639a49c
YX
3032/**
3033 * mode_strip_umask - handle vfs umask stripping
3034 * @dir: parent directory of the new inode
3035 * @mode: mode of the new inode to be created in @dir
3036 *
3037 * Umask stripping depends on whether or not the filesystem supports POSIX
3038 * ACLs. If the filesystem doesn't support it umask stripping is done directly
3039 * in here. If the filesystem does support POSIX ACLs umask stripping is
3040 * deferred until the filesystem calls posix_acl_create().
3041 *
3042 * Returns: mode
3043 */
3044static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
3045{
3046 if (!IS_POSIXACL(dir))
3047 mode &= ~current_umask();
3048 return mode;
3049}
3050
3051/**
3052 * vfs_prepare_mode - prepare the mode to be used for a new inode
9452e93e 3053 * @idmap: idmap of the mount the inode was found from
1639a49c
YX
3054 * @dir: parent directory of the new inode
3055 * @mode: mode of the new inode
3056 * @mask_perms: allowed permission by the vfs
3057 * @type: type of file to be created
3058 *
3059 * This helper consolidates and enforces vfs restrictions on the @mode of a new
3060 * object to be created.
3061 *
3062 * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
3063 * the kernel documentation for mode_strip_umask()). Moving umask stripping
3064 * after setgid stripping allows the same ordering for both non-POSIX ACL and
3065 * POSIX ACL supporting filesystems.
3066 *
3067 * Note that it's currently valid for @type to be 0 if a directory is created.
3068 * Filesystems raise that flag individually and we need to check whether each
3069 * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
3070 * non-zero type.
3071 *
3072 * Returns: mode to be passed to the filesystem
3073 */
9452e93e 3074static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
1639a49c
YX
3075 const struct inode *dir, umode_t mode,
3076 umode_t mask_perms, umode_t type)
3077{
9452e93e 3078 mode = mode_strip_sgid(idmap, dir, mode);
1639a49c
YX
3079 mode = mode_strip_umask(dir, mode);
3080
3081 /*
3082 * Apply the vfs mandated allowed permission mask and set the type of
3083 * file to be created before we call into the filesystem.
3084 */
3085 mode &= (mask_perms & ~S_IFMT);
3086 mode |= (type & S_IFMT);
3087
3088 return mode;
3089}
3090
6521f891
CB
3091/**
3092 * vfs_create - create new file
abf08576 3093 * @idmap: idmap of the mount the inode was found from
6521f891
CB
3094 * @dir: inode of @dentry
3095 * @dentry: pointer to dentry of the base directory
3096 * @mode: mode of the new file
3097 * @want_excl: whether the file must not yet exist
3098 *
3099 * Create a new file.
3100 *
abf08576
CB
3101 * If the inode has been found through an idmapped mount the idmap of
3102 * the vfsmount must be passed through @idmap. This function will then take
3103 * care to map the inode according to @idmap before checking permissions.
6521f891 3104 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 3105 * raw inode simply passs @nop_mnt_idmap.
6521f891 3106 */
abf08576 3107int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
6521f891 3108 struct dentry *dentry, umode_t mode, bool want_excl)
1da177e4 3109{
abf08576
CB
3110 int error;
3111
4609e1f1 3112 error = may_create(idmap, dir, dentry);
1da177e4
LT
3113 if (error)
3114 return error;
3115
acfa4380 3116 if (!dir->i_op->create)
1da177e4 3117 return -EACCES; /* shouldn't it be ENOSYS? */
1639a49c 3118
9452e93e 3119 mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
1da177e4
LT
3120 error = security_inode_create(dir, dentry, mode);
3121 if (error)
3122 return error;
6c960e68 3123 error = dir->i_op->create(idmap, dir, dentry, mode, want_excl);
a74574aa 3124 if (!error)
f38aa942 3125 fsnotify_create(dir, dentry);
1da177e4
LT
3126 return error;
3127}
4d359507 3128EXPORT_SYMBOL(vfs_create);
1da177e4 3129
8e6c848e
AV
3130int vfs_mkobj(struct dentry *dentry, umode_t mode,
3131 int (*f)(struct dentry *, umode_t, void *),
3132 void *arg)
3133{
3134 struct inode *dir = dentry->d_parent->d_inode;
4609e1f1 3135 int error = may_create(&nop_mnt_idmap, dir, dentry);
8e6c848e
AV
3136 if (error)
3137 return error;
3138
3139 mode &= S_IALLUGO;
3140 mode |= S_IFREG;
3141 error = security_inode_create(dir, dentry, mode);
3142 if (error)
3143 return error;
3144 error = f(dentry, mode, arg);
3145 if (!error)
3146 fsnotify_create(dir, dentry);
3147 return error;
3148}
3149EXPORT_SYMBOL(vfs_mkobj);
3150
a2982cc9
EB
3151bool may_open_dev(const struct path *path)
3152{
3153 return !(path->mnt->mnt_flags & MNT_NODEV) &&
3154 !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
3155}
3156
4609e1f1 3157static int may_open(struct mnt_idmap *idmap, const struct path *path,
ba73d987 3158 int acc_mode, int flag)
1da177e4 3159{
3fb64190 3160 struct dentry *dentry = path->dentry;
1da177e4
LT
3161 struct inode *inode = dentry->d_inode;
3162 int error;
3163
3164 if (!inode)
3165 return -ENOENT;
3166
c8fe8f30
CH
3167 switch (inode->i_mode & S_IFMT) {
3168 case S_IFLNK:
1da177e4 3169 return -ELOOP;
c8fe8f30 3170 case S_IFDIR:
fc4177be 3171 if (acc_mode & MAY_WRITE)
c8fe8f30 3172 return -EISDIR;
fc4177be
KC
3173 if (acc_mode & MAY_EXEC)
3174 return -EACCES;
c8fe8f30
CH
3175 break;
3176 case S_IFBLK:
3177 case S_IFCHR:
a2982cc9 3178 if (!may_open_dev(path))
1da177e4 3179 return -EACCES;
633fb6ac 3180 fallthrough;
c8fe8f30
CH
3181 case S_IFIFO:
3182 case S_IFSOCK:
633fb6ac
KC
3183 if (acc_mode & MAY_EXEC)
3184 return -EACCES;
1da177e4 3185 flag &= ~O_TRUNC;
c8fe8f30 3186 break;
0fd338b2
KC
3187 case S_IFREG:
3188 if ((acc_mode & MAY_EXEC) && path_noexec(path))
3189 return -EACCES;
3190 break;
4a3fd211 3191 }
b41572e9 3192
4609e1f1 3193 error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
b41572e9
DH
3194 if (error)
3195 return error;
6146f0d5 3196
1da177e4
LT
3197 /*
3198 * An append-only file must be opened in append mode for writing.
3199 */
3200 if (IS_APPEND(inode)) {
8737c930 3201 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
7715b521 3202 return -EPERM;
1da177e4 3203 if (flag & O_TRUNC)
7715b521 3204 return -EPERM;
1da177e4
LT
3205 }
3206
3207 /* O_NOATIME can only be set by the owner or superuser */
01beba79 3208 if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode))
7715b521 3209 return -EPERM;
1da177e4 3210
f3c7691e 3211 return 0;
7715b521 3212}
1da177e4 3213
abf08576 3214static int handle_truncate(struct mnt_idmap *idmap, struct file *filp)
7715b521 3215{
f0bb5aaf 3216 const struct path *path = &filp->f_path;
7715b521
AV
3217 struct inode *inode = path->dentry->d_inode;
3218 int error = get_write_access(inode);
3219 if (error)
3220 return error;
482e0007 3221
3350607d 3222 error = security_file_truncate(filp);
7715b521 3223 if (!error) {
abf08576 3224 error = do_truncate(idmap, path->dentry, 0,
7715b521 3225 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
e1181ee6 3226 filp);
7715b521
AV
3227 }
3228 put_write_access(inode);
acd0c935 3229 return error;
1da177e4
LT
3230}
3231
d57999e1
DH
/*
 * Flags with the access-mode field (O_ACCMODE) equal to 3 are returned
 * decremented by one, i.e. with access mode 2; all other values are
 * returned unchanged.
 */
static inline int open_to_namei_flags(int flag)
{
	return ((flag & O_ACCMODE) == 3) ? flag - 1 : flag;
}
3238
4609e1f1 3239static int may_o_create(struct mnt_idmap *idmap,
ba73d987
CB
3240 const struct path *dir, struct dentry *dentry,
3241 umode_t mode)
d18e9008
MS
3242{
3243 int error = security_path_mknod(dir, dentry, mode, 0);
3244 if (error)
3245 return error;
3246
4609e1f1 3247 if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap))
1328c727
SF
3248 return -EOVERFLOW;
3249
4609e1f1 3250 error = inode_permission(idmap, dir->dentry->d_inode,
47291baa 3251 MAY_WRITE | MAY_EXEC);
d18e9008
MS
3252 if (error)
3253 return error;
3254
3255 return security_inode_create(dir->dentry->d_inode, dentry, mode);
3256}
3257
1acf0af9
DH
3258/*
3259 * Attempt to atomically look up, create and open a file from a negative
3260 * dentry.
3261 *
3262 * Returns 0 if successful. The file will have been created and attached to
3263 * @file by the filesystem calling finish_open().
3264 *
00a07c15
AV
3265 * If the file was looked up only or didn't need creating, FMODE_OPENED won't
3266 * be set. The caller will need to perform the open themselves. @path will
3267 * have been updated to point to the new dentry. This may be negative.
1acf0af9
DH
3268 *
3269 * Returns an error code otherwise.
3270 */
239eb983
AV
3271static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
3272 struct file *file,
239eb983 3273 int open_flag, umode_t mode)
d18e9008 3274{
384f26e2 3275 struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
d18e9008 3276 struct inode *dir = nd->path.dentry->d_inode;
d18e9008 3277 int error;
d18e9008 3278
d18e9008
MS
3279 if (nd->flags & LOOKUP_DIRECTORY)
3280 open_flag |= O_DIRECTORY;
3281
30d90494
AV
3282 file->f_path.dentry = DENTRY_NOT_SET;
3283 file->f_path.mnt = nd->path.mnt;
0fb1ea09 3284 error = dir->i_op->atomic_open(dir, dentry, file,
44907d79 3285 open_to_namei_flags(open_flag), mode);
6fbd0714 3286 d_lookup_done(dentry);
384f26e2 3287 if (!error) {
64e1ac4d 3288 if (file->f_mode & FMODE_OPENED) {
6fb968cd
AV
3289 if (unlikely(dentry != file->f_path.dentry)) {
3290 dput(dentry);
3291 dentry = dget(file->f_path.dentry);
3292 }
64e1ac4d 3293 } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
2675a4eb 3294 error = -EIO;
03da633a 3295 } else {
384f26e2
AV
3296 if (file->f_path.dentry) {
3297 dput(dentry);
3298 dentry = file->f_path.dentry;
03da633a 3299 }
239eb983 3300 if (unlikely(d_is_negative(dentry)))
a01e718f 3301 error = -ENOENT;
62b2ce96 3302 }
d18e9008 3303 }
239eb983
AV
3304 if (error) {
3305 dput(dentry);
3306 dentry = ERR_PTR(error);
3307 }
3308 return dentry;
d18e9008
MS
3309}
3310
d58ffd35 3311/*
1acf0af9 3312 * Look up and maybe create and open the last component.
d58ffd35 3313 *
00a07c15 3314 * Must be called with parent locked (exclusive in O_CREAT case).
1acf0af9 3315 *
00a07c15
AV
3316 * Returns 0 on success, that is, if
3317 * the file was successfully atomically created (if necessary) and opened, or
3318 * the file was not completely opened at this time, though lookups and
3319 * creations were performed.
3320 * These case are distinguished by presence of FMODE_OPENED on file->f_mode.
3321 * In the latter case dentry returned in @path might be negative if O_CREAT
3322 * hadn't been specified.
1acf0af9 3323 *
00a07c15 3324 * An error code is returned on failure.
d58ffd35 3325 */
da5ebf5a
AV
3326static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
3327 const struct open_flags *op,
3328 bool got_write)
d58ffd35 3329{
6c960e68 3330 struct mnt_idmap *idmap;
d58ffd35 3331 struct dentry *dir = nd->path.dentry;
54ef4872 3332 struct inode *dir_inode = dir->d_inode;
1643b43f 3333 int open_flag = op->open_flag;
d58ffd35 3334 struct dentry *dentry;
1643b43f 3335 int error, create_error = 0;
1643b43f 3336 umode_t mode = op->mode;
6fbd0714 3337 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
d58ffd35 3338
ce8644fc 3339 if (unlikely(IS_DEADDIR(dir_inode)))
da5ebf5a 3340 return ERR_PTR(-ENOENT);
d58ffd35 3341
73a09dd9 3342 file->f_mode &= ~FMODE_CREATED;
6fbd0714
AV
3343 dentry = d_lookup(dir, &nd->last);
3344 for (;;) {
3345 if (!dentry) {
3346 dentry = d_alloc_parallel(dir, &nd->last, &wq);
3347 if (IS_ERR(dentry))
da5ebf5a 3348 return dentry;
6fbd0714
AV
3349 }
3350 if (d_in_lookup(dentry))
3351 break;
d58ffd35 3352
6fbd0714
AV
3353 error = d_revalidate(dentry, nd->flags);
3354 if (likely(error > 0))
3355 break;
3356 if (error)
3357 goto out_dput;
3358 d_invalidate(dentry);
3359 dput(dentry);
3360 dentry = NULL;
3361 }
3362 if (dentry->d_inode) {
6c51e513 3363 /* Cached positive dentry: will open in f_op->open */
da5ebf5a 3364 return dentry;
6c51e513 3365 }
d18e9008 3366
1643b43f
AV
3367 /*
3368 * Checking write permission is tricky, bacuse we don't know if we are
3369 * going to actually need it: O_CREAT opens should work as long as the
3370 * file exists. But checking existence breaks atomicity. The trick is
3371 * to check access and if not granted clear O_CREAT from the flags.
3372 *
3373 * Another problem is returing the "right" error value (e.g. for an
3374 * O_EXCL open we want to return EEXIST not EROFS).
3375 */
99a4a90c
AV
3376 if (unlikely(!got_write))
3377 open_flag &= ~O_TRUNC;
6c960e68 3378 idmap = mnt_idmap(nd->path.mnt);
1643b43f 3379 if (open_flag & O_CREAT) {
99a4a90c
AV
3380 if (open_flag & O_EXCL)
3381 open_flag &= ~O_TRUNC;
9452e93e 3382 mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode);
99a4a90c 3383 if (likely(got_write))
4609e1f1 3384 create_error = may_o_create(idmap, &nd->path,
ba73d987 3385 dentry, mode);
99a4a90c
AV
3386 else
3387 create_error = -EROFS;
d18e9008 3388 }
99a4a90c
AV
3389 if (create_error)
3390 open_flag &= ~O_CREAT;
6ac08709 3391 if (dir_inode->i_op->atomic_open) {
d489cf9a 3392 dentry = atomic_open(nd, dentry, file, open_flag, mode);
da5ebf5a
AV
3393 if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
3394 dentry = ERR_PTR(create_error);
3395 return dentry;
d18e9008 3396 }
54ef4872 3397
6fbd0714 3398 if (d_in_lookup(dentry)) {
12fa5e24
AV
3399 struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
3400 nd->flags);
6fbd0714 3401 d_lookup_done(dentry);
12fa5e24
AV
3402 if (unlikely(res)) {
3403 if (IS_ERR(res)) {
3404 error = PTR_ERR(res);
3405 goto out_dput;
3406 }
3407 dput(dentry);
3408 dentry = res;
3409 }
54ef4872
MS
3410 }
3411
d58ffd35 3412 /* Negative dentry, just create the file */
1643b43f 3413 if (!dentry->d_inode && (open_flag & O_CREAT)) {
73a09dd9 3414 file->f_mode |= FMODE_CREATED;
ce8644fc 3415 audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
ce8644fc
AV
3416 if (!dir_inode->i_op->create) {
3417 error = -EACCES;
d58ffd35 3418 goto out_dput;
ce8644fc 3419 }
549c7297 3420
6c960e68 3421 error = dir_inode->i_op->create(idmap, dir_inode, dentry,
549c7297 3422 mode, open_flag & O_EXCL);
d58ffd35
MS
3423 if (error)
3424 goto out_dput;
3425 }
1643b43f
AV
3426 if (unlikely(create_error) && !dentry->d_inode) {
3427 error = create_error;
3428 goto out_dput;
d58ffd35 3429 }
da5ebf5a 3430 return dentry;
d58ffd35
MS
3431
3432out_dput:
3433 dput(dentry);
da5ebf5a 3434 return ERR_PTR(error);
d58ffd35
MS
3435}
3436
c981a482 3437static const char *open_last_lookups(struct nameidata *nd,
3ec2eef1 3438 struct file *file, const struct open_flags *op)
fb1cc555 3439{
a1e28038 3440 struct dentry *dir = nd->path.dentry;
ca344a89 3441 int open_flag = op->open_flag;
64894cf8 3442 bool got_write = false;
da5ebf5a 3443 struct dentry *dentry;
b0417d2c 3444 const char *res;
1f36f774 3445
c3e380b0
AV
3446 nd->flags |= op->intent;
3447
bc77daa7 3448 if (nd->last_type != LAST_NORM) {
56676ec3
AV
3449 if (nd->depth)
3450 put_link(nd);
ff326a32 3451 return handle_dots(nd, nd->last_type);
1f36f774 3452 }
67ee3ad2 3453
ca344a89 3454 if (!(open_flag & O_CREAT)) {
fe2d35ff
AV
3455 if (nd->last.name[nd->last.len])
3456 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
3457 /* we _can_ be in RCU mode here */
4cb64024 3458 dentry = lookup_fast(nd);
20e34357 3459 if (IS_ERR(dentry))
1ccac622 3460 return ERR_CAST(dentry);
20e34357 3461 if (likely(dentry))
71574865
MS
3462 goto finish_lookup;
3463
6583fe22 3464 BUG_ON(nd->flags & LOOKUP_RCU);
b6183df7
MS
3465 } else {
3466 /* create side of things */
72287417 3467 if (nd->flags & LOOKUP_RCU) {
e36cffed
JA
3468 if (!try_to_unlazy(nd))
3469 return ERR_PTR(-ECHILD);
72287417 3470 }
c9b07eab 3471 audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
b6183df7 3472 /* trailing slashes? */
deb106c6 3473 if (unlikely(nd->last.name[nd->last.len]))
1ccac622 3474 return ERR_PTR(-EISDIR);
b6183df7 3475 }
a2c36b45 3476
9cf843e3 3477 if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
e36cffed 3478 got_write = !mnt_want_write(nd->path.mnt);
64894cf8
AV
3479 /*
3480 * do _not_ fail yet - we might not need that or fail with
3481 * a different error; let lookup_open() decide; we'll be
3482 * dropping this one anyway.
3483 */
3484 }
9cf843e3
AV
3485 if (open_flag & O_CREAT)
3486 inode_lock(dir->d_inode);
3487 else
3488 inode_lock_shared(dir->d_inode);
da5ebf5a 3489 dentry = lookup_open(nd, file, op, got_write);
f7bb959d
AV
3490 if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
3491 fsnotify_create(dir->d_inode, dentry);
9cf843e3
AV
3492 if (open_flag & O_CREAT)
3493 inode_unlock(dir->d_inode);
3494 else
3495 inode_unlock_shared(dir->d_inode);
a1e28038 3496
c981a482 3497 if (got_write)
59e96e65 3498 mnt_drop_write(nd->path.mnt);
d18e9008 3499
59e96e65
AV
3500 if (IS_ERR(dentry))
3501 return ERR_CAST(dentry);
3502
973d4b73 3503 if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
e73cabff
AV
3504 dput(nd->path.dentry);
3505 nd->path.dentry = dentry;
c981a482 3506 return NULL;
fb1cc555
AV
3507 }
3508
20e34357 3509finish_lookup:
56676ec3
AV
3510 if (nd->depth)
3511 put_link(nd);
a4f5b521 3512 res = step_into(nd, WALK_TRAILING, dentry);
ff326a32 3513 if (unlikely(res))
b0417d2c 3514 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
ff326a32 3515 return res;
c981a482
AV
3516}
3517
3518/*
3519 * Handle the last step of open()
3520 */
c5971b8c 3521static int do_open(struct nameidata *nd,
c981a482
AV
3522 struct file *file, const struct open_flags *op)
3523{
abf08576 3524 struct mnt_idmap *idmap;
549c7297 3525 struct user_namespace *mnt_userns;
c981a482
AV
3526 int open_flag = op->open_flag;
3527 bool do_truncate;
3528 int acc_mode;
c981a482
AV
3529 int error;
3530
ff326a32
AV
3531 if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
3532 error = complete_walk(nd);
3533 if (error)
3534 return error;
3535 }
973d4b73
AV
3536 if (!(file->f_mode & FMODE_CREATED))
3537 audit_inode(nd->name, nd->path.dentry, 0);
abf08576
CB
3538 idmap = mnt_idmap(nd->path.mnt);
3539 mnt_userns = mnt_idmap_owner(idmap);
30aba665 3540 if (open_flag & O_CREAT) {
b94e0b32
AV
3541 if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
3542 return -EEXIST;
30aba665 3543 if (d_is_dir(nd->path.dentry))
c5971b8c 3544 return -EISDIR;
549c7297 3545 error = may_create_in_sticky(mnt_userns, nd,
30aba665
SM
3546 d_backing_inode(nd->path.dentry));
3547 if (unlikely(error))
c5971b8c 3548 return error;
30aba665 3549 }
44b1d530 3550 if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
c5971b8c 3551 return -ENOTDIR;
6c0d46c4 3552
8795e7d4
AV
3553 do_truncate = false;
3554 acc_mode = op->acc_mode;
5a2d3edd
AV
3555 if (file->f_mode & FMODE_CREATED) {
3556 /* Don't check for write permission, don't truncate */
3557 open_flag &= ~O_TRUNC;
5a2d3edd 3558 acc_mode = 0;
8795e7d4 3559 } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
0f9d1a10
AV
3560 error = mnt_want_write(nd->path.mnt);
3561 if (error)
c5971b8c 3562 return error;
8795e7d4 3563 do_truncate = true;
0f9d1a10 3564 }
4609e1f1 3565 error = may_open(idmap, &nd->path, acc_mode, open_flag);
8795e7d4 3566 if (!error && !(file->f_mode & FMODE_OPENED))
3ad5615a 3567 error = vfs_open(&nd->path, file);
8795e7d4
AV
3568 if (!error)
3569 error = ima_file_check(file, op->acc_mode);
3570 if (!error && do_truncate)
abf08576 3571 error = handle_truncate(idmap, file);
c80567c8
AV
3572 if (unlikely(error > 0)) {
3573 WARN_ON(1);
3574 error = -EINVAL;
3575 }
8795e7d4 3576 if (do_truncate)
0f9d1a10 3577 mnt_drop_write(nd->path.mnt);
c5971b8c 3578 return error;
fb1cc555
AV
3579}
3580
6521f891
CB
3581/**
3582 * vfs_tmpfile - create tmpfile
abf08576 3583 * @idmap: idmap of the mount the inode was found from
6521f891
CB
3584 * @dentry: pointer to dentry of the base directory
3585 * @mode: mode of the new tmpfile
2111c3c0 3586 * @open_flag: flags
6521f891
CB
3587 *
3588 * Create a temporary file.
3589 *
abf08576
CB
3590 * If the inode has been found through an idmapped mount the idmap of
3591 * the vfsmount must be passed through @idmap. This function will then take
3592 * care to map the inode according to @idmap before checking permissions.
6521f891 3593 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 3594 * raw inode simply passs @nop_mnt_idmap.
6521f891 3595 */
abf08576 3596static int vfs_tmpfile(struct mnt_idmap *idmap,
9751b338
MS
3597 const struct path *parentpath,
3598 struct file *file, umode_t mode)
af7bd4dc 3599{
9751b338
MS
3600 struct dentry *child;
3601 struct inode *dir = d_inode(parentpath->dentry);
af7bd4dc
AG
3602 struct inode *inode;
3603 int error;
406c706c 3604 int open_flag = file->f_flags;
af7bd4dc
AG
3605
3606 /* we want directory to be writable */
4609e1f1 3607 error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
af7bd4dc 3608 if (error)
9751b338 3609 return error;
af7bd4dc 3610 if (!dir->i_op->tmpfile)
9751b338
MS
3611 return -EOPNOTSUPP;
3612 child = d_alloc(parentpath->dentry, &slash_name);
af7bd4dc 3613 if (unlikely(!child))
9751b338
MS
3614 return -ENOMEM;
3615 file->f_path.mnt = parentpath->mnt;
3616 file->f_path.dentry = child;
9452e93e 3617 mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
011e2b71 3618 error = dir->i_op->tmpfile(idmap, dir, file, mode);
9751b338 3619 dput(child);
af7bd4dc 3620 if (error)
9751b338
MS
3621 return error;
3622 /* Don't check for other permissions, the inode was just created */
4609e1f1 3623 error = may_open(idmap, &file->f_path, 0, file->f_flags);
af7bd4dc 3624 if (error)
9751b338
MS
3625 return error;
3626 inode = file_inode(file);
406c706c 3627 if (!(open_flag & O_EXCL)) {
af7bd4dc
AG
3628 spin_lock(&inode->i_lock);
3629 inode->i_state |= I_LINKABLE;
3630 spin_unlock(&inode->i_lock);
3631 }
39f60c1c 3632 ima_post_create_tmpfile(idmap, inode);
9751b338 3633 return 0;
af7bd4dc 3634}
af7bd4dc 3635
22873dea
MS
3636/**
3637 * vfs_tmpfile_open - open a tmpfile for kernel internal use
abf08576 3638 * @idmap: idmap of the mount the inode was found from
22873dea
MS
3639 * @parentpath: path of the base directory
3640 * @mode: mode of the new tmpfile
3641 * @open_flag: flags
3642 * @cred: credentials for open
3643 *
3644 * Create and open a temporary file. The file is not accounted in nr_files,
3645 * hence this is only for kernel internal use, and must not be installed into
3646 * file tables or such.
3647 */
abf08576 3648struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
22873dea
MS
3649 const struct path *parentpath,
3650 umode_t mode, int open_flag, const struct cred *cred)
3651{
3652 struct file *file;
3653 int error;
22873dea 3654
9751b338
MS
3655 file = alloc_empty_file_noaccount(open_flag, cred);
3656 if (!IS_ERR(file)) {
abf08576 3657 error = vfs_tmpfile(idmap, parentpath, file, mode);
9751b338
MS
3658 if (error) {
3659 fput(file);
3660 file = ERR_PTR(error);
3661 }
3662 }
22873dea 3663 return file;
af7bd4dc 3664}
22873dea 3665EXPORT_SYMBOL(vfs_tmpfile_open);
af7bd4dc 3666
c8a53ee5 3667static int do_tmpfile(struct nameidata *nd, unsigned flags,
60545d0d 3668 const struct open_flags *op,
3ec2eef1 3669 struct file *file)
60545d0d 3670{
625b6d10 3671 struct path path;
c8a53ee5 3672 int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
9751b338 3673
60545d0d
AV
3674 if (unlikely(error))
3675 return error;
625b6d10 3676 error = mnt_want_write(path.mnt);
60545d0d
AV
3677 if (unlikely(error))
3678 goto out;
abf08576 3679 error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode);
9751b338 3680 if (error)
60545d0d 3681 goto out2;
9751b338 3682 audit_inode(nd->name, file->f_path.dentry, 0);
60545d0d 3683out2:
625b6d10 3684 mnt_drop_write(path.mnt);
60545d0d 3685out:
625b6d10 3686 path_put(&path);
60545d0d
AV
3687 return error;
3688}
3689
6ac08709
AV
3690static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
3691{
3692 struct path path;
3693 int error = path_lookupat(nd, flags, &path);
3694 if (!error) {
3695 audit_inode(nd->name, path.dentry, 0);
ae2bb293 3696 error = vfs_open(&path, file);
6ac08709
AV
3697 path_put(&path);
3698 }
3699 return error;
3700}
3701
c8a53ee5
AV
3702static struct file *path_openat(struct nameidata *nd,
3703 const struct open_flags *op, unsigned flags)
1da177e4 3704{
30d90494 3705 struct file *file;
13aab428 3706 int error;
31e6b01f 3707
ea73ea72 3708 file = alloc_empty_file(op->open_flag, current_cred());
1afc99be
AV
3709 if (IS_ERR(file))
3710 return file;
31e6b01f 3711
bb458c64 3712 if (unlikely(file->f_flags & __O_TMPFILE)) {
3ec2eef1 3713 error = do_tmpfile(nd, flags, op, file);
5f336e72 3714 } else if (unlikely(file->f_flags & O_PATH)) {
6ac08709 3715 error = do_o_path(nd, flags, file);
5f336e72
AV
3716 } else {
3717 const char *s = path_init(nd, flags);
3718 while (!(error = link_path_walk(s, nd)) &&
c5971b8c 3719 (s = open_last_lookups(nd, file, op)) != NULL)
1ccac622 3720 ;
c5971b8c
AV
3721 if (!error)
3722 error = do_open(nd, file, op);
5f336e72 3723 terminate_walk(nd);
806b681c 3724 }
7c1c01ec 3725 if (likely(!error)) {
aad888f8 3726 if (likely(file->f_mode & FMODE_OPENED))
7c1c01ec
AV
3727 return file;
3728 WARN_ON(1);
3729 error = -EINVAL;
16b1c1cd 3730 }
7c1c01ec
AV
3731 fput(file);
3732 if (error == -EOPENSTALE) {
3733 if (flags & LOOKUP_RCU)
3734 error = -ECHILD;
3735 else
3736 error = -ESTALE;
2675a4eb 3737 }
7c1c01ec 3738 return ERR_PTR(error);
1da177e4
LT
3739}
3740
669abf4e 3741struct file *do_filp_open(int dfd, struct filename *pathname,
f9652e10 3742 const struct open_flags *op)
13aab428 3743{
9883d185 3744 struct nameidata nd;
f9652e10 3745 int flags = op->lookup_flags;
13aab428
AV
3746 struct file *filp;
3747
06422964 3748 set_nameidata(&nd, dfd, pathname, NULL);
c8a53ee5 3749 filp = path_openat(&nd, op, flags | LOOKUP_RCU);
13aab428 3750 if (unlikely(filp == ERR_PTR(-ECHILD)))
c8a53ee5 3751 filp = path_openat(&nd, op, flags);
13aab428 3752 if (unlikely(filp == ERR_PTR(-ESTALE)))
c8a53ee5 3753 filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
9883d185 3754 restore_nameidata();
13aab428
AV
3755 return filp;
3756}
3757
ffb37ca3 3758struct file *do_file_open_root(const struct path *root,
f9652e10 3759 const char *name, const struct open_flags *op)
73d049a4 3760{
9883d185 3761 struct nameidata nd;
73d049a4 3762 struct file *file;
51689104 3763 struct filename *filename;
bcba1e7d 3764 int flags = op->lookup_flags;
73d049a4 3765
ffb37ca3 3766 if (d_is_symlink(root->dentry) && op->intent & LOOKUP_OPEN)
73d049a4
AV
3767 return ERR_PTR(-ELOOP);
3768
51689104 3769 filename = getname_kernel(name);
a1c83681 3770 if (IS_ERR(filename))
51689104
PM
3771 return ERR_CAST(filename);
3772
06422964 3773 set_nameidata(&nd, -1, filename, root);
c8a53ee5 3774 file = path_openat(&nd, op, flags | LOOKUP_RCU);
73d049a4 3775 if (unlikely(file == ERR_PTR(-ECHILD)))
c8a53ee5 3776 file = path_openat(&nd, op, flags);
73d049a4 3777 if (unlikely(file == ERR_PTR(-ESTALE)))
c8a53ee5 3778 file = path_openat(&nd, op, flags | LOOKUP_REVAL);
9883d185 3779 restore_nameidata();
51689104 3780 putname(filename);
73d049a4
AV
3781 return file;
3782}
3783
b4a4f213
SB
3784static struct dentry *filename_create(int dfd, struct filename *name,
3785 struct path *path, unsigned int lookup_flags)
1da177e4 3786{
c663e5d8 3787 struct dentry *dentry = ERR_PTR(-EEXIST);
391172c4 3788 struct qstr last;
b3d4650d
N
3789 bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
3790 unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
3791 unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
391172c4 3792 int type;
c30dabfe 3793 int err2;
1ac12b4b 3794 int error;
1ac12b4b 3795
b3d4650d 3796 error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
0ee50b47
DK
3797 if (error)
3798 return ERR_PTR(error);
1da177e4 3799
c663e5d8
CH
3800 /*
3801 * Yucky last component or no last component at all?
3802 * (foo/., foo/.., /////)
3803 */
5c31b6ce 3804 if (unlikely(type != LAST_NORM))
ed75e95d 3805 goto out;
c663e5d8 3806
c30dabfe 3807 /* don't fail immediately if it's r/o, at least try to report other errors */
391172c4 3808 err2 = mnt_want_write(path->mnt);
c663e5d8 3809 /*
b3d4650d
N
3810 * Do the final lookup. Suppress 'create' if there is a trailing
3811 * '/', and a directory wasn't requested.
c663e5d8 3812 */
b3d4650d
N
3813 if (last.name[last.len] && !want_dir)
3814 create_flags = 0;
5955102c 3815 inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
b3d4650d 3816 dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
1da177e4 3817 if (IS_ERR(dentry))
a8104a9f 3818 goto unlock;
c663e5d8 3819
a8104a9f 3820 error = -EEXIST;
b18825a7 3821 if (d_is_positive(dentry))
a8104a9f 3822 goto fail;
b18825a7 3823
c663e5d8
CH
3824 /*
3825 * Special case - lookup gave negative, but... we had foo/bar/
3826 * From the vfs_mknod() POV we just have a negative dentry -
3827 * all is fine. Let's be bastards - you had / on the end, you've
3828 * been asking for (non-existent) directory. -ENOENT for you.
3829 */
b3d4650d 3830 if (unlikely(!create_flags)) {
a8104a9f 3831 error = -ENOENT;
ed75e95d 3832 goto fail;
e9baf6e5 3833 }
c30dabfe
JK
3834 if (unlikely(err2)) {
3835 error = err2;
a8104a9f 3836 goto fail;
c30dabfe 3837 }
1da177e4 3838 return dentry;
1da177e4 3839fail:
a8104a9f
AV
3840 dput(dentry);
3841 dentry = ERR_PTR(error);
3842unlock:
5955102c 3843 inode_unlock(path->dentry->d_inode);
c30dabfe 3844 if (!err2)
391172c4 3845 mnt_drop_write(path->mnt);
ed75e95d 3846out:
391172c4 3847 path_put(path);
1da177e4
LT
3848 return dentry;
3849}
fa14a0b8 3850
/*
 * filename_create() for a pathname that lives in kernel memory: wrap
 * @pathname with getname_kernel(), run filename_create() on it and drop the
 * name again.  The return value is whatever filename_create() returned.
 */
struct dentry *kern_path_create(int dfd, const char *pathname,
				struct path *path, unsigned int lookup_flags)
{
	struct filename *kname = getname_kernel(pathname);
	struct dentry *child;

	child = filename_create(dfd, kname, path, lookup_flags);
	putname(kname);
	return child;
}
EXPORT_SYMBOL(kern_path_create);
3861
921a1650
AV
3862void done_path_create(struct path *path, struct dentry *dentry)
3863{
3864 dput(dentry);
5955102c 3865 inode_unlock(path->dentry->d_inode);
a8104a9f 3866 mnt_drop_write(path->mnt);
921a1650
AV
3867 path_put(path);
3868}
3869EXPORT_SYMBOL(done_path_create);
3870
520ae687 3871inline struct dentry *user_path_create(int dfd, const char __user *pathname,
1ac12b4b 3872 struct path *path, unsigned int lookup_flags)
dae6ad8f 3873{
b4a4f213
SB
3874 struct filename *filename = getname(pathname);
3875 struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
3876
3877 putname(filename);
3878 return res;
dae6ad8f
AV
3879}
3880EXPORT_SYMBOL(user_path_create);
3881
6521f891
CB
3882/**
3883 * vfs_mknod - create device node or file
abf08576 3884 * @idmap: idmap of the mount the inode was found from
6521f891
CB
3885 * @dir: inode of @dentry
3886 * @dentry: pointer to dentry of the base directory
3887 * @mode: mode of the new device node or file
3888 * @dev: device number of device to create
3889 *
3890 * Create a device node or file.
3891 *
abf08576
CB
3892 * If the inode has been found through an idmapped mount the idmap of
3893 * the vfsmount must be passed through @idmap. This function will then take
3894 * care to map the inode according to @idmap before checking permissions.
6521f891 3895 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 3896 * raw inode simply passs @nop_mnt_idmap.
6521f891 3897 */
abf08576 3898int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
6521f891 3899 struct dentry *dentry, umode_t mode, dev_t dev)
1da177e4 3900{
a3c751a5 3901 bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
4609e1f1 3902 int error = may_create(idmap, dir, dentry);
1da177e4
LT
3903
3904 if (error)
3905 return error;
3906
a3c751a5
MS
3907 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
3908 !capable(CAP_MKNOD))
1da177e4
LT
3909 return -EPERM;
3910
acfa4380 3911 if (!dir->i_op->mknod)
1da177e4
LT
3912 return -EPERM;
3913
9452e93e 3914 mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
08ce5f16
SH
3915 error = devcgroup_inode_mknod(mode, dev);
3916 if (error)
3917 return error;
3918
1da177e4
LT
3919 error = security_inode_mknod(dir, dentry, mode, dev);
3920 if (error)
3921 return error;
3922
5ebb29be 3923 error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
a74574aa 3924 if (!error)
f38aa942 3925 fsnotify_create(dir, dentry);
1da177e4
LT
3926 return error;
3927}
4d359507 3928EXPORT_SYMBOL(vfs_mknod);
1da177e4 3929
f69aac00 3930static int may_mknod(umode_t mode)
463c3197
DH
3931{
3932 switch (mode & S_IFMT) {
3933 case S_IFREG:
3934 case S_IFCHR:
3935 case S_IFBLK:
3936 case S_IFIFO:
3937 case S_IFSOCK:
3938 case 0: /* zero mode translates to S_IFREG */
3939 return 0;
3940 case S_IFDIR:
3941 return -EPERM;
3942 default:
3943 return -EINVAL;
3944 }
3945}
3946
45f30dab 3947static int do_mknodat(int dfd, struct filename *name, umode_t mode,
87c4e192 3948 unsigned int dev)
1da177e4 3949{
abf08576 3950 struct mnt_idmap *idmap;
2ad94ae6 3951 struct dentry *dentry;
dae6ad8f
AV
3952 struct path path;
3953 int error;
972567f1 3954 unsigned int lookup_flags = 0;
1da177e4 3955
8e4bfca1
AV
3956 error = may_mknod(mode);
3957 if (error)
7797251b 3958 goto out1;
972567f1 3959retry:
b4a4f213 3960 dentry = filename_create(dfd, name, &path, lookup_flags);
7797251b 3961 error = PTR_ERR(dentry);
dae6ad8f 3962 if (IS_ERR(dentry))
7797251b 3963 goto out1;
2ad94ae6 3964
1639a49c
YX
3965 error = security_path_mknod(&path, dentry,
3966 mode_strip_umask(path.dentry->d_inode, mode), dev);
be6d3e56 3967 if (error)
7797251b 3968 goto out2;
6521f891 3969
abf08576 3970 idmap = mnt_idmap(path.mnt);
463c3197 3971 switch (mode & S_IFMT) {
1da177e4 3972 case 0: case S_IFREG:
abf08576 3973 error = vfs_create(idmap, path.dentry->d_inode,
6521f891 3974 dentry, mode, true);
05d1a717 3975 if (!error)
39f60c1c 3976 ima_post_path_mknod(idmap, dentry);
1da177e4
LT
3977 break;
3978 case S_IFCHR: case S_IFBLK:
abf08576 3979 error = vfs_mknod(idmap, path.dentry->d_inode,
6521f891 3980 dentry, mode, new_decode_dev(dev));
1da177e4
LT
3981 break;
3982 case S_IFIFO: case S_IFSOCK:
abf08576 3983 error = vfs_mknod(idmap, path.dentry->d_inode,
6521f891 3984 dentry, mode, 0);
1da177e4 3985 break;
1da177e4 3986 }
7797251b 3987out2:
921a1650 3988 done_path_create(&path, dentry);
972567f1
JL
3989 if (retry_estale(error, lookup_flags)) {
3990 lookup_flags |= LOOKUP_REVAL;
3991 goto retry;
3992 }
7797251b
DK
3993out1:
3994 putname(name);
1da177e4
LT
3995 return error;
3996}
3997
87c4e192
DB
3998SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
3999 unsigned int, dev)
4000{
7797251b 4001 return do_mknodat(dfd, getname(filename), mode, dev);
87c4e192
DB
4002}
4003
8208a22b 4004SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
5590ff0d 4005{
7797251b 4006 return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
5590ff0d
UD
4007}
4008
6521f891
CB
4009/**
4010 * vfs_mkdir - create directory
abf08576 4011 * @idmap: idmap of the mount the inode was found from
6521f891
CB
4012 * @dir: inode of @dentry
4013 * @dentry: pointer to dentry of the base directory
4014 * @mode: mode of the new directory
4015 *
4016 * Create a directory.
4017 *
abf08576
CB
4018 * If the inode has been found through an idmapped mount the idmap of
4019 * the vfsmount must be passed through @idmap. This function will then take
4020 * care to map the inode according to @idmap before checking permissions.
6521f891 4021 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 4022 * raw inode simply passs @nop_mnt_idmap.
6521f891 4023 */
abf08576 4024int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
6521f891 4025 struct dentry *dentry, umode_t mode)
1da177e4 4026{
abf08576 4027 int error;
8de52778 4028 unsigned max_links = dir->i_sb->s_max_links;
1da177e4 4029
4609e1f1 4030 error = may_create(idmap, dir, dentry);
1da177e4
LT
4031 if (error)
4032 return error;
4033
acfa4380 4034 if (!dir->i_op->mkdir)
1da177e4
LT
4035 return -EPERM;
4036
9452e93e 4037 mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0);
1da177e4
LT
4038 error = security_inode_mkdir(dir, dentry, mode);
4039 if (error)
4040 return error;
4041
8de52778
AV
4042 if (max_links && dir->i_nlink >= max_links)
4043 return -EMLINK;
4044
c54bd91e 4045 error = dir->i_op->mkdir(idmap, dir, dentry, mode);
a74574aa 4046 if (!error)
f38aa942 4047 fsnotify_mkdir(dir, dentry);
1da177e4
LT
4048 return error;
4049}
4d359507 4050EXPORT_SYMBOL(vfs_mkdir);
1da177e4 4051
45f30dab 4052int do_mkdirat(int dfd, struct filename *name, umode_t mode)
1da177e4 4053{
6902d925 4054 struct dentry *dentry;
dae6ad8f
AV
4055 struct path path;
4056 int error;
b76d8b82 4057 unsigned int lookup_flags = LOOKUP_DIRECTORY;
1da177e4 4058
b76d8b82 4059retry:
b4a4f213 4060 dentry = filename_create(dfd, name, &path, lookup_flags);
584d3226 4061 error = PTR_ERR(dentry);
6902d925 4062 if (IS_ERR(dentry))
584d3226 4063 goto out_putname;
1da177e4 4064
1639a49c
YX
4065 error = security_path_mkdir(&path, dentry,
4066 mode_strip_umask(path.dentry->d_inode, mode));
6521f891 4067 if (!error) {
abf08576
CB
4068 error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
4069 dentry, mode);
6521f891 4070 }
921a1650 4071 done_path_create(&path, dentry);
b76d8b82
JL
4072 if (retry_estale(error, lookup_flags)) {
4073 lookup_flags |= LOOKUP_REVAL;
4074 goto retry;
4075 }
584d3226
DK
4076out_putname:
4077 putname(name);
1da177e4
LT
4078 return error;
4079}
4080
0101db7a
DB
4081SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
4082{
584d3226 4083 return do_mkdirat(dfd, getname(pathname), mode);
0101db7a
DB
4084}
4085
a218d0fd 4086SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
5590ff0d 4087{
584d3226 4088 return do_mkdirat(AT_FDCWD, getname(pathname), mode);
5590ff0d
UD
4089}
4090
6521f891
CB
4091/**
4092 * vfs_rmdir - remove directory
abf08576 4093 * @idmap: idmap of the mount the inode was found from
6521f891
CB
4094 * @dir: inode of @dentry
4095 * @dentry: pointer to dentry of the base directory
4096 *
4097 * Remove a directory.
4098 *
abf08576
CB
4099 * If the inode has been found through an idmapped mount the idmap of
4100 * the vfsmount must be passed through @idmap. This function will then take
4101 * care to map the inode according to @idmap before checking permissions.
6521f891 4102 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 4103 * raw inode simply passs @nop_mnt_idmap.
6521f891 4104 */
abf08576 4105int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
6521f891 4106 struct dentry *dentry)
1da177e4 4107{
4609e1f1 4108 int error = may_delete(idmap, dir, dentry, 1);
1da177e4
LT
4109
4110 if (error)
4111 return error;
4112
acfa4380 4113 if (!dir->i_op->rmdir)
1da177e4
LT
4114 return -EPERM;
4115
1d2ef590 4116 dget(dentry);
5955102c 4117 inode_lock(dentry->d_inode);
912dbc15
SW
4118
4119 error = -EBUSY;
1bd9c4e4
DH
4120 if (is_local_mountpoint(dentry) ||
4121 (dentry->d_inode->i_flags & S_KERNEL_FILE))
912dbc15
SW
4122 goto out;
4123
4124 error = security_inode_rmdir(dir, dentry);
4125 if (error)
4126 goto out;
4127
4128 error = dir->i_op->rmdir(dir, dentry);
4129 if (error)
4130 goto out;
4131
8767712f 4132 shrink_dcache_parent(dentry);
912dbc15
SW
4133 dentry->d_inode->i_flags |= S_DEAD;
4134 dont_mount(dentry);
8ed936b5 4135 detach_mounts(dentry);
912dbc15
SW
4136
4137out:
5955102c 4138 inode_unlock(dentry->d_inode);
1d2ef590 4139 dput(dentry);
912dbc15 4140 if (!error)
a37d9a17 4141 d_delete_notify(dir, dentry);
1da177e4
LT
4142 return error;
4143}
4d359507 4144EXPORT_SYMBOL(vfs_rmdir);
1da177e4 4145
45f30dab 4146int do_rmdir(int dfd, struct filename *name)
1da177e4 4147{
0ee50b47 4148 int error;
1da177e4 4149 struct dentry *dentry;
f5beed75
AV
4150 struct path path;
4151 struct qstr last;
4152 int type;
c6ee9206
JL
4153 unsigned int lookup_flags = 0;
4154retry:
c5f563f9 4155 error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
0ee50b47
DK
4156 if (error)
4157 goto exit1;
1da177e4 4158
f5beed75 4159 switch (type) {
0612d9fb
OH
4160 case LAST_DOTDOT:
4161 error = -ENOTEMPTY;
0ee50b47 4162 goto exit2;
0612d9fb
OH
4163 case LAST_DOT:
4164 error = -EINVAL;
0ee50b47 4165 goto exit2;
0612d9fb
OH
4166 case LAST_ROOT:
4167 error = -EBUSY;
0ee50b47 4168 goto exit2;
1da177e4 4169 }
0612d9fb 4170
f5beed75 4171 error = mnt_want_write(path.mnt);
c30dabfe 4172 if (error)
0ee50b47 4173 goto exit2;
0612d9fb 4174
5955102c 4175 inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
f5beed75 4176 dentry = __lookup_hash(&last, path.dentry, lookup_flags);
1da177e4 4177 error = PTR_ERR(dentry);
6902d925 4178 if (IS_ERR(dentry))
0ee50b47 4179 goto exit3;
e6bc45d6
TT
4180 if (!dentry->d_inode) {
4181 error = -ENOENT;
0ee50b47 4182 goto exit4;
e6bc45d6 4183 }
f5beed75 4184 error = security_path_rmdir(&path, dentry);
be6d3e56 4185 if (error)
0ee50b47 4186 goto exit4;
abf08576 4187 error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry);
0ee50b47 4188exit4:
6902d925 4189 dput(dentry);
0ee50b47 4190exit3:
5955102c 4191 inode_unlock(path.dentry->d_inode);
f5beed75 4192 mnt_drop_write(path.mnt);
0ee50b47 4193exit2:
f5beed75 4194 path_put(&path);
c6ee9206
JL
4195 if (retry_estale(error, lookup_flags)) {
4196 lookup_flags |= LOOKUP_REVAL;
4197 goto retry;
4198 }
0ee50b47 4199exit1:
24fb33d4 4200 putname(name);
1da177e4
LT
4201 return error;
4202}
4203
3cdad428 4204SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
5590ff0d 4205{
e24ab0ef 4206 return do_rmdir(AT_FDCWD, getname(pathname));
5590ff0d
UD
4207}
4208
b21996e3
BF
4209/**
4210 * vfs_unlink - unlink a filesystem object
abf08576 4211 * @idmap: idmap of the mount the inode was found from
b21996e3
BF
4212 * @dir: parent directory
4213 * @dentry: victim
4214 * @delegated_inode: returns victim inode, if the inode is delegated.
4215 *
4216 * The caller must hold dir->i_mutex.
4217 *
4218 * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
4219 * return a reference to the inode in delegated_inode. The caller
4220 * should then break the delegation on that inode and retry. Because
4221 * breaking a delegation may take a long time, the caller should drop
4222 * dir->i_mutex before doing so.
4223 *
4224 * Alternatively, a caller may pass NULL for delegated_inode. This may
4225 * be appropriate for callers that expect the underlying filesystem not
4226 * to be NFS exported.
6521f891 4227 *
abf08576
CB
4228 * If the inode has been found through an idmapped mount the idmap of
4229 * the vfsmount must be passed through @idmap. This function will then take
4230 * care to map the inode according to @idmap before checking permissions.
6521f891 4231 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 4232 * raw inode simply passs @nop_mnt_idmap.
b21996e3 4233 */
abf08576 4234int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
6521f891 4235 struct dentry *dentry, struct inode **delegated_inode)
1da177e4 4236{
9accbb97 4237 struct inode *target = dentry->d_inode;
4609e1f1 4238 int error = may_delete(idmap, dir, dentry, 0);
1da177e4
LT
4239
4240 if (error)
4241 return error;
4242
acfa4380 4243 if (!dir->i_op->unlink)
1da177e4
LT
4244 return -EPERM;
4245
5955102c 4246 inode_lock(target);
51cc3a66
HD
4247 if (IS_SWAPFILE(target))
4248 error = -EPERM;
4249 else if (is_local_mountpoint(dentry))
1da177e4
LT
4250 error = -EBUSY;
4251 else {
4252 error = security_inode_unlink(dir, dentry);
bec1052e 4253 if (!error) {
5a14696c
BF
4254 error = try_break_deleg(target, delegated_inode);
4255 if (error)
b21996e3 4256 goto out;
1da177e4 4257 error = dir->i_op->unlink(dir, dentry);
8ed936b5 4258 if (!error) {
d83c49f3 4259 dont_mount(dentry);
8ed936b5
EB
4260 detach_mounts(dentry);
4261 }
bec1052e 4262 }
1da177e4 4263 }
b21996e3 4264out:
5955102c 4265 inode_unlock(target);
1da177e4
LT
4266
4267 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
a37d9a17
AG
4268 if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
4269 fsnotify_unlink(dir, dentry);
4270 } else if (!error) {
9accbb97 4271 fsnotify_link_count(target);
a37d9a17 4272 d_delete_notify(dir, dentry);
1da177e4 4273 }
0eeca283 4274
1da177e4
LT
4275 return error;
4276}
4d359507 4277EXPORT_SYMBOL(vfs_unlink);
1da177e4
LT
4278
4279/*
4280 * Make sure that the actual truncation of the file will occur outside its
1b1dcc1b 4281 * directory's i_mutex. Truncate can take a long time if there is a lot of
1da177e4
LT
4282 * writeout happening, and we don't want to prevent access to the directory
4283 * while waiting on the I/O.
4284 */
45f30dab 4285int do_unlinkat(int dfd, struct filename *name)
1da177e4 4286{
2ad94ae6 4287 int error;
1da177e4 4288 struct dentry *dentry;
f5beed75
AV
4289 struct path path;
4290 struct qstr last;
4291 int type;
1da177e4 4292 struct inode *inode = NULL;
b21996e3 4293 struct inode *delegated_inode = NULL;
5d18f813
JL
4294 unsigned int lookup_flags = 0;
4295retry:
c5f563f9 4296 error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
0ee50b47
DK
4297 if (error)
4298 goto exit1;
2ad94ae6 4299
1da177e4 4300 error = -EISDIR;
f5beed75 4301 if (type != LAST_NORM)
0ee50b47 4302 goto exit2;
0612d9fb 4303
f5beed75 4304 error = mnt_want_write(path.mnt);
c30dabfe 4305 if (error)
0ee50b47 4306 goto exit2;
b21996e3 4307retry_deleg:
5955102c 4308 inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
f5beed75 4309 dentry = __lookup_hash(&last, path.dentry, lookup_flags);
1da177e4
LT
4310 error = PTR_ERR(dentry);
4311 if (!IS_ERR(dentry)) {
6521f891 4312
1da177e4 4313 /* Why not before? Because we want correct error value */
f5beed75 4314 if (last.name[last.len])
50338b88 4315 goto slashes;
1da177e4 4316 inode = dentry->d_inode;
b18825a7 4317 if (d_is_negative(dentry))
e6bc45d6
TT
4318 goto slashes;
4319 ihold(inode);
f5beed75 4320 error = security_path_unlink(&path, dentry);
be6d3e56 4321 if (error)
0ee50b47 4322 goto exit3;
abf08576
CB
4323 error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
4324 dentry, &delegated_inode);
0ee50b47 4325exit3:
1da177e4
LT
4326 dput(dentry);
4327 }
5955102c 4328 inode_unlock(path.dentry->d_inode);
1da177e4
LT
4329 if (inode)
4330 iput(inode); /* truncate the inode here */
b21996e3
BF
4331 inode = NULL;
4332 if (delegated_inode) {
5a14696c 4333 error = break_deleg_wait(&delegated_inode);
b21996e3
BF
4334 if (!error)
4335 goto retry_deleg;
4336 }
f5beed75 4337 mnt_drop_write(path.mnt);
0ee50b47 4338exit2:
f5beed75 4339 path_put(&path);
5d18f813
JL
4340 if (retry_estale(error, lookup_flags)) {
4341 lookup_flags |= LOOKUP_REVAL;
4342 inode = NULL;
4343 goto retry;
4344 }
0ee50b47 4345exit1:
da2f1362 4346 putname(name);
1da177e4
LT
4347 return error;
4348
4349slashes:
b18825a7
DH
4350 if (d_is_negative(dentry))
4351 error = -ENOENT;
44b1d530 4352 else if (d_is_dir(dentry))
b18825a7
DH
4353 error = -EISDIR;
4354 else
4355 error = -ENOTDIR;
0ee50b47 4356 goto exit3;
1da177e4
LT
4357}
4358
2e4d0924 4359SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
5590ff0d
UD
4360{
4361 if ((flag & ~AT_REMOVEDIR) != 0)
4362 return -EINVAL;
4363
4364 if (flag & AT_REMOVEDIR)
e24ab0ef 4365 return do_rmdir(dfd, getname(pathname));
da2f1362 4366 return do_unlinkat(dfd, getname(pathname));
5590ff0d
UD
4367}
4368
3480b257 4369SYSCALL_DEFINE1(unlink, const char __user *, pathname)
5590ff0d 4370{
da2f1362 4371 return do_unlinkat(AT_FDCWD, getname(pathname));
5590ff0d
UD
4372}
4373
6521f891
CB
4374/**
4375 * vfs_symlink - create symlink
abf08576 4376 * @idmap: idmap of the mount the inode was found from
6521f891
CB
4377 * @dir: inode of @dentry
4378 * @dentry: pointer to dentry of the base directory
4379 * @oldname: name of the file to link to
4380 *
4381 * Create a symlink.
4382 *
abf08576
CB
4383 * If the inode has been found through an idmapped mount the idmap of
4384 * the vfsmount must be passed through @idmap. This function will then take
4385 * care to map the inode according to @idmap before checking permissions.
6521f891 4386 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 4387 * raw inode simply passs @nop_mnt_idmap.
6521f891 4388 */
abf08576 4389int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
6521f891 4390 struct dentry *dentry, const char *oldname)
1da177e4 4391{
7a77db95 4392 int error;
1da177e4 4393
4609e1f1 4394 error = may_create(idmap, dir, dentry);
1da177e4
LT
4395 if (error)
4396 return error;
4397
acfa4380 4398 if (!dir->i_op->symlink)
1da177e4
LT
4399 return -EPERM;
4400
4401 error = security_inode_symlink(dir, dentry, oldname);
4402 if (error)
4403 return error;
4404
7a77db95 4405 error = dir->i_op->symlink(idmap, dir, dentry, oldname);
a74574aa 4406 if (!error)
f38aa942 4407 fsnotify_create(dir, dentry);
1da177e4
LT
4408 return error;
4409}
4d359507 4410EXPORT_SYMBOL(vfs_symlink);
1da177e4 4411
7a8721f8 4412int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
1da177e4 4413{
2ad94ae6 4414 int error;
6902d925 4415 struct dentry *dentry;
dae6ad8f 4416 struct path path;
f46d3567 4417 unsigned int lookup_flags = 0;
1da177e4 4418
da2d0ced
DK
4419 if (IS_ERR(from)) {
4420 error = PTR_ERR(from);
4421 goto out_putnames;
4422 }
f46d3567 4423retry:
b4a4f213 4424 dentry = filename_create(newdfd, to, &path, lookup_flags);
6902d925
DH
4425 error = PTR_ERR(dentry);
4426 if (IS_ERR(dentry))
da2d0ced 4427 goto out_putnames;
6902d925 4428
91a27b2a 4429 error = security_path_symlink(&path, dentry, from->name);
abf08576
CB
4430 if (!error)
4431 error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
4432 dentry, from->name);
921a1650 4433 done_path_create(&path, dentry);
f46d3567
JL
4434 if (retry_estale(error, lookup_flags)) {
4435 lookup_flags |= LOOKUP_REVAL;
4436 goto retry;
4437 }
da2d0ced
DK
4438out_putnames:
4439 putname(to);
1da177e4
LT
4440 putname(from);
4441 return error;
4442}
4443
b724e846
DB
4444SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
4445 int, newdfd, const char __user *, newname)
4446{
da2d0ced 4447 return do_symlinkat(getname(oldname), newdfd, getname(newname));
b724e846
DB
4448}
4449
3480b257 4450SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
5590ff0d 4451{
da2d0ced 4452 return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
5590ff0d
UD
4453}
4454
146a8595
BF
4455/**
4456 * vfs_link - create a new link
4457 * @old_dentry: object to be linked
abf08576 4458 * @idmap: idmap of the mount
146a8595
BF
4459 * @dir: new parent
4460 * @new_dentry: where to create the new link
4461 * @delegated_inode: returns inode needing a delegation break
4462 *
4463 * The caller must hold dir->i_mutex
4464 *
4465 * If vfs_link discovers a delegation on the to-be-linked file in need
4466 * of breaking, it will return -EWOULDBLOCK and return a reference to the
4467 * inode in delegated_inode. The caller should then break the delegation
4468 * and retry. Because breaking a delegation may take a long time, the
4469 * caller should drop the i_mutex before doing so.
4470 *
4471 * Alternatively, a caller may pass NULL for delegated_inode. This may
4472 * be appropriate for callers that expect the underlying filesystem not
4473 * to be NFS exported.
6521f891 4474 *
abf08576
CB
4475 * If the inode has been found through an idmapped mount the idmap of
4476 * the vfsmount must be passed through @idmap. This function will then take
4477 * care to map the inode according to @idmap before checking permissions.
6521f891 4478 * On non-idmapped mounts or if permission checking is to be performed on the
abf08576 4479 * raw inode simply passs @nop_mnt_idmap.
146a8595 4480 */
abf08576 4481int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
6521f891
CB
4482 struct inode *dir, struct dentry *new_dentry,
4483 struct inode **delegated_inode)
1da177e4
LT
4484{
4485 struct inode *inode = old_dentry->d_inode;
8de52778 4486 unsigned max_links = dir->i_sb->s_max_links;
1da177e4
LT
4487 int error;
4488
4489 if (!inode)
4490 return -ENOENT;
4491
4609e1f1 4492 error = may_create(idmap, dir, new_dentry);
1da177e4
LT
4493 if (error)
4494 return error;
4495
4496 if (dir->i_sb != inode->i_sb)
4497 return -EXDEV;
4498
4499 /*
4500 * A link to an append-only or immutable file cannot be created.
4501 */
4502 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4503 return -EPERM;
0bd23d09
EB
4504 /*
4505 * Updating the link count will likely cause i_uid and i_gid to
4506 * be writen back improperly if their true value is unknown to
4507 * the vfs.
4508 */
4609e1f1 4509 if (HAS_UNMAPPED_ID(idmap, inode))
0bd23d09 4510 return -EPERM;
acfa4380 4511 if (!dir->i_op->link)
1da177e4 4512 return -EPERM;
7e79eedb 4513 if (S_ISDIR(inode->i_mode))
1da177e4
LT
4514 return -EPERM;
4515
4516 error = security_inode_link(old_dentry, dir, new_dentry);
4517 if (error)
4518 return error;
4519
5955102c 4520 inode_lock(inode);
aae8a97d 4521 /* Make sure we don't allow creating hardlink to an unlinked file */
f4e0c30c 4522 if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
aae8a97d 4523 error = -ENOENT;
8de52778
AV
4524 else if (max_links && inode->i_nlink >= max_links)
4525 error = -EMLINK;
146a8595
BF
4526 else {
4527 error = try_break_deleg(inode, delegated_inode);
4528 if (!error)
4529 error = dir->i_op->link(old_dentry, dir, new_dentry);
4530 }
f4e0c30c
AV
4531
4532 if (!error && (inode->i_state & I_LINKABLE)) {
4533 spin_lock(&inode->i_lock);
4534 inode->i_state &= ~I_LINKABLE;
4535 spin_unlock(&inode->i_lock);
4536 }
5955102c 4537 inode_unlock(inode);
e31e14ec 4538 if (!error)
7e79eedb 4539 fsnotify_link(dir, inode, new_dentry);
1da177e4
LT
4540 return error;
4541}
4d359507 4542EXPORT_SYMBOL(vfs_link);
1da177e4
LT
4543
4544/*
4545 * Hardlinks are often used in delicate situations. We avoid
4546 * security-related surprises by not following symlinks on the
4547 * newname. --KAB
4548 *
4549 * We don't follow them on the oldname either to be compatible
4550 * with linux 2.0, and to avoid hard-linking to directories
4551 * and other special files. --ADM
4552 */
/*
 * do_linkat - implementation shared by the link() and linkat() syscalls.
 *
 * Accepts only AT_SYMLINK_FOLLOW and AT_EMPTY_PATH in @flags. Both @old and
 * @new are released with putname() on every exit path. Returns 0 or a
 * negative errno.
 */
cf30da90 4553int do_linkat(int olddfd, struct filename *old, int newdfd,
020250f3 4554 struct filename *new, int flags)
1da177e4 4555{
abf08576 4556 struct mnt_idmap *idmap;
1da177e4 4557 struct dentry *new_dentry;
dae6ad8f 4558 struct path old_path, new_path;
146a8595 4559 struct inode *delegated_inode = NULL;
11a7b371 4560 int how = 0;
1da177e4 4561 int error;
1da177e4 4562
/* Any flag bit other than the two supported ones is an error. */
020250f3
DK
4563 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
4564 error = -EINVAL;
4565 goto out_putnames;
4566 }
11a7b371 4567 /*
f0cc6ffb
LT
4568 * To use null names we require CAP_DAC_READ_SEARCH
4569 * This ensures that not everyone will be able to create
4570 * hardlink using the passed filedescriptor.
11a7b371 4571 */
020250f3
DK
4572 if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
4573 error = -ENOENT;
4574 goto out_putnames;
f0cc6ffb 4575 }
11a7b371
AK
4576
/* Symlinks in the old name are followed only when explicitly requested. */
4577 if (flags & AT_SYMLINK_FOLLOW)
4578 how |= LOOKUP_FOLLOW;
/* Re-entered after a broken delegation or with LOOKUP_REVAL added to how. */
442e31ca 4579retry:
794ebcea 4580 error = filename_lookup(olddfd, old, how, &old_path, NULL);
1da177e4 4581 if (error)
020250f3 4582 goto out_putnames;
2ad94ae6 4583
/* Only the LOOKUP_REVAL bit of how is forwarded to the create-side lookup. */
b4a4f213 4584 new_dentry = filename_create(newdfd, new, &new_path,
442e31ca 4585 (how & LOOKUP_REVAL));
1da177e4 4586 error = PTR_ERR(new_dentry);
6902d925 4587 if (IS_ERR(new_dentry))
020250f3 4588 goto out_putpath;
dae6ad8f
AV
4589
/* Old and new path must be on the same mount, else -EXDEV. */
4590 error = -EXDEV;
4591 if (old_path.mnt != new_path.mnt)
4592 goto out_dput;
/* The idmap is taken from the new path's mount (same mount as the old one here). */
abf08576 4593 idmap = mnt_idmap(new_path.mnt);
4609e1f1 4594 error = may_linkat(idmap, &old_path);
800179c9
KC
4595 if (unlikely(error))
4596 goto out_dput;
dae6ad8f 4597 error = security_path_link(old_path.dentry, &new_path, new_dentry);
be6d3e56 4598 if (error)
a8104a9f 4599 goto out_dput;
abf08576 4600 error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
6521f891 4601 new_dentry, &delegated_inode);
75c3f29d 4602out_dput:
921a1650 4603 done_path_create(&new_path, new_dentry);
/*
 * vfs_link() handed back an inode with a delegation: wait for the break
 * and, if that succeeded, start over from the lookup of the old name.
 */
146a8595
BF
4604 if (delegated_inode) {
4605 error = break_deleg_wait(&delegated_inode);
d22e6338
OD
4606 if (!error) {
4607 path_put(&old_path);
146a8595 4608 goto retry;
d22e6338 4609 }
146a8595 4610 }
/* When retry_estale() asks for it, redo both lookups with LOOKUP_REVAL set. */
442e31ca 4611 if (retry_estale(error, how)) {
d22e6338 4612 path_put(&old_path);
442e31ca
JL
4613 how |= LOOKUP_REVAL;
4614 goto retry;
4615 }
020250f3 4616out_putpath:
2d8f3038 4617 path_put(&old_path);
020250f3
DK
4618out_putnames:
4619 putname(old);
4620 putname(new);
1da177e4
LT
4621
4622 return error;
4623}
4624
46ea89eb
DB
/*
 * linkat() syscall entry: converts both user-space names to struct filename
 * (the old name via getname_uflags() with @flags) and defers to do_linkat().
 */
4625SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
4626 int, newdfd, const char __user *, newname, int, flags)
4627{
020250f3
DK
4628 return do_linkat(olddfd, getname_uflags(oldname, flags),
4629 newdfd, getname(newname), flags);
46ea89eb
DB
4630}
4631
/* link() syscall entry: do_linkat() with AT_FDCWD for both names and no flags. */
3480b257 4632SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
5590ff0d 4633{
020250f3 4634 return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0);
5590ff0d
UD
4635}
4636
bc27027a
MS
4637/**
4638 * vfs_rename - rename a filesystem object
2111c3c0 4639 * @rd: pointer to &struct renamedata info
bc27027a
MS
4640 *
4641 * The caller must hold multiple mutexes--see lock_rename().
4642 *
4643 * If vfs_rename discovers a delegation in need of breaking at either
4644 * the source or destination, it will return -EWOULDBLOCK and return a
4645 * reference to the inode in delegated_inode. The caller should then
4646 * break the delegation and retry. Because breaking a delegation may
4647 * take a long time, the caller should drop all locks before doing
4648 * so.
4649 *
4650 * Alternatively, a caller may pass NULL for delegated_inode. This may
4651 * be appropriate for callers that expect the underlying filesystem not
4652 * to be NFS exported.
4653 *
1da177e4
LT
4654 * The worst of all namespace operations - renaming directory. "Perverted"
4655 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
4656 * Problems:
0117d427 4657 *
d03b29a2 4658 * a) we can get into loop creation.
1da177e4
LT
4659 * b) race potential - two innocent renames can create a loop together.
4660 * That's where 4.4 screws up. Current fix: serialization on
a11f3a05 4661 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
1da177e4 4662 * story.
6cedba89
BF
4663 * c) we have to lock _four_ objects - parents and victim (if it exists),
4664 * and source (if it is not a directory).
1b1dcc1b 4665 * And that - after we got ->i_mutex on parents (until then we don't know
1da177e4
LT
4666 * whether the target exists). Solution: try to be smart with locking
4667 * order for inodes. We rely on the fact that tree topology may change
a11f3a05 4668 * only under ->s_vfs_rename_mutex _and_ that parent of the object we
1da177e4
LT
4669 * move will be locked. Thus we can rank directories by the tree
4670 * (ancestors first) and rank all non-directories after them.
4671 * That works since everybody except rename does "lock parent, lookup,
a11f3a05 4672 * lock child" and rename is under ->s_vfs_rename_mutex.
1da177e4
LT
4673 * HOWEVER, it relies on the assumption that any object with ->lookup()
4674 * has no more than 1 dentry. If "hybrid" objects will ever appear,
4675 * we'd better make sure that there's no link(2) for them.
e4eaac06 4676 * d) conversion from fhandle to dentry may come in the wrong moment - when
1b1dcc1b 4677 * we are removing the target. Solution: we will have to grab ->i_mutex
1da177e4 4678 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
c41b20e7 4679 * ->i_mutex on parents, which works but leads to some truly excessive
1da177e4
LT
4680 * locking].
4681 */
9fe61450 4682int vfs_rename(struct renamedata *rd)
1da177e4 4683{
bc27027a 4684 int error;
9fe61450
CB
4685 struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
4686 struct dentry *old_dentry = rd->old_dentry;
4687 struct dentry *new_dentry = rd->new_dentry;
4688 struct inode **delegated_inode = rd->delegated_inode;
4689 unsigned int flags = rd->flags;
bc27027a 4690 bool is_dir = d_is_dir(old_dentry);
bc27027a 4691 struct inode *source = old_dentry->d_inode;
9055cba7 4692 struct inode *target = new_dentry->d_inode;
da1ce067
MS
4693 bool new_is_dir = false;
4694 unsigned max_links = new_dir->i_sb->s_max_links;
49d31c2f 4695 struct name_snapshot old_name;
bc27027a 4696
/* Both dentries resolve to the same inode: report success without doing anything. */
8d3e2936 4697 if (source == target)
bc27027a
MS
4698 return 0;
4699
/* The source entry must be deletable from old_dir. */
4609e1f1 4700 error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir);
bc27027a
MS
4701 if (error)
4702 return error;
4703
/*
 * Without an existing target this is a creation in new_dir. With one, the
 * target must be deletable: a plain rename checks it against the source's
 * type (is_dir), an exchange against the target's own type (new_is_dir).
 */
da1ce067 4704 if (!target) {
4609e1f1 4705 error = may_create(rd->new_mnt_idmap, new_dir, new_dentry);
da1ce067
MS
4706 } else {
4707 new_is_dir = d_is_dir(new_dentry);
4708
4709 if (!(flags & RENAME_EXCHANGE))
4609e1f1 4710 error = may_delete(rd->new_mnt_idmap, new_dir,
6521f891 4711 new_dentry, is_dir);
da1ce067 4712 else
4609e1f1 4713 error = may_delete(rd->new_mnt_idmap, new_dir,
6521f891 4714 new_dentry, new_is_dir);
da1ce067 4715 }
bc27027a
MS
4716 if (error)
4717 return error;
4718
/* The source directory's filesystem must implement ->rename(). */
2773bf00 4719 if (!old_dir->i_op->rename)
bc27027a 4720 return -EPERM;
1da177e4
LT
4721
4722 /*
4723 * If we are going to change the parent - check write permissions,
4724 * we'll need to flip '..'.
4725 */
da1ce067
MS
4726 if (new_dir != old_dir) {
4727 if (is_dir) {
4609e1f1 4728 error = inode_permission(rd->old_mnt_idmap, source,
47291baa 4729 MAY_WRITE);
da1ce067
MS
4730 if (error)
4731 return error;
4732 }
/* In an exchange a directory target changes parent as well. */
4733 if ((flags & RENAME_EXCHANGE) && new_is_dir) {
4609e1f1 4734 error = inode_permission(rd->new_mnt_idmap, target,
47291baa 4735 MAY_WRITE);
da1ce067
MS
4736 if (error)
4737 return error;
4738 }
1da177e4
LT
4739 }
4740
0b3974eb
MS
4741 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
4742 flags);
1da177e4
LT
4743 if (error)
4744 return error;
4745
/*
 * From here on every exit goes through the out: label, which undoes the
 * inode locking and the dget() below; the name snapshot is released last.
 */
49d31c2f 4746 take_dentry_name_snapshot(&old_name, old_dentry);
1d2ef590 4747 dget(new_dentry);
/*
 * Non-directory source, or any exchange: lock source and target inodes
 * together. Directory source in a plain rename: lock only the target, if any.
 */
da1ce067 4748 if (!is_dir || (flags & RENAME_EXCHANGE))
bc27027a
MS
4749 lock_two_nondirectories(source, target);
4750 else if (target)
5955102c 4751 inode_lock(target);
9055cba7 4752
/* Swapfiles can be neither renamed nor replaced. */
51cc3a66
HD
4753 error = -EPERM;
4754 if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
4755 goto out;
4756
/* Neither end may be a local mountpoint. */
9055cba7 4757 error = -EBUSY;
7af1364f 4758 if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
9055cba7
SW
4759 goto out;
4760
/*
 * Link-count limit, only relevant when the parent changes: a directory
 * arriving where there was no directory must not push the receiving
 * parent's i_nlink to max_links or beyond.
 */
da1ce067 4761 if (max_links && new_dir != old_dir) {
bc27027a 4762 error = -EMLINK;
da1ce067 4763 if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
bc27027a 4764 goto out;
da1ce067
MS
4765 if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
4766 old_dir->i_nlink >= max_links)
4767 goto out;
4768 }
/* Delegations are broken only on the non-directory participants. */
da1ce067 4769 if (!is_dir) {
bc27027a 4770 error = try_break_deleg(source, delegated_inode);
8e6d782c
BF
4771 if (error)
4772 goto out;
da1ce067
MS
4773 }
4774 if (target && !new_is_dir) {
4775 error = try_break_deleg(target, delegated_inode);
4776 if (error)
4777 goto out;
8e6d782c 4778 }
e18275ae 4779 error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry,
549c7297 4780 new_dir, new_dentry, flags);
51892bbb
SW
4781 if (error)
4782 goto out;
4783
/*
 * A target replaced by a plain rename is gone: mark it unmountable and
 * detach mounts on it; a replaced directory also has its dcache children
 * shrunk and is flagged S_DEAD.
 */
da1ce067 4784 if (!(flags & RENAME_EXCHANGE) && target) {
8767712f
AV
4785 if (is_dir) {
4786 shrink_dcache_parent(new_dentry);
bc27027a 4787 target->i_flags |= S_DEAD;
8767712f 4788 }
51892bbb 4789 dont_mount(new_dentry);
8ed936b5 4790 detach_mounts(new_dentry);
bc27027a 4791 }
/* Unless the filesystem sets FS_RENAME_DOES_D_MOVE, update the dcache here. */
da1ce067
MS
4792 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
4793 if (!(flags & RENAME_EXCHANGE))
4794 d_move(old_dentry, new_dentry);
4795 else
4796 d_exchange(old_dentry, new_dentry);
4797 }
51892bbb 4798out:
/* Mirror of the locking decision taken above. */
da1ce067 4799 if (!is_dir || (flags & RENAME_EXCHANGE))
bc27027a
MS
4800 unlock_two_nondirectories(source, target);
4801 else if (target)
5955102c 4802 inode_unlock(target);
1da177e4 4803 dput(new_dentry);
/*
 * Notifications are sent on success only. The first one uses the name
 * snapshotted before the rename; an exchange sends a second one for the
 * entry that moved in the opposite direction.
 */
da1ce067 4804 if (!error) {
f4ec3a3d 4805 fsnotify_move(old_dir, new_dir, &old_name.name, is_dir,
da1ce067
MS
4806 !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
4807 if (flags & RENAME_EXCHANGE) {
f4ec3a3d 4808 fsnotify_move(new_dir, old_dir, &old_dentry->d_name,
da1ce067
MS
4809 new_is_dir, NULL, new_dentry);
4810 }
4811 }
49d31c2f 4812 release_dentry_name_snapshot(&old_name);
0eeca283 4813
1da177e4
LT
4814 return error;
4815}
4d359507 4816EXPORT_SYMBOL(vfs_rename);
1da177e4 4817
e886663c
JA
/*
 * do_renameat2 - implementation shared by rename(), renameat() and
 * renameat2().
 *
 * Accepts RENAME_NOREPLACE, RENAME_EXCHANGE and RENAME_WHITEOUT in @flags.
 * Both @from and @to are released with putname() on every exit path.
 * Returns 0 or a negative errno.
 */
4818int do_renameat2(int olddfd, struct filename *from, int newdfd,
4819 struct filename *to, unsigned int flags)
1da177e4 4820{
9fe61450 4821 struct renamedata rd;
2ad94ae6
AV
4822 struct dentry *old_dentry, *new_dentry;
4823 struct dentry *trap;
f5beed75
AV
4824 struct path old_path, new_path;
4825 struct qstr old_last, new_last;
4826 int old_type, new_type;
8e6d782c 4827 struct inode *delegated_inode = NULL;
f5beed75 4828 unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
c6a94284 4829 bool should_retry = false;
e886663c 4830 int error = -EINVAL;
520c8b16 4831
/* Unknown flag bits: -EINVAL (error was initialised to it above). */
0d7a8555 4832 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
0ee50b47 4833 goto put_names;
da1ce067 4834
/* RENAME_EXCHANGE cannot be combined with RENAME_NOREPLACE or RENAME_WHITEOUT. */
0d7a8555
MS
4835 if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
4836 (flags & RENAME_EXCHANGE))
0ee50b47 4837 goto put_names;
520c8b16 4838
/* For an exchange the target lookup is done without LOOKUP_RENAME_TARGET. */
f5beed75
AV
4839 if (flags & RENAME_EXCHANGE)
4840 target_flags = 0;
4841
/* Re-entered with LOOKUP_REVAL added to lookup_flags when should_retry was set. */
c6a94284 4842retry:
c5f563f9
AV
4843 error = filename_parentat(olddfd, from, lookup_flags, &old_path,
4844 &old_last, &old_type);
0ee50b47
DK
4845 if (error)
4846 goto put_names;
1da177e4 4847
c5f563f9
AV
4848 error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
4849 &new_type);
0ee50b47 4850 if (error)
1da177e4
LT
4851 goto exit1;
4852
/* Both parents must be on the same mount. */
4853 error = -EXDEV;
f5beed75 4854 if (old_path.mnt != new_path.mnt)
1da177e4
LT
4855 goto exit2;
4856
/* The last component of the source must be a normal name. */
1da177e4 4857 error = -EBUSY;
f5beed75 4858 if (old_type != LAST_NORM)
1da177e4
LT
4859 goto exit2;
4860
/* Same for the target; the errno is -EEXIST under RENAME_NOREPLACE, -EBUSY otherwise. */
0a7c3937
MS
4861 if (flags & RENAME_NOREPLACE)
4862 error = -EEXIST;
f5beed75 4863 if (new_type != LAST_NORM)
1da177e4
LT
4864 goto exit2;
4865
f5beed75 4866 error = mnt_want_write(old_path.mnt);
c30dabfe
JK
4867 if (error)
4868 goto exit2;
4869
/*
 * Re-entered after a delegation has been broken; write access to the
 * mount is still held at that point.
 */
8e6d782c 4870retry_deleg:
/* trap is the dentry returned by lock_rename(); it is compared with both ends below. */
f5beed75 4871 trap = lock_rename(new_path.dentry, old_path.dentry);
1da177e4 4872
f5beed75 4873 old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
1da177e4
LT
4874 error = PTR_ERR(old_dentry);
4875 if (IS_ERR(old_dentry))
4876 goto exit3;
4877 /* source must exist */
4878 error = -ENOENT;
b18825a7 4879 if (d_is_negative(old_dentry))
1da177e4 4880 goto exit4;
f5beed75 4881 new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
0a7c3937
MS
4882 error = PTR_ERR(new_dentry);
4883 if (IS_ERR(new_dentry))
4884 goto exit4;
/* RENAME_NOREPLACE refuses an existing target. */
4885 error = -EEXIST;
4886 if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
4887 goto exit5;
/*
 * An exchange needs an existing target; a non-directory target named with
 * trailing characters after its last component gives -ENOTDIR.
 */
da1ce067
MS
4888 if (flags & RENAME_EXCHANGE) {
4889 error = -ENOENT;
4890 if (d_is_negative(new_dentry))
4891 goto exit5;
4892
4893 if (!d_is_dir(new_dentry)) {
4894 error = -ENOTDIR;
f5beed75 4895 if (new_last.name[new_last.len])
da1ce067
MS
4896 goto exit5;
4897 }
4898 }
1da177e4 4899 /* unless the source is a directory trailing slashes give -ENOTDIR */
44b1d530 4900 if (!d_is_dir(old_dentry)) {
1da177e4 4901 error = -ENOTDIR;
f5beed75 4902 if (old_last.name[old_last.len])
0a7c3937 4903 goto exit5;
f5beed75 4904 if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
0a7c3937 4905 goto exit5;
1da177e4
LT
4906 }
4907 /* source should not be ancestor of target */
4908 error = -EINVAL;
4909 if (old_dentry == trap)
0a7c3937 4910 goto exit5;
1da177e4 4911 /* target should not be an ancestor of source */
/* The errno for that case is -ENOTEMPTY, except for an exchange where -EINVAL stays. */
da1ce067
MS
4912 if (!(flags & RENAME_EXCHANGE))
4913 error = -ENOTEMPTY;
1da177e4
LT
4914 if (new_dentry == trap)
4915 goto exit5;
4916
f5beed75
AV
4917 error = security_path_rename(&old_path, old_dentry,
4918 &new_path, new_dentry, flags);
be6d3e56 4919 if (error)
c30dabfe 4920 goto exit5;
9fe61450
CB
4921
/* Every field of rd that is assigned here is filled in before the call. */
4922 rd.old_dir = old_path.dentry->d_inode;
4923 rd.old_dentry = old_dentry;
abf08576 4924 rd.old_mnt_idmap = mnt_idmap(old_path.mnt);
9fe61450
CB
4925 rd.new_dir = new_path.dentry->d_inode;
4926 rd.new_dentry = new_dentry;
abf08576 4927 rd.new_mnt_idmap = mnt_idmap(new_path.mnt);
9fe61450
CB
4928 rd.delegated_inode = &delegated_inode;
4929 rd.flags = flags;
4930 error = vfs_rename(&rd);
1da177e4
LT
4931exit5:
4932 dput(new_dentry);
4933exit4:
4934 dput(old_dentry);
4935exit3:
f5beed75 4936 unlock_rename(new_path.dentry, old_path.dentry);
/*
 * vfs_rename() handed back an inode with a delegation: wait for the break
 * with the rename locks dropped and, on success, redo the locked section.
 */
8e6d782c
BF
4937 if (delegated_inode) {
4938 error = break_deleg_wait(&delegated_inode);
4939 if (!error)
4940 goto retry_deleg;
4941 }
f5beed75 4942 mnt_drop_write(old_path.mnt);
1da177e4 4943exit2:
/* The retry decision is recorded before the paths are released below. */
c6a94284
JL
4944 if (retry_estale(error, lookup_flags))
4945 should_retry = true;
f5beed75 4946 path_put(&new_path);
1da177e4 4947exit1:
f5beed75 4948 path_put(&old_path);
c6a94284
JL
4949 if (should_retry) {
4950 should_retry = false;
4951 lookup_flags |= LOOKUP_REVAL;
4952 goto retry;
4953 }
0ee50b47 4954put_names:
91ef658f 4955 putname(from);
91ef658f 4956 putname(to);
1da177e4
LT
4957 return error;
4958}
4959
ee81feb6
DB
/*
 * renameat2() syscall entry: converts both user-space names with getname()
 * and passes them, the directory fds and @flags to do_renameat2().
 */
4960SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
4961 int, newdfd, const char __user *, newname, unsigned int, flags)
4962{
e886663c
JA
4963 return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
4964 flags);
ee81feb6
DB
4965}
4966
520c8b16
MS
/* renameat() syscall entry: do_renameat2() with no flags. */
4967SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
4968 int, newdfd, const char __user *, newname)
4969{
e886663c
JA
4970 return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
4971 0);
520c8b16
MS
4972}
4973
/* rename() syscall entry: do_renameat2() with AT_FDCWD for both names and no flags. */
a26eab24 4974SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
5590ff0d 4975{
e886663c
JA
4976 return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD,
4977 getname(newname), 0);
5590ff0d
UD
4978}
4979
5d826c84 4980int readlink_copy(char __user *buffer, int buflen, const char *link)
1da177e4 4981{
5d826c84 4982 int len = PTR_ERR(link);
1da177e4
LT
4983 if (IS_ERR(link))
4984 goto out;
4985
4986 len = strlen(link);
4987 if (len > (unsigned) buflen)
4988 len = buflen;
4989 if (copy_to_user(buffer, link, len))
4990 len = -EFAULT;
4991out:
4992 return len;
4993}
4994
fd4a0edf
MS
4995/**
4996 * vfs_readlink - copy symlink body into userspace buffer
4997 * @dentry: dentry on which to get symbolic link
4998 * @buffer: user memory pointer
4999 * @buflen: size of buffer
5000 *
5001 * Does not touch atime. That's up to the caller if necessary
5002 *
5003 * Does not call security hook.
5004 */
5005int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
5006{
5007 struct inode *inode = d_inode(dentry);
f2df5da6
AV
5008 DEFINE_DELAYED_CALL(done);
5009 const char *link;
5010 int res;
fd4a0edf 5011
76fca90e
MS
5012 if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
5013 if (unlikely(inode->i_op->readlink))
5014 return inode->i_op->readlink(dentry, buffer, buflen);
5015
5016 if (!d_is_symlink(dentry))
5017 return -EINVAL;
5018
5019 spin_lock(&inode->i_lock);
5020 inode->i_opflags |= IOP_DEFAULT_READLINK;
5021 spin_unlock(&inode->i_lock);
5022 }
fd4a0edf 5023
4c4f7c19 5024 link = READ_ONCE(inode->i_link);
f2df5da6
AV
5025 if (!link) {
5026 link = inode->i_op->get_link(dentry, inode, &done);
5027 if (IS_ERR(link))
5028 return PTR_ERR(link);
5029 }
5030 res = readlink_copy(buffer, buflen, link);
5031 do_delayed_call(&done);
5032 return res;
fd4a0edf
MS
5033}
5034EXPORT_SYMBOL(vfs_readlink);
1da177e4 5035
d60874cd
MS
5036/**
5037 * vfs_get_link - get symlink body
5038 * @dentry: dentry on which to get symbolic link
5039 * @done: caller needs to free returned data with this
5040 *
5041 * Calls security hook and i_op->get_link() on the supplied inode.
5042 *
5043 * It does not touch atime. That's up to the caller if necessary.
5044 *
5045 * Does not work on "special" symlinks like /proc/$$/fd/N
5046 */
5047const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
5048{
5049 const char *res = ERR_PTR(-EINVAL);
5050 struct inode *inode = d_inode(dentry);
5051
5052 if (d_is_symlink(dentry)) {
5053 res = ERR_PTR(security_inode_readlink(dentry));
5054 if (!res)
5055 res = inode->i_op->get_link(dentry, inode, done);
5056 }
5057 return res;
5058}
5059EXPORT_SYMBOL(vfs_get_link);
5060
1da177e4 5061/* get the link contents into pagecache */
6b255391 5062const char *page_get_link(struct dentry *dentry, struct inode *inode,
fceef393 5063 struct delayed_call *callback)
1da177e4 5064{
ebd09abb
DG
5065 char *kaddr;
5066 struct page *page;
6b255391
AV
5067 struct address_space *mapping = inode->i_mapping;
5068
d3883d4f
AV
5069 if (!dentry) {
5070 page = find_get_page(mapping, 0);
5071 if (!page)
5072 return ERR_PTR(-ECHILD);
5073 if (!PageUptodate(page)) {
5074 put_page(page);
5075 return ERR_PTR(-ECHILD);
5076 }
5077 } else {
5078 page = read_mapping_page(mapping, 0, NULL);
5079 if (IS_ERR(page))
5080 return (char*)page;
5081 }
fceef393 5082 set_delayed_call(callback, page_put_link, page);
21fc61c7
AV
5083 BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
5084 kaddr = page_address(page);
6b255391 5085 nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
ebd09abb 5086 return kaddr;
1da177e4
LT
5087}
5088
6b255391 5089EXPORT_SYMBOL(page_get_link);
1da177e4 5090
/* Delayed-call helper: drop the page reference passed in as @arg. */
void page_put_link(void *arg)
{
	struct page *page = arg;

	put_page(page);
}
EXPORT_SYMBOL(page_put_link);
1da177e4 5096
aa80deab
AV
5097int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
5098{
fceef393 5099 DEFINE_DELAYED_CALL(done);
6b255391
AV
5100 int res = readlink_copy(buffer, buflen,
5101 page_get_link(dentry, d_inode(dentry),
fceef393
AV
5102 &done));
5103 do_delayed_call(&done);
aa80deab
AV
5104 return res;
5105}
5106EXPORT_SYMBOL(page_readlink);
5107
56f5746c 5108int page_symlink(struct inode *inode, const char *symname, int len)
1da177e4
LT
5109{
5110 struct address_space *mapping = inode->i_mapping;
27a77913 5111 const struct address_space_operations *aops = mapping->a_ops;
56f5746c 5112 bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS);
0adb25d2 5113 struct page *page;
1468c6f4 5114 void *fsdata = NULL;
beb497ab 5115 int err;
2d878178 5116 unsigned int flags;
1da177e4 5117
7e53cac4 5118retry:
2d878178
MWO
5119 if (nofs)
5120 flags = memalloc_nofs_save();
27a77913 5121 err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata);
2d878178
MWO
5122 if (nofs)
5123 memalloc_nofs_restore(flags);
1da177e4 5124 if (err)
afddba49
NP
5125 goto fail;
5126
21fc61c7 5127 memcpy(page_address(page), symname, len-1);
afddba49 5128
27a77913 5129 err = aops->write_end(NULL, mapping, 0, len-1, len-1,
afddba49 5130 page, fsdata);
1da177e4
LT
5131 if (err < 0)
5132 goto fail;
afddba49
NP
5133 if (err < len-1)
5134 goto retry;
5135
1da177e4
LT
5136 mark_inode_dirty(inode);
5137 return 0;
1da177e4
LT
5138fail:
5139 return err;
5140}
4d359507 5141EXPORT_SYMBOL(page_symlink);
0adb25d2 5142
/* Exported inode operations table whose only method is ->get_link = page_get_link. */
92e1d5be 5143const struct inode_operations page_symlink_inode_operations = {
6b255391 5144 .get_link = page_get_link,
1da177e4 5145};
1da177e4 5146EXPORT_SYMBOL(page_symlink_inode_operations);
This page took 2.670987 seconds and 4 git commands to generate.