]> Git Repo - J-linux.git/blob - fs/xfs/scrub/orphanage.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / fs / xfs / scrub / orphanage.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_trans_space.h"
18 #include "xfs_dir2.h"
19 #include "xfs_icache.h"
20 #include "xfs_bmap.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_parent.h"
23 #include "xfs_attr_sf.h"
24 #include "scrub/scrub.h"
25 #include "scrub/common.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/readdir.h"
30
31 #include <linux/namei.h>
32
33 /*
34  * The Orphanage
35  * =============
36  *
37  * If the directory tree is damaged, children of that directory become
38  * inaccessible via that file path.  If a child has no other parents, the file
39  * is said to be orphaned.  xfs_repair fixes this situation by creating an
40  * orphanage directory (specifically, /lost+found) and creating a directory
41  * entry pointing to the orphaned file.
42  *
43  * Online repair follows this tactic by creating a root-owned /lost+found
44  * directory if one does not exist.  If an orphan is found, it will move that
45  * file into the orphanage.
46  */
47
48 /* Make the orphanage owned by root. */
49 STATIC int
50 xrep_chown_orphanage(
51         struct xfs_scrub        *sc,
52         struct xfs_inode        *dp)
53 {
54         struct xfs_trans        *tp;
55         struct xfs_mount        *mp = sc->mp;
56         struct xfs_dquot        *udqp = NULL, *gdqp = NULL, *pdqp = NULL;
57         struct xfs_dquot        *oldu = NULL, *oldg = NULL, *oldp = NULL;
58         struct inode            *inode = VFS_I(dp);
59         int                     error;
60
61         error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
62                         XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
63         if (error)
64                 return error;
65
66         error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
67         if (error)
68                 goto out_dqrele;
69
70         /*
71          * Always clear setuid/setgid/sticky on the orphanage since we don't
72          * normally want that functionality on this directory and xfs_repair
73          * doesn't create it this way either.  Leave the other access bits
74          * unchanged.
75          */
76         inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX);
77
78         /*
79          * Change the ownerships and register quota modifications
80          * in the transaction.
81          */
82         if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
83                 if (XFS_IS_UQUOTA_ON(mp))
84                         oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
85                 inode->i_uid = GLOBAL_ROOT_UID;
86         }
87         if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
88                 if (XFS_IS_GQUOTA_ON(mp))
89                         oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
90                 inode->i_gid = GLOBAL_ROOT_GID;
91         }
92         if (dp->i_projid != 0) {
93                 if (XFS_IS_PQUOTA_ON(mp))
94                         oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
95                 dp->i_projid = 0;
96         }
97
98         dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
100
101         XFS_STATS_INC(mp, xs_ig_attrchg);
102
103         if (xfs_has_wsync(mp))
104                 xfs_trans_set_sync(tp);
105         error = xfs_trans_commit(tp);
106
107         xfs_qm_dqrele(oldu);
108         xfs_qm_dqrele(oldg);
109         xfs_qm_dqrele(oldp);
110
111 out_dqrele:
112         xfs_qm_dqrele(udqp);
113         xfs_qm_dqrele(gdqp);
114         xfs_qm_dqrele(pdqp);
115         return error;
116 }
117
118 #define ORPHANAGE       "lost+found"
119
120 /* Create the orphanage directory, and set sc->orphanage to it. */
121 int
122 xrep_orphanage_create(
123         struct xfs_scrub        *sc)
124 {
125         struct xfs_mount        *mp = sc->mp;
126         struct dentry           *root_dentry, *orphanage_dentry;
127         struct inode            *root_inode = VFS_I(sc->mp->m_rootip);
128         struct inode            *orphanage_inode;
129         int                     error;
130
131         if (xfs_is_shutdown(mp))
132                 return -EIO;
133         if (xfs_is_readonly(mp)) {
134                 sc->orphanage = NULL;
135                 return 0;
136         }
137
138         ASSERT(sc->tp == NULL);
139         ASSERT(sc->orphanage == NULL);
140
141         /* Find the dentry for the root directory... */
142         root_dentry = d_find_alias(root_inode);
143         if (!root_dentry) {
144                 error = -EFSCORRUPTED;
145                 goto out;
146         }
147
148         /* ...which is a directory, right? */
149         if (!d_is_dir(root_dentry)) {
150                 error = -EFSCORRUPTED;
151                 goto out_dput_root;
152         }
153
154         /* Try to find the orphanage directory. */
155         inode_lock_nested(root_inode, I_MUTEX_PARENT);
156         orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry,
157                         strlen(ORPHANAGE));
158         if (IS_ERR(orphanage_dentry)) {
159                 error = PTR_ERR(orphanage_dentry);
160                 goto out_unlock_root;
161         }
162
163         /*
164          * Nothing found?  Call mkdir to create the orphanage.  Create the
165          * directory without other-user access because we're live and someone
166          * could have been relying partly on minimal access to a parent
167          * directory to control access to a file we put in here.
168          */
169         if (d_really_is_negative(orphanage_dentry)) {
170                 error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry,
171                                 0750);
172                 if (error)
173                         goto out_dput_orphanage;
174         }
175
176         /* Not a directory? Bail out. */
177         if (!d_is_dir(orphanage_dentry)) {
178                 error = -ENOTDIR;
179                 goto out_dput_orphanage;
180         }
181
182         /*
183          * Grab a reference to the orphanage.  This /should/ succeed since
184          * we hold the root directory locked and therefore nobody can delete
185          * the orphanage.
186          */
187         orphanage_inode = igrab(d_inode(orphanage_dentry));
188         if (!orphanage_inode) {
189                 error = -ENOENT;
190                 goto out_dput_orphanage;
191         }
192
193         /* Make sure the orphanage is owned by root. */
194         error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
195         if (error)
196                 goto out_dput_orphanage;
197
198         /* Stash the reference for later and bail out. */
199         sc->orphanage = XFS_I(orphanage_inode);
200         sc->orphanage_ilock_flags = 0;
201
202 out_dput_orphanage:
203         dput(orphanage_dentry);
204 out_unlock_root:
205         inode_unlock(VFS_I(sc->mp->m_rootip));
206 out_dput_root:
207         dput(root_dentry);
208 out:
209         return error;
210 }
211
212 void
213 xrep_orphanage_ilock(
214         struct xfs_scrub        *sc,
215         unsigned int            ilock_flags)
216 {
217         sc->orphanage_ilock_flags |= ilock_flags;
218         xfs_ilock(sc->orphanage, ilock_flags);
219 }
220
221 bool
222 xrep_orphanage_ilock_nowait(
223         struct xfs_scrub        *sc,
224         unsigned int            ilock_flags)
225 {
226         if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) {
227                 sc->orphanage_ilock_flags |= ilock_flags;
228                 return true;
229         }
230
231         return false;
232 }
233
234 void
235 xrep_orphanage_iunlock(
236         struct xfs_scrub        *sc,
237         unsigned int            ilock_flags)
238 {
239         xfs_iunlock(sc->orphanage, ilock_flags);
240         sc->orphanage_ilock_flags &= ~ilock_flags;
241 }
242
243 /* Grab the IOLOCK of the orphanage and sc->ip. */
244 int
245 xrep_orphanage_iolock_two(
246         struct xfs_scrub        *sc)
247 {
248         int                     error = 0;
249
250         while (true) {
251                 if (xchk_should_terminate(sc, &error))
252                         return error;
253
254                 /*
255                  * Normal XFS takes the IOLOCK before grabbing a transaction.
256                  * Scrub holds a transaction, which means that we can't block
257                  * on either IOLOCK.
258                  */
259                 if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
260                         if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
261                                 break;
262                         xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
263                 }
264                 delay(1);
265         }
266
267         return 0;
268 }
269
270 /* Release the orphanage. */
271 void
272 xrep_orphanage_rele(
273         struct xfs_scrub        *sc)
274 {
275         if (!sc->orphanage)
276                 return;
277
278         if (sc->orphanage_ilock_flags)
279                 xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags);
280
281         xchk_irele(sc, sc->orphanage);
282         sc->orphanage = NULL;
283 }
284
285 /* Adoption moves a file into /lost+found */
286
287 /* Can the orphanage adopt @sc->ip? */
288 bool
289 xrep_orphanage_can_adopt(
290         struct xfs_scrub        *sc)
291 {
292         ASSERT(sc->ip != NULL);
293
294         if (!sc->orphanage)
295                 return false;
296         if (sc->ip == sc->orphanage)
297                 return false;
298         if (xchk_inode_is_sb_rooted(sc->ip))
299                 return false;
300         if (xfs_is_internal_inode(sc->ip))
301                 return false;
302         return true;
303 }
304
305 /*
306  * Create a new transaction to send a child to the orphanage.
307  *
308  * Allocate a new transaction with sufficient disk space to handle the
309  * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the
310  * transaction, and reserve quota to reparent the latter.  Caller must hold the
311  * IOLOCK of the orphanage and sc->ip.
312  */
313 int
314 xrep_adoption_trans_alloc(
315         struct xfs_scrub        *sc,
316         struct xrep_adoption    *adopt)
317 {
318         struct xfs_mount        *mp = sc->mp;
319         unsigned int            child_blkres = 0;
320         int                     error;
321
322         ASSERT(sc->tp == NULL);
323         ASSERT(sc->ip != NULL);
324         ASSERT(sc->orphanage != NULL);
325         ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL);
326         ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL);
327         ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
328         ASSERT(!(sc->orphanage_ilock_flags &
329                                 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
330
331         /* Compute the worst case space reservation that we need. */
332         adopt->sc = sc;
333         adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
334         if (S_ISDIR(VFS_I(sc->ip)->i_mode))
335                 child_blkres = xfs_rename_space_res(mp, 0, false,
336                                                     xfs_name_dotdot.len, false);
337         if (xfs_has_parent(mp))
338                 child_blkres += XFS_ADDAFORK_SPACE_RES(mp);
339         adopt->child_blkres = child_blkres;
340
341         /*
342          * Allocate a transaction to link the child into the parent, along with
343          * enough disk space to handle expansion of both the orphanage and the
344          * dotdot entry of a child directory.
345          */
346         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link,
347                         adopt->orphanage_blkres + adopt->child_blkres, 0, 0,
348                         &sc->tp);
349         if (error)
350                 return error;
351
352         xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
353                             sc->ip, XFS_ILOCK_EXCL);
354         sc->ilock_flags |= XFS_ILOCK_EXCL;
355         sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;
356
357         xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
358         xfs_trans_ijoin(sc->tp, sc->ip, 0);
359
360         /*
361          * Reserve enough quota in the orphan directory to add the new name.
362          * Normally the orphanage should have user/group/project ids of zero
363          * and hence is not subject to quota enforcement, but we're allowed to
364          * exceed quota to reattach disconnected parts of the directory tree.
365          */
366         error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
367                         adopt->orphanage_blkres, 0, true);
368         if (error)
369                 goto out_cancel;
370
371         /*
372          * Reserve enough quota in the child directory to change dotdot.
373          * Here we're also allowed to exceed file quota to repair inconsistent
374          * metadata.
375          */
376         if (adopt->child_blkres) {
377                 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
378                                 adopt->child_blkres, 0, true);
379                 if (error)
380                         goto out_cancel;
381         }
382
383         return 0;
384 out_cancel:
385         xchk_trans_cancel(sc);
386         xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
387         xchk_iunlock(sc, XFS_ILOCK_EXCL);
388         return error;
389 }
390
391 /*
392  * Compute the xfs_name for the directory entry that we're adding to the
393  * orphanage.  Caller must hold ILOCKs of sc->ip and the orphanage and must not
394  * reuse namebuf until the adoption completes or is dissolved.
395  */
396 int
397 xrep_adoption_compute_name(
398         struct xrep_adoption    *adopt,
399         struct xfs_name         *xname)
400 {
401         struct xfs_scrub        *sc = adopt->sc;
402         char                    *namebuf = (void *)xname->name;
403         xfs_ino_t               ino;
404         unsigned int            incr = 0;
405         int                     error = 0;
406
407         adopt->xname = xname;
408         xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
409         xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);
410
411         /* Make sure the filename is unique in the lost+found. */
412         error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
413         while (error == 0 && incr < 10000) {
414                 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
415                                 sc->ip->i_ino, ++incr);
416                 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
417         }
418         if (error == 0) {
419                 /* We already have 10,000 entries in the orphanage? */
420                 return -EFSCORRUPTED;
421         }
422
423         if (error != -ENOENT)
424                 return error;
425         return 0;
426 }
427
428 /*
429  * Make sure the dcache does not have a positive dentry for the name we've
430  * chosen.  The caller should have checked with the ondisk directory, so any
431  * discrepancy is a sign that something is seriously wrong.
432  */
433 static int
434 xrep_adoption_check_dcache(
435         struct xrep_adoption    *adopt)
436 {
437         struct qstr             qname = QSTR_INIT(adopt->xname->name,
438                                                   adopt->xname->len);
439         struct xfs_scrub        *sc = adopt->sc;
440         struct dentry           *d_orphanage, *d_child;
441         int                     error = 0;
442
443         d_orphanage = d_find_alias(VFS_I(sc->orphanage));
444         if (!d_orphanage)
445                 return 0;
446
447         d_child = d_hash_and_lookup(d_orphanage, &qname);
448         if (d_child) {
449                 trace_xrep_adoption_check_child(sc->mp, d_child);
450
451                 if (d_is_positive(d_child)) {
452                         ASSERT(d_is_negative(d_child));
453                         error = -EFSCORRUPTED;
454                 }
455
456                 dput(d_child);
457         }
458
459         dput(d_orphanage);
460         return error;
461 }
462
463 /*
464  * Invalidate all dentries for the name that was added to the orphanage
465  * directory, and all dentries pointing to the child inode that was moved.
466  *
467  * There should not be any positive entries for the name, since we've
468  * maintained our lock on the orphanage directory.
469  */
470 static void
471 xrep_adoption_zap_dcache(
472         struct xrep_adoption    *adopt)
473 {
474         struct qstr             qname = QSTR_INIT(adopt->xname->name,
475                                                   adopt->xname->len);
476         struct xfs_scrub        *sc = adopt->sc;
477         struct dentry           *d_orphanage, *d_child;
478
479         /* Invalidate all dentries for the adoption name */
480         d_orphanage = d_find_alias(VFS_I(sc->orphanage));
481         if (!d_orphanage)
482                 return;
483
484         d_child = d_hash_and_lookup(d_orphanage, &qname);
485         while (d_child != NULL) {
486                 trace_xrep_adoption_invalidate_child(sc->mp, d_child);
487
488                 ASSERT(d_is_negative(d_child));
489                 d_invalidate(d_child);
490                 dput(d_child);
491                 d_child = d_lookup(d_orphanage, &qname);
492         }
493
494         dput(d_orphanage);
495
496         /* Invalidate all the dentries pointing down to this file. */
497         while ((d_child = d_find_alias(VFS_I(sc->ip))) != NULL) {
498                 trace_xrep_adoption_invalidate_child(sc->mp, d_child);
499
500                 d_invalidate(d_child);
501                 dput(d_child);
502         }
503 }
504
505 /*
506  * If we have to add an attr fork ahead of a parent pointer update, how much
507  * space should we ask for?
508  */
509 static inline int
510 xrep_adoption_attr_sizeof(
511         const struct xrep_adoption      *adopt)
512 {
513         return sizeof(struct xfs_attr_sf_hdr) +
514                 xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec),
515                                            adopt->xname->len);
516 }
517
518 /*
519  * Move the current file to the orphanage under the computed name.
520  *
521  * Returns with a dirty transaction so that the caller can handle any other
522  * work, such as fixing up unlinked lists or resetting link counts.
523  */
524 int
525 xrep_adoption_move(
526         struct xrep_adoption    *adopt)
527 {
528         struct xfs_scrub        *sc = adopt->sc;
529         bool                    isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
530         int                     error;
531
532         trace_xrep_adoption_reparent(sc->orphanage, adopt->xname,
533                         sc->ip->i_ino);
534
535         error = xrep_adoption_check_dcache(adopt);
536         if (error)
537                 return error;
538
539         /*
540          * If this filesystem has parent pointers, ensure that the file being
541          * moved to the orphanage has an attribute fork.  This is required
542          * because the parent pointer code does not itself add attr forks.
543          */
544         if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) {
545                 int sf_size = xrep_adoption_attr_sizeof(adopt);
546
547                 error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true);
548                 if (error)
549                         return error;
550         }
551
552         /* Create the new name in the orphanage. */
553         error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
554                         sc->ip->i_ino, adopt->orphanage_blkres);
555         if (error)
556                 return error;
557
558         /*
559          * Bump the link count of the orphanage if we just added a
560          * subdirectory, and update its timestamps.
561          */
562         xfs_trans_ichgtime(sc->tp, sc->orphanage,
563                         XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
564         if (isdir)
565                 xfs_bumplink(sc->tp, sc->orphanage);
566         xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);
567
568         /* Bump the link count of the child. */
569         if (adopt->bump_child_nlink) {
570                 xfs_bumplink(sc->tp, sc->ip);
571                 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
572         }
573
574         /* Replace the dotdot entry if the child is a subdirectory. */
575         if (isdir) {
576                 error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
577                                 sc->orphanage->i_ino, adopt->child_blkres);
578                 if (error)
579                         return error;
580         }
581
582         /* Add a parent pointer from the file back to the lost+found. */
583         if (xfs_has_parent(sc->mp)) {
584                 error = xfs_parent_addname(sc->tp, &adopt->ppargs,
585                                 sc->orphanage, adopt->xname, sc->ip);
586                 if (error)
587                         return error;
588         }
589
590         /*
591          * Notify dirent hooks that we moved the file to /lost+found, and
592          * finish all the deferred work so that we know the adoption is fully
593          * recorded in the log.
594          */
595         xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname);
596
597         /* Remove negative dentries from the lost+found's dcache */
598         xrep_adoption_zap_dcache(adopt);
599         return 0;
600 }
601
602 /*
603  * Roll to a clean scrub transaction so that we can release the orphanage,
604  * even if xrep_adoption_move was not called.
605  *
606  * Commits all the work and deferred ops attached to an adoption request and
607  * rolls to a clean scrub transaction.  On success, returns 0 with the scrub
608  * context holding a clean transaction with no inodes joined.  On failure,
609  * returns negative errno with no scrub transaction.  All inode locks are
610  * still held after this function returns.
611  */
612 int
613 xrep_adoption_trans_roll(
614         struct xrep_adoption    *adopt)
615 {
616         struct xfs_scrub        *sc = adopt->sc;
617         int                     error;
618
619         trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip,
620                         !!(sc->tp->t_flags & XFS_TRANS_DIRTY));
621
622         /* Finish all the deferred ops to commit all repairs. */
623         error = xrep_defer_finish(sc);
624         if (error)
625                 return error;
626
627         /* Roll the transaction once more to detach the inodes. */
628         return xfs_trans_roll(&sc->tp);
629 }
This page took 0.060438 seconds and 4 git commands to generate.