Git Repo - J-linux.git/blob - fs/xfs/scrub/dir.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / fs / xfs / scrub / dir.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
16 #include "xfs_dir2.h"
17 #include "xfs_dir2_priv.h"
18 #include "xfs_health.h"
19 #include "xfs_attr.h"
20 #include "xfs_parent.h"
21 #include "scrub/scrub.h"
22 #include "scrub/common.h"
23 #include "scrub/dabtree.h"
24 #include "scrub/readdir.h"
25 #include "scrub/health.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/xfile.h"
29 #include "scrub/xfarray.h"
30 #include "scrub/xfblob.h"
31
/*
 * Prepare a directory scrub.  If this scrub could turn into a repair, run
 * the repair-side setup first; then set up the inode contents for scrubbing.
 * Returns zero or a negative errno from either setup step.
 */
int
xchk_setup_directory(
	struct xfs_scrub	*sc)
{
	if (xchk_could_repair(sc)) {
		int		ret = xrep_setup_directory(sc);

		if (ret)
			return ret;
	}

	return xchk_setup_inode_contents(sc, 0);
}
47
48 /* Directories */
49
50 /* Deferred directory entry that we saved for later. */
51 struct xchk_dirent {
52         /* Cookie for retrieval of the dirent name. */
53         xfblob_cookie           name_cookie;
54
55         /* Child inode number. */
56         xfs_ino_t               ino;
57
58         /* Length of the pptr name. */
59         uint8_t                 namelen;
60 };
61
62 struct xchk_dir {
63         struct xfs_scrub        *sc;
64
65         /* information for parent pointer validation. */
66         struct xfs_parent_rec   pptr_rec;
67         struct xfs_da_args      pptr_args;
68
69         /* Fixed-size array of xchk_dirent structures. */
70         struct xfarray          *dir_entries;
71
72         /* Blobs containing dirent names. */
73         struct xfblob           *dir_names;
74
75         /* If we've cycled the ILOCK, we must revalidate deferred dirents. */
76         bool                    need_revalidate;
77
78         /* Name buffer for dirent revalidation. */
79         struct xfs_name         xname;
80         uint8_t                 namebuf[MAXNAMELEN];
81 };
82
83 /* Scrub a directory entry. */
84
85 /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */
86 STATIC void
87 xchk_dir_check_ftype(
88         struct xfs_scrub        *sc,
89         xfs_fileoff_t           offset,
90         struct xfs_inode        *ip,
91         int                     ftype)
92 {
93         struct xfs_mount        *mp = sc->mp;
94
95         if (!xfs_has_ftype(mp)) {
96                 if (ftype != XFS_DIR3_FT_UNKNOWN && ftype != XFS_DIR3_FT_DIR)
97                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
98                 return;
99         }
100
101         if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype)
102                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
103
104         /*
105          * Metadata and regular inodes cannot cross trees.  This property
106          * cannot change without a full inode free and realloc cycle, so it's
107          * safe to check this without holding locks.
108          */
109         if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip))
110                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
111 }
112
113 /*
114  * Try to lock a child file for checking parent pointers.  Returns the inode
115  * flags for the locks we now hold, or zero if we failed.
116  */
117 STATIC unsigned int
118 xchk_dir_lock_child(
119         struct xfs_scrub        *sc,
120         struct xfs_inode        *ip)
121 {
122         if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
123                 return 0;
124
125         if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
126                 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
127                 return 0;
128         }
129
130         if (!xfs_inode_has_attr_fork(ip) || !xfs_need_iread_extents(&ip->i_af))
131                 return XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED;
132
133         xfs_iunlock(ip, XFS_ILOCK_SHARED);
134
135         if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
136                 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
137                 return 0;
138         }
139
140         return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
141 }
142
143 /* Check the backwards link (parent pointer) associated with this dirent. */
144 STATIC int
145 xchk_dir_parent_pointer(
146         struct xchk_dir         *sd,
147         const struct xfs_name   *name,
148         struct xfs_inode        *ip)
149 {
150         struct xfs_scrub        *sc = sd->sc;
151         int                     error;
152
153         xfs_inode_to_parent_rec(&sd->pptr_rec, sc->ip);
154         error = xfs_parent_lookup(sc->tp, ip, name, &sd->pptr_rec,
155                         &sd->pptr_args);
156         if (error == -ENOATTR)
157                 xchk_fblock_xref_set_corrupt(sc, XFS_DATA_FORK, 0);
158
159         return 0;
160 }
161
162 /* Look for a parent pointer matching this dirent, if the child isn't busy. */
163 STATIC int
164 xchk_dir_check_pptr_fast(
165         struct xchk_dir         *sd,
166         xfs_dir2_dataptr_t      dapos,
167         const struct xfs_name   *name,
168         struct xfs_inode        *ip)
169 {
170         struct xfs_scrub        *sc = sd->sc;
171         unsigned int            lockmode;
172         int                     error;
173
174         /* dot and dotdot entries do not have parent pointers */
175         if (xfs_dir2_samename(name, &xfs_name_dot) ||
176             xfs_dir2_samename(name, &xfs_name_dotdot))
177                 return 0;
178
179         /* No self-referential non-dot or dotdot dirents. */
180         if (ip == sc->ip) {
181                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
182                 return -ECANCELED;
183         }
184
185         /* Try to lock the inode. */
186         lockmode = xchk_dir_lock_child(sc, ip);
187         if (!lockmode) {
188                 struct xchk_dirent      save_de = {
189                         .namelen        = name->len,
190                         .ino            = ip->i_ino,
191                 };
192
193                 /* Couldn't lock the inode, so save the dirent for later. */
194                 trace_xchk_dir_defer(sc->ip, name, ip->i_ino);
195
196                 error = xfblob_storename(sd->dir_names, &save_de.name_cookie,
197                                 name);
198                 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
199                                         &error))
200                         return error;
201
202                 error = xfarray_append(sd->dir_entries, &save_de);
203                 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
204                                         &error))
205                         return error;
206
207                 return 0;
208         }
209
210         error = xchk_dir_parent_pointer(sd, name, ip);
211         xfs_iunlock(ip, lockmode);
212         return error;
213 }
214
215 /*
216  * Scrub a single directory entry.
217  *
218  * Check the inode number to make sure it's sane, then we check that we can
219  * look up this filename.  Finally, we check the ftype.
220  */
221 STATIC int
222 xchk_dir_actor(
223         struct xfs_scrub        *sc,
224         struct xfs_inode        *dp,
225         xfs_dir2_dataptr_t      dapos,
226         const struct xfs_name   *name,
227         xfs_ino_t               ino,
228         void                    *priv)
229 {
230         struct xfs_mount        *mp = dp->i_mount;
231         struct xfs_inode        *ip;
232         struct xchk_dir         *sd = priv;
233         xfs_ino_t               lookup_ino;
234         xfs_dablk_t             offset;
235         int                     error = 0;
236
237         offset = xfs_dir2_db_to_da(mp->m_dir_geo,
238                         xfs_dir2_dataptr_to_db(mp->m_dir_geo, dapos));
239
240         if (xchk_should_terminate(sc, &error))
241                 return error;
242
243         /* Does this inode number make sense? */
244         if (!xfs_verify_dir_ino(mp, ino)) {
245                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
246                 return -ECANCELED;
247         }
248
249         /* Does this name make sense? */
250         if (!xfs_dir2_namecheck(name->name, name->len)) {
251                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
252                 return -ECANCELED;
253         }
254
255         if (xfs_dir2_samename(name, &xfs_name_dot)) {
256                 /* If this is "." then check that the inum matches the dir. */
257                 if (ino != dp->i_ino)
258                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
259         } else if (xfs_dir2_samename(name, &xfs_name_dotdot)) {
260                 /*
261                  * If this is ".." in the root inode, check that the inum
262                  * matches this dir.
263                  */
264                 if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino)
265                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
266         }
267
268         /* Verify that we can look up this name by hash. */
269         error = xchk_dir_lookup(sc, dp, name, &lookup_ino);
270         /* ENOENT means the hash lookup failed and the dir is corrupt */
271         if (error == -ENOENT)
272                 error = -EFSCORRUPTED;
273         if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
274                 goto out;
275         if (lookup_ino != ino) {
276                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
277                 return -ECANCELED;
278         }
279
280         /*
281          * Grab the inode pointed to by the dirent.  We release the inode
282          * before we cancel the scrub transaction.
283          *
284          * If _iget returns -EINVAL or -ENOENT then the child inode number is
285          * garbage and the directory is corrupt.  If the _iget returns
286          * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
287          *  cross referencing error.  Any other error is an operational error.
288          */
289         error = xchk_iget(sc, ino, &ip);
290         if (error == -EINVAL || error == -ENOENT) {
291                 error = -EFSCORRUPTED;
292                 xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
293                 goto out;
294         }
295         if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error))
296                 goto out;
297
298         xchk_dir_check_ftype(sc, offset, ip, name->type);
299
300         if (xfs_has_parent(mp)) {
301                 error = xchk_dir_check_pptr_fast(sd, dapos, name, ip);
302                 if (error)
303                         goto out_rele;
304         }
305
306 out_rele:
307         xchk_irele(sc, ip);
308 out:
309         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
310                 return -ECANCELED;
311         return error;
312 }
313
314 /* Scrub a directory btree record. */
315 STATIC int
316 xchk_dir_rec(
317         struct xchk_da_btree            *ds,
318         int                             level)
319 {
320         struct xfs_name                 dname = { };
321         struct xfs_da_state_blk         *blk = &ds->state->path.blk[level];
322         struct xfs_mount                *mp = ds->state->mp;
323         struct xfs_inode                *dp = ds->dargs.dp;
324         struct xfs_da_geometry          *geo = mp->m_dir_geo;
325         struct xfs_dir2_data_entry      *dent;
326         struct xfs_buf                  *bp;
327         struct xfs_dir2_leaf_entry      *ent;
328         unsigned int                    end;
329         unsigned int                    iter_off;
330         xfs_ino_t                       ino;
331         xfs_dablk_t                     rec_bno;
332         xfs_dir2_db_t                   db;
333         xfs_dir2_data_aoff_t            off;
334         xfs_dir2_dataptr_t              ptr;
335         xfs_dahash_t                    calc_hash;
336         xfs_dahash_t                    hash;
337         struct xfs_dir3_icleaf_hdr      hdr;
338         unsigned int                    tag;
339         int                             error;
340
341         ASSERT(blk->magic == XFS_DIR2_LEAF1_MAGIC ||
342                blk->magic == XFS_DIR2_LEAFN_MAGIC);
343
344         xfs_dir2_leaf_hdr_from_disk(mp, &hdr, blk->bp->b_addr);
345         ent = hdr.ents + blk->index;
346
347         /* Check the hash of the entry. */
348         error = xchk_da_btree_hash(ds, level, &ent->hashval);
349         if (error)
350                 goto out;
351
352         /* Valid hash pointer? */
353         ptr = be32_to_cpu(ent->address);
354         if (ptr == 0)
355                 return 0;
356
357         /* Find the directory entry's location. */
358         db = xfs_dir2_dataptr_to_db(geo, ptr);
359         off = xfs_dir2_dataptr_to_off(geo, ptr);
360         rec_bno = xfs_dir2_db_to_da(geo, db);
361
362         if (rec_bno >= geo->leafblk) {
363                 xchk_da_set_corrupt(ds, level);
364                 goto out;
365         }
366         error = xfs_dir3_data_read(ds->dargs.trans, dp, ds->dargs.owner,
367                         rec_bno, XFS_DABUF_MAP_HOLE_OK, &bp);
368         if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
369                         &error))
370                 goto out;
371         if (!bp) {
372                 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
373                 goto out;
374         }
375         xchk_buffer_recheck(ds->sc, bp);
376
377         if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
378                 goto out_relse;
379
380         dent = bp->b_addr + off;
381
382         /* Make sure we got a real directory entry. */
383         iter_off = geo->data_entry_offset;
384         end = xfs_dir3_data_end_offset(geo, bp->b_addr);
385         if (!end) {
386                 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
387                 goto out_relse;
388         }
389         for (;;) {
390                 struct xfs_dir2_data_entry      *dep = bp->b_addr + iter_off;
391                 struct xfs_dir2_data_unused     *dup = bp->b_addr + iter_off;
392
393                 if (iter_off >= end) {
394                         xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
395                         goto out_relse;
396                 }
397
398                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
399                         iter_off += be16_to_cpu(dup->length);
400                         continue;
401                 }
402                 if (dep == dent)
403                         break;
404                 iter_off += xfs_dir2_data_entsize(mp, dep->namelen);
405         }
406
407         /* Retrieve the entry, sanity check it, and compare hashes. */
408         ino = be64_to_cpu(dent->inumber);
409         hash = be32_to_cpu(ent->hashval);
410         tag = be16_to_cpup(xfs_dir2_data_entry_tag_p(mp, dent));
411         if (!xfs_verify_dir_ino(mp, ino) || tag != off)
412                 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
413         if (dent->namelen == 0) {
414                 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
415                 goto out_relse;
416         }
417
418         /* Does the directory hash match? */
419         dname.name = dent->name;
420         dname.len = dent->namelen;
421         calc_hash = xfs_dir2_hashname(mp, &dname);
422         if (calc_hash != hash)
423                 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
424
425 out_relse:
426         xfs_trans_brelse(ds->dargs.trans, bp);
427 out:
428         return error;
429 }
430
431 /*
432  * Is this unused entry either in the bestfree or smaller than all of
433  * them?  We've already checked that the bestfrees are sorted longest to
434  * shortest, and that there aren't any bogus entries.
435  */
436 STATIC void
437 xchk_directory_check_free_entry(
438         struct xfs_scrub                *sc,
439         xfs_dablk_t                     lblk,
440         struct xfs_dir2_data_free       *bf,
441         struct xfs_dir2_data_unused     *dup)
442 {
443         struct xfs_dir2_data_free       *dfp;
444         unsigned int                    dup_length;
445
446         dup_length = be16_to_cpu(dup->length);
447
448         /* Unused entry is shorter than any of the bestfrees */
449         if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
450                 return;
451
452         for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--)
453                 if (dup_length == be16_to_cpu(dfp->length))
454                         return;
455
456         /* Unused entry should be in the bestfrees but wasn't found. */
457         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
458 }
459
460 /* Check free space info in a directory data block. */
461 STATIC int
462 xchk_directory_data_bestfree(
463         struct xfs_scrub                *sc,
464         xfs_dablk_t                     lblk,
465         bool                            is_block)
466 {
467         struct xfs_dir2_data_unused     *dup;
468         struct xfs_dir2_data_free       *dfp;
469         struct xfs_buf                  *bp;
470         struct xfs_dir2_data_free       *bf;
471         struct xfs_mount                *mp = sc->mp;
472         u16                             tag;
473         unsigned int                    nr_bestfrees = 0;
474         unsigned int                    nr_frees = 0;
475         unsigned int                    smallest_bestfree;
476         int                             newlen;
477         unsigned int                    offset;
478         unsigned int                    end;
479         int                             error;
480
481         if (is_block) {
482                 /* dir block format */
483                 if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
484                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
485                 error = xfs_dir3_block_read(sc->tp, sc->ip, sc->ip->i_ino, &bp);
486         } else {
487                 /* dir data format */
488                 error = xfs_dir3_data_read(sc->tp, sc->ip, sc->ip->i_ino, lblk,
489                                 0, &bp);
490         }
491         if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
492                 goto out;
493         xchk_buffer_recheck(sc, bp);
494
495         /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
496
497         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
498                 goto out_buf;
499
500         /* Do the bestfrees correspond to actual free space? */
501         bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
502         smallest_bestfree = UINT_MAX;
503         for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
504                 offset = be16_to_cpu(dfp->offset);
505                 if (offset == 0)
506                         continue;
507                 if (offset >= mp->m_dir_geo->blksize) {
508                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
509                         goto out_buf;
510                 }
511                 dup = bp->b_addr + offset;
512                 tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
513
514                 /* bestfree doesn't match the entry it points at? */
515                 if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
516                     be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
517                     tag != offset) {
518                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
519                         goto out_buf;
520                 }
521
522                 /* bestfree records should be ordered largest to smallest */
523                 if (smallest_bestfree < be16_to_cpu(dfp->length)) {
524                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
525                         goto out_buf;
526                 }
527
528                 smallest_bestfree = be16_to_cpu(dfp->length);
529                 nr_bestfrees++;
530         }
531
532         /* Make sure the bestfrees are actually the best free spaces. */
533         offset = mp->m_dir_geo->data_entry_offset;
534         end = xfs_dir3_data_end_offset(mp->m_dir_geo, bp->b_addr);
535
536         /* Iterate the entries, stopping when we hit or go past the end. */
537         while (offset < end) {
538                 dup = bp->b_addr + offset;
539
540                 /* Skip real entries */
541                 if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
542                         struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
543
544                         newlen = xfs_dir2_data_entsize(mp, dep->namelen);
545                         if (newlen <= 0) {
546                                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
547                                                 lblk);
548                                 goto out_buf;
549                         }
550                         offset += newlen;
551                         continue;
552                 }
553
554                 /* Spot check this free entry */
555                 tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
556                 if (tag != offset) {
557                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
558                         goto out_buf;
559                 }
560
561                 /*
562                  * Either this entry is a bestfree or it's smaller than
563                  * any of the bestfrees.
564                  */
565                 xchk_directory_check_free_entry(sc, lblk, bf, dup);
566                 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
567                         goto out_buf;
568
569                 /* Move on. */
570                 newlen = be16_to_cpu(dup->length);
571                 if (newlen <= 0) {
572                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
573                         goto out_buf;
574                 }
575                 offset += newlen;
576                 if (offset <= end)
577                         nr_frees++;
578         }
579
580         /* We're required to fill all the space. */
581         if (offset != end)
582                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
583
584         /* Did we see at least as many free slots as there are bestfrees? */
585         if (nr_frees < nr_bestfrees)
586                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
587 out_buf:
588         xfs_trans_brelse(sc->tp, bp);
589 out:
590         return error;
591 }
592
593 /*
594  * Does the free space length in the free space index block ($len) match
595  * the longest length in the directory data block's bestfree array?
596  * Assume that we've already checked that the data block's bestfree
597  * array is in order.
598  */
599 STATIC void
600 xchk_directory_check_freesp(
601         struct xfs_scrub                *sc,
602         xfs_dablk_t                     lblk,
603         struct xfs_buf                  *dbp,
604         unsigned int                    len)
605 {
606         struct xfs_dir2_data_free       *dfp;
607
608         dfp = xfs_dir2_data_bestfree_p(sc->mp, dbp->b_addr);
609
610         if (len != be16_to_cpu(dfp->length))
611                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
612
613         if (len > 0 && be16_to_cpu(dfp->offset) == 0)
614                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
615 }
616
617 /* Check free space info in a directory leaf1 block. */
618 STATIC int
619 xchk_directory_leaf1_bestfree(
620         struct xfs_scrub                *sc,
621         struct xfs_da_args              *args,
622         xfs_dir2_db_t                   last_data_db,
623         xfs_dablk_t                     lblk)
624 {
625         struct xfs_dir3_icleaf_hdr      leafhdr;
626         struct xfs_dir2_leaf_tail       *ltp;
627         struct xfs_dir2_leaf            *leaf;
628         struct xfs_buf                  *dbp;
629         struct xfs_buf                  *bp;
630         struct xfs_da_geometry          *geo = sc->mp->m_dir_geo;
631         __be16                          *bestp;
632         __u16                           best;
633         __u32                           hash;
634         __u32                           lasthash = 0;
635         __u32                           bestcount;
636         unsigned int                    stale = 0;
637         int                             i;
638         int                             error;
639
640         /* Read the free space block. */
641         error = xfs_dir3_leaf_read(sc->tp, sc->ip, sc->ip->i_ino, lblk, &bp);
642         if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
643                 return error;
644         xchk_buffer_recheck(sc, bp);
645
646         leaf = bp->b_addr;
647         xfs_dir2_leaf_hdr_from_disk(sc->ip->i_mount, &leafhdr, leaf);
648         ltp = xfs_dir2_leaf_tail_p(geo, leaf);
649         bestcount = be32_to_cpu(ltp->bestcount);
650         bestp = xfs_dir2_leaf_bests_p(ltp);
651
652         if (xfs_has_crc(sc->mp)) {
653                 struct xfs_dir3_leaf_hdr        *hdr3 = bp->b_addr;
654
655                 if (hdr3->pad != cpu_to_be32(0))
656                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
657         }
658
659         /*
660          * There must be enough bestfree slots to cover all the directory data
661          * blocks that we scanned.  It is possible for there to be a hole
662          * between the last data block and i_disk_size.  This seems like an
663          * oversight to the scrub author, but as we have been writing out
664          * directories like this (and xfs_repair doesn't mind them) for years,
665          * that's what we have to check.
666          */
667         if (bestcount != last_data_db + 1) {
668                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
669                 goto out;
670         }
671
672         /* Is the leaf count even remotely sane? */
673         if (leafhdr.count > geo->leaf_max_ents) {
674                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
675                 goto out;
676         }
677
678         /* Leaves and bests don't overlap in leaf format. */
679         if ((char *)&leafhdr.ents[leafhdr.count] > (char *)bestp) {
680                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
681                 goto out;
682         }
683
684         /* Check hash value order, count stale entries.  */
685         for (i = 0; i < leafhdr.count; i++) {
686                 hash = be32_to_cpu(leafhdr.ents[i].hashval);
687                 if (i > 0 && lasthash > hash)
688                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
689                 lasthash = hash;
690                 if (leafhdr.ents[i].address ==
691                     cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
692                         stale++;
693         }
694         if (leafhdr.stale != stale)
695                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
696         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
697                 goto out;
698
699         /* Check all the bestfree entries. */
700         for (i = 0; i < bestcount; i++, bestp++) {
701                 best = be16_to_cpu(*bestp);
702                 error = xfs_dir3_data_read(sc->tp, sc->ip, args->owner,
703                                 xfs_dir2_db_to_da(args->geo, i),
704                                 XFS_DABUF_MAP_HOLE_OK, &dbp);
705                 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
706                                 &error))
707                         break;
708
709                 if (!dbp) {
710                         if (best != NULLDATAOFF) {
711                                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
712                                                 lblk);
713                                 break;
714                         }
715                         continue;
716                 }
717
718                 if (best == NULLDATAOFF)
719                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
720                 else
721                         xchk_directory_check_freesp(sc, lblk, dbp, best);
722                 xfs_trans_brelse(sc->tp, dbp);
723                 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
724                         break;
725         }
726 out:
727         xfs_trans_brelse(sc->tp, bp);
728         return error;
729 }
730
731 /* Check free space info in a directory freespace block. */
732 STATIC int
733 xchk_directory_free_bestfree(
734         struct xfs_scrub                *sc,
735         struct xfs_da_args              *args,
736         xfs_dablk_t                     lblk)
737 {
738         struct xfs_dir3_icfree_hdr      freehdr;
739         struct xfs_buf                  *dbp;
740         struct xfs_buf                  *bp;
741         __u16                           best;
742         unsigned int                    stale = 0;
743         int                             i;
744         int                             error;
745
746         /* Read the free space block */
747         error = xfs_dir2_free_read(sc->tp, sc->ip, sc->ip->i_ino, lblk, &bp);
748         if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
749                 return error;
750         xchk_buffer_recheck(sc, bp);
751
752         if (xfs_has_crc(sc->mp)) {
753                 struct xfs_dir3_free_hdr        *hdr3 = bp->b_addr;
754
755                 if (hdr3->pad != cpu_to_be32(0))
756                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
757         }
758
759         /* Check all the entries. */
760         xfs_dir2_free_hdr_from_disk(sc->ip->i_mount, &freehdr, bp->b_addr);
761         for (i = 0; i < freehdr.nvalid; i++) {
762                 best = be16_to_cpu(freehdr.bests[i]);
763                 if (best == NULLDATAOFF) {
764                         stale++;
765                         continue;
766                 }
767                 error = xfs_dir3_data_read(sc->tp, sc->ip, args->owner,
768                                 (freehdr.firstdb + i) * args->geo->fsbcount,
769                                 0, &dbp);
770                 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
771                                 &error))
772                         goto out;
773                 xchk_directory_check_freesp(sc, lblk, dbp, best);
774                 xfs_trans_brelse(sc->tp, dbp);
775         }
776
777         if (freehdr.nused + stale != freehdr.nvalid)
778                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
779 out:
780         xfs_trans_brelse(sc->tp, bp);
781         return error;
782 }
783
784 /* Check free space information in directories. */
785 STATIC int
786 xchk_directory_blocks(
787         struct xfs_scrub        *sc)
788 {
789         struct xfs_bmbt_irec    got;
790         struct xfs_da_args      args = {
791                 .dp             = sc->ip,
792                 .whichfork      = XFS_DATA_FORK,
793                 .geo            = sc->mp->m_dir_geo,
794                 .trans          = sc->tp,
795                 .owner          = sc->ip->i_ino,
796         };
797         struct xfs_ifork        *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
798         struct xfs_mount        *mp = sc->mp;
799         xfs_fileoff_t           leaf_lblk;
800         xfs_fileoff_t           free_lblk;
801         xfs_fileoff_t           lblk;
802         struct xfs_iext_cursor  icur;
803         xfs_dablk_t             dabno;
804         xfs_dir2_db_t           last_data_db = 0;
805         bool                    found;
806         bool                    is_block = false;
807         int                     error;
808
809         /* Ignore local format directories. */
810         if (ifp->if_format != XFS_DINODE_FMT_EXTENTS &&
811             ifp->if_format != XFS_DINODE_FMT_BTREE)
812                 return 0;
813
814         lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
815         leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
816         free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
817
818         /* Is this a block dir? */
819         if (xfs_dir2_format(&args, &error) == XFS_DIR2_FMT_BLOCK)
820                 is_block = true;
821         if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
822                 goto out;
823
824         /* Iterate all the data extents in the directory... */
825         found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
826         while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
827                 /* No more data blocks... */
828                 if (got.br_startoff >= leaf_lblk)
829                         break;
830
831                 /*
832                  * Check each data block's bestfree data.
833                  *
834                  * Iterate all the fsbcount-aligned block offsets in
835                  * this directory.  The directory block reading code is
836                  * smart enough to do its own bmap lookups to handle
837                  * discontiguous directory blocks.  When we're done
838                  * with the extent record, re-query the bmap at the
839                  * next fsbcount-aligned offset to avoid redundant
840                  * block checks.
841                  */
842                 for (lblk = roundup((xfs_dablk_t)got.br_startoff,
843                                 args.geo->fsbcount);
844                      lblk < got.br_startoff + got.br_blockcount;
845                      lblk += args.geo->fsbcount) {
846                         last_data_db = xfs_dir2_da_to_db(args.geo, lblk);
847                         error = xchk_directory_data_bestfree(sc, lblk,
848                                         is_block);
849                         if (error)
850                                 goto out;
851                 }
852                 dabno = got.br_startoff + got.br_blockcount;
853                 lblk = roundup(dabno, args.geo->fsbcount);
854                 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
855         }
856
857         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
858                 goto out;
859
860         /* Look for a leaf1 block, which has free info. */
861         if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) &&
862             got.br_startoff == leaf_lblk &&
863             got.br_blockcount == args.geo->fsbcount &&
864             !xfs_iext_next_extent(ifp, &icur, &got)) {
865                 if (is_block) {
866                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
867                         goto out;
868                 }
869                 error = xchk_directory_leaf1_bestfree(sc, &args, last_data_db,
870                                 leaf_lblk);
871                 if (error)
872                         goto out;
873         }
874
875         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
876                 goto out;
877
878         /* Scan for free blocks */
879         lblk = free_lblk;
880         found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
881         while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
882                 /*
883                  * Dirs can't have blocks mapped above 2^32.
884                  * Single-block dirs shouldn't even be here.
885                  */
886                 lblk = got.br_startoff;
887                 if (lblk & ~0xFFFFFFFFULL) {
888                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
889                         goto out;
890                 }
891                 if (is_block) {
892                         xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
893                         goto out;
894                 }
895
896                 /*
897                  * Check each dir free block's bestfree data.
898                  *
899                  * Iterate all the fsbcount-aligned block offsets in
900                  * this directory.  The directory block reading code is
901                  * smart enough to do its own bmap lookups to handle
902                  * discontiguous directory blocks.  When we're done
903                  * with the extent record, re-query the bmap at the
904                  * next fsbcount-aligned offset to avoid redundant
905                  * block checks.
906                  */
907                 for (lblk = roundup((xfs_dablk_t)got.br_startoff,
908                                 args.geo->fsbcount);
909                      lblk < got.br_startoff + got.br_blockcount;
910                      lblk += args.geo->fsbcount) {
911                         error = xchk_directory_free_bestfree(sc, &args,
912                                         lblk);
913                         if (error)
914                                 goto out;
915                 }
916                 dabno = got.br_startoff + got.br_blockcount;
917                 lblk = roundup(dabno, args.geo->fsbcount);
918                 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
919         }
920 out:
921         return error;
922 }
923
924 /*
925  * Revalidate a dirent that we collected in the past but couldn't check because
926  * of lock contention.  Returns 0 if the dirent is still valid, -ENOENT if it
927  * has gone away on us, or a negative errno.
928  */
929 STATIC int
930 xchk_dir_revalidate_dirent(
931         struct xchk_dir         *sd,
932         const struct xfs_name   *xname,
933         xfs_ino_t               ino)
934 {
935         struct xfs_scrub        *sc = sd->sc;
936         xfs_ino_t               child_ino;
937         int                     error;
938
939         /*
940          * Look up the directory entry.  If we get -ENOENT, the directory entry
941          * went away and there's nothing to revalidate.  Return any other
942          * error.
943          */
944         error = xchk_dir_lookup(sc, sc->ip, xname, &child_ino);
945         if (error)
946                 return error;
947
948         /* The inode number changed, nothing to revalidate. */
949         if (ino != child_ino)
950                 return -ENOENT;
951
952         return 0;
953 }
954
955 /*
956  * Check a directory entry's parent pointers the slow way, which means we cycle
957  * locks a bunch and put up with revalidation until we get it done.
958  */
959 STATIC int
960 xchk_dir_slow_dirent(
961         struct xchk_dir         *sd,
962         struct xchk_dirent      *dirent,
963         const struct xfs_name   *xname)
964 {
965         struct xfs_scrub        *sc = sd->sc;
966         struct xfs_inode        *ip;
967         unsigned int            lockmode;
968         int                     error;
969
970         /* Check that the deferred dirent still exists. */
971         if (sd->need_revalidate) {
972                 error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
973                 if (error == -ENOENT)
974                         return 0;
975                 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
976                                         &error))
977                         return error;
978         }
979
980         error = xchk_iget(sc, dirent->ino, &ip);
981         if (error == -EINVAL || error == -ENOENT) {
982                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
983                 return 0;
984         }
985         if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
986                 return error;
987
988         /*
989          * If we can grab both IOLOCK and ILOCK of the alleged child, we can
990          * proceed with the validation.
991          */
992         lockmode = xchk_dir_lock_child(sc, ip);
993         if (lockmode) {
994                 trace_xchk_dir_slowpath(sc->ip, xname, ip->i_ino);
995                 goto check_pptr;
996         }
997
998         /*
999          * We couldn't lock the child file.  Drop all the locks and try to
1000          * get them again, one at a time.
1001          */
1002         xchk_iunlock(sc, sc->ilock_flags);
1003         sd->need_revalidate = true;
1004
1005         trace_xchk_dir_ultraslowpath(sc->ip, xname, ip->i_ino);
1006
1007         error = xchk_dir_trylock_for_pptrs(sc, ip, &lockmode);
1008         if (error)
1009                 goto out_rele;
1010
1011         /* Revalidate, since we just cycled the locks. */
1012         error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
1013         if (error == -ENOENT) {
1014                 error = 0;
1015                 goto out_unlock;
1016         }
1017         if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
1018                 goto out_unlock;
1019
1020 check_pptr:
1021         error = xchk_dir_parent_pointer(sd, xname, ip);
1022 out_unlock:
1023         xfs_iunlock(ip, lockmode);
1024 out_rele:
1025         xchk_irele(sc, ip);
1026         return error;
1027 }
1028
1029 /* Check all the dirents that we deferred the first time around. */
1030 STATIC int
1031 xchk_dir_finish_slow_dirents(
1032         struct xchk_dir         *sd)
1033 {
1034         xfarray_idx_t           array_cur;
1035         int                     error;
1036
1037         foreach_xfarray_idx(sd->dir_entries, array_cur) {
1038                 struct xchk_dirent      dirent;
1039
1040                 if (sd->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1041                         return 0;
1042
1043                 error = xfarray_load(sd->dir_entries, array_cur, &dirent);
1044                 if (error)
1045                         return error;
1046
1047                 error = xfblob_loadname(sd->dir_names, dirent.name_cookie,
1048                                 &sd->xname, dirent.namelen);
1049                 if (error)
1050                         return error;
1051
1052                 error = xchk_dir_slow_dirent(sd, &dirent, &sd->xname);
1053                 if (error)
1054                         return error;
1055         }
1056
1057         return 0;
1058 }
1059
1060 /* Scrub a whole directory. */
1061 int
1062 xchk_directory(
1063         struct xfs_scrub        *sc)
1064 {
1065         struct xchk_dir         *sd;
1066         int                     error;
1067
1068         if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
1069                 return -ENOENT;
1070
1071         if (xchk_file_looks_zapped(sc, XFS_SICK_INO_DIR_ZAPPED)) {
1072                 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
1073                 return 0;
1074         }
1075
1076         /* Plausible size? */
1077         if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) {
1078                 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1079                 return 0;
1080         }
1081
1082         /* Check directory tree structure */
1083         error = xchk_da_btree(sc, XFS_DATA_FORK, xchk_dir_rec, NULL);
1084         if (error)
1085                 return error;
1086
1087         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1088                 return 0;
1089
1090         /* Check the freespace. */
1091         error = xchk_directory_blocks(sc);
1092         if (error)
1093                 return error;
1094
1095         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1096                 return 0;
1097
1098         sd = kvzalloc(sizeof(struct xchk_dir), XCHK_GFP_FLAGS);
1099         if (!sd)
1100                 return -ENOMEM;
1101         sd->sc = sc;
1102         sd->xname.name = sd->namebuf;
1103
1104         if (xfs_has_parent(sc->mp)) {
1105                 char            *descr;
1106
1107                 /*
1108                  * Set up some staging memory for dirents that we can't check
1109                  * due to locking contention.
1110                  */
1111                 descr = xchk_xfile_ino_descr(sc, "slow directory entries");
1112                 error = xfarray_create(descr, 0, sizeof(struct xchk_dirent),
1113                                 &sd->dir_entries);
1114                 kfree(descr);
1115                 if (error)
1116                         goto out_sd;
1117
1118                 descr = xchk_xfile_ino_descr(sc, "slow directory entry names");
1119                 error = xfblob_create(descr, &sd->dir_names);
1120                 kfree(descr);
1121                 if (error)
1122                         goto out_entries;
1123         }
1124
1125         /* Look up every name in this directory by hash. */
1126         error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd);
1127         if (error == -ECANCELED)
1128                 error = 0;
1129         if (error)
1130                 goto out_names;
1131
1132         if (xfs_has_parent(sc->mp)) {
1133                 error = xchk_dir_finish_slow_dirents(sd);
1134                 if (error == -ETIMEDOUT) {
1135                         /* Couldn't grab a lock, scrub was marked incomplete */
1136                         error = 0;
1137                         goto out_names;
1138                 }
1139                 if (error)
1140                         goto out_names;
1141         }
1142
1143 out_names:
1144         if (sd->dir_names)
1145                 xfblob_destroy(sd->dir_names);
1146 out_entries:
1147         if (sd->dir_entries)
1148                 xfarray_destroy(sd->dir_entries);
1149 out_sd:
1150         kvfree(sd);
1151         if (error)
1152                 return error;
1153
1154         /* If the dir is clean, it is clearly not zapped. */
1155         xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_DIR_ZAPPED);
1156         return 0;
1157 }
1158
1159 /*
1160  * Decide if this directory has been zapped to satisfy the inode and ifork
1161  * verifiers.  Checking and repairing should be postponed until the directory
1162  * is fixed.
1163  */
1164 bool
1165 xchk_dir_looks_zapped(
1166         struct xfs_inode        *dp)
1167 {
1168         /* Repair zapped this dir's data fork a short time ago */
1169         if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
1170                 return true;
1171
1172         /*
1173          * If the dinode repair found a bad data fork, it will reset the fork
1174          * to extents format with zero records and wait for the bmapbtd
1175          * scrubber to reconstruct the block mappings.  Directories always
1176          * contain some content, so this is a clear sign of a zapped directory.
1177          * The state checked by xfs_ifork_zapped is not persisted, so this is
1178          * the secondary strategy if repairs are interrupted by a crash or an
1179          * unmount.
1180          */
1181         return dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS &&
1182                dp->i_df.if_nextents == 0;
1183 }
This page took 0.09642 seconds and 4 git commands to generate.