1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_trans.h"
14 #include "xfs_btree.h"
16 #include "xfs_refcount.h"
19 #include "xfs_alloc.h"
20 #include "xfs_alloc_btree.h"
21 #include "xfs_ialloc_btree.h"
22 #include "xfs_refcount_btree.h"
23 #include "scrub/scrub.h"
24 #include "scrub/common.h"
25 #include "scrub/btree.h"
26 #include "scrub/bitmap.h"
27 #include "scrub/agb_bitmap.h"
28 #include "scrub/repair.h"
/*
 * Scrub setup for the reverse mapping btree.  Visible steps: turn on the
 * XCHK_FSGATES_DRAIN gate when xchk_need_intent_drain(sc) is true, call
 * xrep_setup_ag_rmapbt(sc) when xchk_could_repair(sc) is true, and return
 * whatever xchk_setup_ag_btree(sc, false) returns.
 *
 * NOTE(review): this excerpt omits the signature lines and the lines that
 * follow the xrep_setup_ag_rmapbt() call; presumably a nonzero error is
 * returned early -- confirm against the full file.
 */
31 * Set us up to scrub reverse mapping btrees.
37 if (xchk_need_intent_drain(sc))
38 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
40 if (xchk_could_repair(sc)) {
43 error = xrep_setup_ag_rmapbt(sc);
48 return xchk_setup_ag_btree(sc, false);
/*
 * State shared by the per-record checks in this file.  The top-level
 * scrubber zero-allocates it with kzalloc(), initializes the five bitmaps,
 * and passes it to xchk_btree() as the private pointer; the record callback
 * reads it back through bs->private.
 *
 * overlap_rec and prev_rec are treated as "not yet set" by their users while
 * rm_blockcount is zero, which is the state kzalloc() leaves them in.
 * bitmaps_complete is assigned true in xchk_rmapbt_walk_ag_metadata() and is
 * tested by xchk_rmapbt_mark_bitmap() before any bitmap comparison.
 *
 * NOTE(review): the struct tag line is omitted in this excerpt; the tag
 * struct xchk_rmap is taken from the later uses in this file.
 */
51 /* Reverse-mapping scrubber. */
55 * The furthest-reaching of the rmapbt records that we've already
56 * processed. This enables us to detect overlapping records for space
57 * allocations that cannot be shared.
59 struct xfs_rmap_irec overlap_rec;
62 * The previous rmapbt record, so that we can check for two records
65 struct xfs_rmap_irec prev_rec;
67 /* Bitmaps containing all blocks for each type of AG metadata. */
68 struct xagb_bitmap fs_owned;
69 struct xagb_bitmap log_owned;
70 struct xagb_bitmap ag_owned;
71 struct xagb_bitmap inobt_owned;
72 struct xagb_bitmap refcbt_owned;
74 /* Did we complete the AG space metadata bitmaps? */
75 bool bitmaps_complete;
/*
 * Query the refcount btree for the extent described by @irec through
 * xfs_refcount_find_shared(), and flag a cross-reference corruption on the
 * refcount cursor when flen comes back nonzero while the record has a
 * non-inode owner or carries XFS_RMAP_BMBT_BLOCK, XFS_RMAP_ATTR_FORK or
 * XFS_RMAP_UNWRITTEN in rm_flags.
 *
 * @sc:   scrub context; sc->sa.refc_cur is the cursor that is queried.
 * @irec: rmap record under test.
 *
 * The guards test for a missing refcount cursor, xchk_skip_xref(), and the
 * verdict of xchk_should_check_xref() on the lookup error.
 *
 * NOTE(review): the local declarations and the statements after each guard
 * (presumably return) are omitted in this excerpt.  flen != 0 is read here
 * as "a shared region was found", based on the existing comment and the
 * callee name -- confirm.
 */
78 /* Cross-reference a rmap against the refcount btree. */
80 xchk_rmapbt_xref_refc(
82 struct xfs_rmap_irec *irec)
92 if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
95 non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
96 is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
97 is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
98 is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
100 /* If this is shared, must be a data fork extent. */
101 error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
102 irec->rm_blockcount, &fbno, &flen, false);
103 if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
105 if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
106 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
/*
 * Cross-reference one rmap record against other AG metadata.  The extent
 * [rm_startblock, rm_startblock + rm_blockcount) is passed to
 * xchk_xref_is_used_space(); an XFS_RMAP_OWN_INODES record is passed to
 * xchk_xref_is_inode_chunk(); an XFS_RMAP_OWN_COW record is passed to
 * xchk_xref_is_cow_staging().  xchk_xref_is_not_inode_chunk() and
 * xchk_rmapbt_xref_refc() are the counterpart calls.
 *
 * The first test looks at XFS_SCRUB_OFLAG_CORRUPT in sm_flags before any
 * cross-referencing is done.
 *
 * NOTE(review): the function name line and original lines 119, 124 and 129
 * are omitted in this excerpt (presumably a return and two else keywords).
 * The name xchk_rmapbt_xref is taken from the call in the record scrubber --
 * confirm.
 */
109 /* Cross-reference with the other btrees. */
112 struct xfs_scrub *sc,
113 struct xfs_rmap_irec *irec)
115 xfs_agblock_t agbno = irec->rm_startblock;
116 xfs_extlen_t len = irec->rm_blockcount;
118 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
121 xchk_xref_is_used_space(sc, agbno, len);
122 if (irec->rm_owner == XFS_RMAP_OWN_INODES)
123 xchk_xref_is_inode_chunk(sc, agbno, len);
125 xchk_xref_is_not_inode_chunk(sc, agbno, len);
126 if (irec->rm_owner == XFS_RMAP_OWN_COW)
127 xchk_xref_is_cow_staging(sc, irec->rm_startblock,
128 irec->rm_blockcount);
130 xchk_rmapbt_xref_refc(sc, irec);
/*
 * Summary of the visible code: starting at level 1 and stopping below
 * cur->bc_nlevels, each level whose bc_levels[level].ptr is tested against 1
 * has its block fetched with xfs_btree_get_block(), and every low key and
 * high key from index 1 through bb_numrecs is masked with
 * XFS_RMAP_OFF_UNWRITTEN.  A match calls xchk_btree_set_preen() for that
 * level.  XFS_SCRUB_OFLAG_PREEN in sm_flags is tested before the walk.
 *
 * badflag is built once with cpu_to_be64() and is applied to rm_offset
 * without any byte swapping of the key field.
 *
 * @bs: btree scrub context; bs->sc and bs->cur are the only fields read.
 *
 * NOTE(review): the statements after each guard and after each
 * xchk_btree_set_preen() call are omitted in this excerpt (presumably
 * return, continue and break) -- confirm against the full file.
 */
134 * Check for bogus UNWRITTEN flags in the rmapbt node block keys.
136 * In reverse mapping records, the file mapping extent state
137 * (XFS_RMAP_OFF_UNWRITTEN) is a record attribute, not a key field. It is not
138 * involved in lookups in any way. In older kernels, the functions that
139 * convert rmapbt records to keys forgot to filter out the extent state bit,
140 * even though the key comparison functions have filtered the flag correctly.
141 * If we spot an rmap key with the unwritten bit set in rm_offset, we should
142 * mark the btree as needing optimization to rebuild the btree without those
146 xchk_rmapbt_check_unwritten_in_keyflags(
147 struct xchk_btree *bs)
149 struct xfs_scrub *sc = bs->sc;
150 struct xfs_btree_cur *cur = bs->cur;
151 struct xfs_btree_block *keyblock;
152 union xfs_btree_key *lkey, *hkey;
153 __be64 badflag = cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
156 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_PREEN)
159 for (level = 1; level < cur->bc_nlevels; level++) {
163 /* Only check the first time we've seen this node block. */
164 if (cur->bc_levels[level].ptr > 1)
167 keyblock = xfs_btree_get_block(cur, level, &bp);
168 for (ptr = 1; ptr <= be16_to_cpu(keyblock->bb_numrecs); ptr++) {
169 lkey = xfs_btree_key_addr(cur, ptr, keyblock);
171 if (lkey->rmap.rm_offset & badflag) {
172 xchk_btree_set_preen(sc, cur, level);
176 hkey = xfs_btree_high_key_addr(cur, ptr, keyblock);
177 if (hkey->rmap.rm_offset & badflag) {
178 xchk_btree_set_preen(sc, cur, level);
/*
 * Decide whether the space described by @irec may be shared with another
 * rmap record.  The visible tests look at, in order: whether the filesystem
 * has reflink (xfs_has_reflink), whether rm_owner is a non-inode owner, and
 * whether rm_flags has XFS_RMAP_BMBT_BLOCK or XFS_RMAP_ATTR_FORK set (the
 * flag mask continues on a line that is omitted in this excerpt).
 *
 * @sc:   scrub context; only sc->mp is read.
 * @irec: record to classify; not modified (const).
 *
 * NOTE(review): none of the return statements are visible.  The overlap
 * checker flags corruption when this predicate is false for either record,
 * so presumably each test above returns false on a hit -- confirm.
 */
186 xchk_rmapbt_is_shareable(
187 struct xfs_scrub *sc,
188 const struct xfs_rmap_irec *irec)
190 if (!xfs_has_reflink(sc->mp))
192 if (XFS_RMAP_NON_INODE_OWNER(irec->rm_owner))
194 if (irec->rm_flags & (XFS_RMAP_BMBT_BLOCK | XFS_RMAP_ATTR_FORK |
/*
 * Compare @irec with cr->overlap_rec.  pnext is the first block past
 * overlap_rec; when pnext is greater than irec->rm_startblock the two
 * records overlap, and the btree is marked corrupt unless both records pass
 * xchk_rmapbt_is_shareable().  The memcpy on the last line stores @irec as
 * the new overlap_rec.
 *
 * @bs:   btree scrub context (bs->sc and bs->cur are used for reporting).
 * @cr:   scrubber state holding overlap_rec.
 * @irec: record being checked; not modified (const).
 *
 * An overlap_rec with rm_blockcount == 0 is treated as "no previous record".
 * XFS_SCRUB_OFLAG_CORRUPT in sm_flags is tested before anything else.
 *
 * NOTE(review): the statements after each guard and original lines 228-231
 * (presumably the comparison of pnext with inext that decides whether
 * overlap_rec is replaced) are omitted in this excerpt -- confirm.
 */
200 /* Flag failures for records that overlap but cannot. */
202 xchk_rmapbt_check_overlapping(
203 struct xchk_btree *bs,
204 struct xchk_rmap *cr,
205 const struct xfs_rmap_irec *irec)
207 xfs_agblock_t pnext, inext;
209 if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
212 /* No previous record? */
213 if (cr->overlap_rec.rm_blockcount == 0)
216 /* Do overlap_rec and irec overlap? */
217 pnext = cr->overlap_rec.rm_startblock + cr->overlap_rec.rm_blockcount;
218 if (pnext <= irec->rm_startblock)
221 /* Overlap is only allowed if both records are data fork mappings. */
222 if (!xchk_rmapbt_is_shareable(bs->sc, &cr->overlap_rec) ||
223 !xchk_rmapbt_is_shareable(bs->sc, irec))
224 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
226 /* Save whichever rmap record extends furthest. */
227 inext = irec->rm_startblock + irec->rm_blockcount;
232 memcpy(&cr->overlap_rec, irec, sizeof(struct xfs_rmap_irec));
/*
 * Predicate over the previous record (cr->prev_rec, aliased as r1) and @r2.
 * Visible tests, in order:
 *   - prev_rec.rm_blockcount == 0 means prev_rec was never filled in;
 *   - rm_owner of both records is compared;
 *   - r2 is tested for starting exactly at r1 start + r1 blockcount;
 *   - the summed block counts are compared with a limit that sits on an
 *     omitted line; the cast to unsigned long long widens the addition,
 *     presumably so the sum cannot wrap;
 *   - XFS_RMAP_NON_INODE_OWNER(r2->rm_owner) splits off non-inode owners;
 *   - for inode owners rm_flags of both records is compared and
 *     XFS_RMAP_BMBT_BLOCK is tested;
 *   - the final result is whether r2->rm_offset equals
 *     r1->rm_offset + r1->rm_blockcount.
 *
 * NOTE(review): the function name line and every return statement except the
 * last are omitted in this excerpt, so the outcome of each intermediate test
 * is not visible.  The name xchk_rmap_mergeable is taken from the caller --
 * confirm.
 */
235 /* Decide if two reverse-mapping records can be merged. */
238 struct xchk_rmap *cr,
239 const struct xfs_rmap_irec *r2)
241 const struct xfs_rmap_irec *r1 = &cr->prev_rec;
243 /* Ignore if prev_rec is not yet initialized. */
244 if (cr->prev_rec.rm_blockcount == 0)
247 if (r1->rm_owner != r2->rm_owner)
249 if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
251 if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
254 if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
256 /* must be an inode owner below here */
257 if (r1->rm_flags != r2->rm_flags)
259 if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
261 return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
/*
 * Mark the btree corrupt when xchk_rmap_mergeable() says that @irec could
 * have been combined with cr->prev_rec, then copy @irec into cr->prev_rec so
 * that it becomes the reference for the next record.
 *
 * @bs:   btree scrub context (bs->sc and bs->cur are used for reporting).
 * @cr:   scrubber state holding prev_rec.
 * @irec: record being checked; not modified (const).
 *
 * XFS_SCRUB_OFLAG_CORRUPT in sm_flags is tested first.
 * NOTE(review): the statement after that test is omitted in this excerpt
 * (presumably return) -- confirm.
 */
264 /* Flag failures for records that could be merged. */
266 xchk_rmapbt_check_mergeable(
267 struct xchk_btree *bs,
268 struct xchk_rmap *cr,
269 const struct xfs_rmap_irec *irec)
271 if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
274 if (xchk_rmap_mergeable(cr, irec))
275 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
277 memcpy(&cr->prev_rec, irec, sizeof(struct xfs_rmap_irec));
/*
 * Check a metadata-owned rmap record against the bitmap that
 * xchk_rmapbt_walk_ag_metadata() filled for that owner, then clear the
 * record range from the bitmap.
 *
 * The switch on rm_owner selects the bitmap: the visible assignments pick
 * log_owned, inobt_owned and refcbt_owned; the assignments for
 * XFS_RMAP_OWN_FS and XFS_RMAP_OWN_AG sit on omitted lines.
 * xagb_bitmap_test() is called with fsbcount preset to rm_blockcount.  When
 * it succeeds but leaves fsbcount smaller than rm_blockcount, or (per the
 * existing comment) when the start of the record is not in a fully set
 * region, a cross-reference corruption is flagged on sc->sa.rmap_cur.
 *
 * @bs:   btree scrub context.
 * @cr:   scrubber state holding the bitmaps and bitmaps_complete.
 * @irec: record being checked; not modified (const).
 *
 * Returns the result of xagb_bitmap_clear() on the path that reaches the
 * last line.
 *
 * NOTE(review): the statements after the two guards, the break/default
 * lines of the switch, and the else between the two reporting branches are
 * omitted in this excerpt.  bmp starts out NULL, so presumably owners
 * without a bitmap leave the function before xagb_bitmap_test() -- confirm.
 */
280 /* Compare an rmap for AG metadata against the metadata walk. */
282 xchk_rmapbt_mark_bitmap(
283 struct xchk_btree *bs,
284 struct xchk_rmap *cr,
285 const struct xfs_rmap_irec *irec)
287 struct xfs_scrub *sc = bs->sc;
288 struct xagb_bitmap *bmp = NULL;
289 xfs_extlen_t fsbcount = irec->rm_blockcount;
292 * Skip corrupt records. It is essential that we detect records in the
293 * btree that cannot overlap but do, flag those as CORRUPT, and skip
294 * the bitmap comparison to avoid generating false XCORRUPT reports.
296 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
300 * If the AG metadata walk didn't complete, there's no point in
301 * comparing against partial results.
303 if (!cr->bitmaps_complete)
306 switch (irec->rm_owner) {
307 case XFS_RMAP_OWN_FS:
310 case XFS_RMAP_OWN_LOG:
311 bmp = &cr->log_owned;
313 case XFS_RMAP_OWN_AG:
316 case XFS_RMAP_OWN_INOBT:
317 bmp = &cr->inobt_owned;
319 case XFS_RMAP_OWN_REFC:
320 bmp = &cr->refcbt_owned;
327 if (xagb_bitmap_test(bmp, irec->rm_startblock, &fsbcount)) {
329 * The start of this reverse mapping corresponds to a set
330 * region in the bitmap. If the mapping covers more area than
331 * the set region, then it covers space that wasn't found by
332 * the AG metadata walk.
334 if (fsbcount < irec->rm_blockcount)
335 xchk_btree_xref_set_corrupt(bs->sc,
336 bs->sc->sa.rmap_cur, 0);
339 * The start of this reverse mapping does not correspond to a
340 * completely set region in the bitmap. The region wasn't
341 * fully set by walking the AG metadata, so this is a
342 * cross-referencing corruption.
344 xchk_btree_xref_set_corrupt(bs->sc, bs->sc->sa.rmap_cur, 0);
347 /* Unset the region so that we can detect missing rmap records. */
348 return xagb_bitmap_clear(bmp, irec->rm_startblock, irec->rm_blockcount);
/*
 * Per-record callback that the top-level scrubber hands to xchk_btree().
 * The on-disk record is decoded with xfs_rmap_btrec_to_irec() and validated
 * with xfs_rmap_check_irec(); a non-NULL result from either one marks the
 * btree corrupt.
 *
 * The checks then run in this order: key flag check, mergeable check,
 * overlap check, cross-referencing, and finally the bitmap comparison, whose
 * result is returned.  The order matters because the later checks test
 * XFS_SCRUB_OFLAG_CORRUPT, which the earlier ones may have set.
 *
 * @bs:  btree scrub context; bs->private carries the struct xchk_rmap.
 * @rec: raw btree record to scrub; not modified (const).
 *
 * NOTE(review): the function name line and the lines after the corrupt
 * report (original 363-364, presumably an early return) are omitted in this
 * excerpt.  The name xchk_rmapbt_rec is taken from the xchk_btree() call --
 * confirm.
 */
351 /* Scrub an rmapbt record. */
354 struct xchk_btree *bs,
355 const union xfs_btree_rec *rec)
357 struct xchk_rmap *cr = bs->private;
358 struct xfs_rmap_irec irec;
360 if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
361 xfs_rmap_check_irec(to_perag(bs->cur->bc_group), &irec) != NULL) {
362 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
366 xchk_rmapbt_check_unwritten_in_keyflags(bs);
367 xchk_rmapbt_check_mergeable(bs, cr, &irec);
368 xchk_rmapbt_check_overlapping(bs, cr, &irec);
369 xchk_rmapbt_xref(bs->sc, &irec);
371 return xchk_rmapbt_mark_bitmap(bs, cr, &irec);
/*
 * Callback passed to xfs_agfl_walk() by xchk_rmapbt_walk_ag_metadata().
 * It treats priv as a struct xagb_bitmap pointer and sets a single block,
 * agbno, in that bitmap.  Returns the result of xagb_bitmap_set().
 *
 * NOTE(review): the parameter lines for agbno and priv are omitted in this
 * excerpt, so their declared types cannot be seen here.
 */
374 /* Add an AGFL block to the rmap list. */
376 xchk_rmapbt_walk_agfl(
377 struct xfs_mount *mp,
381 struct xagb_bitmap *bitmap = priv;
383 return xagb_bitmap_set(bitmap, agbno, 1);
/*
 * Summary of the visible code, by bitmap:
 *   fs_owned     - the block range from XFS_SB_BLOCK(mp) through
 *                  XFS_AGFL_BLOCK(mp), inclusive.
 *   log_owned    - sb_logblocks blocks starting at the AG block of
 *                  sb_logstart, only when xfs_ag_contains_log() is true for
 *                  this AG.
 *   ag_owned     - the blocks of the bnobt, the cntbt and the rmapbt (via
 *                  xagb_bitmap_set_btblocks), plus one block per AGFL entry
 *                  (via xfs_agfl_walk with xchk_rmapbt_walk_agfl).
 *   inobt_owned  - the blocks of the inobt and, when xfs_has_finobt() is
 *                  true, of the finobt.
 *   refcbt_owned - the blocks of the refcount btree, when xfs_has_reflink()
 *                  is true.
 *
 * For each btree the cursor from sc->sa is tried first and an init_cursor
 * call provides a replacement; a cursor that differs from the sc->sa one is
 * released with xfs_btree_del_cursor() right after its walk.  The AGFL
 * buffer is released with xfs_trans_brelse() after the AGFL walk.
 *
 * On the error path xchk_btree_xref_process_error() is called on the rmap
 * cursor at its top level (bc_nlevels - 1); cr->bitmaps_complete is assigned
 * true on the last visible line.
 *
 * @sc: scrub context.
 * @cr: scrubber state whose bitmaps are filled in.
 *
 * NOTE(review): the tests that decide between the sc->sa cursor and a new
 * one (presumably a NULL check), every error check between steps, and the
 * control flow around the final error handling are omitted in this excerpt,
 * so it cannot be seen here whether bitmaps_complete is set only on success
 * -- confirm against the full file.
 */
387 * Set up bitmaps mapping all the AG metadata to compare with the rmapbt
390 * Grab our own btree cursors here if the scrub setup function didn't give us a
391 * btree cursor due to reports of poor health. We need to find out if the
392 * rmapbt disagrees with primary metadata btrees to tag the rmapbt as being
396 xchk_rmapbt_walk_ag_metadata(
397 struct xfs_scrub *sc,
398 struct xchk_rmap *cr)
400 struct xfs_mount *mp = sc->mp;
401 struct xfs_buf *agfl_bp;
402 struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
403 struct xfs_btree_cur *cur;
406 /* OWN_FS: AG headers */
407 error = xagb_bitmap_set(&cr->fs_owned, XFS_SB_BLOCK(mp),
408 XFS_AGFL_BLOCK(mp) - XFS_SB_BLOCK(mp) + 1);
412 /* OWN_LOG: Internal log */
413 if (xfs_ag_contains_log(mp, pag_agno(sc->sa.pag))) {
414 error = xagb_bitmap_set(&cr->log_owned,
415 XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
416 mp->m_sb.sb_logblocks);
421 /* OWN_AG: bnobt, cntbt, rmapbt, and AGFL */
422 cur = sc->sa.bno_cur;
424 cur = xfs_bnobt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
426 error = xagb_bitmap_set_btblocks(&cr->ag_owned, cur);
427 if (cur != sc->sa.bno_cur)
428 xfs_btree_del_cursor(cur, error);
432 cur = sc->sa.cnt_cur;
434 cur = xfs_cntbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
436 error = xagb_bitmap_set_btblocks(&cr->ag_owned, cur);
437 if (cur != sc->sa.cnt_cur)
438 xfs_btree_del_cursor(cur, error);
442 error = xagb_bitmap_set_btblocks(&cr->ag_owned, sc->sa.rmap_cur);
446 error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
450 error = xfs_agfl_walk(sc->mp, agf, agfl_bp, xchk_rmapbt_walk_agfl,
452 xfs_trans_brelse(sc->tp, agfl_bp);
456 /* OWN_INOBT: inobt, finobt */
457 cur = sc->sa.ino_cur;
459 cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, sc->sa.agi_bp);
460 error = xagb_bitmap_set_btblocks(&cr->inobt_owned, cur);
461 if (cur != sc->sa.ino_cur)
462 xfs_btree_del_cursor(cur, error);
466 if (xfs_has_finobt(sc->mp)) {
467 cur = sc->sa.fino_cur;
469 cur = xfs_finobt_init_cursor(sc->sa.pag, sc->tp,
471 error = xagb_bitmap_set_btblocks(&cr->inobt_owned, cur);
472 if (cur != sc->sa.fino_cur)
473 xfs_btree_del_cursor(cur, error);
478 /* OWN_REFC: refcountbt */
479 if (xfs_has_reflink(sc->mp)) {
480 cur = sc->sa.refc_cur;
482 cur = xfs_refcountbt_init_cursor(sc->mp, sc->tp,
483 sc->sa.agf_bp, sc->sa.pag);
484 error = xagb_bitmap_set_btblocks(&cr->refcbt_owned, cur);
485 if (cur != sc->sa.refc_cur)
486 xfs_btree_del_cursor(cur, error);
493 * If there's an error, set XFAIL and disable the bitmap
494 * cross-referencing checks, but proceed with the scrub anyway.
497 xchk_btree_xref_process_error(sc, sc->sa.rmap_cur,
498 sc->sa.rmap_cur->bc_nlevels - 1, &error);
500 cr->bitmaps_complete = true;
/*
 * Runs after the record walk.  Each record that matched a bitmap cleared its
 * range from that bitmap (see xchk_rmapbt_mark_bitmap), so any bitmap whose
 * xagb_bitmap_hweight() is still nonzero holds blocks that no rmap record
 * accounted for.  Each such bitmap produces one cross-reference corruption
 * report on the rmap cursor at its top level (bc_nlevels - 1).
 *
 * @sc: scrub context; sc->sa.rmap_cur is the cursor used for reporting.
 * @cr: scrubber state holding the five bitmaps.
 *
 * The first test looks at XFS_SCRUB_OFLAG_CORRUPT and XFS_SCRUB_OFLAG_XFAIL
 * in sm_flags before any bitmap is examined.
 * NOTE(review): the declaration of level and the statement after that test
 * (presumably return) are omitted in this excerpt -- confirm.
 */
505 * Check for set regions in the bitmaps; if there are any, the rmap records do
506 * not describe all the AG metadata.
509 xchk_rmapbt_check_bitmaps(
510 struct xfs_scrub *sc,
511 struct xchk_rmap *cr)
513 struct xfs_btree_cur *cur = sc->sa.rmap_cur;
516 if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
517 XFS_SCRUB_OFLAG_XFAIL))
521 level = cur->bc_nlevels - 1;
524 * Any bitmap with bits still set indicates that the reverse mapping
525 * doesn't cover the entire primary structure.
527 if (xagb_bitmap_hweight(&cr->fs_owned) != 0)
528 xchk_btree_xref_set_corrupt(sc, cur, level);
530 if (xagb_bitmap_hweight(&cr->log_owned) != 0)
531 xchk_btree_xref_set_corrupt(sc, cur, level);
533 if (xagb_bitmap_hweight(&cr->ag_owned) != 0)
534 xchk_btree_xref_set_corrupt(sc, cur, level);
536 if (xagb_bitmap_hweight(&cr->inobt_owned) != 0)
537 xchk_btree_xref_set_corrupt(sc, cur, level);
539 if (xagb_bitmap_hweight(&cr->refcbt_owned) != 0)
540 xchk_btree_xref_set_corrupt(sc, cur, level);
/*
 * Top-level entry point of this scrubber.  Visible sequence:
 *   1. zero-allocate a struct xchk_rmap with kzalloc(XCHK_GFP_FLAGS);
 *   2. initialize the five bitmaps;
 *   3. fill them with xchk_rmapbt_walk_ag_metadata();
 *   4. walk the rmap btree with xchk_btree(), using xchk_rmapbt_rec as the
 *      record callback, XFS_RMAP_OINFO_AG as the owner info, and the
 *      allocated state as the private pointer;
 *   5. look for leftover bitmap bits with xchk_rmapbt_check_bitmaps();
 *   6. destroy the bitmaps in the reverse of their init order.
 *
 * @sc: scrub context.
 *
 * NOTE(review): the function name line, the allocation failure check, the
 * error checks between steps, the cleanup label, the release of cr and the
 * return statement are all omitted in this excerpt -- confirm against the
 * full file.
 */
543 /* Scrub the rmap btree for some AG. */
546 struct xfs_scrub *sc)
548 struct xchk_rmap *cr;
551 cr = kzalloc(sizeof(struct xchk_rmap), XCHK_GFP_FLAGS);
555 xagb_bitmap_init(&cr->fs_owned);
556 xagb_bitmap_init(&cr->log_owned);
557 xagb_bitmap_init(&cr->ag_owned);
558 xagb_bitmap_init(&cr->inobt_owned);
559 xagb_bitmap_init(&cr->refcbt_owned);
561 error = xchk_rmapbt_walk_ag_metadata(sc, cr);
565 error = xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
566 &XFS_RMAP_OINFO_AG, cr);
570 xchk_rmapbt_check_bitmaps(sc, cr);
573 xagb_bitmap_destroy(&cr->refcbt_owned);
574 xagb_bitmap_destroy(&cr->inobt_owned);
575 xagb_bitmap_destroy(&cr->ag_owned);
576 xagb_bitmap_destroy(&cr->log_owned);
577 xagb_bitmap_destroy(&cr->fs_owned);
/*
 * Cross-reference helper: count the rmap owners of the extent (bno, len)
 * with xfs_rmap_count_owners() and flag a cross-reference corruption on the
 * rmap cursor for each of these outcomes:
 *   - res.matches is anything other than exactly 1;
 *   - res.bad_non_owner_matches is nonzero;
 *   - res.non_owner_matches is nonzero.
 *
 * @sc:    scrub context; sc->sa.rmap_cur is the cursor that is queried.
 * @oinfo: owner that the extent is expected to belong to; not modified.
 *
 * The guards test for a missing rmap cursor, xchk_skip_xref(), and the
 * verdict of xchk_should_check_xref() on the lookup error.
 * NOTE(review): the bno and len parameter lines and the statements after
 * each guard (presumably return) are omitted in this excerpt -- confirm.
 */
582 /* xref check that the extent is owned only by a given owner */
584 xchk_xref_is_only_owned_by(
585 struct xfs_scrub *sc,
588 const struct xfs_owner_info *oinfo)
590 struct xfs_rmap_matches res;
593 if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
596 error = xfs_rmap_count_owners(sc->sa.rmap_cur, bno, len, oinfo, &res);
597 if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
599 if (res.matches != 1)
600 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
601 if (res.bad_non_owner_matches)
602 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
603 if (res.non_owner_matches)
604 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
/*
 * Cross-reference helper: count the rmap owners of the extent (bno, len)
 * with xfs_rmap_count_owners() and flag a cross-reference corruption on the
 * rmap cursor when res.matches is nonzero or when
 * res.bad_non_owner_matches is nonzero.  Unlike
 * xchk_xref_is_only_owned_by(), res.non_owner_matches is not examined here.
 *
 * @sc:    scrub context; sc->sa.rmap_cur is the cursor that is queried.
 * @oinfo: owner that must not own the extent; not modified.
 *
 * The guards test for a missing rmap cursor, xchk_skip_xref(), and the
 * verdict of xchk_should_check_xref() on the lookup error.
 * NOTE(review): the bno and len parameter lines and the statements after
 * each guard (presumably return) are omitted in this excerpt -- confirm.
 */
607 /* xref check that the extent is not owned by a given owner */
609 xchk_xref_is_not_owned_by(
610 struct xfs_scrub *sc,
613 const struct xfs_owner_info *oinfo)
615 struct xfs_rmap_matches res;
618 if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
621 error = xfs_rmap_count_owners(sc->sa.rmap_cur, bno, len, oinfo, &res);
622 if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
624 if (res.matches != 0)
625 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
626 if (res.bad_non_owner_matches)
627 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
/*
 * Cross-reference helper: ask xfs_rmap_has_records() how the range
 * (bno, len) is covered by rmap records and flag a cross-reference
 * corruption on the rmap cursor unless the outcome is
 * XBTREE_RECPACKING_EMPTY.
 *
 * @sc: scrub context; sc->sa.rmap_cur is the cursor that is queried.
 *
 * The guards test for a missing rmap cursor, xchk_skip_xref(), and the
 * verdict of xchk_should_check_xref() on the lookup error.
 * NOTE(review): the bno and len parameter lines and the statements after
 * each guard (presumably return) are omitted in this excerpt -- confirm.
 */
630 /* xref check that the extent has no reverse mapping at all */
632 xchk_xref_has_no_owner(
633 struct xfs_scrub *sc,
637 enum xbtree_recpacking outcome;
640 if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
643 error = xfs_rmap_has_records(sc->sa.rmap_cur, bno, len, &outcome);
644 if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
646 if (outcome != XBTREE_RECPACKING_EMPTY)
647 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);