1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "scrub/scrub.h"
14 #include "scrub/common.h"
15 #include "scrub/btree.h"
16 #include "scrub/trace.h"
/*
 * Shared worker for xchk_btree_process_error and
 * xchk_btree_xref_process_error, which differ only in the flag they pass.
 */
21 * Check for btree operation errors. See the section about handling
22 * operational errors in common.c.
25 __xchk_btree_process_error(
27 struct xfs_btree_cur *cur,
38 /* Used to restart an op with deadlock avoidance. */
39 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
/* errflag is supplied by the caller and OR'd into the scrub output flags. */
43 /* Note the badness but don't abort. */
44 sc->sm->sm_flags |= errflag;
/* Inode-rooted btrees are traced via the ifork variant of the tracepoint. */
48 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
49 trace_xchk_ifork_btree_op_error(sc, cur, level,
52 trace_xchk_btree_op_error(sc, cur, level,
/*
 * Thin wrapper: calls __xchk_btree_process_error with
 * XFS_SCRUB_OFLAG_CORRUPT as the flag and __return_address as the last
 * argument, and returns whatever the helper returns.
 */
60 xchk_btree_process_error(
62 struct xfs_btree_cur *cur,
66 return __xchk_btree_process_error(sc, cur, level, error,
67 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
/*
 * Cross-reference flavour of the wrapper: calls
 * __xchk_btree_process_error with XFS_SCRUB_OFLAG_XFAIL as the flag and
 * __return_address as the last argument, and returns the helper's result.
 */
71 xchk_btree_xref_process_error(
73 struct xfs_btree_cur *cur,
77 return __xchk_btree_process_error(sc, cur, level, error,
78 XFS_SCRUB_OFLAG_XFAIL, __return_address);
81 /* Record btree block corruption. */
83 __xchk_btree_set_corrupt(
85 struct xfs_btree_cur *cur,
/* OR the caller-supplied flag into the scrub output flags. */
90 sc->sm->sm_flags |= errflag;
/* Inode-rooted btrees are traced via the ifork variant of the tracepoint. */
92 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
93 trace_xchk_ifork_btree_error(sc, cur, level,
96 trace_xchk_btree_error(sc, cur, level,
/*
 * Wrapper around __xchk_btree_set_corrupt that passes
 * XFS_SCRUB_OFLAG_CORRUPT as the flag for the given cursor and level.
 */
101 xchk_btree_set_corrupt(
102 struct xfs_scrub *sc,
103 struct xfs_btree_cur *cur,
106 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
/*
 * Cross-reference flavour: wrapper around __xchk_btree_set_corrupt that
 * passes XFS_SCRUB_OFLAG_XCORRUPT as the flag for the given cursor and level.
 */
111 xchk_btree_xref_set_corrupt(
112 struct xfs_scrub *sc,
113 struct xfs_btree_cur *cur,
116 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
/*
 * Record-level check.  Works on level 0 of bs->cur: compares the current
 * record with the previously seen one, then with the keys stored at level 1.
 */
121 * Make sure this record is in order and doesn't stray outside of the parent
126 struct xchk_btree *bs)
128 struct xfs_btree_cur *cur = bs->cur;
129 union xfs_btree_rec *rec;
130 union xfs_btree_key key;
131 union xfs_btree_key hkey;
132 union xfs_btree_key *keyp;
133 struct xfs_btree_block *block;
134 struct xfs_btree_block *keyblock;
/* Fetch the level-0 block and the record at the cursor's level-0 position. */
137 block = xfs_btree_get_block(cur, 0, &bp);
138 rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
140 trace_xchk_btree_rec(bs->sc, cur, 0);
142 /* If this isn't the first record, are they in order? */
143 if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
144 xchk_btree_set_corrupt(bs->sc, cur, 0);
/* Save a copy (rec_len bytes) of this record for the next ordering check. */
145 bs->firstrec = false;
146 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
/*
 * NOTE(review): single-level trees are special-cased here; presumably the
 * parent-key checks below are skipped -- confirm against the full source.
 */
148 if (cur->bc_nlevels == 1)
151 /* Is this at least as large as the parent low key? */
152 cur->bc_ops->init_key_from_rec(&key, rec);
153 keyblock = xfs_btree_get_block(cur, 1, &bp);
154 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
/* A negative diff of (record key, parent key) is reported against level 1. */
155 if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
156 xchk_btree_set_corrupt(bs->sc, cur, 1);
/* The high-key comparison that follows is tied to XFS_BTREE_OVERLAPPING. */
158 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
161 /* Is this no larger than the parent high key? */
162 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
163 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
/* Operand order is swapped here: diff of (parent high key, record high key). */
164 if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
165 xchk_btree_set_corrupt(bs->sc, cur, 1);
/*
 * Key-level check for the given level of bs->cur: compares the current key
 * with the previously seen key at that level, then with the keys stored one
 * level up.
 */
169 * Make sure this key is in order and doesn't stray outside of the parent
174 struct xchk_btree *bs,
177 struct xfs_btree_cur *cur = bs->cur;
178 union xfs_btree_key *key;
179 union xfs_btree_key *keyp;
180 struct xfs_btree_block *block;
181 struct xfs_btree_block *keyblock;
/* Fetch this level's block and the key at the cursor's position in it. */
184 block = xfs_btree_get_block(cur, level, &bp);
185 key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
187 trace_xchk_btree_key(bs->sc, cur, level);
189 /* If this isn't the first key, are they in order? */
190 if (!bs->firstkey[level] &&
191 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
192 xchk_btree_set_corrupt(bs->sc, cur, level);
/* Save a copy (key_len bytes) of this key for the next ordering check. */
193 bs->firstkey[level] = false;
194 memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
/*
 * NOTE(review): when there is no level above this one the parent checks
 * presumably do not apply; the branch body is not shown -- confirm.
 */
196 if (level + 1 >= cur->bc_nlevels)
199 /* Is this at least as large as the parent low key? */
200 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
201 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
202 if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
203 xchk_btree_set_corrupt(bs->sc, cur, level);
/* The high-key comparison that follows is tied to XFS_BTREE_OVERLAPPING. */
205 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
208 /* Is this no larger than the parent high key? */
209 key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
210 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
/* Operand order is swapped here: diff of (parent high key, this high key). */
211 if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
212 xchk_btree_set_corrupt(bs->sc, cur, level);
216 * Check a btree pointer. Returns true if it's ok to use this pointer.
217 * Callers do not need to set the corrupt flag.
221 struct xchk_btree *bs,
223 union xfs_btree_ptr *ptr)
/*
 * The inode-root special case is recognised by level == bc_nlevels, i.e.
 * one past the highest real level (bc_nlevels - 1).
 */
227 /* A btree rooted in an inode has no block pointer to the root. */
228 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
229 level == bs->cur->bc_nlevels)
/*
 * Long-pointer btrees validate the 64-bit form (ptr->l); the 32-bit form
 * (ptr->s) is validated by the short-pointer checker.  Both are converted
 * from big-endian first.
 */
232 /* Otherwise, check the pointers. */
233 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
234 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
236 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
/* This function sets the corrupt flag itself, as the header comment promises. */
238 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
243 /* Check that a btree block's sibling matches what we expect. */
/*
 * Compares one sibling pointer of a block at `level` against the pointer
 * found by stepping a duplicate cursor one entry sideways at level + 1.
 * Callers in this file pass -1 as the third argument for the left sibling
 * and 1 for the right sibling.
 */
245 xchk_btree_block_check_sibling(
246 struct xchk_btree *bs,
249 union xfs_btree_ptr *sibling)
251 struct xfs_btree_cur *cur = bs->cur;
252 struct xfs_btree_block *pblock;
254 struct xfs_btree_cur *ncur = NULL;
255 union xfs_btree_ptr *pp;
/* All cursor movement below is done on the duplicate (ncur), not on cur. */
259 error = xfs_btree_dup_cursor(cur, &ncur);
260 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
265 * If the pointer is null, we shouldn't be able to move the upper
266 * level pointer anywhere.
268 if (xfs_btree_ptr_is_null(cur, sibling)) {
/*
 * NOTE(review): the condition selecting increment vs. decrement is on a
 * line not shown; presumably it is the sign of the direction argument.
 */
270 error = xfs_btree_increment(ncur, level + 1, &success);
272 error = xfs_btree_decrement(ncur, level + 1, &success);
/* A successful move despite a null sibling pointer is flagged as corruption. */
273 if (error == 0 && success)
274 xchk_btree_set_corrupt(bs->sc, cur, level);
279 /* Increment upper level pointer. */
281 error = xfs_btree_increment(ncur, level + 1, &success);
283 error = xfs_btree_decrement(ncur, level + 1, &success);
284 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
287 xchk_btree_set_corrupt(bs->sc, cur, level + 1);
291 /* Compare upper level pointer to sibling pointer. */
292 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
293 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
294 if (!xchk_btree_ptr_ok(bs, level + 1, pp))
297 xchk_buffer_recheck(bs->sc, pbp);
/* Any difference between the parent's pointer and the sibling pointer is corruption. */
299 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
300 xchk_btree_set_corrupt(bs->sc, cur, level);
/* The duplicate cursor is released with XFS_BTREE_ERROR. */
302 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
306 /* Check the siblings of a btree block. */
308 xchk_btree_block_check_siblings(
309 struct xchk_btree *bs,
310 struct xfs_btree_block *block)
312 struct xfs_btree_cur *cur = bs->cur;
313 union xfs_btree_ptr leftsib;
314 union xfs_btree_ptr rightsib;
/* Read both sibling pointers and the level out of the block itself. */
318 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
319 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
320 level = xfs_btree_get_level(block);
/* bc_nlevels - 1 is the root level; either sibling being non-null is corruption. */
322 /* Root block should never have siblings. */
323 if (level == cur->bc_nlevels - 1) {
324 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
325 !xfs_btree_ptr_is_null(cur, &rightsib))
326 xchk_btree_set_corrupt(bs->sc, cur, level);
331 * Does the left & right sibling pointers match the adjacent
332 * parent level pointers?
333 * (These function absorbs error codes for us.)
/* Direction -1 checks the left sibling, direction 1 the right sibling. */
335 error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
338 error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
/* List linkage; entries are appended to bs->to_check with list_add_tail(). */
346 struct list_head list;
/*
 * Cross-references one block (length 1) at the given disk address: it must
 * be in use, and it must be owned by bs->oinfo.
 */
352 * Make sure this btree block isn't in the free list and that there's
353 * an rmap record for it.
356 xchk_btree_check_block_owner(
357 struct xchk_btree *bs,
/* Split the disk address into an AG number and an AG block number. */
370 btnum = bs->cur->bc_btnum;
371 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
372 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
/* init_sa is nonzero exactly when the btree uses long pointers. */
374 init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
/*
 * NOTE(review): presumably the AG setup here and the xchk_ag_free at the
 * end are guarded by init_sa; the guards are not shown -- confirm.
 */
376 error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
377 if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
382 xchk_xref_is_used_space(bs->sc, agbno, 1);
384 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
385 * have to nullify it (to shut down further block owner checks) if
386 * self-xref encounters problems.
388 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
391 xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
/* Same pattern as the bnobt test above, for the rmapbt cursor. */
392 if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
396 xchk_ag_free(bs->sc, &bs->sc->sa);
/*
 * Either checks the block's owner immediately or, for the bnobt and rmapbt,
 * queues the block's disk address on bs->to_check for a later check.
 */
401 /* Check the owner of a btree block. */
403 xchk_btree_check_owner(
404 struct xchk_btree *bs,
408 struct xfs_btree_cur *cur = bs->cur;
409 struct check_owner *co;
412 * In theory, xfs_btree_get_block should only give us a null buffer
413 * pointer for the root of a root-in-inode btree type, but we need
414 * to check defensively here in case the cursor state is also screwed
/* A btree that is not inode-rooted is flagged as corrupt in this case. */
418 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
419 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
424 * We want to cross-reference each btree block with the bnobt
425 * and the rmapbt. We cannot cross-reference the bnobt or
426 * rmapbt while scanning the bnobt or rmapbt, respectively,
427 * because we cannot alter the cursor and we'd prefer not to
428 * duplicate cursors. Therefore, save the buffer daddr for
/* Deferred path: allocate a check_owner entry and append it to bs->to_check. */
431 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
432 co = kmem_alloc(sizeof(struct check_owner),
437 co->daddr = XFS_BUF_ADDR(bp);
438 list_add_tail(&co->list, &bs->to_check);
/* Immediate path: check the owner now using the buffer's disk address. */
442 return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
446 * Check that this btree block has at least minrecs records or is one of the
447 * special blocks that don't require that.
450 xchk_btree_check_minrecs(
451 struct xchk_btree *bs,
453 struct xfs_btree_block *block)
455 struct xfs_btree_cur *cur = bs->cur;
/* root_level is the highest level; numrecs is converted from big-endian. */
456 unsigned int root_level = cur->bc_nlevels - 1;
457 unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
/* Note the test is >=, so a block with exactly minrecs records also passes. */
459 /* More records than minrecs means the block is ok. */
460 if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
464 * For btrees rooted in the inode, it's possible that the root block
465 * contents spilled into a regular ondisk block because there wasn't
466 * enough space in the inode root. The number of records in that
467 * child block might be less than the standard minrecs, but that's ok
468 * provided that there's only one direct child of the root.
/* bc_nlevels - 2 is the level directly below the root. */
470 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
471 level == cur->bc_nlevels - 2) {
472 struct xfs_btree_block *root_block;
473 struct xfs_buf *root_bp;
476 root_block = xfs_btree_get_block(cur, root_level, &root_bp);
477 root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
/*
 * Corrupt if the root does not hold exactly one record, or if this
 * block's record count does not exceed the root's maximum.
 */
478 if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
479 numrecs <= root_maxrecs)
480 xchk_btree_set_corrupt(bs->sc, cur, level);
485 * Otherwise, only the root level is allowed to have fewer than minrecs
486 * records or keyptrs.
488 if (level < root_level)
489 xchk_btree_set_corrupt(bs->sc, cur, level);
493 * Grab and scrub a btree block given a btree pointer. Returns block
494 * and buffer pointers (if applicable) if they're ok to use.
497 xchk_btree_get_block(
498 struct xchk_btree *bs,
500 union xfs_btree_ptr *pp,
501 struct xfs_btree_block **pblock,
502 struct xfs_buf **pbp)
504 xfs_failaddr_t failed_at;
/* Look the block up through the cursor; errors go through the scrub error handler. */
510 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
511 if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
/* Called for its side effect of filling *pbp; the returned block is unused. */
515 xfs_btree_get_block(bs->cur, level, pbp);
/* Long-pointer btrees use the lblock checker; the sblock checker is the other form. */
516 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
517 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
520 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
523 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
527 xchk_buffer_recheck(bs->sc, *pbp);
/* Remaining per-block checks, in order: minrecs, owner, then siblings. */
529 xchk_btree_check_minrecs(bs, level, *pblock);
532 * Check the block's owner; this function absorbs error codes
535 error = xchk_btree_check_owner(bs, level, *pbp);
540 * Check the block's siblings; this function absorbs error codes
/* The sibling check's result is this function's return value on this path. */
543 return xchk_btree_block_check_siblings(bs, *pblock);
547 * Check that the low and high keys of this block match the keys stored
548 * in the parent block.
551 xchk_btree_block_keys(
552 struct xchk_btree *bs,
554 struct xfs_btree_block *block)
556 union xfs_btree_key block_keys;
557 struct xfs_btree_cur *cur = bs->cur;
558 union xfs_btree_key *high_bk;
559 union xfs_btree_key *parent_keys;
560 union xfs_btree_key *high_pk;
561 struct xfs_btree_block *parent_block;
/* bc_nlevels - 1 is the root level, which has no parent block to compare with. */
564 if (level >= cur->bc_nlevels - 1)
567 /* Calculate the keys for this block. */
568 xfs_btree_get_keys(cur, block, &block_keys);
570 /* Obtain the parent's copy of the keys for this block. */
571 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
572 parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
/*
 * Any nonzero difference is corruption.
 * NOTE(review): both reports in this function pass a literal level of 1
 * rather than `level` or `level + 1` -- confirm that is intended.
 */
575 if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
576 xchk_btree_set_corrupt(bs->sc, cur, 1);
/* The high-key comparison that follows is tied to XFS_BTREE_OVERLAPPING. */
578 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
582 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
583 high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
586 if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
587 xchk_btree_set_corrupt(bs->sc, cur, 1);
/*
 * Walks the tree from the root level down using cur->bc_ptrs[] as the
 * per-level position.  Leaf entries go to the bs.scrub_rec callback.
 */
591 * Visit all nodes and leaves of a btree. Check that all pointers and
592 * records are in order, that the keys reflect the records, and use a callback
593 * so that the caller can verify individual records.
597 struct xfs_scrub *sc,
598 struct xfs_btree_cur *cur,
599 xchk_btree_rec_fn scrub_fn,
600 const struct xfs_owner_info *oinfo,
603 struct xchk_btree bs = {
605 .scrub_rec = scrub_fn,
611 union xfs_btree_ptr ptr;
612 union xfs_btree_ptr *pp;
613 union xfs_btree_rec *recp;
614 struct xfs_btree_block *block;
617 struct check_owner *co;
618 struct check_owner *n;
/*
 * Every level starts with firstkey set, so the first key seen at each level
 * skips the ordering comparison.  to_check collects deferred owner checks.
 */
622 /* Initialize scrub state */
623 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
624 bs.firstkey[i] = true;
625 INIT_LIST_HEAD(&bs.to_check);
627 /* Don't try to check a tree with a height we can't handle. */
628 if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
629 xchk_btree_set_corrupt(sc, cur, 0);
634 * Load the root of the btree. The helper function absorbs
635 * error codes for us.
/*
 * The root pointer is validated with level == bc_nlevels, the value the
 * pointer checker tests for in its inode-root special case.
 */
637 level = cur->bc_nlevels - 1;
638 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
639 if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
641 error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
/* Start at position 1 in the root block. */
645 cur->bc_ptrs[level] = 1;
647 while (level < cur->bc_nlevels) {
648 block = xfs_btree_get_block(cur, level, &bp);
/* Past bb_numrecs: check the block's keys and advance the parent's position. */
651 /* End of leaf, pop back towards the root. */
652 if (cur->bc_ptrs[level] >
653 be16_to_cpu(block->bb_numrecs)) {
654 xchk_btree_block_keys(&bs, level, block);
655 if (level < cur->bc_nlevels - 1)
656 cur->bc_ptrs[level + 1]++;
661 /* Records in order for scrub? */
664 /* Call out to the record checker. */
665 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
666 error = bs.scrub_rec(&bs, recp);
/* Tested when termination is requested or the CORRUPT output flag is set. */
669 if (xchk_should_terminate(sc, &error) ||
670 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
673 cur->bc_ptrs[level]++;
677 /* End of node, pop back towards the root. */
678 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
679 xchk_btree_block_keys(&bs, level, block);
680 if (level < cur->bc_nlevels - 1)
681 cur->bc_ptrs[level + 1]++;
686 /* Keys in order for scrub? */
687 xchk_btree_key(&bs, level);
/* A bad child pointer is skipped by advancing this level's position. */
689 /* Drill another level deeper. */
690 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
691 if (!xchk_btree_ptr_ok(&bs, level, pp)) {
692 cur->bc_ptrs[level]++;
696 error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
/* Start at position 1 in the newly loaded block. */
700 cur->bc_ptrs[level] = 1;
/*
 * The safe iterator is used, and the owner check runs only while no error
 * has been recorded and bs.cur is still non-NULL.
 */
704 /* Process deferred owner checks on btree blocks. */
705 list_for_each_entry_safe(co, n, &bs.to_check, list) {
706 if (!error && bs.cur)
707 error = xchk_btree_check_block_owner(&bs,
708 co->level, co->daddr);