fs/xfs/scrub/refcount.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_ag.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"

/*
 * Set us up to scrub reference count btrees.
 */
int
xchk_setup_ag_refcountbt(
	struct xfs_scrub	*sc)
{
	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
	return xchk_setup_ag_btree(sc, false);
}

/* Reference count btree scrubber. */

/*
 * Confirming Reference Counts via Reverse Mappings
 *
 * We want to count the reverse mappings overlapping a refcount record
 * (bno, len, refcount), allowing for the possibility that some of the
 * overlap may come from smaller adjoining reverse mappings, while some
 * comes from single extents which overlap the range entirely.  The
 * outer loop is as follows:
 *
 * 1. For all reverse mappings overlapping the refcount extent,
 *    a. If a given rmap completely overlaps, mark it as seen.
 *    b. Otherwise, record the fragment (in agbno order) for later
 *       processing.
 *
 * Once we've seen all the rmaps, we know that for all blocks in the
 * refcount record we want to find $refcount owners and we've already
 * visited $seen extents that overlap all the blocks.  Therefore, we
 * need to find ($refcount - $seen) owners for every block in the
 * extent; call that quantity $target_nr.  Proceed as follows:
 *
 * 2. Pull the first $target_nr fragments from the list; all of them
 *    should start at or before the start of the extent.
 *    Call this subset of fragments the working set.
 * 3. Until there are no more unprocessed fragments,
 *    a. Find the shortest fragments in the set and remove them.
 *    b. Note the block number of the end of these fragments.
 *    c. Pull the same number of fragments from the list.  All of these
 *       fragments should start at the block number recorded in the
 *       previous step.
 *    d. Put those fragments in the set.
 * 4. Check that there are $target_nr fragments remaining in the list,
 *    and that they all end at or beyond the end of the refcount extent.
 *
 * If the refcount is correct, all the check conditions in the algorithm
 * should always hold true.  If not, the refcount is incorrect.
 */
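
/*
 * A hypothetical worked example of the algorithm above (all numbers
 * invented for illustration): take a refcount record (bno=100, len=4,
 * refcount=3) and five reverse mappings:
 *
 *   A: [98, +6]  covers all of 100-103, so it is marked seen (seen=1)
 *   B: [99, +3]  covers 99-101: fragment
 *   C: [100, +2] covers 100-101: fragment
 *   D: [102, +4] covers 102-105: fragment
 *   E: [102, +3] covers 102-104: fragment
 *
 * Here $target_nr = 3 - 1 = 2, so step 2 pulls B and C (both start at
 * or before bno=100) into the working set.  Both end at agbno 102, so
 * step 3 discards them and pulls the same number of fragments starting
 * at 102, namely D and E.  The fragment list is now empty and both D
 * and E end at or beyond bno + len = 104, so the record checks out and
 * $seen becomes 3, matching $refcount.
 */
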
struct xchk_refcnt_frag {
	struct list_head	list;
	struct xfs_rmap_irec	rm;
};

struct xchk_refcnt_check {
	struct xfs_scrub	*sc;
	struct list_head	fragments;

	/* refcount extent we're examining */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;
	xfs_nlink_t		refcount;

	/* number of owners seen */
	xfs_nlink_t		seen;
};

/*
 * Decide if the given rmap is large enough that we can redeem it
 * towards refcount verification now, or if it's a fragment, in
 * which case we'll hang onto it in the hopes that we'll later
 * discover that we've collected exactly the correct number of
 * fragments as the refcountbt says we should have.
 */
STATIC int
xchk_refcountbt_rmap_check(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_refcnt_check	*refchk = priv;
	struct xchk_refcnt_frag		*frag;
	xfs_agblock_t			rm_last;
	xfs_agblock_t			rc_last;
	int				error = 0;

	if (xchk_should_terminate(refchk->sc, &error))
		return error;

	rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
	rc_last = refchk->bno + refchk->len - 1;

	/* Confirm that a single-owner refc extent is a CoW stage. */
	if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
		xchk_btree_xref_set_corrupt(refchk->sc, cur, 0);
		return 0;
	}

	if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
		/*
		 * The rmap overlaps the refcount record, so we can confirm
		 * one refcount owner seen.
		 */
		refchk->seen++;
	} else {
		/*
		 * This rmap covers only part of the refcount record, so
		 * save the fragment for later processing.  If the rmapbt
		 * is healthy each rmap_irec we see will be in agbno order
		 * so we don't need insertion sort here.
		 */
		frag = kmalloc(sizeof(struct xchk_refcnt_frag),
				XCHK_GFP_FLAGS);
		if (!frag)
			return -ENOMEM;
		memcpy(&frag->rm, rec, sizeof(frag->rm));
		list_add_tail(&frag->list, &refchk->fragments);
	}

	return 0;
}
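
/*
 * A note on the single-owner check above, under the (stated) premise
 * that shared-domain refcount records only exist for blocks with at
 * least two owners: a record with rc_refcount == 1 should only ever
 * describe a CoW staging extent, and the rmapbt attributes staging
 * blocks to the synthetic owner XFS_RMAP_OWN_COW.  For example
 * (hypothetical numbers), crossing a record (bno=50, len=2,
 * refcount=1) with an rmap for block 50 owned by a regular file marks
 * the cross-referencing as corrupt.
 */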

/*
 * Given a bunch of rmap fragments, iterate through them, keeping
 * a running tally of the refcount.  If this ever deviates from
 * what we expect (which is the refcountbt's refcount minus the
 * number of extents that totally covered the refcountbt extent),
 * we have a refcountbt error.
 */
STATIC void
xchk_refcountbt_process_rmap_fragments(
	struct xchk_refcnt_check	*refchk)
{
	struct list_head		worklist;
	struct xchk_refcnt_frag		*frag;
	struct xchk_refcnt_frag		*n;
	xfs_agblock_t			bno;
	xfs_agblock_t			rbno;
	xfs_agblock_t			next_rbno;
	xfs_nlink_t			nr;
	xfs_nlink_t			target_nr;

	target_nr = refchk->refcount - refchk->seen;
	if (target_nr == 0)
		return;

	/*
	 * There are (refchk->refcount - refchk->seen) references we
	 * haven't found yet.  Pull that many off the fragment list and
	 * figure out where the smallest rmap ends (and therefore the
	 * next rmap should start).  All the rmaps we pull off should
	 * start at or before the beginning of the refcount record's
	 * range.
	 */
	INIT_LIST_HEAD(&worklist);
	rbno = NULLAGBLOCK;

	/* Make sure the fragments actually /are/ in agbno order. */
	bno = 0;
	list_for_each_entry(frag, &refchk->fragments, list) {
		if (frag->rm.rm_startblock < bno)
			goto done;
		bno = frag->rm.rm_startblock;
	}

	/*
	 * Find all the rmaps that start at or before the refc extent,
	 * and put them on the worklist.
	 */
	nr = 0;
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
			break;
		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
		if (bno < rbno)
			rbno = bno;
		list_move_tail(&frag->list, &worklist);
		nr++;
	}

	/*
	 * We should have found exactly $target_nr rmap fragments starting
	 * at or before the refcount extent.
	 */
	if (nr != target_nr)
		goto done;

	while (!list_empty(&refchk->fragments)) {
		/* Discard any fragments ending at rbno from the worklist. */
		nr = 0;
		next_rbno = NULLAGBLOCK;
		list_for_each_entry_safe(frag, n, &worklist, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (bno != rbno) {
				if (bno < next_rbno)
					next_rbno = bno;
				continue;
			}
			list_del(&frag->list);
			kfree(frag);
			nr++;
		}

		/* Try to add nr rmaps starting at rbno to the worklist. */
		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (frag->rm.rm_startblock != rbno)
				goto done;
			list_move_tail(&frag->list, &worklist);
			if (next_rbno > bno)
				next_rbno = bno;
			nr--;
			if (nr == 0)
				break;
		}

		/*
		 * If we get here and nr > 0, this means that we added fewer
		 * items to the worklist than we discarded because the fragment
		 * list ran out of items.  Therefore, we cannot maintain the
		 * required refcount.  Something is wrong, so we're done.
		 */
		if (nr)
			goto done;

		rbno = next_rbno;
	}

	/*
	 * Make sure the last extent we processed ends at or beyond
	 * the end of the refcount extent.
	 */
	if (rbno < refchk->bno + refchk->len)
		goto done;

	/* Actually record us having seen the remaining refcount. */
	refchk->seen = refchk->refcount;
done:
	/* Delete fragments and work list. */
	list_for_each_entry_safe(frag, n, &worklist, list) {
		list_del(&frag->list);
		kfree(frag);
	}
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		list_del(&frag->list);
		kfree(frag);
	}
}

/* Use the rmap entries covering this extent to verify the refcount. */
STATIC void
xchk_refcountbt_xref_rmap(
	struct xfs_scrub		*sc,
	const struct xfs_refcount_irec	*irec)
{
	struct xchk_refcnt_check	refchk = {
		.sc			= sc,
		.bno			= irec->rc_startblock,
		.len			= irec->rc_blockcount,
		.refcount		= irec->rc_refcount,
		.seen			= 0,
	};
	struct xfs_rmap_irec		low;
	struct xfs_rmap_irec		high;
	struct xchk_refcnt_frag		*frag;
	struct xchk_refcnt_frag		*n;
	int				error;

	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
		return;

	/* Cross-reference with the rmapbt to confirm the refcount. */
	memset(&low, 0, sizeof(low));
	low.rm_startblock = irec->rc_startblock;
	memset(&high, 0xFF, sizeof(high));
	high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1;

	INIT_LIST_HEAD(&refchk.fragments);
	error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
			&xchk_refcountbt_rmap_check, &refchk);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		goto out_free;

	xchk_refcountbt_process_rmap_fragments(&refchk);
	if (irec->rc_refcount != refchk.seen) {
		trace_xchk_refcount_incorrect(sc->sa.pag, irec, refchk.seen);
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
	}

out_free:
	list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
		list_del(&frag->list);
		kfree(frag);
	}
}

/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
	struct xfs_scrub		*sc,
	const struct xfs_refcount_irec	*irec)
{
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount);
	xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock,
			irec->rc_blockcount);
	xchk_refcountbt_xref_rmap(sc, irec);
}

struct xchk_refcbt_records {
	/* Previous refcount record. */
	struct xfs_refcount_irec prev_rec;

	/* The next AG block where we aren't expecting shared extents. */
	xfs_agblock_t		next_unshared_agbno;

	/* Number of CoW blocks we expect. */
	xfs_agblock_t		cow_blocks;

	/* Was the last record a shared or CoW staging extent? */
	enum xfs_refc_domain	prev_domain;
};

STATIC int
xchk_refcountbt_rmap_check_gap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	xfs_agblock_t			*next_bno = priv;

	if (*next_bno != NULLAGBLOCK && rec->rm_startblock < *next_bno)
		return -ECANCELED;

	*next_bno = rec->rm_startblock + rec->rm_blockcount;
	return 0;
}

/*
 * Make sure that a gap in the reference count records does not correspond to
 * overlapping records (i.e. shared extents) in the reverse mappings.
 */
static inline void
xchk_refcountbt_xref_gaps(
	struct xfs_scrub	*sc,
	struct xchk_refcbt_records *rrc,
	xfs_agblock_t		bno)
{
	struct xfs_rmap_irec	low;
	struct xfs_rmap_irec	high;
	xfs_agblock_t		next_bno = NULLAGBLOCK;
	int			error;

	if (bno <= rrc->next_unshared_agbno || !sc->sa.rmap_cur ||
	    xchk_skip_xref(sc->sm))
		return;

	memset(&low, 0, sizeof(low));
	low.rm_startblock = rrc->next_unshared_agbno;
	memset(&high, 0xFF, sizeof(high));
	high.rm_startblock = bno - 1;

	error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
			xchk_refcountbt_rmap_check_gap, &next_bno);
	if (error == -ECANCELED)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
	else
		xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur);
}
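
/*
 * A hypothetical illustration of the gap check: if the previous shared
 * record ended at agbno 200 and the next begins at agbno 220, no block
 * in [200, 219] may have more than one owner.  Because the rmapbt
 * returns records in startblock order, two rmaps such as [205, +6] and
 * [208, +5] make the second record start below next_bno = 211, so
 * xchk_refcountbt_rmap_check_gap() returns -ECANCELED and we flag a
 * missing refcount record.
 */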

static inline bool
xchk_refcount_mergeable(
	struct xchk_refcbt_records	*rrc,
	const struct xfs_refcount_irec	*r2)
{
	const struct xfs_refcount_irec	*r1 = &rrc->prev_rec;

	/* Ignore if prev_rec is not yet initialized. */
	if (r1->rc_blockcount == 0)
		return false;

	if (r1->rc_domain != r2->rc_domain)
		return false;
	if (r1->rc_startblock + r1->rc_blockcount != r2->rc_startblock)
		return false;
	if (r1->rc_refcount != r2->rc_refcount)
		return false;
	if ((unsigned long long)r1->rc_blockcount + r2->rc_blockcount >
			MAXREFCEXTLEN)
		return false;

	return true;
}
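
/*
 * Hypothetical example of a mergeable pair: (domain=SHARED, bno=100,
 * len=5, refcount=2) followed by (domain=SHARED, bno=105, len=3,
 * refcount=2).  The records are adjacent, agree in domain and
 * refcount, and their combined length fits in MAXREFCEXTLEN, so a
 * healthy btree would have stored a single (100, 8, 2) record; seeing
 * both separately is flagged as corruption.
 */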

/* Flag failures for records that could be merged. */
STATIC void
xchk_refcountbt_check_mergeable(
	struct xchk_btree		*bs,
	struct xchk_refcbt_records	*rrc,
	const struct xfs_refcount_irec	*irec)
{
	if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	if (xchk_refcount_mergeable(rrc, irec))
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	memcpy(&rrc->prev_rec, irec, sizeof(struct xfs_refcount_irec));
}

/* Scrub a refcountbt record. */
STATIC int
xchk_refcountbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_refcount_irec irec;
	struct xchk_refcbt_records *rrc = bs->private;

	xfs_refcount_btrec_to_irec(rec, &irec);
	if (xfs_refcount_check_irec(bs->cur, &irec) != NULL) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	if (irec.rc_domain == XFS_REFC_DOMAIN_COW)
		rrc->cow_blocks += irec.rc_blockcount;

	/* Shared records always come before CoW records. */
	if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED &&
	    rrc->prev_domain == XFS_REFC_DOMAIN_COW)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
	rrc->prev_domain = irec.rc_domain;

	xchk_refcountbt_check_mergeable(bs, rrc, &irec);
	xchk_refcountbt_xref(bs->sc, &irec);

	/*
	 * If this is a record for a shared extent, check that all blocks
	 * between the previous record and this one have at most one reverse
	 * mapping.
	 */
	if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED) {
		xchk_refcountbt_xref_gaps(bs->sc, rrc, irec.rc_startblock);
		rrc->next_unshared_agbno = irec.rc_startblock +
					   irec.rc_blockcount;
	}

	return 0;
}

/* Make sure we have as many refc blocks as the rmap says. */
STATIC void
xchk_refcount_xref_rmap(
	struct xfs_scrub	*sc,
	xfs_filblks_t		cow_blocks)
{
	xfs_extlen_t		refcbt_blocks = 0;
	xfs_filblks_t		blocks;
	int			error;

	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
		return;

	/* Check that we saw as many refcbt blocks as the rmap knows about. */
	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
	if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
		return;
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_REFC, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != refcbt_blocks)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

	/* Check that we saw as many cow blocks as the rmap knows about. */
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_COW, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != cow_blocks)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* Scrub the refcount btree for some AG. */
int
xchk_refcountbt(
	struct xfs_scrub	*sc)
{
	struct xchk_refcbt_records rrc = {
		.cow_blocks		= 0,
		.next_unshared_agbno	= 0,
		.prev_domain		= XFS_REFC_DOMAIN_SHARED,
	};
	int			error;

	error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
			&XFS_RMAP_OINFO_REFC, &rrc);
	if (error)
		return error;

	/*
	 * Check that all blocks between the last refcount > 1 record and the
	 * end of the AG have at most one reverse mapping.
	 */
	xchk_refcountbt_xref_gaps(sc, &rrc, sc->mp->m_sb.sb_agblocks);

	xchk_refcount_xref_rmap(sc, rrc.cow_blocks);

	return 0;
}

/* xref check that a cow staging extent is marked in the refcountbt. */
void
xchk_xref_is_cow_staging(
	struct xfs_scrub		*sc,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	struct xfs_refcount_irec	rc;
	int				has_refcount;
	int				error;

	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
		return;

	/* Find the CoW staging extent. */
	error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW,
			agbno, &has_refcount);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (!has_refcount) {
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
		return;
	}

	error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (!has_refcount) {
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
		return;
	}

	/* CoW lookup returned a shared extent record? */
	if (rc.rc_domain != XFS_REFC_DOMAIN_COW)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);

	/* Must be at least as long as what was passed in. */
	if (rc.rc_blockcount < len)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}

/*
 * xref check that the extent is not shared.  Only file data blocks
 * can have multiple owners.
 */
void
xchk_xref_is_not_shared(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
		return;

	error = xfs_refcount_has_records(sc->sa.refc_cur,
			XFS_REFC_DOMAIN_SHARED, agbno, len, &outcome);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}

/* xref check that the extent is not being used for CoW staging. */
void
xchk_xref_is_not_cow_staging(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
		return;

	error = xfs_refcount_has_records(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW,
			agbno, len, &outcome);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}