fs/xfs/scrub/common.c

   1 /*
   2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
   3  *
   4  * Author: Darrick J. Wong <[email protected]>
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License
   8  * as published by the Free Software Foundation; either version 2
   9  * of the License, or (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it would be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write the Free Software Foundation,
  18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
  19  */
  20 #include "xfs.h"
  21 #include "xfs_fs.h"
  22 #include "xfs_shared.h"
  23 #include "xfs_format.h"
  24 #include "xfs_trans_resv.h"
  25 #include "xfs_mount.h"
  26 #include "xfs_defer.h"
  27 #include "xfs_btree.h"
  28 #include "xfs_bit.h"
  29 #include "xfs_log_format.h"
  30 #include "xfs_trans.h"
  31 #include "xfs_sb.h"
  32 #include "xfs_inode.h"
  33 #include "xfs_icache.h"
  34 #include "xfs_itable.h"
  35 #include "xfs_alloc.h"
  36 #include "xfs_alloc_btree.h"
  37 #include "xfs_bmap.h"
  38 #include "xfs_bmap_btree.h"
  39 #include "xfs_ialloc.h"
  40 #include "xfs_ialloc_btree.h"
  41 #include "xfs_refcount.h"
  42 #include "xfs_refcount_btree.h"
  43 #include "xfs_rmap.h"
  44 #include "xfs_rmap_btree.h"
  45 #include "xfs_log.h"
  46 #include "xfs_trans_priv.h"
  47 #include "xfs_attr.h"
  48 #include "xfs_reflink.h"
  49 #include "scrub/xfs_scrub.h"
  50 #include "scrub/scrub.h"
  51 #include "scrub/common.h"
  52 #include "scrub/trace.h"
  53 #include "scrub/btree.h"
  54 #include "scrub/repair.h"
  55
  56 /* Common code for the metadata scrubbers. */
  57
  58 /*
  59  * Handling operational errors.
  60  *
  61  * The *_process_error() family of functions are used to process error return
  62  * codes from functions called as part of a scrub operation.
  63  *
  64  * If there's no error, we return true to tell the caller that it's ok
  65  * to move on to the next check in its list.
  66  *
  67  * For non-verifier errors (e.g. ENOMEM) we return false to tell the
  68  * caller that something bad happened, and we preserve *error so that
  69  * the caller can return the *error up the stack to userspace.
  70  *
  71  * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
  72  * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
  73  * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
  74  * not via return codes.  We return false to tell the caller that
  75  * something bad happened.  Since the error has been cleared, the caller
  76  * will (presumably) return that zero and scrubbing will move on to
  77  * whatever's next.
  78  *
  79  * ftrace can be used to record the precise metadata location and the
  80  * approximate code location of the failed operation.
  81  */
  82
  83 /* Check for operational errors. */
  84 static bool
  85 __xfs_scrub_process_error(
  86         struct xfs_scrub_context        *sc,
  87         xfs_agnumber_t                  agno,
  88         xfs_agblock_t                   bno,
  89         int                             *error,
  90         __u32                           errflag,
  91         void                            *ret_ip)
  92 {
  93         switch (*error) {
  94         case 0:
  95                 return true;
  96         case -EDEADLOCK:
  97                 /* Used to restart an op with deadlock avoidance. */
  98                 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
  99                 break;
 100         case -EFSBADCRC:
 101         case -EFSCORRUPTED:
 102                 /* Note the badness but don't abort. */
 103                 sc->sm->sm_flags |= errflag;
 104                 *error = 0;
 105                 /* fall through */
 106         default:
 107                 trace_xfs_scrub_op_error(sc, agno, bno, *error,
 108                                 ret_ip);
 109                 break;
 110         }
 111         return false;
 112 }
 113
 114 bool
 115 xfs_scrub_process_error(
 116         struct xfs_scrub_context        *sc,
 117         xfs_agnumber_t                  agno,
 118         xfs_agblock_t                   bno,
 119         int                             *error)
 120 {
 121         return __xfs_scrub_process_error(sc, agno, bno, error,
 122                         XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 123 }
 124
 125 bool
 126 xfs_scrub_xref_process_error(
 127         struct xfs_scrub_context        *sc,
 128         xfs_agnumber_t                  agno,
 129         xfs_agblock_t                   bno,
 130         int                             *error)
 131 {
 132         return __xfs_scrub_process_error(sc, agno, bno, error,
 133                         XFS_SCRUB_OFLAG_XFAIL, __return_address);
 134 }
 135
 136 /* Check for operational errors for a file offset. */
 137 static bool
 138 __xfs_scrub_fblock_process_error(
 139         struct xfs_scrub_context        *sc,
 140         int                             whichfork,
 141         xfs_fileoff_t                   offset,
 142         int                             *error,
 143         __u32                           errflag,
 144         void                            *ret_ip)
 145 {
 146         switch (*error) {
 147         case 0:
 148                 return true;
 149         case -EDEADLOCK:
 150                 /* Used to restart an op with deadlock avoidance. */
 151                 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
 152                 break;
 153         case -EFSBADCRC:
 154         case -EFSCORRUPTED:
 155                 /* Note the badness but don't abort. */
 156                 sc->sm->sm_flags |= errflag;
 157                 *error = 0;
 158                 /* fall through */
 159         default:
 160                 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
 161                                 ret_ip);
 162                 break;
 163         }
 164         return false;
 165 }
 166
 167 bool
 168 xfs_scrub_fblock_process_error(
 169         struct xfs_scrub_context        *sc,
 170         int                             whichfork,
 171         xfs_fileoff_t                   offset,
 172         int                             *error)
 173 {
 174         return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
 175                         XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 176 }
 177
 178 bool
 179 xfs_scrub_fblock_xref_process_error(
 180         struct xfs_scrub_context        *sc,
 181         int                             whichfork,
 182         xfs_fileoff_t                   offset,
 183         int                             *error)
 184 {
 185         return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
 186                         XFS_SCRUB_OFLAG_XFAIL, __return_address);
 187 }
 188
 189 /*
 190  * Handling scrub corruption/optimization/warning checks.
 191  *
 192  * The *_set_{corrupt,preen,warning}() family of functions are used to
 193  * record the presence of metadata that is incorrect (corrupt), could be
 194  * optimized somehow (preen), or should be flagged for administrative
 195  * review but is not incorrect (warn).
 196  *
 197  * ftrace can be used to record the precise metadata location and
 198  * approximate code location of the failed check.
 199  */
 200
 201 /* Record a block which could be optimized. */
 202 void
 203 xfs_scrub_block_set_preen(
 204         struct xfs_scrub_context        *sc,
 205         struct xfs_buf                  *bp)
 206 {
 207         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
 208         trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
 209 }
 210
 211 /*
 212  * Record an inode which could be optimized.  The trace data will
 213  * include the block given by bp if bp is given; otherwise it will use
 214  * the block location of the inode record itself.
 215  */
 216 void
 217 xfs_scrub_ino_set_preen(
 218         struct xfs_scrub_context        *sc,
 219         xfs_ino_t                       ino)
 220 {
 221         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
 222         trace_xfs_scrub_ino_preen(sc, ino, __return_address);
 223 }
 224
 225 /* Record a corrupt block. */
 226 void
 227 xfs_scrub_block_set_corrupt(
 228         struct xfs_scrub_context        *sc,
 229         struct xfs_buf                  *bp)
 230 {
 231         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 232         trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
 233 }
 234
 235 /* Record a corruption while cross-referencing. */
 236 void
 237 xfs_scrub_block_xref_set_corrupt(
 238         struct xfs_scrub_context        *sc,
 239         struct xfs_buf                  *bp)
 240 {
 241         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
 242         trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
 243 }
 244
 245 /*
 246  * Record a corrupt inode.  The trace data will include the block given
 247  * by bp if bp is given; otherwise it will use the block location of the
 248  * inode record itself.
 249  */
 250 void
 251 xfs_scrub_ino_set_corrupt(
 252         struct xfs_scrub_context        *sc,
 253         xfs_ino_t                       ino)
 254 {
 255         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 256         trace_xfs_scrub_ino_error(sc, ino, __return_address);
 257 }
 258
 259 /* Record a corruption while cross-referencing with an inode. */
 260 void
 261 xfs_scrub_ino_xref_set_corrupt(
 262         struct xfs_scrub_context        *sc,
 263         xfs_ino_t                       ino)
 264 {
 265         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
 266         trace_xfs_scrub_ino_error(sc, ino, __return_address);
 267 }
 268
 269 /* Record corruption in a block indexed by a file fork. */
 270 void
 271 xfs_scrub_fblock_set_corrupt(
 272         struct xfs_scrub_context        *sc,
 273         int                             whichfork,
 274         xfs_fileoff_t                   offset)
 275 {
 276         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 277         trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
 278 }
 279
 280 /* Record a corruption while cross-referencing a fork block. */
 281 void
 282 xfs_scrub_fblock_xref_set_corrupt(
 283         struct xfs_scrub_context        *sc,
 284         int                             whichfork,
 285         xfs_fileoff_t                   offset)
 286 {
 287         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
 288         trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
 289 }
 290
 291 /*
 292  * Warn about inodes that need administrative review but is not
 293  * incorrect.
 294  */
 295 void
 296 xfs_scrub_ino_set_warning(
 297         struct xfs_scrub_context        *sc,
 298         xfs_ino_t                       ino)
 299 {
 300         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
 301         trace_xfs_scrub_ino_warning(sc, ino, __return_address);
 302 }
 303
 304 /* Warn about a block indexed by a file fork that needs review. */
 305 void
 306 xfs_scrub_fblock_set_warning(
 307         struct xfs_scrub_context        *sc,
 308         int                             whichfork,
 309         xfs_fileoff_t                   offset)
 310 {
 311         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
 312         trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
 313 }
 314
 315 /* Signal an incomplete scrub. */
 316 void
 317 xfs_scrub_set_incomplete(
 318         struct xfs_scrub_context        *sc)
 319 {
 320         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
 321         trace_xfs_scrub_incomplete(sc, __return_address);
 322 }
 323
 324 /*
 325  * rmap scrubbing -- compute the number of blocks with a given owner,
 326  * at least according to the reverse mapping data.
 327  */
 328
 329 struct xfs_scrub_rmap_ownedby_info {
 330         struct xfs_owner_info   *oinfo;
 331         xfs_filblks_t           *blocks;
 332 };
 333
 334 STATIC int
 335 xfs_scrub_count_rmap_ownedby_irec(
 336         struct xfs_btree_cur                    *cur,
 337         struct xfs_rmap_irec                    *rec,
 338         void                                    *priv)
 339 {
 340         struct xfs_scrub_rmap_ownedby_info      *sroi = priv;
 341         bool                                    irec_attr;
 342         bool                                    oinfo_attr;
 343
 344         irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
 345         oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
 346
 347         if (rec->rm_owner != sroi->oinfo->oi_owner)
 348                 return 0;
 349
 350         if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
 351                 (*sroi->blocks) += rec->rm_blockcount;
 352
 353         return 0;
 354 }
 355
 356 /*
 357  * Calculate the number of blocks the rmap thinks are owned by something.
 358  * The caller should pass us an rmapbt cursor.
 359  */
 360 int
 361 xfs_scrub_count_rmap_ownedby_ag(
 362         struct xfs_scrub_context                *sc,
 363         struct xfs_btree_cur                    *cur,
 364         struct xfs_owner_info                   *oinfo,
 365         xfs_filblks_t                           *blocks)
 366 {
 367         struct xfs_scrub_rmap_ownedby_info      sroi;
 368
 369         sroi.oinfo = oinfo;
 370         *blocks = 0;
 371         sroi.blocks = blocks;
 372
 373         return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
 374                         &sroi);
 375 }
 376
 377 /*
 378  * AG scrubbing
 379  *
 380  * These helpers facilitate locking an allocation group's header
 381  * buffers, setting up cursors for all btrees that are present, and
 382  * cleaning everything up once we're through.
 383  */
 384
 385 /* Decide if we want to return an AG header read failure. */
 386 static inline bool
 387 want_ag_read_header_failure(
 388         struct xfs_scrub_context        *sc,
 389         unsigned int                    type)
 390 {
 391         /* Return all AG header read failures when scanning btrees. */
 392         if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
 393             sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
 394             sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
 395                 return true;
 396         /*
 397          * If we're scanning a given type of AG header, we only want to
 398          * see read failures from that specific header.  We'd like the
 399          * other headers to cross-check them, but this isn't required.
 400          */
 401         if (sc->sm->sm_type == type)
 402                 return true;
 403         return false;
 404 }
 405
 406 /*
 407  * Grab all the headers for an AG.
 408  *
 409  * The headers should be released by xfs_scrub_ag_free, but as a fail
 410  * safe we attach all the buffers we grab to the scrub transaction so
 411  * they'll all be freed when we cancel it.
 412  */
 413 int
 414 xfs_scrub_ag_read_headers(
 415         struct xfs_scrub_context        *sc,
 416         xfs_agnumber_t                  agno,
 417         struct xfs_buf                  **agi,
 418         struct xfs_buf                  **agf,
 419         struct xfs_buf                  **agfl)
 420 {
 421         struct xfs_mount                *mp = sc->mp;
 422         int                             error;
 423
 424         error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
 425         if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
 426                 goto out;
 427
 428         error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
 429         if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
 430                 goto out;
 431
 432         error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
 433         if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
 434                 goto out;
 435         error = 0;
 436 out:
 437         return error;
 438 }
 439
 440 /* Release all the AG btree cursors. */
 441 void
 442 xfs_scrub_ag_btcur_free(
 443         struct xfs_scrub_ag             *sa)
 444 {
 445         if (sa->refc_cur)
 446                 xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
 447         if (sa->rmap_cur)
 448                 xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
 449         if (sa->fino_cur)
 450                 xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
 451         if (sa->ino_cur)
 452                 xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
 453         if (sa->cnt_cur)
 454                 xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
 455         if (sa->bno_cur)
 456                 xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
 457
 458         sa->refc_cur = NULL;
 459         sa->rmap_cur = NULL;
 460         sa->fino_cur = NULL;
 461         sa->ino_cur = NULL;
 462         sa->bno_cur = NULL;
 463         sa->cnt_cur = NULL;
 464 }
 465
 466 /* Initialize all the btree cursors for an AG. */
 467 int
 468 xfs_scrub_ag_btcur_init(
 469         struct xfs_scrub_context        *sc,
 470         struct xfs_scrub_ag             *sa)
 471 {
 472         struct xfs_mount                *mp = sc->mp;
 473         xfs_agnumber_t                  agno = sa->agno;
 474
 475         if (sa->agf_bp) {
 476                 /* Set up a bnobt cursor for cross-referencing. */
 477                 sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
 478                                 agno, XFS_BTNUM_BNO);
 479                 if (!sa->bno_cur)
 480                         goto err;
 481
 482                 /* Set up a cntbt cursor for cross-referencing. */
 483                 sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
 484                                 agno, XFS_BTNUM_CNT);
 485                 if (!sa->cnt_cur)
 486                         goto err;
 487         }
 488
 489         /* Set up a inobt cursor for cross-referencing. */
 490         if (sa->agi_bp) {
 491                 sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
 492                                         agno, XFS_BTNUM_INO);
 493                 if (!sa->ino_cur)
 494                         goto err;
 495         }
 496
 497         /* Set up a finobt cursor for cross-referencing. */
 498         if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
 499                 sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
 500                                 agno, XFS_BTNUM_FINO);
 501                 if (!sa->fino_cur)
 502                         goto err;
 503         }
 504
 505         /* Set up a rmapbt cursor for cross-referencing. */
 506         if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
 507                 sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
 508                                 agno);
 509                 if (!sa->rmap_cur)
 510                         goto err;
 511         }
 512
 513         /* Set up a refcountbt cursor for cross-referencing. */
 514         if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
 515                 sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
 516                                 sa->agf_bp, agno, NULL);
 517                 if (!sa->refc_cur)
 518                         goto err;
 519         }
 520
 521         return 0;
 522 err:
 523         return -ENOMEM;
 524 }
 525
 526 /* Release the AG header context and btree cursors. */
 527 void
 528 xfs_scrub_ag_free(
 529         struct xfs_scrub_context        *sc,
 530         struct xfs_scrub_ag             *sa)
 531 {
 532         xfs_scrub_ag_btcur_free(sa);
 533         if (sa->agfl_bp) {
 534                 xfs_trans_brelse(sc->tp, sa->agfl_bp);
 535                 sa->agfl_bp = NULL;
 536         }
 537         if (sa->agf_bp) {
 538                 xfs_trans_brelse(sc->tp, sa->agf_bp);
 539                 sa->agf_bp = NULL;
 540         }
 541         if (sa->agi_bp) {
 542                 xfs_trans_brelse(sc->tp, sa->agi_bp);
 543                 sa->agi_bp = NULL;
 544         }
 545         if (sa->pag) {
 546                 xfs_perag_put(sa->pag);
 547                 sa->pag = NULL;
 548         }
 549         sa->agno = NULLAGNUMBER;
 550 }
 551
 552 /*
 553  * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 554  * order requires us to get the AGI before the AGF.  We use the
 555  * transaction to avoid deadlocking on crosslinked metadata buffers;
 556  * either the caller passes one in (bmap scrub) or we have to create a
 557  * transaction ourselves.
 558  */
 559 int
 560 xfs_scrub_ag_init(
 561         struct xfs_scrub_context        *sc,
 562         xfs_agnumber_t                  agno,
 563         struct xfs_scrub_ag             *sa)
 564 {
 565         int                             error;
 566
 567         sa->agno = agno;
 568         error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
 569                         &sa->agf_bp, &sa->agfl_bp);
 570         if (error)
 571                 return error;
 572
 573         return xfs_scrub_ag_btcur_init(sc, sa);
 574 }
 575
 576 /*
 577  * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
 578  * xfs_scrub_ag will release it for us.
 579  */
 580 void
 581 xfs_scrub_perag_get(
 582         struct xfs_mount        *mp,
 583         struct xfs_scrub_ag     *sa)
 584 {
 585         if (!sa->pag)
 586                 sa->pag = xfs_perag_get(mp, sa->agno);
 587 }
 588
 589 /* Per-scrubber setup functions */
 590
 591 /*
 592  * Grab an empty transaction so that we can re-grab locked buffers if
 593  * one of our btrees turns out to be cyclic.
 594  *
 595  * If we're going to repair something, we need to ask for the largest possible
 596  * log reservation so that we can handle the worst case scenario for metadata
 597  * updates while rebuilding a metadata item.  We also need to reserve as many
 598  * blocks in the head transaction as we think we're going to need to rebuild
 599  * the metadata object.
 600  */
 601 int
 602 xfs_scrub_trans_alloc(
 603         struct xfs_scrub_context        *sc,
 604         uint                            resblks)
 605 {
 606         if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
 607                 return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
 608                                 resblks, 0, 0, &sc->tp);
 609
 610         return xfs_trans_alloc_empty(sc->mp, &sc->tp);
 611 }
 612
 613 /* Set us up with a transaction and an empty context. */
 614 int
 615 xfs_scrub_setup_fs(
 616         struct xfs_scrub_context        *sc,
 617         struct xfs_inode                *ip)
 618 {
 619         uint                            resblks;
 620
 621         resblks = xfs_repair_calc_ag_resblks(sc);
 622         return xfs_scrub_trans_alloc(sc, resblks);
 623 }
 624
 625 /* Set us up with AG headers and btree cursors. */
 626 int
 627 xfs_scrub_setup_ag_btree(
 628         struct xfs_scrub_context        *sc,
 629         struct xfs_inode                *ip,
 630         bool                            force_log)
 631 {
 632         struct xfs_mount                *mp = sc->mp;
 633         int                             error;
 634
 635         /*
 636          * If the caller asks us to checkpont the log, do so.  This
 637          * expensive operation should be performed infrequently and only
 638          * as a last resort.  Any caller that sets force_log should
 639          * document why they need to do so.
 640          */
 641         if (force_log) {
 642                 error = xfs_scrub_checkpoint_log(mp);
 643                 if (error)
 644                         return error;
 645         }
 646
 647         error = xfs_scrub_setup_fs(sc, ip);
 648         if (error)
 649                 return error;
 650
 651         return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
 652 }
 653
 654 /* Push everything out of the log onto disk. */
 655 int
 656 xfs_scrub_checkpoint_log(
 657         struct xfs_mount        *mp)
 658 {
 659         int                     error;
 660
 661         error = xfs_log_force(mp, XFS_LOG_SYNC);
 662         if (error)
 663                 return error;
 664         xfs_ail_push_all_sync(mp->m_ail);
 665         return 0;
 666 }
 667
 668 /*
 669  * Given an inode and the scrub control structure, grab either the
 670  * inode referenced in the control structure or the inode passed in.
 671  * The inode is not locked.
 672  */
 673 int
 674 xfs_scrub_get_inode(
 675         struct xfs_scrub_context        *sc,
 676         struct xfs_inode                *ip_in)
 677 {
 678         struct xfs_imap                 imap;
 679         struct xfs_mount                *mp = sc->mp;
 680         struct xfs_inode                *ip = NULL;
 681         int                             error;
 682
 683         /* We want to scan the inode we already had opened. */
 684         if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
 685                 sc->ip = ip_in;
 686                 return 0;
 687         }
 688
 689         /* Look up the inode, see if the generation number matches. */
 690         if (xfs_internal_inum(mp, sc->sm->sm_ino))
 691                 return -ENOENT;
 692         error = xfs_iget(mp, NULL, sc->sm->sm_ino,
 693                         XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
 694         switch (error) {
 695         case -ENOENT:
 696                 /* Inode doesn't exist, just bail out. */
 697                 return error;
 698         case 0:
 699                 /* Got an inode, continue. */
 700                 break;
 701         case -EINVAL:
 702                 /*
 703                  * -EINVAL with IGET_UNTRUSTED could mean one of several
 704                  * things: userspace gave us an inode number that doesn't
 705                  * correspond to fs space, or doesn't have an inobt entry;
 706                  * or it could simply mean that the inode buffer failed the
 707                  * read verifiers.
 708                  *
 709                  * Try just the inode mapping lookup -- if it succeeds, then
 710                  * the inode buffer verifier failed and something needs fixing.
 711                  * Otherwise, we really couldn't find it so tell userspace
 712                  * that it no longer exists.
 713                  */
 714                 error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
 715                                 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
 716                 if (error)
 717                         return -ENOENT;
 718                 error = -EFSCORRUPTED;
 719                 /* fall through */
 720         default:
 721                 trace_xfs_scrub_op_error(sc,
 722                                 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
 723                                 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
 724                                 error, __return_address);
 725                 return error;
 726         }
 727         if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
 728                 iput(VFS_I(ip));
 729                 return -ENOENT;
 730         }
 731
 732         sc->ip = ip;
 733         return 0;
 734 }
 735
 736 /* Set us up to scrub a file's contents. */
 737 int
 738 xfs_scrub_setup_inode_contents(
 739         struct xfs_scrub_context        *sc,
 740         struct xfs_inode                *ip,
 741         unsigned int                    resblks)
 742 {
 743         int                             error;
 744
 745         error = xfs_scrub_get_inode(sc, ip);
 746         if (error)
 747                 return error;
 748
 749         /* Got the inode, lock it and we're ready to go. */
 750         sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 751         xfs_ilock(sc->ip, sc->ilock_flags);
 752         error = xfs_scrub_trans_alloc(sc, resblks);
 753         if (error)
 754                 goto out;
 755         sc->ilock_flags |= XFS_ILOCK_EXCL;
 756         xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
 757
 758 out:
 759         /* scrub teardown will unlock and release the inode for us */
 760         return error;
 761 }
 762
 763 /*
 764  * Predicate that decides if we need to evaluate the cross-reference check.
 765  * If there was an error accessing the cross-reference btree, just delete
 766  * the cursor and skip the check.
 767  */
 768 bool
 769 xfs_scrub_should_check_xref(
 770         struct xfs_scrub_context        *sc,
 771         int                             *error,
 772         struct xfs_btree_cur            **curpp)
 773 {
 774         /* No point in xref if we already know we're corrupt. */
 775         if (xfs_scrub_skip_xref(sc->sm))
 776                 return false;
 777
 778         if (*error == 0)
 779                 return true;
 780
 781         if (curpp) {
 782                 /* If we've already given up on xref, just bail out. */
 783                 if (!*curpp)
 784                         return false;
 785
 786                 /* xref error, delete cursor and bail out. */
 787                 xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
 788                 *curpp = NULL;
 789         }
 790
 791         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
 792         trace_xfs_scrub_xref_error(sc, *error, __return_address);
 793
 794         /*
 795          * Errors encountered during cross-referencing with another
 796          * data structure should not cause this scrubber to abort.
 797          */
 798         *error = 0;
 799         return false;
 800 }
 801
 802 /* Run the structure verifiers on in-memory buffers to detect bad memory. */
 803 void
 804 xfs_scrub_buffer_recheck(
 805         struct xfs_scrub_context        *sc,
 806         struct xfs_buf                  *bp)
 807 {
 808         xfs_failaddr_t                  fa;
 809
 810         if (bp->b_ops == NULL) {
 811                 xfs_scrub_block_set_corrupt(sc, bp);
 812                 return;
 813         }
 814         if (bp->b_ops->verify_struct == NULL) {
 815                 xfs_scrub_set_incomplete(sc);
 816                 return;
 817         }
 818         fa = bp->b_ops->verify_struct(bp);
 819         if (!fa)
 820                 return;
 821         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 822         trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
 823 }
 824
 825 /*
 826  * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 827  * pointed to by sc->ip and the ILOCK must be held.
 828  */
 829 int
 830 xfs_scrub_metadata_inode_forks(
 831         struct xfs_scrub_context        *sc)
 832 {
 833         __u32                           smtype;
 834         bool                            shared;
 835         int                             error;
 836
 837         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 838                 return 0;
 839
 840         /* Metadata inodes don't live on the rt device. */
 841         if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
 842                 xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 843                 return 0;
 844         }
 845
 846         /* They should never participate in reflink. */
 847         if (xfs_is_reflink_inode(sc->ip)) {
 848                 xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 849                 return 0;
 850         }
 851
 852         /* They also should never have extended attributes. */
 853         if (xfs_inode_hasattr(sc->ip)) {
 854                 xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 855                 return 0;
 856         }
 857
 858         /* Invoke the data fork scrubber. */
 859         smtype = sc->sm->sm_type;
 860         sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
 861         error = xfs_scrub_bmap_data(sc);
 862         sc->sm->sm_type = smtype;
 863         if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 864                 return error;
 865
 866         /* Look for incorrect shared blocks. */
 867         if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
 868                 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
 869                                 &shared);
 870                 if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
 871                                 &error))
 872                         return error;
 873                 if (shared)
 874                         xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 875         }
 876
 877         return error;
 878 }
 879
 880 /*
 881  * Try to lock an inode in violation of the usual locking order rules.  For
 882  * example, trying to get the IOLOCK while in transaction context, or just
 883  * plain breaking AG-order or inode-order inode locking rules.  Either way,
 884  * the only way to avoid an ABBA deadlock is to use trylock and back off if
 885  * we can't.
 886  */
 887 int
 888 xfs_scrub_ilock_inverted(
 889         struct xfs_inode        *ip,
 890         uint                    lock_mode)
 891 {
 892         int                     i;
 893
 894         for (i = 0; i < 20; i++) {
 895                 if (xfs_ilock_nowait(ip, lock_mode))
 896                         return 0;
 897                 delay(1);
 898         }
 899         return -EDEADLOCK;
 900 }