fs/xfs/scrub/readdir.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <[email protected]>
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_log_format.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_mount.h"
  13 #include "xfs_inode.h"
  14 #include "xfs_dir2.h"
  15 #include "xfs_dir2_priv.h"
  16 #include "xfs_trace.h"
  17 #include "xfs_bmap.h"
  18 #include "xfs_trans.h"
  19 #include "xfs_error.h"
  20 #include "scrub/scrub.h"
  21 #include "scrub/common.h"
  22 #include "scrub/readdir.h"
  23
  24 /* Call a function for every entry in a shortform directory. */
  25 STATIC int
  26 xchk_dir_walk_sf(
  27         struct xfs_scrub        *sc,
  28         struct xfs_inode        *dp,
  29         xchk_dirent_fn          dirent_fn,
  30         void                    *priv)
  31 {
  32         struct xfs_name         name = {
  33                 .name           = ".",
  34                 .len            = 1,
  35                 .type           = XFS_DIR3_FT_DIR,
  36         };
  37         struct xfs_mount        *mp = dp->i_mount;
  38         struct xfs_da_geometry  *geo = mp->m_dir_geo;
  39         struct xfs_dir2_sf_entry *sfep;
  40         struct xfs_dir2_sf_hdr  *sfp = dp->i_df.if_data;
  41         xfs_ino_t               ino;
  42         xfs_dir2_dataptr_t      dapos;
  43         unsigned int            i;
  44         int                     error;
  45
  46         ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
  47         ASSERT(sfp != NULL);
  48
  49         /* dot entry */
  50         dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
  51                         geo->data_entry_offset);
  52
  53         error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
  54         if (error)
  55                 return error;
  56
  57         /* dotdot entry */
  58         dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
  59                         geo->data_entry_offset +
  60                         xfs_dir2_data_entsize(mp, sizeof(".") - 1));
  61         ino = xfs_dir2_sf_get_parent_ino(sfp);
  62         name.name = "..";
  63         name.len = 2;
  64
  65         error = dirent_fn(sc, dp, dapos, &name, ino, priv);
  66         if (error)
  67                 return error;
  68
  69         /* iterate everything else */
  70         sfep = xfs_dir2_sf_firstentry(sfp);
  71         for (i = 0; i < sfp->count; i++) {
  72                 dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
  73                                 xfs_dir2_sf_get_offset(sfep));
  74                 ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
  75                 name.name = sfep->name;
  76                 name.len = sfep->namelen;
  77                 name.type = xfs_dir2_sf_get_ftype(mp, sfep);
  78
  79                 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
  80                 if (error)
  81                         return error;
  82
  83                 sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
  84         }
  85
  86         return 0;
  87 }
  88
  89 /* Call a function for every entry in a block directory. */
  90 STATIC int
  91 xchk_dir_walk_block(
  92         struct xfs_scrub        *sc,
  93         struct xfs_inode        *dp,
  94         xchk_dirent_fn          dirent_fn,
  95         void                    *priv)
  96 {
  97         struct xfs_mount        *mp = dp->i_mount;
  98         struct xfs_da_geometry  *geo = mp->m_dir_geo;
  99         struct xfs_buf          *bp;
 100         unsigned int            off, next_off, end;
 101         int                     error;
 102
 103         error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
 104         if (error)
 105                 return error;
 106
 107         /* Walk each directory entry. */
 108         end = xfs_dir3_data_end_offset(geo, bp->b_addr);
 109         for (off = geo->data_entry_offset; off < end; off = next_off) {
 110                 struct xfs_name                 name = { };
 111                 struct xfs_dir2_data_unused     *dup = bp->b_addr + off;
 112                 struct xfs_dir2_data_entry      *dep = bp->b_addr + off;
 113                 xfs_ino_t                       ino;
 114                 xfs_dir2_dataptr_t              dapos;
 115
 116                 /* Skip an empty entry. */
 117                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 118                         next_off = off + be16_to_cpu(dup->length);
 119                         continue;
 120                 }
 121
 122                 /* Otherwise, find the next entry and report it. */
 123                 next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
 124                 if (next_off > end)
 125                         break;
 126
 127                 dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
 128                 ino = be64_to_cpu(dep->inumber);
 129                 name.name = dep->name;
 130                 name.len = dep->namelen;
 131                 name.type = xfs_dir2_data_get_ftype(mp, dep);
 132
 133                 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
 134                 if (error)
 135                         break;
 136         }
 137
 138         xfs_trans_brelse(sc->tp, bp);
 139         return error;
 140 }
 141
 142 /* Read a leaf-format directory buffer. */
 143 STATIC int
 144 xchk_read_leaf_dir_buf(
 145         struct xfs_trans        *tp,
 146         struct xfs_inode        *dp,
 147         struct xfs_da_geometry  *geo,
 148         xfs_dir2_off_t          *curoff,
 149         struct xfs_buf          **bpp)
 150 {
 151         struct xfs_iext_cursor  icur;
 152         struct xfs_bmbt_irec    map;
 153         struct xfs_ifork        *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
 154         xfs_dablk_t             last_da;
 155         xfs_dablk_t             map_off;
 156         xfs_dir2_off_t          new_off;
 157
 158         *bpp = NULL;
 159
 160         /*
 161          * Look for mapped directory blocks at or above the current offset.
 162          * Truncate down to the nearest directory block to start the scanning
 163          * operation.
 164          */
 165         last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
 166         map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));
 167
 168         if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
 169                 return 0;
 170         if (map.br_startoff >= last_da)
 171                 return 0;
 172         xfs_trim_extent(&map, map_off, last_da - map_off);
 173
 174         /* Read the directory block of that first mapping. */
 175         new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
 176         if (new_off > *curoff)
 177                 *curoff = new_off;
 178
 179         return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
 180 }
 181
 182 /* Call a function for every entry in a leaf directory. */
 183 STATIC int
 184 xchk_dir_walk_leaf(
 185         struct xfs_scrub        *sc,
 186         struct xfs_inode        *dp,
 187         xchk_dirent_fn          dirent_fn,
 188         void                    *priv)
 189 {
 190         struct xfs_mount        *mp = dp->i_mount;
 191         struct xfs_da_geometry  *geo = mp->m_dir_geo;
 192         struct xfs_buf          *bp = NULL;
 193         xfs_dir2_off_t          curoff = 0;
 194         unsigned int            offset = 0;
 195         int                     error;
 196
 197         /* Iterate every directory offset in this directory. */
 198         while (curoff < XFS_DIR2_LEAF_OFFSET) {
 199                 struct xfs_name                 name = { };
 200                 struct xfs_dir2_data_unused     *dup;
 201                 struct xfs_dir2_data_entry      *dep;
 202                 xfs_ino_t                       ino;
 203                 unsigned int                    length;
 204                 xfs_dir2_dataptr_t              dapos;
 205
 206                 /*
 207                  * If we have no buffer, or we're off the end of the
 208                  * current buffer, need to get another one.
 209                  */
 210                 if (!bp || offset >= geo->blksize) {
 211                         if (bp) {
 212                                 xfs_trans_brelse(sc->tp, bp);
 213                                 bp = NULL;
 214                         }
 215
 216                         error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
 217                                         &bp);
 218                         if (error || !bp)
 219                                 break;
 220
 221                         /*
 222                          * Find our position in the block.
 223                          */
 224                         offset = geo->data_entry_offset;
 225                         curoff += geo->data_entry_offset;
 226                 }
 227
 228                 /* Skip an empty entry. */
 229                 dup = bp->b_addr + offset;
 230                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
 231                         length = be16_to_cpu(dup->length);
 232                         offset += length;
 233                         curoff += length;
 234                         continue;
 235                 }
 236
 237                 /* Otherwise, find the next entry and report it. */
 238                 dep = bp->b_addr + offset;
 239                 length = xfs_dir2_data_entsize(mp, dep->namelen);
 240
 241                 dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
 242                 ino = be64_to_cpu(dep->inumber);
 243                 name.name = dep->name;
 244                 name.len = dep->namelen;
 245                 name.type = xfs_dir2_data_get_ftype(mp, dep);
 246
 247                 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
 248                 if (error)
 249                         break;
 250
 251                 /* Advance to the next entry. */
 252                 offset += length;
 253                 curoff += length;
 254         }
 255
 256         if (bp)
 257                 xfs_trans_brelse(sc->tp, bp);
 258         return error;
 259 }
 260
 261 /*
 262  * Call a function for every entry in a directory.
 263  *
 264  * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.
 265  */
 266 int
 267 xchk_dir_walk(
 268         struct xfs_scrub        *sc,
 269         struct xfs_inode        *dp,
 270         xchk_dirent_fn          dirent_fn,
 271         void                    *priv)
 272 {
 273         struct xfs_da_args      args = {
 274                 .dp             = dp,
 275                 .geo            = dp->i_mount->m_dir_geo,
 276                 .trans          = sc->tp,
 277                 .owner          = dp->i_ino,
 278         };
 279         int                     error;
 280
 281         if (xfs_is_shutdown(dp->i_mount))
 282                 return -EIO;
 283
 284         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
 285         xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
 286
 287         switch (xfs_dir2_format(&args, &error)) {
 288         case XFS_DIR2_FMT_SF:
 289                 return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
 290         case XFS_DIR2_FMT_BLOCK:
 291                 return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
 292         case XFS_DIR2_FMT_LEAF:
 293         case XFS_DIR2_FMT_NODE:
 294                 return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
 295         default:
 296                 return error;
 297         }
 298 }
 299
 300 /*
 301  * Look up the inode number for an exact name in a directory.
 302  *
 303  * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.  Names are not
 304  * checked for correctness.
 305  */
 306 int
 307 xchk_dir_lookup(
 308         struct xfs_scrub        *sc,
 309         struct xfs_inode        *dp,
 310         const struct xfs_name   *name,
 311         xfs_ino_t               *ino)
 312 {
 313         struct xfs_da_args      args = {
 314                 .dp             = dp,
 315                 .geo            = dp->i_mount->m_dir_geo,
 316                 .trans          = sc->tp,
 317                 .name           = name->name,
 318                 .namelen        = name->len,
 319                 .filetype       = name->type,
 320                 .hashval        = xfs_dir2_hashname(dp->i_mount, name),
 321                 .whichfork      = XFS_DATA_FORK,
 322                 .op_flags       = XFS_DA_OP_OKNOENT,
 323                 .owner          = dp->i_ino,
 324         };
 325         int                     error;
 326
 327         if (xfs_is_shutdown(dp->i_mount))
 328                 return -EIO;
 329
 330         /*
 331          * A temporary directory's block headers are written with the owner
 332          * set to sc->ip, so we must switch the owner here for the lookup.
 333          */
 334         if (dp == sc->tempip)
 335                 args.owner = sc->ip->i_ino;
 336
 337         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
 338         xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
 339
 340         error = xfs_dir_lookup_args(&args);
 341         if (!error)
 342                 *ino = args.inumber;
 343         return error;
 344 }
 345
 346 /*
 347  * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
 348  * state.  The caller may have a transaction, so we must use trylock for both
 349  * IOLOCKs.
 350  */
 351 static inline unsigned int
 352 xchk_dir_trylock_both(
 353         struct xfs_scrub        *sc,
 354         struct xfs_inode        *ip)
 355 {
 356         if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
 357                 return 0;
 358
 359         if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
 360                 goto parent_iolock;
 361
 362         xchk_ilock(sc, XFS_ILOCK_EXCL);
 363         if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
 364                 goto parent_ilock;
 365
 366         return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
 367
 368 parent_ilock:
 369         xchk_iunlock(sc, XFS_ILOCK_EXCL);
 370         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 371 parent_iolock:
 372         xchk_iunlock(sc, XFS_IOLOCK_EXCL);
 373         return 0;
 374 }
 375
 376 /*
 377  * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
 378  * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
 379  * pointer link so that we can check that link.
 380  *
 381  * We do not know ahead of time that the directory tree is /not/ corrupt, so we
 382  * cannot use the "lock two inode" functions because we do not know that there
 383  * is not a racing thread trying to take the locks in opposite order.  First
 384  * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
 385  * of @ip to synchronize with the VFS.  Next, take ILOCK_EXCL of the scrub
 386  * target and @ip to synchronize with XFS.
 387  *
 388  * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
 389  * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
 390  * be returned.  If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
 391  * XCHK_TRY_HARDER was set.  Returns -EINTR if the process has been killed.
 392  */
 393 int
 394 xchk_dir_trylock_for_pptrs(
 395         struct xfs_scrub        *sc,
 396         struct xfs_inode        *ip,
 397         unsigned int            *lockmode)
 398 {
 399         unsigned int            nr;
 400         int                     error = 0;
 401
 402         ASSERT(sc->ilock_flags == 0);
 403
 404         for (nr = 0; nr < HZ; nr++) {
 405                 *lockmode = xchk_dir_trylock_both(sc, ip);
 406                 if (*lockmode)
 407                         return 0;
 408
 409                 if (xchk_should_terminate(sc, &error))
 410                         return error;
 411
 412                 delay(1);
 413         }
 414
 415         if (sc->flags & XCHK_TRY_HARDER) {
 416                 xchk_set_incomplete(sc);
 417                 return -ETIMEDOUT;
 418         }
 419
 420         return -EDEADLOCK;
 421 }