1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
15 #include "xfs_dir2_priv.h"
16 #include "xfs_trace.h"
18 #include "xfs_trans.h"
19 #include "xfs_error.h"
20 #include "scrub/scrub.h"
21 #include "scrub/common.h"
22 #include "scrub/readdir.h"
24 /* Call a function for every entry in a shortform directory. */
/*
 * The entries are read straight out of the in-core data fork
 * (dp->i_df.if_data); no buffer is read in the lines shown here.
 * dirent_fn is invoked in this order:
 *   1. with dp->i_ino, positioned at the first data entry offset of
 *      geo->datablk (presumably the "." entry -- the name initializer is
 *      only partly visible);
 *   2. with the parent inode number stored in the shortform header;
 *   3. once per shortform entry, sfp->count iterations in total.
 * Every call receives a dapos value built by xfs_dir2_db_off_to_dataptr().
 *
 * NOTE(review): this listing is missing lines (the function name, several
 * declarations, and whatever follows each dirent_fn call), so the handling
 * of a nonzero return from dirent_fn cannot be confirmed from here.
 */
29 xchk_dirent_fn dirent_fn,
32 struct xfs_name name = {
35 .type = XFS_DIR3_FT_DIR,
37 struct xfs_mount *mp = dp->i_mount;
38 struct xfs_da_geometry *geo = mp->m_dir_geo;
39 struct xfs_dir2_sf_entry *sfep;
40 struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data;
42 xfs_dir2_dataptr_t dapos;
/* The in-core fork byte count is expected to equal the on-disk size. */
46 ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
/* First call: position of the first data entry; inode is dp itself. */
50 dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
51 geo->data_entry_offset);
53 error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
/*
 * Second call: the position is advanced past one entry whose name is a
 * single character long, and the inode is the parent recorded in the
 * shortform header.  The name fields used for this call are not visible
 * in this listing.
 */
58 dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
59 geo->data_entry_offset +
60 xfs_dir2_data_entsize(mp, sizeof(".") - 1));
61 ino = xfs_dir2_sf_get_parent_ino(sfp);
65 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
69 /* iterate everything else */
70 sfep = xfs_dir2_sf_firstentry(sfp);
71 for (i = 0; i < sfp->count; i++) {
/*
 * The offset stored in the shortform entry is converted to a dapos the
 * same way as above; inode number, name and file type all come from the
 * shortform entry itself.
 */
72 dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
73 xfs_dir2_sf_get_offset(sfep));
74 ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
75 name.name = sfep->name;
76 name.len = sfep->namelen;
77 name.type = xfs_dir2_sf_get_ftype(mp, sfep);
79 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
/* Step to the entry that follows sfep. */
83 sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
89 /* Call a function for every entry in a block directory. */
/*
 * The directory block is obtained with xfs_dir3_block_read() using the
 * scrub transaction (sc->tp), scanned from geo->data_entry_offset up to the
 * value returned by xfs_dir3_data_end_offset(), and handed back with
 * xfs_trans_brelse().  Free-space records are stepped over; every other
 * record is reported to dirent_fn.
 *
 * NOTE(review): this listing is missing lines (the function name, some
 * declarations, and the statements following the read and the callback),
 * so the error paths cannot be confirmed from here.
 */
94 xchk_dirent_fn dirent_fn,
97 struct xfs_mount *mp = dp->i_mount;
98 struct xfs_da_geometry *geo = mp->m_dir_geo;
100 unsigned int off, next_off, end;
103 error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
107 /* Walk each directory entry. */
108 end = xfs_dir3_data_end_offset(geo, bp->b_addr);
109 for (off = geo->data_entry_offset; off < end; off = next_off) {
110 struct xfs_name name = { };
/*
 * dup and dep point at the same address in the buffer; the free tag
 * tested below decides which of the two views applies.
 */
111 struct xfs_dir2_data_unused *dup = bp->b_addr + off;
112 struct xfs_dir2_data_entry *dep = bp->b_addr + off;
114 xfs_dir2_dataptr_t dapos;
116 /* Skip an empty entry. */
117 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
/* A free record carries its own length (big-endian on disk). */
118 next_off = off + be16_to_cpu(dup->length);
122 /* Otherwise, find the next entry and report it. */
/* The size of a live entry is derived from its name length. */
123 next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
127 dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
128 ino = be64_to_cpu(dep->inumber);
129 name.name = dep->name;
130 name.len = dep->namelen;
131 name.type = xfs_dir2_data_get_ftype(mp, dep);
133 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
/* Hand the block buffer back through the scrub transaction. */
138 xfs_trans_brelse(sc->tp, bp);
142 /* Read a leaf-format directory buffer. */
/*
 * @tp:     transaction passed through to xfs_dir3_data_read()
 * @dp:     directory inode; its data fork is searched for mappings
 * @geo:    directory geometry used for the byte/block conversions
 * @curoff: byte offset to start searching from; it is read here, and
 *          presumably moved forward when the first mapping lies beyond it
 *          (that assignment is not visible in this listing)
 * @bpp:    receives the buffer from xfs_dir3_data_read()
 *
 * On the path shown, the return value is whatever xfs_dir3_data_read()
 * returns.  The outcomes of the two early checks (no extent found, extent
 * at or past the leaf offset) are on lines missing from this listing.
 */
144 xchk_read_leaf_dir_buf(
145 struct xfs_trans *tp,
146 struct xfs_inode *dp,
147 struct xfs_da_geometry *geo,
148 xfs_dir2_off_t *curoff,
149 struct xfs_buf **bpp)
151 struct xfs_iext_cursor icur;
152 struct xfs_bmbt_irec map;
153 struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
156 xfs_dir2_off_t new_off;
161 * Look for mapped directory blocks at or above the current offset.
162 * Truncate down to the nearest directory block to start the scanning
/*
 * last_da is XFS_DIR2_LEAF_OFFSET converted from bytes, i.e. the upper
 * bound of the search.  map_off is *curoff converted to a directory block
 * number and then back to the units last_da uses.
 */
165 last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
166 map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));
168 if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
170 if (map.br_startoff >= last_da)
/* Clip the mapping to the window that starts at map_off and ends at last_da. */
172 xfs_trim_extent(&map, map_off, last_da - map_off);
174 /* Read the directory block of that first mapping. */
/* new_off is the byte offset at which the clipped mapping starts. */
175 new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
176 if (new_off > *curoff)
179 return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
182 /* Call a function for every entry in a leaf directory. */
/*
 * Two cursors are kept: curoff is a byte offset compared against
 * XFS_DIR2_LEAF_OFFSET, and offset is the position inside the buffer
 * currently held in bp.  A new buffer is fetched through
 * xchk_read_leaf_dir_buf() whenever there is none or offset has reached
 * geo->blksize.
 *
 * NOTE(review): this listing is missing lines (the function name, some
 * declarations, the code that advances offset/curoff, and the statements
 * following the read and the callback), so loop termination and error
 * handling cannot be confirmed from here.
 */
185 struct xfs_scrub *sc,
186 struct xfs_inode *dp,
187 xchk_dirent_fn dirent_fn,
190 struct xfs_mount *mp = dp->i_mount;
191 struct xfs_da_geometry *geo = mp->m_dir_geo;
192 struct xfs_buf *bp = NULL;
193 xfs_dir2_off_t curoff = 0;
194 unsigned int offset = 0;
197 /* Iterate every directory offset in this directory. */
198 while (curoff < XFS_DIR2_LEAF_OFFSET) {
199 struct xfs_name name = { };
200 struct xfs_dir2_data_unused *dup;
201 struct xfs_dir2_data_entry *dep;
204 xfs_dir2_dataptr_t dapos;
207 * If we have no buffer, or we're off the end of the
208 * current buffer, need to get another one.
210 if (!bp || offset >= geo->blksize) {
/* The buffer held so far is handed back before the next read. */
212 xfs_trans_brelse(sc->tp, bp);
216 error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
222 * Find our position in the block.
224 offset = geo->data_entry_offset;
225 curoff += geo->data_entry_offset;
228 /* Skip an empty entry. */
229 dup = bp->b_addr + offset;
230 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
231 length = be16_to_cpu(dup->length);
237 /* Otherwise, find the next entry and report it. */
238 dep = bp->b_addr + offset;
239 length = xfs_dir2_data_entsize(mp, dep->namelen);
/*
 * Only the low 31 bits of the data pointer are kept.
 * NOTE(review): the reason for the 0x7fffffff mask is not stated in the
 * visible code -- confirm.
 */
241 dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
242 ino = be64_to_cpu(dep->inumber);
243 name.name = dep->name;
244 name.len = dep->namelen;
245 name.type = xfs_dir2_data_get_ftype(mp, dep);
247 error = dirent_fn(sc, dp, dapos, &name, ino, priv);
251 /* Advance to the next entry. */
/* Hand back the buffer through the scrub transaction. */
257 xfs_trans_brelse(sc->tp, bp);
262 * Call a function for every entry in a directory.
264 * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*.
268 struct xfs_scrub *sc,
269 struct xfs_inode *dp,
270 xchk_dirent_fn dirent_fn,
273 struct xfs_da_args args = {
275 .geo = dp->i_mount->m_dir_geo,
/*
 * A shut-down filesystem is checked for first; the statement executed in
 * that case is not visible in this listing.
 */
281 if (xfs_is_shutdown(dp->i_mount))
/* dp must be a directory, and its ILOCK must be held shared or exclusive. */
284 ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
285 xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
/*
 * Dispatch on the format reported by xfs_dir2_format(): shortform and
 * block directories each have their own walker, while leaf and node
 * directories share one.  The remaining switch arms (for example, what
 * happens when xfs_dir2_format() fills in error) are not visible here.
 */
287 switch (xfs_dir2_format(&args, &error)) {
288 case XFS_DIR2_FMT_SF:
289 return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
290 case XFS_DIR2_FMT_BLOCK:
291 return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
292 case XFS_DIR2_FMT_LEAF:
293 case XFS_DIR2_FMT_NODE:
294 return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
301 * Look up the inode number for an exact name in a directory.
303 * Callers must hold the ILOCK. File types are XFS_DIR3_FT_*. Names are not
304 * checked for correctness.
308 struct xfs_scrub *sc,
309 struct xfs_inode *dp,
310 const struct xfs_name *name,
/*
 * The lookup is described by a struct xfs_da_args filled in from @dp and
 * @name and then passed to xfs_dir_lookup_args().  Some initializer fields
 * are on lines missing from this listing.
 */
313 struct xfs_da_args args = {
315 .geo = dp->i_mount->m_dir_geo,
318 .namelen = name->len,
319 .filetype = name->type,
/* The name hash is computed up front with the mount's hash function. */
320 .hashval = xfs_dir2_hashname(dp->i_mount, name),
321 .whichfork = XFS_DATA_FORK,
322 .op_flags = XFS_DA_OP_OKNOENT,
/*
 * A shut-down filesystem is checked for first; the statement executed in
 * that case is not visible in this listing.
 */
327 if (xfs_is_shutdown(dp->i_mount))
331 * A temporary directory's block headers are written with the owner
332 * set to sc->ip, so we must switch the owner here for the lookup.
334 if (dp == sc->tempip)
335 args.owner = sc->ip->i_ino;
/* dp must be a directory, and its ILOCK must be held shared or exclusive. */
337 ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
338 xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
/*
 * NOTE(review): how the result of the lookup is passed back to the caller
 * is on lines missing from this listing.
 */
340 error = xfs_dir_lookup_args(&args);
347 * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
348 * state. The caller may have a transaction, so we must use trylock for both
351 static inline unsigned int
352 xchk_dir_trylock_both(
353 struct xfs_scrub *sc,
354 struct xfs_inode *ip)
/*
 * Acquisition order as written below:
 *   1. IOLOCK_EXCL on the scrub target (through the xchk_ilock_nowait helper),
 *   2. IOLOCK_SHARED on @ip,
 *   3. ILOCK_EXCL on the scrub target,
 *   4. ILOCK_EXCL on @ip.
 * When all four are held, the return value is the set of lock flags held
 * on @ip.  The statements run when a trylock fails are on lines missing
 * from this listing; the three unlock calls at the bottom release the
 * locks taken in steps 3, 2 and 1, in that order.
 */
356 if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
359 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
/*
 * NOTE(review): unlike its neighbours this call is xchk_ilock(), not a
 * *_nowait variant, and its result is not tested -- confirm that a blocking
 * acquisition of the scrub target's ILOCK is intended here.
 */
362 xchk_ilock(sc, XFS_ILOCK_EXCL);
363 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
/* Success: report exactly the lock modes now held on @ip. */
366 return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
/* Unwind: release in the reverse of the order the locks were taken. */
369 xchk_iunlock(sc, XFS_ILOCK_EXCL);
370 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
372 xchk_iunlock(sc, XFS_IOLOCK_EXCL);
377 * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
378 * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
379 * pointer link so that we can check that link.
381 * We do not know ahead of time that the directory tree is /not/ corrupt, so we
382 * cannot use the "lock two inode" functions because we do not know that there
383 * is not a racing thread trying to take the locks in opposite order. First
384 * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
385 * of @ip to synchronize with the VFS. Next, take ILOCK_EXCL of the scrub
386 * target and @ip to synchronize with XFS.
388 * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
389 * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
390 * be returned. If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
391 * XCHK_TRY_HARDER was set. Returns -EINTR if the process has been killed.
394 xchk_dir_trylock_for_pptrs(
395 struct xfs_scrub *sc,
396 struct xfs_inode *ip,
397 unsigned int *lockmode)
402 ASSERT(sc->ilock_flags == 0);
404 for (nr = 0; nr < HZ; nr++) {
405 *lockmode = xchk_dir_trylock_both(sc, ip);
409 if (xchk_should_terminate(sc, &error))
415 if (sc->flags & XCHK_TRY_HARDER) {
416 xchk_set_incomplete(sc);