]>
Commit | Line | Data |
---|---|---|
0b61f8a4 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
0f28b257 DW |
2 | /* |
3 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
0f28b257 | 4 | * Author: Darrick J. Wong <[email protected]> |
0f28b257 DW |
5 | */ |
6 | #include "xfs.h" | |
7 | #include "xfs_fs.h" | |
8 | #include "xfs_shared.h" | |
9 | #include "xfs_format.h" | |
10 | #include "xfs_trans_resv.h" | |
11 | #include "xfs_mount.h" | |
0f28b257 | 12 | #include "xfs_log_format.h" |
0f28b257 DW |
13 | #include "xfs_inode.h" |
14 | #include "xfs_icache.h" | |
15 | #include "xfs_dir2.h" | |
16 | #include "xfs_dir2_priv.h" | |
0f28b257 DW |
17 | #include "scrub/scrub.h" |
18 | #include "scrub/common.h" | |
0f28b257 DW |
19 | |
/*
 * Scrub setup hook for the parent pointer checker.
 *
 * Hands the scrub context and inode straight to
 * xchk_setup_inode_contents(), passing 0 as its third argument, and
 * returns whatever that helper returns.
 */
int
xchk_setup_parent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	int			error;

	error = xchk_setup_inode_contents(sc, ip, 0);
	return error;
}
28 | ||
29 | /* Parent pointers */ | |
30 | ||
31 | /* Look for an entry in a parent pointing to this inode. */ | |
32 | ||
c517b3aa | 33 | struct xchk_parent_ctx { |
032d91f9 | 34 | struct dir_context dc; |
8feb4732 | 35 | struct xfs_scrub *sc; |
032d91f9 DW |
36 | xfs_ino_t ino; |
37 | xfs_nlink_t nlink; | |
8feb4732 | 38 | bool cancelled; |
0f28b257 DW |
39 | }; |
40 | ||
41 | /* Look for a single entry in a directory pointing to an inode. */ | |
42 | STATIC int | |
c517b3aa | 43 | xchk_parent_actor( |
032d91f9 DW |
44 | struct dir_context *dc, |
45 | const char *name, | |
46 | int namelen, | |
47 | loff_t pos, | |
48 | u64 ino, | |
49 | unsigned type) | |
0f28b257 | 50 | { |
032d91f9 | 51 | struct xchk_parent_ctx *spc; |
8feb4732 | 52 | int error = 0; |
0f28b257 | 53 | |
c517b3aa | 54 | spc = container_of(dc, struct xchk_parent_ctx, dc); |
0f28b257 DW |
55 | if (spc->ino == ino) |
56 | spc->nlink++; | |
8feb4732 DW |
57 | |
58 | /* | |
59 | * If we're facing a fatal signal, bail out. Store the cancellation | |
60 | * status separately because the VFS readdir code squashes error codes | |
61 | * into short directory reads. | |
62 | */ | |
63 | if (xchk_should_terminate(spc->sc, &error)) | |
64 | spc->cancelled = true; | |
65 | ||
66 | return error; | |
0f28b257 DW |
67 | } |
68 | ||
69 | /* Count the number of dentries in the parent dir that point to this inode. */ | |
70 | STATIC int | |
c517b3aa | 71 | xchk_parent_count_parent_dentries( |
1d8a748a | 72 | struct xfs_scrub *sc, |
032d91f9 DW |
73 | struct xfs_inode *parent, |
74 | xfs_nlink_t *nlink) | |
0f28b257 | 75 | { |
032d91f9 | 76 | struct xchk_parent_ctx spc = { |
8feb4732 DW |
77 | .dc.actor = xchk_parent_actor, |
78 | .ino = sc->ip->i_ino, | |
79 | .sc = sc, | |
0f28b257 | 80 | }; |
032d91f9 DW |
81 | size_t bufsize; |
82 | loff_t oldpos; | |
83 | uint lock_mode; | |
84 | int error = 0; | |
0f28b257 DW |
85 | |
86 | /* | |
87 | * If there are any blocks, read-ahead block 0 as we're almost | |
88 | * certain to have the next operation be a read there. This is | |
89 | * how we guarantee that the parent's extent map has been loaded, | |
90 | * if there is one. | |
91 | */ | |
92 | lock_mode = xfs_ilock_data_map_shared(parent); | |
daf83964 | 93 | if (parent->i_df.if_nextents > 0) |
06566fda | 94 | error = xfs_dir3_data_readahead(parent, 0, 0); |
0f28b257 DW |
95 | xfs_iunlock(parent, lock_mode); |
96 | if (error) | |
97 | return error; | |
98 | ||
99 | /* | |
100 | * Iterate the parent dir to confirm that there is | |
101 | * exactly one entry pointing back to the inode being | |
102 | * scanned. | |
103 | */ | |
104 | bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, | |
105 | parent->i_d.di_size); | |
106 | oldpos = 0; | |
107 | while (true) { | |
108 | error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); | |
109 | if (error) | |
110 | goto out; | |
8feb4732 DW |
111 | if (spc.cancelled) { |
112 | error = -EAGAIN; | |
113 | goto out; | |
114 | } | |
0f28b257 DW |
115 | if (oldpos == spc.dc.pos) |
116 | break; | |
117 | oldpos = spc.dc.pos; | |
118 | } | |
119 | *nlink = spc.nlink; | |
120 | out: | |
121 | return error; | |
122 | } | |
123 | ||
124 | /* | |
125 | * Given the inode number of the alleged parent of the inode being | |
126 | * scrubbed, try to validate that the parent has exactly one directory | |
127 | * entry pointing back to the inode being scrubbed. | |
128 | */ | |
129 | STATIC int | |
c517b3aa | 130 | xchk_parent_validate( |
1d8a748a | 131 | struct xfs_scrub *sc, |
032d91f9 DW |
132 | xfs_ino_t dnum, |
133 | bool *try_again) | |
0f28b257 | 134 | { |
032d91f9 DW |
135 | struct xfs_mount *mp = sc->mp; |
136 | struct xfs_inode *dp = NULL; | |
137 | xfs_nlink_t expected_nlink; | |
138 | xfs_nlink_t nlink; | |
139 | int error = 0; | |
0f28b257 DW |
140 | |
141 | *try_again = false; | |
142 | ||
8bc763c2 DW |
143 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
144 | goto out; | |
145 | ||
0f28b257 DW |
146 | /* '..' must not point to ourselves. */ |
147 | if (sc->ip->i_ino == dnum) { | |
c517b3aa | 148 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
149 | goto out; |
150 | } | |
151 | ||
152 | /* | |
153 | * If we're an unlinked directory, the parent /won't/ have a link | |
154 | * to us. Otherwise, it should have one link. | |
155 | */ | |
156 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
157 | ||
158 | /* | |
159 | * Grab this parent inode. We release the inode before we | |
160 | * cancel the scrub transaction. Since we're don't know a | |
161 | * priori that releasing the inode won't trigger eofblocks | |
162 | * cleanup (which allocates what would be a nested transaction) | |
163 | * if the parent pointer erroneously points to a file, we | |
164 | * can't use DONTCACHE here because DONTCACHE inodes can trigger | |
165 | * immediate inactive cleanup of the inode. | |
5927268f DW |
166 | * |
167 | * If _iget returns -EINVAL then the parent inode number is garbage | |
168 | * and the directory is corrupt. If the _iget returns -EFSCORRUPTED | |
169 | * or -EFSBADCRC then the parent is corrupt which is a cross | |
170 | * referencing error. Any other error is an operational error. | |
0f28b257 | 171 | */ |
5927268f DW |
172 | error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); |
173 | if (error == -EINVAL) { | |
174 | error = -EFSCORRUPTED; | |
c517b3aa | 175 | xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); |
5927268f DW |
176 | goto out; |
177 | } | |
c517b3aa | 178 | if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 | 179 | goto out; |
46c59736 | 180 | if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { |
c517b3aa | 181 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
182 | goto out_rele; |
183 | } | |
184 | ||
185 | /* | |
186 | * We prefer to keep the inode locked while we lock and search | |
187 | * its alleged parent for a forward reference. If we can grab | |
188 | * the iolock, validate the pointers and we're done. We must | |
189 | * use nowait here to avoid an ABBA deadlock on the parent and | |
190 | * the child inodes. | |
191 | */ | |
192 | if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { | |
c517b3aa DW |
193 | error = xchk_parent_count_parent_dentries(sc, dp, &nlink); |
194 | if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, | |
0f28b257 DW |
195 | &error)) |
196 | goto out_unlock; | |
197 | if (nlink != expected_nlink) | |
c517b3aa | 198 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
199 | goto out_unlock; |
200 | } | |
201 | ||
202 | /* | |
203 | * The game changes if we get here. We failed to lock the parent, | |
204 | * so we're going to try to verify both pointers while only holding | |
205 | * one lock so as to avoid deadlocking with something that's actually | |
206 | * trying to traverse down the directory tree. | |
207 | */ | |
208 | xfs_iunlock(sc->ip, sc->ilock_flags); | |
209 | sc->ilock_flags = 0; | |
c517b3aa | 210 | error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED); |
ddd10c2f DW |
211 | if (error) |
212 | goto out_rele; | |
0f28b257 DW |
213 | |
214 | /* Go looking for our dentry. */ | |
c517b3aa DW |
215 | error = xchk_parent_count_parent_dentries(sc, dp, &nlink); |
216 | if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) | |
0f28b257 DW |
217 | goto out_unlock; |
218 | ||
219 | /* Drop the parent lock, relock this inode. */ | |
220 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
c517b3aa | 221 | error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL); |
ddd10c2f DW |
222 | if (error) |
223 | goto out_rele; | |
0f28b257 | 224 | sc->ilock_flags = XFS_IOLOCK_EXCL; |
0f28b257 DW |
225 | |
226 | /* | |
227 | * If we're an unlinked directory, the parent /won't/ have a link | |
228 | * to us. Otherwise, it should have one link. We have to re-set | |
229 | * it here because we dropped the lock on sc->ip. | |
230 | */ | |
231 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
232 | ||
233 | /* Look up '..' to see if the inode changed. */ | |
234 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
c517b3aa | 235 | if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 DW |
236 | goto out_rele; |
237 | ||
238 | /* Drat, parent changed. Try again! */ | |
239 | if (dnum != dp->i_ino) { | |
44a8736b | 240 | xfs_irele(dp); |
0f28b257 DW |
241 | *try_again = true; |
242 | return 0; | |
243 | } | |
44a8736b | 244 | xfs_irele(dp); |
0f28b257 DW |
245 | |
246 | /* | |
247 | * '..' didn't change, so check that there was only one entry | |
248 | * for us in the parent. | |
249 | */ | |
250 | if (nlink != expected_nlink) | |
c517b3aa | 251 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
252 | return error; |
253 | ||
254 | out_unlock: | |
255 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
256 | out_rele: | |
44a8736b | 257 | xfs_irele(dp); |
0f28b257 DW |
258 | out: |
259 | return error; | |
260 | } | |
261 | ||
262 | /* Scrub a parent pointer. */ | |
263 | int | |
c517b3aa | 264 | xchk_parent( |
1d8a748a | 265 | struct xfs_scrub *sc) |
0f28b257 | 266 | { |
032d91f9 DW |
267 | struct xfs_mount *mp = sc->mp; |
268 | xfs_ino_t dnum; | |
269 | bool try_again; | |
270 | int tries = 0; | |
271 | int error = 0; | |
0f28b257 DW |
272 | |
273 | /* | |
274 | * If we're a directory, check that the '..' link points up to | |
275 | * a directory that has one entry pointing to us. | |
276 | */ | |
277 | if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) | |
278 | return -ENOENT; | |
279 | ||
280 | /* We're not a special inode, are we? */ | |
281 | if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { | |
c517b3aa | 282 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
283 | goto out; |
284 | } | |
285 | ||
286 | /* | |
287 | * The VFS grabs a read or write lock via i_rwsem before it reads | |
288 | * or writes to a directory. If we've gotten this far we've | |
289 | * already obtained IOLOCK_EXCL, which (since 4.10) is the same as | |
290 | * getting a write lock on i_rwsem. Therefore, it is safe for us | |
291 | * to drop the ILOCK here in order to do directory lookups. | |
292 | */ | |
293 | sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
294 | xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
295 | ||
296 | /* Look up '..' */ | |
297 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
c517b3aa | 298 | if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 DW |
299 | goto out; |
300 | if (!xfs_verify_dir_ino(mp, dnum)) { | |
c517b3aa | 301 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
302 | goto out; |
303 | } | |
304 | ||
305 | /* Is this the root dir? Then '..' must point to itself. */ | |
306 | if (sc->ip == mp->m_rootip) { | |
307 | if (sc->ip->i_ino != mp->m_sb.sb_rootino || | |
308 | sc->ip->i_ino != dnum) | |
c517b3aa | 309 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
310 | goto out; |
311 | } | |
312 | ||
313 | do { | |
c517b3aa | 314 | error = xchk_parent_validate(sc, dnum, &try_again); |
0f28b257 DW |
315 | if (error) |
316 | goto out; | |
317 | } while (try_again && ++tries < 20); | |
318 | ||
319 | /* | |
320 | * We gave it our best shot but failed, so mark this scrub | |
321 | * incomplete. Userspace can decide if it wants to try again. | |
322 | */ | |
323 | if (try_again && tries == 20) | |
c517b3aa | 324 | xchk_set_incomplete(sc); |
0f28b257 | 325 | out: |
ddd10c2f DW |
326 | /* |
327 | * If we failed to lock the parent inode even after a retry, just mark | |
328 | * this scrub incomplete and return. | |
329 | */ | |
f8c2a225 | 330 | if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) { |
ddd10c2f | 331 | error = 0; |
c517b3aa | 332 | xchk_set_incomplete(sc); |
ddd10c2f | 333 | } |
0f28b257 DW |
334 | return error; |
335 | } |