]>
Commit | Line | Data |
---|---|---|
0b61f8a4 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
0f28b257 DW |
2 | /* |
3 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
0f28b257 | 4 | * Author: Darrick J. Wong <[email protected]> |
0f28b257 DW |
5 | */ |
6 | #include "xfs.h" | |
7 | #include "xfs_fs.h" | |
8 | #include "xfs_shared.h" | |
9 | #include "xfs_format.h" | |
10 | #include "xfs_trans_resv.h" | |
11 | #include "xfs_mount.h" | |
12 | #include "xfs_defer.h" | |
13 | #include "xfs_btree.h" | |
14 | #include "xfs_bit.h" | |
15 | #include "xfs_log_format.h" | |
16 | #include "xfs_trans.h" | |
17 | #include "xfs_sb.h" | |
18 | #include "xfs_inode.h" | |
19 | #include "xfs_icache.h" | |
20 | #include "xfs_dir2.h" | |
21 | #include "xfs_dir2_priv.h" | |
22 | #include "xfs_ialloc.h" | |
23 | #include "scrub/xfs_scrub.h" | |
24 | #include "scrub/scrub.h" | |
25 | #include "scrub/common.h" | |
26 | #include "scrub/trace.h" | |
27 | ||
/*
 * Prepare to scrub a parent pointer.  All of the work is delegated to
 * xchk_setup_inode_contents(), called with a third argument of zero;
 * its return value is passed straight back to the caller.
 */
int
xchk_setup_parent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	int			error;

	error = xchk_setup_inode_contents(sc, ip, 0);
	return error;
}
36 | ||
37 | /* Parent pointers */ | |
38 | ||
39 | /* Look for an entry in a parent pointing to this inode. */ | |
40 | ||
c517b3aa | 41 | struct xchk_parent_ctx { |
032d91f9 DW |
42 | struct dir_context dc; |
43 | xfs_ino_t ino; | |
44 | xfs_nlink_t nlink; | |
0f28b257 DW |
45 | }; |
46 | ||
47 | /* Look for a single entry in a directory pointing to an inode. */ | |
48 | STATIC int | |
c517b3aa | 49 | xchk_parent_actor( |
032d91f9 DW |
50 | struct dir_context *dc, |
51 | const char *name, | |
52 | int namelen, | |
53 | loff_t pos, | |
54 | u64 ino, | |
55 | unsigned type) | |
0f28b257 | 56 | { |
032d91f9 | 57 | struct xchk_parent_ctx *spc; |
0f28b257 | 58 | |
c517b3aa | 59 | spc = container_of(dc, struct xchk_parent_ctx, dc); |
0f28b257 DW |
60 | if (spc->ino == ino) |
61 | spc->nlink++; | |
62 | return 0; | |
63 | } | |
64 | ||
65 | /* Count the number of dentries in the parent dir that point to this inode. */ | |
66 | STATIC int | |
c517b3aa | 67 | xchk_parent_count_parent_dentries( |
1d8a748a | 68 | struct xfs_scrub *sc, |
032d91f9 DW |
69 | struct xfs_inode *parent, |
70 | xfs_nlink_t *nlink) | |
0f28b257 | 71 | { |
032d91f9 | 72 | struct xchk_parent_ctx spc = { |
c517b3aa | 73 | .dc.actor = xchk_parent_actor, |
0f28b257 DW |
74 | .dc.pos = 0, |
75 | .ino = sc->ip->i_ino, | |
76 | .nlink = 0, | |
77 | }; | |
032d91f9 DW |
78 | size_t bufsize; |
79 | loff_t oldpos; | |
80 | uint lock_mode; | |
81 | int error = 0; | |
0f28b257 DW |
82 | |
83 | /* | |
84 | * If there are any blocks, read-ahead block 0 as we're almost | |
85 | * certain to have the next operation be a read there. This is | |
86 | * how we guarantee that the parent's extent map has been loaded, | |
87 | * if there is one. | |
88 | */ | |
89 | lock_mode = xfs_ilock_data_map_shared(parent); | |
90 | if (parent->i_d.di_nextents > 0) | |
91 | error = xfs_dir3_data_readahead(parent, 0, -1); | |
92 | xfs_iunlock(parent, lock_mode); | |
93 | if (error) | |
94 | return error; | |
95 | ||
96 | /* | |
97 | * Iterate the parent dir to confirm that there is | |
98 | * exactly one entry pointing back to the inode being | |
99 | * scanned. | |
100 | */ | |
101 | bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, | |
102 | parent->i_d.di_size); | |
103 | oldpos = 0; | |
104 | while (true) { | |
105 | error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); | |
106 | if (error) | |
107 | goto out; | |
108 | if (oldpos == spc.dc.pos) | |
109 | break; | |
110 | oldpos = spc.dc.pos; | |
111 | } | |
112 | *nlink = spc.nlink; | |
113 | out: | |
114 | return error; | |
115 | } | |
116 | ||
117 | /* | |
118 | * Given the inode number of the alleged parent of the inode being | |
119 | * scrubbed, try to validate that the parent has exactly one directory | |
120 | * entry pointing back to the inode being scrubbed. | |
121 | */ | |
122 | STATIC int | |
c517b3aa | 123 | xchk_parent_validate( |
1d8a748a | 124 | struct xfs_scrub *sc, |
032d91f9 DW |
125 | xfs_ino_t dnum, |
126 | bool *try_again) | |
0f28b257 | 127 | { |
032d91f9 DW |
128 | struct xfs_mount *mp = sc->mp; |
129 | struct xfs_inode *dp = NULL; | |
130 | xfs_nlink_t expected_nlink; | |
131 | xfs_nlink_t nlink; | |
132 | int error = 0; | |
0f28b257 DW |
133 | |
134 | *try_again = false; | |
135 | ||
8bc763c2 DW |
136 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
137 | goto out; | |
138 | ||
0f28b257 DW |
139 | /* '..' must not point to ourselves. */ |
140 | if (sc->ip->i_ino == dnum) { | |
c517b3aa | 141 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
142 | goto out; |
143 | } | |
144 | ||
145 | /* | |
146 | * If we're an unlinked directory, the parent /won't/ have a link | |
147 | * to us. Otherwise, it should have one link. | |
148 | */ | |
149 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
150 | ||
151 | /* | |
152 | * Grab this parent inode. We release the inode before we | |
153 | * cancel the scrub transaction. Since we're don't know a | |
154 | * priori that releasing the inode won't trigger eofblocks | |
155 | * cleanup (which allocates what would be a nested transaction) | |
156 | * if the parent pointer erroneously points to a file, we | |
157 | * can't use DONTCACHE here because DONTCACHE inodes can trigger | |
158 | * immediate inactive cleanup of the inode. | |
5927268f DW |
159 | * |
160 | * If _iget returns -EINVAL then the parent inode number is garbage | |
161 | * and the directory is corrupt. If the _iget returns -EFSCORRUPTED | |
162 | * or -EFSBADCRC then the parent is corrupt which is a cross | |
163 | * referencing error. Any other error is an operational error. | |
0f28b257 | 164 | */ |
5927268f DW |
165 | error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); |
166 | if (error == -EINVAL) { | |
167 | error = -EFSCORRUPTED; | |
c517b3aa | 168 | xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); |
5927268f DW |
169 | goto out; |
170 | } | |
c517b3aa | 171 | if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 | 172 | goto out; |
46c59736 | 173 | if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { |
c517b3aa | 174 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
175 | goto out_rele; |
176 | } | |
177 | ||
178 | /* | |
179 | * We prefer to keep the inode locked while we lock and search | |
180 | * its alleged parent for a forward reference. If we can grab | |
181 | * the iolock, validate the pointers and we're done. We must | |
182 | * use nowait here to avoid an ABBA deadlock on the parent and | |
183 | * the child inodes. | |
184 | */ | |
185 | if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { | |
c517b3aa DW |
186 | error = xchk_parent_count_parent_dentries(sc, dp, &nlink); |
187 | if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, | |
0f28b257 DW |
188 | &error)) |
189 | goto out_unlock; | |
190 | if (nlink != expected_nlink) | |
c517b3aa | 191 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
192 | goto out_unlock; |
193 | } | |
194 | ||
195 | /* | |
196 | * The game changes if we get here. We failed to lock the parent, | |
197 | * so we're going to try to verify both pointers while only holding | |
198 | * one lock so as to avoid deadlocking with something that's actually | |
199 | * trying to traverse down the directory tree. | |
200 | */ | |
201 | xfs_iunlock(sc->ip, sc->ilock_flags); | |
202 | sc->ilock_flags = 0; | |
c517b3aa | 203 | error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED); |
ddd10c2f DW |
204 | if (error) |
205 | goto out_rele; | |
0f28b257 DW |
206 | |
207 | /* Go looking for our dentry. */ | |
c517b3aa DW |
208 | error = xchk_parent_count_parent_dentries(sc, dp, &nlink); |
209 | if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) | |
0f28b257 DW |
210 | goto out_unlock; |
211 | ||
212 | /* Drop the parent lock, relock this inode. */ | |
213 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
c517b3aa | 214 | error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL); |
ddd10c2f DW |
215 | if (error) |
216 | goto out_rele; | |
0f28b257 | 217 | sc->ilock_flags = XFS_IOLOCK_EXCL; |
0f28b257 DW |
218 | |
219 | /* | |
220 | * If we're an unlinked directory, the parent /won't/ have a link | |
221 | * to us. Otherwise, it should have one link. We have to re-set | |
222 | * it here because we dropped the lock on sc->ip. | |
223 | */ | |
224 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
225 | ||
226 | /* Look up '..' to see if the inode changed. */ | |
227 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
c517b3aa | 228 | if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 DW |
229 | goto out_rele; |
230 | ||
231 | /* Drat, parent changed. Try again! */ | |
232 | if (dnum != dp->i_ino) { | |
44a8736b | 233 | xfs_irele(dp); |
0f28b257 DW |
234 | *try_again = true; |
235 | return 0; | |
236 | } | |
44a8736b | 237 | xfs_irele(dp); |
0f28b257 DW |
238 | |
239 | /* | |
240 | * '..' didn't change, so check that there was only one entry | |
241 | * for us in the parent. | |
242 | */ | |
243 | if (nlink != expected_nlink) | |
c517b3aa | 244 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
245 | return error; |
246 | ||
247 | out_unlock: | |
248 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
249 | out_rele: | |
44a8736b | 250 | xfs_irele(dp); |
0f28b257 DW |
251 | out: |
252 | return error; | |
253 | } | |
254 | ||
255 | /* Scrub a parent pointer. */ | |
256 | int | |
c517b3aa | 257 | xchk_parent( |
1d8a748a | 258 | struct xfs_scrub *sc) |
0f28b257 | 259 | { |
032d91f9 DW |
260 | struct xfs_mount *mp = sc->mp; |
261 | xfs_ino_t dnum; | |
262 | bool try_again; | |
263 | int tries = 0; | |
264 | int error = 0; | |
0f28b257 DW |
265 | |
266 | /* | |
267 | * If we're a directory, check that the '..' link points up to | |
268 | * a directory that has one entry pointing to us. | |
269 | */ | |
270 | if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) | |
271 | return -ENOENT; | |
272 | ||
273 | /* We're not a special inode, are we? */ | |
274 | if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { | |
c517b3aa | 275 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
276 | goto out; |
277 | } | |
278 | ||
279 | /* | |
280 | * The VFS grabs a read or write lock via i_rwsem before it reads | |
281 | * or writes to a directory. If we've gotten this far we've | |
282 | * already obtained IOLOCK_EXCL, which (since 4.10) is the same as | |
283 | * getting a write lock on i_rwsem. Therefore, it is safe for us | |
284 | * to drop the ILOCK here in order to do directory lookups. | |
285 | */ | |
286 | sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
287 | xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
288 | ||
289 | /* Look up '..' */ | |
290 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
c517b3aa | 291 | if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 DW |
292 | goto out; |
293 | if (!xfs_verify_dir_ino(mp, dnum)) { | |
c517b3aa | 294 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
295 | goto out; |
296 | } | |
297 | ||
298 | /* Is this the root dir? Then '..' must point to itself. */ | |
299 | if (sc->ip == mp->m_rootip) { | |
300 | if (sc->ip->i_ino != mp->m_sb.sb_rootino || | |
301 | sc->ip->i_ino != dnum) | |
c517b3aa | 302 | xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
0f28b257 DW |
303 | goto out; |
304 | } | |
305 | ||
306 | do { | |
c517b3aa | 307 | error = xchk_parent_validate(sc, dnum, &try_again); |
0f28b257 DW |
308 | if (error) |
309 | goto out; | |
310 | } while (try_again && ++tries < 20); | |
311 | ||
312 | /* | |
313 | * We gave it our best shot but failed, so mark this scrub | |
314 | * incomplete. Userspace can decide if it wants to try again. | |
315 | */ | |
316 | if (try_again && tries == 20) | |
c517b3aa | 317 | xchk_set_incomplete(sc); |
0f28b257 | 318 | out: |
ddd10c2f DW |
319 | /* |
320 | * If we failed to lock the parent inode even after a retry, just mark | |
321 | * this scrub incomplete and return. | |
322 | */ | |
323 | if (sc->try_harder && error == -EDEADLOCK) { | |
324 | error = 0; | |
c517b3aa | 325 | xchk_set_incomplete(sc); |
ddd10c2f | 326 | } |
0f28b257 DW |
327 | return error; |
328 | } |