]>
Commit | Line | Data |
---|---|---|
36fd6e86 DW |
1 | /* |
2 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <[email protected]> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #include "xfs.h" | |
21 | #include "xfs_fs.h" | |
22 | #include "xfs_shared.h" | |
23 | #include "xfs_format.h" | |
24 | #include "xfs_trans_resv.h" | |
25 | #include "xfs_mount.h" | |
26 | #include "xfs_defer.h" | |
27 | #include "xfs_btree.h" | |
28 | #include "xfs_bit.h" | |
29 | #include "xfs_log_format.h" | |
30 | #include "xfs_trans.h" | |
31 | #include "xfs_sb.h" | |
32 | #include "xfs_inode.h" | |
80e4e126 DW |
33 | #include "xfs_icache.h" |
34 | #include "xfs_itable.h" | |
36fd6e86 DW |
35 | #include "xfs_alloc.h" |
36 | #include "xfs_alloc_btree.h" | |
37 | #include "xfs_bmap.h" | |
38 | #include "xfs_bmap_btree.h" | |
39 | #include "xfs_ialloc.h" | |
40 | #include "xfs_ialloc_btree.h" | |
41 | #include "xfs_refcount.h" | |
42 | #include "xfs_refcount_btree.h" | |
43 | #include "xfs_rmap.h" | |
44 | #include "xfs_rmap_btree.h" | |
eb41c93f DW |
45 | #include "xfs_quota.h" |
46 | #include "xfs_qm.h" | |
84d42ea6 DW |
47 | #include "xfs_errortag.h" |
48 | #include "xfs_error.h" | |
49 | #include "xfs_log.h" | |
50 | #include "xfs_trans_priv.h" | |
36fd6e86 DW |
51 | #include "scrub/xfs_scrub.h" |
52 | #include "scrub/scrub.h" | |
dcb660f9 | 53 | #include "scrub/common.h" |
36fd6e86 | 54 | #include "scrub/trace.h" |
b6c1beb9 | 55 | #include "scrub/btree.h" |
84d42ea6 | 56 | #include "scrub/repair.h" |
36fd6e86 | 57 | |
a5637186 DW |
58 | /* |
59 | * Online Scrub and Repair | |
60 | * | |
61 | * Traditionally, XFS (the kernel driver) did not know how to check or | |
62 | * repair on-disk data structures. That task was left to the xfs_check | |
63 | * and xfs_repair tools, both of which require taking the filesystem | |
64 | * offline for a thorough but time consuming examination. Online | |
65 | * scrub & repair, on the other hand, enables us to check the metadata | |
66 | * for obvious errors while carefully stepping around the filesystem's | |
67 | * ongoing operations, locking rules, etc. | |
68 | * | |
69 | * Given that most XFS metadata consist of records stored in a btree, | |
70 | * most of the checking functions iterate the btree blocks themselves | |
71 | * looking for irregularities. When a record block is encountered, each | |
72 | * record can be checked for obviously bad values. Record values can | |
73 | * also be cross-referenced against other btrees to look for potential | |
74 | * misunderstandings between pieces of metadata. | |
75 | * | |
76 | * It is expected that the checkers responsible for per-AG metadata | |
77 | * structures will lock the AG headers (AGI, AGF, AGFL), iterate the | |
78 | * metadata structure, and perform any relevant cross-referencing before | |
79 | * unlocking the AG and returning the results to userspace. These | |
80 | * scrubbers must not keep an AG locked for too long to avoid tying up | |
81 | * the block and inode allocators. | |
82 | * | |
83 | * Block maps and b-trees rooted in an inode present a special challenge | |
84 | * because they can involve extents from any AG. The general scrubber | |
85 | * structure of lock -> check -> xref -> unlock still holds, but AG | |
86 | * locking order rules /must/ be obeyed to avoid deadlocks. The | |
87 | * ordering rule, of course, is that we must lock in increasing AG | |
88 | * order. Helper functions are provided to track which AG headers we've | |
89 | * already locked. If we detect an imminent locking order violation, we | |
90 | * can signal a potential deadlock, in which case the scrubber can jump | |
91 | * out to the top level, lock all the AGs in order, and retry the scrub. | |
92 | * | |
93 | * For file data (directories, extended attributes, symlinks) scrub, we | |
94 | * can simply lock the inode and walk the data. For btree data | |
95 | * (directories and attributes) we follow the same btree-scrubbing | |
96 | * strategy outlined previously to check the records. | |
97 | * | |
98 | * We use a bit of trickery with transactions to avoid buffer deadlocks | |
99 | * if there is a cycle in the metadata. The basic problem is that | |
100 | * travelling down a btree involves locking the current buffer at each | |
101 | * tree level. If a pointer should somehow point back to a buffer that | |
102 | * we've already examined, we will deadlock due to the second buffer | |
103 | * locking attempt. Note however that grabbing a buffer in transaction | |
104 | * context links the locked buffer to the transaction. If we try to | |
105 | * re-grab the buffer in the context of the same transaction, we avoid | |
106 | * the second lock attempt and continue. Between the verifier and the | |
107 | * scrubber, something will notice that something is amiss and report | |
108 | * the corruption. Therefore, each scrubber will allocate an empty | |
109 | * transaction, attach buffers to it, and cancel the transaction at the | |
110 | * end of the scrub run. Cancelling a non-dirty transaction simply | |
111 | * unlocks the buffers. | |
112 | * | |
113 | * There are four pieces of data that scrub can communicate to | |
114 | * userspace. The first is the error code (errno), which can be used to | |
115 | * communicate operational errors in performing the scrub. There are | |
116 | * also three flags that can be set in the scrub context. If the data | |
117 | * structure itself is corrupt, the CORRUPT flag will be set. If | |
118 | * the metadata is correct but otherwise suboptimal, the PREEN flag | |
119 | * will be set. | |
64b12563 DW |
120 | * |
121 | * We perform secondary validation of filesystem metadata by | |
122 | * cross-referencing every record with all other available metadata. | |
123 | * For example, for block mapping extents, we verify that there are no | |
124 | * records in the free space and inode btrees corresponding to that | |
125 | * space extent and that there is a corresponding entry in the reverse | |
126 | * mapping btree. Inconsistent metadata is noted by setting the | |
127 | * XCORRUPT flag; btree query function errors are noted by setting the | |
128 | * XFAIL flag and deleting the cursor to prevent further attempts to | |
129 | * cross-reference with a defective btree. | |
84d42ea6 DW |
130 | * |
131 | * If a piece of metadata proves corrupt or suboptimal, the userspace | |
132 | * program can ask the kernel to apply some tender loving care (TLC) to | |
133 | * the metadata object by setting the REPAIR flag and re-calling the | |
134 | * scrub ioctl. "Corruption" is defined by metadata violating the | |
135 | * on-disk specification; operations cannot continue if the violation is | |
136 | * left untreated. It is possible for XFS to continue if an object is | |
137 | * "suboptimal", however performance may be degraded. Repairs are | |
138 | * usually performed by rebuilding the metadata entirely out of | |
139 | * redundant metadata. Optimizing, on the other hand, can sometimes be | |
140 | * done without rebuilding entire structures. | |
141 | * | |
142 | * Generally speaking, the repair code has the following code structure: | |
143 | * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock. | |
144 | * The first check helps us figure out if we need to rebuild or simply | |
145 | * optimize the structure so that the rebuild knows what to do. The | |
146 | * second check evaluates the completeness of the repair; that is what | |
147 | * is reported to userspace. | |
a5637186 DW |
148 | */ |
149 | ||
dcb660f9 DW |
/*
 * Scrub probe -- the "is the scrub ioctl there?" test.
 *
 * Userspace (xfs_scrub) calls this to find out whether the running
 * kernel is willing to scrub or repair a given mountpoint.  By the time
 * we get here the ioctl inputs have been validated and the filesystem
 * has been set up for a scrub (or repair) operation, so there is no
 * metadata to examine; we go straight back to userspace, which uses the
 * returned errno and structure state to decide (in broad terms) whether
 * scrub/repair are supported.
 *
 * Returns the error code filled in by xfs_scrub_should_terminate() when
 * that helper says we should stop, or zero otherwise.
 */
static int
xfs_scrub_probe(
	struct xfs_scrub_context	*sc)
{
	int				error = 0;

	return xfs_scrub_should_terminate(sc, &error) ? error : 0;
}
171 | ||
a5637186 DW |
172 | /* Scrub setup and teardown */ |
173 | ||
174 | /* Free all the resources and finish the transactions. */ | |
175 | STATIC int | |
176 | xfs_scrub_teardown( | |
177 | struct xfs_scrub_context *sc, | |
80e4e126 | 178 | struct xfs_inode *ip_in, |
a5637186 DW |
179 | int error) |
180 | { | |
b6c1beb9 | 181 | xfs_scrub_ag_free(sc, &sc->sa); |
a5637186 | 182 | if (sc->tp) { |
84d42ea6 DW |
183 | if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)) |
184 | error = xfs_trans_commit(sc->tp); | |
185 | else | |
186 | xfs_trans_cancel(sc->tp); | |
a5637186 DW |
187 | sc->tp = NULL; |
188 | } | |
80e4e126 | 189 | if (sc->ip) { |
638a7174 DW |
190 | if (sc->ilock_flags) |
191 | xfs_iunlock(sc->ip, sc->ilock_flags); | |
80e4e126 DW |
192 | if (sc->ip != ip_in && |
193 | !xfs_internal_inum(sc->mp, sc->ip->i_ino)) | |
194 | iput(VFS_I(sc->ip)); | |
195 | sc->ip = NULL; | |
196 | } | |
eb41c93f DW |
197 | if (sc->has_quotaofflock) |
198 | mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock); | |
eec0482e DW |
199 | if (sc->buf) { |
200 | kmem_free(sc->buf); | |
201 | sc->buf = NULL; | |
202 | } | |
a5637186 DW |
203 | return error; |
204 | } | |
205 | ||
206 | /* Scrubbing dispatch. */ | |
207 | ||
208 | static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { | |
bfb3e9b9 | 209 | [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */ |
8e630837 | 210 | .type = ST_NONE, |
dcb660f9 DW |
211 | .setup = xfs_scrub_setup_fs, |
212 | .scrub = xfs_scrub_probe, | |
84d42ea6 | 213 | .repair = xfs_repair_probe, |
dcb660f9 | 214 | }, |
bfb3e9b9 | 215 | [XFS_SCRUB_TYPE_SB] = { /* superblock */ |
8e630837 ES |
216 | .type = ST_PERAG, |
217 | .setup = xfs_scrub_setup_fs, | |
21fb4cb1 | 218 | .scrub = xfs_scrub_superblock, |
d25522f1 | 219 | .repair = xfs_repair_superblock, |
21fb4cb1 | 220 | }, |
bfb3e9b9 | 221 | [XFS_SCRUB_TYPE_AGF] = { /* agf */ |
8e630837 ES |
222 | .type = ST_PERAG, |
223 | .setup = xfs_scrub_setup_fs, | |
ab9d5dc5 | 224 | .scrub = xfs_scrub_agf, |
84d42ea6 | 225 | .repair = xfs_repair_notsupported, |
ab9d5dc5 | 226 | }, |
bfb3e9b9 | 227 | [XFS_SCRUB_TYPE_AGFL]= { /* agfl */ |
8e630837 ES |
228 | .type = ST_PERAG, |
229 | .setup = xfs_scrub_setup_fs, | |
ab9d5dc5 | 230 | .scrub = xfs_scrub_agfl, |
84d42ea6 | 231 | .repair = xfs_repair_notsupported, |
ab9d5dc5 | 232 | }, |
bfb3e9b9 | 233 | [XFS_SCRUB_TYPE_AGI] = { /* agi */ |
8e630837 ES |
234 | .type = ST_PERAG, |
235 | .setup = xfs_scrub_setup_fs, | |
a12890ae | 236 | .scrub = xfs_scrub_agi, |
84d42ea6 | 237 | .repair = xfs_repair_notsupported, |
a12890ae | 238 | }, |
bfb3e9b9 | 239 | [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */ |
8e630837 | 240 | .type = ST_PERAG, |
efa7a99c DW |
241 | .setup = xfs_scrub_setup_ag_allocbt, |
242 | .scrub = xfs_scrub_bnobt, | |
84d42ea6 | 243 | .repair = xfs_repair_notsupported, |
efa7a99c | 244 | }, |
bfb3e9b9 | 245 | [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */ |
8e630837 | 246 | .type = ST_PERAG, |
efa7a99c DW |
247 | .setup = xfs_scrub_setup_ag_allocbt, |
248 | .scrub = xfs_scrub_cntbt, | |
84d42ea6 | 249 | .repair = xfs_repair_notsupported, |
efa7a99c | 250 | }, |
bfb3e9b9 | 251 | [XFS_SCRUB_TYPE_INOBT] = { /* inobt */ |
8e630837 | 252 | .type = ST_PERAG, |
3daa6641 DW |
253 | .setup = xfs_scrub_setup_ag_iallocbt, |
254 | .scrub = xfs_scrub_inobt, | |
84d42ea6 | 255 | .repair = xfs_repair_notsupported, |
3daa6641 | 256 | }, |
bfb3e9b9 | 257 | [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */ |
8e630837 | 258 | .type = ST_PERAG, |
3daa6641 DW |
259 | .setup = xfs_scrub_setup_ag_iallocbt, |
260 | .scrub = xfs_scrub_finobt, | |
261 | .has = xfs_sb_version_hasfinobt, | |
84d42ea6 | 262 | .repair = xfs_repair_notsupported, |
3daa6641 | 263 | }, |
bfb3e9b9 | 264 | [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */ |
8e630837 | 265 | .type = ST_PERAG, |
c7e693d9 DW |
266 | .setup = xfs_scrub_setup_ag_rmapbt, |
267 | .scrub = xfs_scrub_rmapbt, | |
268 | .has = xfs_sb_version_hasrmapbt, | |
84d42ea6 | 269 | .repair = xfs_repair_notsupported, |
c7e693d9 | 270 | }, |
bfb3e9b9 | 271 | [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */ |
8e630837 | 272 | .type = ST_PERAG, |
edc09b52 DW |
273 | .setup = xfs_scrub_setup_ag_refcountbt, |
274 | .scrub = xfs_scrub_refcountbt, | |
275 | .has = xfs_sb_version_hasreflink, | |
84d42ea6 | 276 | .repair = xfs_repair_notsupported, |
edc09b52 | 277 | }, |
bfb3e9b9 | 278 | [XFS_SCRUB_TYPE_INODE] = { /* inode record */ |
8e630837 | 279 | .type = ST_INODE, |
80e4e126 DW |
280 | .setup = xfs_scrub_setup_inode, |
281 | .scrub = xfs_scrub_inode, | |
84d42ea6 | 282 | .repair = xfs_repair_notsupported, |
80e4e126 | 283 | }, |
bfb3e9b9 | 284 | [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */ |
8e630837 | 285 | .type = ST_INODE, |
99d9d8d0 DW |
286 | .setup = xfs_scrub_setup_inode_bmap, |
287 | .scrub = xfs_scrub_bmap_data, | |
84d42ea6 | 288 | .repair = xfs_repair_notsupported, |
99d9d8d0 | 289 | }, |
bfb3e9b9 | 290 | [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */ |
8e630837 | 291 | .type = ST_INODE, |
99d9d8d0 DW |
292 | .setup = xfs_scrub_setup_inode_bmap, |
293 | .scrub = xfs_scrub_bmap_attr, | |
84d42ea6 | 294 | .repair = xfs_repair_notsupported, |
99d9d8d0 | 295 | }, |
bfb3e9b9 | 296 | [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */ |
8e630837 | 297 | .type = ST_INODE, |
99d9d8d0 DW |
298 | .setup = xfs_scrub_setup_inode_bmap, |
299 | .scrub = xfs_scrub_bmap_cow, | |
84d42ea6 | 300 | .repair = xfs_repair_notsupported, |
99d9d8d0 | 301 | }, |
bfb3e9b9 | 302 | [XFS_SCRUB_TYPE_DIR] = { /* directory */ |
8e630837 | 303 | .type = ST_INODE, |
a5c46e5e DW |
304 | .setup = xfs_scrub_setup_directory, |
305 | .scrub = xfs_scrub_directory, | |
84d42ea6 | 306 | .repair = xfs_repair_notsupported, |
a5c46e5e | 307 | }, |
bfb3e9b9 | 308 | [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */ |
8e630837 | 309 | .type = ST_INODE, |
eec0482e DW |
310 | .setup = xfs_scrub_setup_xattr, |
311 | .scrub = xfs_scrub_xattr, | |
84d42ea6 | 312 | .repair = xfs_repair_notsupported, |
eec0482e | 313 | }, |
bfb3e9b9 | 314 | [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ |
8e630837 | 315 | .type = ST_INODE, |
2a721dbb DW |
316 | .setup = xfs_scrub_setup_symlink, |
317 | .scrub = xfs_scrub_symlink, | |
84d42ea6 | 318 | .repair = xfs_repair_notsupported, |
2a721dbb | 319 | }, |
bfb3e9b9 | 320 | [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */ |
8e630837 | 321 | .type = ST_INODE, |
0f28b257 DW |
322 | .setup = xfs_scrub_setup_parent, |
323 | .scrub = xfs_scrub_parent, | |
84d42ea6 | 324 | .repair = xfs_repair_notsupported, |
0f28b257 | 325 | }, |
bfb3e9b9 | 326 | [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */ |
8e630837 | 327 | .type = ST_FS, |
29b0767b DW |
328 | .setup = xfs_scrub_setup_rt, |
329 | .scrub = xfs_scrub_rtbitmap, | |
330 | .has = xfs_sb_version_hasrealtime, | |
84d42ea6 | 331 | .repair = xfs_repair_notsupported, |
29b0767b | 332 | }, |
bfb3e9b9 | 333 | [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ |
8e630837 | 334 | .type = ST_FS, |
29b0767b DW |
335 | .setup = xfs_scrub_setup_rt, |
336 | .scrub = xfs_scrub_rtsummary, | |
337 | .has = xfs_sb_version_hasrealtime, | |
84d42ea6 | 338 | .repair = xfs_repair_notsupported, |
29b0767b | 339 | }, |
bfb3e9b9 | 340 | [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ |
8e630837 ES |
341 | .type = ST_FS, |
342 | .setup = xfs_scrub_setup_quota, | |
343 | .scrub = xfs_scrub_quota, | |
84d42ea6 | 344 | .repair = xfs_repair_notsupported, |
c2fc338c | 345 | }, |
bfb3e9b9 | 346 | [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */ |
8e630837 ES |
347 | .type = ST_FS, |
348 | .setup = xfs_scrub_setup_quota, | |
349 | .scrub = xfs_scrub_quota, | |
84d42ea6 | 350 | .repair = xfs_repair_notsupported, |
c2fc338c | 351 | }, |
bfb3e9b9 | 352 | [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */ |
8e630837 ES |
353 | .type = ST_FS, |
354 | .setup = xfs_scrub_setup_quota, | |
355 | .scrub = xfs_scrub_quota, | |
84d42ea6 | 356 | .repair = xfs_repair_notsupported, |
c2fc338c | 357 | }, |
a5637186 DW |
358 | }; |
359 | ||
360 | /* This isn't a stable feature, warn once per day. */ | |
361 | static inline void | |
362 | xfs_scrub_experimental_warning( | |
363 | struct xfs_mount *mp) | |
364 | { | |
365 | static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT( | |
366 | "xfs_scrub_warning", 86400 * HZ, 1); | |
367 | ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE); | |
368 | ||
369 | if (__ratelimit(&scrub_warning)) | |
370 | xfs_alert(mp, | |
371 | "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); | |
372 | } | |
373 | ||
0a085ddf ES |
374 | static int |
375 | xfs_scrub_validate_inputs( | |
376 | struct xfs_mount *mp, | |
36fd6e86 DW |
377 | struct xfs_scrub_metadata *sm) |
378 | { | |
0a085ddf | 379 | int error; |
a5637186 | 380 | const struct xfs_scrub_meta_ops *ops; |
a5637186 | 381 | |
a5637186 | 382 | error = -EINVAL; |
0a085ddf | 383 | /* Check our inputs. */ |
a5637186 DW |
384 | sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; |
385 | if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) | |
386 | goto out; | |
8e630837 | 387 | /* sm_reserved[] must be zero */ |
a5637186 DW |
388 | if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) |
389 | goto out; | |
390 | ||
a5637186 | 391 | error = -ENOENT; |
0a085ddf | 392 | /* Do we know about this type of metadata? */ |
a5637186 DW |
393 | if (sm->sm_type >= XFS_SCRUB_TYPE_NR) |
394 | goto out; | |
395 | ops = &meta_scrub_ops[sm->sm_type]; | |
bfb3e9b9 | 396 | if (ops->setup == NULL || ops->scrub == NULL) |
a5637186 | 397 | goto out; |
0a085ddf ES |
398 | /* Does this fs even support this type of metadata? */ |
399 | if (ops->has && !ops->has(&mp->m_sb)) | |
400 | goto out; | |
a5637186 | 401 | |
8e630837 ES |
402 | error = -EINVAL; |
403 | /* restricting fields must be appropriate for type */ | |
404 | switch (ops->type) { | |
405 | case ST_NONE: | |
406 | case ST_FS: | |
407 | if (sm->sm_ino || sm->sm_gen || sm->sm_agno) | |
408 | goto out; | |
409 | break; | |
410 | case ST_PERAG: | |
411 | if (sm->sm_ino || sm->sm_gen || | |
412 | sm->sm_agno >= mp->m_sb.sb_agcount) | |
413 | goto out; | |
414 | break; | |
415 | case ST_INODE: | |
416 | if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) | |
417 | goto out; | |
418 | break; | |
419 | default: | |
420 | goto out; | |
421 | } | |
422 | ||
0a085ddf | 423 | error = -EOPNOTSUPP; |
a5637186 DW |
424 | /* |
425 | * We won't scrub any filesystem that doesn't have the ability | |
426 | * to record unwritten extents. The option was made default in | |
427 | * 2003, removed from mkfs in 2007, and cannot be disabled in | |
428 | * v5, so if we find a filesystem without this flag it's either | |
429 | * really old or totally unsupported. Avoid it either way. | |
430 | * We also don't support v1-v3 filesystems, which aren't | |
431 | * mountable. | |
432 | */ | |
a5637186 DW |
433 | if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) |
434 | goto out; | |
435 | ||
84d42ea6 DW |
436 | /* |
437 | * We only want to repair read-write v5+ filesystems. Defer the check | |
438 | * for ops->repair until after our scrub confirms that we need to | |
439 | * perform repairs so that we avoid failing due to not supporting | |
440 | * repairing an object that doesn't need repairs. | |
441 | */ | |
442 | if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) { | |
443 | error = -EOPNOTSUPP; | |
444 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | |
445 | goto out; | |
446 | ||
447 | error = -EROFS; | |
448 | if (mp->m_flags & XFS_MOUNT_RDONLY) | |
449 | goto out; | |
450 | } | |
a5637186 | 451 | |
0a085ddf ES |
452 | error = 0; |
453 | out: | |
454 | return error; | |
455 | } | |
456 | ||
84d42ea6 DW |
457 | #ifdef CONFIG_XFS_ONLINE_REPAIR |
458 | static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc) | |
459 | { | |
460 | /* | |
461 | * Userspace asked us to repair something, we repaired it, rescanned | |
462 | * it, and the rescan says it's still broken. Scream about this in | |
463 | * the system logs. | |
464 | */ | |
465 | if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && | |
466 | (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | | |
467 | XFS_SCRUB_OFLAG_XCORRUPT))) | |
468 | xfs_repair_failure(sc->mp); | |
469 | } | |
470 | #else | |
471 | static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc) | |
472 | { | |
473 | /* | |
474 | * Userspace asked us to scrub something, it's broken, and we have no | |
475 | * way of fixing it. Scream in the logs. | |
476 | */ | |
477 | if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | | |
478 | XFS_SCRUB_OFLAG_XCORRUPT)) | |
479 | xfs_alert_ratelimited(sc->mp, | |
480 | "Corruption detected during scrub."); | |
481 | } | |
482 | #endif /* CONFIG_XFS_ONLINE_REPAIR */ | |
483 | ||
0a085ddf ES |
484 | /* Dispatch metadata scrubbing. */ |
485 | int | |
486 | xfs_scrub_metadata( | |
487 | struct xfs_inode *ip, | |
488 | struct xfs_scrub_metadata *sm) | |
489 | { | |
490 | struct xfs_scrub_context sc; | |
491 | struct xfs_mount *mp = ip->i_mount; | |
492 | bool try_harder = false; | |
84d42ea6 | 493 | bool already_fixed = false; |
0a085ddf ES |
494 | int error = 0; |
495 | ||
496 | BUILD_BUG_ON(sizeof(meta_scrub_ops) != | |
497 | (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR)); | |
498 | ||
499 | trace_xfs_scrub_start(ip, sm, error); | |
500 | ||
501 | /* Forbidden if we are shut down or mounted norecovery. */ | |
502 | error = -ESHUTDOWN; | |
503 | if (XFS_FORCED_SHUTDOWN(mp)) | |
504 | goto out; | |
505 | error = -ENOTRECOVERABLE; | |
506 | if (mp->m_flags & XFS_MOUNT_NORECOVERY) | |
507 | goto out; | |
508 | ||
509 | error = xfs_scrub_validate_inputs(mp, sm); | |
510 | if (error) | |
511 | goto out; | |
512 | ||
a5637186 DW |
513 | xfs_scrub_experimental_warning(mp); |
514 | ||
515 | retry_op: | |
516 | /* Set up for the operation. */ | |
517 | memset(&sc, 0, sizeof(sc)); | |
518 | sc.mp = ip->i_mount; | |
519 | sc.sm = sm; | |
0a085ddf | 520 | sc.ops = &meta_scrub_ops[sm->sm_type]; |
a5637186 | 521 | sc.try_harder = try_harder; |
b6c1beb9 | 522 | sc.sa.agno = NULLAGNUMBER; |
a5637186 DW |
523 | error = sc.ops->setup(&sc, ip); |
524 | if (error) | |
525 | goto out_teardown; | |
526 | ||
527 | /* Scrub for errors. */ | |
528 | error = sc.ops->scrub(&sc); | |
529 | if (!try_harder && error == -EDEADLOCK) { | |
530 | /* | |
531 | * Scrubbers return -EDEADLOCK to mean 'try harder'. | |
532 | * Tear down everything we hold, then set up again with | |
533 | * preparation for worst-case scenarios. | |
534 | */ | |
80e4e126 | 535 | error = xfs_scrub_teardown(&sc, ip, 0); |
a5637186 DW |
536 | if (error) |
537 | goto out; | |
538 | try_harder = true; | |
539 | goto retry_op; | |
540 | } else if (error) | |
541 | goto out_teardown; | |
542 | ||
84d42ea6 DW |
543 | if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !already_fixed) { |
544 | bool needs_fix; | |
545 | ||
546 | /* Let debug users force us into the repair routines. */ | |
547 | if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) | |
548 | sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; | |
549 | ||
550 | needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | | |
551 | XFS_SCRUB_OFLAG_XCORRUPT | | |
552 | XFS_SCRUB_OFLAG_PREEN)); | |
553 | /* | |
554 | * If userspace asked for a repair but it wasn't necessary, | |
555 | * report that back to userspace. | |
556 | */ | |
557 | if (!needs_fix) { | |
558 | sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED; | |
559 | goto out_nofix; | |
560 | } | |
561 | ||
562 | /* | |
563 | * If it's broken, userspace wants us to fix it, and we haven't | |
564 | * already tried to fix it, then attempt a repair. | |
565 | */ | |
566 | error = xfs_repair_attempt(ip, &sc, &already_fixed); | |
567 | if (error == -EAGAIN) { | |
568 | if (sc.try_harder) | |
569 | try_harder = true; | |
570 | error = xfs_scrub_teardown(&sc, ip, 0); | |
571 | if (error) { | |
572 | xfs_repair_failure(mp); | |
573 | goto out; | |
574 | } | |
575 | goto retry_op; | |
576 | } | |
577 | } | |
a5637186 | 578 | |
84d42ea6 DW |
579 | out_nofix: |
580 | xfs_scrub_postmortem(&sc); | |
a5637186 | 581 | out_teardown: |
80e4e126 | 582 | error = xfs_scrub_teardown(&sc, ip, error); |
a5637186 DW |
583 | out: |
584 | trace_xfs_scrub_done(ip, sm, error); | |
585 | if (error == -EFSCORRUPTED || error == -EFSBADCRC) { | |
586 | sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; | |
587 | error = 0; | |
588 | } | |
589 | return error; | |
36fd6e86 | 590 | } |