1 // SPDX-License-Identifier: GPL-2.0
4 #include "bcachefs_ioctl.h"
6 #include "btree_cache.h"
7 #include "btree_update.h"
13 #include "fs-common.h"
17 #include "recovery_passes.h"
20 #include "thread_with_file.h"
23 #include <linux/bsearch.h>
24 #include <linux/dcache.h> /* struct qstr */
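/*
 * The helpers below check the two-way link between inodes and dirents: an
 * inode's bi_dir/bi_dir_offset should name the dirent that points to it, and
 * that dirent's d_inum (or d_child_subvol, for subvolumes) should point back
 * at the inode.
 */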
26 static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
27 struct bkey_s_c_dirent d)
29 return inode->bi_dir == d.k->p.inode &&
30 inode->bi_dir_offset == d.k->p.offset;
33 static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
34 struct bch_inode_unpacked *inode)
36 if (d.v->d_type == DT_SUBVOL
37 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
38 : le64_to_cpu(d.v->d_inum) == inode->bi_inum)
40 return -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
43 static void dirent_inode_mismatch_msg(struct printbuf *out,
45 struct bkey_s_c_dirent dirent,
46 struct bch_inode_unpacked *inode)
48 prt_str(out, "inode points to dirent that does not point back:");
50 bch2_bkey_val_to_text(out, c, dirent.s_c);
52 bch2_inode_unpacked_to_text(out, inode);
55 static int dirent_points_to_inode(struct bch_fs *c,
56 struct bkey_s_c_dirent dirent,
57 struct bch_inode_unpacked *inode)
59 int ret = dirent_points_to_inode_nowarn(dirent, inode);
61 struct printbuf buf = PRINTBUF;
62 dirent_inode_mismatch_msg(&buf, c, dirent, inode);
63 bch_warn(c, "%s", buf.buf);
70 * XXX: this is handling transaction restarts without returning
71 * -BCH_ERR_transaction_restart_nested; this is not how we do things anymore:
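 * (newer code snapshots trans->restart_count and returns
 * trans_was_restarted() instead - see check_i_sectors() below)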
73 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
78 int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
79 SPOS(inum, 0, snapshot),
82 if (bkey_extent_is_allocation(k.k))
87 return ret ?: sectors;
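/* Count dirents of type DT_DIR under @inum in @snapshot, for i_nlink checks: */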
90 static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
95 int ret = for_each_btree_key_max(trans, iter, BTREE_ID_dirents,
96 SPOS(inum, 0, snapshot),
99 if (k.k->type == KEY_TYPE_dirent &&
100 bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
105 return ret ?: subdirs;
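/* Resolve a subvolume ID to its snapshot ID and root inode number: */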
108 static int subvol_lookup(struct btree_trans *trans, u32 subvol,
109 u32 *snapshot, u64 *inum)
111 struct bch_subvolume s;
112 int ret = bch2_subvolume_get(trans, subvol, false, &s);
114 *snapshot = le32_to_cpu(s.snapshot);
115 *inum = le64_to_cpu(s.inode);
119 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
120 struct bch_inode_unpacked *inode)
122 struct btree_iter iter;
126 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
127 BTREE_ITER_all_snapshots, k, ret) {
128 if (k.k->p.offset != inode_nr)
130 if (!bkey_is_inode(k.k))
132 ret = bch2_inode_unpack(k, inode);
135 ret = -BCH_ERR_ENOENT_inode;
137 bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
138 bch2_trans_iter_exit(trans, &iter);
142 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot,
143 struct bch_inode_unpacked *inode)
145 struct btree_iter iter;
149 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
150 SPOS(0, inode_nr, snapshot), 0);
155 ret = bkey_is_inode(k.k)
156 ? bch2_inode_unpack(k, inode)
157 : -BCH_ERR_ENOENT_inode;
159 bch2_trans_iter_exit(trans, &iter);
163 static int lookup_dirent_in_snapshot(struct btree_trans *trans,
164 struct bch_hash_info hash_info,
165 subvol_inum dir, struct qstr *name,
166 u64 *target, unsigned *type, u32 snapshot)
168 struct btree_iter iter;
169 struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
170 &hash_info, dir, name, 0, snapshot);
171 int ret = bkey_err(k);
175 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
176 *target = le64_to_cpu(d.v->d_inum);
178 bch2_trans_iter_exit(trans, &iter);
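/*
 * Delete the dirent at @pos; we look up the directory inode first because
 * bch2_hash_delete_at() needs the dir's hash info to keep the hash table
 * consistent.
 */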
182 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
184 struct bch_fs *c = trans->c;
185 struct btree_iter iter;
186 struct bch_inode_unpacked dir_inode;
187 struct bch_hash_info dir_hash_info;
190 ret = lookup_first_inode(trans, pos.inode, &dir_inode);
194 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
196 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
198 ret = bch2_btree_iter_traverse(&iter) ?:
199 bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
200 &dir_hash_info, &iter,
201 BTREE_UPDATE_internal_snapshot_node);
202 bch2_trans_iter_exit(trans, &iter);
209 * Find any subvolume associated with a tree of snapshots
210 * We can't rely on master_subvol - it might have been deleted.
212 static int find_snapshot_tree_subvol(struct btree_trans *trans,
213 u32 tree_id, u32 *subvol)
215 struct btree_iter iter;
219 for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) {
220 if (k.k->type != KEY_TYPE_snapshot)
223 struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
224 if (le32_to_cpu(s.v->tree) != tree_id)
228 *subvol = le32_to_cpu(s.v->subvol);
232 ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol;
234 bch2_trans_iter_exit(trans, &iter);
238 /* Get lost+found, create if it doesn't exist: */
239 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
240 struct bch_inode_unpacked *lostfound,
241 u64 reattaching_inum)
243 struct bch_fs *c = trans->c;
244 struct qstr lostfound_str = QSTR("lost+found");
245 struct btree_iter lostfound_iter = { NULL };
250 struct bch_snapshot_tree st;
251 ret = bch2_snapshot_tree_lookup(trans,
252 bch2_snapshot_tree(c, snapshot), &st);
257 ret = find_snapshot_tree_subvol(trans,
258 bch2_snapshot_tree(c, snapshot), &subvolid);
259 bch_err_msg(c, ret, "finding subvol associated with snapshot tree %u",
260 bch2_snapshot_tree(c, snapshot));
264 struct bch_subvolume subvol;
265 ret = bch2_subvolume_get(trans, subvolid, false, &subvol);
266 bch_err_msg(c, ret, "looking up subvol %u for snapshot %u", subvolid, snapshot);
271 struct btree_iter iter;
272 struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
273 BTREE_ID_subvolumes, POS(0, subvolid),
275 ret = PTR_ERR_OR_ZERO(subvol);
279 subvol->v.inode = cpu_to_le64(reattaching_inum);
280 bch2_trans_iter_exit(trans, &iter);
283 subvol_inum root_inum = {
285 .inum = le64_to_cpu(subvol.inode)
288 struct bch_inode_unpacked root_inode;
289 struct bch_hash_info root_hash_info;
290 ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode);
291 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
292 root_inum.inum, subvolid);
296 root_hash_info = bch2_hash_info_init(c, &root_inode);
298 ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
299 &lostfound_str, &inum, &d_type, snapshot);
300 if (bch2_err_matches(ret, ENOENT))
301 goto create_lostfound;
307 if (d_type != DT_DIR) {
308 bch_err(c, "error looking up lost+found: not a directory");
309 return -BCH_ERR_ENOENT_not_directory;
313 * The bch2_check_dirents pass has already run, dangling dirents
314 * shouldn't exist here:
316 ret = lookup_inode(trans, inum, snapshot, lostfound);
317 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
318 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
323 * we always create lost+found in the root snapshot; we don't want
324 * different branches of the snapshot tree to have different lost+found
326 snapshot = le32_to_cpu(st.root_snapshot);
328 * XXX: we could have a nicer log message here if we had a nice way to
329 * walk backpointers to print a path
331 struct printbuf path = PRINTBUF;
332 ret = bch2_inum_to_path(trans, root_inum, &path);
336 bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u",
337 path.buf, root_inum.subvol, snapshot);
338 printbuf_exit(&path);
340 u64 now = bch2_current_time(c);
341 u64 cpu = raw_smp_processor_id();
343 bch2_inode_init_early(c, lostfound);
344 bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
345 lostfound->bi_dir = root_inode.bi_inum;
346 lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot);
348 root_inode.bi_nlink++;
350 ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
354 bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
355 ret = bch2_btree_iter_traverse(&lostfound_iter);
359 ret = bch2_dirent_create_snapshot(trans,
360 0, root_inode.bi_inum, snapshot, &root_hash_info,
361 mode_to_type(lostfound->bi_mode),
364 &lostfound->bi_dir_offset,
365 STR_HASH_must_create) ?:
366 bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
367 BTREE_UPDATE_internal_snapshot_node);
369 bch_err_msg(c, ret, "creating lost+found");
370 bch2_trans_iter_exit(trans, &lostfound_iter);
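/*
 * An inode needs reattaching if it has no dirent backpointer and isn't
 * unlinked; the root directory of the root subvolume is the one exception.
 */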
374 static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
376 if (inode->bi_inum == BCACHEFS_ROOT_INO &&
377 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)
380 return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
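/*
 * Whiteout the dirent at @d_pos in @snapshot, if one is visible there - used
 * when a dirent created in an ancestor snapshot (e.g. by reattach_inode())
 * must not be visible in a child snapshot.
 */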
383 static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot)
385 struct btree_iter iter;
386 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_dirents,
387 SPOS(d_pos.inode, d_pos.offset, snapshot),
389 BTREE_ITER_with_updates);
390 int ret = bkey_err(k);
394 if (bpos_eq(k.k->p, d_pos)) {
396 * delete_at() doesn't work because the update path doesn't
397 * internally use BTREE_ITER_with_updates yet
399 struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
400 ret = PTR_ERR_OR_ZERO(k);
405 k->k.type = KEY_TYPE_whiteout;
407 ret = bch2_trans_update(trans, &iter, k, BTREE_UPDATE_internal_snapshot_node);
410 bch2_trans_iter_exit(trans, &iter);
414 static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
416 struct bch_fs *c = trans->c;
417 struct bch_inode_unpacked lostfound;
421 u32 dirent_snapshot = inode->bi_snapshot;
422 if (inode->bi_subvol) {
423 inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
426 ret = subvol_lookup(trans, inode->bi_parent_subvol,
427 &dirent_snapshot, &root_inum);
431 snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
433 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
436 ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
440 lostfound.bi_nlink += S_ISDIR(inode->bi_mode);
442 /* ensure lost+found inode is also present in inode snapshot */
443 if (!inode->bi_subvol) {
444 BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot));
445 lostfound.bi_snapshot = inode->bi_snapshot;
448 ret = __bch2_fsck_write_inode(trans, &lostfound);
452 struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound);
453 struct qstr name = QSTR(name_buf);
455 inode->bi_dir = lostfound.bi_inum;
457 ret = bch2_dirent_create_snapshot(trans,
458 inode->bi_parent_subvol, lostfound.bi_inum,
463 inode->bi_subvol ?: inode->bi_inum,
464 &inode->bi_dir_offset,
465 STR_HASH_must_create);
467 bch_err_msg(c, ret, "error creating dirent");
471 ret = __bch2_fsck_write_inode(trans, inode);
476 * Fix up inodes in child snapshots: if they should also be reattached
477 * update the backpointer field; if they should not be, we need to emit
478 * whiteouts for the dirent we just created.
480 if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) {
481 snapshot_id_list whiteouts_done;
482 struct btree_iter iter;
485 darray_init(&whiteouts_done);
487 for_each_btree_key_reverse_norestart(trans, iter,
488 BTREE_ID_inodes, SPOS(0, inode->bi_inum, inode->bi_snapshot - 1),
489 BTREE_ITER_all_snapshots|BTREE_ITER_intent, k, ret) {
490 if (k.k->p.offset != inode->bi_inum)
493 if (!bkey_is_inode(k.k) ||
494 !bch2_snapshot_is_ancestor(c, k.k->p.snapshot, inode->bi_snapshot) ||
495 snapshot_list_has_ancestor(c, &whiteouts_done, k.k->p.snapshot))
498 struct bch_inode_unpacked child_inode;
499 ret = bch2_inode_unpack(k, &child_inode);
503 if (!inode_should_reattach(&child_inode)) {
504 ret = maybe_delete_dirent(trans,
505 SPOS(lostfound.bi_inum, inode->bi_dir_offset,
511 ret = snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot);
515 iter.snapshot = k.k->p.snapshot;
516 child_inode.bi_dir = inode->bi_dir;
517 child_inode.bi_dir_offset = inode->bi_dir_offset;
519 ret = bch2_inode_write_flags(trans, &iter, &child_inode,
520 BTREE_UPDATE_internal_snapshot_node);
525 darray_exit(&whiteouts_done);
526 bch2_trans_iter_exit(trans, &iter);
532 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
533 struct btree_iter *iter,
536 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
539 static int remove_backpointer(struct btree_trans *trans,
540 struct bch_inode_unpacked *inode)
545 struct bch_fs *c = trans->c;
546 struct btree_iter iter;
547 struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter,
548 SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot));
549 int ret = bkey_err(d) ?:
550 dirent_points_to_inode(c, d, inode) ?:
551 __remove_dirent(trans, d.k->p);
552 bch2_trans_iter_exit(trans, &iter);
556 static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s)
558 struct bch_fs *c = trans->c;
560 struct bch_inode_unpacked inode;
561 int ret = bch2_inode_find_by_inum_trans(trans,
562 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
567 ret = remove_backpointer(trans, &inode);
568 if (!bch2_err_matches(ret, ENOENT))
569 bch_err_msg(c, ret, "removing dirent");
573 ret = reattach_inode(trans, &inode);
574 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
578 static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
580 struct bch_fs *c = trans->c;
582 if (!bch2_snapshot_is_leaf(c, snapshotid)) {
583 bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
584 return -BCH_ERR_fsck_repair_unimplemented;
588 * If inum isn't set, that means we're being called from check_dirents,
589 * not check_inodes - the root of this subvolume doesn't exist or we
590 * would have found it there:
593 struct btree_iter inode_iter = {};
594 struct bch_inode_unpacked new_inode;
595 u64 cpu = raw_smp_processor_id();
597 bch2_inode_init_early(c, &new_inode);
598 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
600 new_inode.bi_subvol = subvolid;
602 int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
603 bch2_btree_iter_traverse(&inode_iter) ?:
604 bch2_inode_write(trans, &inode_iter, &new_inode);
605 bch2_trans_iter_exit(trans, &inode_iter);
609 inum = new_inode.bi_inum;
612 bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
614 struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
615 int ret = PTR_ERR_OR_ZERO(new_subvol);
619 bkey_subvolume_init(&new_subvol->k_i);
620 new_subvol->k.p.offset = subvolid;
621 new_subvol->v.snapshot = cpu_to_le32(snapshotid);
622 new_subvol->v.inode = cpu_to_le64(inum);
623 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
627 struct btree_iter iter;
628 struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
629 BTREE_ID_snapshots, POS(0, snapshotid),
631 ret = PTR_ERR_OR_ZERO(s);
632 bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
636 u32 snapshot_tree = le32_to_cpu(s->v.tree);
638 s->v.subvol = cpu_to_le32(subvolid);
639 SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
640 bch2_trans_iter_exit(trans, &iter);
642 struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
643 BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
645 ret = PTR_ERR_OR_ZERO(st);
646 bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
650 if (!st->v.master_subvol)
651 st->v.master_subvol = cpu_to_le32(subvolid);
653 bch2_trans_iter_exit(trans, &iter);
657 static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 snapshot, u64 inum)
659 struct bch_fs *c = trans->c;
660 unsigned i_mode = S_IFREG;
664 case BTREE_ID_extents: {
665 struct btree_iter iter = {};
667 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
668 struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0));
669 bch2_trans_iter_exit(trans, &iter);
670 int ret = bkey_err(k);
674 i_size = k.k->p.offset << 9;
677 case BTREE_ID_dirents:
680 case BTREE_ID_xattrs:
686 struct bch_inode_unpacked new_inode;
687 bch2_inode_init_early(c, &new_inode);
688 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL);
689 new_inode.bi_size = i_size;
690 new_inode.bi_inum = inum;
691 new_inode.bi_snapshot = snapshot;
693 return __bch2_fsck_write_inode(trans, &new_inode);
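/*
 * snapshots_seen tracks which snapshot IDs we've seen keys in at the current
 * key position; key_visible_in_snapshot() uses it to decide whether a key in
 * an ancestor snapshot has been overwritten in a descendent.
 */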
696 struct snapshots_seen {
698 snapshot_id_list ids;
701 static inline void snapshots_seen_exit(struct snapshots_seen *s)
703 darray_exit(&s->ids);
706 static inline void snapshots_seen_init(struct snapshots_seen *s)
708 memset(s, 0, sizeof(*s));
711 static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
714 __darray_for_each(s->ids, i) {
721 int ret = darray_insert_item(&s->ids, i - s->ids.data, id);
723 bch_err(c, "error reallocating snapshots_seen table (size %zu)",
728 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
729 enum btree_id btree_id, struct bpos pos)
731 if (!bkey_eq(s->pos, pos))
735 return snapshot_list_add_nodup(c, &s->ids, pos.snapshot);
739 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
740 * and @ancestor hasn't been overwritten in @seen
742 * @c: filesystem handle
743 * @seen: list of snapshot ids already seen at current position
744 * @id: descendent snapshot id
745 * @ancestor: ancestor snapshot id
747 * Returns: whether key in @ancestor snapshot is visible in @id snapshot
749 static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
750 u32 id, u32 ancestor)
754 EBUG_ON(id > ancestor);
756 /* @ancestor should be the snapshot most recently added to @seen */
757 EBUG_ON(ancestor != seen->pos.snapshot);
758 EBUG_ON(ancestor != darray_last(seen->ids));
763 if (!bch2_snapshot_is_ancestor(c, id, ancestor))
767 * We know that @id is a descendant of @ancestor, we're checking if
768 * we've seen a key that overwrote @ancestor - i.e. also a descendent of
769 * @ancestor and with @id as a descendent.
771 * But we already know that we're scanning IDs between @id and @ancestor
772 * numerically, since snapshot ID lists are kept sorted, so if we find
773 * an id that's an ancestor of @id we're done:
776 for (i = seen->ids.nr - 2;
777 i >= 0 && seen->ids.data[i] >= id;
779 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]))
786 * ref_visible - given a key with snapshot id @src that points to a key with
787 * snapshot id @dst, test whether there is some snapshot in which @dst is
790 * @c: filesystem handle
791 * @s: list of snapshot IDs already seen at @src
792 * @src: snapshot ID of src key
793 * @dst: snapshot ID of dst key
794 * Returns: true if there is some snapshot in which @dst is visible
796 * Assumes we're visiting @src keys in natural key order
798 static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
802 ? key_visible_in_snapshot(c, s, dst, src)
803 : bch2_snapshot_is_ancestor(c, src, dst);
806 static int ref_visible2(struct bch_fs *c,
807 u32 src, struct snapshots_seen *src_seen,
808 u32 dst, struct snapshots_seen *dst_seen)
812 swap(dst_seen, src_seen);
814 return key_visible_in_snapshot(c, src_seen, dst, src);
817 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \
818 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
819 (_i)->snapshot <= (_snapshot); _i++) \
820 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
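/*
 * inode_walker caches every snapshot version of the current inode number, so
 * that extents/dirents/xattrs can be checked against the inode version
 * visible in their snapshot.
 */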
822 struct inode_walker_entry {
823 struct bch_inode_unpacked inode;
828 struct inode_walker {
829 bool first_this_inode;
831 bool recalculate_sums;
832 struct bpos last_pos;
834 DARRAY(struct inode_walker_entry) inodes;
835 snapshot_id_list deletes;
838 static void inode_walker_exit(struct inode_walker *w)
840 darray_exit(&w->inodes);
841 darray_exit(&w->deletes);
844 static struct inode_walker inode_walker_init(void)
846 return (struct inode_walker) { 0, };
849 static int add_inode(struct bch_fs *c, struct inode_walker *w,
850 struct bkey_s_c inode)
852 struct bch_inode_unpacked u;
854 return bch2_inode_unpack(inode, &u) ?:
855 darray_push(&w->inodes, ((struct inode_walker_entry) {
857 .snapshot = inode.k->p.snapshot,
861 static int get_inodes_all_snapshots(struct btree_trans *trans,
862 struct inode_walker *w, u64 inum)
864 struct bch_fs *c = trans->c;
865 struct btree_iter iter;
870 * We no longer have inodes for w->last_pos; clear this to avoid
871 * screwing up check_i_sectors/check_subdir_count if we take a
872 * transaction restart here:
874 w->have_inodes = false;
875 w->recalculate_sums = false;
878 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
879 BTREE_ITER_all_snapshots, k, ret) {
880 if (k.k->p.offset != inum)
883 if (bkey_is_inode(k.k))
886 bch2_trans_iter_exit(trans, &iter);
891 w->first_this_inode = true;
892 w->have_inodes = true;
896 static struct inode_walker_entry *
897 lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
899 bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
901 struct inode_walker_entry *i;
902 __darray_for_each(w->inodes, i)
903 if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
908 BUG_ON(k.k->p.snapshot > i->snapshot);
910 if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
911 struct inode_walker_entry new = *i;
913 new.snapshot = k.k->p.snapshot;
916 struct printbuf buf = PRINTBUF;
917 bch2_bkey_val_to_text(&buf, c, k);
919 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
920 "unexpected because we should always update the inode when we update a key in that inode\n"
922 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
925 while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
928 size_t pos = i - w->inodes.data;
929 int ret = darray_insert_item(&w->inodes, pos, new);
933 i = w->inodes.data + pos;
939 static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
940 struct inode_walker *w,
943 if (w->last_pos.inode != k.k->p.inode) {
944 int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
949 w->last_pos = k.k->p;
951 return lookup_inode_for_snapshot(trans->c, w, k);
954 static int get_visible_inodes(struct btree_trans *trans,
955 struct inode_walker *w,
956 struct snapshots_seen *s,
959 struct bch_fs *c = trans->c;
960 struct btree_iter iter;
967 for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot),
968 BTREE_ITER_all_snapshots, k, ret) {
969 if (k.k->p.offset != inum)
972 if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
975 if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot))
978 ret = bkey_is_inode(k.k)
980 : snapshot_list_add(c, &w->deletes, k.k->p.snapshot);
984 bch2_trans_iter_exit(trans, &iter);
990 * Prefer to delete the first one, since that will be the one at the wrong
992 * return value: 0 -> delete k1, 1 -> delete k2
994 int bch2_fsck_update_backpointers(struct btree_trans *trans,
995 struct snapshots_seen *s,
996 const struct bch_hash_desc desc,
997 struct bch_hash_info *hash_info,
1000 if (new->k.type != KEY_TYPE_dirent)
1003 struct bkey_i_dirent *d = bkey_i_to_dirent(new);
1004 struct inode_walker target = inode_walker_init();
1007 if (d->v.d_type == DT_SUBVOL) {
1010 ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
1014 darray_for_each(target.inodes, i) {
1015 i->inode.bi_dir_offset = d->k.p.offset;
1016 ret = __bch2_fsck_write_inode(trans, &i->inode);
1022 inode_walker_exit(&target);
1026 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
1027 struct btree_iter *iter,
1028 struct bch_inode_unpacked *inode,
1031 if (inode->bi_subvol) {
1033 int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
1035 return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
1038 return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
1041 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
1043 struct btree_iter iter;
1044 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0);
1045 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set;
1046 bch2_trans_iter_exit(trans, &iter);
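/*
 * Verify that the dirent named by this inode's bi_dir/bi_dir_offset exists
 * and points back at the inode; if not, clear the backpointer fields so
 * check_dirents()/check_path() can repair or reattach later.
 */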
1050 static int check_inode_dirent_inode(struct btree_trans *trans,
1051 struct bch_inode_unpacked *inode,
1054 struct bch_fs *c = trans->c;
1055 struct printbuf buf = PRINTBUF;
1057 u32 inode_snapshot = inode->bi_snapshot;
1058 struct btree_iter dirent_iter = {};
1059 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
1060 int ret = bkey_err(d);
1061 if (ret && !bch2_err_matches(ret, ENOENT))
1064 if (fsck_err_on(ret,
1065 trans, inode_points_to_missing_dirent,
1066 "inode points to missing dirent\n%s",
1067 (bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) ||
1068 fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode),
1069 trans, inode_points_to_wrong_dirent,
1071 (printbuf_reset(&buf),
1072 dirent_inode_mismatch_msg(&buf, c, d, inode),
1075 * We just clear the backpointer fields for now. If we find a
1076 * dirent that points to this inode in check_dirents(), we'll
1077 * update it then; later, when we get to check_path(), if the
1078 * backpointer is still 0 we'll reattach it.
1081 inode->bi_dir_offset = 0;
1082 *write_inode = true;
1087 bch2_trans_iter_exit(trans, &dirent_iter);
1088 printbuf_exit(&buf);
1093 static int get_snapshot_root_inode(struct btree_trans *trans,
1094 struct bch_inode_unpacked *root,
1097 struct btree_iter iter;
1101 for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
1102 SPOS(0, inum, U32_MAX),
1103 BTREE_ITER_all_snapshots, k, ret) {
1104 if (k.k->p.offset != inum)
1106 if (bkey_is_inode(k.k))
1113 ret = bch2_inode_unpack(k, root);
1115 bch2_trans_iter_exit(trans, &iter);
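/*
 * Recompute a directory's i_size as the sum of dirent_occupied_size() over
 * its dirents, and flag the inode for rewrite if it differs:
 */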
1119 static int check_directory_size(struct btree_trans *trans,
1120 struct bch_inode_unpacked *inode_u,
1121 struct bkey_s_c inode_k, bool *write_inode)
1123 struct btree_iter iter;
1128 for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
1129 SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot),
1130 POS(inode_k.k->p.offset, U64_MAX),
1132 if (k.k->type != KEY_TYPE_dirent)
1135 struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k);
1136 struct qstr name = bch2_dirent_get_name(dirent);
1138 new_size += dirent_occupied_size(&name);
1140 bch2_trans_iter_exit(trans, &iter);
1142 if (!ret && inode_u->bi_size != new_size) {
1143 inode_u->bi_size = new_size;
1144 *write_inode = true;
1150 static int check_inode(struct btree_trans *trans,
1151 struct btree_iter *iter,
1153 struct bch_inode_unpacked *snapshot_root,
1154 struct snapshots_seen *s)
1156 struct bch_fs *c = trans->c;
1157 struct printbuf buf = PRINTBUF;
1158 struct bch_inode_unpacked u;
1159 bool do_update = false;
1162 ret = bch2_check_key_has_snapshot(trans, iter, k);
1168 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
1172 if (!bkey_is_inode(k.k))
1175 ret = bch2_inode_unpack(k, &u);
1179 if (snapshot_root->bi_inum != u.bi_inum) {
1180 ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum);
1185 if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed ||
1186 INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root),
1187 trans, inode_snapshot_mismatch,
1188 "inode hash info in different snapshots doesn't match")) {
1189 u.bi_hash_seed = snapshot_root->bi_hash_seed;
1190 SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root));
1194 if (u.bi_dir || u.bi_dir_offset) {
1195 ret = check_inode_dirent_inode(trans, &u, &do_update);
1200 if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked),
1201 trans, inode_unlinked_but_has_dirent,
1202 "inode unlinked but has dirent\n%s",
1203 (printbuf_reset(&buf),
1204 bch2_inode_unpacked_to_text(&buf, &u),
1206 u.bi_flags &= ~BCH_INODE_unlinked;
1210 if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) {
1211 /* Check for this early so that check_unreachable_inode() will reattach it */
1213 ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot);
1214 if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty)
1217 fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty,
1218 "dir unlinked but not empty\n%s",
1219 (printbuf_reset(&buf),
1220 bch2_inode_unpacked_to_text(&buf, &u),
1222 u.bi_flags &= ~BCH_INODE_unlinked;
1227 ret = bch2_inode_has_child_snapshots(trans, k.k->p);
1231 if (fsck_err_on(ret != !!(u.bi_flags & BCH_INODE_has_child_snapshot),
1232 trans, inode_has_child_snapshots_wrong,
1233 "inode has_child_snapshots flag wrong (should be %u)\n%s",
1235 (printbuf_reset(&buf),
1236 bch2_inode_unpacked_to_text(&buf, &u),
1239 u.bi_flags |= BCH_INODE_has_child_snapshot;
1241 u.bi_flags &= ~BCH_INODE_has_child_snapshot;
1246 if ((u.bi_flags & BCH_INODE_unlinked) &&
1247 !(u.bi_flags & BCH_INODE_has_child_snapshot)) {
1248 if (!test_bit(BCH_FS_started, &c->flags)) {
1250 * If we're not in online fsck, don't delete unlinked
1251 * inodes, just make sure they're on the deleted list.
1253 * They might be referred to by a logged operation -
1254 * i.e. we might have crashed in the middle of a
1255 * truncate on an unlinked but open file - so we want to
1256 * let the delete_dead_inodes kill it after resuming
1259 ret = check_inode_deleted_list(trans, k.k->p);
1264 trans, unlinked_inode_not_on_deleted_list,
1265 "inode %llu:%u unlinked, but not on deleted list",
1266 u.bi_inum, k.k->p.snapshot);
1268 ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, k.k->p, 1);
1272 ret = bch2_inode_or_descendents_is_open(trans, k.k->p);
1276 if (fsck_err_on(!ret,
1277 trans, inode_unlinked_and_not_open,
1278 "inode %llu:%u unlinked and not open",
1279 u.bi_inum, u.bi_snapshot)) {
1280 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
1281 bch_err_msg(c, ret, "in fsck deleting inode");
1288 if (fsck_err_on(u.bi_parent_subvol &&
1289 (u.bi_subvol == 0 ||
1290 u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
1291 trans, inode_bi_parent_nonzero,
1292 "inode %llu:%u has subvol %u but nonzero parent subvol %u",
1293 u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
1294 u.bi_parent_subvol = 0;
1299 struct bch_subvolume s;
1301 ret = bch2_subvolume_get(trans, u.bi_subvol, false, &s);
1302 if (ret && !bch2_err_matches(ret, ENOENT))
1305 if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
1306 ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
1310 if (fsck_err_on(ret,
1311 trans, inode_bi_subvol_missing,
1312 "inode %llu:%u bi_subvol points to missing subvolume %u",
1313 u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
1314 fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
1315 !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
1317 trans, inode_bi_subvol_wrong,
1318 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
1319 u.bi_inum, k.k->p.snapshot, u.bi_subvol,
1320 le64_to_cpu(s.inode),
1321 le32_to_cpu(s.snapshot))) {
1323 u.bi_parent_subvol = 0;
1328 if (fsck_err_on(u.bi_journal_seq > journal_cur_seq(&c->journal),
1329 trans, inode_journal_seq_in_future,
1330 "inode journal seq in future (currently at %llu)\n%s",
1331 journal_cur_seq(&c->journal),
1332 (printbuf_reset(&buf),
1333 bch2_inode_unpacked_to_text(&buf, &u),
1335 u.bi_journal_seq = journal_cur_seq(&c->journal);
1339 if (S_ISDIR(u.bi_mode)) {
1340 ret = check_directory_size(trans, &u, k, &do_update);
1343 trans, directory_size_mismatch,
1344 "directory inode %llu:%u has mismatched directory size",
1345 u.bi_inum, k.k->p.snapshot);
1350 ret = __bch2_fsck_write_inode(trans, &u);
1351 bch_err_msg(c, ret, "in fsck updating inode");
1359 printbuf_exit(&buf);
1363 int bch2_check_inodes(struct bch_fs *c)
1365 struct bch_inode_unpacked snapshot_root = {};
1366 struct snapshots_seen s;
1368 snapshots_seen_init(&s);
1370 int ret = bch2_trans_run(c,
1371 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
1373 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
1374 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
1375 check_inode(trans, &iter, k, &snapshot_root, &s)));
1377 snapshots_seen_exit(&s);
1382 static int find_oldest_inode_needs_reattach(struct btree_trans *trans,
1383 struct bch_inode_unpacked *inode)
1385 struct bch_fs *c = trans->c;
1386 struct btree_iter iter;
1391 * We look for inodes to reattach in natural key order, leaves first,
1392 * but we should do the reattach at the oldest version that needs to be
1395 for_each_btree_key_norestart(trans, iter,
1397 SPOS(0, inode->bi_inum, inode->bi_snapshot + 1),
1398 BTREE_ITER_all_snapshots, k, ret) {
1399 if (k.k->p.offset != inode->bi_inum)
1402 if (!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, k.k->p.snapshot))
1405 if (!bkey_is_inode(k.k))
1408 struct bch_inode_unpacked parent_inode;
1409 ret = bch2_inode_unpack(k, &parent_inode);
1413 if (!inode_should_reattach(&parent_inode))
1416 *inode = parent_inode;
1418 bch2_trans_iter_exit(trans, &iter);
1423 static int check_unreachable_inode(struct btree_trans *trans,
1424 struct btree_iter *iter,
1427 struct printbuf buf = PRINTBUF;
1430 if (!bkey_is_inode(k.k))
1433 struct bch_inode_unpacked inode;
1434 ret = bch2_inode_unpack(k, &inode);
1438 if (!inode_should_reattach(&inode))
1441 ret = find_oldest_inode_needs_reattach(trans, &inode);
1445 if (fsck_err(trans, inode_unreachable,
1446 "unreachable inode:\n%s",
1447 (bch2_inode_unpacked_to_text(&buf, &inode),
1449 ret = reattach_inode(trans, &inode);
1451 printbuf_exit(&buf);
1456 * Reattach unreachable (but not unlinked) inodes
1458 * Run after check_inodes() and check_dirents(), so we know that inode
1459 * backpointer fields point to valid dirents, and every inode that has a dirent
1460 * that points to it has its backpointer field set - so we're just looking for
1461 * non-unlinked inodes without backpointers:
1463 * XXX: this is racy w.r.t. hardlink removal in online fsck
1465 int bch2_check_unreachable_inodes(struct bch_fs *c)
1467 int ret = bch2_trans_run(c,
1468 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
1470 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
1471 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
1472 check_unreachable_inode(trans, &iter, k)));
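/*
 * Which inode modes may own keys in a given btree: extents belong to regular
 * files and symlinks, dirents to directories.
 */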
1477 static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode)
1480 case BTREE_ID_extents:
1481 return S_ISREG(mode) || S_ISLNK(mode);
1482 case BTREE_ID_dirents:
1483 return S_ISDIR(mode);
1484 case BTREE_ID_xattrs:
1491 static int check_key_has_inode(struct btree_trans *trans,
1492 struct btree_iter *iter,
1493 struct inode_walker *inode,
1494 struct inode_walker_entry *i,
1497 struct bch_fs *c = trans->c;
1498 struct printbuf buf = PRINTBUF;
1499 int ret = PTR_ERR_OR_ZERO(i);
1503 if (k.k->type == KEY_TYPE_whiteout)
1506 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
1507 ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
1508 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
1512 inode->last_pos.inode--;
1513 ret = -BCH_ERR_transaction_restart_nested;
1518 trans, key_in_missing_inode,
1519 "key in missing inode:\n %s",
1520 (printbuf_reset(&buf),
1521 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
1524 if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
1525 trans, key_in_wrong_inode_type,
1526 "key for wrong inode mode %o:\n %s",
1528 (printbuf_reset(&buf),
1529 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
1534 printbuf_exit(&buf);
1538 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node);
1542 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
1544 struct bch_fs *c = trans->c;
1548 darray_for_each(w->inodes, i) {
1549 if (i->inode.bi_sectors == i->count)
1552 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);
1554 if (w->recalculate_sums)
1557 if (i->count != count2) {
1558 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
1559 w->last_pos.inode, i->snapshot, i->count, count2);
1563 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
1564 trans, inode_i_sectors_wrong,
1565 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
1566 w->last_pos.inode, i->snapshot,
1567 i->inode.bi_sectors, i->count)) {
1568 i->inode.bi_sectors = i->count;
1569 ret = bch2_fsck_write_inode(trans, &i->inode);
1579 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
1581 u32 restart_count = trans->restart_count;
1582 return check_i_sectors_notnested(trans, w) ?:
1583 trans_was_restarted(trans, restart_count);
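/*
 * extent_end/extent_ends track, per snapshot, where the last extent we saw
 * ended (along with the snapshots seen at that point), so
 * check_overlapping_extents() can detect extents that overlap across visible
 * snapshots.
 */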
1589 struct snapshots_seen seen;
1592 struct extent_ends {
1593 struct bpos last_pos;
1594 DARRAY(struct extent_end) e;
1597 static void extent_ends_reset(struct extent_ends *extent_ends)
1599 darray_for_each(extent_ends->e, i)
1600 snapshots_seen_exit(&i->seen);
1601 extent_ends->e.nr = 0;
1604 static void extent_ends_exit(struct extent_ends *extent_ends)
1606 extent_ends_reset(extent_ends);
1607 darray_exit(&extent_ends->e);
1610 static void extent_ends_init(struct extent_ends *extent_ends)
1612 memset(extent_ends, 0, sizeof(*extent_ends));
1615 static int extent_ends_at(struct bch_fs *c,
1616 struct extent_ends *extent_ends,
1617 struct snapshots_seen *seen,
1620 struct extent_end *i, n = (struct extent_end) {
1621 .offset = k.k->p.offset,
1622 .snapshot = k.k->p.snapshot,
1626 n.seen.ids.data = kmemdup(seen->ids.data,
1627 sizeof(seen->ids.data[0]) * seen->ids.size,
1629 if (!n.seen.ids.data)
1630 return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
1632 __darray_for_each(extent_ends->e, i) {
1633 if (i->snapshot == k.k->p.snapshot) {
1634 snapshots_seen_exit(&i->seen);
1639 if (i->snapshot >= k.k->p.snapshot)
1643 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n);
1646 static int overlapping_extents_found(struct btree_trans *trans,
1647 enum btree_id btree,
1648 struct bpos pos1, struct snapshots_seen *pos1_seen,
1651 struct extent_end *extent_end)
1653 struct bch_fs *c = trans->c;
1654 struct printbuf buf = PRINTBUF;
1655 struct btree_iter iter1, iter2 = { NULL };
1656 struct bkey_s_c k1, k2;
1659 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
1661 bch2_trans_iter_init(trans, &iter1, btree, pos1,
1662 BTREE_ITER_all_snapshots|
1663 BTREE_ITER_not_extents);
1664 k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX));
1669 prt_str(&buf, "\n ");
1670 bch2_bkey_val_to_text(&buf, c, k1);
1672 if (!bpos_eq(pos1, k1.k->p)) {
1673 prt_str(&buf, "\n wanted\n ");
1674 bch2_bpos_to_text(&buf, pos1);
1675 prt_str(&buf, "\n ");
1676 bch2_bkey_to_text(&buf, &pos2);
1678 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
1680 ret = -BCH_ERR_internal_fsck_err;
1684 bch2_trans_copy_iter(&iter2, &iter1);
1687 bch2_btree_iter_advance(&iter2);
1689 k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX));
1694 if (bpos_ge(k2.k->p, pos2.p))
1698 prt_str(&buf, "\n ");
1699 bch2_bkey_val_to_text(&buf, c, k2);
1701 if (bpos_gt(k2.k->p, pos2.p) ||
1702 pos2.size != k2.k->size) {
1703 bch_err(c, "%s: error finding second overlapping extent when repairing%s",
1705 ret = -BCH_ERR_internal_fsck_err;
1709 prt_printf(&buf, "\n overwriting %s extent",
1710 pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
1712 if (fsck_err(trans, extent_overlapping,
1713 "overlapping extents%s", buf.buf)) {
1714 struct btree_iter *old_iter = &iter1;
1715 struct disk_reservation res = { 0 };
1717 if (pos1.snapshot < pos2.p.snapshot) {
1722 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2);
1724 ret = bch2_trans_update_extent_overwrite(trans, old_iter,
1725 BTREE_UPDATE_internal_snapshot_node,
1727 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc);
1728 bch2_disk_reservation_put(c, &res);
1735 if (pos1.snapshot == pos2.p.snapshot) {
1737 * We overwrote the first extent, and did the overwrite
1738 * in the same snapshot:
1740 extent_end->offset = bkey_start_offset(&pos2);
1741 } else if (pos1.snapshot > pos2.p.snapshot) {
1743 * We overwrote the first extent in pos2's snapshot:
1745 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
1748 * We overwrote the second extent - restart
1749 * check_extent() from the top:
1751 ret = -BCH_ERR_transaction_restart_nested;
1756 bch2_trans_iter_exit(trans, &iter2);
1757 bch2_trans_iter_exit(trans, &iter1);
1758 printbuf_exit(&buf);
1762 static int check_overlapping_extents(struct btree_trans *trans,
1763 struct snapshots_seen *seen,
1764 struct extent_ends *extent_ends,
1766 struct btree_iter *iter,
1769 struct bch_fs *c = trans->c;
1772 /* transaction restart, running again */
1773 if (bpos_eq(extent_ends->last_pos, k.k->p))
1776 if (extent_ends->last_pos.inode != k.k->p.inode)
1777 extent_ends_reset(extent_ends);
1779 darray_for_each(extent_ends->e, i) {
1780 if (i->offset <= bkey_start_offset(k.k))
1783 if (!ref_visible2(c,
1784 k.k->p.snapshot, seen,
1785 i->snapshot, &i->seen))
1788 ret = overlapping_extents_found(trans, iter->btree_id,
1789 SPOS(iter->pos.inode,
1798 extent_ends->last_pos = k.k->p;
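/*
 * Report encoded extents whose uncompressed size exceeds the filesystem's
 * encoded_extent_max:
 */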
1803 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
1806 struct bch_fs *c = trans->c;
1807 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1808 struct bch_extent_crc_unpacked crc;
1809 const union bch_extent_entry *i;
1810 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;
1812 bkey_for_each_crc(k.k, ptrs, crc, i)
1813 if (crc_is_encoded(crc) &&
1814 crc.uncompressed_size > encoded_extent_max_sectors) {
1815 struct printbuf buf = PRINTBUF;
1817 bch2_bkey_val_to_text(&buf, c, k);
1818 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
1819 printbuf_exit(&buf);
1825 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
1827 struct inode_walker *inode,
1828 struct snapshots_seen *s,
1829 struct extent_ends *extent_ends,
1830 struct disk_reservation *res)
1832 struct bch_fs *c = trans->c;
1833 struct printbuf buf = PRINTBUF;
1836 ret = bch2_check_key_has_snapshot(trans, iter, k);
1838 ret = ret < 0 ? ret : 0;
1842 if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) {
1843 ret = check_i_sectors(trans, inode);
1848 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
1852 struct inode_walker_entry *extent_i = walk_inode(trans, inode, k);
1853 ret = PTR_ERR_OR_ZERO(extent_i);
1857 ret = check_key_has_inode(trans, iter, inode, extent_i, k);
1861 if (k.k->type != KEY_TYPE_whiteout) {
1862 ret = check_overlapping_extents(trans, s, extent_ends, k, iter,
1863 &inode->recalculate_sums);
1868 * Check inodes in reverse order, from oldest snapshots to
1869 * newest, starting from the inode that matches this extent's
1870 * snapshot. If we didn't have one, iterate over all inodes:
1872 for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
1873 inode->inodes.data && i >= inode->inodes.data;
1875 if (i->snapshot > k.k->p.snapshot ||
1876 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
1879 if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
1880 !bkey_extent_is_reservation(k),
1881 trans, extent_past_end_of_inode,
1882 "extent type past end of inode %llu:%u, i_size %llu\n %s",
1883 i->inode.bi_inum, i->snapshot, i->inode.bi_size,
1884 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1885 struct btree_iter iter2;
1887 bch2_trans_copy_iter(&iter2, iter);
1888 bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
1889 ret = bch2_btree_iter_traverse(&iter2) ?:
1890 bch2_btree_delete_at(trans, &iter2,
1891 BTREE_UPDATE_internal_snapshot_node);
1892 bch2_trans_iter_exit(trans, &iter2);
1896 iter->k.type = KEY_TYPE_whiteout;
1902 ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc);
1906 if (bkey_extent_is_allocation(k.k)) {
1907 for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
1908 inode->inodes.data && i >= inode->inodes.data;
1910 if (i->snapshot > k.k->p.snapshot ||
1911 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
1914 i->count += k.k->size;
1918 if (k.k->type != KEY_TYPE_whiteout) {
1919 ret = extent_ends_at(c, extent_ends, s, k);
1926 printbuf_exit(&buf);
1932 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
1933 * that i_size and i_sectors are consistent
1935 int bch2_check_extents(struct bch_fs *c)
1937 struct inode_walker w = inode_walker_init();
1938 struct snapshots_seen s;
1939 struct extent_ends extent_ends;
1940 struct disk_reservation res = { 0 };
1942 snapshots_seen_init(&s);
1943 extent_ends_init(&extent_ends);
1945 int ret = bch2_trans_run(c,
1946 for_each_btree_key(trans, iter, BTREE_ID_extents,
1947 POS(BCACHEFS_ROOT_INO, 0),
1948 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
1949 bch2_disk_reservation_put(c, &res);
1950 check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?:
1951 check_extent_overbig(trans, &iter, k);
1953 check_i_sectors_notnested(trans, &w));
1955 bch2_disk_reservation_put(c, &res);
1956 extent_ends_exit(&extent_ends);
1957 inode_walker_exit(&w);
1958 snapshots_seen_exit(&s);
1964 int bch2_check_indirect_extents(struct bch_fs *c)
1966 struct disk_reservation res = { 0 };
1968 int ret = bch2_trans_run(c,
1969 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
1971 BTREE_ITER_prefetch, k,
1973 BCH_TRANS_COMMIT_no_enospc, ({
1974 bch2_disk_reservation_put(c, &res);
1975 check_extent_overbig(trans, &iter, k);
1978 bch2_disk_reservation_put(c, &res);
1983 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
1985 struct bch_fs *c = trans->c;
1989 darray_for_each(w->inodes, i) {
1990 if (i->inode.bi_nlink == i->count)
1993 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
1997 if (i->count != count2) {
1998 bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
1999 w->last_pos.inode, i->snapshot, i->count, count2);
2001 if (i->inode.bi_nlink == i->count)
2005 if (fsck_err_on(i->inode.bi_nlink != i->count,
2006 trans, inode_dir_wrong_nlink,
2007 "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
2008 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
2009 i->inode.bi_nlink = i->count;
2010 ret = bch2_fsck_write_inode(trans, &i->inode);
2020 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
2022 u32 restart_count = trans->restart_count;
2023 return check_subdir_count_notnested(trans, w) ?:
2024 trans_was_restarted(trans, restart_count);
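/*
 * Check that the inode a dirent points to points back at this dirent; repair
 * missing or wrong backpointers, and disallow multiple links to directories
 * and subvolumes.
 */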
2028 static int check_dirent_inode_dirent(struct btree_trans *trans,
2029 struct btree_iter *iter,
2030 struct bkey_s_c_dirent d,
2031 struct bch_inode_unpacked *target)
2033 struct bch_fs *c = trans->c;
2034 struct printbuf buf = PRINTBUF;
2035 struct btree_iter bp_iter = { NULL };
2038 if (inode_points_to_dirent(target, d))
2041 if (!target->bi_dir &&
2042 !target->bi_dir_offset) {
2043 fsck_err_on(S_ISDIR(target->bi_mode),
2044 trans, inode_dir_missing_backpointer,
2045 "directory with missing backpointer\n%s",
2046 (printbuf_reset(&buf),
2047 bch2_bkey_val_to_text(&buf, c, d.s_c),
2048 prt_printf(&buf, "\n"),
2049 bch2_inode_unpacked_to_text(&buf, target),
2052 fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
2053 trans, inode_unlinked_but_has_dirent,
2054 "inode unlinked but has dirent\n%s",
2055 (printbuf_reset(&buf),
2056 bch2_bkey_val_to_text(&buf, c, d.s_c),
2057 prt_printf(&buf, "\n"),
2058 bch2_inode_unpacked_to_text(&buf, target),
2061 target->bi_flags &= ~BCH_INODE_unlinked;
2062 target->bi_dir = d.k->p.inode;
2063 target->bi_dir_offset = d.k->p.offset;
2064 return __bch2_fsck_write_inode(trans, target);
2067 if (bch2_inode_should_have_single_bp(target) &&
2068 !fsck_err(trans, inode_wrong_backpointer,
2069 "dirent points to inode that does not point back:\n %s",
2070 (bch2_bkey_val_to_text(&buf, c, d.s_c),
2071 prt_printf(&buf, "\n "),
2072 bch2_inode_unpacked_to_text(&buf, target),
2076 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
2077 SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot));
2078 ret = bkey_err(bp_dirent);
2079 if (ret && !bch2_err_matches(ret, ENOENT))
2082 bool backpointer_exists = !ret;
2085 if (fsck_err_on(!backpointer_exists,
2086 trans, inode_wrong_backpointer,
2087 "inode %llu:%u has wrong backpointer:\n"
2089 "should be %llu:%llu",
2090 target->bi_inum, target->bi_snapshot,
2092 target->bi_dir_offset,
2095 target->bi_dir = d.k->p.inode;
2096 target->bi_dir_offset = d.k->p.offset;
2097 ret = __bch2_fsck_write_inode(trans, target);
2101 bch2_bkey_val_to_text(&buf, c, d.s_c);
2103 if (backpointer_exists)
2104 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
2106 if (fsck_err_on(backpointer_exists &&
2107 (S_ISDIR(target->bi_mode) ||
2109 trans, inode_dir_multiple_links,
2110 "%s %llu:%u with multiple links\n%s",
2111 S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
2112 target->bi_inum, target->bi_snapshot, buf.buf)) {
2113 ret = __remove_dirent(trans, d.k->p);
2118 * hardlinked file with nlink 0:
2119 * We're just adjusting nlink here so check_nlinks() will pick
2120 * it up, it ignores inodes with nlink 0
2122 if (fsck_err_on(backpointer_exists && !target->bi_nlink,
2123 trans, inode_multiple_links_but_nlink_0,
2124 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
2125 target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
2127 target->bi_flags &= ~BCH_INODE_unlinked;
2128 ret = __bch2_fsck_write_inode(trans, target);
2135 bch2_trans_iter_exit(trans, &bp_iter);
2136 printbuf_exit(&buf);
2142 static int check_dirent_target(struct btree_trans *trans,
2143 struct btree_iter *iter,
2144 struct bkey_s_c_dirent d,
2145 struct bch_inode_unpacked *target)
2147 struct bch_fs *c = trans->c;
2148 struct bkey_i_dirent *n;
2149 struct printbuf buf = PRINTBUF;
2152 ret = check_dirent_inode_dirent(trans, iter, d, target);
2156 if (fsck_err_on(d.v->d_type != inode_d_type(target),
2157 trans, dirent_d_type_wrong,
2158 "incorrect d_type: got %s, should be %s:\n%s",
2159 bch2_d_type_str(d.v->d_type),
2160 bch2_d_type_str(inode_d_type(target)),
2161 (printbuf_reset(&buf),
2162 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
2163 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
2164 ret = PTR_ERR_OR_ZERO(n);
2168 bkey_reassemble(&n->k_i, d.s_c);
2169 n->v.d_type = inode_d_type(target);
2170 if (n->v.d_type == DT_SUBVOL) {
2171 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
2172 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
2174 n->v.d_inum = cpu_to_le64(target->bi_inum);
2177 ret = bch2_trans_update(trans, iter, &n->k_i, 0);
2181 d = dirent_i_to_s_c(n);
2185 printbuf_exit(&buf);
2190 /* find a subvolume that's a descendent of @snapshot: */
2191 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
2193 struct btree_iter iter;
2197 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
2198 if (k.k->type != KEY_TYPE_subvolume)
2201 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
2202 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
2203 bch2_trans_iter_exit(trans, &iter);
2204 *subvolid = k.k->p.offset;
2211 bch2_trans_iter_exit(trans, &iter);
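/*
 * For DT_SUBVOL dirents: check that the parent and child subvolumes exist,
 * that the child's fs_path_parent points back at the parent subvol, and that
 * the subvolume root inode's bi_parent_subvol is correct.
 */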
2216 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
2217 struct bkey_s_c_dirent d)
2219 struct bch_fs *c = trans->c;
2220 struct btree_iter subvol_iter = {};
2221 struct bch_inode_unpacked subvol_root;
2222 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
2223 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
2224 u32 parent_snapshot;
2225 u32 new_parent_subvol = 0;
2227 struct printbuf buf = PRINTBUF;
2230 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
2231 if (ret && !bch2_err_matches(ret, ENOENT))
2235 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
2236 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
2237 if (ret2 && !bch2_err_matches(ret, ENOENT))
2242 !new_parent_subvol &&
2243 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
2245 * Couldn't find a subvol for dirent's snapshot - but we lost
2246 * subvols, so we need to reconstruct:
2248 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
2252 parent_snapshot = d.k->p.snapshot;
2255 if (fsck_err_on(ret,
2256 trans, dirent_to_missing_parent_subvol,
2257 "dirent parent_subvol points to missing subvolume\n%s",
2258 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
2259 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
2260 trans, dirent_not_visible_in_parent_subvol,
2261 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
2263 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
2264 if (!new_parent_subvol) {
2265 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
2266 return -BCH_ERR_fsck_repair_unimplemented;
2269 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
2270 ret = PTR_ERR_OR_ZERO(new_dirent);
2274 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
2277 struct bkey_s_c_subvolume s =
2278 bch2_bkey_get_iter_typed(trans, &subvol_iter,
2279 BTREE_ID_subvolumes, POS(0, target_subvol),
2281 ret = bkey_err(s.s_c);
2282 if (ret && !bch2_err_matches(ret, ENOENT))
2286 if (fsck_err(trans, dirent_to_missing_subvol,
2287 "dirent points to missing subvolume\n%s",
2288 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
2289 return __remove_dirent(trans, d.k->p);
2294 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
2295 trans, subvol_fs_path_parent_wrong,
2296 "subvol with wrong fs_path_parent, should be %u\n%s",
2298 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
2299 struct bkey_i_subvolume *n =
2300 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
2301 ret = PTR_ERR_OR_ZERO(n);
2305 n->v.fs_path_parent = cpu_to_le32(parent_subvol);
2308 u64 target_inum = le64_to_cpu(s.v->inode);
2309 u32 target_snapshot = le32_to_cpu(s.v->snapshot);
2311 ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root);
2312 if (ret && !bch2_err_matches(ret, ENOENT))
2316 bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
2317 ret = -BCH_ERR_fsck_repair_unimplemented;
2321 if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
2322 trans, inode_bi_parent_wrong,
2323 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
2325 subvol_root.bi_parent_subvol, parent_subvol)) {
2326 subvol_root.bi_parent_subvol = parent_subvol;
2327 subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot);
2328 ret = __bch2_fsck_write_inode(trans, &subvol_root);
2333 ret = check_dirent_target(trans, iter, d, &subvol_root);
2339 bch2_trans_iter_exit(trans, &subvol_iter);
2340 printbuf_exit(&buf);
2344 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
2346 struct bch_hash_info *hash_info,
2347 struct inode_walker *dir,
2348 struct inode_walker *target,
2349 struct snapshots_seen *s)
2351 struct bch_fs *c = trans->c;
2352 struct inode_walker_entry *i;
2353 struct printbuf buf = PRINTBUF;
2356 ret = bch2_check_key_has_snapshot(trans, iter, k);
2358 ret = ret < 0 ? ret : 0;
2362 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
2366 if (k.k->type == KEY_TYPE_whiteout)
2369 if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
2370 ret = check_subdir_count(trans, dir);
2375 i = walk_inode(trans, dir, k);
2376 ret = PTR_ERR_OR_ZERO(i);
2380 ret = check_key_has_inode(trans, iter, dir, i, k);
2387 if (dir->first_this_inode)
2388 *hash_info = bch2_hash_info_init(c, &i->inode);
2389 dir->first_this_inode = false;
2391 ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k);
2395 /* dirent has been deleted */
2400 if (k.k->type != KEY_TYPE_dirent)
2403 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
2405 if (d.v->d_type == DT_SUBVOL) {
2406 ret = check_dirent_to_subvol(trans, iter, d);
2410 ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
2414 if (fsck_err_on(!target->inodes.nr,
2415 trans, dirent_to_missing_inode,
2416 "dirent points to missing inode:\n%s",
2417 (printbuf_reset(&buf),
2418 bch2_bkey_val_to_text(&buf, c, k),
2420 ret = __remove_dirent(trans, d.k->p);
2425 darray_for_each(target->inodes, i) {
2426 ret = check_dirent_target(trans, iter, d, &i->inode);
2431 darray_for_each(target->deletes, i)
2432 if (fsck_err_on(!snapshot_list_has_id(&s->ids, *i),
2433 trans, dirent_to_overwritten_inode,
2434 "dirent points to inode overwritten in snapshot %u:\n%s",
2436 (printbuf_reset(&buf),
2437 bch2_bkey_val_to_text(&buf, c, k),
2439 struct btree_iter delete_iter;
2440 bch2_trans_iter_init(trans, &delete_iter,
2442 SPOS(k.k->p.inode, k.k->p.offset, *i),
2444 ret = bch2_btree_iter_traverse(&delete_iter) ?:
2445 bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
2448 BTREE_UPDATE_internal_snapshot_node);
2449 bch2_trans_iter_exit(trans, &delete_iter);
2456 ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
2460 if (d.v->d_type == DT_DIR)
2461 for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
2466 printbuf_exit(&buf);
2472 * Walk dirents: verify that they all have a corresponding S_ISDIR inode, and validate d_type
2475 int bch2_check_dirents(struct bch_fs *c)
2477 struct inode_walker dir = inode_walker_init();
2478 struct inode_walker target = inode_walker_init();
2479 struct snapshots_seen s;
2480 struct bch_hash_info hash_info;
2482 snapshots_seen_init(&s);
2484 int ret = bch2_trans_run(c,
2485 for_each_btree_key(trans, iter, BTREE_ID_dirents,
2486 POS(BCACHEFS_ROOT_INO, 0),
2487 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
2488 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
2489 check_subdir_count_notnested(trans, &dir));
2491 snapshots_seen_exit(&s);
2492 inode_walker_exit(&dir);
2493 inode_walker_exit(&target);
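/*
 * Check a single xattr: verify its snapshot, that the inode it belongs to
 * exists, and that it sits on the correct hash chain.
 */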
2498 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
2500 struct bch_hash_info *hash_info,
2501 struct inode_walker *inode)
2503 struct bch_fs *c = trans->c;
2504 struct inode_walker_entry *i;
2507 ret = bch2_check_key_has_snapshot(trans, iter, k);
2513 i = walk_inode(trans, inode, k);
2514 ret = PTR_ERR_OR_ZERO(i);
2518 ret = check_key_has_inode(trans, iter, inode, i, k);
2525 if (inode->first_this_inode)
2526 *hash_info = bch2_hash_info_init(c, &i->inode);
2527 inode->first_this_inode = false;
2529 ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k);
2535 * Walk xattrs: verify that they all have a corresponding inode
2537 int bch2_check_xattrs(struct bch_fs *c)
2539 struct inode_walker inode = inode_walker_init();
2540 struct bch_hash_info hash_info;
2543 ret = bch2_trans_run(c,
2544 for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
2545 POS(BCACHEFS_ROOT_INO, 0),
2546 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
2549 BCH_TRANS_COMMIT_no_enospc,
2550 check_xattr(trans, &iter, k, &hash_info, &inode)));
2552 inode_walker_exit(&inode);
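/*
 * Verify that the root subvolume and root directory inode exist, recreating
 * them if they're missing or the root inode isn't a directory.
 */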
2557 static int check_root_trans(struct btree_trans *trans)
2559 struct bch_fs *c = trans->c;
2560 struct bch_inode_unpacked root_inode;
2565 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
2566 if (ret && !bch2_err_matches(ret, ENOENT))
2569 if (mustfix_fsck_err_on(ret, trans, root_subvol_missing,
2570 "root subvol missing")) {
2571 struct bkey_i_subvolume *root_subvol =
2572 bch2_trans_kmalloc(trans, sizeof(*root_subvol));
2573 ret = PTR_ERR_OR_ZERO(root_subvol);
2578 inum = BCACHEFS_ROOT_INO;
2580 bkey_subvolume_init(&root_subvol->k_i);
2581 root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
2582 root_subvol->v.flags = 0;
2583 root_subvol->v.snapshot = cpu_to_le32(snapshot);
2584 root_subvol->v.inode = cpu_to_le64(inum);
2585 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
2586 bch_err_msg(c, ret, "writing root subvol");
2591 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode);
2592 if (ret && !bch2_err_matches(ret, ENOENT))
2595 if (mustfix_fsck_err_on(ret,
2596 trans, root_dir_missing,
2597 "root directory missing") ||
2598 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode),
2599 trans, root_inode_not_dir,
2600 "root inode not a directory")) {
2601 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
2603 root_inode.bi_inum = inum;
2604 root_inode.bi_snapshot = snapshot;
2606 ret = __bch2_fsck_write_inode(trans, &root_inode);
2607 bch_err_msg(c, ret, "writing root inode");
2614 /* Get root directory, create if it doesn't exist: */
2615 int bch2_check_root(struct bch_fs *c)
2617 int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
2618 check_root_trans(trans));
2623 typedef DARRAY(u32) darray_u32;
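/*
 * Subvolume connectivity: from each subvolume, walk up the fs_path_parent
 * chain towards the root subvolume, tracking the subvolume IDs already seen
 * so that loops and unreachable subvolumes can be detected and repaired.
 */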
2625 static bool darray_u32_has(darray_u32 *d, u32 v)
2627 darray_for_each(*d, i)
2633 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
2635 struct bch_fs *c = trans->c;
2636 struct btree_iter parent_iter = {};
2637 darray_u32 subvol_path = {};
2638 struct printbuf buf = PRINTBUF;
2641 if (k.k->type != KEY_TYPE_subvolume)
2644 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
2645 ret = darray_push(&subvol_path, k.k->p.offset);
2649 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
2651 struct bch_inode_unpacked subvol_root;
2652 ret = bch2_inode_find_by_inum_trans(trans,
2653 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
2658 u32 parent = le32_to_cpu(s.v->fs_path_parent);
2660 if (darray_u32_has(&subvol_path, parent)) {
2661 if (fsck_err(c, subvol_loop, "subvolume loop"))
2662 ret = reattach_subvol(trans, s);
2666 bch2_trans_iter_exit(trans, &parent_iter);
2667 bch2_trans_iter_init(trans, &parent_iter,
2668 BTREE_ID_subvolumes, POS(0, parent), 0);
2669 k = bch2_btree_iter_peek_slot(&parent_iter);
2674 if (fsck_err_on(k.k->type != KEY_TYPE_subvolume,
2675 trans, subvol_unreachable,
2676 "unreachable subvolume %s",
2677 (bch2_bkey_val_to_text(&buf, c, s.s_c),
2679 ret = reattach_subvol(trans, s);
2685 printbuf_exit(&buf);
2686 darray_exit(&subvol_path);
2687 bch2_trans_iter_exit(trans, &parent_iter);
2691 int bch2_check_subvolume_structure(struct bch_fs *c)
2693 int ret = bch2_trans_run(c,
2694 for_each_btree_key_commit(trans, iter,
2695 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k,
2696 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
2697 check_subvol_path(trans, &iter, k)));
2702 struct pathbuf_entry {
2707 typedef DARRAY(struct pathbuf_entry) pathbuf;
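/*
 * Rewrite bi_depth on a single inode in the path: look it up by
 * (inum, snapshot) and, if the stored depth differs from new_depth, write the
 * corrected inode and commit.
 */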
2709 static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
2712 struct btree_iter iter;
2713 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
2714 SPOS(0, p->inum, p->snapshot), 0);
2716 struct bch_inode_unpacked inode;
2717 int ret = bkey_err(k) ?:
2718 !bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
2719 : bch2_inode_unpack(k, &inode);
2723 if (inode.bi_depth != new_depth) {
2724 inode.bi_depth = new_depth;
2725 ret = __bch2_fsck_write_inode(trans, &inode) ?:
2726 bch2_trans_commit(trans, NULL, NULL, 0);
2729 bch2_trans_iter_exit(trans, &iter);
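/*
 * Renumber bi_depth along an accumulated path, walking the path in reverse so
 * the parent-most inode is updated first.
 */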
2733 static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
2735 u32 restart_count = trans->restart_count;
2738 darray_for_each_reverse(*path, i) {
2739 ret = nested_lockrestart_do(trans,
2740 bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
2741 bch_err_fn(trans->c, ret);
2748 return ret ?: trans_was_restarted(trans, restart_count);
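/*
 * path_is_dup()/check_path_loop(): walk up the directory hierarchy from an
 * inode via its bi_dir/bi_dir_offset backpointer until a subvolume root is
 * reached. An inode seen twice on the way up is a loop, broken by removing
 * the backpointing dirent and reattaching the inode; bi_depth is renumbered
 * along the path if it turns out to be inconsistent.
 */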
2751 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
2753 darray_for_each(*p, i)
2754 if (i->inum == inum &&
2755 i->snapshot == snapshot)
2760 static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
2762 struct bch_fs *c = trans->c;
2763 struct btree_iter inode_iter = {};
2765 struct printbuf buf = PRINTBUF;
2766 u32 snapshot = inode_k.k->p.snapshot;
2767 bool redo_bi_depth = false;
2768 u32 min_bi_depth = U32_MAX;
2771 struct bch_inode_unpacked inode;
2772 ret = bch2_inode_unpack(inode_k, &inode);
2776 while (!inode.bi_subvol) {
2777 struct btree_iter dirent_iter;
2778 struct bkey_s_c_dirent d;
2779 u32 parent_snapshot = snapshot;
2781 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
2782 ret = bkey_err(d.s_c);
2783 if (ret && !bch2_err_matches(ret, ENOENT))
2786 if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
2787 bch2_trans_iter_exit(trans, &dirent_iter);
2789 if (bch2_err_matches(ret, ENOENT)) {
2790 printbuf_reset(&buf);
2791 bch2_bkey_val_to_text(&buf, c, inode_k);
2792 bch_err(c, "unreachable inode in check_directory_structure: %s\n%s",
2793 bch2_err_str(ret), buf.buf);
2797 bch2_trans_iter_exit(trans, &dirent_iter);
2799 ret = darray_push(&path, ((struct pathbuf_entry) {
2800 .inum = inode.bi_inum,
2801 .snapshot = snapshot,
2806 snapshot = parent_snapshot;
2808 bch2_trans_iter_exit(trans, &inode_iter);
2809 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
2810 SPOS(0, inode.bi_dir, snapshot), 0);
2812 struct bch_inode_unpacked parent_inode;
2813 ret = bkey_err(inode_k) ?:
2814 !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
2815 : bch2_inode_unpack(inode_k, &parent_inode);
2817 /* Should have been caught in dirents pass */
2818 bch_err_msg(c, ret, "error looking up parent directory");
2822 min_bi_depth = parent_inode.bi_depth;
2824 if (parent_inode.bi_depth < inode.bi_depth &&
2825 min_bi_depth < U16_MAX)
2828 inode = parent_inode;
2829 snapshot = inode_k.k->p.snapshot;
2830 redo_bi_depth = true;
2832 if (path_is_dup(&path, inode.bi_inum, snapshot)) {
2833 /* XXX print path */
2834 bch_err(c, "directory structure loop");
2836 darray_for_each(path, i)
2837 pr_err("%llu:%u", i->inum, i->snapshot);
2838 pr_err("%llu:%u", inode.bi_inum, snapshot);
2840 if (fsck_err(trans, dir_loop, "directory structure loop")) {
2841 ret = remove_backpointer(trans, &inode);
2842 bch_err_msg(c, ret, "removing dirent");
2846 ret = reattach_inode(trans, &inode);
2847 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
2854 if (inode.bi_subvol)
2858 ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
2861 bch2_trans_iter_exit(trans, &inode_iter);
2863 printbuf_exit(&buf);
2869 * Check for loops in the directory structure: all other connectivity issues
2870 * have been fixed by prior passes
2872 int bch2_check_directory_structure(struct bch_fs *c)
2874 int ret = bch2_trans_run(c,
2875 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
2877 BTREE_ITER_prefetch|
2878 BTREE_ITER_all_snapshots, k,
2879 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
2880 if (!S_ISDIR(bkey_inode_mode(k)))
2883 if (bch2_inode_flags(k) & BCH_INODE_unlinked)
2886 check_path_loop(trans, k);
2893 struct nlink_table {
2904 static int add_nlink(struct bch_fs *c, struct nlink_table *t,
2905 u64 inum, u32 snapshot)
2907 if (t->nr == t->size) {
2908 size_t new_size = max_t(size_t, 128UL, t->size * 2);
2909 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
2912 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
2914 return -BCH_ERR_ENOMEM_fsck_add_nlink;
2918 memcpy(d, t->d, t->size * sizeof(t->d[0]));
2926 t->d[t->nr++] = (struct nlink) {
2928 .snapshot = snapshot,
2934 static int nlink_cmp(const void *_l, const void *_r)
2936 const struct nlink *l = _l;
2937 const struct nlink *r = _r;
2939 return cmp_int(l->inum, r->inum);
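/*
 * Increment the link count for the inode a dirent points to: bsearch the
 * table (sorted by inode number), walk back to the first entry for that
 * inode, then bump every snapshot version visible from the dirent's snapshot.
 */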
2942 static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
2943 struct nlink_table *links,
2944 u64 range_start, u64 range_end, u64 inum, u32 snapshot)
2946 struct nlink *link, key = {
2947 .inum = inum, .snapshot = U32_MAX,
2950 if (inum < range_start || inum >= range_end)
2953 link = __inline_bsearch(&key, links->d, links->nr,
2954 sizeof(links->d[0]), nlink_cmp);
2958 while (link > links->d && link[0].inum == link[-1].inum)
2961 for (; link < links->d + links->nr && link->inum == inum; link++)
2962 if (ref_visible(c, s, snapshot, link->snapshot)) {
2964 if (link->snapshot >= snapshot)
2970 static int check_nlinks_find_hardlinks(struct bch_fs *c,
2971 struct nlink_table *t,
2972 u64 start, u64 *end)
2974 int ret = bch2_trans_run(c,
2975 for_each_btree_key(trans, iter, BTREE_ID_inodes,
2978 BTREE_ITER_prefetch|
2979 BTREE_ITER_all_snapshots, k, ({
2980 if (!bkey_is_inode(k.k))
2983 /* Should never fail, checked by bch2_inode_invalid: */
2984 struct bch_inode_unpacked u;
2985 _ret3 = bch2_inode_unpack(k, &u);
2990 * Backpointer and directory structure checks are sufficient for
2991 * directories, since they can't have hardlinks:
2993 if (S_ISDIR(u.bi_mode))
2997 * Previous passes ensured that bi_nlink is nonzero if
2998 * the inode has multiple hardlinks:
3003 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
3005 *end = k.k->p.offset;
3017 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
3018 u64 range_start, u64 range_end)
3020 struct snapshots_seen s;
3022 snapshots_seen_init(&s);
3024 int ret = bch2_trans_run(c,
3025 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
3027 BTREE_ITER_prefetch|
3028 BTREE_ITER_all_snapshots, k, ({
3029 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
3033 if (k.k->type == KEY_TYPE_dirent) {
3034 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
3036 if (d.v->d_type != DT_DIR &&
3037 d.v->d_type != DT_SUBVOL)
3038 inc_link(c, &s, links, range_start, range_end,
3039 le64_to_cpu(d.v->d_inum), d.k->p.snapshot);
3044 snapshots_seen_exit(&s);
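/*
 * Final pass: compare each inode's stored link count against the count
 * accumulated from dirents, and rewrite the inode if they disagree.
 */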
3050 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
3052 struct nlink_table *links,
3053 size_t *idx, u64 range_end)
3055 struct bch_inode_unpacked u;
3056 struct nlink *link = &links->d[*idx];
3059 if (k.k->p.offset >= range_end)
3062 if (!bkey_is_inode(k.k))
3065 ret = bch2_inode_unpack(k, &u);
3069 if (S_ISDIR(u.bi_mode))
3075 while ((cmp_int(link->inum, k.k->p.offset) ?:
3076 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
3077 BUG_ON(*idx == links->nr);
3078 link = &links->d[++*idx];
3081 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count,
3082 trans, inode_wrong_nlink,
3083 "inode %llu type %s has wrong i_nlink (%u, should be %u)",
3084 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
3085 bch2_inode_nlink_get(&u), link->count)) {
3086 bch2_inode_nlink_set(&u, link->count);
3087 ret = __bch2_fsck_write_inode(trans, &u);
3094 static int check_nlinks_update_hardlinks(struct bch_fs *c,
3095 struct nlink_table *links,
3096 u64 range_start, u64 range_end)
3100 int ret = bch2_trans_run(c,
3101 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
3102 POS(0, range_start),
3103 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
3104 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
3105 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
3107 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret));
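/*
 * The nlink check runs over the inode number space in chunks: collect inodes
 * that may have hardlinks into an in-memory table, count the dirents pointing
 * at them, then correct any inodes whose stored nlink disagrees. If the table
 * can't grow any further, the remaining range is picked up on the next
 * iteration.
 */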
3114 int bch2_check_nlinks(struct bch_fs *c)
3116 struct nlink_table links = { 0 };
3117 u64 this_iter_range_start, next_iter_range_start = 0;
3121 this_iter_range_start = next_iter_range_start;
3122 next_iter_range_start = U64_MAX;
3124 ret = check_nlinks_find_hardlinks(c, &links,
3125 this_iter_range_start,
3126 &next_iter_range_start);
3128 ret = check_nlinks_walk_dirents(c, &links,
3129 this_iter_range_start,
3130 next_iter_range_start);
3134 ret = check_nlinks_update_hardlinks(c, &links,
3135 this_iter_range_start,
3136 next_iter_range_start);
3141 } while (next_iter_range_start != U64_MAX);
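/*
 * Reflink pointers written before the reflink_p_fix metadata version may
 * carry stale front_pad/back_pad values; rewrite any such key with the
 * padding cleared.
 */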
3148 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
3151 struct bkey_s_c_reflink_p p;
3152 struct bkey_i_reflink_p *u;
3154 if (k.k->type != KEY_TYPE_reflink_p)
3157 p = bkey_s_c_to_reflink_p(k);
3159 if (!p.v->front_pad && !p.v->back_pad)
3162 u = bch2_trans_kmalloc(trans, sizeof(*u));
3163 int ret = PTR_ERR_OR_ZERO(u);
3167 bkey_reassemble(&u->k_i, k);
3171 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun);
3174 int bch2_fix_reflink_p(struct bch_fs *c)
3176 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
3179 int ret = bch2_trans_run(c,
3180 for_each_btree_key_commit(trans, iter,
3181 BTREE_ID_extents, POS_MIN,
3182 BTREE_ITER_intent|BTREE_ITER_prefetch|
3183 BTREE_ITER_all_snapshots, k,
3184 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
3185 fix_reflink_p_key(trans, &iter, k)));
3190 #ifndef NO_BCACHEFS_CHARDEV
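/*
 * fsck via the control device: userspace starts an offline or online fsck
 * through an ioctl, and the work runs in a kernel thread with its stdio
 * redirected back to the calling process.
 */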
3192 struct fsck_thread {
3193 struct thread_with_stdio thr;
3195 struct bch_opts opts;
3198 static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
3200 struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
3204 static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
3206 struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
3207 struct bch_fs *c = thr->c;
3209 int ret = PTR_ERR_OR_ZERO(c);
3213 ret = bch2_fs_start(thr->c);
3217 if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
3218 bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
3221 if (test_bit(BCH_FS_error, &c->flags)) {
3222 bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
3230 static const struct thread_with_stdio_ops bch2_offline_fsck_ops = {
3231 .exit = bch2_fsck_thread_exit,
3232 .fn = bch2_fsck_offline_thread_fn,
3235 long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
3237 struct bch_ioctl_fsck_offline arg;
3238 struct fsck_thread *thr = NULL;
3239 darray_str(devs) = {};
3242 if (copy_from_user(&arg, user_arg, sizeof(arg)))
3248 if (!capable(CAP_SYS_ADMIN))
3251 for (size_t i = 0; i < arg.nr_devs; i++) {
3253 ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
3257 char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
3258 ret = PTR_ERR_OR_ZERO(dev_str);
3262 ret = darray_push(&devs, dev_str);
3269 thr = kzalloc(sizeof(*thr), GFP_KERNEL);
3275 thr->opts = bch2_opts_empty();
3278 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
3279 ret = PTR_ERR_OR_ZERO(optstr) ?:
3280 bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
3281 if (!IS_ERR(optstr))
3288 opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
3289 opt_set(thr->opts, read_only, 1);
3290 opt_set(thr->opts, ratelimit_errors, 0);
3292 /* We need request_key() to be called before we punt to kthread: */
3293 opt_set(thr->opts, nostart, true);
3295 bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
3297 thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
3299 if (!IS_ERR(thr->c) &&
3300 thr->c->opts.errors == BCH_ON_ERROR_panic)
3301 thr->c->opts.errors = BCH_ON_ERROR_ro;
3303 ret = __bch2_run_thread_with_stdio(&thr->thr);
3305 darray_for_each(devs, i)
3311 bch2_fsck_thread_exit(&thr->thr);
3312 pr_err("ret %s", bch2_err_str(ret));
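/*
 * Online fsck: temporarily enable fsck options on the live filesystem, rerun
 * the recovery passes from check_alloc_info onwards, then restore the
 * previous settings.
 */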
3316 static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
3318 struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
3319 struct bch_fs *c = thr->c;
3321 c->stdio_filter = current;
3322 c->stdio = &thr->thr.stdio;
3325 * XXX: can we figure out a way to do this without mucking with c->opts?
3327 unsigned old_fix_errors = c->opts.fix_errors;
3328 if (opt_defined(thr->opts, fix_errors))
3329 c->opts.fix_errors = thr->opts.fix_errors;
3331 c->opts.fix_errors = FSCK_FIX_ask;
3333 c->opts.fsck = true;
3334 set_bit(BCH_FS_fsck_running, &c->flags);
3336 c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
3337 int ret = bch2_run_online_recovery_passes(c);
3339 clear_bit(BCH_FS_fsck_running, &c->flags);
3343 c->stdio_filter = NULL;
3344 c->opts.fix_errors = old_fix_errors;
3346 up(&c->online_fsck_mutex);
3351 static const struct thread_with_stdio_ops bch2_online_fsck_ops = {
3352 .exit = bch2_fsck_thread_exit,
3353 .fn = bch2_fsck_online_thread_fn,
3356 long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg)
3358 struct fsck_thread *thr = NULL;
3364 if (!capable(CAP_SYS_ADMIN))
3367 if (!bch2_ro_ref_tryget(c))
3370 if (down_trylock(&c->online_fsck_mutex)) {
3375 thr = kzalloc(sizeof(*thr), GFP_KERNEL);
3382 thr->opts = bch2_opts_empty();
3385 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
3387 ret = PTR_ERR_OR_ZERO(optstr) ?:
3388 bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
3389 if (!IS_ERR(optstr))
3396 ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops);
3401 bch2_fsck_thread_exit(&thr->thr);
3402 up(&c->online_fsck_mutex);
3408 #endif /* NO_BCACHEFS_CHARDEV */