// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"

#include <linux/mm.h>

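/*
 * Check whether extent @k has a pointer into @bucket matching backpointer
 * @bp; used to verify that a backpointer still points at a live extent.
 */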
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bpos bucket,
			      struct bch_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bch_backpointer bp2;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
				      &bucket2, &bp2);
		if (bpos_eq(bucket, bucket2) &&
		    !memcmp(&bp, &bp2, sizeof(bp)))
			return true;
	}

	return false;
}

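/* A backpointer's position must round-trip through bucket_pos_to_bp(): */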
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
	int ret = 0;

	bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
			 c, err,
			 backpointer_pos_wrong,
			 "backpointer at wrong pos");
fsck_err:
	return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
		   bch2_btree_id_str(bp->btree_id),
		   bp->level,
		   (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}

void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	if (bch2_dev_exists2(c, k.k->p.inode)) {
		prt_str(out, "bucket=");
		bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
		prt_str(out, " ");
	}

	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_offset	= swab40(bp.v->bucket_offset);
	bp.v->bucket_len	= swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

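/*
 * Log an inconsistency between the extent and backpointer btrees: an existing
 * backpointer found on insert, or a missing/mismatched one on delete. Only
 * returns an error once the check_extents_to_backpointers pass has run.
 */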
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		return bch2_inconsistent_error(c) ? -BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
}

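/*
 * Insert or delete a backpointer with a regular btree update, bypassing the
 * btree write buffer; presumably only used with the
 * bch2_backpointers_no_use_write_buffer debug mode.
 */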
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
	bp_k->v = bp;

	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

/*
 * Find the next backpointer >= *bp_pos:
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

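/* Complain about a backpointer whose target extent or btree node is gone: */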
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n  ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}

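/*
 * Return the extent @bp points to, or bkey_s_c_null if it no longer matches;
 * backpointers to btree nodes are resolved via bch2_backpointer_get_node().
 */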
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		struct bch_fs *c = trans->c;
		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
		struct bkey_s_c k;

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}

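/*
 * Return the btree node @bp points to, NULL if it no longer matches, or
 * -BCH_ERR_backpointer_to_overwritten_btree_node if the target is being
 * rewritten.
 */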
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

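/* Check that one backpointer points at a valid device and alloc key: */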
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			backpointer_to_missing_device,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     bp_pos_to_bucket(c, k.k->p), 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
	return bpos_eq(l.k->p, r.k->p) &&
		bkey_bytes(l.k) == bkey_bytes(r.k) &&
		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}

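/* Portion of the bucket space covered by one extents -> backpointers pass: */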
struct extents_to_bp_state {
	struct bpos	bucket_start;
	struct bpos	bucket_end;
	struct bkey_buf last_flushed;
};

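/*
 * Check that a backpointer exists for @bucket matching @bp, creating it if
 * fsck confirms it's missing. A mismatch may only mean the backpointer is
 * still sitting in the write buffer, so flush and retry once per extent.
 */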
static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bpos bucket,
			   struct bch_backpointer bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter bp_iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c bp_k;
	struct bkey_buf tmp;
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	if (bpos_lt(bucket, s->bucket_start) ||
	    bpos_gt(bucket, s->bucket_end))
		return 0;

	if (!bch2_dev_bucket_exists(c, bucket))
		goto missing;

	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
				  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
				  0);
	ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
		bch2_bkey_buf_reassemble(&tmp, c, orig_k);

		if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
			if (bp.level) {
				bch2_trans_unlock(trans);
				bch2_btree_interior_updates_flush(c);
			}

			ret = bch2_btree_write_buffer_flush_sync(trans);
			if (ret)
				goto err;

			bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
			ret = -BCH_ERR_transaction_restart_write_buffer_flush;
			goto err;
		}

		goto missing;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_bkey_buf_exit(&tmp, c);
	printbuf_exit(&buf);
	return ret;
missing:
	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
		   bch2_btree_id_str(bp.btree_id), bp.level);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\nbp pos ");
	bch2_bpos_to_text(&buf, bp_iter.pos);

	if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);

	goto out;
}

static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	int ret;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bch_backpointer bp;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, btree, level,
				      k, p, &bucket_pos, &bp);

		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
		if (ret)
			return ret;
	}

	return 0;
}

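/*
 * Check the backpointer for a btree root, retrying if the root was being
 * rewritten; also reports the root's level back to the caller.
 */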
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

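/* Memory fsck may pin in the btree node cache, per fsck_memory_usage_percent: */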
static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

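/*
 * Walk the requested btrees, pinning nodes in the btree node cache until the
 * memory budget runs out; *end is set to how far we got, so a check can run
 * over one in-memory range at a time.
 */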
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_leaf_mask		= btree_leaf_mask;
	c->btree_cache.pinned_nodes_interior_mask	= btree_interior_mask;
	c->btree_cache.pinned_nodes_start		= start;
	c->btree_cache.pinned_nodes_end			= *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
		struct btree_iter iter;
		struct btree *b;

		if (!((1U << btree) & btree_leaf_mask) &&
		    !((1U << btree) & btree_interior_mask))
			continue;

		__for_each_btree_node(trans, iter, btree,
				      btree == start.btree ? start.pos : POS_MIN,
				      0, depth, BTREE_ITER_PREFETCH, b, ret) {
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
		}
		bch2_trans_iter_exit(trans, &iter);
	}

	return ret;
}

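/*
 * Walk every btree with pointers, leaves and interior nodes both, checking
 * that each pointer has a backpointer within the current bucket range:
 */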
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
						  level,
						  BTREE_ITER_PREFETCH);
			while (1) {
				bch2_trans_begin(trans);

				struct bkey_s_c k = bch2_btree_iter_peek(&iter);
				if (!k.k)
					break;
				ret = bkey_err(k) ?:
					check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL,
							  BCH_TRANS_COMMIT_no_enospc);
				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
					ret = 0;
					continue;
				}
				if (ret)
					break;
				if (bpos_eq(iter.pos, SPOS_MAX))
					break;
				bch2_btree_iter_advance(&iter);
			}
			bch2_trans_iter_exit(trans, &iter);

			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

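/*
 * Verify that every extent and btree node pointer has a backpointer: pins the
 * backpointers btree in memory, running in multiple passes if it doesn't all
 * fit.
 */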
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}

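/*
 * Check that one backpointer's target still exists; a missing target might
 * just be a deletion still sitting in the write buffer, so flush and recheck
 * before deleting the backpointer.
 */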
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n  %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}

static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_PREFETCH, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}

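/*
 * Verify that every backpointer points at a live extent or btree node: pins
 * the extents and reflink btrees in memory, running in multiple passes if
 * they don't fit.
 */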
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   (1U << BTREE_ID_extents)|
						   (1U << BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		     bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}