]> Git Repo - J-linux.git/blob - fs/bcachefs/backpointers.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / fs / bcachefs / backpointers.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "checksum.h"
12 #include "disk_accounting.h"
13 #include "error.h"
14
15 #include <linux/mm.h>
16
17 static bool extent_matches_bp(struct bch_fs *c,
18                               enum btree_id btree_id, unsigned level,
19                               struct bkey_s_c k,
20                               struct bpos bucket,
21                               struct bch_backpointer bp)
22 {
23         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
24         const union bch_extent_entry *entry;
25         struct extent_ptr_decoded p;
26
27         rcu_read_lock();
28         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
29                 struct bpos bucket2;
30                 struct bch_backpointer bp2;
31
32                 if (p.ptr.cached)
33                         continue;
34
35                 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
36                 if (!ca)
37                         continue;
38
39                 bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2);
40                 if (bpos_eq(bucket, bucket2) &&
41                     !memcmp(&bp, &bp2, sizeof(bp))) {
42                         rcu_read_unlock();
43                         return true;
44                 }
45         }
46         rcu_read_unlock();
47
48         return false;
49 }
50
51 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
52                               enum bch_validate_flags flags)
53 {
54         struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
55         int ret = 0;
56
57         bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
58                          c, backpointer_level_bad,
59                          "backpointer level bad: %u >= %u",
60                          bp.v->level, BTREE_MAX_DEPTH);
61
62         rcu_read_lock();
63         struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
64         if (!ca) {
65                 /* these will be caught by fsck */
66                 rcu_read_unlock();
67                 return 0;
68         }
69
70         struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
71         struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
72         rcu_read_unlock();
73
74         bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
75                          !bpos_eq(bp.k->p, bp_pos),
76                          c, backpointer_bucket_offset_wrong,
77                          "backpointer bucket_offset wrong");
78 fsck_err:
79         return ret;
80 }
81
82 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
83 {
84         prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
85                bch2_btree_id_str(bp->btree_id),
86                bp->level,
87                (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
88                (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
89                bp->bucket_len);
90         bch2_bpos_to_text(out, bp->pos);
91 }
92
93 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
94 {
95         rcu_read_lock();
96         struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode);
97         if (ca) {
98                 struct bpos bucket = bp_pos_to_bucket(ca, k.k->p);
99                 rcu_read_unlock();
100                 prt_str(out, "bucket=");
101                 bch2_bpos_to_text(out, bucket);
102                 prt_str(out, " ");
103         } else {
104                 rcu_read_unlock();
105         }
106
107         bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
108 }
109
110 void bch2_backpointer_swab(struct bkey_s k)
111 {
112         struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
113
114         bp.v->bucket_offset     = swab40(bp.v->bucket_offset);
115         bp.v->bucket_len        = swab32(bp.v->bucket_len);
116         bch2_bpos_swab(&bp.v->pos);
117 }
118
119 static noinline int backpointer_mod_err(struct btree_trans *trans,
120                                         struct bch_backpointer bp,
121                                         struct bkey_s_c bp_k,
122                                         struct bkey_s_c orig_k,
123                                         bool insert)
124 {
125         struct bch_fs *c = trans->c;
126         struct printbuf buf = PRINTBUF;
127
128         if (insert) {
129                 prt_printf(&buf, "existing backpointer found when inserting ");
130                 bch2_backpointer_to_text(&buf, &bp);
131                 prt_newline(&buf);
132                 printbuf_indent_add(&buf, 2);
133
134                 prt_printf(&buf, "found ");
135                 bch2_bkey_val_to_text(&buf, c, bp_k);
136                 prt_newline(&buf);
137
138                 prt_printf(&buf, "for ");
139                 bch2_bkey_val_to_text(&buf, c, orig_k);
140
141                 bch_err(c, "%s", buf.buf);
142         } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
143                 prt_printf(&buf, "backpointer not found when deleting\n");
144                 printbuf_indent_add(&buf, 2);
145
146                 prt_printf(&buf, "searching for ");
147                 bch2_backpointer_to_text(&buf, &bp);
148                 prt_newline(&buf);
149
150                 prt_printf(&buf, "got ");
151                 bch2_bkey_val_to_text(&buf, c, bp_k);
152                 prt_newline(&buf);
153
154                 prt_printf(&buf, "for ");
155                 bch2_bkey_val_to_text(&buf, c, orig_k);
156
157                 bch_err(c, "%s", buf.buf);
158         }
159
160         printbuf_exit(&buf);
161
162         if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
163                 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
164         } else {
165                 return 0;
166         }
167 }
168
169 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
170                                 struct bch_dev *ca,
171                                 struct bpos bucket,
172                                 struct bch_backpointer bp,
173                                 struct bkey_s_c orig_k,
174                                 bool insert)
175 {
176         struct btree_iter bp_iter;
177         struct bkey_s_c k;
178         struct bkey_i_backpointer *bp_k;
179         int ret;
180
181         bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
182         ret = PTR_ERR_OR_ZERO(bp_k);
183         if (ret)
184                 return ret;
185
186         bkey_backpointer_init(&bp_k->k_i);
187         bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
188         bp_k->v = bp;
189
190         if (!insert) {
191                 bp_k->k.type = KEY_TYPE_deleted;
192                 set_bkey_val_u64s(&bp_k->k, 0);
193         }
194
195         k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
196                                bp_k->k.p,
197                                BTREE_ITER_intent|
198                                BTREE_ITER_slots|
199                                BTREE_ITER_with_updates);
200         ret = bkey_err(k);
201         if (ret)
202                 goto err;
203
204         if (insert
205             ? k.k->type
206             : (k.k->type != KEY_TYPE_backpointer ||
207                memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
208                 ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
209                 if (ret)
210                         goto err;
211         }
212
213         ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
214 err:
215         bch2_trans_iter_exit(trans, &bp_iter);
216         return ret;
217 }
218
219 /*
220  * Find the next backpointer >= *bp_offset:
221  */
222 int bch2_get_next_backpointer(struct btree_trans *trans,
223                               struct bch_dev *ca,
224                               struct bpos bucket, int gen,
225                               struct bpos *bp_pos,
226                               struct bch_backpointer *bp,
227                               unsigned iter_flags)
228 {
229         struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0);
230         struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
231         struct bkey_s_c k;
232         int ret = 0;
233
234         if (bpos_ge(*bp_pos, bp_end_pos))
235                 goto done;
236
237         if (gen >= 0) {
238                 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
239                                        bucket, BTREE_ITER_cached|iter_flags);
240                 ret = bkey_err(k);
241                 if (ret)
242                         goto out;
243
244                 if (k.k->type != KEY_TYPE_alloc_v4 ||
245                     bkey_s_c_to_alloc_v4(k).v->gen != gen)
246                         goto done;
247         }
248
249         *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0));
250
251         for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
252                                      *bp_pos, iter_flags, k, ret) {
253                 if (bpos_ge(k.k->p, bp_end_pos))
254                         break;
255
256                 *bp_pos = k.k->p;
257                 *bp = *bkey_s_c_to_backpointer(k).v;
258                 goto out;
259         }
260 done:
261         *bp_pos = SPOS_MAX;
262 out:
263         bch2_trans_iter_exit(trans, &bp_iter);
264         bch2_trans_iter_exit(trans, &alloc_iter);
265         return ret;
266 }
267
268 static void backpointer_not_found(struct btree_trans *trans,
269                                   struct bpos bp_pos,
270                                   struct bch_backpointer bp,
271                                   struct bkey_s_c k)
272 {
273         struct bch_fs *c = trans->c;
274         struct printbuf buf = PRINTBUF;
275
276         /*
277          * If we're using the btree write buffer, the backpointer we were
278          * looking at may have already been deleted - failure to find what it
279          * pointed to is not an error:
280          */
281         if (likely(!bch2_backpointers_no_use_write_buffer))
282                 return;
283
284         struct bpos bucket;
285         if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
286                 return;
287
288         prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
289                    bp.level ? "btree node" : "extent");
290         prt_printf(&buf, "bucket: ");
291         bch2_bpos_to_text(&buf, bucket);
292         prt_printf(&buf, "\n  ");
293
294         prt_printf(&buf, "backpointer pos: ");
295         bch2_bpos_to_text(&buf, bp_pos);
296         prt_printf(&buf, "\n  ");
297
298         bch2_backpointer_to_text(&buf, &bp);
299         prt_printf(&buf, "\n  ");
300         bch2_bkey_val_to_text(&buf, c, k);
301         if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
302                 bch_err_ratelimited(c, "%s", buf.buf);
303         else
304                 bch2_trans_inconsistent(trans, "%s", buf.buf);
305
306         printbuf_exit(&buf);
307 }
308
309 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
310                                          struct btree_iter *iter,
311                                          struct bpos bp_pos,
312                                          struct bch_backpointer bp,
313                                          unsigned iter_flags)
314 {
315         if (likely(!bp.level)) {
316                 struct bch_fs *c = trans->c;
317
318                 struct bpos bucket;
319                 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
320                         return bkey_s_c_err(-EIO);
321
322                 bch2_trans_node_iter_init(trans, iter,
323                                           bp.btree_id,
324                                           bp.pos,
325                                           0, 0,
326                                           iter_flags);
327                 struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
328                 if (bkey_err(k)) {
329                         bch2_trans_iter_exit(trans, iter);
330                         return k;
331                 }
332
333                 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
334                         return k;
335
336                 bch2_trans_iter_exit(trans, iter);
337                 backpointer_not_found(trans, bp_pos, bp, k);
338                 return bkey_s_c_null;
339         } else {
340                 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
341
342                 if (IS_ERR_OR_NULL(b)) {
343                         bch2_trans_iter_exit(trans, iter);
344                         return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
345                 }
346                 return bkey_i_to_s_c(&b->key);
347         }
348 }
349
350 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
351                                         struct btree_iter *iter,
352                                         struct bpos bp_pos,
353                                         struct bch_backpointer bp)
354 {
355         struct bch_fs *c = trans->c;
356
357         BUG_ON(!bp.level);
358
359         struct bpos bucket;
360         if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
361                 return ERR_PTR(-EIO);
362
363         bch2_trans_node_iter_init(trans, iter,
364                                   bp.btree_id,
365                                   bp.pos,
366                                   0,
367                                   bp.level - 1,
368                                   0);
369         struct btree *b = bch2_btree_iter_peek_node(iter);
370         if (IS_ERR_OR_NULL(b))
371                 goto err;
372
373         BUG_ON(b->c.level != bp.level - 1);
374
375         if (extent_matches_bp(c, bp.btree_id, bp.level,
376                               bkey_i_to_s_c(&b->key),
377                               bucket, bp))
378                 return b;
379
380         if (btree_node_will_make_reachable(b)) {
381                 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
382         } else {
383                 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
384                 b = NULL;
385         }
386 err:
387         bch2_trans_iter_exit(trans, iter);
388         return b;
389 }
390
391 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
392                                         struct bkey_s_c k)
393 {
394         struct bch_fs *c = trans->c;
395         struct btree_iter alloc_iter = { NULL };
396         struct bkey_s_c alloc_k;
397         struct printbuf buf = PRINTBUF;
398         int ret = 0;
399
400         struct bpos bucket;
401         if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
402                 if (fsck_err(trans, backpointer_to_missing_device,
403                              "backpointer for missing device:\n%s",
404                              (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
405                         ret = bch2_btree_delete_at(trans, bp_iter, 0);
406                 goto out;
407         }
408
409         alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
410         ret = bkey_err(alloc_k);
411         if (ret)
412                 goto out;
413
414         if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4,
415                         trans, backpointer_to_missing_alloc,
416                         "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
417                         alloc_iter.pos.inode, alloc_iter.pos.offset,
418                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
419                 ret = bch2_btree_delete_at(trans, bp_iter, 0);
420                 goto out;
421         }
422 out:
423 fsck_err:
424         bch2_trans_iter_exit(trans, &alloc_iter);
425         printbuf_exit(&buf);
426         return ret;
427 }
428
429 /* verify that every backpointer has a corresponding alloc key */
430 int bch2_check_btree_backpointers(struct bch_fs *c)
431 {
432         int ret = bch2_trans_run(c,
433                 for_each_btree_key_commit(trans, iter,
434                         BTREE_ID_backpointers, POS_MIN, 0, k,
435                         NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
436                   bch2_check_btree_backpointer(trans, &iter, k)));
437         bch_err_fn(c, ret);
438         return ret;
439 }
440
441 struct extents_to_bp_state {
442         struct bpos     bucket_start;
443         struct bpos     bucket_end;
444         struct bkey_buf last_flushed;
445 };
446
447 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
448                                struct bkey_s_c extent, unsigned dev)
449 {
450         struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
451         int ret = PTR_ERR_OR_ZERO(n);
452         if (ret)
453                 return ret;
454
455         bch2_bkey_drop_device(bkey_i_to_s(n), dev);
456         return bch2_btree_insert_trans(trans, btree, n, 0);
457 }
458
459 static int check_extent_checksum(struct btree_trans *trans,
460                                  enum btree_id btree, struct bkey_s_c extent,
461                                  enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
462 {
463         struct bch_fs *c = trans->c;
464         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
465         const union bch_extent_entry *entry;
466         struct extent_ptr_decoded p;
467         struct printbuf buf = PRINTBUF;
468         void *data_buf = NULL;
469         struct bio *bio = NULL;
470         size_t bytes;
471         int ret = 0;
472
473         if (bkey_is_btree_ptr(extent.k))
474                 return false;
475
476         bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
477                 if (p.ptr.dev == dev)
478                         goto found;
479         BUG();
480 found:
481         if (!p.crc.csum_type)
482                 return false;
483
484         bytes = p.crc.compressed_size << 9;
485
486         struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
487         if (!ca)
488                 return false;
489
490         data_buf = kvmalloc(bytes, GFP_KERNEL);
491         if (!data_buf) {
492                 ret = -ENOMEM;
493                 goto err;
494         }
495
496         bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
497         bio->bi_iter.bi_sector = p.ptr.offset;
498         bch2_bio_map(bio, data_buf, bytes);
499         ret = submit_bio_wait(bio);
500         if (ret)
501                 goto err;
502
503         prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
504         prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
505         bch2_bkey_val_to_text(&buf, c, extent);
506         prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
507         bch2_bkey_val_to_text(&buf, c, extent2);
508
509         struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
510         struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
511         if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
512                         trans, dup_backpointer_to_bad_csum_extent,
513                         "%s", buf.buf))
514                 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
515 fsck_err:
516 err:
517         if (bio)
518                 bio_put(bio);
519         kvfree(data_buf);
520         percpu_ref_put(&ca->io_ref);
521         printbuf_exit(&buf);
522         return ret;
523 }
524
525 static int check_bp_exists(struct btree_trans *trans,
526                            struct extents_to_bp_state *s,
527                            struct bpos bucket,
528                            struct bch_backpointer bp,
529                            struct bkey_s_c orig_k)
530 {
531         struct bch_fs *c = trans->c;
532         struct btree_iter bp_iter = {};
533         struct btree_iter other_extent_iter = {};
534         struct printbuf buf = PRINTBUF;
535         struct bkey_s_c bp_k;
536         int ret = 0;
537
538         struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
539         if (!ca) {
540                 prt_str(&buf, "extent for nonexistent device:bucket ");
541                 bch2_bpos_to_text(&buf, bucket);
542                 prt_str(&buf, "\n  ");
543                 bch2_bkey_val_to_text(&buf, c, orig_k);
544                 bch_err(c, "%s", buf.buf);
545                 ret = -BCH_ERR_fsck_repair_unimplemented;
546                 goto err;
547         }
548
549         if (bpos_lt(bucket, s->bucket_start) ||
550             bpos_gt(bucket, s->bucket_end))
551                 goto out;
552
553         bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
554                                   bucket_pos_to_bp(ca, bucket, bp.bucket_offset),
555                                   0);
556         ret = bkey_err(bp_k);
557         if (ret)
558                 goto err;
559
560         if (bp_k.k->type != KEY_TYPE_backpointer ||
561             memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
562                 ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
563                 if (ret)
564                         goto err;
565
566                 goto check_existing_bp;
567         }
568 out:
569 err:
570 fsck_err:
571         bch2_trans_iter_exit(trans, &other_extent_iter);
572         bch2_trans_iter_exit(trans, &bp_iter);
573         bch2_dev_put(ca);
574         printbuf_exit(&buf);
575         return ret;
576 check_existing_bp:
577         /* Do we have a backpointer for a different extent? */
578         if (bp_k.k->type != KEY_TYPE_backpointer)
579                 goto missing;
580
581         struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
582
583         struct bkey_s_c other_extent =
584                 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
585         ret = bkey_err(other_extent);
586         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
587                 ret = 0;
588         if (ret)
589                 goto err;
590
591         if (!other_extent.k)
592                 goto missing;
593
594         if (bch2_extents_match(orig_k, other_extent)) {
595                 printbuf_reset(&buf);
596                 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
597                 bch2_bkey_val_to_text(&buf, c, orig_k);
598                 prt_str(&buf, "\n  ");
599                 bch2_bkey_val_to_text(&buf, c, other_extent);
600                 bch_err(c, "%s", buf.buf);
601
602                 if (other_extent.k->size <= orig_k.k->size) {
603                         ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
604                         if (ret)
605                                 goto err;
606                         goto out;
607                 } else {
608                         ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
609                         if (ret)
610                                 goto err;
611                         goto missing;
612                 }
613         }
614
615         ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
616         if (ret < 0)
617                 goto err;
618         if (ret) {
619                 ret = 0;
620                 goto missing;
621         }
622
623         ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
624         if (ret < 0)
625                 goto err;
626         if (ret) {
627                 ret = 0;
628                 goto out;
629         }
630
631         printbuf_reset(&buf);
632         prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
633         bch2_bkey_val_to_text(&buf, c, orig_k);
634         prt_str(&buf, "\n  ");
635         bch2_bkey_val_to_text(&buf, c, other_extent);
636         bch_err(c, "%s", buf.buf);
637         ret = -BCH_ERR_fsck_repair_unimplemented;
638         goto err;
639 missing:
640         printbuf_reset(&buf);
641         prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
642                bch2_btree_id_str(bp.btree_id), bp.level);
643         bch2_bkey_val_to_text(&buf, c, orig_k);
644         prt_printf(&buf, "\n  got:   ");
645         bch2_bkey_val_to_text(&buf, c, bp_k);
646
647         struct bkey_i_backpointer n_bp_k;
648         bkey_backpointer_init(&n_bp_k.k_i);
649         n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
650         n_bp_k.v = bp;
651         prt_printf(&buf, "\n  want:  ");
652         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
653
654         if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
655                 ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true);
656
657         goto out;
658 }
659
660 static int check_extent_to_backpointers(struct btree_trans *trans,
661                                         struct extents_to_bp_state *s,
662                                         enum btree_id btree, unsigned level,
663                                         struct bkey_s_c k)
664 {
665         struct bch_fs *c = trans->c;
666         struct bkey_ptrs_c ptrs;
667         const union bch_extent_entry *entry;
668         struct extent_ptr_decoded p;
669         int ret;
670
671         ptrs = bch2_bkey_ptrs_c(k);
672         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
673                 struct bpos bucket_pos = POS_MIN;
674                 struct bch_backpointer bp;
675
676                 if (p.ptr.cached)
677                         continue;
678
679                 rcu_read_lock();
680                 struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
681                 if (ca)
682                         bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp);
683                 rcu_read_unlock();
684
685                 if (!ca)
686                         continue;
687
688                 ret = check_bp_exists(trans, s, bucket_pos, bp, k);
689                 if (ret)
690                         return ret;
691         }
692
693         return 0;
694 }
695
696 static int check_btree_root_to_backpointers(struct btree_trans *trans,
697                                             struct extents_to_bp_state *s,
698                                             enum btree_id btree_id,
699                                             int *level)
700 {
701         struct bch_fs *c = trans->c;
702         struct btree_iter iter;
703         struct btree *b;
704         struct bkey_s_c k;
705         int ret;
706 retry:
707         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
708                                   0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
709         b = bch2_btree_iter_peek_node(&iter);
710         ret = PTR_ERR_OR_ZERO(b);
711         if (ret)
712                 goto err;
713
714         if (b != btree_node_root(c, b)) {
715                 bch2_trans_iter_exit(trans, &iter);
716                 goto retry;
717         }
718
719         *level = b->c.level;
720
721         k = bkey_i_to_s_c(&b->key);
722         ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
723 err:
724         bch2_trans_iter_exit(trans, &iter);
725         return ret;
726 }
727
728 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
729 {
730         return (struct bbpos) {
731                 .btree  = bp.btree_id,
732                 .pos    = bp.pos,
733         };
734 }
735
736 static u64 mem_may_pin_bytes(struct bch_fs *c)
737 {
738         struct sysinfo i;
739         si_meminfo(&i);
740
741         u64 mem_bytes = i.totalram * i.mem_unit;
742         return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
743 }
744
745 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
746 {
747         return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
748 }
749
750 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
751                                         u64 btree_leaf_mask,
752                                         u64 btree_interior_mask,
753                                         struct bbpos start, struct bbpos *end)
754 {
755         struct bch_fs *c = trans->c;
756         s64 mem_may_pin = mem_may_pin_bytes(c);
757         int ret = 0;
758
759         bch2_btree_cache_unpin(c);
760
761         btree_interior_mask |= btree_leaf_mask;
762
763         c->btree_cache.pinned_nodes_mask[0]             = btree_leaf_mask;
764         c->btree_cache.pinned_nodes_mask[1]             = btree_interior_mask;
765         c->btree_cache.pinned_nodes_start               = start;
766         c->btree_cache.pinned_nodes_end                 = *end = BBPOS_MAX;
767
768         for (enum btree_id btree = start.btree;
769              btree < BTREE_ID_NR && !ret;
770              btree++) {
771                 unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;
772
773                 if (!(BIT_ULL(btree) & btree_leaf_mask) &&
774                     !(BIT_ULL(btree) & btree_interior_mask))
775                         continue;
776
777                 ret = __for_each_btree_node(trans, iter, btree,
778                                       btree == start.btree ? start.pos : POS_MIN,
779                                       0, depth, BTREE_ITER_prefetch, b, ({
780                         mem_may_pin -= btree_buf_bytes(b);
781                         if (mem_may_pin <= 0) {
782                                 c->btree_cache.pinned_nodes_end = *end =
783                                         BBPOS(btree, b->key.k.p);
784                                 break;
785                         }
786                         bch2_node_pin(c, b);
787                         0;
788                 }));
789         }
790
791         return ret;
792 }
793
794 struct progress_indicator_state {
795         unsigned long           next_print;
796         u64                     nodes_seen;
797         u64                     nodes_total;
798         struct btree            *last_node;
799 };
800
801 static inline void progress_init(struct progress_indicator_state *s,
802                                  struct bch_fs *c,
803                                  u64 btree_id_mask)
804 {
805         memset(s, 0, sizeof(*s));
806
807         s->next_print = jiffies + HZ * 10;
808
809         for (unsigned i = 0; i < BTREE_ID_NR; i++) {
810                 if (!(btree_id_mask & BIT_ULL(i)))
811                         continue;
812
813                 struct disk_accounting_pos acc = {
814                         .type           = BCH_DISK_ACCOUNTING_btree,
815                         .btree.id       = i,
816                 };
817
818                 u64 v;
819                 bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
820                 s->nodes_total += div64_ul(v, btree_sectors(c));
821         }
822 }
823
824 static inline bool progress_update_p(struct progress_indicator_state *s)
825 {
826         bool ret = time_after_eq(jiffies, s->next_print);
827
828         if (ret)
829                 s->next_print = jiffies + HZ * 10;
830         return ret;
831 }
832
833 static void progress_update_iter(struct btree_trans *trans,
834                                  struct progress_indicator_state *s,
835                                  struct btree_iter *iter,
836                                  const char *msg)
837 {
838         struct bch_fs *c = trans->c;
839         struct btree *b = path_l(btree_iter_path(trans, iter))->b;
840
841         s->nodes_seen += b != s->last_node;
842         s->last_node = b;
843
844         if (progress_update_p(s)) {
845                 struct printbuf buf = PRINTBUF;
846                 unsigned percent = s->nodes_total
847                         ? div64_u64(s->nodes_seen * 100, s->nodes_total)
848                         : 0;
849
850                 prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
851                            msg, percent, s->nodes_seen, s->nodes_total);
852                 bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));
853
854                 bch_info(c, "%s", buf.buf);
855                 printbuf_exit(&buf);
856         }
857 }
858
859 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
860                                                    struct extents_to_bp_state *s)
861 {
862         struct bch_fs *c = trans->c;
863         struct progress_indicator_state progress;
864         int ret = 0;
865
866         progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
867
868         for (enum btree_id btree_id = 0;
869              btree_id < btree_id_nr_alive(c);
870              btree_id++) {
871                 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
872
873                 ret = commit_do(trans, NULL, NULL,
874                                 BCH_TRANS_COMMIT_no_enospc,
875                                 check_btree_root_to_backpointers(trans, s, btree_id, &level));
876                 if (ret)
877                         return ret;
878
879                 while (level >= depth) {
880                         struct btree_iter iter;
881                         bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
882                                                   BTREE_ITER_prefetch);
883
884                         ret = for_each_btree_key_continue(trans, iter, 0, k, ({
885                                 progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
886                                 check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
887                                 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
888                         }));
889                         if (ret)
890                                 return ret;
891
892                         --level;
893                 }
894         }
895
896         return 0;
897 }
898
899 int bch2_check_extents_to_backpointers(struct bch_fs *c)
900 {
901         struct btree_trans *trans = bch2_trans_get(c);
902         struct extents_to_bp_state s = { .bucket_start = POS_MIN };
903         int ret;
904
905         bch2_bkey_buf_init(&s.last_flushed);
906         bkey_init(&s.last_flushed.k->k);
907
908         while (1) {
909                 struct bbpos end;
910                 ret = bch2_get_btree_in_memory_pos(trans,
911                                 BIT_ULL(BTREE_ID_backpointers),
912                                 BIT_ULL(BTREE_ID_backpointers),
913                                 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
914                 if (ret)
915                         break;
916
917                 s.bucket_end = end.pos;
918
919                 if ( bpos_eq(s.bucket_start, POS_MIN) &&
920                     !bpos_eq(s.bucket_end, SPOS_MAX))
921                         bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
922                                     __func__, btree_nodes_fit_in_ram(c));
923
924                 if (!bpos_eq(s.bucket_start, POS_MIN) ||
925                     !bpos_eq(s.bucket_end, SPOS_MAX)) {
926                         struct printbuf buf = PRINTBUF;
927
928                         prt_str(&buf, "check_extents_to_backpointers(): ");
929                         bch2_bpos_to_text(&buf, s.bucket_start);
930                         prt_str(&buf, "-");
931                         bch2_bpos_to_text(&buf, s.bucket_end);
932
933                         bch_verbose(c, "%s", buf.buf);
934                         printbuf_exit(&buf);
935                 }
936
937                 ret = bch2_check_extents_to_backpointers_pass(trans, &s);
938                 if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
939                         break;
940
941                 s.bucket_start = bpos_successor(s.bucket_end);
942         }
943         bch2_trans_put(trans);
944         bch2_bkey_buf_exit(&s.last_flushed, c);
945
946         bch2_btree_cache_unpin(c);
947
948         bch_err_fn(c, ret);
949         return ret;
950 }
951
952 static int check_one_backpointer(struct btree_trans *trans,
953                                  struct bbpos start,
954                                  struct bbpos end,
955                                  struct bkey_s_c bp_k,
956                                  struct bkey_buf *last_flushed)
957 {
958         if (bp_k.k->type != KEY_TYPE_backpointer)
959                 return 0;
960
961         struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
962         struct bch_fs *c = trans->c;
963         struct btree_iter iter;
964         struct bbpos pos = bp_to_bbpos(*bp.v);
965         struct bkey_s_c k;
966         struct printbuf buf = PRINTBUF;
967         int ret;
968
969         if (bbpos_cmp(pos, start) < 0 ||
970             bbpos_cmp(pos, end) > 0)
971                 return 0;
972
973         k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
974         ret = bkey_err(k);
975         if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
976                 return 0;
977         if (ret)
978                 return ret;
979
980         if (!k.k) {
981                 ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
982                 if (ret)
983                         goto out;
984
985                 if (fsck_err(trans, backpointer_to_missing_ptr,
986                              "backpointer for missing %s\n  %s",
987                              bp.v->level ? "btree node" : "extent",
988                              (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
989                         ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
990                         goto out;
991                 }
992         }
993 out:
994 fsck_err:
995         bch2_trans_iter_exit(trans, &iter);
996         printbuf_exit(&buf);
997         return ret;
998 }
999
1000 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
1001                                                    struct bbpos start,
1002                                                    struct bbpos end)
1003 {
1004         struct bch_fs *c = trans->c;
1005         struct bkey_buf last_flushed;
1006         struct progress_indicator_state progress;
1007
1008         bch2_bkey_buf_init(&last_flushed);
1009         bkey_init(&last_flushed.k->k);
1010         progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
1011
1012         int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
1013                                   POS_MIN, BTREE_ITER_prefetch, k,
1014                                   NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
1015                         progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
1016                         check_one_backpointer(trans, start, end, k, &last_flushed);
1017         }));
1018
1019         bch2_bkey_buf_exit(&last_flushed, c);
1020         return ret;
1021 }
1022
1023 int bch2_check_backpointers_to_extents(struct bch_fs *c)
1024 {
1025         struct btree_trans *trans = bch2_trans_get(c);
1026         struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
1027         int ret;
1028
1029         while (1) {
1030                 ret = bch2_get_btree_in_memory_pos(trans,
1031                                                    BIT_ULL(BTREE_ID_extents)|
1032                                                    BIT_ULL(BTREE_ID_reflink),
1033                                                    ~0,
1034                                                    start, &end);
1035                 if (ret)
1036                         break;
1037
1038                 if (!bbpos_cmp(start, BBPOS_MIN) &&
1039                     bbpos_cmp(end, BBPOS_MAX))
1040                         bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
1041                                     __func__, btree_nodes_fit_in_ram(c));
1042
1043                 if (bbpos_cmp(start, BBPOS_MIN) ||
1044                     bbpos_cmp(end, BBPOS_MAX)) {
1045                         struct printbuf buf = PRINTBUF;
1046
1047                         prt_str(&buf, "check_backpointers_to_extents(): ");
1048                         bch2_bbpos_to_text(&buf, start);
1049                         prt_str(&buf, "-");
1050                         bch2_bbpos_to_text(&buf, end);
1051
1052                         bch_verbose(c, "%s", buf.buf);
1053                         printbuf_exit(&buf);
1054                 }
1055
1056                 ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
1057                 if (ret || !bbpos_cmp(end, BBPOS_MAX))
1058                         break;
1059
1060                 start = bbpos_successor(end);
1061         }
1062         bch2_trans_put(trans);
1063
1064         bch2_btree_cache_unpin(c);
1065
1066         bch_err_fn(c, ret);
1067         return ret;
1068 }
This page took 0.088503 seconds and 4 git commands to generate.