]> Git Repo - linux.git/blob - fs/bcachefs/btree_node_scan.c
cifs: Add a tracepoint to track credits involved in R/W requests
[linux.git] / fs / bcachefs / btree_node_scan.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "btree_io.h"
6 #include "btree_journal_iter.h"
7 #include "btree_node_scan.h"
8 #include "btree_update_interior.h"
9 #include "buckets.h"
10 #include "error.h"
11 #include "journal_io.h"
12 #include "recovery_passes.h"
13
14 #include <linux/kthread.h>
15 #include <linux/sort.h>
16
/*
 * Argument bundle for one read_btree_nodes_worker() thread: allocated by
 * read_btree_nodes() for each scanned device and freed by the worker on exit.
 */
struct find_btree_nodes_worker {
        struct closure          *cl;    /* put by the worker when done; the spawner syncs on it */
        struct find_btree_nodes *f;     /* shared scan state (found nodes, lock, error code) */
        struct bch_dev          *ca;    /* device this worker scans */
};
22
23 static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
24 {
25         prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
26         bch2_bpos_to_text(out, n->min_key);
27         prt_str(out, "-");
28         bch2_bpos_to_text(out, n->max_key);
29
30         if (n->range_updated)
31                 prt_str(out, " range updated");
32         if (n->overwritten)
33                 prt_str(out, " overwritten");
34
35         for (unsigned i = 0; i < n->nr_ptrs; i++) {
36                 prt_char(out, ' ');
37                 bch2_extent_ptr_to_text(out, c, n->ptrs + i);
38         }
39 }
40
41 static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
42 {
43         printbuf_indent_add(out, 2);
44         darray_for_each(nodes, i) {
45                 found_btree_node_to_text(out, c, i);
46                 prt_newline(out);
47         }
48         printbuf_indent_sub(out, 2);
49 }
50
51 static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
52 {
53         struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
54
55         set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
56         bp->k.p                 = f->max_key;
57         bp->v.seq               = cpu_to_le64(f->cookie);
58         bp->v.sectors_written   = 0;
59         bp->v.flags             = 0;
60         bp->v.sectors_written   = cpu_to_le16(f->sectors_written);
61         bp->v.min_key           = f->min_key;
62         SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
63         memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
64 }
65
66 static bool found_btree_node_is_readable(struct btree_trans *trans,
67                                          struct found_btree_node *f)
68 {
69         struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
70
71         found_btree_node_to_key(&k.k, f);
72
73         struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
74         bool ret = !IS_ERR_OR_NULL(b);
75         if (!ret)
76                 return ret;
77
78         f->sectors_written = b->written;
79         six_unlock_read(&b->c.lock);
80
81         /*
82          * We might update this node's range; if that happens, we need the node
83          * to be re-read so the read path can trim keys that are no longer in
84          * this node
85          */
86         if (b != btree_node_root(trans->c, b))
87                 bch2_btree_node_evict(trans, &k.k);
88         return ret;
89 }
90
91 static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
92 {
93         const struct found_btree_node *l = _l;
94         const struct found_btree_node *r = _r;
95
96         return  cmp_int(l->btree_id,    r->btree_id) ?:
97                 cmp_int(l->level,       r->level) ?:
98                 cmp_int(l->cookie,      r->cookie);
99 }
100
/*
 * Order two found btree nodes by their sequence number (seq) alone: the
 * result is negative, zero or positive as l->seq is less than, equal to or
 * greater than r->seq.
 *
 * NOTE(review): an earlier version of this comment said that for equal
 * sequence numbers the readable node is preferred; no such tie-break is
 * implemented here - equal seq simply compares as 0.
 */
static int found_btree_node_cmp_time(const struct found_btree_node *l,
                                     const struct found_btree_node *r)
{
        return cmp_int(l->seq, r->seq);
}
110
111 static int found_btree_node_cmp_pos(const void *_l, const void *_r)
112 {
113         const struct found_btree_node *l = _l;
114         const struct found_btree_node *r = _r;
115
116         return  cmp_int(l->btree_id,    r->btree_id) ?:
117                -cmp_int(l->level,       r->level) ?:
118                 bpos_cmp(l->min_key,    r->min_key) ?:
119                -found_btree_node_cmp_time(l, r);
120 }
121
122 static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
123                                 struct bio *bio, struct btree_node *bn, u64 offset)
124 {
125         struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
126
127         bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
128         bio->bi_iter.bi_sector  = offset;
129         bch2_bio_map(bio, bn, PAGE_SIZE);
130
131         submit_bio_wait(bio);
132         if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
133                                "IO error in try_read_btree_node() at %llu: %s",
134                                offset, bch2_blk_status_to_str(bio->bi_status)))
135                 return;
136
137         if (le64_to_cpu(bn->magic) != bset_magic(c))
138                 return;
139
140         if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
141                 struct nonce nonce = btree_nonce(&bn->keys, 0);
142                 unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
143
144                 bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
145         }
146
147         if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
148                 return;
149
150         if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
151                 return;
152
153         rcu_read_lock();
154         struct found_btree_node n = {
155                 .btree_id       = BTREE_NODE_ID(bn),
156                 .level          = BTREE_NODE_LEVEL(bn),
157                 .seq            = BTREE_NODE_SEQ(bn),
158                 .cookie         = le64_to_cpu(bn->keys.seq),
159                 .min_key        = bn->min_key,
160                 .max_key        = bn->max_key,
161                 .nr_ptrs        = 1,
162                 .ptrs[0].type   = 1 << BCH_EXTENT_ENTRY_ptr,
163                 .ptrs[0].offset = offset,
164                 .ptrs[0].dev    = ca->dev_idx,
165                 .ptrs[0].gen    = *bucket_gen(ca, sector_to_bucket(ca, offset)),
166         };
167         rcu_read_unlock();
168
169         if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
170                 mutex_lock(&f->lock);
171                 if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
172                         bch_err(c, "try_read_btree_node() can't handle endian conversion");
173                         f->ret = -EINVAL;
174                         goto unlock;
175                 }
176
177                 if (darray_push(&f->nodes, n))
178                         f->ret = -ENOMEM;
179 unlock:
180                 mutex_unlock(&f->lock);
181         }
182 }
183
184 static int read_btree_nodes_worker(void *p)
185 {
186         struct find_btree_nodes_worker *w = p;
187         struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
188         struct bch_dev *ca = w->ca;
189         void *buf = (void *) __get_free_page(GFP_KERNEL);
190         struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
191         unsigned long last_print = jiffies;
192
193         if (!buf || !bio) {
194                 bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
195                 w->f->ret = -ENOMEM;
196                 goto err;
197         }
198
199         for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
200                 for (unsigned bucket_offset = 0;
201                      bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
202                      bucket_offset += btree_sectors(c)) {
203                         if (time_after(jiffies, last_print + HZ * 30)) {
204                                 u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
205                                 u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
206
207                                 bch_info(ca, "%s: %2u%% done", __func__,
208                                          (unsigned) div64_u64(cur_sector * 100, end_sector));
209                                 last_print = jiffies;
210                         }
211
212                         u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
213
214                         if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
215                             !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
216                                 continue;
217
218                         try_read_btree_node(w->f, ca, bio, buf, sector);
219                 }
220 err:
221         bio_put(bio);
222         free_page((unsigned long) buf);
223         percpu_ref_get(&ca->io_ref);
224         closure_put(w->cl);
225         kfree(w);
226         return 0;
227 }
228
229 static int read_btree_nodes(struct find_btree_nodes *f)
230 {
231         struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
232         struct closure cl;
233         int ret = 0;
234
235         closure_init_stack(&cl);
236
237         for_each_online_member(c, ca) {
238                 if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
239                         continue;
240
241                 struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
242                 struct task_struct *t;
243
244                 if (!w) {
245                         percpu_ref_put(&ca->io_ref);
246                         ret = -ENOMEM;
247                         goto err;
248                 }
249
250                 percpu_ref_get(&ca->io_ref);
251                 closure_get(&cl);
252                 w->cl           = &cl;
253                 w->f            = f;
254                 w->ca           = ca;
255
256                 t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
257                 ret = IS_ERR_OR_NULL(t);
258                 if (ret) {
259                         percpu_ref_put(&ca->io_ref);
260                         closure_put(&cl);
261                         f->ret = ret;
262                         bch_err(c, "error starting kthread: %i", ret);
263                         break;
264                 }
265         }
266 err:
267         closure_sync(&cl);
268         return f->ret ?: ret;
269 }
270
271 static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
272 {
273         while (n + 1 < end &&
274                found_btree_node_cmp_pos(n, n + 1) > 0) {
275                 swap(n[0], n[1]);
276                 n++;
277         }
278 }
279
280 static int handle_overwrites(struct bch_fs *c,
281                              struct found_btree_node *start,
282                              struct found_btree_node *end)
283 {
284         struct found_btree_node *n;
285 again:
286         for (n = start + 1;
287              n < end &&
288              n->btree_id        == start->btree_id &&
289              n->level           == start->level &&
290              bpos_lt(n->min_key, start->max_key);
291              n++)  {
292                 int cmp = found_btree_node_cmp_time(start, n);
293
294                 if (cmp > 0) {
295                         if (bpos_cmp(start->max_key, n->max_key) >= 0)
296                                 n->overwritten = true;
297                         else {
298                                 n->range_updated = true;
299                                 n->min_key = bpos_successor(start->max_key);
300                                 n->range_updated = true;
301                                 bubble_up(n, end);
302                                 goto again;
303                         }
304                 } else if (cmp < 0) {
305                         BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
306
307                         start->max_key = bpos_predecessor(n->min_key);
308                         start->range_updated = true;
309                 } else if (n->level) {
310                         n->overwritten = true;
311                 } else {
312                         struct printbuf buf = PRINTBUF;
313
314                         prt_str(&buf, "overlapping btree nodes with same seq! halting\n  ");
315                         found_btree_node_to_text(&buf, c, start);
316                         prt_str(&buf, "\n  ");
317                         found_btree_node_to_text(&buf, c, n);
318                         bch_err(c, "%s", buf.buf);
319                         printbuf_exit(&buf);
320                         return -BCH_ERR_fsck_repair_unimplemented;
321                 }
322         }
323
324         return 0;
325 }
326
/*
 * Scan the devices for btree nodes and build the sorted table of found nodes
 * in c->found_btree_nodes.
 *
 * Steps:
 *  1. read_btree_nodes() collects one entry per on-disk copy that was found.
 *  2. Entries are sorted by cookie and copies sharing a cookie are merged
 *     into a single entry carrying several pointers.
 *  3. Entries are sorted by position and handle_overwrites() resolves
 *     overlapping ranges; entries marked overwritten are dropped.
 *  4. The result is put into eytzinger order, which the lookup macro
 *     for_each_found_btree_node_in_range() relies on.
 *
 * Does nothing and returns 0 if the table already holds nodes.
 *
 * Returns 0 on success, the error from read_btree_nodes() or
 * handle_overwrites(), or -EINVAL if no nodes were found or a node has more
 * copies than fit in its ptrs array.
 */
int bch2_scan_for_btree_nodes(struct bch_fs *c)
{
        struct find_btree_nodes *f = &c->found_btree_nodes;
        struct printbuf buf = PRINTBUF;
        size_t dst;
        int ret = 0;

        /* a previous scan already produced results */
        if (f->nodes.nr)
                return 0;

        mutex_init(&f->lock);

        ret = read_btree_nodes(f);
        if (ret)
                return ret;

        if (!f->nodes.nr) {
                bch_err(c, "%s: no btree nodes found", __func__);
                ret = -EINVAL;
                goto err;
        }

        /* debug dump, disabled by the constant 0 */
        if (0 && c->opts.verbose) {
                printbuf_reset(&buf);
                prt_printf(&buf, "%s: nodes found:\n", __func__);
                found_btree_nodes_to_text(&buf, c, f->nodes);
                bch2_print_string_as_lines(KERN_INFO, buf.buf);
        }

        /* bring copies of the same node (same cookie) next to each other */
        sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);

        /*
         * Merge in place: dst is the write index, always <= the read
         * position, and prev is the most recently kept entry.
         */
        dst = 0;
        darray_for_each(f->nodes, i) {
                struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;

                if (prev &&
                    prev->cookie == i->cookie) {
                        if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
                                bch_err(c, "%s: found too many replicas for btree node", __func__);
                                ret = -EINVAL;
                                goto err;
                        }
                        /* another copy of prev: just record its pointer */
                        prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
                } else {
                        f->nodes.data[dst++] = *i;
                }
        }
        f->nodes.nr = dst;

        sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);

        /* debug dump, disabled by the constant 0 */
        if (0 && c->opts.verbose) {
                printbuf_reset(&buf);
                prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
                found_btree_nodes_to_text(&buf, c, f->nodes);
                bch2_print_string_as_lines(KERN_INFO, buf.buf);
        }

        /*
         * Resolve overlaps and compact in place. handle_overwrites() may
         * mark later entries overwritten (skipped when reached) and may trim
         * the range of the current or later entries.
         */
        dst = 0;
        darray_for_each(f->nodes, i) {
                if (i->overwritten)
                        continue;

                ret = handle_overwrites(c, i, &darray_top(f->nodes));
                if (ret)
                        goto err;

                BUG_ON(i->overwritten);
                f->nodes.data[dst++] = *i;
        }
        f->nodes.nr = dst;

        if (c->opts.verbose) {
                printbuf_reset(&buf);
                prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
                found_btree_nodes_to_text(&buf, c, f->nodes);
                bch2_print_string_as_lines(KERN_INFO, buf.buf);
        }

        /* final layout used by the eytzinger0_find_gt() based lookups */
        eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
err:
        printbuf_exit(&buf);
        return ret;
}
411
412 static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
413 {
414         const struct found_btree_node *l = _l;
415         const struct found_btree_node *r = _r;
416
417         return  cmp_int(l->btree_id,    r->btree_id) ?:
418                -cmp_int(l->level,       r->level) ?:
419                 bpos_cmp(l->max_key,    r->min_key);
420 }
421
/*
 * Iterate over the found btree nodes that match @_search.
 *
 * @_f:      pointer to the struct find_btree_nodes to search
 * @_search: a struct found_btree_node lvalue giving btree_id, level and the
 *           min_key/max_key range of interest
 * @_idx:    name of the size_t index variable declared by the loop
 *
 * Iteration starts at the index eytzinger0_find_gt() returns for @_search
 * under found_btree_node_range_start_cmp(), and continues in eytzinger
 * order while the entry has the same btree_id and level as @_search and its
 * min_key is below _search.max_key.
 *
 * The search key is taken from the @_search argument itself, so callers may
 * name their variable freely.
 */
#define for_each_found_btree_node_in_range(_f, _search, _idx)                           \
        for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr,         \
                                        sizeof((_f)->nodes.data[0]),                    \
                                        found_btree_node_range_start_cmp, &(_search));  \
             _idx < (_f)->nodes.nr &&                                                   \
             (_f)->nodes.data[_idx].btree_id == (_search).btree_id &&                   \
             (_f)->nodes.data[_idx].level == (_search).level &&                         \
             bpos_lt((_f)->nodes.data[_idx].min_key, (_search).max_key);                \
             _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
431
432 bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
433 {
434         struct find_btree_nodes *f = &c->found_btree_nodes;
435
436         struct found_btree_node search = {
437                 .btree_id       = b->c.btree_id,
438                 .level          = b->c.level,
439                 .min_key        = b->data->min_key,
440                 .max_key        = b->key.k.p,
441         };
442
443         for_each_found_btree_node_in_range(f, search, idx)
444                 if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
445                         return true;
446         return false;
447 }
448
449 bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
450 {
451         struct found_btree_node search = {
452                 .btree_id       = btree,
453                 .level          = 0,
454                 .min_key        = POS_MIN,
455                 .max_key        = SPOS_MAX,
456         };
457
458         for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
459                 return true;
460         return false;
461 }
462
463 int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
464                            unsigned level, struct bpos node_min, struct bpos node_max)
465 {
466         if (btree_id_is_alloc(btree))
467                 return 0;
468
469         struct find_btree_nodes *f = &c->found_btree_nodes;
470
471         int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
472         if (ret)
473                 return ret;
474
475         if (c->opts.verbose) {
476                 struct printbuf buf = PRINTBUF;
477
478                 prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
479                 bch2_bpos_to_text(&buf, node_min);
480                 prt_str(&buf, " - ");
481                 bch2_bpos_to_text(&buf, node_max);
482
483                 bch_info(c, "%s(): %s", __func__, buf.buf);
484                 printbuf_exit(&buf);
485         }
486
487         struct found_btree_node search = {
488                 .btree_id       = btree,
489                 .level          = level,
490                 .min_key        = node_min,
491                 .max_key        = node_max,
492         };
493
494         for_each_found_btree_node_in_range(f, search, idx) {
495                 struct found_btree_node n = f->nodes.data[idx];
496
497                 n.range_updated |= bpos_lt(n.min_key, node_min);
498                 n.min_key = bpos_max(n.min_key, node_min);
499
500                 n.range_updated |= bpos_gt(n.max_key, node_max);
501                 n.max_key = bpos_min(n.max_key, node_max);
502
503                 struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
504
505                 found_btree_node_to_key(&tmp.k, &n);
506
507                 struct printbuf buf = PRINTBUF;
508                 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
509                 bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
510                 printbuf_exit(&buf);
511
512                 BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL));
513
514                 ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
515                 if (ret)
516                         return ret;
517         }
518
519         return 0;
520 }
521
/* Release the array of found btree nodes held in @f. */
void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
{
        darray_exit(&f->nodes);
}
This page took 0.063786 seconds and 4 git commands to generate.