fs/bcachefs/btree_gc.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2010 Kent Overstreet <[email protected]>
4  * Copyright (C) 2014 Datera Inc.
5  */
6
7 #include "bcachefs.h"
8 #include "alloc_background.h"
9 #include "alloc_foreground.h"
10 #include "backpointers.h"
11 #include "bkey_methods.h"
12 #include "bkey_buf.h"
13 #include "btree_journal_iter.h"
14 #include "btree_key_cache.h"
15 #include "btree_locking.h"
16 #include "btree_node_scan.h"
17 #include "btree_update_interior.h"
18 #include "btree_io.h"
19 #include "btree_gc.h"
20 #include "buckets.h"
21 #include "clock.h"
22 #include "debug.h"
23 #include "disk_accounting.h"
24 #include "ec.h"
25 #include "error.h"
26 #include "extents.h"
27 #include "journal.h"
28 #include "keylist.h"
29 #include "move.h"
30 #include "recovery_passes.h"
31 #include "reflink.h"
32 #include "replicas.h"
33 #include "super-io.h"
34 #include "trace.h"
35
36 #include <linux/slab.h>
37 #include <linux/bitops.h>
38 #include <linux/freezer.h>
39 #include <linux/kthread.h>
40 #include <linux/preempt.h>
41 #include <linux/rcupdate.h>
42 #include <linux/sched/task.h>
43
44 #define DROP_THIS_NODE          10
45 #define DROP_PREV_NODE          11
46 #define DID_FILL_FROM_SCAN      12
47
48 static const char * const bch2_gc_phase_strs[] = {
49 #define x(n)    #n,
50         GC_PHASES()
51 #undef x
52         NULL
53 };
54
55 void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p)
56 {
57         prt_str(out, bch2_gc_phase_strs[p->phase]);
58         prt_char(out, ' ');
59         bch2_btree_id_to_text(out, p->btree);
60         prt_printf(out, " l=%u ", p->level);
61         bch2_bpos_to_text(out, p->pos);
62 }
63
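/*
 * Cast a read-only bkey_s_c to a writeable bkey_s: callers here only need the
 * mutable type to satisfy the trigger interface.
 */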
64 static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
65 {
66         return (struct bkey_s) {{{
67                 (struct bkey *) k.k,
68                 (struct bch_val *) k.v
69         }}};
70 }
71
72 static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
73 {
74         preempt_disable();
75         write_seqcount_begin(&c->gc_pos_lock);
76         c->gc_pos = new_pos;
77         write_seqcount_end(&c->gc_pos_lock);
78         preempt_enable();
79 }
80
81 static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
82 {
83         BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) < 0);
84         __gc_pos_set(c, new_pos);
85 }
86
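/*
 * Convert a node's key to the btree_ptr_v2 format, which carries the min_key
 * and RANGE_UPDATED fields that topology repair needs to update:
 */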
87 static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
88 {
89         switch (b->key.k.type) {
90         case KEY_TYPE_btree_ptr: {
91                 struct bkey_i_btree_ptr *src = bkey_i_to_btree_ptr(&b->key);
92
93                 dst->k.p                = src->k.p;
94                 dst->v.mem_ptr          = 0;
95                 dst->v.seq              = b->data->keys.seq;
96                 dst->v.sectors_written  = 0;
97                 dst->v.flags            = 0;
98                 dst->v.min_key          = b->data->min_key;
99                 set_bkey_val_bytes(&dst->k, sizeof(dst->v) + bkey_val_bytes(&src->k));
100                 memcpy(dst->v.start, src->v.start, bkey_val_bytes(&src->k));
101                 break;
102         }
103         case KEY_TYPE_btree_ptr_v2:
104                 bkey_copy(&dst->k_i, &b->key);
105                 break;
106         default:
107                 BUG();
108         }
109 }
110
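/* Topology repair: update a node's min_key, both in memory and in the journal keys: */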
111 static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
112 {
113         struct bkey_i_btree_ptr_v2 *new;
114         int ret;
115
116         if (c->opts.verbose) {
117                 struct printbuf buf = PRINTBUF;
118
119                 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
120                 prt_str(&buf, " -> ");
121                 bch2_bpos_to_text(&buf, new_min);
122
123                 bch_info(c, "%s(): %s", __func__, buf.buf);
124                 printbuf_exit(&buf);
125         }
126
127         new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
128         if (!new)
129                 return -BCH_ERR_ENOMEM_gc_repair_key;
130
131         btree_ptr_to_v2(b, new);
132         b->data->min_key        = new_min;
133         new->v.min_key          = new_min;
134         SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);
135
136         ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
137         if (ret) {
138                 kfree(new);
139                 return ret;
140         }
141
142         bch2_btree_node_drop_keys_outside_node(b);
143         bkey_copy(&b->key, &new->k_i);
144         return 0;
145 }
146
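/*
 * Topology repair: update a node's max_key; since max_key is the node's key in
 * the parent, the node also has to be rehashed in the btree node cache:
 */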
147 static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
148 {
149         struct bkey_i_btree_ptr_v2 *new;
150         int ret;
151
152         if (c->opts.verbose) {
153                 struct printbuf buf = PRINTBUF;
154
155                 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
156                 prt_str(&buf, " -> ");
157                 bch2_bpos_to_text(&buf, new_max);
158
159                 bch_info(c, "%s(): %s", __func__, buf.buf);
160                 printbuf_exit(&buf);
161         }
162
163         ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
164         if (ret)
165                 return ret;
166
167         new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
168         if (!new)
169                 return -BCH_ERR_ENOMEM_gc_repair_key;
170
171         btree_ptr_to_v2(b, new);
172         b->data->max_key        = new_max;
173         new->k.p                = new_max;
174         SET_BTREE_PTR_RANGE_UPDATED(&new->v, true);
175
176         ret = bch2_journal_key_insert_take(c, b->c.btree_id, b->c.level + 1, &new->k_i);
177         if (ret) {
178                 kfree(new);
179                 return ret;
180         }
181
182         bch2_btree_node_drop_keys_outside_node(b);
183
184         mutex_lock(&c->btree_cache.lock);
185         __bch2_btree_node_hash_remove(&c->btree_cache, b);
186
187         bkey_copy(&b->key, &new->k_i);
188         ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
189         BUG_ON(ret);
190         mutex_unlock(&c->btree_cache.lock);
191         return 0;
192 }
193
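/*
 * Check that @cur starts exactly where @prev ended (or at the parent's
 * min_key): gaps may be filled from nodes found by scanning, overlaps are
 * fixed by adjusting boundaries or dropping one of the nodes:
 */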
194 static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *b,
195                                        struct btree *prev, struct btree *cur,
196                                        struct bpos *pulled_from_scan)
197 {
198         struct bch_fs *c = trans->c;
199         struct bpos expected_start = !prev
200                 ? b->data->min_key
201                 : bpos_successor(prev->key.k.p);
202         struct printbuf buf = PRINTBUF;
203         int ret = 0;
204
205         BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
206                !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
207                         b->data->min_key));
208
209         if (bpos_eq(expected_start, cur->data->min_key))
210                 return 0;
211
212         prt_printf(&buf, "  at btree %s level %u:\n  parent: ",
213                    bch2_btree_id_str(b->c.btree_id), b->c.level);
214         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
215
216         if (prev) {
217                 prt_printf(&buf, "\n  prev: ");
218                 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
219         }
220
221         prt_str(&buf, "\n  next: ");
222         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
223
224         if (bpos_lt(expected_start, cur->data->min_key)) {                              /* gap */
225                 if (b->c.level == 1 &&
226                     bpos_lt(*pulled_from_scan, cur->data->min_key)) {
227                         ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
228                                                      expected_start,
229                                                      bpos_predecessor(cur->data->min_key));
230                         if (ret)
231                                 goto err;
232
233                         *pulled_from_scan = cur->data->min_key;
234                         ret = DID_FILL_FROM_SCAN;
235                 } else {
236                         if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key,
237                                              "btree node with incorrect min_key%s", buf.buf))
238                                 ret = set_node_min(c, cur, expected_start);
239                 }
240         } else {                                                                        /* overlap */
241                 if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {   /* cur overwrites prev */
242                         if (bpos_ge(prev->data->min_key, cur->data->min_key)) {         /* fully? */
243                                 if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_next_node,
244                                                      "btree node overwritten by next node%s", buf.buf))
245                                         ret = DROP_PREV_NODE;
246                         } else {
247                                 if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key,
248                                                      "btree node with incorrect max_key%s", buf.buf))
249                                         ret = set_node_max(c, prev,
250                                                            bpos_predecessor(cur->data->min_key));
251                         }
252                 } else {
253                         if (bpos_ge(expected_start, cur->data->max_key)) {              /* fully? */
254                                 if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_prev_node,
255                                                      "btree node overwritten by prev node%s", buf.buf))
256                                         ret = DROP_THIS_NODE;
257                         } else {
258                                 if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key,
259                                                      "btree node with incorrect min_key%s", buf.buf))
260                                         ret = set_node_min(c, cur, expected_start);
261                         }
262                 }
263         }
264 err:
265 fsck_err:
266         printbuf_exit(&buf);
267         return ret;
268 }
269
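/* Check that the last child ends at the parent's max_key, repairing it if not: */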
270 static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
271                                  struct btree *child, struct bpos *pulled_from_scan)
272 {
273         struct bch_fs *c = trans->c;
274         struct printbuf buf = PRINTBUF;
275         int ret = 0;
276
277         if (bpos_eq(child->key.k.p, b->key.k.p))
278                 return 0;
279
280         prt_printf(&buf, "at btree %s level %u:\n  parent: ",
281                    bch2_btree_id_str(b->c.btree_id), b->c.level);
282         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
283
284         prt_str(&buf, "\n  child: ");
285         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
286
287         if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key,
288                              "btree node with incorrect max_key%s", buf.buf)) {
289                 if (b->c.level == 1 &&
290                     bpos_lt(*pulled_from_scan, b->key.k.p)) {
291                         ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
292                                                 bpos_successor(child->key.k.p), b->key.k.p);
293                         if (ret)
294                                 goto err;
295
296                         *pulled_from_scan = b->key.k.p;
297                         ret = DID_FILL_FROM_SCAN;
298                 } else {
299                         ret = set_node_max(c, child, b->key.k.p);
300                 }
301         }
302 err:
303 fsck_err:
304         printbuf_exit(&buf);
305         return ret;
306 }
307
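/*
 * Walk an interior node's children, repairing boundaries and recursing into
 * each child; returns DROP_THIS_NODE if the node is left with no live children:
 */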
308 static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
309                                               struct bpos *pulled_from_scan)
310 {
311         struct bch_fs *c = trans->c;
312         struct btree_and_journal_iter iter;
313         struct bkey_s_c k;
314         struct bkey_buf prev_k, cur_k;
315         struct btree *prev = NULL, *cur = NULL;
316         bool have_child, new_pass = false;
317         struct printbuf buf = PRINTBUF;
318         int ret = 0;
319
320         if (!b->c.level)
321                 return 0;
322
323         bch2_bkey_buf_init(&prev_k);
324         bch2_bkey_buf_init(&cur_k);
325 again:
326         cur = prev = NULL;
327         have_child = new_pass = false;
328         bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
329         iter.prefetch = true;
330
331         while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
332                 BUG_ON(bpos_lt(k.k->p, b->data->min_key));
333                 BUG_ON(bpos_gt(k.k->p, b->data->max_key));
334
335                 bch2_btree_and_journal_iter_advance(&iter);
336                 bch2_bkey_buf_reassemble(&cur_k, c, k);
337
338                 cur = bch2_btree_node_get_noiter(trans, cur_k.k,
339                                         b->c.btree_id, b->c.level - 1,
340                                         false);
341                 ret = PTR_ERR_OR_ZERO(cur);
342
343                 printbuf_reset(&buf);
344                 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
345
346                 if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
347                                 trans, btree_node_unreadable,
348                                 "Topology repair: unreadable btree node at btree %s level %u:\n"
349                                 "  %s",
350                                 bch2_btree_id_str(b->c.btree_id),
351                                 b->c.level - 1,
352                                 buf.buf)) {
353                         bch2_btree_node_evict(trans, cur_k.k);
354                         cur = NULL;
355                         ret = bch2_journal_key_delete(c, b->c.btree_id,
356                                                       b->c.level, cur_k.k->k.p);
357                         if (ret)
358                                 break;
359
360                         if (!btree_id_is_alloc(b->c.btree_id)) {
361                                 ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
362                                 if (ret)
363                                         break;
364                         }
365                         continue;
366                 }
367
368                 bch_err_msg(c, ret, "getting btree node");
369                 if (ret)
370                         break;
371
372                 if (bch2_btree_node_is_stale(c, cur)) {
373                         bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
374                         six_unlock_read(&cur->c.lock);
375                         bch2_btree_node_evict(trans, cur_k.k);
376                         ret = bch2_journal_key_delete(c, b->c.btree_id,
377                                                       b->c.level, cur_k.k->k.p);
378                         cur = NULL;
379                         if (ret)
380                                 break;
381                         continue;
382                 }
383
384                 ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan);
385                 if (ret == DID_FILL_FROM_SCAN) {
386                         new_pass = true;
387                         ret = 0;
388                 }
389
390                 if (ret == DROP_THIS_NODE) {
391                         six_unlock_read(&cur->c.lock);
392                         bch2_btree_node_evict(trans, cur_k.k);
393                         ret = bch2_journal_key_delete(c, b->c.btree_id,
394                                                       b->c.level, cur_k.k->k.p);
395                         cur = NULL;
396                         if (ret)
397                                 break;
398                         continue;
399                 }
400
401                 if (prev)
402                         six_unlock_read(&prev->c.lock);
403                 prev = NULL;
404
405                 if (ret == DROP_PREV_NODE) {
406                         bch_info(c, "dropped prev node");
407                         bch2_btree_node_evict(trans, prev_k.k);
408                         ret = bch2_journal_key_delete(c, b->c.btree_id,
409                                                       b->c.level, prev_k.k->k.p);
410                         if (ret)
411                                 break;
412
413                         bch2_btree_and_journal_iter_exit(&iter);
414                         goto again;
415                 } else if (ret)
416                         break;
417
418                 prev = cur;
419                 cur = NULL;
420                 bch2_bkey_buf_copy(&prev_k, c, cur_k.k);
421         }
422
423         if (!ret && !IS_ERR_OR_NULL(prev)) {
424                 BUG_ON(cur);
425                 ret = btree_repair_node_end(trans, b, prev, pulled_from_scan);
426                 if (ret == DID_FILL_FROM_SCAN) {
427                         new_pass = true;
428                         ret = 0;
429                 }
430         }
431
432         if (!IS_ERR_OR_NULL(prev))
433                 six_unlock_read(&prev->c.lock);
434         prev = NULL;
435         if (!IS_ERR_OR_NULL(cur))
436                 six_unlock_read(&cur->c.lock);
437         cur = NULL;
438
439         if (ret)
440                 goto err;
441
442         bch2_btree_and_journal_iter_exit(&iter);
443
444         if (new_pass)
445                 goto again;
446
447         bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
448         iter.prefetch = true;
449
450         while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
451                 bch2_bkey_buf_reassemble(&cur_k, c, k);
452                 bch2_btree_and_journal_iter_advance(&iter);
453
454                 cur = bch2_btree_node_get_noiter(trans, cur_k.k,
455                                         b->c.btree_id, b->c.level - 1,
456                                         false);
457                 ret = PTR_ERR_OR_ZERO(cur);
458
459                 bch_err_msg(c, ret, "getting btree node");
460                 if (ret)
461                         goto err;
462
463                 ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
464                 six_unlock_read(&cur->c.lock);
465                 cur = NULL;
466
467                 if (ret == DROP_THIS_NODE) {
468                         bch2_btree_node_evict(trans, cur_k.k);
469                         ret = bch2_journal_key_delete(c, b->c.btree_id,
470                                                       b->c.level, cur_k.k->k.p);
471                         new_pass = true;
472                 }
473
474                 if (ret)
475                         goto err;
476
477                 have_child = true;
478         }
479
480         printbuf_reset(&buf);
481         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
482
483         if (mustfix_fsck_err_on(!have_child,
484                         trans, btree_node_topology_interior_node_empty,
485                         "empty interior btree node at btree %s level %u\n"
486                         "  %s",
487                         bch2_btree_id_str(b->c.btree_id),
488                         b->c.level, buf.buf))
489                 ret = DROP_THIS_NODE;
490 err:
491 fsck_err:
492         if (!IS_ERR_OR_NULL(prev))
493                 six_unlock_read(&prev->c.lock);
494         if (!IS_ERR_OR_NULL(cur))
495                 six_unlock_read(&cur->c.lock);
496
497         bch2_btree_and_journal_iter_exit(&iter);
498
499         if (!ret && new_pass)
500                 goto again;
501
502         BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));
503
504         bch2_bkey_buf_exit(&prev_k, c);
505         bch2_bkey_buf_exit(&cur_k, c);
506         printbuf_exit(&buf);
507         return ret;
508 }
509
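/*
 * Check and repair the topology of every btree: unreadable roots are
 * reconstructed from nodes found by scanning, then each tree is repaired
 * recursively:
 */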
510 int bch2_check_topology(struct bch_fs *c)
511 {
512         struct btree_trans *trans = bch2_trans_get(c);
513         struct bpos pulled_from_scan = POS_MIN;
514         int ret = 0;
515
516         bch2_trans_srcu_unlock(trans);
517
518         for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
519                 struct btree_root *r = bch2_btree_id_root(c, i);
520                 bool reconstructed_root = false;
521
522                 if (r->error) {
523                         ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
524                         if (ret)
525                                 break;
526 reconstruct_root:
527                         bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
528
529                         r->alive = false;
530                         r->error = 0;
531
532                         if (!bch2_btree_has_scanned_nodes(c, i)) {
533                                 mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing,
534                                                  "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
535                                 bch2_btree_root_alloc_fake_trans(trans, i, 0);
536                         } else {
537                                 bch2_btree_root_alloc_fake_trans(trans, i, 1);
538                                 bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
539                                 ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
540                                 if (ret)
541                                         break;
542                         }
543
544                         reconstructed_root = true;
545                 }
546
547                 struct btree *b = r->b;
548
549                 btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
550                 ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
551                 six_unlock_read(&b->c.lock);
552
553                 if (ret == DROP_THIS_NODE) {
554                         mutex_lock(&c->btree_cache.lock);
555                         bch2_btree_node_hash_remove(&c->btree_cache, b);
556                         mutex_unlock(&c->btree_cache.lock);
557
558                         r->b = NULL;
559
560                         if (!reconstructed_root)
561                                 goto reconstruct_root;
562
563                         bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
564                         bch2_btree_root_alloc_fake_trans(trans, i, 0);
565                         r->alive = false;
566                         ret = 0;
567                 }
568         }
569 fsck_err:
570         bch2_trans_put(trans);
571         return ret;
572 }
573
574 /* marking of btree keys/nodes: */
575
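/*
 * GC mark a single key: run the check_repair trigger first (committing any
 * repairs it queues), then the gc trigger to recompute usage:
 */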
576 static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
577                             unsigned level, struct btree **prev,
578                             struct btree_iter *iter, struct bkey_s_c k,
579                             bool initial)
580 {
581         struct bch_fs *c = trans->c;
582
583         if (iter) {
584                 struct btree_path *path = btree_iter_path(trans, iter);
585                 struct btree *b = path_l(path)->b;
586
587                 if (*prev != b) {
588                         int ret = bch2_btree_node_check_topology(trans, b);
589                         if (ret)
590                                 return ret;
591                 }
592                 *prev = b;
593         }
594
595         struct bkey deleted = KEY(0, 0, 0);
596         struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
597         struct printbuf buf = PRINTBUF;
598         int ret = 0;
599
600         deleted.p = k.k->p;
601
602         if (initial) {
603                 BUG_ON(bch2_journal_seq_verify &&
604                        k.k->bversion.lo > atomic64_read(&c->journal.seq));
605
606                 if (fsck_err_on(btree_id != BTREE_ID_accounting &&
607                                 k.k->bversion.lo > atomic64_read(&c->key_version),
608                                 trans, bkey_version_in_future,
609                                 "key version number higher than recorded %llu\n  %s",
610                                 atomic64_read(&c->key_version),
611                                 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
612                         atomic64_set(&c->key_version, k.k->bversion.lo);
613         }
614
615         if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
616                                 trans, btree_bitmap_not_marked,
617                                 "btree ptr not marked in member info btree allocated bitmap\n  %s",
618                                 (printbuf_reset(&buf),
619                                  bch2_bkey_val_to_text(&buf, c, k),
620                                  buf.buf))) {
621                 mutex_lock(&c->sb_lock);
622                 bch2_dev_btree_bitmap_mark(c, k);
623                 bch2_write_super(c);
624                 mutex_unlock(&c->sb_lock);
625         }
626
627         /*
628          * We require a commit before key_trigger() because
629          * key_trigger(BTREE_TRIGGER_GC) is not idempotent; we'll calculate the
630          * wrong result if we run it multiple times.
631          */
632         unsigned flags = !iter ? BTREE_TRIGGER_is_root : 0;
633
634         ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
635                                BTREE_TRIGGER_check_repair|flags);
636         if (ret)
637                 goto out;
638
639         if (trans->nr_updates) {
640                 ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
641                         -BCH_ERR_transaction_restart_nested;
642                 goto out;
643         }
644
645         ret = bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(k),
646                                BTREE_TRIGGER_gc|BTREE_TRIGGER_insert|flags);
647 out:
648 fsck_err:
649         printbuf_exit(&buf);
650         bch_err_fn(c, ret);
651         return ret;
652 }
653
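/* GC walk one btree, marking each level from target_depth up to and including the root: */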
654 static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool initial)
655 {
656         struct bch_fs *c = trans->c;
657         unsigned target_depth = btree_node_type_has_triggers(__btree_node_type(0, btree)) ? 0 : 1;
658         int ret = 0;
659
660         /* We need to make sure every leaf node is readable before going RW */
661         if (initial)
662                 target_depth = 0;
663
664         for (unsigned level = target_depth; level < BTREE_MAX_DEPTH; level++) {
665                 struct btree *prev = NULL;
666                 struct btree_iter iter;
667                 bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, level,
668                                           BTREE_ITER_prefetch);
669
670                 ret = for_each_btree_key_continue(trans, iter, 0, k, ({
671                         gc_pos_set(c, gc_pos_btree(btree, level, k.k->p));
672                         bch2_gc_mark_key(trans, btree, level, &prev, &iter, k, initial);
673                 }));
674                 if (ret)
675                         goto err;
676         }
677
678         /* root */
679         do {
680 retry_root:
681                 bch2_trans_begin(trans);
682
683                 struct btree_iter iter;
684                 bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
685                                           0, bch2_btree_id_root(c, btree)->b->c.level, 0);
686                 struct btree *b = bch2_btree_iter_peek_node(&iter);
687                 ret = PTR_ERR_OR_ZERO(b);
688                 if (ret)
689                         goto err_root;
690
691                 if (b != btree_node_root(c, b)) {
692                         bch2_trans_iter_exit(trans, &iter);
693                         goto retry_root;
694                 }
695
696                 gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX));
697                 struct bkey_s_c k = bkey_i_to_s_c(&b->key);
698                 ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial);
699 err_root:
700                 bch2_trans_iter_exit(trans, &iter);
701         } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
702 err:
703         bch_err_fn(c, ret);
704         return ret;
705 }
706
707 static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
708 {
709         return cmp_int(gc_btree_order(l), gc_btree_order(r));
710 }
711
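/* GC walk all btrees, in gc phase order (gc_btree_order()): */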
712 static int bch2_gc_btrees(struct bch_fs *c)
713 {
714         struct btree_trans *trans = bch2_trans_get(c);
715         enum btree_id ids[BTREE_ID_NR];
716         unsigned i;
717         int ret = 0;
718
719         for (i = 0; i < BTREE_ID_NR; i++)
720                 ids[i] = i;
721         bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp);
722
723         for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
724                 unsigned btree = i < BTREE_ID_NR ? ids[i] : i;
725
726                 if (IS_ERR_OR_NULL(bch2_btree_id_root(c, btree)->b))
727                         continue;
728
729                 ret = bch2_gc_btree(trans, btree, true);
730
731                 if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
732                                         trans, btree_node_read_error,
733                                "btree node read error for %s",
734                                bch2_btree_id_str(btree)))
735                         ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
736         }
737 fsck_err:
738         bch2_trans_put(trans);
739         bch_err_fn(c, ret);
740         return ret;
741 }
742
743 static int bch2_mark_superblocks(struct bch_fs *c)
744 {
745         gc_pos_set(c, gc_phase(GC_PHASE_sb));
746
747         return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc);
748 }
749
750 static void bch2_gc_free(struct bch_fs *c)
751 {
752         bch2_accounting_gc_free(c);
753
754         genradix_free(&c->reflink_gc_table);
755         genradix_free(&c->gc_stripes);
756
757         for_each_member_device(c, ca)
758                 genradix_free(&ca->buckets_gc);
759 }
760
761 static int bch2_gc_start(struct bch_fs *c)
762 {
763         for_each_member_device(c, ca) {
764                 int ret = bch2_dev_usage_init(ca, true);
765                 if (ret) {
766                         bch2_dev_put(ca);
767                         return ret;
768                 }
769         }
770
771         return 0;
772 }
773
774 /* returns true if not equal */
775 static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
776                                      struct bch_alloc_v4 r)
777 {
778         return  l.gen != r.gen                          ||
779                 l.oldest_gen != r.oldest_gen            ||
780                 l.data_type != r.data_type              ||
781                 l.dirty_sectors != r.dirty_sectors      ||
782                 l.stripe_sectors != r.stripe_sectors    ||
783                 l.cached_sectors != r.cached_sectors     ||
784                 l.stripe_redundancy != r.stripe_redundancy ||
785                 l.stripe != r.stripe;
786 }
787
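/*
 * Compare an alloc key against the in-memory gc bucket state that gc just
 * computed, and rewrite any fields that disagree (after fsck_err confirmation):
 */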
788 static int bch2_alloc_write_key(struct btree_trans *trans,
789                                 struct btree_iter *iter,
790                                 struct bch_dev *ca,
791                                 struct bkey_s_c k)
792 {
793         struct bch_fs *c = trans->c;
794         struct bkey_i_alloc_v4 *a;
795         struct bch_alloc_v4 old_gc, gc, old_convert, new;
796         const struct bch_alloc_v4 *old;
797         int ret;
798
799         if (!bucket_valid(ca, k.k->p.offset))
800                 return 0;
801
802         old = bch2_alloc_to_v4(k, &old_convert);
803         gc = new = *old;
804
805         percpu_down_read(&c->mark_lock);
806         __bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
807
808         old_gc = gc;
809
810         if ((old->data_type == BCH_DATA_sb ||
811              old->data_type == BCH_DATA_journal) &&
812             !bch2_dev_is_online(ca)) {
813                 gc.data_type = old->data_type;
814                 gc.dirty_sectors = old->dirty_sectors;
815         }
816         percpu_up_read(&c->mark_lock);
817
818         /*
819          * gc.data_type doesn't yet include need_discard & need_gc_gen states -
820          * fix that here:
821          */
822         alloc_data_type_set(&gc, gc.data_type);
823         if (gc.data_type != old_gc.data_type ||
824             gc.dirty_sectors != old_gc.dirty_sectors) {
825                 ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
826                 if (ret)
827                         return ret;
828
829                 /*
830                  * Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
831                  * safe w.r.t. transaction restarts, so fixup the gc_bucket so
832                  * we don't run it twice:
833                  */
834                 percpu_down_read(&c->mark_lock);
835                 struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
836                 gc_m->data_type = gc.data_type;
837                 gc_m->dirty_sectors = gc.dirty_sectors;
838                 percpu_up_read(&c->mark_lock);
839         }
840
841         if (fsck_err_on(new.data_type != gc.data_type,
842                         trans, alloc_key_data_type_wrong,
843                         "bucket %llu:%llu gen %u has wrong data_type"
844                         ": got %s, should be %s",
845                         iter->pos.inode, iter->pos.offset,
846                         gc.gen,
847                         bch2_data_type_str(new.data_type),
848                         bch2_data_type_str(gc.data_type)))
849                 new.data_type = gc.data_type;
850
851 #define copy_bucket_field(_errtype, _f)                                 \
852         if (fsck_err_on(new._f != gc._f,                                \
853                         trans, _errtype,                                \
854                         "bucket %llu:%llu gen %u data type %s has wrong " #_f   \
855                         ": got %llu, should be %llu",                   \
856                         iter->pos.inode, iter->pos.offset,              \
857                         gc.gen,                                         \
858                         bch2_data_type_str(gc.data_type),               \
859                         (u64) new._f, (u64) gc._f))                             \
860                 new._f = gc._f;                                         \
861
862         copy_bucket_field(alloc_key_gen_wrong,                  gen);
863         copy_bucket_field(alloc_key_dirty_sectors_wrong,        dirty_sectors);
864         copy_bucket_field(alloc_key_stripe_sectors_wrong,       stripe_sectors);
865         copy_bucket_field(alloc_key_cached_sectors_wrong,       cached_sectors);
866         copy_bucket_field(alloc_key_stripe_wrong,               stripe);
867         copy_bucket_field(alloc_key_stripe_redundancy_wrong,    stripe_redundancy);
868 #undef copy_bucket_field
869
870         if (!bch2_alloc_v4_cmp(*old, new))
871                 return 0;
872
873         a = bch2_alloc_to_v4_mut(trans, k);
874         ret = PTR_ERR_OR_ZERO(a);
875         if (ret)
876                 return ret;
877
878         a->v = new;
879
880         /*
881          * The trigger normally makes sure these are set, but we're not running
882          * triggers:
883          */
884         if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
885                 a->v.io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
886
887         ret = bch2_trans_update(trans, iter, &a->k_i, BTREE_TRIGGER_norun);
888 fsck_err:
889         return ret;
890 }
891
892 static int bch2_gc_alloc_done(struct bch_fs *c)
893 {
894         int ret = 0;
895
896         for_each_member_device(c, ca) {
897                 ret = bch2_trans_run(c,
898                         for_each_btree_key_upto_commit(trans, iter, BTREE_ID_alloc,
899                                         POS(ca->dev_idx, ca->mi.first_bucket),
900                                         POS(ca->dev_idx, ca->mi.nbuckets - 1),
901                                         BTREE_ITER_slots|BTREE_ITER_prefetch, k,
902                                         NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
903                                 bch2_alloc_write_key(trans, &iter, ca, k)));
904                 if (ret) {
905                         bch2_dev_put(ca);
906                         break;
907                 }
908         }
909
910         bch_err_fn(c, ret);
911         return ret;
912 }
913
914 static int bch2_gc_alloc_start(struct bch_fs *c)
915 {
916         int ret = 0;
917
918         for_each_member_device(c, ca) {
919                 ret = genradix_prealloc(&ca->buckets_gc, ca->mi.nbuckets, GFP_KERNEL);
920                 if (ret) {
921                         bch2_dev_put(ca);
922                         ret = -BCH_ERR_ENOMEM_gc_alloc_start;
923                         break;
924                 }
925         }
926
927         bch_err_fn(c, ret);
928         return ret;
929 }
930
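/* Fix up a reflink key whose refcount disagrees with the count gc arrived at: */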
931 static int bch2_gc_write_reflink_key(struct btree_trans *trans,
932                                      struct btree_iter *iter,
933                                      struct bkey_s_c k,
934                                      size_t *idx)
935 {
936         struct bch_fs *c = trans->c;
937         const __le64 *refcount = bkey_refcount_c(k);
938         struct printbuf buf = PRINTBUF;
939         struct reflink_gc *r;
940         int ret = 0;
941
942         if (!refcount)
943                 return 0;
944
945         while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
946                r->offset < k.k->p.offset)
947                 ++*idx;
948
949         if (!r ||
950             r->offset != k.k->p.offset ||
951             r->size != k.k->size) {
952                 bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
953                 return -EINVAL;
954         }
955
956         if (fsck_err_on(r->refcount != le64_to_cpu(*refcount),
957                         trans, reflink_v_refcount_wrong,
958                         "reflink key has wrong refcount:\n"
959                         "  %s\n"
960                         "  should be %u",
961                         (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
962                         r->refcount)) {
963                 struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
964                 ret = PTR_ERR_OR_ZERO(new);
965                 if (ret)
966                         goto out;
967
968                 if (!r->refcount)
969                         new->k.type = KEY_TYPE_deleted;
970                 else
971                         *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
972                 ret = bch2_trans_update(trans, iter, new, 0);
973         }
974 out:
975 fsck_err:
976         printbuf_exit(&buf);
977         return ret;
978 }
979
980 static int bch2_gc_reflink_done(struct bch_fs *c)
981 {
982         size_t idx = 0;
983
984         int ret = bch2_trans_run(c,
985                 for_each_btree_key_commit(trans, iter,
986                                 BTREE_ID_reflink, POS_MIN,
987                                 BTREE_ITER_prefetch, k,
988                                 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
989                         bch2_gc_write_reflink_key(trans, &iter, k, &idx)));
990         c->reflink_gc_nr = 0;
991         return ret;
992 }
993
994 static int bch2_gc_reflink_start(struct bch_fs *c)
995 {
996         c->reflink_gc_nr = 0;
997
998         int ret = bch2_trans_run(c,
999                 for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN,
1000                                    BTREE_ITER_prefetch, k, ({
1001                         const __le64 *refcount = bkey_refcount_c(k);
1002
1003                         if (!refcount)
1004                                 continue;
1005
1006                         struct reflink_gc *r = genradix_ptr_alloc(&c->reflink_gc_table,
1007                                                         c->reflink_gc_nr++, GFP_KERNEL);
1008                         if (!r) {
1009                                 ret = -BCH_ERR_ENOMEM_gc_reflink_start;
1010                                 break;
1011                         }
1012
1013                         r->offset       = k.k->p.offset;
1014                         r->size         = k.k->size;
1015                         r->refcount     = 0;
1016                         0;
1017                 })));
1018
1019         bch_err_fn(c, ret);
1020         return ret;
1021 }
1022
1023 static int bch2_gc_write_stripes_key(struct btree_trans *trans,
1024                                      struct btree_iter *iter,
1025                                      struct bkey_s_c k)
1026 {
1027         struct bch_fs *c = trans->c;
1028         struct printbuf buf = PRINTBUF;
1029         const struct bch_stripe *s;
1030         struct gc_stripe *m;
1031         bool bad = false;
1032         unsigned i;
1033         int ret = 0;
1034
1035         if (k.k->type != KEY_TYPE_stripe)
1036                 return 0;
1037
1038         s = bkey_s_c_to_stripe(k).v;
1039         m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
1040
1041         for (i = 0; i < s->nr_blocks; i++) {
1042                 u32 old = stripe_blockcount_get(s, i);
1043                 u32 new = (m ? m->block_sectors[i] : 0);
1044
1045                 if (old != new) {
1046                         prt_printf(&buf, "stripe block %u has wrong sector count: got %u, should be %u\n",
1047                                    i, old, new);
1048                         bad = true;
1049                 }
1050         }
1051
1052         if (bad)
1053                 bch2_bkey_val_to_text(&buf, c, k);
1054
1055         if (fsck_err_on(bad,
1056                         trans, stripe_sector_count_wrong,
1057                         "%s", buf.buf)) {
1058                 struct bkey_i_stripe *new;
1059
1060                 new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1061                 ret = PTR_ERR_OR_ZERO(new);
1062                 if (ret)
1063                         return ret;
1064
1065                 bkey_reassemble(&new->k_i, k);
1066
1067                 for (i = 0; i < new->v.nr_blocks; i++)
1068                         stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
1069
1070                 ret = bch2_trans_update(trans, iter, &new->k_i, 0);
1071         }
1072 fsck_err:
1073         printbuf_exit(&buf);
1074         return ret;
1075 }
1076
1077 static int bch2_gc_stripes_done(struct bch_fs *c)
1078 {
1079         return bch2_trans_run(c,
1080                 for_each_btree_key_commit(trans, iter,
1081                                 BTREE_ID_stripes, POS_MIN,
1082                                 BTREE_ITER_prefetch, k,
1083                                 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
1084                         bch2_gc_write_stripes_key(trans, &iter, k)));
1085 }
1086
1087 /**
1088  * bch2_check_allocations - walk all references to buckets, and recompute them:
1089  *
1090  * @c:                  filesystem object
1091  *
1092  * Returns: 0 on success, or standard errcode on failure
1093  *
1094  * Order matters here:
1095  *  - Concurrent GC relies on the fact that we have a total ordering for
1096  *    everything that GC walks - see gc_will_visit_node(),
1097  *    gc_will_visit_root()
1098  *
1099  *  - also, references move around in the course of index updates and
1100  *    various other crap: everything needs to agree on the ordering
1101  *    references are allowed to move around in - e.g., we're allowed to
1102  *    start with a reference owned by an open_bucket (the allocator) and
1103  *    move it to the btree, but not the reverse.
1104  *
1105  *    This is necessary to ensure that gc doesn't miss references that
1106  *    move around - if references move backwards in the ordering GC
1107  *    uses, GC could skip past them
1108  */
1109 int bch2_check_allocations(struct bch_fs *c)
1110 {
1111         int ret;
1112
1113         lockdep_assert_held(&c->state_lock);
1114
1115         down_write(&c->gc_lock);
1116
1117         bch2_btree_interior_updates_flush(c);
1118
1119         ret   = bch2_gc_accounting_start(c) ?:
1120                 bch2_gc_start(c) ?:
1121                 bch2_gc_alloc_start(c) ?:
1122                 bch2_gc_reflink_start(c);
1123         if (ret)
1124                 goto out;
1125
1126         gc_pos_set(c, gc_phase(GC_PHASE_start));
1127
1128         ret = bch2_mark_superblocks(c);
1129         bch_err_msg(c, ret, "marking superblocks");
1130         if (ret)
1131                 goto out;
1132
1133         ret = bch2_gc_btrees(c);
1134         if (ret)
1135                 goto out;
1136
1137         c->gc_count++;
1138
1139         ret   = bch2_gc_alloc_done(c) ?:
1140                 bch2_gc_accounting_done(c) ?:
1141                 bch2_gc_stripes_done(c) ?:
1142                 bch2_gc_reflink_done(c);
1143 out:
1144         percpu_down_write(&c->mark_lock);
1145         /* Indicates that gc is no longer in progress: */
1146         __gc_pos_set(c, gc_phase(GC_PHASE_not_running));
1147
1148         bch2_gc_free(c);
1149         percpu_up_write(&c->mark_lock);
1150
1151         up_write(&c->gc_lock);
1152
1153         /*
1154          * At startup, allocations can happen directly instead of via the
1155          * allocator thread - issue wakeup in case they blocked on gc_lock:
1156          */
1157         closure_wake_up(&c->freelist_wait);
1158         bch_err_fn(c, ret);
1159         return ret;
1160 }
1161
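/*
 * If any pointer in @k is more than 16 generations stale, rewrite the key so
 * stale cached pointers are dropped; otherwise record, per bucket, the oldest
 * gen still referenced:
 */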
1162 static int gc_btree_gens_key(struct btree_trans *trans,
1163                              struct btree_iter *iter,
1164                              struct bkey_s_c k)
1165 {
1166         struct bch_fs *c = trans->c;
1167         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
1168         struct bkey_i *u;
1169         int ret;
1170
1171         if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
1172                 return -EROFS;
1173
1174         percpu_down_read(&c->mark_lock);
1175         rcu_read_lock();
1176         bkey_for_each_ptr(ptrs, ptr) {
1177                 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
1178                 if (!ca)
1179                         continue;
1180
1181                 if (dev_ptr_stale(ca, ptr) > 16) {
1182                         rcu_read_unlock();
1183                         percpu_up_read(&c->mark_lock);
1184                         goto update;
1185                 }
1186         }
1187
1188         bkey_for_each_ptr(ptrs, ptr) {
1189                 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
1190                 if (!ca)
1191                         continue;
1192
1193                 u8 *gen = &ca->oldest_gen[PTR_BUCKET_NR(ca, ptr)];
1194                 if (gen_after(*gen, ptr->gen))
1195                         *gen = ptr->gen;
1196         }
1197         rcu_read_unlock();
1198         percpu_up_read(&c->mark_lock);
1199         return 0;
1200 update:
1201         u = bch2_bkey_make_mut(trans, iter, &k, 0);
1202         ret = PTR_ERR_OR_ZERO(u);
1203         if (ret)
1204                 return ret;
1205
1206         bch2_extent_normalize(c, bkey_i_to_s(u));
1207         return 0;
1208 }
1209
1210 static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct bch_dev *ca,
1211                                        struct btree_iter *iter, struct bkey_s_c k)
1212 {
1213         struct bch_alloc_v4 a_convert;
1214         const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
1215         struct bkey_i_alloc_v4 *a_mut;
1216         int ret;
1217
1218         if (a->oldest_gen == ca->oldest_gen[iter->pos.offset])
1219                 return 0;
1220
1221         a_mut = bch2_alloc_to_v4_mut(trans, k);
1222         ret = PTR_ERR_OR_ZERO(a_mut);
1223         if (ret)
1224                 return ret;
1225
1226         a_mut->v.oldest_gen = ca->oldest_gen[iter->pos.offset];
1227         alloc_data_type_set(&a_mut->v, a_mut->v.data_type);
1228
1229         return bch2_trans_update(trans, iter, &a_mut->k_i, 0);
1230 }
1231
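/*
 * Generation gc: walk every pointer to find the oldest gen still referenced in
 * each bucket, then write the result back to the alloc btree so bucket gens
 * can keep advancing without wrapping:
 */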
1232 int bch2_gc_gens(struct bch_fs *c)
1233 {
1234         u64 b, start_time = local_clock();
1235         int ret;
1236
1237         if (!mutex_trylock(&c->gc_gens_lock))
1238                 return 0;
1239
1240         trace_and_count(c, gc_gens_start, c);
1241
1242         /*
1243          * We have to use trylock here. Otherwise, we would
1244          * introduce a deadlock in the RO path - we take the
1245          * state lock at the start of going RO.
1246          */
1247         if (!down_read_trylock(&c->state_lock)) {
1248                 mutex_unlock(&c->gc_gens_lock);
1249                 return 0;
1250         }
1251
1252         for_each_member_device(c, ca) {
1253                 struct bucket_gens *gens = bucket_gens(ca);
1254
1255                 BUG_ON(ca->oldest_gen);
1256
1257                 ca->oldest_gen = kvmalloc(gens->nbuckets, GFP_KERNEL);
1258                 if (!ca->oldest_gen) {
1259                         bch2_dev_put(ca);
1260                         ret = -BCH_ERR_ENOMEM_gc_gens;
1261                         goto err;
1262                 }
1263
1264                 for (b = gens->first_bucket;
1265                      b < gens->nbuckets; b++)
1266                         ca->oldest_gen[b] = gens->b[b];
1267         }
1268
1269         for (unsigned i = 0; i < BTREE_ID_NR; i++)
1270                 if (btree_type_has_ptrs(i)) {
1271                         c->gc_gens_btree = i;
1272                         c->gc_gens_pos = POS_MIN;
1273
1274                         ret = bch2_trans_run(c,
1275                                 for_each_btree_key_commit(trans, iter, i,
1276                                                 POS_MIN,
1277                                                 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
1278                                                 k,
1279                                                 NULL, NULL,
1280                                                 BCH_TRANS_COMMIT_no_enospc,
1281                                         gc_btree_gens_key(trans, &iter, k)));
1282                         if (ret)
1283                                 goto err;
1284                 }
1285
1286         struct bch_dev *ca = NULL;
1287         ret = bch2_trans_run(c,
1288                 for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
1289                                 POS_MIN,
1290                                 BTREE_ITER_prefetch,
1291                                 k,
1292                                 NULL, NULL,
1293                                 BCH_TRANS_COMMIT_no_enospc, ({
1294                         ca = bch2_dev_iterate(c, ca, k.k->p.inode);
1295                         if (!ca) {
1296                                 bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
1297                                 continue;
1298                         }
1299                         bch2_alloc_write_oldest_gen(trans, ca, &iter, k);
1300                 })));
1301         bch2_dev_put(ca);
1302
1303         if (ret)
1304                 goto err;
1305
1306         c->gc_gens_btree        = 0;
1307         c->gc_gens_pos          = POS_MIN;
1308
1309         c->gc_count++;
1310
1311         bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
1312         trace_and_count(c, gc_gens_end, c);
1313 err:
1314         for_each_member_device(c, ca) {
1315                 kvfree(ca->oldest_gen);
1316                 ca->oldest_gen = NULL;
1317         }
1318
1319         up_read(&c->state_lock);
1320         mutex_unlock(&c->gc_gens_lock);
1321         if (!bch2_err_matches(ret, EROFS))
1322                 bch_err_fn(c, ret);
1323         return ret;
1324 }
1325
1326 static void bch2_gc_gens_work(struct work_struct *work)
1327 {
1328         struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
1329         bch2_gc_gens(c);
1330         bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
1331 }
1332
1333 void bch2_gc_gens_async(struct bch_fs *c)
1334 {
1335         if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) &&
1336             !queue_work(c->write_ref_wq, &c->gc_gens_work))
1337                 bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
1338 }
1339
1340 void bch2_fs_gc_init(struct bch_fs *c)
1341 {
1342         seqcount_init(&c->gc_pos_lock);
1343
1344         INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
1345 }