// SPDX-License-Identifier: GPL-2.0
/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
#include "buckets_waiting_for_journal.h"
#include "disk_accounting.h"
#include "ec.h"
#include "error.h"
#include "inode.h"
#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
#include "reflink.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/preempt.h>

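/*
 * Sum a device's percpu usage counters into *usage; no locks are taken, so
 * the result is a best-effort snapshot.
 */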
void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
{
        memset(usage, 0, sizeof(*usage));
        acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s());
}

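/*
 * Inflate a reservation by roughly 1 / (1 << RESERVE_FACTOR), so that the
 * usage we report errs on the side of overcounting reserved space:
 */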
static u64 reserve_factor(u64 r)
{
        return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
}

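/*
 * Summarize filesystem usage: capacity excludes space accounted as hidden,
 * and reserved space is inflated by reserve_factor() before being counted
 * as used. Caller must hold mark_lock; bch2_fs_usage_read_short() below is
 * the locked wrapper.
 */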
static struct bch_fs_usage_short
__bch2_fs_usage_read_short(struct bch_fs *c)
{
        struct bch_fs_usage_short ret;
        u64 data, reserved;

        ret.capacity = c->capacity -
                percpu_u64_get(&c->usage->hidden);

        data            = percpu_u64_get(&c->usage->data) +
                percpu_u64_get(&c->usage->btree);
        reserved        = percpu_u64_get(&c->usage->reserved) +
                percpu_u64_get(c->online_reserved);

        ret.used        = min(ret.capacity, data + reserve_factor(reserved));
        ret.free        = ret.capacity - ret.used;

        ret.nr_inodes   = percpu_u64_get(&c->usage->nr_inodes);

        return ret;
}

struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
        struct bch_fs_usage_short ret;

        percpu_down_read(&c->mark_lock);
        ret = __bch2_fs_usage_read_short(c);
        percpu_up_read(&c->mark_lock);

        return ret;
}

void bch2_dev_usage_to_text(struct printbuf *out,
                            struct bch_dev *ca,
                            struct bch_dev_usage *usage)
{
        if (out->nr_tabstops < 5) {
                printbuf_tabstops_reset(out);
                printbuf_tabstop_push(out, 12);
                printbuf_tabstop_push(out, 16);
                printbuf_tabstop_push(out, 16);
                printbuf_tabstop_push(out, 16);
                printbuf_tabstop_push(out, 16);
        }

        prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n");

        for (unsigned i = 0; i < BCH_DATA_NR; i++) {
                bch2_prt_data_type(out, i);
                prt_printf(out, "\t%llu\r%llu\r%llu\r\n",
                           usage->d[i].buckets,
                           usage->d[i].sectors,
                           usage->d[i].fragmented);
        }

        prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets);
}

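/*
 * Check one decoded pointer of @k against the in-memory GC bucket state:
 * repair the bucket in place where that's safe (e.g. trusting a btree node
 * pointer's gen), otherwise set *do_update so the caller rewrites the key.
 */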
static int bch2_check_fix_ptr(struct btree_trans *trans,
                              struct bkey_s_c k,
                              struct extent_ptr_decoded p,
                              const union bch_extent_entry *entry,
                              bool *do_update)
{
        struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
        if (!ca) {
                if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
                                trans, ptr_to_invalid_device,
                                "pointer to missing device %u\n"
                                "while marking %s",
                                p.ptr.dev,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;
                return 0;
        }

        struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
        if (!g) {
                if (fsck_err(trans, ptr_to_invalid_device,
                             "pointer to invalid bucket on device %u\n"
                             "while marking %s",
                             p.ptr.dev,
                             (printbuf_reset(&buf),
                              bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;
                goto out;
        }

        enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);

        if (fsck_err_on(!g->gen_valid,
                        trans, ptr_to_missing_alloc_key,
                        "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                if (!p.ptr.cached) {
                        g->gen_valid            = true;
                        g->gen                  = p.ptr.gen;
                } else {
                        *do_update = true;
                }
        }

        if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0,
                        trans, ptr_gen_newer_than_bucket_gen,
                        "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen, g->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                if (!p.ptr.cached &&
                    (g->data_type != BCH_DATA_btree ||
                     data_type == BCH_DATA_btree)) {
                        g->gen_valid            = true;
                        g->gen                  = p.ptr.gen;
                        g->data_type            = 0;
                        g->stripe_sectors       = 0;
                        g->dirty_sectors        = 0;
                        g->cached_sectors       = 0;
                } else {
                        *do_update = true;
                }
        }

        if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX,
                        trans, ptr_gen_newer_than_bucket_gen,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                *do_update = true;

        if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0,
                        trans, stale_dirty_ptr,
                        "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen, g->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                *do_update = true;

        if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
                goto out;

        if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type),
                        trans, ptr_bucket_data_type_mismatch,
                        "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
                        bch2_data_type_str(g->data_type),
                        bch2_data_type_str(data_type),
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                if (data_type == BCH_DATA_btree) {
                        g->gen_valid            = true;
                        g->gen                  = p.ptr.gen;
                        g->data_type            = data_type;
                        g->stripe_sectors       = 0;
                        g->dirty_sectors        = 0;
                        g->cached_sectors       = 0;
                } else {
                        *do_update = true;
                }
        }

        if (p.has_ec) {
                struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);

                if (fsck_err_on(!m || !m->alive,
                                trans, ptr_to_missing_stripe,
                                "pointer to nonexistent stripe %llu\n"
                                "while marking %s",
                                (u64) p.ec.idx,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;

                if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p),
                                trans, ptr_to_incorrect_stripe,
                                "pointer does not match stripe %llu\n"
                                "while marking %s",
                                (u64) p.ec.idx,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;
        }
out:
fsck_err:
        bch2_dev_put(ca);
        printbuf_exit(&buf);
        return ret;
}

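/*
 * Check every pointer in @k and, if any need fixing, emit an updated key:
 * pointers to nonexistent devices are dropped, btree node pointer gens are
 * taken from the GC bucket, bad extent pointers are dropped, and stripe
 * pointers whose stripe is gone or no longer matches are dropped.
 */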
int bch2_check_fix_ptrs(struct btree_trans *trans,
                        enum btree_id btree, unsigned level, struct bkey_s_c k,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry_c;
        struct extent_ptr_decoded p = { 0 };
        bool do_update = false;
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
                ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
                if (ret)
                        goto err;
        }

        if (do_update) {
                if (flags & BTREE_TRIGGER_is_root) {
                        bch_err(c, "cannot update btree roots yet");
                        ret = -EINVAL;
                        goto err;
                }

                struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
                ret = PTR_ERR_OR_ZERO(new);
                if (ret)
                        goto err;

                rcu_read_lock();
                bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev));
                rcu_read_unlock();

                if (level) {
                        /*
                         * We don't want to drop btree node pointers - if the
                         * btree node isn't there anymore, the read path will
                         * sort it out:
                         */
                        struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        rcu_read_lock();
                        bkey_for_each_ptr(ptrs, ptr) {
                                struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
                                struct bucket *g = PTR_GC_BUCKET(ca, ptr);

                                ptr->gen = g->gen;
                        }
                        rcu_read_unlock();
                } else {
                        struct bkey_ptrs ptrs;
                        union bch_extent_entry *entry;

                        rcu_read_lock();
restart_drop_ptrs:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
                                struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
                                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
                                enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);

                                if ((p.ptr.cached &&
                                     (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
                                    (!p.ptr.cached &&
                                     gen_cmp(p.ptr.gen, g->gen) < 0) ||
                                    gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
                                    (g->data_type &&
                                     g->data_type != data_type)) {
                                        bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
                                        goto restart_drop_ptrs;
                                }
                        }
                        rcu_read_unlock();
again:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_extent_entry_for_each(ptrs, entry) {
                                if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
                                        struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
                                                                        entry->stripe_ptr.idx);
                                        union bch_extent_entry *next_ptr;

                                        bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
                                                if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
                                                        goto found;
                                        next_ptr = NULL;
found:
                                        if (!next_ptr) {
                                                bch_err(c, "aieee, found stripe ptr with no data ptr");
                                                continue;
                                        }

                                        if (!m || !m->alive ||
                                            !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
                                                                       &next_ptr->ptr,
                                                                       m->sectors)) {
                                                bch2_bkey_extent_entry_drop(new, entry);
                                                goto again;
                                        }
                                }
                        }
                }

                if (0) {
                        printbuf_reset(&buf);
                        bch2_bkey_val_to_text(&buf, c, k);
                        bch_info(c, "updated %s", buf.buf);

                        printbuf_reset(&buf);
                        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
                        bch_info(c, "new key %s", buf.buf);
                }

                struct btree_iter iter;
                bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
                                          BTREE_ITER_intent|BTREE_ITER_all_snapshots);
                ret =   bch2_btree_iter_traverse(&iter) ?:
                        bch2_trans_update(trans, &iter, new,
                                          BTREE_UPDATE_internal_snapshot_node|
                                          BTREE_TRIGGER_norun);
                bch2_trans_iter_exit(trans, &iter);
                if (ret)
                        goto err;

                if (level)
                        bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
        }
err:
        printbuf_exit(&buf);
        return ret;
}

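/*
 * Validate @ptr against its bucket (generation and data type checks), then
 * apply @sectors to *bucket_sectors. Returns 1 for a stale cached pointer
 * (caller should just skip it), 0 on success, and a negative error on
 * inconsistency.
 */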
int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
                           struct bkey_s_c k,
                           const struct bch_extent_ptr *ptr,
                           s64 sectors, enum bch_data_type ptr_data_type,
                           u8 b_gen, u8 bucket_data_type,
                           u32 *bucket_sectors)
{
        struct bch_fs *c = trans->c;
        size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
        struct printbuf buf = PRINTBUF;
        bool inserting = sectors > 0;
        int ret = 0;

        BUG_ON(!sectors);

        if (gen_after(ptr->gen, b_gen)) {
                bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
                log_fsck_err(trans, ptr_gen_newer_than_bucket_gen,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        ptr->gen,
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
                bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
                log_fsck_err(trans, ptr_too_stale,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        ptr->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if (b_gen != ptr->gen && ptr->cached) {
                ret = 1;
                goto out;
        }

        if (b_gen != ptr->gen) {
                bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
                log_fsck_err(trans, stale_dirty_ptr,
                        "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bucket_gen_get(ca, bucket_nr),
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        ptr->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) {
                bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
                log_fsck_err(trans, ptr_bucket_data_type_mismatch,
                        "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type),
                        bch2_data_type_str(ptr_data_type),
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if ((u64) *bucket_sectors + sectors > U32_MAX) {
                bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
                log_fsck_err(trans, bucket_sector_count_overflow,
                        "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        *bucket_sectors, sectors,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                sectors = -*bucket_sectors;
        }

        *bucket_sectors += sectors;
out:
        printbuf_exit(&buf);
        return ret;
err:
fsck_err:
        bch2_dump_trans_updates(trans);
        bch2_inconsistent_error(c);
        ret = -BCH_ERR_bucket_ref_update;
        goto out;
}

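/*
 * Fold the usage delta accumulated by this transaction into the filesystem
 * usage counters, charging it against the transaction's disk reservation;
 * warn (once) if usage grew by more than was reserved.
 */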
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
{
        struct bch_fs *c = trans->c;
        u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
        static int warned_disk_usage = 0;
        bool warn = false;

        percpu_down_read(&c->mark_lock);
        struct bch_fs_usage_base *src = &trans->fs_usage_delta;

        s64 added = src->btree + src->data + src->reserved;

        /*
         * Not allowed to reduce sectors_available except by getting a
         * reservation:
         */
        s64 should_not_have_added = added - (s64) disk_res_sectors;
        if (unlikely(should_not_have_added > 0)) {
                u64 old, new;

                old = atomic64_read(&c->sectors_available);
                do {
                        new = max_t(s64, 0, old - should_not_have_added);
                } while (!atomic64_try_cmpxchg(&c->sectors_available,
                                               &old, new));

                added -= should_not_have_added;
                warn = true;
        }

        if (added > 0) {
                trans->disk_res->sectors -= added;
                this_cpu_sub(*c->online_reserved, added);
        }

        preempt_disable();
        struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
        acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
        preempt_enable();
        percpu_up_read(&c->mark_lock);

        if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
                bch2_trans_inconsistent(trans,
                                        "disk usage increased %lli more than %llu sectors reserved",
                                        should_not_have_added, disk_res_sectors);
}

/* KEY_TYPE_extent: */

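/*
 * Pick the counter in the alloc key that this pointer's sectors belong to
 * (stripe, dirty or cached) and apply the delta via bch2_bucket_ref_update().
 */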
static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca,
                          struct bkey_s_c k,
                          const struct extent_ptr_decoded *p,
                          s64 sectors, enum bch_data_type ptr_data_type,
                          struct bch_alloc_v4 *a,
                          bool insert)
{
        u32 *dst_sectors = p->has_ec    ? &a->stripe_sectors :
                !p->ptr.cached          ? &a->dirty_sectors :
                                          &a->cached_sectors;
        int ret = bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type,
                                         a->gen, a->data_type, dst_sectors);

        if (ret)
                return ret;
        if (insert)
                alloc_data_type_set(a, ptr_data_type);
        return 0;
}

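/*
 * Trigger for a single pointer: in transactional mode update the bucket's
 * alloc key and (for dirty pointers) its backpointer; in gc mode update the
 * in-memory gc bucket. *sectors is set to the signed on-disk sector delta.
 */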
static int bch2_trigger_pointer(struct btree_trans *trans,
                        enum btree_id btree_id, unsigned level,
                        struct bkey_s_c k, struct extent_ptr_decoded p,
                        const union bch_extent_entry *entry,
                        s64 *sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        bool insert = !(flags & BTREE_TRIGGER_overwrite);
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        struct bkey_i_backpointer bp;
        bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp);

        *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len;

        struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
        if (unlikely(!ca)) {
                if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
                        ret = -BCH_ERR_trigger_pointer;
                goto err;
        }

        struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);

        if (flags & BTREE_TRIGGER_transactional) {
                struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
                ret = PTR_ERR_OR_ZERO(a) ?:
                        __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert);
                if (ret)
                        goto err;

                if (!p.ptr.cached) {
                        ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert);
                        if (ret)
                                goto err;
                }
        }

        if (flags & BTREE_TRIGGER_gc) {
                struct bucket *g = gc_bucket(ca, bucket.offset);
                if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n  %s",
                                            p.ptr.dev,
                                            (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                        ret = -BCH_ERR_trigger_pointer;
                        goto err;
                }

                bucket_lock(g);
                struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
                ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert);
                alloc_to_bucket(g, new);
                bucket_unlock(g);

                if (!ret)
                        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
        }
err:
        bch2_dev_put(ca);
        printbuf_exit(&buf);
        return ret;
}

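/*
 * Trigger for an erasure coded pointer: credit the sectors to the stripe's
 * per-block counts and to the replicas entry described by the stripe,
 * rather than to the extent's own replicas entry.
 */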
static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
                                struct bkey_s_c k,
                                struct extent_ptr_decoded p,
                                enum bch_data_type data_type,
                                s64 sectors,
                                enum btree_iter_update_trigger_flags flags)
{
        if (flags & BTREE_TRIGGER_transactional) {
                struct btree_iter iter;
                struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
                                BTREE_ID_stripes, POS(0, p.ec.idx),
                                BTREE_ITER_with_updates, stripe);
                int ret = PTR_ERR_OR_ZERO(s);
                if (unlikely(ret)) {
                        bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
                                "pointer to nonexistent stripe %llu",
                                (u64) p.ec.idx);
                        goto err;
                }

                if (!bch2_ptr_matches_stripe(&s->v, p)) {
                        bch2_trans_inconsistent(trans,
                                "stripe pointer doesn't match stripe %llu",
                                (u64) p.ec.idx);
                        ret = -BCH_ERR_trigger_stripe_pointer;
                        goto err;
                }

                stripe_blockcount_set(&s->v, p.ec.block,
                        stripe_blockcount_get(&s->v, p.ec.block) +
                        sectors);

                struct disk_accounting_pos acc = {
                        .type = BCH_DISK_ACCOUNTING_replicas,
                };
                bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
                acc.replicas.data_type = data_type;
                ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
err:
                bch2_trans_iter_exit(trans, &iter);
                return ret;
        }

        if (flags & BTREE_TRIGGER_gc) {
                struct bch_fs *c = trans->c;

                struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
                if (!m) {
                        bch_err(c, "error allocating memory for gc_stripes, idx %llu",
                                (u64) p.ec.idx);
                        return -BCH_ERR_ENOMEM_mark_stripe_ptr;
                }

                mutex_lock(&c->ec_stripes_heap_lock);

                if (!m || !m->alive) {
                        mutex_unlock(&c->ec_stripes_heap_lock);
                        struct printbuf buf = PRINTBUF;
                        bch2_bkey_val_to_text(&buf, c, k);
                        bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n  while marking %s",
                                            (u64) p.ec.idx, buf.buf);
                        printbuf_exit(&buf);
                        bch2_inconsistent_error(c);
                        return -BCH_ERR_trigger_stripe_pointer;
                }

                m->block_sectors[p.ec.block] += sectors;

                struct disk_accounting_pos acc = {
                        .type = BCH_DISK_ACCOUNTING_replicas,
                };
                memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
                mutex_unlock(&c->ec_stripes_heap_lock);

                acc.replicas.data_type = data_type;
                int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true);
                if (ret)
                        return ret;
        }

        return 0;
}

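/*
 * Common extent trigger, run for both the old and the new key: walk the
 * pointers once, accumulating replicas, snapshot, compression, btree and
 * per-inode accounting updates.
 */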
static int __trigger_extent(struct btree_trans *trans,
                            enum btree_id btree_id, unsigned level,
                            struct bkey_s_c k,
                            enum btree_iter_update_trigger_flags flags,
                            s64 *replicas_sectors)
{
        bool gc = flags & BTREE_TRIGGER_gc;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
        enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
                ? BCH_DATA_btree
                : BCH_DATA_user;
        int ret = 0;

        struct disk_accounting_pos acc_replicas_key = {
                .type                   = BCH_DISK_ACCOUNTING_replicas,
                .replicas.data_type     = data_type,
                .replicas.nr_devs       = 0,
                .replicas.nr_required   = 1,
        };

        struct disk_accounting_pos acct_compression_key = {
                .type                   = BCH_DISK_ACCOUNTING_compression,
        };
        u64 compression_acct[3] = { 1, 0, 0 };

        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                s64 disk_sectors = 0;
                ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
                if (ret < 0)
                        return ret;

                bool stale = ret > 0;

                if (p.ptr.cached && stale)
                        continue;

                if (p.ptr.cached) {
                        ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc);
                        if (ret)
                                return ret;
                } else if (!p.has_ec) {
                        *replicas_sectors       += disk_sectors;
                        replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev);
                } else {
                        ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
                        if (ret)
                                return ret;

                        /*
                         * There may be other dirty pointers in this extent, but
                         * if so they're not required for mounting if we have an
                         * erasure coded pointer in this extent:
                         */
                        acc_replicas_key.replicas.nr_required = 0;
                }

                if (acct_compression_key.compression.type &&
                    acct_compression_key.compression.type != p.crc.compression_type) {
                        if (flags & BTREE_TRIGGER_overwrite)
                                bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));

                        ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
                                                       ARRAY_SIZE(compression_acct), gc);
                        if (ret)
                                return ret;

                        compression_acct[0] = 1;
                        compression_acct[1] = 0;
                        compression_acct[2] = 0;
                }

                acct_compression_key.compression.type = p.crc.compression_type;
                if (p.crc.compression_type) {
                        compression_acct[1] += p.crc.uncompressed_size;
                        compression_acct[2] += p.crc.compressed_size;
                }
        }

        if (acc_replicas_key.replicas.nr_devs) {
                ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        }

        if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) {
                struct disk_accounting_pos acc_snapshot_key = {
                        .type                   = BCH_DISK_ACCOUNTING_snapshot,
                        .snapshot.id            = k.k->p.snapshot,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        }

        if (acct_compression_key.compression.type) {
                if (flags & BTREE_TRIGGER_overwrite)
                        bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));

                ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
                                               ARRAY_SIZE(compression_acct), gc);
                if (ret)
                        return ret;
        }

        if (level) {
                struct disk_accounting_pos acc_btree_key = {
                        .type           = BCH_DISK_ACCOUNTING_btree,
                        .btree.id       = btree_id,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        } else {
                bool insert = !(flags & BTREE_TRIGGER_overwrite);
                struct disk_accounting_pos acc_inum_key = {
                        .type           = BCH_DISK_ACCOUNTING_inum,
                        .inum.inum      = k.k->p.inode,
                };
                s64 v[3] = {
                        insert ? 1 : -1,
                        insert ? k.k->size : -((s64) k.k->size),
                        *replicas_sectors,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
                if (ret)
                        return ret;
        }

        return 0;
}

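/*
 * Extent trigger entry point: nothing to do if the pointers didn't change;
 * otherwise run __trigger_extent() for the old and new keys and update the
 * rebalance_work btree and accounting from the delta.
 */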
int bch2_trigger_extent(struct btree_trans *trans,
                        enum btree_id btree, unsigned level,
                        struct bkey_s_c old, struct bkey_s new,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c);
        struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old);
        unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start;
        unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start;

        if (unlikely(flags & BTREE_TRIGGER_check_repair))
                return bch2_check_fix_ptrs(trans, btree, level, new.s_c, flags);

        /* if pointers aren't changing - nothing to do: */
        if (new_ptrs_bytes == old_ptrs_bytes &&
            !memcmp(new_ptrs.start,
                    old_ptrs.start,
                    new_ptrs_bytes))
                return 0;

        if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
                s64 old_replicas_sectors = 0, new_replicas_sectors = 0;

                if (old.k->type) {
                        int ret = __trigger_extent(trans, btree, level, old,
                                                   flags & ~BTREE_TRIGGER_insert,
                                                   &old_replicas_sectors);
                        if (ret)
                                return ret;
                }

                if (new.k->type) {
                        int ret = __trigger_extent(trans, btree, level, new.s_c,
                                                   flags & ~BTREE_TRIGGER_overwrite,
                                                   &new_replicas_sectors);
                        if (ret)
                                return ret;
                }

                int need_rebalance_delta = 0;
                s64 need_rebalance_sectors_delta = 0;

                s64 s = bch2_bkey_sectors_need_rebalance(c, old);
                need_rebalance_delta -= s != 0;
                need_rebalance_sectors_delta -= s;

                s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
                need_rebalance_delta += s != 0;
                need_rebalance_sectors_delta += s;

                if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
                        int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
                                                          new.k->p, need_rebalance_delta > 0);
                        if (ret)
                                return ret;
                }

                if (need_rebalance_sectors_delta) {
                        struct disk_accounting_pos acc = {
                                .type           = BCH_DISK_ACCOUNTING_rebalance_work,
                        };
                        int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
                                                           flags & BTREE_TRIGGER_gc);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}

/* KEY_TYPE_reservation */

static int __trigger_reservation(struct btree_trans *trans,
                        enum btree_id btree_id, unsigned level, struct bkey_s_c k,
                        enum btree_iter_update_trigger_flags flags)
{
        if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
                s64 sectors = k.k->size;

                if (flags & BTREE_TRIGGER_overwrite)
                        sectors = -sectors;

                struct disk_accounting_pos acc = {
                        .type = BCH_DISK_ACCOUNTING_persistent_reserved,
                        .persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas,
                };

                return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, flags & BTREE_TRIGGER_gc);
        }

        return 0;
}

int bch2_trigger_reservation(struct btree_trans *trans,
                          enum btree_id btree_id, unsigned level,
                          struct bkey_s_c old, struct bkey_s new,
                          enum btree_iter_update_trigger_flags flags)
{
        return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags);
}

/* Mark superblocks: */

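/*
 * Transactionally mark a bucket as holding metadata (superblock or journal),
 * updating its alloc key after checking for a data type mismatch:
 */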
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    struct bch_dev *ca, u64 b,
                                    enum bch_data_type type,
                                    unsigned sectors)
{
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        int ret = 0;

        struct bkey_i_alloc_v4 *a =
                bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(ca->dev_idx, b));
        if (IS_ERR(a))
                return PTR_ERR(a);

        if (a->v.data_type && type && a->v.data_type != type) {
                bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
                log_fsck_err(trans, bucket_metadata_type_mismatch,
                        "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
                        iter.pos.inode, iter.pos.offset, a->v.gen,
                        bch2_data_type_str(a->v.data_type),
                        bch2_data_type_str(type),
                        bch2_data_type_str(type));
                ret = -BCH_ERR_metadata_bucket_inconsistency;
                goto err;
        }

        if (a->v.data_type      != type ||
            a->v.dirty_sectors  != sectors) {
                a->v.data_type          = type;
                a->v.dirty_sectors      = sectors;
                ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
        }
err:
fsck_err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
}

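/* gc counterpart of the above: update the in-memory gc bucket directly */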
static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca,
                        u64 b, enum bch_data_type data_type, unsigned sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        int ret = 0;

        struct bucket *g = gc_bucket(ca, b);
        if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
                                    ca->dev_idx, bch2_data_type_str(data_type)))
                goto err;

        bucket_lock(g);
        struct bch_alloc_v4 old = bucket_m_to_alloc(*g);

        if (bch2_fs_inconsistent_on(g->data_type &&
                        g->data_type != data_type, c,
                        "different types of data in same bucket: %s, %s",
                        bch2_data_type_str(g->data_type),
                        bch2_data_type_str(data_type)))
                goto err_unlock;

        if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
                        "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
                        ca->dev_idx, b, g->gen,
                        bch2_data_type_str(g->data_type ?: data_type),
                        g->dirty_sectors, sectors))
                goto err_unlock;

        g->data_type = data_type;
        g->dirty_sectors += sectors;
        struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
        bucket_unlock(g);
        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
        return ret;
err_unlock:
        bucket_unlock(g);
err:
        return -BCH_ERR_metadata_bucket_inconsistency;
}

int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                        struct bch_dev *ca, u64 b,
                        enum bch_data_type type, unsigned sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        BUG_ON(type != BCH_DATA_free &&
               type != BCH_DATA_sb &&
               type != BCH_DATA_journal);

        /*
         * Backup superblock might be past the end of our normal usable space:
         */
        if (b >= ca->mi.nbuckets)
                return 0;

        if (flags & BTREE_TRIGGER_gc)
                return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags);
        else if (flags & BTREE_TRIGGER_transactional)
                return commit_do(trans, NULL, NULL, 0,
                                 __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
        else
                BUG();
}

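/*
 * Mark a range of metadata sectors, batching per bucket so that each bucket
 * gets a single bch2_trans_mark_metadata_bucket() call:
 */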
static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
                        struct bch_dev *ca, u64 start, u64 end,
                        enum bch_data_type type, u64 *bucket, unsigned *bucket_sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        do {
                u64 b = sector_to_bucket(ca, start);
                unsigned sectors =
                        min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

                if (b != *bucket && *bucket_sectors) {
                        int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket,
                                                        type, *bucket_sectors, flags);
                        if (ret)
                                return ret;

                        *bucket_sectors = 0;
                }

                *bucket         = b;
                *bucket_sectors += sectors;
                start += sectors;
        } while (start < end);

        return 0;
}

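/*
 * Mark every superblock copy (including the sectors before the primary
 * superblock) and every journal bucket on @ca:
 */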
static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;

        mutex_lock(&c->sb_lock);
        struct bch_sb_layout layout = ca->disk_sb.sb->layout;
        mutex_unlock(&c->sb_lock);

        u64 bucket = 0;
        unsigned i, bucket_sectors = 0;
        int ret;

        for (i = 0; i < layout.nr_superblocks; i++) {
                u64 offset = le64_to_cpu(layout.sb_offset[i]);

                if (offset == BCH_SB_SECTOR) {
                        ret = bch2_trans_mark_metadata_sectors(trans, ca,
                                                0, BCH_SB_SECTOR,
                                                BCH_DATA_sb, &bucket, &bucket_sectors, flags);
                        if (ret)
                                return ret;
                }

                ret = bch2_trans_mark_metadata_sectors(trans, ca, offset,
                                      offset + (1 << layout.sb_max_size_bits),
                                      BCH_DATA_sb, &bucket, &bucket_sectors, flags);
                if (ret)
                        return ret;
        }

        if (bucket_sectors) {
                ret = bch2_trans_mark_metadata_bucket(trans, ca,
                                bucket, BCH_DATA_sb, bucket_sectors, flags);
                if (ret)
                        return ret;
        }

        for (i = 0; i < ca->journal.nr; i++) {
                ret = bch2_trans_mark_metadata_bucket(trans, ca,
                                ca->journal.buckets[i],
                                BCH_DATA_journal, ca->mi.bucket_size, flags);
                if (ret)
                        return ret;
        }

        return 0;
}

int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca,
                        enum btree_iter_update_trigger_flags flags)
{
        int ret = bch2_trans_run(c,
                __bch2_trans_mark_dev_sb(trans, ca, flags));
        bch_err_fn(c, ret);
        return ret;
}

int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
                        enum btree_iter_update_trigger_flags flags)
{
        for_each_online_member(c, ca) {
                int ret = bch2_trans_mark_dev_sb(c, ca, flags);
                if (ret) {
                        percpu_ref_put(&ca->io_ref);
                        return ret;
                }
        }

        return 0;
}

int bch2_trans_mark_dev_sbs(struct bch_fs *c)
{
        return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional);
}

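/*
 * Returns true if bucket @b overlaps with any superblock copy or journal
 * bucket; bucket 0 is always treated as a superblock bucket.
 */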
bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b)
{
        struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
        u64 b_offset    = bucket_to_sector(ca, b);
        u64 b_end       = bucket_to_sector(ca, b + 1);
        unsigned i;

        if (!b)
                return true;

        for (i = 0; i < layout->nr_superblocks; i++) {
                u64 offset = le64_to_cpu(layout->sb_offset[i]);
                u64 end = offset + (1 << layout->sb_max_size_bits);

                if (!(offset >= b_end || end <= b_offset))
                        return true;
        }

        for (i = 0; i < ca->journal.nr; i++)
                if (b == ca->journal.buckets[i])
                        return true;

        return false;
}

/* Disk reservations: */

#define SECTORS_CACHE   1024

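/*
 * Take a disk reservation. Fast path: allocate from this cpu's cached
 * sectors, refilling the cache from c->sectors_available in batches of
 * SECTORS_CACHE. Slow path: zero the percpu caches and recompute
 * sectors_available under sectors_available_lock.
 */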
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
                                u64 sectors, enum bch_reservation_flags flags)
{
        struct bch_fs_pcpu *pcpu;
        u64 old, get;
        u64 sectors_available;
        int ret;

        percpu_down_read(&c->mark_lock);
        preempt_disable();
        pcpu = this_cpu_ptr(c->pcpu);

        if (sectors <= pcpu->sectors_available)
                goto out;

        old = atomic64_read(&c->sectors_available);
        do {
                get = min((u64) sectors + SECTORS_CACHE, old);

                if (get < sectors) {
                        preempt_enable();
                        goto recalculate;
                }
        } while (!atomic64_try_cmpxchg(&c->sectors_available,
                                       &old, old - get));

        pcpu->sectors_available         += get;

out:
        pcpu->sectors_available         -= sectors;
        this_cpu_add(*c->online_reserved, sectors);
        res->sectors                    += sectors;

        preempt_enable();
        percpu_up_read(&c->mark_lock);
        return 0;

recalculate:
        mutex_lock(&c->sectors_available_lock);

        percpu_u64_set(&c->pcpu->sectors_available, 0);
        sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);

        if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
                sectors = min(sectors, sectors_available);

        if (sectors <= sectors_available ||
            (flags & BCH_DISK_RESERVATION_NOFAIL)) {
                atomic64_set(&c->sectors_available,
                             max_t(s64, 0, sectors_available - sectors));
                this_cpu_add(*c->online_reserved, sectors);
                res->sectors                    += sectors;
                ret = 0;
        } else {
                atomic64_set(&c->sectors_available, sectors_available);
                ret = -BCH_ERR_ENOSPC_disk_reservation;
        }

        mutex_unlock(&c->sectors_available_lock);
        percpu_up_read(&c->mark_lock);

        return ret;
}

/* Startup/shutdown: */

void bch2_buckets_nouse_free(struct bch_fs *c)
{
        for_each_member_device(c, ca) {
                kvfree_rcu_mightsleep(ca->buckets_nouse);
                ca->buckets_nouse = NULL;
        }
}

int bch2_buckets_nouse_alloc(struct bch_fs *c)
{
        for_each_member_device(c, ca) {
                BUG_ON(ca->buckets_nouse);

                ca->buckets_nouse = bch2_kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
                                            sizeof(unsigned long),
                                            GFP_KERNEL|__GFP_ZERO);
                if (!ca->buckets_nouse) {
                        bch2_dev_put(ca);
                        return -BCH_ERR_ENOMEM_buckets_nouse;
                }
        }

        return 0;
}

static void bucket_gens_free_rcu(struct rcu_head *rcu)
{
        struct bucket_gens *buckets =
                container_of(rcu, struct bucket_gens, rcu);

        kvfree(buckets);
}

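/*
 * Allocate (or resize) a device's bucket_gens array and publish it with
 * rcu_assign_pointer(); the old array is freed after an RCU grace period.
 * When shrinking, only the gens that still fit are copied over.
 */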
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
        struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
        bool resize = ca->bucket_gens != NULL;
        int ret;

        if (resize)
                lockdep_assert_held(&c->state_lock);

        if (resize && ca->buckets_nouse)
                return -BCH_ERR_no_resize_with_buckets_nouse;

        bucket_gens = bch2_kvmalloc(struct_size(bucket_gens, b, nbuckets),
                                    GFP_KERNEL|__GFP_ZERO);
        if (!bucket_gens) {
                ret = -BCH_ERR_ENOMEM_bucket_gens;
                goto err;
        }

        bucket_gens->first_bucket = ca->mi.first_bucket;
        bucket_gens->nbuckets   = nbuckets;
        bucket_gens->nbuckets_minus_first =
                bucket_gens->nbuckets - bucket_gens->first_bucket;

        old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);

        if (resize) {
                bucket_gens->nbuckets = min(bucket_gens->nbuckets,
                                            old_bucket_gens->nbuckets);
                bucket_gens->nbuckets_minus_first =
                        bucket_gens->nbuckets - bucket_gens->first_bucket;
                memcpy(bucket_gens->b,
                       old_bucket_gens->b,
                       bucket_gens->nbuckets);
        }

        rcu_assign_pointer(ca->bucket_gens, bucket_gens);
        bucket_gens     = old_bucket_gens;

        nbuckets = ca->mi.nbuckets;

        ret = 0;
err:
        if (bucket_gens)
                call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);

        return ret;
}

void bch2_dev_buckets_free(struct bch_dev *ca)
{
        kvfree(ca->buckets_nouse);
        kvfree(rcu_dereference_protected(ca->bucket_gens, 1));
        free_percpu(ca->usage);
}

int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{
        ca->usage = alloc_percpu(struct bch_dev_usage);
        if (!ca->usage)
                return -BCH_ERR_ENOMEM_usage_init;

        return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}