// SPDX-License-Identifier: GPL-2.0
/*
 * Code for manipulating bucket marks for garbage collection.
 *
 * Copyright 2014 Datera, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "buckets.h"
#include "buckets_waiting_for_journal.h"
#include "disk_accounting.h"
#include "ec.h"
#include "error.h"
#include "inode.h"
#include "movinggc.h"
#include "recovery.h"
#include "reflink.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/preempt.h>

void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
{
        memset(usage, 0, sizeof(*usage));
        acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s());
}

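/*
 * Inflate a reservation by roughly 1/(1 << RESERVE_FACTOR) so that reserved
 * space is counted conservatively in the "used" total.
 */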
static u64 reserve_factor(u64 r)
{
        return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
}

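/*
 * Summarize filesystem usage: capacity (less sectors accounted as hidden),
 * used (data + btree + inflated reservations, clamped to capacity), free
 * space, and the inode count. Caller must hold mark_lock.
 */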
static struct bch_fs_usage_short
__bch2_fs_usage_read_short(struct bch_fs *c)
{
        struct bch_fs_usage_short ret;
        u64 data, reserved;

        ret.capacity = c->capacity -
                percpu_u64_get(&c->usage->hidden);

        data            = percpu_u64_get(&c->usage->data) +
                percpu_u64_get(&c->usage->btree);
        reserved        = percpu_u64_get(&c->usage->reserved) +
                percpu_u64_get(c->online_reserved);

        ret.used        = min(ret.capacity, data + reserve_factor(reserved));
        ret.free        = ret.capacity - ret.used;

        ret.nr_inodes   = percpu_u64_get(&c->usage->nr_inodes);

        return ret;
}

struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
        struct bch_fs_usage_short ret;

        percpu_down_read(&c->mark_lock);
        ret = __bch2_fs_usage_read_short(c);
        percpu_up_read(&c->mark_lock);

        return ret;
}

void bch2_dev_usage_to_text(struct printbuf *out,
                            struct bch_dev *ca,
                            struct bch_dev_usage *usage)
{
        if (out->nr_tabstops < 5) {
                printbuf_tabstops_reset(out);
                printbuf_tabstop_push(out, 12);
                printbuf_tabstop_push(out, 16);
                printbuf_tabstop_push(out, 16);
                printbuf_tabstop_push(out, 16);
                printbuf_tabstop_push(out, 16);
        }

        prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n");

        for (unsigned i = 0; i < BCH_DATA_NR; i++) {
                bch2_prt_data_type(out, i);
                prt_printf(out, "\t%llu\r%llu\r%llu\r\n",
                           usage->d[i].buckets,
                           usage->d[i].sectors,
                           usage->d[i].fragmented);
        }

        prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets);
}

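/*
 * Check one decoded pointer of @k against the in-memory GC bucket it points
 * into, repairing the bucket state where that's safe to do and setting
 * *do_update when the key itself needs rewriting.
 */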
static int bch2_check_fix_ptr(struct btree_trans *trans,
                              struct bkey_s_c k,
                              struct extent_ptr_decoded p,
                              const union bch_extent_entry *entry,
                              bool *do_update)
{
        struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
        if (!ca) {
                if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
                                trans, ptr_to_invalid_device,
                                "pointer to missing device %u\n"
                                "while marking %s",
                                p.ptr.dev,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;
                return 0;
        }

        struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
        if (!g) {
                if (fsck_err(trans, ptr_to_invalid_device,
                             "pointer to invalid bucket on device %u\n"
                             "while marking %s",
                             p.ptr.dev,
                             (printbuf_reset(&buf),
                              bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;
                goto out;
        }

        enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);

        if (fsck_err_on(!g->gen_valid,
                        trans, ptr_to_missing_alloc_key,
                        "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                if (!p.ptr.cached) {
                        g->gen_valid            = true;
                        g->gen                  = p.ptr.gen;
                } else {
                        *do_update = true;
                }
        }

        if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0,
                        trans, ptr_gen_newer_than_bucket_gen,
                        "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen, g->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                if (!p.ptr.cached &&
                    (g->data_type != BCH_DATA_btree ||
                     data_type == BCH_DATA_btree)) {
                        g->gen_valid            = true;
                        g->gen                  = p.ptr.gen;
                        g->data_type            = 0;
                        g->stripe_sectors       = 0;
                        g->dirty_sectors        = 0;
                        g->cached_sectors       = 0;
                } else {
                        *do_update = true;
                }
        }

        if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX,
                        trans, ptr_gen_newer_than_bucket_gen,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                *do_update = true;

        if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0,
                        trans, stale_dirty_ptr,
                        "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
                        bch2_data_type_str(ptr_data_type(k.k, &p.ptr)),
                        p.ptr.gen, g->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                *do_update = true;

        if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
                goto out;

        if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type),
                        trans, ptr_bucket_data_type_mismatch,
                        "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
                        p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
                        bch2_data_type_str(g->data_type),
                        bch2_data_type_str(data_type),
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                if (data_type == BCH_DATA_btree) {
                        g->gen_valid            = true;
                        g->gen                  = p.ptr.gen;
                        g->data_type            = data_type;
                        g->stripe_sectors       = 0;
                        g->dirty_sectors        = 0;
                        g->cached_sectors       = 0;
                } else {
                        *do_update = true;
                }
        }

        if (p.has_ec) {
                struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx);

                if (fsck_err_on(!m || !m->alive,
                                trans, ptr_to_missing_stripe,
                                "pointer to nonexistent stripe %llu\n"
                                "while marking %s",
                                (u64) p.ec.idx,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;

                if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p),
                                trans, ptr_to_incorrect_stripe,
                                "pointer does not match stripe %llu\n"
                                "while marking %s",
                                (u64) p.ec.idx,
                                (printbuf_reset(&buf),
                                 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        *do_update = true;
        }
out:
fsck_err:
        bch2_dev_put(ca);
        printbuf_exit(&buf);
        return ret;
}

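/*
 * Walk every pointer in @k and, if any were flagged for repair, rewrite the
 * key: drop pointers to nonexistent devices, stale or mismatched extent
 * pointers, and stripe pointers whose stripe no longer matches.
 */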
int bch2_check_fix_ptrs(struct btree_trans *trans,
                        enum btree_id btree, unsigned level, struct bkey_s_c k,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry_c;
        struct extent_ptr_decoded p = { 0 };
        bool do_update = false;
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        percpu_down_read(&c->mark_lock);

        bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
                ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
                if (ret)
                        goto err;
        }

        if (do_update) {
                if (flags & BTREE_TRIGGER_is_root) {
                        bch_err(c, "cannot update btree roots yet");
                        ret = -EINVAL;
                        goto err;
                }

                struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
                ret = PTR_ERR_OR_ZERO(new);
                if (ret)
                        goto err;

                rcu_read_lock();
                bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev));
                rcu_read_unlock();

                if (level) {
                        /*
                         * We don't want to drop btree node pointers - if the
                         * btree node isn't there anymore, the read path will
                         * sort it out:
                         */
                        struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        rcu_read_lock();
                        bkey_for_each_ptr(ptrs, ptr) {
                                struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
                                struct bucket *g = PTR_GC_BUCKET(ca, ptr);

                                ptr->gen = g->gen;
                        }
                        rcu_read_unlock();
                } else {
                        struct bkey_ptrs ptrs;
                        union bch_extent_entry *entry;

                        rcu_read_lock();
restart_drop_ptrs:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
                                struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
                                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
                                enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);

                                if ((p.ptr.cached &&
                                     (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
                                    (!p.ptr.cached &&
                                     gen_cmp(p.ptr.gen, g->gen) < 0) ||
                                    gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
                                    (g->data_type &&
                                     g->data_type != data_type)) {
                                        bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
                                        goto restart_drop_ptrs;
                                }
                        }
                        rcu_read_unlock();
again:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_extent_entry_for_each(ptrs, entry) {
                                if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) {
                                        struct gc_stripe *m = genradix_ptr(&c->gc_stripes,
                                                                        entry->stripe_ptr.idx);
                                        union bch_extent_entry *next_ptr;

                                        bkey_extent_entry_for_each_from(ptrs, next_ptr, entry)
                                                if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr)
                                                        goto found;
                                        next_ptr = NULL;
found:
                                        if (!next_ptr) {
                                                bch_err(c, "aieee, found stripe ptr with no data ptr");
                                                continue;
                                        }

                                        if (!m || !m->alive ||
                                            !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block],
                                                                       &next_ptr->ptr,
                                                                       m->sectors)) {
                                                bch2_bkey_extent_entry_drop(new, entry);
                                                goto again;
                                        }
                                }
                        }
                }

                if (0) {
                        printbuf_reset(&buf);
                        bch2_bkey_val_to_text(&buf, c, k);
                        bch_info(c, "updated %s", buf.buf);

                        printbuf_reset(&buf);
                        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
                        bch_info(c, "new key %s", buf.buf);
                }

                percpu_up_read(&c->mark_lock);
                struct btree_iter iter;
                bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
                                          BTREE_ITER_intent|BTREE_ITER_all_snapshots);
                ret =   bch2_btree_iter_traverse(&iter) ?:
                        bch2_trans_update(trans, &iter, new,
                                          BTREE_UPDATE_internal_snapshot_node|
                                          BTREE_TRIGGER_norun);
                bch2_trans_iter_exit(trans, &iter);
                percpu_down_read(&c->mark_lock);

                if (ret)
                        goto err;

                if (level)
                        bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
        }
err:
        percpu_up_read(&c->mark_lock);
        printbuf_exit(&buf);
        return ret;
}

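/*
 * Apply a sector delta to one bucket's sector count after validating the
 * pointer against the bucket's generation and data type. Returns 1 for a
 * stale cached pointer (nothing to do), negative error on inconsistency.
 */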
int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
                           struct bkey_s_c k,
                           const struct bch_extent_ptr *ptr,
                           s64 sectors, enum bch_data_type ptr_data_type,
                           u8 b_gen, u8 bucket_data_type,
                           u32 *bucket_sectors)
{
        struct bch_fs *c = trans->c;
        size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
        struct printbuf buf = PRINTBUF;
        bool inserting = sectors > 0;
        int ret = 0;

        BUG_ON(!sectors);

        if (gen_after(ptr->gen, b_gen)) {
                bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                              ptr_gen_newer_than_bucket_gen,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        ptr->gen,
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
                bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                              ptr_too_stale,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        ptr->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if (b_gen != ptr->gen && ptr->cached) {
                ret = 1;
                goto out;
        }

        if (b_gen != ptr->gen) {
                bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                              stale_dirty_ptr,
                        "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bucket_gen_get(ca, bucket_nr),
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        ptr->gen,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) {
                bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                              ptr_bucket_data_type_mismatch,
                        "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type),
                        bch2_data_type_str(ptr_data_type),
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                goto out;
        }

        if ((u64) *bucket_sectors + sectors > U32_MAX) {
                bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                              bucket_sector_count_overflow,
                        "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
                        "while marking %s",
                        ptr->dev, bucket_nr, b_gen,
                        bch2_data_type_str(bucket_data_type ?: ptr_data_type),
                        *bucket_sectors, sectors,
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
                if (inserting)
                        goto err;
                sectors = -*bucket_sectors;
        }

        *bucket_sectors += sectors;
out:
        printbuf_exit(&buf);
        return ret;
err:
        bch2_dump_trans_updates(trans);
        ret = -BCH_ERR_bucket_ref_update;
        goto out;
}

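/*
 * Fold the transaction's accumulated usage delta into the filesystem's
 * percpu usage counters, paying for any increase out of the transaction's
 * disk reservation.
 */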
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
{
        struct bch_fs *c = trans->c;
        u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
        static int warned_disk_usage = 0;
        bool warn = false;

        percpu_down_read(&c->mark_lock);
        struct bch_fs_usage_base *src = &trans->fs_usage_delta;

        s64 added = src->btree + src->data + src->reserved;

        /*
         * Not allowed to reduce sectors_available except by getting a
         * reservation:
         */
        s64 should_not_have_added = added - (s64) disk_res_sectors;
        if (unlikely(should_not_have_added > 0)) {
                u64 old, new;

                old = atomic64_read(&c->sectors_available);
                do {
                        new = max_t(s64, 0, old - should_not_have_added);
                } while (!atomic64_try_cmpxchg(&c->sectors_available,
                                               &old, new));

                added -= should_not_have_added;
                warn = true;
        }

        if (added > 0) {
                trans->disk_res->sectors -= added;
                this_cpu_sub(*c->online_reserved, added);
        }

        preempt_disable();
        struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
        acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64));
        preempt_enable();
        percpu_up_read(&c->mark_lock);

        if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
                bch2_trans_inconsistent(trans,
                                        "disk usage increased %lli more than %llu sectors reserved",
                                        should_not_have_added, disk_res_sectors);
}

/* KEY_TYPE_extent: */

static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca,
                          struct bkey_s_c k,
                          const struct extent_ptr_decoded *p,
                          s64 sectors, enum bch_data_type ptr_data_type,
                          struct bch_alloc_v4 *a)
{
        u32 *dst_sectors = p->has_ec    ? &a->stripe_sectors :
                !p->ptr.cached          ? &a->dirty_sectors :
                                          &a->cached_sectors;
        int ret = bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type,
                                         a->gen, a->data_type, dst_sectors);

        if (ret)
                return ret;

        alloc_data_type_set(a, ptr_data_type);
        return 0;
}

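/*
 * Trigger for a single extent pointer: compute its disk sector delta and
 * apply it to the owning bucket, via the alloc btree in transactional mode
 * or the in-memory GC state in gc mode, maintaining backpointers as we go.
 */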
static int bch2_trigger_pointer(struct btree_trans *trans,
                        enum btree_id btree_id, unsigned level,
                        struct bkey_s_c k, struct extent_ptr_decoded p,
                        const union bch_extent_entry *entry,
                        s64 *sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        bool insert = !(flags & BTREE_TRIGGER_overwrite);
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
        *sectors = insert ? abs_sectors : -abs_sectors;

        struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
        if (unlikely(!ca)) {
                if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
                        ret = -BCH_ERR_trigger_pointer;
                goto err;
        }

        struct bpos bucket;
        struct bch_backpointer bp;
        __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors);

        if (flags & BTREE_TRIGGER_transactional) {
                struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);
                ret = PTR_ERR_OR_ZERO(a) ?:
                        __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v);
                if (ret)
                        goto err;

                if (!p.ptr.cached) {
                        ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, k, insert);
                        if (ret)
                                goto err;
                }
        }

        if (flags & BTREE_TRIGGER_gc) {
                percpu_down_read(&c->mark_lock);
                struct bucket *g = gc_bucket(ca, bucket.offset);
                if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n  %s",
                                            p.ptr.dev,
                                            (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                        ret = -BCH_ERR_trigger_pointer;
                        goto err_unlock;
                }

                bucket_lock(g);
                struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old;
                ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new);
                alloc_to_bucket(g, new);
                bucket_unlock(g);
err_unlock:
                percpu_up_read(&c->mark_lock);

                if (!ret)
                        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
        }
err:
        bch2_dev_put(ca);
        printbuf_exit(&buf);
        return ret;
}

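/*
 * Account sectors referenced via an erasure coded pointer against the
 * stripe's per-block counts and its replicas accounting entry.
 */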
static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
                                struct bkey_s_c k,
                                struct extent_ptr_decoded p,
                                enum bch_data_type data_type,
                                s64 sectors,
                                enum btree_iter_update_trigger_flags flags)
{
        if (flags & BTREE_TRIGGER_transactional) {
                struct btree_iter iter;
                struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
                                BTREE_ID_stripes, POS(0, p.ec.idx),
                                BTREE_ITER_with_updates, stripe);
                int ret = PTR_ERR_OR_ZERO(s);
                if (unlikely(ret)) {
                        bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
                                "pointer to nonexistent stripe %llu",
                                (u64) p.ec.idx);
                        goto err;
                }

                if (!bch2_ptr_matches_stripe(&s->v, p)) {
                        bch2_trans_inconsistent(trans,
                                "stripe pointer doesn't match stripe %llu",
                                (u64) p.ec.idx);
                        ret = -BCH_ERR_trigger_stripe_pointer;
                        goto err;
                }

                stripe_blockcount_set(&s->v, p.ec.block,
                        stripe_blockcount_get(&s->v, p.ec.block) +
                        sectors);

                struct disk_accounting_pos acc = {
                        .type = BCH_DISK_ACCOUNTING_replicas,
                };
                bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
                acc.replicas.data_type = data_type;
                ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
err:
                bch2_trans_iter_exit(trans, &iter);
                return ret;
        }

        if (flags & BTREE_TRIGGER_gc) {
                struct bch_fs *c = trans->c;

                struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
                if (!m) {
                        bch_err(c, "error allocating memory for gc_stripes, idx %llu",
                                (u64) p.ec.idx);
                        return -BCH_ERR_ENOMEM_mark_stripe_ptr;
                }

                mutex_lock(&c->ec_stripes_heap_lock);

                if (!m || !m->alive) {
                        mutex_unlock(&c->ec_stripes_heap_lock);
                        struct printbuf buf = PRINTBUF;
                        bch2_bkey_val_to_text(&buf, c, k);
                        bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n  while marking %s",
                                            (u64) p.ec.idx, buf.buf);
                        printbuf_exit(&buf);
                        bch2_inconsistent_error(c);
                        return -BCH_ERR_trigger_stripe_pointer;
                }

                m->block_sectors[p.ec.block] += sectors;

                struct disk_accounting_pos acc = {
                        .type = BCH_DISK_ACCOUNTING_replicas,
                };
                memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
                mutex_unlock(&c->ec_stripes_heap_lock);

                acc.replicas.data_type = data_type;
                int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, true);
                if (ret)
                        return ret;
        }

        return 0;
}

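/*
 * Shared insert/overwrite trigger for extents and btree node pointers:
 * updates bucket refs, then replicas, snapshot, compression, btree and
 * inode accounting for every pointer in the key.
 */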
static int __trigger_extent(struct btree_trans *trans,
                            enum btree_id btree_id, unsigned level,
                            struct bkey_s_c k,
                            enum btree_iter_update_trigger_flags flags,
                            s64 *replicas_sectors)
{
        bool gc = flags & BTREE_TRIGGER_gc;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
        enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
                ? BCH_DATA_btree
                : BCH_DATA_user;
        int ret = 0;

        struct disk_accounting_pos acc_replicas_key = {
                .type                   = BCH_DISK_ACCOUNTING_replicas,
                .replicas.data_type     = data_type,
                .replicas.nr_devs       = 0,
                .replicas.nr_required   = 1,
        };

        struct disk_accounting_pos acct_compression_key = {
                .type                   = BCH_DISK_ACCOUNTING_compression,
        };
        u64 compression_acct[3] = { 1, 0, 0 };

        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                s64 disk_sectors = 0;
                ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
                if (ret < 0)
                        return ret;

                bool stale = ret > 0;

                if (p.ptr.cached && stale)
                        continue;

                if (p.ptr.cached) {
                        ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc);
                        if (ret)
                                return ret;
                } else if (!p.has_ec) {
                        *replicas_sectors       += disk_sectors;
                        replicas_entry_add_dev(&acc_replicas_key.replicas, p.ptr.dev);
                } else {
                        ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
                        if (ret)
                                return ret;

                        /*
                         * There may be other dirty pointers in this extent, but
                         * if so they're not required for mounting if we have an
                         * erasure coded pointer in this extent:
                         */
                        acc_replicas_key.replicas.nr_required = 0;
                }

                if (acct_compression_key.compression.type &&
                    acct_compression_key.compression.type != p.crc.compression_type) {
                        if (flags & BTREE_TRIGGER_overwrite)
                                bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));

                        ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
                                                       ARRAY_SIZE(compression_acct), gc);
                        if (ret)
                                return ret;

                        compression_acct[0] = 1;
                        compression_acct[1] = 0;
                        compression_acct[2] = 0;
                }

                acct_compression_key.compression.type = p.crc.compression_type;
                if (p.crc.compression_type) {
                        compression_acct[1] += p.crc.uncompressed_size;
                        compression_acct[2] += p.crc.compressed_size;
                }
        }

        if (acc_replicas_key.replicas.nr_devs) {
                ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        }

        if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) {
                struct disk_accounting_pos acc_snapshot_key = {
                        .type                   = BCH_DISK_ACCOUNTING_snapshot,
                        .snapshot.id            = k.k->p.snapshot,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        }

        if (acct_compression_key.compression.type) {
                if (flags & BTREE_TRIGGER_overwrite)
                        bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct));

                ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct,
                                               ARRAY_SIZE(compression_acct), gc);
                if (ret)
                        return ret;
        }

        if (level) {
                struct disk_accounting_pos acc_btree_key = {
                        .type           = BCH_DISK_ACCOUNTING_btree,
                        .btree.id       = btree_id,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
                if (ret)
                        return ret;
        } else {
                bool insert = !(flags & BTREE_TRIGGER_overwrite);
                struct disk_accounting_pos acc_inum_key = {
                        .type           = BCH_DISK_ACCOUNTING_inum,
                        .inum.inum      = k.k->p.inode,
                };
                s64 v[3] = {
                        insert ? 1 : -1,
                        insert ? k.k->size : -((s64) k.k->size),
                        *replicas_sectors,
                };
                ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
                if (ret)
                        return ret;
        }

        return 0;
}

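/*
 * Extent trigger entrypoint: bail out early when the pointers are
 * unchanged, run __trigger_extent() for the old and new keys, then account
 * the change in rebalance work between them.
 */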
int bch2_trigger_extent(struct btree_trans *trans,
                        enum btree_id btree, unsigned level,
                        struct bkey_s_c old, struct bkey_s new,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c);
        struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old);
        unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start;
        unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start;

        if (unlikely(flags & BTREE_TRIGGER_check_repair))
                return bch2_check_fix_ptrs(trans, btree, level, new.s_c, flags);

        /* if pointers aren't changing - nothing to do: */
        if (new_ptrs_bytes == old_ptrs_bytes &&
            !memcmp(new_ptrs.start,
                    old_ptrs.start,
                    new_ptrs_bytes))
                return 0;

        if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
                s64 old_replicas_sectors = 0, new_replicas_sectors = 0;

                if (old.k->type) {
                        int ret = __trigger_extent(trans, btree, level, old,
                                                   flags & ~BTREE_TRIGGER_insert,
                                                   &old_replicas_sectors);
                        if (ret)
                                return ret;
                }

                if (new.k->type) {
                        int ret = __trigger_extent(trans, btree, level, new.s_c,
                                                   flags & ~BTREE_TRIGGER_overwrite,
                                                   &new_replicas_sectors);
                        if (ret)
                                return ret;
                }

                int need_rebalance_delta = 0;
                s64 need_rebalance_sectors_delta = 0;

                s64 s = bch2_bkey_sectors_need_rebalance(c, old);
                need_rebalance_delta -= s != 0;
                need_rebalance_sectors_delta -= s;

                s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
                need_rebalance_delta += s != 0;
                need_rebalance_sectors_delta += s;

                if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
                        int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
                                                          new.k->p, need_rebalance_delta > 0);
                        if (ret)
                                return ret;
                }

                if (need_rebalance_sectors_delta) {
                        struct disk_accounting_pos acc = {
                                .type           = BCH_DISK_ACCOUNTING_rebalance_work,
                        };
                        int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
                                                           flags & BTREE_TRIGGER_gc);
                        if (ret)
                                return ret;
                }
        }

        return 0;
}

/* KEY_TYPE_reservation */

static int __trigger_reservation(struct btree_trans *trans,
                        enum btree_id btree_id, unsigned level, struct bkey_s_c k,
                        enum btree_iter_update_trigger_flags flags)
{
        if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
                s64 sectors = k.k->size;

                if (flags & BTREE_TRIGGER_overwrite)
                        sectors = -sectors;

                struct disk_accounting_pos acc = {
                        .type = BCH_DISK_ACCOUNTING_persistent_reserved,
                        .persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas,
                };

                return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, flags & BTREE_TRIGGER_gc);
        }

        return 0;
}

int bch2_trigger_reservation(struct btree_trans *trans,
                          enum btree_id btree_id, unsigned level,
                          struct bkey_s_c old, struct bkey_s new,
                          enum btree_iter_update_trigger_flags flags)
{
        return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags);
}

/* Mark superblocks: */

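/*
 * Transactionally set a bucket's data type and sector count in the alloc
 * btree, for buckets holding superblock or journal metadata.
 */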
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    struct bch_dev *ca, u64 b,
                                    enum bch_data_type type,
                                    unsigned sectors)
{
        struct btree_iter iter;
        int ret = 0;

        struct bkey_i_alloc_v4 *a =
                bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(ca->dev_idx, b));
        if (IS_ERR(a))
                return PTR_ERR(a);

        if (a->v.data_type && type && a->v.data_type != type) {
                bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                              bucket_metadata_type_mismatch,
                        "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
                        "while marking %s",
                        iter.pos.inode, iter.pos.offset, a->v.gen,
                        bch2_data_type_str(a->v.data_type),
                        bch2_data_type_str(type),
                        bch2_data_type_str(type));
                ret = -BCH_ERR_metadata_bucket_inconsistency;
                goto err;
        }

        if (a->v.data_type      != type ||
            a->v.dirty_sectors  != sectors) {
                a->v.data_type          = type;
                a->v.dirty_sectors      = sectors;
                ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
        }
err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
}

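/*
 * GC-time counterpart of the above: update the in-memory GC bucket
 * directly and propagate the change to the device usage counters.
 */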
static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca,
                        u64 b, enum bch_data_type data_type, unsigned sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;
        int ret = 0;

        percpu_down_read(&c->mark_lock);
        struct bucket *g = gc_bucket(ca, b);
        if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
                                    ca->dev_idx, bch2_data_type_str(data_type)))
                goto err_unlock;

        bucket_lock(g);
        struct bch_alloc_v4 old = bucket_m_to_alloc(*g);

        if (bch2_fs_inconsistent_on(g->data_type &&
                        g->data_type != data_type, c,
                        "different types of data in same bucket: %s, %s",
                        bch2_data_type_str(g->data_type),
                        bch2_data_type_str(data_type)))
                goto err;

        if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
                        "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
                        ca->dev_idx, b, g->gen,
                        bch2_data_type_str(g->data_type ?: data_type),
                        g->dirty_sectors, sectors))
                goto err;

        g->data_type = data_type;
        g->dirty_sectors += sectors;
        struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
        bucket_unlock(g);
        percpu_up_read(&c->mark_lock);
        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
        return ret;
err:
        bucket_unlock(g);
err_unlock:
        percpu_up_read(&c->mark_lock);
        return -BCH_ERR_metadata_bucket_inconsistency;
}

int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                        struct bch_dev *ca, u64 b,
                        enum bch_data_type type, unsigned sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        BUG_ON(type != BCH_DATA_free &&
               type != BCH_DATA_sb &&
               type != BCH_DATA_journal);

        /*
         * Backup superblock might be past the end of our normal usable space:
         */
        if (b >= ca->mi.nbuckets)
                return 0;

        if (flags & BTREE_TRIGGER_gc)
                return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags);
        else if (flags & BTREE_TRIGGER_transactional)
                return commit_do(trans, NULL, NULL, 0,
                                 __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
        else
                BUG();
}

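/*
 * Mark a range of sectors as metadata, batching up sectors that fall in
 * the same bucket so each bucket is updated once.
 */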
static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
                        struct bch_dev *ca, u64 start, u64 end,
                        enum bch_data_type type, u64 *bucket, unsigned *bucket_sectors,
                        enum btree_iter_update_trigger_flags flags)
{
        do {
                u64 b = sector_to_bucket(ca, start);
                unsigned sectors =
                        min_t(u64, bucket_to_sector(ca, b + 1), end) - start;

                if (b != *bucket && *bucket_sectors) {
                        int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket,
                                                        type, *bucket_sectors, flags);
                        if (ret)
                                return ret;

                        *bucket_sectors = 0;
                }

                *bucket         = b;
                *bucket_sectors += sectors;
                start += sectors;
        } while (start < end);

        return 0;
}

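/*
 * Mark all of a device's metadata buckets: every superblock copy in the
 * layout (including sector 0 up to the default superblock) plus the
 * journal buckets.
 */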
static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca,
                        enum btree_iter_update_trigger_flags flags)
{
        struct bch_fs *c = trans->c;

        mutex_lock(&c->sb_lock);
        struct bch_sb_layout layout = ca->disk_sb.sb->layout;
        mutex_unlock(&c->sb_lock);

        u64 bucket = 0;
        unsigned i, bucket_sectors = 0;
        int ret;

        for (i = 0; i < layout.nr_superblocks; i++) {
                u64 offset = le64_to_cpu(layout.sb_offset[i]);

                if (offset == BCH_SB_SECTOR) {
                        ret = bch2_trans_mark_metadata_sectors(trans, ca,
                                                0, BCH_SB_SECTOR,
                                                BCH_DATA_sb, &bucket, &bucket_sectors, flags);
                        if (ret)
                                return ret;
                }

                ret = bch2_trans_mark_metadata_sectors(trans, ca, offset,
                                      offset + (1 << layout.sb_max_size_bits),
                                      BCH_DATA_sb, &bucket, &bucket_sectors, flags);
                if (ret)
                        return ret;
        }

        if (bucket_sectors) {
                ret = bch2_trans_mark_metadata_bucket(trans, ca,
                                bucket, BCH_DATA_sb, bucket_sectors, flags);
                if (ret)
                        return ret;
        }

        for (i = 0; i < ca->journal.nr; i++) {
                ret = bch2_trans_mark_metadata_bucket(trans, ca,
                                ca->journal.buckets[i],
                                BCH_DATA_journal, ca->mi.bucket_size, flags);
                if (ret)
                        return ret;
        }

        return 0;
}

int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca,
                        enum btree_iter_update_trigger_flags flags)
{
        int ret = bch2_trans_run(c,
                __bch2_trans_mark_dev_sb(trans, ca, flags));
        bch_err_fn(c, ret);
        return ret;
}

int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
                        enum btree_iter_update_trigger_flags flags)
{
        for_each_online_member(c, ca) {
                int ret = bch2_trans_mark_dev_sb(c, ca, flags);
                if (ret) {
                        percpu_ref_put(&ca->io_ref);
                        return ret;
                }
        }

        return 0;
}

int bch2_trans_mark_dev_sbs(struct bch_fs *c)
{
        return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional);
}

/* Disk reservations: */

#define SECTORS_CACHE   1024

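/*
 * Take a disk reservation, preferably from the percpu sectors_available
 * cache, refilling it from the global counter in SECTORS_CACHE-sized
 * grabs; if that fails, recompute free space under sectors_available_lock.
 */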
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
                                u64 sectors, enum bch_reservation_flags flags)
{
        struct bch_fs_pcpu *pcpu;
        u64 old, get;
        u64 sectors_available;
        int ret;

        percpu_down_read(&c->mark_lock);
        preempt_disable();
        pcpu = this_cpu_ptr(c->pcpu);

        if (sectors <= pcpu->sectors_available)
                goto out;

        old = atomic64_read(&c->sectors_available);
        do {
                get = min((u64) sectors + SECTORS_CACHE, old);

                if (get < sectors) {
                        preempt_enable();
                        goto recalculate;
                }
        } while (!atomic64_try_cmpxchg(&c->sectors_available,
                                       &old, old - get));

        pcpu->sectors_available         += get;

out:
        pcpu->sectors_available         -= sectors;
        this_cpu_add(*c->online_reserved, sectors);
        res->sectors                    += sectors;

        preempt_enable();
        percpu_up_read(&c->mark_lock);
        return 0;

recalculate:
        mutex_lock(&c->sectors_available_lock);

        percpu_u64_set(&c->pcpu->sectors_available, 0);
        sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);

        if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
                sectors = min(sectors, sectors_available);

        if (sectors <= sectors_available ||
            (flags & BCH_DISK_RESERVATION_NOFAIL)) {
                atomic64_set(&c->sectors_available,
                             max_t(s64, 0, sectors_available - sectors));
                this_cpu_add(*c->online_reserved, sectors);
                res->sectors                    += sectors;
                ret = 0;
        } else {
                atomic64_set(&c->sectors_available, sectors_available);
                ret = -BCH_ERR_ENOSPC_disk_reservation;
        }

        mutex_unlock(&c->sectors_available_lock);
        percpu_up_read(&c->mark_lock);

        return ret;
}

/* Startup/shutdown: */

void bch2_buckets_nouse_free(struct bch_fs *c)
{
        for_each_member_device(c, ca) {
                kvfree_rcu_mightsleep(ca->buckets_nouse);
                ca->buckets_nouse = NULL;
        }
}

int bch2_buckets_nouse_alloc(struct bch_fs *c)
{
        for_each_member_device(c, ca) {
                BUG_ON(ca->buckets_nouse);

                ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) *
                                            sizeof(unsigned long),
                                            GFP_KERNEL|__GFP_ZERO);
                if (!ca->buckets_nouse) {
                        bch2_dev_put(ca);
                        return -BCH_ERR_ENOMEM_buckets_nouse;
                }
        }

        return 0;
}

static void bucket_gens_free_rcu(struct rcu_head *rcu)
{
        struct bucket_gens *buckets =
                container_of(rcu, struct bucket_gens, rcu);

        kvfree(buckets);
}

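/*
 * Allocate a new bucket_gens array for @nbuckets, copying the old
 * generation numbers across under mark_lock when resizing, and freeing
 * the old array via RCU.
 */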
int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
        struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL;
        bool resize = ca->bucket_gens != NULL;
        int ret;

        BUG_ON(resize && ca->buckets_nouse);

        if (!(bucket_gens       = kvmalloc(sizeof(struct bucket_gens) + nbuckets,
                                           GFP_KERNEL|__GFP_ZERO))) {
                ret = -BCH_ERR_ENOMEM_bucket_gens;
                goto err;
        }

        bucket_gens->first_bucket = ca->mi.first_bucket;
        bucket_gens->nbuckets   = nbuckets;
        bucket_gens->nbuckets_minus_first =
                bucket_gens->nbuckets - bucket_gens->first_bucket;

        if (resize) {
                down_write(&ca->bucket_lock);
                percpu_down_write(&c->mark_lock);
        }

        old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);

        if (resize) {
                size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets);

                memcpy(bucket_gens->b,
                       old_bucket_gens->b,
                       n);
        }

        rcu_assign_pointer(ca->bucket_gens, bucket_gens);
        bucket_gens     = old_bucket_gens;

        nbuckets = ca->mi.nbuckets;

        if (resize) {
                percpu_up_write(&c->mark_lock);
                up_write(&ca->bucket_lock);
        }

        ret = 0;
err:
        if (bucket_gens)
                call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);

        return ret;
}

void bch2_dev_buckets_free(struct bch_dev *ca)
{
        kvfree(ca->buckets_nouse);
        kvfree(rcu_dereference_protected(ca->bucket_gens, 1));
        free_percpu(ca->usage);
}

int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{
        ca->usage = alloc_percpu(struct bch_dev_usage);
        if (!ca->usage)
                return -BCH_ERR_ENOMEM_usage_init;

        return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
}