// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "messages.h"
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"
#include "tree-mod-log.h"
#include "fs.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_ref_node_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * Delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

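/*
 * Check if we are running low on space for delayed refs.
 *
 * Returns true once the space needed by the queued delayed refs
 * (delayed_refs_rsv->size) exceeds what is actually reserved in the delayed
 * refs rsv plus the global rsv, meaning callers should start throttling and
 * get the queued delayed refs processed; returns false while there is still
 * enough reserved space.
 */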
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        bool ret = false;
        u64 reserved;

        spin_lock(&global_rsv->lock);
        reserved = global_rsv->reserved;
        spin_unlock(&global_rsv->lock);

        /*
         * Since the global reserve is just kind of magic we don't really want
         * to rely on it to save our bacon, so if our size is more than the
         * delayed_refs_rsv and the global rsv then it's time to think about
         * bailing.
         */
        spin_lock(&delayed_refs_rsv->lock);
        reserved += delayed_refs_rsv->reserved;
        if (delayed_refs_rsv->size >= reserved)
                ret = true;
        spin_unlock(&delayed_refs_rsv->lock);
        return ret;
}
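
/*
 * Illustrative only, not a caller from this file: the transaction end/commit
 * logic typically uses the check above to decide whether a handle should be
 * wound down early, along the lines of:
 *
 *	if (btrfs_check_space_for_delayed_refs(trans->fs_info))
 *		return true;	(ask the caller to end the transaction)
 *
 * The exact throttling policy lives with the callers, not here.
 */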

/*
 * Release a ref head's reservation.
 *
 * @fs_info:  the filesystem
 * @nr_refs:  number of delayed refs to drop
 * @nr_csums: number of csum items to drop
 *
 * Drops the delayed ref head's count from the delayed refs rsv and frees any
 * excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums)
{
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
        u64 num_bytes;
        u64 released;

        num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs);
        num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums);

        released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
        if (released)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                              0, released, 0);
}

/*
 * Adjust the size of the delayed refs rsv.
 *
 * This is to be called anytime we may have adjusted trans->delayed_ref_updates
 * or trans->delayed_ref_csum_deletions. It'll calculate the additional size
 * and add it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
        struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv;
        u64 num_bytes;
        u64 reserved_bytes;

        num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
        num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
                                                       trans->delayed_ref_csum_deletions);

        if (num_bytes == 0)
                return;

        /*
         * Try to take num_bytes from the transaction's local delayed reserve.
         * If not possible, try to take as much as is available. If the local
         * reserve doesn't have enough reserved space, the delayed refs reserve
         * will be refilled next time btrfs_delayed_refs_rsv_refill() is called
         * by someone or, if a transaction commit is triggered before that, the
         * global block reserve will be used. We want to minimize using the
         * global block reserve for cases we can account for in advance, to
         * avoid exhausting it and reaching -ENOSPC during a transaction commit.
         */
        spin_lock(&local_rsv->lock);
        reserved_bytes = min(num_bytes, local_rsv->reserved);
        local_rsv->reserved -= reserved_bytes;
        local_rsv->full = (local_rsv->reserved >= local_rsv->size);
        spin_unlock(&local_rsv->lock);

        spin_lock(&delayed_rsv->lock);
        delayed_rsv->size += num_bytes;
        delayed_rsv->reserved += reserved_bytes;
        delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size);
        spin_unlock(&delayed_rsv->lock);
        trans->delayed_ref_updates = 0;
        trans->delayed_ref_csum_deletions = 0;
}

/*
 * Adjust the size of the delayed refs block reserve for 1 block group item
 * insertion, used after allocating a block group.
 */
void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info)
{
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;

        spin_lock(&delayed_rsv->lock);
        /*
         * Inserting a block group item does not require changing the free space
         * tree, only the extent tree or the block group tree, so this is all we
         * need.
         */
        delayed_rsv->size += btrfs_calc_insert_metadata_size(fs_info, 1);
        delayed_rsv->full = false;
        spin_unlock(&delayed_rsv->lock);
}

/*
 * Adjust the size of the delayed refs block reserve to release space for 1
 * block group item insertion.
 */
void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info)
{
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
        const u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
        u64 released;

        released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
        if (released > 0)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                              0, released, 0);
}

/*
 * Adjust the size of the delayed refs block reserve for 1 block group item
 * update.
 */
void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
{
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;

        spin_lock(&delayed_rsv->lock);
        /*
         * Updating a block group item does not result in new nodes/leaves and
         * does not require changing the free space tree, only the extent tree
         * or the block group tree, so this is all we need.
         */
        delayed_rsv->size += btrfs_calc_metadata_size(fs_info, 1);
        delayed_rsv->full = false;
        spin_unlock(&delayed_rsv->lock);
}

/*
 * Adjust the size of the delayed refs block reserve to release space for 1
 * block group item update.
 */
void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
{
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
        const u64 num_bytes = btrfs_calc_metadata_size(fs_info, 1);
        u64 released;

        released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL);
        if (released > 0)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                              0, released, 0);
}

/*
 * Transfer bytes to our delayed refs rsv.
 *
 * @fs_info:   the filesystem
 * @num_bytes: number of bytes to transfer
 *
 * This transfers up to the num_bytes amount, previously reserved, to the
 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
                                       u64 num_bytes)
{
        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
        u64 to_free = 0;

        spin_lock(&delayed_refs_rsv->lock);
        if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
                u64 delta = delayed_refs_rsv->size -
                        delayed_refs_rsv->reserved;
                if (num_bytes > delta) {
                        to_free = num_bytes - delta;
                        num_bytes = delta;
                }
        } else {
                to_free = num_bytes;
                num_bytes = 0;
        }

        if (num_bytes)
                delayed_refs_rsv->reserved += num_bytes;
        if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
                delayed_refs_rsv->full = true;
        spin_unlock(&delayed_refs_rsv->lock);

        if (num_bytes)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                              0, num_bytes, 1);
        if (to_free)
                btrfs_space_info_free_bytes_may_use(fs_info,
                                delayed_refs_rsv->space_info, to_free);
}

/*
 * Refill based on our delayed refs usage.
 *
 * @fs_info: the filesystem
 * @flush:   control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to 1 item's worth of space and
 * will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
                                  enum btrfs_reserve_flush_enum flush)
{
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
        struct btrfs_space_info *space_info = block_rsv->space_info;
        u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1);
        u64 num_bytes = 0;
        u64 refilled_bytes;
        u64 to_free;
        int ret = -ENOSPC;

        spin_lock(&block_rsv->lock);
        if (block_rsv->reserved < block_rsv->size) {
                num_bytes = block_rsv->size - block_rsv->reserved;
                num_bytes = min(num_bytes, limit);
        }
        spin_unlock(&block_rsv->lock);

        if (!num_bytes)
                return 0;

        ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush);
        if (ret)
                return ret;

        /*
         * We may have raced with someone else, so check again if the block
         * reserve is still not full and release any excess space.
         */
        spin_lock(&block_rsv->lock);
        if (block_rsv->reserved < block_rsv->size) {
                u64 needed = block_rsv->size - block_rsv->reserved;

                if (num_bytes >= needed) {
                        block_rsv->reserved += needed;
                        block_rsv->full = true;
                        to_free = num_bytes - needed;
                        refilled_bytes = needed;
                } else {
                        block_rsv->reserved += num_bytes;
                        to_free = 0;
                        refilled_bytes = num_bytes;
                }
        } else {
                to_free = num_bytes;
                refilled_bytes = 0;
        }
        spin_unlock(&block_rsv->lock);

        if (to_free > 0)
                btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free);

        if (refilled_bytes > 0)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
                                              refilled_bytes, 1);
        return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_ref_node *ref1,
                          struct btrfs_delayed_ref_node *ref2)
{
        if (ref1->data_ref.objectid < ref2->data_ref.objectid)
                return -1;
        if (ref1->data_ref.objectid > ref2->data_ref.objectid)
                return 1;
        if (ref1->data_ref.offset < ref2->data_ref.offset)
                return -1;
        if (ref1->data_ref.offset > ref2->data_ref.offset)
                return 1;
        return 0;
}

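/*
 * Compare two delayed ref nodes belonging to the same head (same bytenr).
 *
 * The sort order is: type first, then parent bytenr for shared back refs or
 * ref_root (plus inode objectid/offset for data refs) for normal back refs,
 * and finally, when @check_seq is true, the sequence number, so that older
 * updates to the same ref sort first.
 */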
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
                     struct btrfs_delayed_ref_node *ref2,
                     bool check_seq)
{
        int ret = 0;

        if (ref1->type < ref2->type)
                return -1;
        if (ref1->type > ref2->type)
                return 1;
        if (ref1->type == BTRFS_SHARED_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        } else {
                if (ref1->ref_root < ref2->ref_root)
                        return -1;
                if (ref1->ref_root > ref2->ref_root)
                        return 1;
                if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY)
                        ret = comp_data_refs(ref1, ref2);
        }
        if (ret)
                return ret;
        if (check_seq) {
                if (ref1->seq < ref2->seq)
                        return -1;
                if (ref1->seq > ref2->seq)
                        return 1;
        }
        return 0;
}

/* Insert a new ref head into the head ref rbtree. */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
                                                   struct rb_node *node)
{
        struct rb_node **p = &root->rb_root.rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_head *entry;
        struct btrfs_delayed_ref_head *ins;
        u64 bytenr;
        bool leftmost = true;

        ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
        bytenr = ins->bytenr;
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
                                 href_node);

                if (bytenr < entry->bytenr) {
                        p = &(*p)->rb_left;
                } else if (bytenr > entry->bytenr) {
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else {
                        return entry;
                }
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color_cached(node, root, leftmost);
        return NULL;
}

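/*
 * Insert a new ref node into a head's ref tree, ordered by comp_refs()
 * including the sequence number. Returns NULL on success or the existing
 * node if an identical ref (same type, root/parent, data ref and seq) is
 * already present, in which case the caller merges the two.
 */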
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root_cached *root,
                struct btrfs_delayed_ref_node *ins)
{
        struct rb_node **p = &root->rb_root.rb_node;
        struct rb_node *node = &ins->ref_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        bool leftmost = true;

        while (*p) {
                int comp;

                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 ref_node);
                comp = comp_refs(ins, entry, true);
                if (comp < 0) {
                        p = &(*p)->rb_left;
                } else if (comp > 0) {
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else {
                        return entry;
                }
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color_cached(node, root, leftmost);
        return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
                struct btrfs_delayed_ref_root *dr)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;

        n = rb_first_cached(&dr->href_root);
        if (!n)
                return NULL;

        entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

        return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot.  If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
                struct btrfs_delayed_ref_root *dr, u64 bytenr,
                bool return_bigger)
{
        struct rb_root *root = &dr->href_root.rb_root;
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;

        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

                if (bytenr < entry->bytenr)
                        n = n->rb_left;
                else if (bytenr > entry->bytenr)
                        n = n->rb_right;
                else
                        return entry;
        }
        if (entry && return_bigger) {
                if (bytenr > entry->bytenr) {
                        n = rb_next(&entry->href_node);
                        if (!n)
                                return NULL;
                        entry = rb_entry(n, struct btrfs_delayed_ref_head,
                                         href_node);
                }
                return entry;
        }
        return NULL;
}

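/*
 * Lock a delayed ref head for processing while delayed_refs->lock is held.
 *
 * Returns 0 with head->mutex held. If we had to drop delayed_refs->lock to
 * sleep on the mutex, the head may have been run and removed from the rbtree
 * in the meantime; in that case -EAGAIN is returned (with delayed_refs->lock
 * re-taken) and the caller must look the head up again, e.g. (illustrative
 * sketch, not a caller from this file):
 *
 *	spin_lock(&delayed_refs->lock);
 *	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 *	if (head && btrfs_delayed_ref_lock(delayed_refs, head) == -EAGAIN) {
 *		spin_unlock(&delayed_refs->lock);
 *		goto retry;
 *	}
 */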
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
                           struct btrfs_delayed_ref_head *head)
{
        lockdep_assert_held(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        refcount_inc(&head->refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
        if (RB_EMPTY_NODE(&head->href_node)) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref_head(head);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref_head(head);
        return 0;
}

static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_head *head,
                                    struct btrfs_delayed_ref_node *ref)
{
        lockdep_assert_held(&head->lock);
        rb_erase_cached(&ref->ref_node, &head->ref_tree);
        RB_CLEAR_NODE(&ref->ref_node);
        if (!list_empty(&ref->add_list))
                list_del(&ref->add_list);
        btrfs_put_delayed_ref(ref);
        atomic_dec(&delayed_refs->num_entries);
        btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
}

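/*
 * Try to merge @ref with the refs that follow it in the head's ref tree and
 * compare equal when the sequence number is ignored. Refs at or above @seq
 * are left alone because a tree mod log user may still need them. Mods with
 * the same action accumulate, opposite actions cancel out; returns true if
 * @ref itself was consumed or replaced, telling the caller to restart the
 * scan of the head's ref tree.
 */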
static bool merge_ref(struct btrfs_fs_info *fs_info,
                      struct btrfs_delayed_ref_root *delayed_refs,
                      struct btrfs_delayed_ref_head *head,
                      struct btrfs_delayed_ref_node *ref,
                      u64 seq)
{
        struct btrfs_delayed_ref_node *next;
        struct rb_node *node = rb_next(&ref->ref_node);
        bool done = false;

        while (!done && node) {
                int mod;

                next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                node = rb_next(node);
                if (seq && next->seq >= seq)
                        break;
                if (comp_refs(ref, next, false))
                        break;

                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                swap(ref, next);
                                done = true;
                        }
                        mod = -next->ref_mod;
                }

                drop_delayed_ref(fs_info, delayed_refs, head, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(fs_info, delayed_refs, head, ref);
                        done = true;
                } else {
                        /*
                         * Can't have multiples of the same ref on a tree block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }
        }

        return done;
}

void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_node *ref;
        struct rb_node *node;
        u64 seq = 0;

        lockdep_assert_held(&head->lock);

        if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
                return;

        /* We don't have too many refs to merge for data. */
        if (head->is_data)
                return;

        seq = btrfs_tree_mod_log_lowest_seq(fs_info);
again:
        for (node = rb_first_cached(&head->ref_tree); node;
             node = rb_next(node)) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                if (seq && ref->seq >= seq)
                        continue;
                if (merge_ref(fs_info, delayed_refs, head, ref, seq))
                        goto again;
        }
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
        int ret = 0;
        u64 min_seq = btrfs_tree_mod_log_lowest_seq(fs_info);

        if (min_seq != 0 && seq >= min_seq) {
                btrfs_debug(fs_info,
                            "holding back delayed_ref %llu, lowest is %llu",
                            seq, min_seq);
                ret = 1;
        }

        return ret;
}

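/*
 * Pick the next delayed ref head to run. Scanning resumes at
 * run_delayed_start (so heads are processed in bytenr order across calls)
 * and wraps around to the start of the rbtree once, skipping heads that are
 * already being processed. The returned head is marked as processing and
 * run_delayed_start is advanced past it; returns NULL when nothing is
 * available.
 */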
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
                struct btrfs_delayed_ref_root *delayed_refs)
{
        struct btrfs_delayed_ref_head *head;

        lockdep_assert_held(&delayed_refs->lock);
again:
        head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
                             true);
        if (!head && delayed_refs->run_delayed_start != 0) {
                delayed_refs->run_delayed_start = 0;
                head = find_first_ref_head(delayed_refs);
        }
        if (!head)
                return NULL;

        while (head->processing) {
                struct rb_node *node;

                node = rb_next(&head->href_node);
                if (!node) {
                        if (delayed_refs->run_delayed_start == 0)
                                return NULL;
                        delayed_refs->run_delayed_start = 0;
                        goto again;
                }
                head = rb_entry(node, struct btrfs_delayed_ref_head,
                                href_node);
        }

        head->processing = true;
        WARN_ON(delayed_refs->num_heads_ready == 0);
        delayed_refs->num_heads_ready--;
        delayed_refs->run_delayed_start = head->bytenr +
                head->num_bytes;
        return head;
}

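/*
 * Unlink a ref head from the rbtree and update the counters. Both
 * delayed_refs->lock and head->lock must be held. The head itself is not
 * freed here; the caller is still holding a reference to it and drops it
 * with btrfs_put_delayed_ref_head() once done.
 */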
void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
                           struct btrfs_delayed_ref_head *head)
{
        lockdep_assert_held(&delayed_refs->lock);
        lockdep_assert_held(&head->lock);

        rb_erase_cached(&head->href_node, &delayed_refs->href_root);
        RB_CLEAR_NODE(&head->href_node);
        atomic_dec(&delayed_refs->num_entries);
        delayed_refs->num_heads--;
        if (!head->processing)
                delayed_refs->num_heads_ready--;
}

/*
 * Helper to insert a ref node into a head's ref tree, or to merge it with an
 * existing node for the same ref.
 *
 * Return false if the ref was inserted.
 * Return true if the ref was merged into an existing one (and therefore can be
 * freed by the caller).
 */
static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_delayed_ref_head *href,
                               struct btrfs_delayed_ref_node *ref)
{
        struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs;
        struct btrfs_delayed_ref_node *exist;
        int mod;

        spin_lock(&href->lock);
        exist = tree_insert(&href->ref_tree, ref);
        if (!exist) {
                if (ref->action == BTRFS_ADD_DELAYED_REF)
                        list_add_tail(&ref->add_list, &href->ref_add_list);
                atomic_inc(&root->num_entries);
                spin_unlock(&href->lock);
                trans->delayed_ref_updates++;
                return false;
        }

        /* Now we are sure we can merge. */
        if (exist->action == ref->action) {
                mod = ref->ref_mod;
        } else {
                /* Need to change the action. */
                if (exist->ref_mod < ref->ref_mod) {
                        exist->action = ref->action;
                        mod = -exist->ref_mod;
                        exist->ref_mod = ref->ref_mod;
                        if (ref->action == BTRFS_ADD_DELAYED_REF)
                                list_add_tail(&exist->add_list,
                                              &href->ref_add_list);
                        else if (ref->action == BTRFS_DROP_DELAYED_REF) {
                                ASSERT(!list_empty(&exist->add_list));
                                list_del(&exist->add_list);
                        } else {
                                ASSERT(0);
                        }
                } else {
                        mod = -ref->ref_mod;
                }
        }
        exist->ref_mod += mod;

        /* Remove the existing ref if its ref_mod is zero. */
        if (exist->ref_mod == 0)
                drop_delayed_ref(trans->fs_info, root, href, exist);
        spin_unlock(&href->lock);
        return true;
}

/*
 * Helper function to update the accounting in the head ref.
 * The existing and update refs must have the same bytenr.
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_delayed_ref_head *existing,
                         struct btrfs_delayed_ref_head *update)
{
        struct btrfs_delayed_ref_root *delayed_refs =
                &trans->transaction->delayed_refs;
        struct btrfs_fs_info *fs_info = trans->fs_info;
        int old_ref_mod;

        BUG_ON(existing->is_data != update->is_data);

        spin_lock(&existing->lock);

        /*
         * When freeing an extent, we may not know the owning root when we
         * first create the head_ref. However, some deref before the last deref
         * will know it, so we just need to update the head_ref accordingly.
         */
        if (!existing->owning_root)
                existing->owning_root = update->owning_root;

        if (update->must_insert_reserved) {
                /*
                 * If the extent was freed and then reallocated before the
                 * delayed ref entries were processed, we can end up with an
                 * existing head ref without the must_insert_reserved flag set.
                 * Set it again here.
                 */
                existing->must_insert_reserved = update->must_insert_reserved;
                existing->owning_root = update->owning_root;

                /*
                 * Update the num_bytes so we make sure the accounting is done
                 * correctly.
                 */
                existing->num_bytes = update->num_bytes;
        }

        if (update->extent_op) {
                if (!existing->extent_op) {
                        existing->extent_op = update->extent_op;
                } else {
                        if (update->extent_op->update_key) {
                                memcpy(&existing->extent_op->key,
                                       &update->extent_op->key,
                                       sizeof(update->extent_op->key));
                                existing->extent_op->update_key = true;
                        }
                        if (update->extent_op->update_flags) {
                                existing->extent_op->flags_to_set |=
                                        update->extent_op->flags_to_set;
                                existing->extent_op->update_flags = true;
                        }
                        btrfs_free_delayed_extent_op(update->extent_op);
                }
        }
        /*
         * Update the reference mod on the head to reflect this new operation.
         * We only need the lock for this case because we could be processing
         * it currently; for refs we just added we know we're a-ok.
         */
        old_ref_mod = existing->total_ref_mod;
        existing->ref_mod += update->ref_mod;
        existing->total_ref_mod += update->ref_mod;

        /*
         * If we are going from a positive ref mod to a negative or vice
         * versa we need to make sure to adjust pending_csums accordingly.
         * We reserve bytes for csum deletion when adding or updating a ref
         * head, see add_delayed_ref_head() for more details.
         */
        if (existing->is_data) {
                u64 csum_leaves =
                        btrfs_csum_bytes_to_leaves(fs_info,
                                                   existing->num_bytes);

                if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
                        delayed_refs->pending_csums -= existing->num_bytes;
                        btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves);
                }
                if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
                        delayed_refs->pending_csums += existing->num_bytes;
                        trans->delayed_ref_csum_deletions += csum_leaves;
                }
        }

        spin_unlock(&existing->lock);
}

static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
                                  struct btrfs_ref *generic_ref,
                                  struct btrfs_qgroup_extent_record *qrecord,
                                  u64 reserved)
{
        int count_mod = 1;
        bool must_insert_reserved = false;

        /* If reserved is provided, it must be a data extent. */
        BUG_ON(generic_ref->type != BTRFS_REF_DATA && reserved);

        switch (generic_ref->action) {
        case BTRFS_ADD_DELAYED_REF:
                /* count_mod is already set to 1. */
                break;
        case BTRFS_UPDATE_DELAYED_HEAD:
                count_mod = 0;
                break;
        case BTRFS_DROP_DELAYED_REF:
                /*
                 * The head node stores the sum of all the mods, so dropping a ref
                 * should drop the sum in the head node by one.
                 */
                count_mod = -1;
                break;
        case BTRFS_ADD_DELAYED_EXTENT:
                /*
                 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the
                 * reserved accounting when the extent is finally added, or if a
                 * later modification deletes the delayed ref without ever
                 * inserting the extent into the extent allocation tree.
                 * ref->must_insert_reserved is the flag used to record that
                 * accounting mods are required.
                 *
                 * Once we record must_insert_reserved, switch the action to
                 * BTRFS_ADD_DELAYED_REF because other special casing is not
                 * required.
                 */
                must_insert_reserved = true;
                break;
        }

        refcount_set(&head_ref->refs, 1);
        head_ref->bytenr = generic_ref->bytenr;
        head_ref->num_bytes = generic_ref->num_bytes;
        head_ref->ref_mod = count_mod;
        head_ref->reserved_bytes = reserved;
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->owning_root = generic_ref->owning_root;
        head_ref->is_data = (generic_ref->type == BTRFS_REF_DATA);
        head_ref->is_system = (generic_ref->ref_root == BTRFS_CHUNK_TREE_OBJECTID);
        head_ref->ref_tree = RB_ROOT_CACHED;
        INIT_LIST_HEAD(&head_ref->ref_add_list);
        RB_CLEAR_NODE(&head_ref->href_node);
        head_ref->processing = false;
        head_ref->total_ref_mod = count_mod;
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);

        if (qrecord) {
                if (generic_ref->ref_root && reserved) {
                        qrecord->data_rsv = reserved;
                        qrecord->data_rsv_refroot = generic_ref->ref_root;
                }
                qrecord->bytenr = generic_ref->bytenr;
                qrecord->num_bytes = generic_ref->num_bytes;
                qrecord->old_roots = NULL;
        }
}

/*
 * Helper function to actually insert a head node into the rbtree.
 * This does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_qgroup_extent_record *qrecord,
                     int action, bool *qrecord_inserted_ret)
{
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_root *delayed_refs;
        bool qrecord_inserted = false;

        delayed_refs = &trans->transaction->delayed_refs;

        /* Record qgroup extent info if provided. */
        if (qrecord) {
                if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
                                        delayed_refs, qrecord))
                        kfree(qrecord);
                else
                        qrecord_inserted = true;
        }

        trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
                update_existing_head_ref(trans, existing, head_ref);
                /*
                 * We've updated the existing ref, free the newly
                 * allocated ref.
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
                /*
                 * We reserve the amount of bytes needed to delete csums when
                 * adding the ref head and not when adding individual drop refs
                 * since the csum items are deleted only after running the last
                 * delayed drop ref (the data extent's ref count drops to 0).
                 */
                if (head_ref->is_data && head_ref->ref_mod < 0) {
                        delayed_refs->pending_csums += head_ref->num_bytes;
                        trans->delayed_ref_csum_deletions +=
                                btrfs_csum_bytes_to_leaves(trans->fs_info,
                                                           head_ref->num_bytes);
                }
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
        }
        if (qrecord_inserted_ret)
                *qrecord_inserted_ret = qrecord_inserted;

        return head_ref;
}

/*
 * Initialize the structure which represents a modification to an extent.
 *
 * @fs_info:     Internal to the mounted filesystem mount structure.
 *
 * @ref:         The delayed ref node which is going to be initialized.
 *
 * @generic_ref: Describes the modification: the logical address and size of
 *               the extent, the id of the root where the modification
 *               originated (either one of the well-known metadata trees or
 *               the subvolume id which references the extent), the action
 *               (one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *               BTRFS_ADD_DELAYED_EXTENT) and the backref type
 *               (BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY when
 *               recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *               BTRFS_EXTENT_DATA_REF_KEY when recording a data extent).
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
                                    struct btrfs_delayed_ref_node *ref,
                                    struct btrfs_ref *generic_ref)
{
        int action = generic_ref->action;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        if (is_fstree(generic_ref->ref_root))
                seq = atomic64_read(&fs_info->tree_mod_seq);

        refcount_set(&ref->refs, 1);
        ref->bytenr = generic_ref->bytenr;
        ref->num_bytes = generic_ref->num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->seq = seq;
        ref->type = btrfs_ref_type(generic_ref);
        ref->ref_root = generic_ref->ref_root;
        ref->parent = generic_ref->parent;
        RB_CLEAR_NODE(&ref->ref_node);
        INIT_LIST_HEAD(&ref->add_list);

        if (generic_ref->type == BTRFS_REF_DATA)
                ref->data_ref = generic_ref->data_ref;
        else
                ref->tree_ref = generic_ref->tree_ref;
}

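/*
 * Fill in the metadata specific part of a btrfs_ref and decide whether qgroup
 * accounting can be skipped: it only matters when the root the extent belongs
 * to is an fs tree and the root being modified (if known) is one too.
 * Illustrative use, assuming the caller already filled the common fields
 * (this sketch is not taken from a real caller):
 *
 *	struct btrfs_ref ref = {
 *		.action = BTRFS_ADD_DELAYED_REF,
 *		.bytenr = eb->start,
 *		.num_bytes = eb->len,
 *		.ref_root = root_id,
 *	};
 *	btrfs_init_tree_ref(&ref, level, 0, false);
 */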
void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
                         bool skip_qgroup)
{
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
        /* If @real_root not set, use @root as fallback. */
        generic_ref->real_root = mod_root ?: generic_ref->ref_root;
#endif
        generic_ref->tree_ref.level = level;
        generic_ref->type = BTRFS_REF_METADATA;
        if (skip_qgroup || !(is_fstree(generic_ref->ref_root) &&
                             (!mod_root || is_fstree(mod_root))))
                generic_ref->skip_qgroup = true;
        else
                generic_ref->skip_qgroup = false;
}

void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
                         u64 mod_root, bool skip_qgroup)
{
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
        /* If @real_root not set, use @root as fallback. */
        generic_ref->real_root = mod_root ?: generic_ref->ref_root;
#endif
        generic_ref->data_ref.objectid = ino;
        generic_ref->data_ref.offset = offset;
        generic_ref->type = BTRFS_REF_DATA;
        if (skip_qgroup || !(is_fstree(generic_ref->ref_root) &&
                             (!mod_root || is_fstree(mod_root))))
                generic_ref->skip_qgroup = true;
        else
                generic_ref->skip_qgroup = false;
}

static int add_delayed_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_ref *generic_ref,
                           struct btrfs_delayed_extent_op *extent_op,
                           u64 reserved)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_ref_node *node;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        bool qrecord_inserted;
        int action = generic_ref->action;
        bool merged;

        node = kmem_cache_alloc(btrfs_delayed_ref_node_cachep, GFP_NOFS);
        if (!node)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
                return -ENOMEM;
        }

        if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
                        kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                        return -ENOMEM;
                }
        }

        init_delayed_ref_common(fs_info, node, generic_ref);
        init_delayed_ref_head(head_ref, generic_ref, record, reserved);
        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * Insert both the head node and the new ref without dropping
         * the spin lock.
         */
        head_ref = add_delayed_ref_head(trans, head_ref, record,
                                        action, &qrecord_inserted);

        merged = insert_delayed_ref(trans, head_ref, node);
        spin_unlock(&delayed_refs->lock);

        /*
         * Need to update the delayed_refs_rsv with any changes we may have
         * made.
         */
        btrfs_update_delayed_refs_rsv(trans);

        if (generic_ref->type == BTRFS_REF_DATA)
                trace_add_delayed_data_ref(trans->fs_info, node);
        else
                trace_add_delayed_tree_ref(trans->fs_info, node);
        if (merged)
                kmem_cache_free(btrfs_delayed_ref_node_cachep, node);

        if (qrecord_inserted)
                return btrfs_qgroup_trace_extent_post(trans, record);
        return 0;
}

/*
 * Add a delayed tree ref. This does all of the accounting required to make
 * sure the delayed ref is eventually processed before this transaction
 * commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_ref *generic_ref,
                               struct btrfs_delayed_extent_op *extent_op)
{
        ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
        return add_delayed_ref(trans, generic_ref, extent_op, 0);
}

/*
 * Add a delayed data ref. It's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_ref *generic_ref,
                               u64 reserved)
{
        ASSERT(generic_ref->type == BTRFS_REF_DATA && generic_ref->action);
        return add_delayed_ref(trans, generic_ref, NULL, reserved);
}

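/*
 * Record a deferred update (flags and/or key) for a metadata extent item.
 * This creates or updates the extent's delayed ref head with the
 * BTRFS_UPDATE_DELAYED_HEAD action, which does not change the ref count
 * (count_mod is 0); the extent_op is applied when the head is run.
 */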
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_ref generic_ref = {
                .type = BTRFS_REF_METADATA,
                .action = BTRFS_UPDATE_DELAYED_HEAD,
                .bytenr = bytenr,
                .num_bytes = num_bytes,
        };

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        init_delayed_ref_head(head_ref, &generic_ref, NULL, 0);
        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
                             NULL);

        spin_unlock(&delayed_refs->lock);

        /*
         * Need to update the delayed_refs_rsv with any changes we may have
         * made.
         */
        btrfs_update_delayed_refs_rsv(trans);
        return 0;
}

void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
        if (refcount_dec_and_test(&ref->refs)) {
                WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
                kmem_cache_free(btrfs_delayed_ref_node_cachep, ref);
        }
}

/*
 * This does a simple search for the head node for a given extent.  Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
        lockdep_assert_held(&delayed_refs->lock);

        return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
        kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        kmem_cache_destroy(btrfs_delayed_ref_node_cachep);
        kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = KMEM_CACHE(btrfs_delayed_ref_head, 0);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_ref_node_cachep = KMEM_CACHE(btrfs_delayed_ref_node, 0);
        if (!btrfs_delayed_ref_node_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = KMEM_CACHE(btrfs_delayed_extent_op, 0);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}