/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_SPACE_INFO_H
#define BTRFS_SPACE_INFO_H

#include <trace/events/btrfs.h>
#include "volumes.h"

/*
 * Different levels of flushing to use when doing space reservations.
 *
 * The higher the level, the more methods we try to reclaim space.
 */
enum btrfs_reserve_flush_enum {
	/* If we are in the transaction, we can't flush anything. */
	BTRFS_RESERVE_NO_FLUSH,

	/*
	 * Flush space by:
	 * - Running delayed inode items
	 * - Allocating a new chunk
	 */
	BTRFS_RESERVE_FLUSH_LIMIT,

	/*
	 * Flush space by:
	 * - Running delayed inode items
	 * - Running delayed refs
	 * - Running delalloc and waiting for ordered extents
	 * - Allocating a new chunk
	 * - Committing transaction
	 */
	BTRFS_RESERVE_FLUSH_EVICT,

	/*
	 * Flush space by the above mentioned methods and by:
	 * - Running delayed iputs
	 * - Committing transaction
	 *
	 * Can be interrupted by a fatal signal.
	 */
	BTRFS_RESERVE_FLUSH_DATA,
	BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE,
	BTRFS_RESERVE_FLUSH_ALL,

	/*
	 * Pretty much the same as FLUSH_ALL, but can also steal space from
	 * the global rsv.
	 *
	 * Can be interrupted by a fatal signal.
	 */
	BTRFS_RESERVE_FLUSH_ALL_STEAL,

	/*
	 * This is for btrfs_use_block_rsv only.  We have exhausted our block
	 * rsv and our global block rsv.  This can happen for things like
	 * delalloc where we are overwriting a lot of extents with a single
	 * extent and didn't reserve enough space.  Alternatively it can
	 * happen with delalloc where we reserve one extent's worth of space
	 * for a large extent but fragmentation leads to multiple extents
	 * being created.  This will give us the reservation in the case of
	 *
	 *	if (num_bytes < (space_info->total_bytes -
	 *			 btrfs_space_info_used(space_info, false)))
	 *
	 * which ignores bytes_may_use.  This is potentially dangerous, but
	 * our reservation system is generally pessimistic so it is able to
	 * absorb this style of mistake.
	 */
	BTRFS_RESERVE_FLUSH_EMERGENCY,
};
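
/*
 * Illustrative sketch (an assumption for this comment, not code from the
 * tree): a caller picks the strongest flush level that is safe in its
 * context and passes it to the reservation helpers declared later in this
 * header.
 *
 *	struct btrfs_space_info *sinfo;
 *	int ret;
 *
 *	sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 *	ret = btrfs_reserve_metadata_bytes(fs_info, sinfo, SZ_1M,
 *					   BTRFS_RESERVE_FLUSH_ALL);
 *	if (ret == -ENOSPC)
 *		ret = btrfs_reserve_metadata_bytes(fs_info, sinfo, SZ_1M,
 *					BTRFS_RESERVE_FLUSH_ALL_STEAL);
 */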

enum btrfs_flush_state {
	FLUSH_DELAYED_ITEMS_NR = 1,
	FLUSH_DELAYED_ITEMS = 2,
	FLUSH_DELAYED_REFS_NR = 3,
	FLUSH_DELAYED_REFS = 4,
	FLUSH_DELALLOC = 5,
	FLUSH_DELALLOC_WAIT = 6,
	FLUSH_DELALLOC_FULL = 7,
	ALLOC_CHUNK = 8,
	ALLOC_CHUNK_FORCE = 9,
	RUN_DELAYED_IPUTS = 10,
	COMMIT_TRANS = 11,
};
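
/*
 * Illustrative sketch (a hypothetical loop shape, an assumption rather
 * than the tree's actual code): the explicit numbering above suggests the
 * reclaim machinery tries these states in ascending order, each one a
 * progressively more expensive way to free space, stopping once enough
 * has been reclaimed.
 *
 *	for (state = FLUSH_DELAYED_ITEMS_NR; state <= COMMIT_TRANS; state++) {
 *		flush_space(fs_info, space_info, to_reclaim, state);
 *		if (enough_space_reclaimed(space_info, to_reclaim))
 *			break;
 *	}
 *
 * flush_space() and enough_space_reclaimed() are stand-in names here.
 */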

struct btrfs_space_info {
	spinlock_t lock;

	u64 total_bytes;	/* total bytes in the space,
				   this doesn't take mirrors into account */
	u64 bytes_used;		/* total bytes used,
				   this doesn't take mirrors into account */
	u64 bytes_pinned;	/* total bytes pinned, will be freed when the
				   transaction finishes */
	u64 bytes_reserved;	/* total bytes the allocator has reserved for
				   current allocations */
	u64 bytes_may_use;	/* number of bytes that may be used for
				   delalloc/allocations */
	u64 bytes_readonly;	/* total bytes that are read only */
	u64 bytes_zone_unusable;	/* total bytes that are unusable until
					   resetting the device zone */

	u64 max_extent_size;	/* This will hold the maximum extent size of
				   the space info if we had an ENOSPC in the
				   allocator. */

	/* Chunk size in bytes */
	u64 chunk_size;

	/*
	 * Once a block group drops below this threshold (percent) we'll
	 * schedule it for reclaim.
	 */
	int bg_reclaim_threshold;

	int clamp;		/* Used to scale our threshold for preemptive
				   flushing. The value is >> clamp, so it
				   turns out to be a 2^clamp divisor. */

	unsigned int full:1;	/* indicates that we cannot allocate any more
				   chunks for this space */
	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */

	unsigned int flush:1;		/* set if we are trying to make space */

	unsigned int force_alloc;	/* set if we need to force a chunk
					   alloc for this space */

	u64 disk_used;		/* total bytes used on disk */
	u64 disk_total;		/* total bytes on disk, takes mirrors into
				   account */

	u64 flags;

	struct list_head list;
	/* Protected by the spinlock 'lock'. */
	struct list_head ro_bgs;
	struct list_head priority_tickets;
	struct list_head tickets;

	/*
	 * Size of space that needs to be reclaimed in order to satisfy
	 * pending tickets.
	 */
	u64 reclaim_size;

	/*
	 * tickets_id just indicates the next ticket to be handled; note that
	 * it's not stored per ticket.
	 */
	u64 tickets_id;

	struct rw_semaphore groups_sem;
	/* for block groups in our same type */
	struct list_head block_groups[BTRFS_NR_RAID_TYPES];

	struct kobject kobj;
	struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
};

struct reserve_ticket {
	u64 bytes;
	int error;
	bool steal;
	struct list_head list;
	wait_queue_head_t wait;
};
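
/*
 * Illustrative sketch (condensed and partly assumed, not the tree's actual
 * code): a reservation that cannot be satisfied immediately queues one of
 * these tickets on space_info->tickets (or ->priority_tickets), bumps
 * space_info->reclaim_size, and sleeps; the flusher reclaims space and
 * calls btrfs_try_granting_tickets(), which zeroes ticket->bytes (or sets
 * ticket->error) and wakes the waiter.
 *
 *	wait_event(ticket.wait, ticket.bytes == 0 || ticket.error);
 *	ret = ticket.error;
 */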

static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
{
	return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
		(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
}

/*
 * Declare a helper function to detect underflow of various space info
 * members.
 */
#define DECLARE_SPACE_INFO_UPDATE(name, trace_name)			\
static inline void							\
btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info,		\
			       struct btrfs_space_info *sinfo,		\
			       s64 bytes)				\
{									\
	const u64 abs_bytes = (bytes < 0) ? -bytes : bytes;		\
	lockdep_assert_held(&sinfo->lock);				\
	trace_update_##name(fs_info, sinfo, sinfo->name, bytes);	\
	trace_btrfs_space_reservation(fs_info, trace_name,		\
				      sinfo->flags, abs_bytes,		\
				      bytes > 0);			\
	if (bytes < 0 && sinfo->name < -bytes) {			\
		WARN_ON(1);						\
		sinfo->name = 0;					\
		return;							\
	}								\
	sinfo->name += bytes;						\
}

DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
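
/*
 * The two instantiations above generate
 * btrfs_space_info_update_bytes_may_use() and
 * btrfs_space_info_update_bytes_pinned().  Illustrative use (a sketch,
 * assuming a caller that already holds the space_info lock): pass a signed
 * delta; a negative delta that would underflow the counter trips the
 * WARN_ON() and clamps the counter to zero instead.
 *
 *	spin_lock(&sinfo->lock);
 *	btrfs_space_info_update_bytes_may_use(fs_info, sinfo, nbytes);
 *	btrfs_space_info_update_bytes_may_use(fs_info, sinfo, -nbytes);
 *	spin_unlock(&sinfo->lock);
 */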

int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
				struct btrfs_block_group *block_group);
void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
					u64 chunk_size);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
					       u64 flags);
u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
				 bool may_use_included);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *info, u64 bytes,
			   int dump_block_groups);
int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
				 struct btrfs_space_info *space_info,
				 u64 orig_bytes,
				 enum btrfs_reserve_flush_enum flush);
void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info);
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
			 struct btrfs_space_info *space_info, u64 bytes,
			 enum btrfs_reserve_flush_enum flush);

static inline void btrfs_space_info_free_bytes_may_use(
				struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info,
				u64 num_bytes)
{
	spin_lock(&space_info->lock);
	btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
	btrfs_try_granting_tickets(fs_info, space_info);
	spin_unlock(&space_info->lock);
}
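
/*
 * Illustrative sketch (an assumption for this comment): undoing a data
 * reservation taken with btrfs_reserve_data_bytes() (declared below).
 * The helper above drops bytes_may_use under the lock and then lets any
 * waiting tickets consume the freed space.  'data_sinfo' is a stand-in
 * for the data space_info, e.g. from btrfs_find_space_info().
 *
 *	ret = btrfs_reserve_data_bytes(fs_info, len, BTRFS_RESERVE_FLUSH_DATA);
 *	if (!ret && some_later_step_failed)
 *		btrfs_space_info_free_bytes_may_use(fs_info, data_sinfo, len);
 */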

int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
			     enum btrfs_reserve_flush_enum flush);
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);

#endif /* BTRFS_SPACE_INFO_H */