]>
Commit | Line | Data |
---|---|---|
8f6e39a7 MC |
1 | /* |
2 | * fs/ext4/mballoc.h | |
3 | * | |
4 | * Written by: Alex Tomas <[email protected]> | |
5 | * | |
6 | */ | |
7 | #ifndef _EXT4_MBALLOC_H | |
8 | #define _EXT4_MBALLOC_H | |
9 | ||
10 | #include <linux/time.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/namei.h> | |
13 | #include <linux/quotaops.h> | |
14 | #include <linux/buffer_head.h> | |
15 | #include <linux/module.h> | |
16 | #include <linux/swap.h> | |
17 | #include <linux/proc_fs.h> | |
18 | #include <linux/pagemap.h> | |
19 | #include <linux/seq_file.h> | |
20 | #include <linux/version.h> | |
21 | #include "ext4_jbd2.h" | |
22 | #include "ext4.h" | |
23 | #include "group.h" | |
24 | ||
/*
 * With AGGRESSIVE_CHECK the allocator runs consistency checks over
 * structures; these checks slow things down a lot.  The trailing "__"
 * keeps the option disabled — rename to AGGRESSIVE_CHECK to enable.
 */
#define AGGRESSIVE_CHECK__

/*
 * With DOUBLE_CHECK defined mballoc creates persistent in-core
 * bitmaps, maintains and uses them to check for double allocations.
 */
#define DOUBLE_CHECK__

/*
 * With MB_DEBUG defined mb_debug() prints via printk(); otherwise it
 * expands to nothing.  Disabled here via the "__" suffix.
 */
#define MB_DEBUG__
#ifdef MB_DEBUG
#define mb_debug(fmt, a...)	printk(fmt, ##a)
#else
#define mb_debug(fmt, a...)
#endif
45 | ||
/*
 * With EXT4_MB_HISTORY mballoc stores the last N allocations in memory
 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history.
 */
#define EXT4_MB_HISTORY
#define EXT4_MB_HISTORY_ALLOC		1	/* allocation */
#define EXT4_MB_HISTORY_PREALLOC	2	/* preallocated blocks used */
#define EXT4_MB_HISTORY_DISCARD		4	/* preallocation discarded */
#define EXT4_MB_HISTORY_FREE		8	/* free */

/* Events recorded by default: allocations and preallocation hits. */
#define EXT4_MB_HISTORY_DEFAULT		(EXT4_MB_HISTORY_ALLOC | \
					 EXT4_MB_HISTORY_PREALLOC)
58 | ||
/*
 * How many found extents mballoc may examine while looking for the
 * best one.
 */
#define MB_DEFAULT_MAX_TO_SCAN		200

/*
 * How many extents mballoc must examine at minimum before settling
 * on a best extent.
 */
#define MB_DEFAULT_MIN_TO_SCAN		10

/*
 * How many groups mballoc will scan looking for the best chunk.
 */
#define MB_DEFAULT_MAX_GROUPS_TO_SCAN	5

/*
 * With 'ext4_mb_stats' the allocator collects statistics that are
 * shown at umount.  The collecting costs though!
 */
#define MB_DEFAULT_STATS		1

/*
 * Files smaller than MB_DEFAULT_STREAM_THRESHOLD blocks are served by
 * the stream allocator, whose purpose is to pack requests as close to
 * each other as possible to produce smooth I/O traffic.  We use the
 * locality group prealloc space for stream requests.  Tunable via
 * /proc/fs/ext4/<partition>/stream_req.
 */
#define MB_DEFAULT_STREAM_THRESHOLD	16	/* 64K */

/*
 * Requests for which the order-2 (buddy) search is used.
 */
#define MB_DEFAULT_ORDER2_REQS		2

/*
 * Default group preallocation size, in blocks.
 */
#define MB_DEFAULT_GROUP_PREALLOC	512
98 | ||
/* Slab caches: preallocation spaces and allocation contexts. */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;

/* Max blocks tracked per free-metadata entry; override any prior value. */
#ifdef EXT4_BB_MAX_BLOCKS
#undef EXT4_BB_MAX_BLOCKS
#endif
#define EXT4_BB_MAX_BLOCKS	30
106 | ||
107 | struct ext4_free_metadata { | |
108 | ext4_group_t group; | |
109 | unsigned short num; | |
110 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | |
111 | struct list_head list; | |
112 | }; | |
113 | ||
114 | struct ext4_group_info { | |
115 | unsigned long bb_state; | |
116 | unsigned long bb_tid; | |
117 | struct ext4_free_metadata *bb_md_cur; | |
118 | unsigned short bb_first_free; | |
119 | unsigned short bb_free; | |
120 | unsigned short bb_fragments; | |
121 | struct list_head bb_prealloc_list; | |
122 | #ifdef DOUBLE_CHECK | |
123 | void *bb_bitmap; | |
124 | #endif | |
125 | unsigned short bb_counters[]; | |
126 | }; | |
127 | ||
/* Bit positions within ext4_group_info.bb_state. */
#define EXT4_GROUP_INFO_NEED_INIT_BIT	0
#define EXT4_GROUP_INFO_LOCKED_BIT	1

/* Nonzero if the group's in-core buddy data still needs initialization. */
#define EXT4_MB_GRP_NEED_INIT(grp)	\
	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
134 | ||
135 | struct ext4_prealloc_space { | |
136 | struct list_head pa_inode_list; | |
137 | struct list_head pa_group_list; | |
138 | union { | |
139 | struct list_head pa_tmp_list; | |
140 | struct rcu_head pa_rcu; | |
141 | } u; | |
142 | spinlock_t pa_lock; | |
143 | atomic_t pa_count; | |
144 | unsigned pa_deleted; | |
145 | ext4_fsblk_t pa_pstart; /* phys. block */ | |
146 | ext4_lblk_t pa_lstart; /* log. block */ | |
147 | unsigned short pa_len; /* len of preallocated chunk */ | |
148 | unsigned short pa_free; /* how many blocks are free */ | |
149 | unsigned short pa_linear; /* consumed in one direction | |
150 | * strictly, for grp prealloc */ | |
151 | spinlock_t *pa_obj_lock; | |
152 | struct inode *pa_inode; /* hack, for history only */ | |
153 | }; | |
154 | ||
155 | ||
156 | struct ext4_free_extent { | |
157 | ext4_lblk_t fe_logical; | |
158 | ext4_grpblk_t fe_start; | |
159 | ext4_group_t fe_group; | |
160 | int fe_len; | |
161 | }; | |
162 | ||
163 | /* | |
164 | * Locality group: | |
165 | * we try to group all related changes together | |
166 | * so that writeback can flush/allocate them together as well | |
167 | */ | |
168 | struct ext4_locality_group { | |
169 | /* for allocator */ | |
170 | struct mutex lg_mutex; /* to serialize allocates */ | |
171 | struct list_head lg_prealloc_list;/* list of preallocations */ | |
172 | spinlock_t lg_prealloc_lock; | |
173 | }; | |
174 | ||
175 | struct ext4_allocation_context { | |
176 | struct inode *ac_inode; | |
177 | struct super_block *ac_sb; | |
178 | ||
179 | /* original request */ | |
180 | struct ext4_free_extent ac_o_ex; | |
181 | ||
182 | /* goal request (after normalization) */ | |
183 | struct ext4_free_extent ac_g_ex; | |
184 | ||
185 | /* the best found extent */ | |
186 | struct ext4_free_extent ac_b_ex; | |
187 | ||
188 | /* copy of the bext found extent taken before preallocation efforts */ | |
189 | struct ext4_free_extent ac_f_ex; | |
190 | ||
191 | /* number of iterations done. we have to track to limit searching */ | |
192 | unsigned long ac_ex_scanned; | |
193 | __u16 ac_groups_scanned; | |
194 | __u16 ac_found; | |
195 | __u16 ac_tail; | |
196 | __u16 ac_buddy; | |
197 | __u16 ac_flags; /* allocation hints */ | |
198 | __u8 ac_status; | |
199 | __u8 ac_criteria; | |
200 | __u8 ac_repeats; | |
201 | __u8 ac_2order; /* if request is to allocate 2^N blocks and | |
202 | * N > 0, the field stores N, otherwise 0 */ | |
203 | __u8 ac_op; /* operation, for history only */ | |
204 | struct page *ac_bitmap_page; | |
205 | struct page *ac_buddy_page; | |
206 | struct ext4_prealloc_space *ac_pa; | |
207 | struct ext4_locality_group *ac_lg; | |
208 | }; | |
209 | ||
/* Values for ext4_allocation_context.ac_status. */
#define AC_STATUS_CONTINUE	1
#define AC_STATUS_FOUND		2
#define AC_STATUS_BREAK		3
213 | ||
214 | struct ext4_mb_history { | |
215 | struct ext4_free_extent orig; /* orig allocation */ | |
216 | struct ext4_free_extent goal; /* goal allocation */ | |
217 | struct ext4_free_extent result; /* result allocation */ | |
218 | unsigned pid; | |
219 | unsigned ino; | |
220 | __u16 found; /* how many extents have been found */ | |
221 | __u16 groups; /* how many groups have been scanned */ | |
222 | __u16 tail; /* what tail broke some buddy */ | |
223 | __u16 buddy; /* buddy the tail ^^^ broke */ | |
224 | __u16 flags; | |
225 | __u8 cr:3; /* which phase the result extent was found at */ | |
226 | __u8 op:4; | |
227 | __u8 merged:1; | |
228 | }; | |
229 | ||
230 | struct ext4_buddy { | |
231 | struct page *bd_buddy_page; | |
232 | void *bd_buddy; | |
233 | struct page *bd_bitmap_page; | |
234 | void *bd_bitmap; | |
235 | struct ext4_group_info *bd_info; | |
236 | struct super_block *bd_sb; | |
237 | __u16 bd_blkbits; | |
238 | ext4_group_t bd_group; | |
239 | }; | |
240 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | |
241 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | |
242 | ||
#ifndef EXT4_MB_HISTORY
/* History disabled: recording an allocation is a no-op. */
static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
{
}
#else
static void ext4_mb_store_history(struct ext4_allocation_context *ac);
#endif
251 | ||
/* True when b lies in the closed range [first, first + len - 1]. */
#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)

static struct proc_dir_entry *proc_root_ext4;
255 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | |
256 | ||
257 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |
258 | ext4_group_t group); | |
259 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | |
260 | static void ext4_mb_free_committed_blocks(struct super_block *); | |
261 | static void ext4_mb_return_to_preallocation(struct inode *inode, | |
262 | struct ext4_buddy *e4b, sector_t block, | |
263 | int count); | |
264 | static void ext4_mb_put_pa(struct ext4_allocation_context *, | |
265 | struct super_block *, struct ext4_prealloc_space *pa); | |
266 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | |
267 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | |
268 | ||
269 | ||
270 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | |
271 | { | |
272 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
273 | ||
274 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | |
275 | } | |
276 | ||
277 | static inline void ext4_unlock_group(struct super_block *sb, | |
278 | ext4_group_t group) | |
279 | { | |
280 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
281 | ||
282 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | |
283 | } | |
284 | ||
285 | static inline int ext4_is_group_locked(struct super_block *sb, | |
286 | ext4_group_t group) | |
287 | { | |
288 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
289 | ||
290 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, | |
291 | &(grinfo->bb_state)); | |
292 | } | |
293 | ||
294 | static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | |
295 | struct ext4_free_extent *fex) | |
296 | { | |
297 | ext4_fsblk_t block; | |
298 | ||
299 | block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) | |
300 | + fex->fe_start | |
301 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | |
302 | return block; | |
303 | } | |
304 | #endif |