]>
Commit | Line | Data |
---|---|---|
1e51764a AB |
1 | /* |
2 | * This file is part of UBIFS. | |
3 | * | |
4 | * Copyright (C) 2006-2008 Nokia Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms of the GNU General Public License version 2 as published by | |
8 | * the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
13 | * more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License along with | |
16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | |
17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | * | |
19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | |
20 | * Adrian Hunter | |
21 | */ | |
22 | ||
23 | /* | |
24 | * This file implements UBIFS shrinker which evicts clean znodes from the TNC | |
25 | * tree when Linux VM needs more RAM. | |
26 | * | |
27 | * We do not implement any LRU lists to find oldest znodes to free because it | |
28 | * would add additional overhead to the file system fast paths. So the shrinker | |
29 | * just walks the TNC tree when searching for znodes to free. | |
30 | * | |
31 | * If the root of a TNC sub-tree is clean and old enough, then the children are | |
32 | * also clean and old enough. So the shrinker walks the TNC in level order and | |
33 | * dumps entire sub-trees. | |
34 | * | |
35 | * The age of znodes is just the time-stamp when they were last looked at. | |
36 | * The current shrinker first tries to evict old znodes, then young ones. | |
37 | * | |
38 | * Since the shrinker is global, it has to protect against races with FS | |
39 | * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. | |
40 | */ | |
41 | ||
42 | #include "ubifs.h" | |
43 | ||
44 | /* List of all UBIFS file-system instances */ | |
45 | LIST_HEAD(ubifs_infos); | |
46 | ||
47 | /* | |
48 | * We number each shrinker run and record the number on the ubifs_info structure | |
49 | * so that we can easily work out which ubifs_info structures have already been | |
50 | * done by the current run. | |
51 | */ | |
52 | static unsigned int shrinker_run_no; | |
53 | ||
54 | /* Protects 'ubifs_infos' list */ | |
55 | DEFINE_SPINLOCK(ubifs_infos_lock); | |
56 | ||
57 | /* Global clean znode counter (for all mounted UBIFS instances) */ | |
58 | atomic_long_t ubifs_clean_zn_cnt; | |
59 | ||
60 | /** | |
61 | * shrink_tnc - shrink TNC tree. | |
62 | * @c: UBIFS file-system description object | |
63 | * @nr: number of znodes to free | |
64 | * @age: the age of znodes to free | |
65 | * @contention: if any contention, this is set to %1 | |
66 | * | |
67 | * This function traverses TNC tree and frees clean znodes. It does not free | |
68 | * clean znodes which younger then @age. Returns number of freed znodes. | |
69 | */ | |
70 | static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) | |
71 | { | |
72 | int total_freed = 0; | |
73 | struct ubifs_znode *znode, *zprev; | |
74 | int time = get_seconds(); | |
75 | ||
76 | ubifs_assert(mutex_is_locked(&c->umount_mutex)); | |
77 | ubifs_assert(mutex_is_locked(&c->tnc_mutex)); | |
78 | ||
79 | if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) | |
80 | return 0; | |
81 | ||
82 | /* | |
83 | * Traverse the TNC tree in levelorder manner, so that it is possible | |
84 | * to destroy large sub-trees. Indeed, if a znode is old, then all its | |
85 | * children are older or of the same age. | |
86 | * | |
87 | * Note, we are holding 'c->tnc_mutex', so we do not have to lock the | |
88 | * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is | |
89 | * changed only when the 'c->tnc_mutex' is held. | |
90 | */ | |
91 | zprev = NULL; | |
92 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | |
93 | while (znode && total_freed < nr && | |
94 | atomic_long_read(&c->clean_zn_cnt) > 0) { | |
95 | int freed; | |
96 | ||
97 | /* | |
98 | * If the znode is clean, but it is in the 'c->cnext' list, this | |
99 | * means that this znode has just been written to flash as a | |
100 | * part of commit and was marked clean. They will be removed | |
101 | * from the list at end commit. We cannot change the list, | |
102 | * because it is not protected by any mutex (design decision to | |
103 | * make commit really independent and parallel to main I/O). So | |
104 | * we just skip these znodes. | |
105 | * | |
106 | * Note, the 'clean_zn_cnt' counters are not updated until | |
107 | * after the commit, so the UBIFS shrinker does not report | |
108 | * the znodes which are in the 'c->cnext' list as freeable. | |
109 | * | |
110 | * Also note, if the root of a sub-tree is not in 'c->cnext', | |
111 | * then the whole sub-tree is not in 'c->cnext' as well, so it | |
112 | * is safe to dump whole sub-tree. | |
113 | */ | |
114 | ||
115 | if (znode->cnext) { | |
116 | /* | |
117 | * Very soon these znodes will be removed from the list | |
118 | * and become freeable. | |
119 | */ | |
120 | *contention = 1; | |
121 | } else if (!ubifs_zn_dirty(znode) && | |
122 | abs(time - znode->time) >= age) { | |
123 | if (znode->parent) | |
124 | znode->parent->zbranch[znode->iip].znode = NULL; | |
125 | else | |
126 | c->zroot.znode = NULL; | |
127 | ||
128 | freed = ubifs_destroy_tnc_subtree(znode); | |
129 | atomic_long_sub(freed, &ubifs_clean_zn_cnt); | |
130 | atomic_long_sub(freed, &c->clean_zn_cnt); | |
131 | ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); | |
132 | total_freed += freed; | |
133 | znode = zprev; | |
134 | } | |
135 | ||
136 | if (unlikely(!c->zroot.znode)) | |
137 | break; | |
138 | ||
139 | zprev = znode; | |
140 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); | |
141 | cond_resched(); | |
142 | } | |
143 | ||
144 | return total_freed; | |
145 | } | |
146 | ||
147 | /** | |
148 | * shrink_tnc_trees - shrink UBIFS TNC trees. | |
149 | * @nr: number of znodes to free | |
150 | * @age: the age of znodes to free | |
151 | * @contention: if any contention, this is set to %1 | |
152 | * | |
153 | * This function walks the list of mounted UBIFS file-systems and frees clean | |
025dfdaf | 154 | * znodes which are older than @age, until at least @nr znodes are freed. |
1e51764a AB |
155 | * Returns the number of freed znodes. |
156 | */ | |
157 | static int shrink_tnc_trees(int nr, int age, int *contention) | |
158 | { | |
159 | struct ubifs_info *c; | |
160 | struct list_head *p; | |
161 | unsigned int run_no; | |
162 | int freed = 0; | |
163 | ||
164 | spin_lock(&ubifs_infos_lock); | |
165 | do { | |
166 | run_no = ++shrinker_run_no; | |
167 | } while (run_no == 0); | |
168 | /* Iterate over all mounted UBIFS file-systems and try to shrink them */ | |
169 | p = ubifs_infos.next; | |
170 | while (p != &ubifs_infos) { | |
171 | c = list_entry(p, struct ubifs_info, infos_list); | |
172 | /* | |
173 | * We move the ones we do to the end of the list, so we stop | |
174 | * when we see one we have already done. | |
175 | */ | |
176 | if (c->shrinker_run_no == run_no) | |
177 | break; | |
178 | if (!mutex_trylock(&c->umount_mutex)) { | |
179 | /* Some un-mount is in progress, try next FS */ | |
180 | *contention = 1; | |
181 | p = p->next; | |
182 | continue; | |
183 | } | |
184 | /* | |
185 | * We're holding 'c->umount_mutex', so the file-system won't go | |
186 | * away. | |
187 | */ | |
188 | if (!mutex_trylock(&c->tnc_mutex)) { | |
189 | mutex_unlock(&c->umount_mutex); | |
190 | *contention = 1; | |
191 | p = p->next; | |
192 | continue; | |
193 | } | |
194 | spin_unlock(&ubifs_infos_lock); | |
195 | /* | |
196 | * OK, now we have TNC locked, the file-system cannot go away - | |
197 | * it is safe to reap the cache. | |
198 | */ | |
199 | c->shrinker_run_no = run_no; | |
200 | freed += shrink_tnc(c, nr, age, contention); | |
201 | mutex_unlock(&c->tnc_mutex); | |
202 | spin_lock(&ubifs_infos_lock); | |
203 | /* Get the next list element before we move this one */ | |
204 | p = p->next; | |
205 | /* | |
206 | * Move this one to the end of the list to provide some | |
207 | * fairness. | |
208 | */ | |
ec32816f | 209 | list_move_tail(&c->infos_list, &ubifs_infos); |
1e51764a AB |
210 | mutex_unlock(&c->umount_mutex); |
211 | if (freed >= nr) | |
212 | break; | |
213 | } | |
214 | spin_unlock(&ubifs_infos_lock); | |
215 | return freed; | |
216 | } | |
217 | ||
218 | /** | |
219 | * kick_a_thread - kick a background thread to start commit. | |
220 | * | |
221 | * This function kicks a background thread to start background commit. Returns | |
222 | * %-1 if a thread was kicked or there is another reason to assume the memory | |
223 | * will soon be freed or become freeable. If there are no dirty znodes, returns | |
224 | * %0. | |
225 | */ | |
226 | static int kick_a_thread(void) | |
227 | { | |
228 | int i; | |
229 | struct ubifs_info *c; | |
230 | ||
231 | /* | |
232 | * Iterate over all mounted UBIFS file-systems and find out if there is | |
233 | * already an ongoing commit operation there. If no, then iterate for | |
234 | * the second time and initiate background commit. | |
235 | */ | |
236 | spin_lock(&ubifs_infos_lock); | |
237 | for (i = 0; i < 2; i++) { | |
238 | list_for_each_entry(c, &ubifs_infos, infos_list) { | |
239 | long dirty_zn_cnt; | |
240 | ||
241 | if (!mutex_trylock(&c->umount_mutex)) { | |
242 | /* | |
243 | * Some un-mount is in progress, it will | |
244 | * certainly free memory, so just return. | |
245 | */ | |
246 | spin_unlock(&ubifs_infos_lock); | |
247 | return -1; | |
248 | } | |
249 | ||
250 | dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); | |
251 | ||
252 | if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || | |
2ef13294 | 253 | c->ro_mount || c->ro_error) { |
1e51764a AB |
254 | mutex_unlock(&c->umount_mutex); |
255 | continue; | |
256 | } | |
257 | ||
258 | if (c->cmt_state != COMMIT_RESTING) { | |
259 | spin_unlock(&ubifs_infos_lock); | |
260 | mutex_unlock(&c->umount_mutex); | |
261 | return -1; | |
262 | } | |
263 | ||
264 | if (i == 1) { | |
ec32816f | 265 | list_move_tail(&c->infos_list, &ubifs_infos); |
1e51764a AB |
266 | spin_unlock(&ubifs_infos_lock); |
267 | ||
268 | ubifs_request_bg_commit(c); | |
269 | mutex_unlock(&c->umount_mutex); | |
270 | return -1; | |
271 | } | |
272 | mutex_unlock(&c->umount_mutex); | |
273 | } | |
274 | } | |
275 | spin_unlock(&ubifs_infos_lock); | |
276 | ||
277 | return 0; | |
278 | } | |
279 | ||
ef1d5759 | 280 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) |
1e51764a | 281 | { |
ef1d5759 | 282 | int nr = sc->nr_to_scan; |
1e51764a AB |
283 | int freed, contention = 0; |
284 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | |
285 | ||
286 | if (nr == 0) | |
cf610bf4 AB |
287 | /* |
288 | * Due to the way UBIFS updates the clean znode counter it may | |
289 | * temporarily be negative. | |
290 | */ | |
291 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; | |
1e51764a AB |
292 | |
293 | if (!clean_zn_cnt) { | |
294 | /* | |
295 | * No clean znodes, nothing to reap. All we can do in this case | |
296 | * is to kick background threads to start commit, which will | |
297 | * probably make clean znodes which, in turn, will be freeable. | |
298 | * And we return -1 which means will make VM call us again | |
299 | * later. | |
300 | */ | |
301 | dbg_tnc("no clean znodes, kick a thread"); | |
302 | return kick_a_thread(); | |
303 | } | |
304 | ||
305 | freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); | |
306 | if (freed >= nr) | |
307 | goto out; | |
308 | ||
309 | dbg_tnc("not enough old znodes, try to free young ones"); | |
310 | freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); | |
311 | if (freed >= nr) | |
312 | goto out; | |
313 | ||
314 | dbg_tnc("not enough young znodes, free all"); | |
315 | freed += shrink_tnc_trees(nr - freed, 0, &contention); | |
316 | ||
317 | if (!freed && contention) { | |
318 | dbg_tnc("freed nothing, but contention"); | |
319 | return -1; | |
320 | } | |
321 | ||
322 | out: | |
323 | dbg_tnc("%d znodes were freed, requested %d", freed, nr); | |
324 | return freed; | |
325 | } |