]>
Commit | Line | Data |
---|---|---|
2b27bdcc | 1 | // SPDX-License-Identifier: GPL-2.0-only |
1e51764a AB |
2 | /* |
3 | * This file is part of UBIFS. | |
4 | * | |
5 | * Copyright (C) 2006-2008 Nokia Corporation. | |
6 | * | |
1e51764a AB |
7 | * Authors: Artem Bityutskiy (Битюцкий Артём) |
8 | * Adrian Hunter | |
9 | */ | |
10 | ||
11 | /* | |
12 | * This file implements UBIFS shrinker which evicts clean znodes from the TNC | |
13 | * tree when Linux VM needs more RAM. | |
14 | * | |
15 | * We do not implement any LRU lists to find oldest znodes to free because it | |
16 | * would add additional overhead to the file system fast paths. So the shrinker | |
17 | * just walks the TNC tree when searching for znodes to free. | |
18 | * | |
19 | * If the root of a TNC sub-tree is clean and old enough, then the children are | |
20 | * also clean and old enough. So the shrinker walks the TNC in level order and | |
21 | * dumps entire sub-trees. | |
22 | * | |
23 | * The age of znodes is just the time-stamp when they were last looked at. | |
24 | * The current shrinker first tries to evict old znodes, then young ones. | |
25 | * | |
26 | * Since the shrinker is global, it has to protect against races with FS | |
27 | * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. | |
28 | */ | |
29 | ||
30 | #include "ubifs.h" | |
31 | ||
32 | /* List of all UBIFS file-system instances */ | |
33 | LIST_HEAD(ubifs_infos); | |
34 | ||
35 | /* | |
36 | * We number each shrinker run and record the number on the ubifs_info structure | |
37 | * so that we can easily work out which ubifs_info structures have already been | |
38 | * done by the current run. | |
39 | */ | |
40 | static unsigned int shrinker_run_no; | |
41 | ||
42 | /* Protects 'ubifs_infos' list */ | |
43 | DEFINE_SPINLOCK(ubifs_infos_lock); | |
44 | ||
45 | /* Global clean znode counter (for all mounted UBIFS instances) */ | |
46 | atomic_long_t ubifs_clean_zn_cnt; | |
47 | ||
48 | /** | |
49 | * shrink_tnc - shrink TNC tree. | |
50 | * @c: UBIFS file-system description object | |
51 | * @nr: number of znodes to free | |
52 | * @age: the age of znodes to free | |
53 | * @contention: if any contention, this is set to %1 | |
54 | * | |
55 | * This function traverses TNC tree and frees clean znodes. It does not free | |
56 | * clean znodes which younger then @age. Returns number of freed znodes. | |
57 | */ | |
58 | static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) | |
59 | { | |
60 | int total_freed = 0; | |
61 | struct ubifs_znode *znode, *zprev; | |
6cff5732 | 62 | time64_t time = ktime_get_seconds(); |
1e51764a | 63 | |
6eb61d58 RW |
64 | ubifs_assert(c, mutex_is_locked(&c->umount_mutex)); |
65 | ubifs_assert(c, mutex_is_locked(&c->tnc_mutex)); | |
1e51764a AB |
66 | |
67 | if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) | |
68 | return 0; | |
69 | ||
70 | /* | |
71 | * Traverse the TNC tree in levelorder manner, so that it is possible | |
72 | * to destroy large sub-trees. Indeed, if a znode is old, then all its | |
73 | * children are older or of the same age. | |
74 | * | |
75 | * Note, we are holding 'c->tnc_mutex', so we do not have to lock the | |
76 | * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is | |
77 | * changed only when the 'c->tnc_mutex' is held. | |
78 | */ | |
79 | zprev = NULL; | |
6eb61d58 | 80 | znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); |
1e51764a AB |
81 | while (znode && total_freed < nr && |
82 | atomic_long_read(&c->clean_zn_cnt) > 0) { | |
83 | int freed; | |
84 | ||
85 | /* | |
86 | * If the znode is clean, but it is in the 'c->cnext' list, this | |
87 | * means that this znode has just been written to flash as a | |
88 | * part of commit and was marked clean. They will be removed | |
89 | * from the list at end commit. We cannot change the list, | |
90 | * because it is not protected by any mutex (design decision to | |
91 | * make commit really independent and parallel to main I/O). So | |
92 | * we just skip these znodes. | |
93 | * | |
94 | * Note, the 'clean_zn_cnt' counters are not updated until | |
95 | * after the commit, so the UBIFS shrinker does not report | |
96 | * the znodes which are in the 'c->cnext' list as freeable. | |
97 | * | |
98 | * Also note, if the root of a sub-tree is not in 'c->cnext', | |
99 | * then the whole sub-tree is not in 'c->cnext' as well, so it | |
100 | * is safe to dump whole sub-tree. | |
101 | */ | |
102 | ||
103 | if (znode->cnext) { | |
104 | /* | |
105 | * Very soon these znodes will be removed from the list | |
106 | * and become freeable. | |
107 | */ | |
108 | *contention = 1; | |
109 | } else if (!ubifs_zn_dirty(znode) && | |
110 | abs(time - znode->time) >= age) { | |
111 | if (znode->parent) | |
112 | znode->parent->zbranch[znode->iip].znode = NULL; | |
113 | else | |
114 | c->zroot.znode = NULL; | |
115 | ||
6eb61d58 | 116 | freed = ubifs_destroy_tnc_subtree(c, znode); |
1e51764a AB |
117 | atomic_long_sub(freed, &ubifs_clean_zn_cnt); |
118 | atomic_long_sub(freed, &c->clean_zn_cnt); | |
1e51764a AB |
119 | total_freed += freed; |
120 | znode = zprev; | |
121 | } | |
122 | ||
123 | if (unlikely(!c->zroot.znode)) | |
124 | break; | |
125 | ||
126 | zprev = znode; | |
6eb61d58 | 127 | znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); |
1e51764a AB |
128 | cond_resched(); |
129 | } | |
130 | ||
131 | return total_freed; | |
132 | } | |
133 | ||
134 | /** | |
135 | * shrink_tnc_trees - shrink UBIFS TNC trees. | |
136 | * @nr: number of znodes to free | |
137 | * @age: the age of znodes to free | |
138 | * @contention: if any contention, this is set to %1 | |
139 | * | |
140 | * This function walks the list of mounted UBIFS file-systems and frees clean | |
025dfdaf | 141 | * znodes which are older than @age, until at least @nr znodes are freed. |
1e51764a AB |
142 | * Returns the number of freed znodes. |
143 | */ | |
144 | static int shrink_tnc_trees(int nr, int age, int *contention) | |
145 | { | |
146 | struct ubifs_info *c; | |
147 | struct list_head *p; | |
148 | unsigned int run_no; | |
149 | int freed = 0; | |
150 | ||
151 | spin_lock(&ubifs_infos_lock); | |
152 | do { | |
153 | run_no = ++shrinker_run_no; | |
154 | } while (run_no == 0); | |
155 | /* Iterate over all mounted UBIFS file-systems and try to shrink them */ | |
156 | p = ubifs_infos.next; | |
157 | while (p != &ubifs_infos) { | |
158 | c = list_entry(p, struct ubifs_info, infos_list); | |
159 | /* | |
160 | * We move the ones we do to the end of the list, so we stop | |
161 | * when we see one we have already done. | |
162 | */ | |
163 | if (c->shrinker_run_no == run_no) | |
164 | break; | |
165 | if (!mutex_trylock(&c->umount_mutex)) { | |
166 | /* Some un-mount is in progress, try next FS */ | |
167 | *contention = 1; | |
168 | p = p->next; | |
169 | continue; | |
170 | } | |
171 | /* | |
172 | * We're holding 'c->umount_mutex', so the file-system won't go | |
173 | * away. | |
174 | */ | |
175 | if (!mutex_trylock(&c->tnc_mutex)) { | |
176 | mutex_unlock(&c->umount_mutex); | |
177 | *contention = 1; | |
178 | p = p->next; | |
179 | continue; | |
180 | } | |
181 | spin_unlock(&ubifs_infos_lock); | |
182 | /* | |
183 | * OK, now we have TNC locked, the file-system cannot go away - | |
184 | * it is safe to reap the cache. | |
185 | */ | |
186 | c->shrinker_run_no = run_no; | |
187 | freed += shrink_tnc(c, nr, age, contention); | |
188 | mutex_unlock(&c->tnc_mutex); | |
189 | spin_lock(&ubifs_infos_lock); | |
190 | /* Get the next list element before we move this one */ | |
191 | p = p->next; | |
192 | /* | |
193 | * Move this one to the end of the list to provide some | |
194 | * fairness. | |
195 | */ | |
ec32816f | 196 | list_move_tail(&c->infos_list, &ubifs_infos); |
1e51764a AB |
197 | mutex_unlock(&c->umount_mutex); |
198 | if (freed >= nr) | |
199 | break; | |
200 | } | |
201 | spin_unlock(&ubifs_infos_lock); | |
202 | return freed; | |
203 | } | |
204 | ||
205 | /** | |
206 | * kick_a_thread - kick a background thread to start commit. | |
207 | * | |
208 | * This function kicks a background thread to start background commit. Returns | |
209 | * %-1 if a thread was kicked or there is another reason to assume the memory | |
210 | * will soon be freed or become freeable. If there are no dirty znodes, returns | |
211 | * %0. | |
212 | */ | |
213 | static int kick_a_thread(void) | |
214 | { | |
215 | int i; | |
216 | struct ubifs_info *c; | |
217 | ||
218 | /* | |
219 | * Iterate over all mounted UBIFS file-systems and find out if there is | |
220 | * already an ongoing commit operation there. If no, then iterate for | |
221 | * the second time and initiate background commit. | |
222 | */ | |
223 | spin_lock(&ubifs_infos_lock); | |
224 | for (i = 0; i < 2; i++) { | |
225 | list_for_each_entry(c, &ubifs_infos, infos_list) { | |
226 | long dirty_zn_cnt; | |
227 | ||
228 | if (!mutex_trylock(&c->umount_mutex)) { | |
229 | /* | |
230 | * Some un-mount is in progress, it will | |
231 | * certainly free memory, so just return. | |
232 | */ | |
233 | spin_unlock(&ubifs_infos_lock); | |
234 | return -1; | |
235 | } | |
236 | ||
237 | dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); | |
238 | ||
239 | if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || | |
2ef13294 | 240 | c->ro_mount || c->ro_error) { |
1e51764a AB |
241 | mutex_unlock(&c->umount_mutex); |
242 | continue; | |
243 | } | |
244 | ||
245 | if (c->cmt_state != COMMIT_RESTING) { | |
246 | spin_unlock(&ubifs_infos_lock); | |
247 | mutex_unlock(&c->umount_mutex); | |
248 | return -1; | |
249 | } | |
250 | ||
251 | if (i == 1) { | |
ec32816f | 252 | list_move_tail(&c->infos_list, &ubifs_infos); |
1e51764a AB |
253 | spin_unlock(&ubifs_infos_lock); |
254 | ||
255 | ubifs_request_bg_commit(c); | |
256 | mutex_unlock(&c->umount_mutex); | |
257 | return -1; | |
258 | } | |
259 | mutex_unlock(&c->umount_mutex); | |
260 | } | |
261 | } | |
262 | spin_unlock(&ubifs_infos_lock); | |
263 | ||
264 | return 0; | |
265 | } | |
266 | ||
1ab6c499 DC |
267 | unsigned long ubifs_shrink_count(struct shrinker *shrink, |
268 | struct shrink_control *sc) | |
1e51764a | 269 | { |
1e51764a AB |
270 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); |
271 | ||
1ab6c499 DC |
272 | /* |
273 | * Due to the way UBIFS updates the clean znode counter it may | |
274 | * temporarily be negative. | |
275 | */ | |
276 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; | |
277 | } | |
278 | ||
279 | unsigned long ubifs_shrink_scan(struct shrinker *shrink, | |
280 | struct shrink_control *sc) | |
281 | { | |
282 | unsigned long nr = sc->nr_to_scan; | |
283 | int contention = 0; | |
284 | unsigned long freed; | |
285 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | |
1e51764a AB |
286 | |
287 | if (!clean_zn_cnt) { | |
288 | /* | |
289 | * No clean znodes, nothing to reap. All we can do in this case | |
290 | * is to kick background threads to start commit, which will | |
291 | * probably make clean znodes which, in turn, will be freeable. | |
292 | * And we return -1 which means will make VM call us again | |
293 | * later. | |
294 | */ | |
295 | dbg_tnc("no clean znodes, kick a thread"); | |
296 | return kick_a_thread(); | |
297 | } | |
298 | ||
299 | freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); | |
300 | if (freed >= nr) | |
301 | goto out; | |
302 | ||
303 | dbg_tnc("not enough old znodes, try to free young ones"); | |
304 | freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); | |
305 | if (freed >= nr) | |
306 | goto out; | |
307 | ||
308 | dbg_tnc("not enough young znodes, free all"); | |
309 | freed += shrink_tnc_trees(nr - freed, 0, &contention); | |
310 | ||
311 | if (!freed && contention) { | |
312 | dbg_tnc("freed nothing, but contention"); | |
1ab6c499 | 313 | return SHRINK_STOP; |
1e51764a AB |
314 | } |
315 | ||
316 | out: | |
1ab6c499 | 317 | dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); |
1e51764a AB |
318 | return freed; |
319 | } |