// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};
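
/*
 * Per-CPU "global buffers" are vmapped page arrays shared by all mounted
 * EROFS instances; the optional trailing buffer (z_erofs_rsvbuf) only
 * stashes reserved pages for __erofs_allocpage() / erofs_release_pages().
 */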
static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);
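
/*
 * Bookkeeping for the single EROFS shrinker shared by every mounted
 * instance: one global object count plus the list of registered instances.
 */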
static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;
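
/* Map the current CPU to one of the per-CPU global buffer slots. */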
static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	migrate_disable();
	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		migrate_enable();
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
	migrate_enable();
}
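
/*
 * Grow every global buffer to hold at least @nrpages pages.  The pool is
 * never shrunk here: existing pages are carried over into a larger page
 * array, the remainder is bulk-allocated, and the new array is vmapped
 * before being swapped in under the per-buffer spinlock.
 */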
int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/* avoid shrinking gbufs, since no idea how many fses rely on */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
		if (!tmp_pages)
			goto out;

		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		do {
			last = j;
			j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
						   tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}
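
/*
 * Set up one global buffer per possible CPU (capped by the "global_buffers"
 * module parameter if set), plus a trailing reserved buffer when
 * "reserved_pages" is non-zero.  The per-CPU buffers get their pages later
 * via z_erofs_gbuf_growsize().
 */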
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
				   GFP_KERNEL);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
				sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}
		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}
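
/*
 * Grab a page: prefer the caller's local pagepool, then the reserved global
 * buffer if @tryrsv is set, and finally fall back to a fresh allocation
 * with @gfp.
 */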
struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}
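
/*
 * Return all pages chained on @pagepool; pages are used to top up the
 * reserved global buffer first and only then freed back to the system.
 */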
void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
						= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}
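
/*
 * Take a reference on @grp unless it is already dead.  A 0 -> 1 count
 * transition means the workgroup is no longer just cached, so it leaves
 * the global shrink accounting.
 */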
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}
		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	DBG_BUGON(grp->lockref.count < 1);
repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_KERNEL);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	if (lockref_put_or_lock(&grp->lockref))
		return;

	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}
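
/*
 * Try to tear down an unused workgroup: it must have no active references
 * and all of its cached folios must be detachable; only then is it erased
 * from the XArray, marked dead and freed.
 */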
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached folios should be detached before being
	 * deleted from the XArray.  Otherwise some cached folios could
	 * still be attached to the orphan old workgroup when the new one
	 * is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_folios(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is frozen, but in
	 * order to avoid some race conditions, add a DBG_BUGON to observe
	 * this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return free;
}
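
/*
 * Walk the managed workgroups of one filesystem instance and release up to
 * @nr_shrink of them, dropping and reacquiring the XArray lock after each
 * successful release.
 */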
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}
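
/*
 * Shrinker scan callback: walk all mounted instances round-robin, shrinking
 * each one's workstation.  shrinker_run_no tags instances already visited
 * in this run so the walk terminates, and each scanned instance is moved to
 * the list tail for fairness.
 */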
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}
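
/*
 * Register the single global shrinker; its object count is
 * erofs_global_shrink_cnt, maintained by the workgroup helpers above.
 */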
int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}