fs/erofs/zutil.c [linux.git]
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

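/*
 * One global scratch buffer: 'ptr' is a contiguous vmap()ed view of the
 * 'nrpages' pages in 'pages', handed out to a single user at a time
 * under 'lock'.
 */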
struct z_erofs_gbuf {
        spinlock_t lock;
        void *ptr;
        struct page **pages;
        unsigned int nrpages;
};

static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
                z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);

static atomic_long_t erofs_global_shrink_cnt;   /* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;

static unsigned int z_erofs_gbuf_id(void)
{
        return raw_smp_processor_id() % z_erofs_gbuf_count;
}

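/*
 * z_erofs_get_gbuf() returns the global buffer picked by z_erofs_gbuf_id()
 * with its spinlock held; z_erofs_put_gbuf() releases it again.  A rough,
 * hypothetical caller sketch (not copied from a real call site):
 *
 *      dst = z_erofs_get_gbuf(nr);     - NULL if the buffer has < nr pages,
 *                                        so callers may grow it and retry
 *      ... use dst exclusively ...
 *      z_erofs_put_gbuf(dst);
 */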
void *z_erofs_get_gbuf(unsigned int requiredpages)
        __acquires(gbuf->lock)
{
        struct z_erofs_gbuf *gbuf;

        gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
        spin_lock(&gbuf->lock);
        /* check if the buffer is too small */
        if (requiredpages > gbuf->nrpages) {
                spin_unlock(&gbuf->lock);
                /* (for sparse checker) pretend gbuf->lock is still taken */
                __acquire(gbuf->lock);
                return NULL;
        }
        return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
        struct z_erofs_gbuf *gbuf;

        gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
        DBG_BUGON(gbuf->ptr != ptr);
        spin_unlock(&gbuf->lock);
}

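/*
 * Grow every global buffer to 'nrpages' pages.  Existing pages are reused and
 * only the difference is bulk-allocated; the enlarged page array is vmap()ed
 * before being swapped in under the per-buffer spinlock, so concurrent
 * z_erofs_get_gbuf() users never see a half-updated buffer.
 */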
int z_erofs_gbuf_growsize(unsigned int nrpages)
{
        static DEFINE_MUTEX(gbuf_resize_mutex);
        struct page **tmp_pages = NULL;
        struct z_erofs_gbuf *gbuf;
        void *ptr, *old_ptr;
        int last, i, j;

        mutex_lock(&gbuf_resize_mutex);
        /* avoid shrinking gbufs, since we don't know how many fses rely on the current size */
        if (nrpages <= z_erofs_gbuf_nrpages) {
                mutex_unlock(&gbuf_resize_mutex);
                return 0;
        }

        for (i = 0; i < z_erofs_gbuf_count; ++i) {
                gbuf = &z_erofs_gbufpool[i];
                tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
                if (!tmp_pages)
                        goto out;

                for (j = 0; j < gbuf->nrpages; ++j)
                        tmp_pages[j] = gbuf->pages[j];
                do {
                        last = j;
                        j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
                                                   tmp_pages);
                        if (last == j)
                                goto out;
                } while (j != nrpages);

                ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
                if (!ptr)
                        goto out;

                spin_lock(&gbuf->lock);
                kfree(gbuf->pages);
                gbuf->pages = tmp_pages;
                old_ptr = gbuf->ptr;
                gbuf->ptr = ptr;
                gbuf->nrpages = nrpages;
                spin_unlock(&gbuf->lock);
                if (old_ptr)
                        vunmap(old_ptr);
        }
        z_erofs_gbuf_nrpages = nrpages;
out:
        if (i < z_erofs_gbuf_count && tmp_pages) {
                for (j = 0; j < nrpages; ++j)
                        if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
                                __free_page(tmp_pages[j]);
                kfree(tmp_pages);
        }
        mutex_unlock(&gbuf_resize_mutex);
        return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}

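/*
 * Set up one global buffer per possible CPU (optionally capped by the
 * 'global_buffers' module parameter), plus a trailing reserved buffer if
 * 'reserved_pages' is non-zero.  The reserved buffer's page array is
 * allocated best-effort; on failure it is simply disabled.  Its pages are
 * populated later by erofs_release_pages().
 */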
int __init z_erofs_gbuf_init(void)
{
        unsigned int i, total = num_possible_cpus();

        if (z_erofs_gbuf_count)
                total = min(z_erofs_gbuf_count, total);
        z_erofs_gbuf_count = total;

        /* The last (special) global buffer is the reserved buffer */
        total += !!z_erofs_rsv_nrpages;

        z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
                                   GFP_KERNEL);
        if (!z_erofs_gbufpool)
                return -ENOMEM;

        if (z_erofs_rsv_nrpages) {
                z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
                z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
                                sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
                if (!z_erofs_rsvbuf->pages) {
                        z_erofs_rsvbuf = NULL;
                        z_erofs_rsv_nrpages = 0;
                }
        }
        for (i = 0; i < total; ++i)
                spin_lock_init(&z_erofs_gbufpool[i].lock);
        return 0;
}

void z_erofs_gbuf_exit(void)
{
        int i, j;

        for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
                struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

                if (gbuf->ptr) {
                        vunmap(gbuf->ptr);
                        gbuf->ptr = NULL;
                }

                if (!gbuf->pages)
                        continue;

                for (j = 0; j < gbuf->nrpages; ++j)
                        if (gbuf->pages[j])
                                put_page(gbuf->pages[j]);
                kfree(gbuf->pages);
                gbuf->pages = NULL;
        }
        kfree(z_erofs_gbufpool);
}

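/*
 * Allocate one page: try the caller's private page pool first, then the
 * reserved global buffer (if 'tryrsv' is set), and finally the page
 * allocator.
 */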
struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
        struct page *page = *pagepool;

        if (page) {
                *pagepool = (struct page *)page_private(page);
        } else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
                spin_lock(&z_erofs_rsvbuf->lock);
                if (z_erofs_rsvbuf->nrpages)
                        page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
                spin_unlock(&z_erofs_rsvbuf->lock);
        }
        if (!page)
                page = alloc_page(gfp);
        DBG_BUGON(page && page_ref_count(page) != 1);
        return page;
}

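/*
 * Drain the caller's private page pool; pages are used to top up the
 * reserved global buffer first and are only freed once that pool is full.
 */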
void erofs_release_pages(struct page **pagepool)
{
        while (*pagepool) {
                struct page *page = *pagepool;

                *pagepool = (struct page *)page_private(page);
                /* try to fill reserved global pool first */
                if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
                                z_erofs_rsv_nrpages) {
                        spin_lock(&z_erofs_rsvbuf->lock);
                        if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
                                z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
                                                = page;
                                spin_unlock(&z_erofs_rsvbuf->lock);
                                continue;
                        }
                        spin_unlock(&z_erofs_rsvbuf->lock);
                }
                put_page(page);
        }
}

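/*
 * Take a reference on a workgroup found in the managed XArray.  A workgroup
 * whose count has dropped to zero (but which is not yet dead) is revived
 * here, so it no longer counts towards the shrinker's reclaimable total.
 */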
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
        if (lockref_get_not_zero(&grp->lockref))
                return true;

        spin_lock(&grp->lockref.lock);
        if (__lockref_is_dead(&grp->lockref)) {
                spin_unlock(&grp->lockref.lock);
                return false;
        }

        if (!grp->lockref.count++)
                atomic_long_dec(&erofs_global_shrink_cnt);
        spin_unlock(&grp->lockref.lock);
        return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = xa_load(&sbi->managed_pslots, index);
        if (grp) {
                if (!erofs_workgroup_get(grp)) {
                        /* prefer to relax rcu read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

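/*
 * Insert 'grp' into the managed XArray.  If another workgroup already
 * occupies the slot, grab a reference on that one and return it instead;
 * the caller is then presumably expected to drop its own copy.
 */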
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
                                               struct erofs_workgroup *grp)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct erofs_workgroup *pre;

        DBG_BUGON(grp->lockref.count < 1);
repeat:
        xa_lock(&sbi->managed_pslots);
        pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
                           NULL, grp, GFP_KERNEL);
        if (pre) {
                if (xa_is_err(pre)) {
                        pre = ERR_PTR(xa_err(pre));
                } else if (!erofs_workgroup_get(pre)) {
                        /* try to legitimize the current in-tree one */
                        xa_unlock(&sbi->managed_pslots);
                        cond_resched();
                        goto repeat;
                }
                grp = pre;
        }
        xa_unlock(&sbi->managed_pslots);
        return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

void erofs_workgroup_put(struct erofs_workgroup *grp)
{
        if (lockref_put_or_lock(&grp->lockref))
                return;

        DBG_BUGON(__lockref_is_dead(&grp->lockref));
        if (grp->lockref.count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        --grp->lockref.count;
        spin_unlock(&grp->lockref.lock);
}

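/*
 * Try to release one unused workgroup: only a workgroup with a zero
 * reference count and no cached folios left attached is erased from the
 * XArray, marked dead and freed via RCU.
 */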
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp)
{
        int free = false;

        spin_lock(&grp->lockref.lock);
        if (grp->lockref.count)
                goto out;

        /*
         * Note that all cached pages should be detached before the workgroup
         * is deleted from the XArray.  Otherwise some cached pages could
         * still be attached to the orphan old workgroup when the new one is
         * available in the tree.
         */
        if (erofs_try_to_free_all_cached_folios(sbi, grp))
                goto out;

        /*
         * It's impossible to fail after the workgroup is frozen; however,
         * in order to avoid some race conditions, add a DBG_BUGON to
         * observe this in advance.
         */
        DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

        lockref_mark_dead(&grp->lockref);
        free = true;
out:
        spin_unlock(&grp->lockref.lock);
        if (free)
                __erofs_workgroup_free(grp);
        return free;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink)
{
        struct erofs_workgroup *grp;
        unsigned int freed = 0;
        unsigned long index;

        xa_lock(&sbi->managed_pslots);
        xa_for_each(&sbi->managed_pslots, index, grp) {
                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp))
                        continue;
                xa_unlock(&sbi->managed_pslots);

                ++freed;
                if (!--nr_shrink)
                        return freed;
                xa_lock(&sbi->managed_pslots);
        }
        xa_unlock(&sbi->managed_pslots);
        return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        /* clean up all remaining workgroups in memory */
        erofs_shrink_workstation(sbi, ~0UL);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

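/*
 * Walk the mounted erofs instances round-robin and shrink their
 * workstations.  Each visited superblock is stamped with the current run
 * number and moved to the list tail, so a single scan touches every
 * filesystem at most once and later scans start elsewhere for fairness.
 */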
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr - freed);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

int __init erofs_init_shrinker(void)
{
        erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
        if (!erofs_shrinker_info)
                return -ENOMEM;

        erofs_shrinker_info->count_objects = erofs_shrink_count;
        erofs_shrinker_info->scan_objects = erofs_shrink_scan;
        shrinker_register(erofs_shrinker_info);
        return 0;
}

void erofs_exit_shrinker(void)
{
        shrinker_free(erofs_shrinker_info);
}