// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"

struct z_erofs_gbuf {
	spinlock_t lock;
	void *ptr;
	struct page **pages;
	unsigned int nrpages;
};
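
/*
 * Per-CPU "global buffers" are vmapped page arrays shared by all mounted
 * EROFS instances; the optional trailing buffer (z_erofs_rsvbuf) only
 * stashes reserved pages for __erofs_allocpage() / erofs_release_pages().
 */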
static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf;
static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages,
		z_erofs_rsv_nrpages;

module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);
module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444);
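
/*
 * Bookkeeping for the single EROFS shrinker shared by every mounted
 * instance: one global object count plus the list of registered instances.
 */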
static atomic_long_t erofs_global_shrink_cnt;	/* for all mounted instances */
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
static struct shrinker *erofs_shrinker_info;
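
/* Map the current CPU to one of the per-CPU global buffer slots. */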
static unsigned int z_erofs_gbuf_id(void)
{
	return raw_smp_processor_id() % z_erofs_gbuf_count;
}

void *z_erofs_get_gbuf(unsigned int requiredpages)
	__acquires(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	migrate_disable();
	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	spin_lock(&gbuf->lock);
	/* check if the buffer is too small */
	if (requiredpages > gbuf->nrpages) {
		spin_unlock(&gbuf->lock);
		migrate_enable();
		/* (for sparse checker) pretend gbuf->lock is still taken */
		__acquire(gbuf->lock);
		return NULL;
	}
	return gbuf->ptr;
}

void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
{
	struct z_erofs_gbuf *gbuf;

	gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
	DBG_BUGON(gbuf->ptr != ptr);
	spin_unlock(&gbuf->lock);
	migrate_enable();
}
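
/*
 * Grow every global buffer to hold at least @nrpages pages.  The pool is
 * never shrunk here: existing pages are carried over into a larger page
 * array, the remainder is bulk-allocated, and the new array is vmapped
 * before being swapped in under the per-buffer spinlock.
 */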
int z_erofs_gbuf_growsize(unsigned int nrpages)
{
	static DEFINE_MUTEX(gbuf_resize_mutex);
	struct page **tmp_pages = NULL;
	struct z_erofs_gbuf *gbuf;
	void *ptr, *old_ptr;
	int last, i, j;

	mutex_lock(&gbuf_resize_mutex);
	/* avoid shrinking gbufs, since no idea how many fses rely on */
	if (nrpages <= z_erofs_gbuf_nrpages) {
		mutex_unlock(&gbuf_resize_mutex);
		return 0;
	}

	for (i = 0; i < z_erofs_gbuf_count; ++i) {
		gbuf = &z_erofs_gbufpool[i];
		tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
		if (!tmp_pages)
			goto out;

		for (j = 0; j < gbuf->nrpages; ++j)
			tmp_pages[j] = gbuf->pages[j];
		do {
			last = j;
			j = alloc_pages_bulk_array(GFP_KERNEL, nrpages,
						   tmp_pages);
			if (last == j)
				goto out;
		} while (j != nrpages);

		ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
		if (!ptr)
			goto out;

		spin_lock(&gbuf->lock);
		kfree(gbuf->pages);
		gbuf->pages = tmp_pages;
		old_ptr = gbuf->ptr;
		gbuf->ptr = ptr;
		gbuf->nrpages = nrpages;
		spin_unlock(&gbuf->lock);
		if (old_ptr)
			vunmap(old_ptr);
	}
	z_erofs_gbuf_nrpages = nrpages;
out:
	if (i < z_erofs_gbuf_count && tmp_pages) {
		for (j = 0; j < nrpages; ++j)
			if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
				__free_page(tmp_pages[j]);
		kfree(tmp_pages);
	}
	mutex_unlock(&gbuf_resize_mutex);
	return i < z_erofs_gbuf_count ? -ENOMEM : 0;
}
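
/*
 * Set up one global buffer per possible CPU (capped by the "global_buffers"
 * module parameter if set), plus a trailing reserved buffer when
 * "reserved_pages" is non-zero.  The per-CPU buffers get their pages later
 * via z_erofs_gbuf_growsize().
 */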
int __init z_erofs_gbuf_init(void)
{
	unsigned int i, total = num_possible_cpus();

	if (z_erofs_gbuf_count)
		total = min(z_erofs_gbuf_count, total);
	z_erofs_gbuf_count = total;

	/* The last (special) global buffer is the reserved buffer */
	total += !!z_erofs_rsv_nrpages;

	z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool),
				   GFP_KERNEL);
	if (!z_erofs_gbufpool)
		return -ENOMEM;

	if (z_erofs_rsv_nrpages) {
		z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1];
		z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages,
				sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL);
		if (!z_erofs_rsvbuf->pages) {
			z_erofs_rsvbuf = NULL;
			z_erofs_rsv_nrpages = 0;
		}
	}
	for (i = 0; i < total; ++i)
		spin_lock_init(&z_erofs_gbufpool[i].lock);
	return 0;
}

void z_erofs_gbuf_exit(void)
{
	int i, j;

	for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) {
		struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];

		if (gbuf->ptr) {
			vunmap(gbuf->ptr);
			gbuf->ptr = NULL;
		}
		if (!gbuf->pages)
			continue;

		for (j = 0; j < gbuf->nrpages; ++j)
			if (gbuf->pages[j])
				put_page(gbuf->pages[j]);
		kfree(gbuf->pages);
		gbuf->pages = NULL;
	}
	kfree(z_erofs_gbufpool);
}
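
/*
 * Grab a page: prefer the caller's local pagepool, then the reserved global
 * buffer if @tryrsv is set, and finally fall back to a fresh allocation
 * with @gfp.
 */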
struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv)
{
	struct page *page = *pagepool;

	if (page) {
		*pagepool = (struct page *)page_private(page);
	} else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) {
		spin_lock(&z_erofs_rsvbuf->lock);
		if (z_erofs_rsvbuf->nrpages)
			page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages];
		spin_unlock(&z_erofs_rsvbuf->lock);
	}
	if (!page)
		page = alloc_page(gfp);
	DBG_BUGON(page && page_ref_count(page) != 1);
	return page;
}
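
/*
 * Return all pages chained on @pagepool; pages are used to top up the
 * reserved global buffer first and only then freed back to the system.
 */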
void erofs_release_pages(struct page **pagepool)
{
	while (*pagepool) {
		struct page *page = *pagepool;

		*pagepool = (struct page *)page_private(page);
		/* try to fill reserved global pool first */
		if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages <
				z_erofs_rsv_nrpages) {
			spin_lock(&z_erofs_rsvbuf->lock);
			if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) {
				z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++]
						= page;
				spin_unlock(&z_erofs_rsvbuf->lock);
				continue;
			}
			spin_unlock(&z_erofs_rsvbuf->lock);
		}
		put_page(page);
	}
}
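
/*
 * Take a reference on @grp unless it is already dead.  A 0 -> 1 count
 * transition means the workgroup is no longer just cached, so it leaves
 * the global shrink accounting.
 */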
static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
	if (lockref_get_not_zero(&grp->lockref))
		return true;

	spin_lock(&grp->lockref.lock);
	if (__lockref_is_dead(&grp->lockref)) {
		spin_unlock(&grp->lockref.lock);
		return false;
	}

	if (!grp->lockref.count++)
		atomic_long_dec(&erofs_global_shrink_cnt);
	spin_unlock(&grp->lockref.lock);
	return true;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (!erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}
		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	DBG_BUGON(grp->lockref.count < 1);
repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_KERNEL);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (!erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

void erofs_workgroup_put(struct erofs_workgroup *grp)
{
	if (lockref_put_or_lock(&grp->lockref))
		return;

	DBG_BUGON(__lockref_is_dead(&grp->lockref));
	if (grp->lockref.count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	--grp->lockref.count;
	spin_unlock(&grp->lockref.lock);
}
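
/*
 * Try to tear down an unused workgroup: it must have no active references
 * and all of its cached folios must be detachable; only then is it erased
 * from the XArray, marked dead and freed.
 */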
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	int free = false;

	spin_lock(&grp->lockref.lock);
	if (grp->lockref.count)
		goto out;

	/*
	 * Note that all cached folios should be detached before being
	 * deleted from the XArray.  Otherwise some cached folios could
	 * still be attached to the orphan old workgroup when the new one
	 * is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_folios(sbi, grp))
		goto out;

	/*
	 * It's impossible to fail after the workgroup is frozen, but in
	 * order to avoid some race conditions, add a DBG_BUGON to observe
	 * this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	lockref_mark_dead(&grp->lockref);
	free = true;
out:
	spin_unlock(&grp->lockref.lock);
	if (free)
		__erofs_workgroup_free(grp);
	return free;
}
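
/*
 * Walk the managed workgroups of one filesystem instance and release up to
 * @nr_shrink of them, dropping and reacquiring the XArray lock after each
 * successful release.
 */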
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}
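
/*
 * Shrinker scan callback: walk all mounted instances round-robin, shrinking
 * each one's workstation.  shrinker_run_no tags instances already visited
 * in this run so the walk terminates, and each scanned instance is moved to
 * the list tail for fairness.
 */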
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}
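
/*
 * Register the single global shrinker; its object count is
 * erofs_global_shrink_cnt, maintained by the workgroup helpers above.
 */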
int __init erofs_init_shrinker(void)
{
	erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker");
	if (!erofs_shrinker_info)
		return -ENOMEM;

	erofs_shrinker_info->count_objects = erofs_shrink_count;
	erofs_shrinker_info->scan_objects = erofs_shrink_scan;
	shrinker_register(erofs_shrinker_info);
	return 0;
}

void erofs_exit_shrinker(void)
{
	shrinker_free(erofs_shrinker_info);
}