Git Repo - linux.git/blame - mm/zswap.c
[linux.git] / mm / zswap.c
c942fddf 1// SPDX-License-Identifier: GPL-2.0-or-later
2b281117
SJ
2/*
3 * zswap.c - zswap driver file
4 *
42c06a0e 5 * zswap is a cache that takes pages that are in the process
2b281117
SJ
6 * of being swapped out and attempts to compress and store them in a
7 * RAM-based memory pool. This can result in a significant I/O reduction on
8 * the swap device and, in the case where decompressing from RAM is faster
9 * than reading from the swap device, can also improve workload performance.
10 *
11 * Copyright (C) 2012 Seth Jennings <[email protected]>
2b281117
SJ
12*/
13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16#include <linux/module.h>
17#include <linux/cpu.h>
18#include <linux/highmem.h>
19#include <linux/slab.h>
20#include <linux/spinlock.h>
21#include <linux/types.h>
22#include <linux/atomic.h>
2b281117
SJ
23#include <linux/rbtree.h>
24#include <linux/swap.h>
25#include <linux/crypto.h>
1ec3b5fe 26#include <linux/scatterlist.h>
ddc1a5cb 27#include <linux/mempolicy.h>
2b281117 28#include <linux/mempool.h>
12d79d64 29#include <linux/zpool.h>
1ec3b5fe 30#include <crypto/acompress.h>
42c06a0e 31#include <linux/zswap.h>
2b281117
SJ
32#include <linux/mm_types.h>
33#include <linux/page-flags.h>
34#include <linux/swapops.h>
35#include <linux/writeback.h>
36#include <linux/pagemap.h>
45190f01 37#include <linux/workqueue.h>
a65b0e76 38#include <linux/list_lru.h>
2b281117 39
014bb1de 40#include "swap.h"
e0228d59 41#include "internal.h"
014bb1de 42
2b281117
SJ
43/*********************************
44* statistics
45**********************************/
12d79d64 46/* Total bytes used by the compressed storage */
f6498b77 47u64 zswap_pool_total_size;
2b281117 48/* The number of compressed pages currently stored in zswap */
f6498b77 49atomic_t zswap_stored_pages = ATOMIC_INIT(0);
a85f878b
SD
50/* The number of same-value filled pages currently stored in zswap */
51static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
2b281117
SJ
52
53/*
54 * The statistics below are not protected from concurrent access for
55 * performance reasons, so they may not be 100% accurate. However,
56 * they do provide useful information on roughly how many times a
57 * certain event is occurring.
58*/
59
60/* Pool limit was hit (see zswap_max_pool_percent) */
61static u64 zswap_pool_limit_hit;
62/* Pages written back when pool limit was reached */
63static u64 zswap_written_back_pages;
64/* Store failed due to a reclaim failure after pool limit was reached */
65static u64 zswap_reject_reclaim_fail;
cb61dad8
NP
66/* Store failed due to compression algorithm failure */
67static u64 zswap_reject_compress_fail;
2b281117
SJ
68/* Compressed page was too big for the allocator to (optimally) store */
69static u64 zswap_reject_compress_poor;
70/* Store failed because underlying allocator could not get memory */
71static u64 zswap_reject_alloc_fail;
72/* Store failed because the entry metadata could not be allocated (rare) */
73static u64 zswap_reject_kmemcache_fail;
74/* Duplicate store was encountered (rare) */
75static u64 zswap_duplicate_entry;
76
45190f01
VW
77/* Shrinker work queue */
78static struct workqueue_struct *shrink_wq;
79/* Pool limit was hit, we need to calm down */
80static bool zswap_pool_reached_full;
81
2b281117
SJ
82/*********************************
83* tunables
84**********************************/
c00ed16a 85
bae21db8
DS
86#define ZSWAP_PARAM_UNSET ""
87
141fdeec
LS
88static int zswap_setup(void);
89
bb8b93b5
MS
90/* Enable/disable zswap */
91static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
d7b028f5
DS
92static int zswap_enabled_param_set(const char *,
93 const struct kernel_param *);
83aed6cd 94static const struct kernel_param_ops zswap_enabled_param_ops = {
d7b028f5
DS
95 .set = zswap_enabled_param_set,
96 .get = param_get_bool,
97};
98module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
2b281117 99
90b0fc26 100/* Crypto compressor to use */
bb8b93b5 101static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
90b0fc26
DS
102static int zswap_compressor_param_set(const char *,
103 const struct kernel_param *);
83aed6cd 104static const struct kernel_param_ops zswap_compressor_param_ops = {
90b0fc26 105 .set = zswap_compressor_param_set,
c99b42c3
DS
106 .get = param_get_charp,
107 .free = param_free_charp,
90b0fc26
DS
108};
109module_param_cb(compressor, &zswap_compressor_param_ops,
c99b42c3 110 &zswap_compressor, 0644);
2b281117 111
90b0fc26 112/* Compressed storage zpool to use */
bb8b93b5 113static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
90b0fc26 114static int zswap_zpool_param_set(const char *, const struct kernel_param *);
83aed6cd 115static const struct kernel_param_ops zswap_zpool_param_ops = {
c99b42c3
DS
116 .set = zswap_zpool_param_set,
117 .get = param_get_charp,
118 .free = param_free_charp,
90b0fc26 119};
c99b42c3 120module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
12d79d64 121
90b0fc26
DS
122/* The maximum percentage of memory that the compressed pool can occupy */
123static unsigned int zswap_max_pool_percent = 20;
124module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
60105e12 125
45190f01
VW
126/* The threshold for accepting new pages after the max_pool_percent was hit */
127static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
128module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
129 uint, 0644);
130
cb325ddd
MS
131/*
132 * Enable/disable handling same-value filled pages (enabled by default).
133 * If disabled, every page is considered non-same-value filled.
134 */
a85f878b
SD
135static bool zswap_same_filled_pages_enabled = true;
136module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
137 bool, 0644);
138
cb325ddd
MS
139/* Enable/disable handling non-same-value filled pages (enabled by default) */
140static bool zswap_non_same_filled_pages_enabled = true;
141module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
142 bool, 0644);
143
b9c91c43
YA
144static bool zswap_exclusive_loads_enabled = IS_ENABLED(
145 CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
146module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);
147
b8cf32dc
YA
148/* Number of zpools in zswap_pool (empirically determined for scalability) */
149#define ZSWAP_NR_ZPOOLS 32
150
b5ba474f
NP
151/* Enable/disable memory pressure-based shrinker. */
152static bool zswap_shrinker_enabled = IS_ENABLED(
153 CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
154module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
155
501a06fe
NP
156bool is_zswap_enabled(void)
157{
158 return zswap_enabled;
159}
160
2b281117 161/*********************************
f1c54846 162* data structures
2b281117 163**********************************/
2b281117 164
1ec3b5fe
BS
165struct crypto_acomp_ctx {
166 struct crypto_acomp *acomp;
167 struct acomp_req *req;
168 struct crypto_wait wait;
8ba2f844
CZ
169 u8 *buffer;
170 struct mutex mutex;
1ec3b5fe
BS
171};
172
f999f38b
DC
173/*
174 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
175 * The only case where lru_lock is not acquired while holding tree.lock is
176 * when a zswap_entry is taken off the lru for writeback; in that case it
177 * needs to be verified that it's still valid in the tree.
178 */
f1c54846 179struct zswap_pool {
b8cf32dc 180 struct zpool *zpools[ZSWAP_NR_ZPOOLS];
1ec3b5fe 181 struct crypto_acomp_ctx __percpu *acomp_ctx;
f1c54846
DS
182 struct kref kref;
183 struct list_head list;
45190f01
VW
184 struct work_struct release_work;
185 struct work_struct shrink_work;
cab7a7e5 186 struct hlist_node node;
f1c54846 187 char tfm_name[CRYPTO_MAX_ALG_NAME];
a65b0e76
DC
188 struct list_lru list_lru;
189 struct mem_cgroup *next_shrink;
b5ba474f
NP
190 struct shrinker *shrinker;
191 atomic_t nr_stored;
2b281117
SJ
192};
193
2b281117
SJ
194/*
195 * struct zswap_entry
196 *
197 * This structure contains the metadata for tracking a single compressed
198 * page within zswap.
199 *
200 * rbnode - links the entry into the red-black tree for the appropriate swap type
97157d89 201 * swpentry - associated swap entry, the offset indexes into the red-black tree
2b281117
SJ
202 * refcount - the number of outstanding references to the entry. This is
203 * needed to protect against premature freeing of the entry during
6b452516 204 * concurrent calls to load, invalidate, and writeback. The lock
2b281117
SJ
205 * for the zswap_tree structure that contains the entry must
206 * be held while changing the refcount. Since the lock must
207 * be held, there is no reason to also make refcount atomic.
2b281117 208 * length - the length in bytes of the compressed page data. Needed during
f999f38b
DC
209 * decompression. For a same-value filled page, length is 0, and both
210 * pool and lru are invalid and must be ignored.
f1c54846
DS
211 * pool - the zswap_pool the entry's data is in
212 * handle - zpool allocation handle that stores the compressed page data
a85f878b 213 * value - the value of a same-value filled page, all of whose words are identical
97157d89 214 * objcg - the obj_cgroup that the compressed memory is charged to
f999f38b 215 * lru - handle to the pool's lru used to evict pages.
2b281117
SJ
216 */
217struct zswap_entry {
218 struct rb_node rbnode;
0bb48849 219 swp_entry_t swpentry;
2b281117
SJ
220 int refcount;
221 unsigned int length;
f1c54846 222 struct zswap_pool *pool;
a85f878b
SD
223 union {
224 unsigned long handle;
225 unsigned long value;
226 };
f4840ccf 227 struct obj_cgroup *objcg;
f999f38b 228 struct list_head lru;
2b281117
SJ
229};
230
2b281117
SJ
231/*
232 * The tree lock in the zswap_tree struct protects a few things:
233 * - the rbtree
234 * - the refcount field of each entry in the tree
235 */
236struct zswap_tree {
237 struct rb_root rbroot;
238 spinlock_t lock;
2b281117
SJ
239};
240
241static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
44c7c734 242static unsigned int nr_zswap_trees[MAX_SWAPFILES];
2b281117 243
f1c54846
DS
244/* RCU-protected iteration */
245static LIST_HEAD(zswap_pools);
246/* protects zswap_pools list modification */
247static DEFINE_SPINLOCK(zswap_pools_lock);
32a4e169
DS
248/* pool counter to provide unique names to zpool */
249static atomic_t zswap_pools_count = ATOMIC_INIT(0);
f1c54846 250
9021ccec
LS
251enum zswap_init_type {
252 ZSWAP_UNINIT,
253 ZSWAP_INIT_SUCCEED,
254 ZSWAP_INIT_FAILED
255};
90b0fc26 256
9021ccec 257static enum zswap_init_type zswap_init_state;
90b0fc26 258
141fdeec
LS
259/* used to ensure the integrity of initialization */
260static DEFINE_MUTEX(zswap_init_lock);
d7b028f5 261
ae3d89a7
DS
262/* init completed, but couldn't create the initial pool */
263static bool zswap_has_pool;
264
f1c54846
DS
265/*********************************
266* helpers and fwd declarations
267**********************************/
268
44c7c734
CZ
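/* One zswap_tree covers SWAP_ADDRESS_SPACE_PAGES swap slots; pick the tree covering @swp */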
269static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
270{
271 return &zswap_trees[swp_type(swp)][swp_offset(swp)
272 >> SWAP_ADDRESS_SPACE_SHIFT];
273}
274
f1c54846
DS
275#define zswap_pool_debug(msg, p) \
276 pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
b8cf32dc 277 zpool_get_type((p)->zpools[0]))
f1c54846 278
f1c54846
DS
279static bool zswap_is_full(void)
280{
ca79b0c2
AK
281 return totalram_pages() * zswap_max_pool_percent / 100 <
282 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
f1c54846
DS
283}
284
45190f01
VW
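/*
 * After the pool limit was hit, only accept new stores again once usage
 * drops below accept_threshold_percent of the maximum pool size.
 */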
285static bool zswap_can_accept(void)
286{
287 return totalram_pages() * zswap_accept_thr_percent / 100 *
288 zswap_max_pool_percent / 100 >
289 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
290}
291
b5ba474f
NP
292static u64 get_zswap_pool_size(struct zswap_pool *pool)
293{
294 u64 pool_size = 0;
295 int i;
296
297 for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
298 pool_size += zpool_get_total_size(pool->zpools[i]);
299
300 return pool_size;
301}
302
f1c54846
DS
303static void zswap_update_total_size(void)
304{
305 struct zswap_pool *pool;
306 u64 total = 0;
307
308 rcu_read_lock();
309
310 list_for_each_entry_rcu(pool, &zswap_pools, list)
b5ba474f 311 total += get_zswap_pool_size(pool);
f1c54846
DS
312
313 rcu_read_unlock();
314
315 zswap_pool_total_size = total;
316}
317
a984649b
JW
318/*********************************
319* pool functions
320**********************************/
321
322static void zswap_alloc_shrinker(struct zswap_pool *pool);
323static void shrink_worker(struct work_struct *w);
324
325static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
326{
327 int i;
328 struct zswap_pool *pool;
329 char name[38]; /* 'zswap' + 32 char (max) num + \0 */
330 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
331 int ret;
332
333 if (!zswap_has_pool) {
334 /* if either is unset, pool initialization failed, and we
335 * need both params to be set correctly before trying to
336 * create a pool.
337 */
338 if (!strcmp(type, ZSWAP_PARAM_UNSET))
339 return NULL;
340 if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
341 return NULL;
342 }
343
344 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
345 if (!pool)
346 return NULL;
347
348 for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
349 /* unique name for each pool specifically required by zsmalloc */
350 snprintf(name, 38, "zswap%x",
351 atomic_inc_return(&zswap_pools_count));
352
353 pool->zpools[i] = zpool_create_pool(type, name, gfp);
354 if (!pool->zpools[i]) {
355 pr_err("%s zpool not available\n", type);
356 goto error;
357 }
358 }
359 pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
360
361 strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
362
363 pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
364 if (!pool->acomp_ctx) {
365 pr_err("percpu alloc failed\n");
366 goto error;
367 }
368
369 ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
370 &pool->node);
371 if (ret)
372 goto error;
373
374 zswap_alloc_shrinker(pool);
375 if (!pool->shrinker)
376 goto error;
377
378 pr_debug("using %s compressor\n", pool->tfm_name);
379
380 /* being the current pool takes 1 ref; this func expects the
381 * caller to always add the new pool as the current pool
382 */
383 kref_init(&pool->kref);
384 INIT_LIST_HEAD(&pool->list);
385 if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
386 goto lru_fail;
387 shrinker_register(pool->shrinker);
388 INIT_WORK(&pool->shrink_work, shrink_worker);
389 atomic_set(&pool->nr_stored, 0);
390
391 zswap_pool_debug("created", pool);
392
393 return pool;
394
395lru_fail:
396 list_lru_destroy(&pool->list_lru);
397 shrinker_free(pool->shrinker);
398error:
399 if (pool->acomp_ctx)
400 free_percpu(pool->acomp_ctx);
401 while (i--)
402 zpool_destroy_pool(pool->zpools[i]);
403 kfree(pool);
404 return NULL;
405}
406
407static struct zswap_pool *__zswap_pool_create_fallback(void)
408{
409 bool has_comp, has_zpool;
410
411 has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
412 if (!has_comp && strcmp(zswap_compressor,
413 CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
414 pr_err("compressor %s not available, using default %s\n",
415 zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
416 param_free_charp(&zswap_compressor);
417 zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
418 has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
419 }
420 if (!has_comp) {
421 pr_err("default compressor %s not available\n",
422 zswap_compressor);
423 param_free_charp(&zswap_compressor);
424 zswap_compressor = ZSWAP_PARAM_UNSET;
425 }
426
427 has_zpool = zpool_has_pool(zswap_zpool_type);
428 if (!has_zpool && strcmp(zswap_zpool_type,
429 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
430 pr_err("zpool %s not available, using default %s\n",
431 zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
432 param_free_charp(&zswap_zpool_type);
433 zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
434 has_zpool = zpool_has_pool(zswap_zpool_type);
435 }
436 if (!has_zpool) {
437 pr_err("default zpool %s not available\n",
438 zswap_zpool_type);
439 param_free_charp(&zswap_zpool_type);
440 zswap_zpool_type = ZSWAP_PARAM_UNSET;
441 }
442
443 if (!has_comp || !has_zpool)
444 return NULL;
445
446 return zswap_pool_create(zswap_zpool_type, zswap_compressor);
447}
448
449static void zswap_pool_destroy(struct zswap_pool *pool)
450{
451 int i;
452
453 zswap_pool_debug("destroying", pool);
454
455 shrinker_free(pool->shrinker);
456 cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
457 free_percpu(pool->acomp_ctx);
458 list_lru_destroy(&pool->list_lru);
459
460 spin_lock(&zswap_pools_lock);
461 mem_cgroup_iter_break(NULL, pool->next_shrink);
462 pool->next_shrink = NULL;
463 spin_unlock(&zswap_pools_lock);
464
465 for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
466 zpool_destroy_pool(pool->zpools[i]);
467 kfree(pool);
468}
469
39f3ec8e
JW
470static void __zswap_pool_release(struct work_struct *work)
471{
472 struct zswap_pool *pool = container_of(work, typeof(*pool),
473 release_work);
474
475 synchronize_rcu();
476
477 /* nobody should have been able to get a kref... */
478 WARN_ON(kref_get_unless_zero(&pool->kref));
479
480 /* pool is now off zswap_pools list and has no references. */
481 zswap_pool_destroy(pool);
482}
483
484static struct zswap_pool *zswap_pool_current(void);
485
486static void __zswap_pool_empty(struct kref *kref)
487{
488 struct zswap_pool *pool;
489
490 pool = container_of(kref, typeof(*pool), kref);
491
492 spin_lock(&zswap_pools_lock);
493
494 WARN_ON(pool == zswap_pool_current());
495
496 list_del_rcu(&pool->list);
497
498 INIT_WORK(&pool->release_work, __zswap_pool_release);
499 schedule_work(&pool->release_work);
500
501 spin_unlock(&zswap_pools_lock);
502}
503
504static int __must_check zswap_pool_get(struct zswap_pool *pool)
505{
506 if (!pool)
507 return 0;
508
509 return kref_get_unless_zero(&pool->kref);
510}
511
512static void zswap_pool_put(struct zswap_pool *pool)
513{
514 kref_put(&pool->kref, __zswap_pool_empty);
515}
516
c1a0ecb8
JW
517static struct zswap_pool *__zswap_pool_current(void)
518{
519 struct zswap_pool *pool;
520
521 pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
522 WARN_ONCE(!pool && zswap_has_pool,
523 "%s: no page storage pool!\n", __func__);
524
525 return pool;
526}
527
528static struct zswap_pool *zswap_pool_current(void)
529{
530 assert_spin_locked(&zswap_pools_lock);
531
532 return __zswap_pool_current();
533}
534
535static struct zswap_pool *zswap_pool_current_get(void)
536{
537 struct zswap_pool *pool;
538
539 rcu_read_lock();
540
541 pool = __zswap_pool_current();
542 if (!zswap_pool_get(pool))
543 pool = NULL;
544
545 rcu_read_unlock();
546
547 return pool;
548}
549
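/* Take a reference on the last pool on the zswap_pools list (used when queueing shrink work) */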
550static struct zswap_pool *zswap_pool_last_get(void)
551{
552 struct zswap_pool *pool, *last = NULL;
553
554 rcu_read_lock();
555
556 list_for_each_entry_rcu(pool, &zswap_pools, list)
557 last = pool;
558 WARN_ONCE(!last && zswap_has_pool,
559 "%s: no page storage pool!\n", __func__);
560 if (!zswap_pool_get(last))
561 last = NULL;
562
563 rcu_read_unlock();
564
565 return last;
566}
567
568/* type and compressor must be null-terminated */
569static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
570{
571 struct zswap_pool *pool;
572
573 assert_spin_locked(&zswap_pools_lock);
574
575 list_for_each_entry_rcu(pool, &zswap_pools, list) {
576 if (strcmp(pool->tfm_name, compressor))
577 continue;
578 /* all zpools share the same type */
579 if (strcmp(zpool_get_type(pool->zpools[0]), type))
580 continue;
581 /* if we can't get it, it's about to be destroyed */
582 if (!zswap_pool_get(pool))
583 continue;
584 return pool;
585 }
586
587 return NULL;
588}
589
abca07c0
JW
590/*********************************
591* param callbacks
592**********************************/
593
594static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
595{
596 /* no change required */
597 if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
598 return false;
599 return true;
600}
601
602/* val must be a null-terminated string */
603static int __zswap_param_set(const char *val, const struct kernel_param *kp,
604 char *type, char *compressor)
605{
606 struct zswap_pool *pool, *put_pool = NULL;
607 char *s = strstrip((char *)val);
608 int ret = 0;
609 bool new_pool = false;
610
611 mutex_lock(&zswap_init_lock);
612 switch (zswap_init_state) {
613 case ZSWAP_UNINIT:
614 /* if this is load-time (pre-init) param setting,
615 * don't create a pool; that's done during init.
616 */
617 ret = param_set_charp(s, kp);
618 break;
619 case ZSWAP_INIT_SUCCEED:
620 new_pool = zswap_pool_changed(s, kp);
621 break;
622 case ZSWAP_INIT_FAILED:
623 pr_err("can't set param, initialization failed\n");
624 ret = -ENODEV;
625 }
626 mutex_unlock(&zswap_init_lock);
627
628 /* no need to create a new pool, return directly */
629 if (!new_pool)
630 return ret;
631
632 if (!type) {
633 if (!zpool_has_pool(s)) {
634 pr_err("zpool %s not available\n", s);
635 return -ENOENT;
636 }
637 type = s;
638 } else if (!compressor) {
639 if (!crypto_has_acomp(s, 0, 0)) {
640 pr_err("compressor %s not available\n", s);
641 return -ENOENT;
642 }
643 compressor = s;
644 } else {
645 WARN_ON(1);
646 return -EINVAL;
647 }
648
649 spin_lock(&zswap_pools_lock);
650
651 pool = zswap_pool_find_get(type, compressor);
652 if (pool) {
653 zswap_pool_debug("using existing", pool);
654 WARN_ON(pool == zswap_pool_current());
655 list_del_rcu(&pool->list);
656 }
657
658 spin_unlock(&zswap_pools_lock);
659
660 if (!pool)
661 pool = zswap_pool_create(type, compressor);
662
663 if (pool)
664 ret = param_set_charp(s, kp);
665 else
666 ret = -EINVAL;
667
668 spin_lock(&zswap_pools_lock);
669
670 if (!ret) {
671 put_pool = zswap_pool_current();
672 list_add_rcu(&pool->list, &zswap_pools);
673 zswap_has_pool = true;
674 } else if (pool) {
675 /* add the possibly pre-existing pool to the end of the pools
676 * list; if it's new (and empty) then it'll be removed and
677 * destroyed by the put after we drop the lock
678 */
679 list_add_tail_rcu(&pool->list, &zswap_pools);
680 put_pool = pool;
681 }
682
683 spin_unlock(&zswap_pools_lock);
684
685 if (!zswap_has_pool && !pool) {
686 /* if initial pool creation failed, and this pool creation also
687 * failed, maybe both compressor and zpool params were bad.
688 * Allow changing this param, so pool creation will succeed
689 * when the other param is changed. We already verified this
690 * param is ok in the zpool_has_pool() or crypto_has_acomp()
691 * checks above.
692 */
693 ret = param_set_charp(s, kp);
694 }
695
696 /* drop the ref from either the old current pool,
697 * or the new pool we failed to add
698 */
699 if (put_pool)
700 zswap_pool_put(put_pool);
701
702 return ret;
703}
704
705static int zswap_compressor_param_set(const char *val,
706 const struct kernel_param *kp)
707{
708 return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
709}
710
711static int zswap_zpool_param_set(const char *val,
712 const struct kernel_param *kp)
713{
714 return __zswap_param_set(val, kp, NULL, zswap_compressor);
715}
716
717static int zswap_enabled_param_set(const char *val,
718 const struct kernel_param *kp)
719{
720 int ret = -ENODEV;
721
722 /* if this is load-time (pre-init) param setting, only set param. */
723 if (system_state != SYSTEM_RUNNING)
724 return param_set_bool(val, kp);
725
726 mutex_lock(&zswap_init_lock);
727 switch (zswap_init_state) {
728 case ZSWAP_UNINIT:
729 if (zswap_setup())
730 break;
731 fallthrough;
732 case ZSWAP_INIT_SUCCEED:
733 if (!zswap_has_pool)
734 pr_err("can't enable, no pool configured\n");
735 else
736 ret = param_set_bool(val, kp);
737 break;
738 case ZSWAP_INIT_FAILED:
739 pr_err("can't enable, initialization failed\n");
740 }
741 mutex_unlock(&zswap_init_lock);
742
743 return ret;
744}
745
506a86c5
JW
746/*********************************
747* lru functions
748**********************************/
749
a65b0e76
DC
750/* should be called under RCU */
751#ifdef CONFIG_MEMCG
752static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
753{
754 return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
755}
756#else
757static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
758{
759 return NULL;
760}
761#endif
762
763static inline int entry_to_nid(struct zswap_entry *entry)
764{
765 return page_to_nid(virt_to_page(entry));
766}
767
a65b0e76
DC
768static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
769{
b5ba474f
NP
770 atomic_long_t *nr_zswap_protected;
771 unsigned long lru_size, old, new;
a65b0e76
DC
772 int nid = entry_to_nid(entry);
773 struct mem_cgroup *memcg;
b5ba474f 774 struct lruvec *lruvec;
a65b0e76
DC
775
776 /*
777 * Note that it is safe to use rcu_read_lock() here, even in the face of
778 * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
779 * used in list_lru lookup, only two scenarios are possible:
780 *
781 * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
782 * new entry will be reparented to memcg's parent's list_lru.
783 * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
784 * new entry will be added directly to memcg's parent's list_lru.
785 *
3f798aa6 786 * Similar reasoning holds for list_lru_del().
a65b0e76
DC
787 */
788 rcu_read_lock();
789 memcg = mem_cgroup_from_entry(entry);
790 /* will always succeed */
791 list_lru_add(list_lru, &entry->lru, nid, memcg);
b5ba474f
NP
792
793 /* Update the protection area */
794 lru_size = list_lru_count_one(list_lru, nid, memcg);
795 lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
796 nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
797 old = atomic_long_inc_return(nr_zswap_protected);
798 /*
799 * Decay to avoid overflow and adapt to changing workloads.
800 * This is based on LRU reclaim cost decaying heuristics.
801 */
802 do {
803 new = old > lru_size / 4 ? old / 2 : old;
804 } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
a65b0e76
DC
805 rcu_read_unlock();
806}
807
808static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
809{
810 int nid = entry_to_nid(entry);
811 struct mem_cgroup *memcg;
812
813 rcu_read_lock();
814 memcg = mem_cgroup_from_entry(entry);
815 /* will always succeed */
816 list_lru_del(list_lru, &entry->lru, nid, memcg);
817 rcu_read_unlock();
818}
819
5182661a
JW
820void zswap_lruvec_state_init(struct lruvec *lruvec)
821{
822 atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
823}
824
825void zswap_folio_swapin(struct folio *folio)
826{
827 struct lruvec *lruvec;
828
829 if (folio) {
830 lruvec = folio_lruvec(folio);
831 atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
832 }
833}
834
835void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
836{
837 struct zswap_pool *pool;
838
839 /* lock out zswap pools list modification */
840 spin_lock(&zswap_pools_lock);
841 list_for_each_entry(pool, &zswap_pools, list) {
842 if (pool->next_shrink == memcg)
843 pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
844 }
845 spin_unlock(&zswap_pools_lock);
846}
847
2b281117
SJ
848/*********************************
849* rbtree functions
850**********************************/
851static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
852{
853 struct rb_node *node = root->rb_node;
854 struct zswap_entry *entry;
0bb48849 855 pgoff_t entry_offset;
2b281117
SJ
856
857 while (node) {
858 entry = rb_entry(node, struct zswap_entry, rbnode);
0bb48849
DC
859 entry_offset = swp_offset(entry->swpentry);
860 if (entry_offset > offset)
2b281117 861 node = node->rb_left;
0bb48849 862 else if (entry_offset < offset)
2b281117
SJ
863 node = node->rb_right;
864 else
865 return entry;
866 }
867 return NULL;
868}
869
870/*
871 * In the case that an entry with the same offset is found, a pointer to
872 * the existing entry is stored in dupentry and the function returns -EEXIST.
873 */
874static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
875 struct zswap_entry **dupentry)
876{
877 struct rb_node **link = &root->rb_node, *parent = NULL;
878 struct zswap_entry *myentry;
0bb48849 879 pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);
2b281117
SJ
880
881 while (*link) {
882 parent = *link;
883 myentry = rb_entry(parent, struct zswap_entry, rbnode);
0bb48849
DC
884 myentry_offset = swp_offset(myentry->swpentry);
885 if (myentry_offset > entry_offset)
2b281117 886 link = &(*link)->rb_left;
0bb48849 887 else if (myentry_offset < entry_offset)
2b281117
SJ
888 link = &(*link)->rb_right;
889 else {
890 *dupentry = myentry;
891 return -EEXIST;
892 }
893 }
894 rb_link_node(&entry->rbnode, parent, link);
895 rb_insert_color(&entry->rbnode, root);
896 return 0;
897}
898
18a93707 899static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
0ab0abcf
WY
900{
901 if (!RB_EMPTY_NODE(&entry->rbnode)) {
902 rb_erase(&entry->rbnode, root);
903 RB_CLEAR_NODE(&entry->rbnode);
18a93707 904 return true;
0ab0abcf 905 }
18a93707 906 return false;
0ab0abcf
WY
907}
908
36034bf6
JW
909/*********************************
910* zswap entry functions
911**********************************/
912static struct kmem_cache *zswap_entry_cache;
913
914static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
915{
916 struct zswap_entry *entry;
917 entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
918 if (!entry)
919 return NULL;
920 entry->refcount = 1;
921 RB_CLEAR_NODE(&entry->rbnode);
922 return entry;
923}
924
925static void zswap_entry_cache_free(struct zswap_entry *entry)
926{
927 kmem_cache_free(zswap_entry_cache, entry);
928}
929
b8cf32dc
YA
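/* Spread entries across the pool's zpools by hashing the entry pointer */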
930static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
931{
932 int i = 0;
933
934 if (ZSWAP_NR_ZPOOLS > 1)
935 i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));
936
937 return entry->pool->zpools[i];
938}
939
0ab0abcf 940/*
12d79d64 941 * Carries out the common pattern of freeing an entry's zpool allocation,
0ab0abcf
WY
942 * freeing the entry itself, and decrementing the number of stored pages.
943 */
42398be2 944static void zswap_entry_free(struct zswap_entry *entry)
0ab0abcf 945{
a85f878b
SD
946 if (!entry->length)
947 atomic_dec(&zswap_same_filled_pages);
948 else {
a65b0e76 949 zswap_lru_del(&entry->pool->list_lru, entry);
b8cf32dc 950 zpool_free(zswap_find_zpool(entry), entry->handle);
b5ba474f 951 atomic_dec(&entry->pool->nr_stored);
a85f878b
SD
952 zswap_pool_put(entry->pool);
953 }
2e601e1e
JW
954 if (entry->objcg) {
955 obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
956 obj_cgroup_put(entry->objcg);
957 }
0ab0abcf
WY
958 zswap_entry_cache_free(entry);
959 atomic_dec(&zswap_stored_pages);
f1c54846 960 zswap_update_total_size();
0ab0abcf
WY
961}
962
963/* caller must hold the tree lock */
964static void zswap_entry_get(struct zswap_entry *entry)
965{
e477559c 966 WARN_ON_ONCE(!entry->refcount);
0ab0abcf
WY
967 entry->refcount++;
968}
969
dab7711f 970/* caller must hold the tree lock */
db128f5f 971static void zswap_entry_put(struct zswap_entry *entry)
0ab0abcf 972{
dab7711f
JW
973 WARN_ON_ONCE(!entry->refcount);
974 if (--entry->refcount == 0) {
73108957 975 WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
42398be2 976 zswap_entry_free(entry);
0ab0abcf
WY
977 }
978}
979
7dd1f7f0
JW
980/*
981 * If the entry is still valid in the tree, drop the initial ref and remove it
982 * from the tree. This function must be called with an additional ref held,
983 * otherwise it may race with another invalidation freeing the entry.
984 */
985static void zswap_invalidate_entry(struct zswap_tree *tree,
986 struct zswap_entry *entry)
987{
988 if (zswap_rb_erase(&tree->rbroot, entry))
989 zswap_entry_put(entry);
990}
991
f91e81d3
JW
992/*********************************
993* compressed storage functions
994**********************************/
64f200b8
JW
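/*
 * CPU hotplug "prepare" callback: allocate this CPU's acomp transform,
 * request and two-page scratch buffer for the pool.
 */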
995static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
996{
997 struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
998 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
999 struct crypto_acomp *acomp;
1000 struct acomp_req *req;
1001 int ret;
1002
1003 mutex_init(&acomp_ctx->mutex);
1004
1005 acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
1006 if (!acomp_ctx->buffer)
1007 return -ENOMEM;
1008
1009 acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
1010 if (IS_ERR(acomp)) {
1011 pr_err("could not alloc crypto acomp %s : %ld\n",
1012 pool->tfm_name, PTR_ERR(acomp));
1013 ret = PTR_ERR(acomp);
1014 goto acomp_fail;
1015 }
1016 acomp_ctx->acomp = acomp;
1017
1018 req = acomp_request_alloc(acomp_ctx->acomp);
1019 if (!req) {
1020 pr_err("could not alloc crypto acomp_request %s\n",
1021 pool->tfm_name);
1022 ret = -ENOMEM;
1023 goto req_fail;
1024 }
1025 acomp_ctx->req = req;
1026
1027 crypto_init_wait(&acomp_ctx->wait);
1028 /*
1029 * if the backend of acomp is async zip, crypto_req_done() will wakeup
1030 * crypto_wait_req(); if the backend of acomp is scomp, the callback
1031 * won't be called, crypto_wait_req() will return without blocking.
1032 */
1033 acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
1034 crypto_req_done, &acomp_ctx->wait);
1035
1036 return 0;
1037
1038req_fail:
1039 crypto_free_acomp(acomp_ctx->acomp);
1040acomp_fail:
1041 kfree(acomp_ctx->buffer);
1042 return ret;
1043}
1044
1045static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
1046{
1047 struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
1048 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
1049
1050 if (!IS_ERR_OR_NULL(acomp_ctx)) {
1051 if (!IS_ERR_OR_NULL(acomp_ctx->req))
1052 acomp_request_free(acomp_ctx->req);
1053 if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
1054 crypto_free_acomp(acomp_ctx->acomp);
1055 kfree(acomp_ctx->buffer);
1056 }
1057
1058 return 0;
1059}
1060
f91e81d3
JW
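/*
 * Compress the folio into a newly allocated zpool object. On success,
 * entry->handle and entry->length are set and true is returned.
 */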
1061static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
1062{
1063 struct crypto_acomp_ctx *acomp_ctx;
1064 struct scatterlist input, output;
1065 unsigned int dlen = PAGE_SIZE;
1066 unsigned long handle;
1067 struct zpool *zpool;
1068 char *buf;
1069 gfp_t gfp;
1070 int ret;
1071 u8 *dst;
1072
1073 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1074
1075 mutex_lock(&acomp_ctx->mutex);
1076
1077 dst = acomp_ctx->buffer;
1078 sg_init_table(&input, 1);
1079 sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
1080
1081 /*
1082 * We need PAGE_SIZE * 2 here because compression may occasionally expand
1083 * the data, and hardware accelerators may not check the dst buffer size;
1084 * give the dst buffer enough length to avoid a buffer overflow.
1085 */
1086 sg_init_one(&output, dst, PAGE_SIZE * 2);
1087 acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
1088
1089 /*
1090 * It may look a little silly that we send an asynchronous request and
1091 * then wait for its completion synchronously; this makes the process
1092 * synchronous in fact.
1093 * Theoretically, acomp lets users submit multiple requests to one acomp
1094 * instance and have them completed concurrently, but zswap stores and
1095 * loads page by page, so there is no way for one thread doing zswap to
1096 * send the second page before the first page is done.
1097 * However, different threads running on different CPUs have different
1098 * acomp instances, so multiple threads can do (de)compression in
1099 * parallel.
1100 */
1101 ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
1102 dlen = acomp_ctx->req->dlen;
1103 if (ret) {
1104 zswap_reject_compress_fail++;
1105 goto unlock;
1106 }
1107
1108 zpool = zswap_find_zpool(entry);
1109 gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1110 if (zpool_malloc_support_movable(zpool))
1111 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1112 ret = zpool_malloc(zpool, dlen, gfp, &handle);
1113 if (ret == -ENOSPC) {
1114 zswap_reject_compress_poor++;
1115 goto unlock;
1116 }
1117 if (ret) {
1118 zswap_reject_alloc_fail++;
1119 goto unlock;
1120 }
1121
1122 buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
1123 memcpy(buf, dst, dlen);
1124 zpool_unmap_handle(zpool, handle);
1125
1126 entry->handle = handle;
1127 entry->length = dlen;
1128
1129unlock:
1130 mutex_unlock(&acomp_ctx->mutex);
1131 return ret == 0;
1132}
1133
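/*
 * Decompress the entry's data into @page, bouncing through acomp_ctx->buffer
 * if the zpool mapping cannot be held across a sleep.
 */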
1134static void zswap_decompress(struct zswap_entry *entry, struct page *page)
1135{
1136 struct zpool *zpool = zswap_find_zpool(entry);
1137 struct scatterlist input, output;
1138 struct crypto_acomp_ctx *acomp_ctx;
1139 u8 *src;
1140
1141 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1142 mutex_lock(&acomp_ctx->mutex);
1143
1144 src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
1145 if (!zpool_can_sleep_mapped(zpool)) {
1146 memcpy(acomp_ctx->buffer, src, entry->length);
1147 src = acomp_ctx->buffer;
1148 zpool_unmap_handle(zpool, entry->handle);
1149 }
1150
1151 sg_init_one(&input, src, entry->length);
1152 sg_init_table(&output, 1);
1153 sg_set_page(&output, page, PAGE_SIZE, 0);
1154 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
1155 BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
1156 BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
1157 mutex_unlock(&acomp_ctx->mutex);
1158
1159 if (zpool_can_sleep_mapped(zpool))
1160 zpool_unmap_handle(zpool, entry->handle);
1161}
1162
9986d35d
JW
1163/*********************************
1164* writeback code
1165**********************************/
1166/*
1167 * Attempts to free an entry by adding a folio to the swap cache,
1168 * decompressing the entry data into the folio, and issuing a
1169 * bio write to write the folio back to the swap device.
1170 *
1171 * This can be thought of as a "resumed writeback" of the folio
1172 * to the swap device. We are basically resuming the same swap
1173 * writeback path that was intercepted with the zswap_store()
1174 * in the first place. After the folio has been decompressed into
1175 * the swap cache, the compressed version stored by zswap can be
1176 * freed.
1177 */
1178static int zswap_writeback_entry(struct zswap_entry *entry,
1179 swp_entry_t swpentry)
1180{
1181 struct zswap_tree *tree;
1182 struct folio *folio;
1183 struct mempolicy *mpol;
1184 bool folio_was_allocated;
1185 struct writeback_control wbc = {
1186 .sync_mode = WB_SYNC_NONE,
1187 };
1188
1189 /* try to allocate swap cache folio */
1190 mpol = get_task_policy(current);
1191 folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
1192 NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
1193 if (!folio)
1194 return -ENOMEM;
1195
1196 /*
1197 * Found an existing folio, we raced with swapin or concurrent
1198 * shrinker. We generally writeback cold folios from zswap, and
1199 * swapin means the folio just became hot, so skip this folio.
1200 * For unlikely concurrent shrinker case, it will be unlinked
1201 * and freed when invalidated by the concurrent shrinker anyway.
1202 */
1203 if (!folio_was_allocated) {
1204 folio_put(folio);
1205 return -EEXIST;
1206 }
1207
1208 /*
1209 * folio is locked, and the swapcache is now secured against
1210 * concurrent swapping to and from the slot. Verify that the
1211 * swap entry hasn't been invalidated and recycled behind our
1212 * backs (our zswap_entry reference doesn't prevent that), to
1213 * avoid overwriting a new swap folio with old compressed data.
1214 */
1215 tree = swap_zswap_tree(swpentry);
1216 spin_lock(&tree->lock);
1217 if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
1218 spin_unlock(&tree->lock);
1219 delete_from_swap_cache(folio);
1220 folio_unlock(folio);
1221 folio_put(folio);
1222 return -ENOMEM;
1223 }
1224
1225 /* Safe to deref entry after the entry is verified above. */
1226 zswap_entry_get(entry);
1227 spin_unlock(&tree->lock);
1228
1229 zswap_decompress(entry, &folio->page);
1230
1231 count_vm_event(ZSWPWB);
1232 if (entry->objcg)
1233 count_objcg_event(entry->objcg, ZSWPWB);
1234
1235 spin_lock(&tree->lock);
1236 zswap_invalidate_entry(tree, entry);
1237 zswap_entry_put(entry);
1238 spin_unlock(&tree->lock);
1239
1240 /* folio is up to date */
1241 folio_mark_uptodate(folio);
1242
1243 /* move it to the tail of the inactive list after end_writeback */
1244 folio_set_reclaim(folio);
1245
1246 /* start writeback */
1247 __swap_writepage(folio, &wbc);
1248 folio_put(folio);
1249
1250 return 0;
1251}
1252
b5ba474f
NP
1253/*********************************
1254* shrinker functions
1255**********************************/
1256static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
eb23ee4f
JW
1257 spinlock_t *lock, void *arg)
1258{
1259 struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
1260 bool *encountered_page_in_swapcache = (bool *)arg;
1261 swp_entry_t swpentry;
1262 enum lru_status ret = LRU_REMOVED_RETRY;
1263 int writeback_result;
1264
1265 /*
1266 * Rotate the entry to the tail before unlocking the LRU,
1267 * so that in case of an invalidation race concurrent
1268 * reclaimers don't waste their time on it.
1269 *
1270 * If writeback succeeds, or failure is due to the entry
1271 * being invalidated by the swap subsystem, the invalidation
1272 * will unlink and free it.
1273 *
1274 * Temporary failures, where the same entry should be tried
1275 * again immediately, almost never happen for this shrinker.
1276 * We don't do any trylocking; -ENOMEM comes closest,
1277 * but that's extremely rare and doesn't happen spuriously
1278 * either. Don't bother distinguishing this case.
1279 *
1280 * But since they do exist in theory, the entry cannot just
1281 * be unlinked, or we could leak it. Hence, rotate.
1282 */
1283 list_move_tail(item, &l->list);
1284
1285 /*
1286 * Once the lru lock is dropped, the entry might get freed. The
1287 * swpentry is copied to the stack, and entry isn't deref'd again
1288 * until the entry is verified to still be alive in the tree.
1289 */
1290 swpentry = entry->swpentry;
1291
1292 /*
1293 * It's safe to drop the lock here because we return either
1294 * LRU_REMOVED_RETRY or LRU_RETRY.
1295 */
1296 spin_unlock(lock);
1297
1298 writeback_result = zswap_writeback_entry(entry, swpentry);
1299
1300 if (writeback_result) {
1301 zswap_reject_reclaim_fail++;
1302 ret = LRU_RETRY;
1303
1304 /*
1305 * Encountering a page already in swap cache is a sign that we are shrinking
1306 * into the warmer region. We should terminate shrinking (if we're in the dynamic
1307 * shrinker context).
1308 */
1309 if (writeback_result == -EEXIST && encountered_page_in_swapcache)
1310 *encountered_page_in_swapcache = true;
1311 } else {
1312 zswap_written_back_pages++;
1313 }
1314
1315 spin_lock(lock);
1316 return ret;
1317}
b5ba474f
NP
1318
1319static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
1320 struct shrink_control *sc)
1321{
1322 struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
1323 unsigned long shrink_ret, nr_protected, lru_size;
1324 struct zswap_pool *pool = shrinker->private_data;
1325 bool encountered_page_in_swapcache = false;
1326
501a06fe
NP
1327 if (!zswap_shrinker_enabled ||
1328 !mem_cgroup_zswap_writeback_enabled(sc->memcg)) {
b5ba474f
NP
1329 sc->nr_scanned = 0;
1330 return SHRINK_STOP;
1331 }
1332
1333 nr_protected =
1334 atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
1335 lru_size = list_lru_shrink_count(&pool->list_lru, sc);
1336
1337 /*
1338 * Abort if we are shrinking into the protected region.
1339 *
1340 * This short-circuiting is necessary because if we have too many
1341 * concurrent reclaimers getting the freeable zswap object counts at the
1342 * same time (before any of them made reasonable progress), the total
1343 * number of reclaimed objects might be more than the number of unprotected
1344 * objects (i.e. the reclaimers will reclaim into the protected area of the
1345 * zswap LRU).
1346 */
1347 if (nr_protected >= lru_size - sc->nr_to_scan) {
1348 sc->nr_scanned = 0;
1349 return SHRINK_STOP;
1350 }
1351
1352 shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb,
1353 &encountered_page_in_swapcache);
1354
1355 if (encountered_page_in_swapcache)
1356 return SHRINK_STOP;
1357
1358 return shrink_ret ? shrink_ret : SHRINK_STOP;
1359}
1360
1361static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
1362 struct shrink_control *sc)
1363{
1364 struct zswap_pool *pool = shrinker->private_data;
1365 struct mem_cgroup *memcg = sc->memcg;
1366 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
1367 unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;
1368
501a06fe 1369 if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
b5ba474f
NP
1370 return 0;
1371
1372#ifdef CONFIG_MEMCG_KMEM
7d7ef0a4 1373 mem_cgroup_flush_stats(memcg);
b5ba474f
NP
1374 nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
1375 nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
1376#else
1377 /* use pool stats instead of memcg stats */
1378 nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
1379 nr_stored = atomic_read(&pool->nr_stored);
1380#endif
1381
1382 if (!nr_stored)
1383 return 0;
1384
1385 nr_protected =
1386 atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
1387 nr_freeable = list_lru_shrink_count(&pool->list_lru, sc);
1388 /*
1389 * Subtract from the lru size an estimate of the number of pages
1390 * that should be protected.
1391 */
1392 nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0;
1393
1394 /*
1395 * Scale the number of freeable pages by the memory saving factor.
1396 * This ensures that the better zswap compresses memory, the fewer
1397 * pages we will evict to swap (as it will otherwise incur IO for
1398 * relatively small memory saving).
1399 */
1400 return mult_frac(nr_freeable, nr_backing, nr_stored);
1401}
1402
1403static void zswap_alloc_shrinker(struct zswap_pool *pool)
1404{
1405 pool->shrinker =
1406 shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
1407 if (!pool->shrinker)
1408 return;
1409
1410 pool->shrinker->private_data = pool;
1411 pool->shrinker->scan_objects = zswap_shrinker_scan;
1412 pool->shrinker->count_objects = zswap_shrinker_count;
1413 pool->shrinker->batch = 0;
1414 pool->shrinker->seeks = DEFAULT_SEEKS;
1415}
1416
a65b0e76
DC
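/*
 * Write back up to one zswap LRU entry per node from @memcg's LRU;
 * returns 0 if anything was reclaimed, a negative errno otherwise.
 */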
1417static int shrink_memcg(struct mem_cgroup *memcg)
1418{
1419 struct zswap_pool *pool;
1420 int nid, shrunk = 0;
1421
501a06fe
NP
1422 if (!mem_cgroup_zswap_writeback_enabled(memcg))
1423 return -EINVAL;
1424
a65b0e76
DC
1425 /*
1426 * Skip zombies because their LRUs are reparented and we would be
1427 * reclaiming from the parent instead of the dead memcg.
1428 */
1429 if (memcg && !mem_cgroup_online(memcg))
1430 return -ENOENT;
1431
1432 pool = zswap_pool_current_get();
1433 if (!pool)
1434 return -EINVAL;
1435
1436 for_each_node_state(nid, N_NORMAL_MEMORY) {
1437 unsigned long nr_to_walk = 1;
1438
1439 shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg,
1440 &shrink_memcg_cb, NULL, &nr_to_walk);
1441 }
1442 zswap_pool_put(pool);
1443 return shrunk ? 0 : -EAGAIN;
f999f38b
DC
1444}
1445
45190f01
VW
1446static void shrink_worker(struct work_struct *w)
1447{
1448 struct zswap_pool *pool = container_of(w, typeof(*pool),
1449 shrink_work);
a65b0e76 1450 struct mem_cgroup *memcg;
e0228d59
DC
1451 int ret, failures = 0;
1452
a65b0e76 1453 /* global reclaim will select cgroup in a round-robin fashion. */
e0228d59 1454 do {
a65b0e76
DC
1455 spin_lock(&zswap_pools_lock);
1456 pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
1457 memcg = pool->next_shrink;
1458
1459 /*
1460 * We need to retry if we have gone through a full round trip, or if we
1461 * got an offline memcg (or else we risk undoing the effect of the
1462 * zswap memcg offlining cleanup callback). This is not catastrophic
1463 * per se, but it will keep the now offlined memcg hostage for a while.
1464 *
1465 * Note that if we got an online memcg, we will keep the extra
1466 * reference in case the original reference obtained by mem_cgroup_iter
1467 * is dropped by the zswap memcg offlining callback, ensuring that the
1468 * memcg is not killed when we are reclaiming.
1469 */
1470 if (!memcg) {
1471 spin_unlock(&zswap_pools_lock);
1472 if (++failures == MAX_RECLAIM_RETRIES)
e0228d59 1473 break;
a65b0e76
DC
1474
1475 goto resched;
1476 }
1477
1478 if (!mem_cgroup_tryget_online(memcg)) {
1479 /* drop the reference from mem_cgroup_iter() */
1480 mem_cgroup_iter_break(NULL, memcg);
1481 pool->next_shrink = NULL;
1482 spin_unlock(&zswap_pools_lock);
1483
e0228d59
DC
1484 if (++failures == MAX_RECLAIM_RETRIES)
1485 break;
a65b0e76
DC
1486
1487 goto resched;
e0228d59 1488 }
a65b0e76
DC
1489 spin_unlock(&zswap_pools_lock);
1490
1491 ret = shrink_memcg(memcg);
1492 /* drop the extra reference */
1493 mem_cgroup_put(memcg);
1494
1495 if (ret == -EINVAL)
1496 break;
1497 if (ret && ++failures == MAX_RECLAIM_RETRIES)
1498 break;
1499
1500resched:
e0228d59
DC
1501 cond_resched();
1502 } while (!zswap_can_accept());
45190f01
VW
1503 zswap_pool_put(pool);
1504}
1505
a85f878b
SD
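/* Return 1 and set *value if every word in the page holds the same value */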
1506static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1507{
a85f878b 1508 unsigned long *page;
62bf1258
TS
1509 unsigned long val;
1510 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
a85f878b
SD
1511
1512 page = (unsigned long *)ptr;
62bf1258
TS
1513 val = page[0];
1514
1515 if (val != page[last_pos])
1516 return 0;
1517
1518 for (pos = 1; pos < last_pos; pos++) {
1519 if (val != page[pos])
a85f878b
SD
1520 return 0;
1521 }
62bf1258
TS
1522
1523 *value = val;
1524
a85f878b
SD
1525 return 1;
1526}
1527
1528static void zswap_fill_page(void *ptr, unsigned long value)
1529{
1530 unsigned long *page;
1531
1532 page = (unsigned long *)ptr;
1533 memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1534}
1535
34f4c198 1536bool zswap_store(struct folio *folio)
2b281117 1537{
3d2c9087 1538 swp_entry_t swp = folio->swap;
42c06a0e 1539 pgoff_t offset = swp_offset(swp);
44c7c734 1540 struct zswap_tree *tree = swap_zswap_tree(swp);
2b281117 1541 struct zswap_entry *entry, *dupentry;
f4840ccf 1542 struct obj_cgroup *objcg = NULL;
a65b0e76 1543 struct mem_cgroup *memcg = NULL;
be7fc97c 1544 struct zswap_pool *shrink_pool;
42c06a0e 1545
34f4c198
MWO
1546 VM_WARN_ON_ONCE(!folio_test_locked(folio));
1547 VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
2b281117 1548
34f4c198
MWO
1549 /* Large folios aren't supported */
1550 if (folio_test_large(folio))
42c06a0e 1551 return false;
7ba71669 1552
ca56489c
DC
1553 /*
1554 * If this is a duplicate, it must be removed before attempting to store
1555 * it; otherwise, if the store fails, the old page won't be removed from
1556 * the tree and might be written back, overwriting the new data.
1557 */
1558 spin_lock(&tree->lock);
be7fc97c
JW
1559 entry = zswap_rb_search(&tree->rbroot, offset);
1560 if (entry) {
1561 zswap_invalidate_entry(tree, entry);
ca56489c 1562 zswap_duplicate_entry++;
ca56489c
DC
1563 }
1564 spin_unlock(&tree->lock);
678e54d4
CZ
1565
1566 if (!zswap_enabled)
1567 return false;
1568
074e3e26 1569 objcg = get_obj_cgroup_from_folio(folio);
a65b0e76
DC
1570 if (objcg && !obj_cgroup_may_zswap(objcg)) {
1571 memcg = get_mem_cgroup_from_objcg(objcg);
1572 if (shrink_memcg(memcg)) {
1573 mem_cgroup_put(memcg);
1574 goto reject;
1575 }
1576 mem_cgroup_put(memcg);
1577 }
f4840ccf 1578
2b281117
SJ
1579 /* reclaim space if needed */
1580 if (zswap_is_full()) {
1581 zswap_pool_limit_hit++;
45190f01 1582 zswap_pool_reached_full = true;
f4840ccf 1583 goto shrink;
45190f01 1584 }
16e536ef 1585
45190f01 1586 if (zswap_pool_reached_full) {
42c06a0e 1587 if (!zswap_can_accept())
e0228d59 1588 goto shrink;
42c06a0e 1589 else
45190f01 1590 zswap_pool_reached_full = false;
2b281117
SJ
1591 }
1592
1593 /* allocate entry */
be7fc97c 1594 entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
2b281117
SJ
1595 if (!entry) {
1596 zswap_reject_kmemcache_fail++;
2b281117
SJ
1597 goto reject;
1598 }
1599
a85f878b 1600 if (zswap_same_filled_pages_enabled) {
be7fc97c
JW
1601 unsigned long value;
1602 u8 *src;
1603
1604 src = kmap_local_folio(folio, 0);
a85f878b 1605 if (zswap_is_page_same_filled(src, &value)) {
003ae2fb 1606 kunmap_local(src);
a85f878b
SD
1607 entry->length = 0;
1608 entry->value = value;
1609 atomic_inc(&zswap_same_filled_pages);
1610 goto insert_entry;
1611 }
003ae2fb 1612 kunmap_local(src);
a85f878b
SD
1613 }
1614
42c06a0e 1615 if (!zswap_non_same_filled_pages_enabled)
cb325ddd 1616 goto freepage;
cb325ddd 1617
f1c54846
DS
1618 /* if entry is successfully added, it keeps the reference */
1619 entry->pool = zswap_pool_current_get();
42c06a0e 1620 if (!entry->pool)
f1c54846 1621 goto freepage;
f1c54846 1622
a65b0e76
DC
1623 if (objcg) {
1624 memcg = get_mem_cgroup_from_objcg(objcg);
1625 if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) {
1626 mem_cgroup_put(memcg);
1627 goto put_pool;
1628 }
1629 mem_cgroup_put(memcg);
1630 }
1631
fa9ad6e2
JW
1632 if (!zswap_compress(folio, entry))
1633 goto put_pool;
1ec3b5fe 1634
a85f878b 1635insert_entry:
be7fc97c 1636 entry->swpentry = swp;
f4840ccf
JW
1637 entry->objcg = objcg;
1638 if (objcg) {
1639 obj_cgroup_charge_zswap(objcg, entry->length);
1640 /* Account before objcg ref is moved to tree */
1641 count_objcg_event(objcg, ZSWPOUT);
1642 }
1643
2b281117
SJ
1644 /* map */
1645 spin_lock(&tree->lock);
ca56489c
DC
1646 /*
1647 * A duplicate entry should have been removed at the beginning of this
1648 * function. Since the swap entry should be pinned, if a duplicate is
1649 * found again here it means that something went wrong in the swap
1650 * cache.
1651 */
42c06a0e 1652 while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
ca56489c 1653 WARN_ON(1);
42c06a0e 1654 zswap_duplicate_entry++;
56c67049 1655 zswap_invalidate_entry(tree, dupentry);
42c06a0e 1656 }
35499e2b 1657 if (entry->length) {
a65b0e76
DC
1658 INIT_LIST_HEAD(&entry->lru);
1659 zswap_lru_add(&entry->pool->list_lru, entry);
b5ba474f 1660 atomic_inc(&entry->pool->nr_stored);
f999f38b 1661 }
2b281117
SJ
1662 spin_unlock(&tree->lock);
1663
1664 /* update stats */
1665 atomic_inc(&zswap_stored_pages);
f1c54846 1666 zswap_update_total_size();
f6498b77 1667 count_vm_event(ZSWPOUT);
2b281117 1668
42c06a0e 1669 return true;
2b281117 1670
a65b0e76 1671put_pool:
f1c54846
DS
1672 zswap_pool_put(entry->pool);
1673freepage:
2b281117
SJ
1674 zswap_entry_cache_free(entry);
1675reject:
f4840ccf
JW
1676 if (objcg)
1677 obj_cgroup_put(objcg);
42c06a0e 1678 return false;
f4840ccf
JW
1679
1680shrink:
be7fc97c
JW
1681 shrink_pool = zswap_pool_last_get();
1682 if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work))
1683 zswap_pool_put(shrink_pool);
f4840ccf 1684 goto reject;
2b281117
SJ
1685}
1686
ca54f6d8 1687bool zswap_load(struct folio *folio)
2b281117 1688{
3d2c9087 1689 swp_entry_t swp = folio->swap;
42c06a0e 1690 pgoff_t offset = swp_offset(swp);
ca54f6d8 1691 struct page *page = &folio->page;
44c7c734 1692 struct zswap_tree *tree = swap_zswap_tree(swp);
2b281117 1693 struct zswap_entry *entry;
32acba4c 1694 u8 *dst;
42c06a0e 1695
ca54f6d8 1696 VM_WARN_ON_ONCE(!folio_test_locked(folio));
2b281117 1697
2b281117 1698 spin_lock(&tree->lock);
5b297f70 1699 entry = zswap_rb_search(&tree->rbroot, offset);
2b281117 1700 if (!entry) {
2b281117 1701 spin_unlock(&tree->lock);
42c06a0e 1702 return false;
2b281117 1703 }
5b297f70 1704 zswap_entry_get(entry);
2b281117
SJ
1705 spin_unlock(&tree->lock);
1706
66447fd0 1707 if (entry->length)
ff2972aa 1708 zswap_decompress(entry, page);
66447fd0 1709 else {
003ae2fb 1710 dst = kmap_local_page(page);
a85f878b 1711 zswap_fill_page(dst, entry->value);
003ae2fb 1712 kunmap_local(dst);
a85f878b
SD
1713 }
1714
f6498b77 1715 count_vm_event(ZSWPIN);
f4840ccf
JW
1716 if (entry->objcg)
1717 count_objcg_event(entry->objcg, ZSWPIN);
c75f5c1e 1718
2b281117 1719 spin_lock(&tree->lock);
66447fd0 1720 if (zswap_exclusive_loads_enabled) {
b9c91c43 1721 zswap_invalidate_entry(tree, entry);
ca54f6d8 1722 folio_mark_dirty(folio);
35499e2b 1723 } else if (entry->length) {
a65b0e76
DC
1724 zswap_lru_del(&entry->pool->list_lru, entry);
1725 zswap_lru_add(&entry->pool->list_lru, entry);
b9c91c43 1726 }
db128f5f 1727 zswap_entry_put(entry);
2b281117
SJ
1728 spin_unlock(&tree->lock);
1729
66447fd0 1730 return true;
2b281117
SJ
1731}
1732
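/* Invalidate the zswap entry (if any) for a swap slot that is being freed */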
42c06a0e 1733void zswap_invalidate(int type, pgoff_t offset)
2b281117 1734{
44c7c734 1735 struct zswap_tree *tree = swap_zswap_tree(swp_entry(type, offset));
2b281117 1736 struct zswap_entry *entry;
2b281117 1737
2b281117
SJ
1738 spin_lock(&tree->lock);
1739 entry = zswap_rb_search(&tree->rbroot, offset);
06ed2289
JW
1740 if (entry)
1741 zswap_invalidate_entry(tree, entry);
2b281117 1742 spin_unlock(&tree->lock);
2b281117
SJ
1743}
1744
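/*
 * Called at swapon time: allocate one zswap_tree per
 * SWAP_ADDRESS_SPACE_PAGES slots of the new swap device.
 */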
44c7c734 1745int zswap_swapon(int type, unsigned long nr_pages)
42c06a0e 1746{
44c7c734
CZ
1747 struct zswap_tree *trees, *tree;
1748 unsigned int nr, i;
42c06a0e 1749
44c7c734
CZ
1750 nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
1751 trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL);
1752 if (!trees) {
42c06a0e 1753 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
bb29fd77 1754 return -ENOMEM;
42c06a0e
JW
1755 }
1756
44c7c734
CZ
1757 for (i = 0; i < nr; i++) {
1758 tree = trees + i;
1759 tree->rbroot = RB_ROOT;
1760 spin_lock_init(&tree->lock);
1761 }
1762
1763 nr_zswap_trees[type] = nr;
1764 zswap_trees[type] = trees;
bb29fd77 1765 return 0;
42c06a0e
JW
1766}
1767
1768void zswap_swapoff(int type)
2b281117 1769{
44c7c734
CZ
1770 struct zswap_tree *trees = zswap_trees[type];
1771 unsigned int i;
2b281117 1772
44c7c734 1773 if (!trees)
2b281117
SJ
1774 return;
1775
83e68f25
YA
1776 /* try_to_unuse() invalidated all the entries already */
1777 for (i = 0; i < nr_zswap_trees[type]; i++)
1778 WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot));
44c7c734
CZ
1779
1780 kvfree(trees);
1781 nr_zswap_trees[type] = 0;
aa9bca05 1782 zswap_trees[type] = NULL;
2b281117
SJ
1783}
1784
2b281117
SJ
1785/*********************************
1786* debugfs functions
1787**********************************/
1788#ifdef CONFIG_DEBUG_FS
1789#include <linux/debugfs.h>
1790
1791static struct dentry *zswap_debugfs_root;
1792
141fdeec 1793static int zswap_debugfs_init(void)
2b281117
SJ
1794{
1795 if (!debugfs_initialized())
1796 return -ENODEV;
1797
1798 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
2b281117 1799
0825a6f9
JP
1800 debugfs_create_u64("pool_limit_hit", 0444,
1801 zswap_debugfs_root, &zswap_pool_limit_hit);
1802 debugfs_create_u64("reject_reclaim_fail", 0444,
1803 zswap_debugfs_root, &zswap_reject_reclaim_fail);
1804 debugfs_create_u64("reject_alloc_fail", 0444,
1805 zswap_debugfs_root, &zswap_reject_alloc_fail);
1806 debugfs_create_u64("reject_kmemcache_fail", 0444,
1807 zswap_debugfs_root, &zswap_reject_kmemcache_fail);
cb61dad8
NP
1808 debugfs_create_u64("reject_compress_fail", 0444,
1809 zswap_debugfs_root, &zswap_reject_compress_fail);
0825a6f9
JP
1810 debugfs_create_u64("reject_compress_poor", 0444,
1811 zswap_debugfs_root, &zswap_reject_compress_poor);
1812 debugfs_create_u64("written_back_pages", 0444,
1813 zswap_debugfs_root, &zswap_written_back_pages);
1814 debugfs_create_u64("duplicate_entry", 0444,
1815 zswap_debugfs_root, &zswap_duplicate_entry);
1816 debugfs_create_u64("pool_total_size", 0444,
1817 zswap_debugfs_root, &zswap_pool_total_size);
1818 debugfs_create_atomic_t("stored_pages", 0444,
1819 zswap_debugfs_root, &zswap_stored_pages);
a85f878b 1820 debugfs_create_atomic_t("same_filled_pages", 0444,
0825a6f9 1821 zswap_debugfs_root, &zswap_same_filled_pages);
2b281117
SJ
1822
1823 return 0;
1824}
2b281117 1825#else
141fdeec 1826static int zswap_debugfs_init(void)
2b281117
SJ
1827{
1828 return 0;
1829}
2b281117
SJ
1830#endif
1831
1832/*********************************
1833* module init and exit
1834**********************************/
141fdeec 1835static int zswap_setup(void)
2b281117 1836{
f1c54846 1837 struct zswap_pool *pool;
ad7ed770 1838 int ret;
60105e12 1839
b7919122
LS
1840 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
1841 if (!zswap_entry_cache) {
2b281117 1842 pr_err("entry cache creation failed\n");
f1c54846 1843 goto cache_fail;
2b281117 1844 }
f1c54846 1845
cab7a7e5
SAS
1846 ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
1847 "mm/zswap_pool:prepare",
1848 zswap_cpu_comp_prepare,
1849 zswap_cpu_comp_dead);
1850 if (ret)
1851 goto hp_fail;
1852
f1c54846 1853 pool = __zswap_pool_create_fallback();
ae3d89a7
DS
1854 if (pool) {
1855 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
b8cf32dc 1856 zpool_get_type(pool->zpools[0]));
ae3d89a7
DS
1857 list_add(&pool->list, &zswap_pools);
1858 zswap_has_pool = true;
1859 } else {
f1c54846 1860 pr_err("pool creation failed\n");
ae3d89a7 1861 zswap_enabled = false;
2b281117 1862 }
60105e12 1863
8409a385
RM
1864 shrink_wq = alloc_workqueue("zswap-shrink",
1865 WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
45190f01
VW
1866 if (!shrink_wq)
1867 goto fallback_fail;
1868
2b281117
SJ
1869 if (zswap_debugfs_init())
1870 pr_warn("debugfs initialization failed\n");
9021ccec 1871 zswap_init_state = ZSWAP_INIT_SUCCEED;
2b281117 1872 return 0;
f1c54846 1873
45190f01 1874fallback_fail:
38aeb071
DC
1875 if (pool)
1876 zswap_pool_destroy(pool);
cab7a7e5 1877hp_fail:
b7919122 1878 kmem_cache_destroy(zswap_entry_cache);
f1c54846 1879cache_fail:
d7b028f5 1880 /* if built-in, we aren't unloaded on failure; don't allow use */
9021ccec 1881 zswap_init_state = ZSWAP_INIT_FAILED;
d7b028f5 1882 zswap_enabled = false;
2b281117
SJ
1883 return -ENOMEM;
1884}
141fdeec
LS
1885
1886static int __init zswap_init(void)
1887{
1888 if (!zswap_enabled)
1889 return 0;
1890 return zswap_setup();
1891}
2b281117 1892/* must be late so crypto has time to come up */
141fdeec 1893late_initcall(zswap_init);
2b281117 1894
68386da8 1895MODULE_AUTHOR("Seth Jennings <[email protected]>");
2b281117 1896MODULE_DESCRIPTION("Compressed cache for swap pages");