[linux.git] / mm / zswap.c
c942fddf 1// SPDX-License-Identifier: GPL-2.0-or-later
2b281117
SJ
2/*
3 * zswap.c - zswap driver file
4 *
42c06a0e 5 * zswap is a cache that takes pages that are in the process
2b281117
SJ
6 * of being swapped out and attempts to compress and store them in a
7 * RAM-based memory pool. This can result in a significant I/O reduction on
8 * the swap device and, in the case where decompressing from RAM is faster
9 * than reading from the swap device, can also improve workload performance.
10 *
11 * Copyright (C) 2012 Seth Jennings <[email protected]>
2b281117
SJ
12*/
13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16#include <linux/module.h>
17#include <linux/cpu.h>
18#include <linux/highmem.h>
19#include <linux/slab.h>
20#include <linux/spinlock.h>
21#include <linux/types.h>
22#include <linux/atomic.h>
2b281117
SJ
23#include <linux/rbtree.h>
24#include <linux/swap.h>
25#include <linux/crypto.h>
1ec3b5fe 26#include <linux/scatterlist.h>
ddc1a5cb 27#include <linux/mempolicy.h>
2b281117 28#include <linux/mempool.h>
12d79d64 29#include <linux/zpool.h>
1ec3b5fe 30#include <crypto/acompress.h>
42c06a0e 31#include <linux/zswap.h>
2b281117
SJ
32#include <linux/mm_types.h>
33#include <linux/page-flags.h>
34#include <linux/swapops.h>
35#include <linux/writeback.h>
36#include <linux/pagemap.h>
45190f01 37#include <linux/workqueue.h>
a65b0e76 38#include <linux/list_lru.h>
2b281117 39
014bb1de 40#include "swap.h"
e0228d59 41#include "internal.h"
014bb1de 42
2b281117
SJ
43/*********************************
44* statistics
45**********************************/
12d79d64 46/* Total bytes used by the compressed storage */
f6498b77 47u64 zswap_pool_total_size;
2b281117 48/* The number of compressed pages currently stored in zswap */
f6498b77 49atomic_t zswap_stored_pages = ATOMIC_INIT(0);
a85f878b
SD
50/* The number of same-value filled pages currently stored in zswap */
51static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
2b281117
SJ
52
53/*
54 * The statistics below are not protected from concurrent access for
55 * performance reasons, so they may not be 100% accurate. However,
56 * they do provide useful information on roughly how many times a
57 * certain event is occurring.
58*/
59
60/* Pool limit was hit (see zswap_max_pool_percent) */
61static u64 zswap_pool_limit_hit;
62/* Pages written back when pool limit was reached */
63static u64 zswap_written_back_pages;
64/* Store failed due to a reclaim failure after pool limit was reached */
65static u64 zswap_reject_reclaim_fail;
cb61dad8
NP
66/* Store failed due to compression algorithm failure */
67static u64 zswap_reject_compress_fail;
2b281117
SJ
68/* Compressed page was too big for the allocator to (optimally) store */
69static u64 zswap_reject_compress_poor;
70/* Store failed because underlying allocator could not get memory */
71static u64 zswap_reject_alloc_fail;
72/* Store failed because the entry metadata could not be allocated (rare) */
73static u64 zswap_reject_kmemcache_fail;
2b281117 74
45190f01
VW
75/* Shrinker work queue */
76static struct workqueue_struct *shrink_wq;
77/* Pool limit was hit, we need to calm down */
78static bool zswap_pool_reached_full;
79
2b281117
SJ
80/*********************************
81* tunables
82**********************************/
c00ed16a 83
bae21db8
DS
84#define ZSWAP_PARAM_UNSET ""
85
141fdeec
LS
86static int zswap_setup(void);
87
bb8b93b5
MS
88/* Enable/disable zswap */
89static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
d7b028f5
DS
90static int zswap_enabled_param_set(const char *,
91 const struct kernel_param *);
83aed6cd 92static const struct kernel_param_ops zswap_enabled_param_ops = {
d7b028f5
DS
93 .set = zswap_enabled_param_set,
94 .get = param_get_bool,
95};
96module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
2b281117 97
90b0fc26 98/* Crypto compressor to use */
bb8b93b5 99static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
90b0fc26
DS
100static int zswap_compressor_param_set(const char *,
101 const struct kernel_param *);
83aed6cd 102static const struct kernel_param_ops zswap_compressor_param_ops = {
90b0fc26 103 .set = zswap_compressor_param_set,
c99b42c3
DS
104 .get = param_get_charp,
105 .free = param_free_charp,
90b0fc26
DS
106};
107module_param_cb(compressor, &zswap_compressor_param_ops,
c99b42c3 108 &zswap_compressor, 0644);
2b281117 109
90b0fc26 110/* Compressed storage zpool to use */
bb8b93b5 111static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
90b0fc26 112static int zswap_zpool_param_set(const char *, const struct kernel_param *);
83aed6cd 113static const struct kernel_param_ops zswap_zpool_param_ops = {
c99b42c3
DS
114 .set = zswap_zpool_param_set,
115 .get = param_get_charp,
116 .free = param_free_charp,
90b0fc26 117};
c99b42c3 118module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
12d79d64 119
90b0fc26
DS
120/* The maximum percentage of memory that the compressed pool can occupy */
121static unsigned int zswap_max_pool_percent = 20;
122module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
60105e12 123
45190f01
VW
124/* The threshold for accepting new pages after the max_pool_percent was hit */
125static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
126module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
127 uint, 0644);
128
cb325ddd
MS
129/*
130 * Enable/disable handling same-value filled pages (enabled by default).
131 * If disabled every page is considered non-same-value filled.
132 */
a85f878b
SD
133static bool zswap_same_filled_pages_enabled = true;
134module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
135 bool, 0644);
136
cb325ddd
MS
137/* Enable/disable handling non-same-value filled pages (enabled by default) */
138static bool zswap_non_same_filled_pages_enabled = true;
139module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
140 bool, 0644);
141
b8cf32dc
YA
142/* Number of zpools in zswap_pool (empirically determined for scalability) */
143#define ZSWAP_NR_ZPOOLS 32
144
b5ba474f
NP
145/* Enable/disable memory pressure-based shrinker. */
146static bool zswap_shrinker_enabled = IS_ENABLED(
147 CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
148module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
149
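/*
 * Illustrative usage (not part of the driver): the tunables above are
 * exposed as module parameters, so on a typical build they can be
 * inspected and changed at runtime under /sys/module/zswap/parameters/,
 * for example:
 *
 *   echo 1    > /sys/module/zswap/parameters/enabled
 *   echo zstd > /sys/module/zswap/parameters/compressor
 *   echo 25   > /sys/module/zswap/parameters/max_pool_percent
 *
 * Whether a given value is accepted depends on the param callbacks
 * defined in this file (e.g. the compressor must be available to the
 * crypto API).
 */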
501a06fe
NP
150bool is_zswap_enabled(void)
151{
152 return zswap_enabled;
153}
154
2b281117 155/*********************************
f1c54846 156* data structures
2b281117 157**********************************/
2b281117 158
1ec3b5fe
BS
159struct crypto_acomp_ctx {
160 struct crypto_acomp *acomp;
161 struct acomp_req *req;
162 struct crypto_wait wait;
8ba2f844
CZ
163 u8 *buffer;
164 struct mutex mutex;
1ec3b5fe
BS
165};
166
f999f38b
DC
167/*
168 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
169 * The only case where lru_lock is not acquired while holding tree.lock is
170 * when a zswap_entry is taken off the lru for writeback; in that case it
171 * must be verified that it is still valid in the tree.
172 */
f1c54846 173struct zswap_pool {
b8cf32dc 174 struct zpool *zpools[ZSWAP_NR_ZPOOLS];
1ec3b5fe 175 struct crypto_acomp_ctx __percpu *acomp_ctx;
f1c54846
DS
176 struct kref kref;
177 struct list_head list;
45190f01
VW
178 struct work_struct release_work;
179 struct work_struct shrink_work;
cab7a7e5 180 struct hlist_node node;
f1c54846 181 char tfm_name[CRYPTO_MAX_ALG_NAME];
a65b0e76
DC
182 struct list_lru list_lru;
183 struct mem_cgroup *next_shrink;
b5ba474f
NP
184 struct shrinker *shrinker;
185 atomic_t nr_stored;
2b281117
SJ
186};
187
2b281117
SJ
188/*
189 * struct zswap_entry
190 *
191 * This structure contains the metadata for tracking a single compressed
192 * page within zswap.
193 *
194 * rbnode - links the entry into red-black tree for the appropriate swap type
97157d89 195 * swpentry - associated swap entry, the offset indexes into the red-black tree
2b281117
SJ
196 * refcount - the number of outstanding references to the entry. This is needed
 197 * to protect against premature freeing of the entry by
198 * concurrent calls to load, invalidate, and writeback. The lock
2b281117
SJ
199 * for the zswap_tree structure that contains the entry must
200 * be held while changing the refcount. Since the lock must
201 * be held, there is no reason to also make refcount atomic.
2b281117 202 * length - the length in bytes of the compressed page data. Needed during
f999f38b
DC
203 * decompression. For a same-value filled page, length is 0, and both
204 * pool and lru are invalid and must be ignored.
f1c54846
DS
205 * pool - the zswap_pool the entry's data is in
206 * handle - zpool allocation handle that stores the compressed page data
a85f878b 207 * value - the value stored for a same-value filled page
97157d89 208 * objcg - the obj_cgroup that the compressed memory is charged to
f999f38b 209 * lru - handle to the pool's lru used to evict pages.
2b281117
SJ
210 */
211struct zswap_entry {
212 struct rb_node rbnode;
0bb48849 213 swp_entry_t swpentry;
2b281117
SJ
214 int refcount;
215 unsigned int length;
f1c54846 216 struct zswap_pool *pool;
a85f878b
SD
217 union {
218 unsigned long handle;
219 unsigned long value;
220 };
f4840ccf 221 struct obj_cgroup *objcg;
f999f38b 222 struct list_head lru;
2b281117
SJ
223};
224
2b281117
SJ
225/*
226 * The tree lock in the zswap_tree struct protects a few things:
227 * - the rbtree
228 * - the refcount field of each entry in the tree
229 */
230struct zswap_tree {
231 struct rb_root rbroot;
232 spinlock_t lock;
2b281117
SJ
233};
234
235static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
44c7c734 236static unsigned int nr_zswap_trees[MAX_SWAPFILES];
2b281117 237
f1c54846
DS
238/* RCU-protected iteration */
239static LIST_HEAD(zswap_pools);
240/* protects zswap_pools list modification */
241static DEFINE_SPINLOCK(zswap_pools_lock);
32a4e169
DS
242/* pool counter to provide unique names to zpool */
243static atomic_t zswap_pools_count = ATOMIC_INIT(0);
f1c54846 244
9021ccec
LS
245enum zswap_init_type {
246 ZSWAP_UNINIT,
247 ZSWAP_INIT_SUCCEED,
248 ZSWAP_INIT_FAILED
249};
90b0fc26 250
9021ccec 251static enum zswap_init_type zswap_init_state;
90b0fc26 252
141fdeec
LS
253/* used to ensure the integrity of initialization */
254static DEFINE_MUTEX(zswap_init_lock);
d7b028f5 255
ae3d89a7
DS
256/* init completed, but couldn't create the initial pool */
257static bool zswap_has_pool;
258
f1c54846
DS
259/*********************************
260* helpers and fwd declarations
261**********************************/
262
44c7c734
CZ
263static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
264{
265 return &zswap_trees[swp_type(swp)][swp_offset(swp)
266 >> SWAP_ADDRESS_SPACE_SHIFT];
267}
268
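/*
 * Worked example (illustrative): assuming SWAP_ADDRESS_SPACE_SHIFT is 14,
 * each tree covers 1 << 14 = 16384 swap slots (64 MiB of swap with 4 KiB
 * pages). A swap entry of type 0 with offset 20000 therefore maps to
 * zswap_trees[0][1], since 20000 >> 14 == 1. Splitting the offset space
 * this way keeps each rbtree (and its lock) smaller than a single tree
 * per swap device would be.
 */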
f1c54846
DS
269#define zswap_pool_debug(msg, p) \
270 pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
b8cf32dc 271 zpool_get_type((p)->zpools[0]))
f1c54846 272
f1c54846
DS
273static bool zswap_is_full(void)
274{
ca79b0c2
AK
275 return totalram_pages() * zswap_max_pool_percent / 100 <
276 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
f1c54846
DS
277}
278
45190f01
VW
279static bool zswap_can_accept(void)
280{
281 return totalram_pages() * zswap_accept_thr_percent / 100 *
282 zswap_max_pool_percent / 100 >
283 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
284}
285
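/*
 * Worked example (illustrative): on a machine with 4 GiB of RAM
 * (totalram_pages() == 1048576 with 4 KiB pages) and the defaults
 * max_pool_percent = 20 and accept_thr_percent = 90, zswap_is_full()
 * triggers once the pool holds more than ~209715 pages (~819 MiB of
 * compressed data), and new stores are refused until the pool shrinks
 * back below 90% of that limit (~188743 pages, ~737 MiB).
 */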
b5ba474f
NP
286static u64 get_zswap_pool_size(struct zswap_pool *pool)
287{
288 u64 pool_size = 0;
289 int i;
290
291 for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
292 pool_size += zpool_get_total_size(pool->zpools[i]);
293
294 return pool_size;
295}
296
f1c54846
DS
297static void zswap_update_total_size(void)
298{
299 struct zswap_pool *pool;
300 u64 total = 0;
301
302 rcu_read_lock();
303
304 list_for_each_entry_rcu(pool, &zswap_pools, list)
b5ba474f 305 total += get_zswap_pool_size(pool);
f1c54846
DS
306
307 rcu_read_unlock();
308
309 zswap_pool_total_size = total;
310}
311
a984649b
JW
312/*********************************
313* pool functions
314**********************************/
315
316static void zswap_alloc_shrinker(struct zswap_pool *pool);
317static void shrink_worker(struct work_struct *w);
318
319static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
320{
321 int i;
322 struct zswap_pool *pool;
323 char name[38]; /* 'zswap' + 32 char (max) num + \0 */
324 gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
325 int ret;
326
327 if (!zswap_has_pool) {
328 /* if either is unset, pool initialization failed, and we
329 * need both params to be set correctly before trying to
330 * create a pool.
331 */
332 if (!strcmp(type, ZSWAP_PARAM_UNSET))
333 return NULL;
334 if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
335 return NULL;
336 }
337
338 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
339 if (!pool)
340 return NULL;
341
342 for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
343 /* unique name for each pool specifically required by zsmalloc */
344 snprintf(name, 38, "zswap%x",
345 atomic_inc_return(&zswap_pools_count));
346
347 pool->zpools[i] = zpool_create_pool(type, name, gfp);
348 if (!pool->zpools[i]) {
349 pr_err("%s zpool not available\n", type);
350 goto error;
351 }
352 }
353 pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
354
355 strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
356
357 pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
358 if (!pool->acomp_ctx) {
359 pr_err("percpu alloc failed\n");
360 goto error;
361 }
362
363 ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
364 &pool->node);
365 if (ret)
366 goto error;
367
368 zswap_alloc_shrinker(pool);
369 if (!pool->shrinker)
370 goto error;
371
372 pr_debug("using %s compressor\n", pool->tfm_name);
373
374 /* being the current pool takes 1 ref; this func expects the
375 * caller to always add the new pool as the current pool
376 */
377 kref_init(&pool->kref);
378 INIT_LIST_HEAD(&pool->list);
379 if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
380 goto lru_fail;
381 shrinker_register(pool->shrinker);
382 INIT_WORK(&pool->shrink_work, shrink_worker);
383 atomic_set(&pool->nr_stored, 0);
384
385 zswap_pool_debug("created", pool);
386
387 return pool;
388
389lru_fail:
390 list_lru_destroy(&pool->list_lru);
391 shrinker_free(pool->shrinker);
392error:
393 if (pool->acomp_ctx)
394 free_percpu(pool->acomp_ctx);
395 while (i--)
396 zpool_destroy_pool(pool->zpools[i]);
397 kfree(pool);
398 return NULL;
399}
400
401static struct zswap_pool *__zswap_pool_create_fallback(void)
402{
403 bool has_comp, has_zpool;
404
405 has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
406 if (!has_comp && strcmp(zswap_compressor,
407 CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
408 pr_err("compressor %s not available, using default %s\n",
409 zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
410 param_free_charp(&zswap_compressor);
411 zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
412 has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
413 }
414 if (!has_comp) {
415 pr_err("default compressor %s not available\n",
416 zswap_compressor);
417 param_free_charp(&zswap_compressor);
418 zswap_compressor = ZSWAP_PARAM_UNSET;
419 }
420
421 has_zpool = zpool_has_pool(zswap_zpool_type);
422 if (!has_zpool && strcmp(zswap_zpool_type,
423 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
424 pr_err("zpool %s not available, using default %s\n",
425 zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
426 param_free_charp(&zswap_zpool_type);
427 zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
428 has_zpool = zpool_has_pool(zswap_zpool_type);
429 }
430 if (!has_zpool) {
431 pr_err("default zpool %s not available\n",
432 zswap_zpool_type);
433 param_free_charp(&zswap_zpool_type);
434 zswap_zpool_type = ZSWAP_PARAM_UNSET;
435 }
436
437 if (!has_comp || !has_zpool)
438 return NULL;
439
440 return zswap_pool_create(zswap_zpool_type, zswap_compressor);
441}
442
443static void zswap_pool_destroy(struct zswap_pool *pool)
444{
445 int i;
446
447 zswap_pool_debug("destroying", pool);
448
449 shrinker_free(pool->shrinker);
450 cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
451 free_percpu(pool->acomp_ctx);
452 list_lru_destroy(&pool->list_lru);
453
454 spin_lock(&zswap_pools_lock);
455 mem_cgroup_iter_break(NULL, pool->next_shrink);
456 pool->next_shrink = NULL;
457 spin_unlock(&zswap_pools_lock);
458
459 for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
460 zpool_destroy_pool(pool->zpools[i]);
461 kfree(pool);
462}
463
39f3ec8e
JW
464static void __zswap_pool_release(struct work_struct *work)
465{
466 struct zswap_pool *pool = container_of(work, typeof(*pool),
467 release_work);
468
469 synchronize_rcu();
470
471 /* nobody should have been able to get a kref... */
472 WARN_ON(kref_get_unless_zero(&pool->kref));
473
474 /* pool is now off zswap_pools list and has no references. */
475 zswap_pool_destroy(pool);
476}
477
478static struct zswap_pool *zswap_pool_current(void);
479
480static void __zswap_pool_empty(struct kref *kref)
481{
482 struct zswap_pool *pool;
483
484 pool = container_of(kref, typeof(*pool), kref);
485
486 spin_lock(&zswap_pools_lock);
487
488 WARN_ON(pool == zswap_pool_current());
489
490 list_del_rcu(&pool->list);
491
492 INIT_WORK(&pool->release_work, __zswap_pool_release);
493 schedule_work(&pool->release_work);
494
495 spin_unlock(&zswap_pools_lock);
496}
497
498static int __must_check zswap_pool_get(struct zswap_pool *pool)
499{
500 if (!pool)
501 return 0;
502
503 return kref_get_unless_zero(&pool->kref);
504}
505
506static void zswap_pool_put(struct zswap_pool *pool)
507{
508 kref_put(&pool->kref, __zswap_pool_empty);
509}
510
c1a0ecb8
JW
511static struct zswap_pool *__zswap_pool_current(void)
512{
513 struct zswap_pool *pool;
514
515 pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
516 WARN_ONCE(!pool && zswap_has_pool,
517 "%s: no page storage pool!\n", __func__);
518
519 return pool;
520}
521
522static struct zswap_pool *zswap_pool_current(void)
523{
524 assert_spin_locked(&zswap_pools_lock);
525
526 return __zswap_pool_current();
527}
528
529static struct zswap_pool *zswap_pool_current_get(void)
530{
531 struct zswap_pool *pool;
532
533 rcu_read_lock();
534
535 pool = __zswap_pool_current();
536 if (!zswap_pool_get(pool))
537 pool = NULL;
538
539 rcu_read_unlock();
540
541 return pool;
542}
543
544static struct zswap_pool *zswap_pool_last_get(void)
545{
546 struct zswap_pool *pool, *last = NULL;
547
548 rcu_read_lock();
549
550 list_for_each_entry_rcu(pool, &zswap_pools, list)
551 last = pool;
552 WARN_ONCE(!last && zswap_has_pool,
553 "%s: no page storage pool!\n", __func__);
554 if (!zswap_pool_get(last))
555 last = NULL;
556
557 rcu_read_unlock();
558
559 return last;
560}
561
562/* type and compressor must be null-terminated */
563static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
564{
565 struct zswap_pool *pool;
566
567 assert_spin_locked(&zswap_pools_lock);
568
569 list_for_each_entry_rcu(pool, &zswap_pools, list) {
570 if (strcmp(pool->tfm_name, compressor))
571 continue;
572 /* all zpools share the same type */
573 if (strcmp(zpool_get_type(pool->zpools[0]), type))
574 continue;
575 /* if we can't get it, it's about to be destroyed */
576 if (!zswap_pool_get(pool))
577 continue;
578 return pool;
579 }
580
581 return NULL;
582}
583
abca07c0
JW
584/*********************************
585* param callbacks
586**********************************/
587
588static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
589{
590 /* no change required */
591 if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
592 return false;
593 return true;
594}
595
596/* val must be a null-terminated string */
597static int __zswap_param_set(const char *val, const struct kernel_param *kp,
598 char *type, char *compressor)
599{
600 struct zswap_pool *pool, *put_pool = NULL;
601 char *s = strstrip((char *)val);
602 int ret = 0;
603 bool new_pool = false;
604
605 mutex_lock(&zswap_init_lock);
606 switch (zswap_init_state) {
607 case ZSWAP_UNINIT:
608 /* if this is load-time (pre-init) param setting,
609 * don't create a pool; that's done during init.
610 */
611 ret = param_set_charp(s, kp);
612 break;
613 case ZSWAP_INIT_SUCCEED:
614 new_pool = zswap_pool_changed(s, kp);
615 break;
616 case ZSWAP_INIT_FAILED:
617 pr_err("can't set param, initialization failed\n");
618 ret = -ENODEV;
619 }
620 mutex_unlock(&zswap_init_lock);
621
622 /* no need to create a new pool, return directly */
623 if (!new_pool)
624 return ret;
625
626 if (!type) {
627 if (!zpool_has_pool(s)) {
628 pr_err("zpool %s not available\n", s);
629 return -ENOENT;
630 }
631 type = s;
632 } else if (!compressor) {
633 if (!crypto_has_acomp(s, 0, 0)) {
634 pr_err("compressor %s not available\n", s);
635 return -ENOENT;
636 }
637 compressor = s;
638 } else {
639 WARN_ON(1);
640 return -EINVAL;
641 }
642
643 spin_lock(&zswap_pools_lock);
644
645 pool = zswap_pool_find_get(type, compressor);
646 if (pool) {
647 zswap_pool_debug("using existing", pool);
648 WARN_ON(pool == zswap_pool_current());
649 list_del_rcu(&pool->list);
650 }
651
652 spin_unlock(&zswap_pools_lock);
653
654 if (!pool)
655 pool = zswap_pool_create(type, compressor);
656
657 if (pool)
658 ret = param_set_charp(s, kp);
659 else
660 ret = -EINVAL;
661
662 spin_lock(&zswap_pools_lock);
663
664 if (!ret) {
665 put_pool = zswap_pool_current();
666 list_add_rcu(&pool->list, &zswap_pools);
667 zswap_has_pool = true;
668 } else if (pool) {
669 /* add the possibly pre-existing pool to the end of the pools
670 * list; if it's new (and empty) then it'll be removed and
671 * destroyed by the put after we drop the lock
672 */
673 list_add_tail_rcu(&pool->list, &zswap_pools);
674 put_pool = pool;
675 }
676
677 spin_unlock(&zswap_pools_lock);
678
679 if (!zswap_has_pool && !pool) {
680 /* if initial pool creation failed, and this pool creation also
681 * failed, maybe both compressor and zpool params were bad.
682 * Allow changing this param, so pool creation will succeed
683 * when the other param is changed. We already verified this
684 * param is ok in the zpool_has_pool() or crypto_has_acomp()
685 * checks above.
686 */
687 ret = param_set_charp(s, kp);
688 }
689
690 /* drop the ref from either the old current pool,
691 * or the new pool we failed to add
692 */
693 if (put_pool)
694 zswap_pool_put(put_pool);
695
696 return ret;
697}
698
699static int zswap_compressor_param_set(const char *val,
700 const struct kernel_param *kp)
701{
702 return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
703}
704
705static int zswap_zpool_param_set(const char *val,
706 const struct kernel_param *kp)
707{
708 return __zswap_param_set(val, kp, NULL, zswap_compressor);
709}
710
711static int zswap_enabled_param_set(const char *val,
712 const struct kernel_param *kp)
713{
714 int ret = -ENODEV;
715
716 /* if this is load-time (pre-init) param setting, only set param. */
717 if (system_state != SYSTEM_RUNNING)
718 return param_set_bool(val, kp);
719
720 mutex_lock(&zswap_init_lock);
721 switch (zswap_init_state) {
722 case ZSWAP_UNINIT:
723 if (zswap_setup())
724 break;
725 fallthrough;
726 case ZSWAP_INIT_SUCCEED:
727 if (!zswap_has_pool)
728 pr_err("can't enable, no pool configured\n");
729 else
730 ret = param_set_bool(val, kp);
731 break;
732 case ZSWAP_INIT_FAILED:
733 pr_err("can't enable, initialization failed\n");
734 }
735 mutex_unlock(&zswap_init_lock);
736
737 return ret;
738}
739
506a86c5
JW
740/*********************************
741* lru functions
742**********************************/
743
a65b0e76
DC
744/* should be called under RCU */
745#ifdef CONFIG_MEMCG
746static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
747{
748 return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
749}
750#else
751static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
752{
753 return NULL;
754}
755#endif
756
757static inline int entry_to_nid(struct zswap_entry *entry)
758{
759 return page_to_nid(virt_to_page(entry));
760}
761
a65b0e76
DC
762static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
763{
b5ba474f
NP
764 atomic_long_t *nr_zswap_protected;
765 unsigned long lru_size, old, new;
a65b0e76
DC
766 int nid = entry_to_nid(entry);
767 struct mem_cgroup *memcg;
b5ba474f 768 struct lruvec *lruvec;
a65b0e76
DC
769
770 /*
771 * Note that it is safe to use rcu_read_lock() here, even in the face of
772 * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
773 * used in list_lru lookup, only two scenarios are possible:
774 *
775 * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
776 * new entry will be reparented to memcg's parent's list_lru.
777 * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
778 * new entry will be added directly to memcg's parent's list_lru.
779 *
3f798aa6 780 * Similar reasoning holds for list_lru_del().
a65b0e76
DC
781 */
782 rcu_read_lock();
783 memcg = mem_cgroup_from_entry(entry);
784 /* will always succeed */
785 list_lru_add(list_lru, &entry->lru, nid, memcg);
b5ba474f
NP
786
787 /* Update the protection area */
788 lru_size = list_lru_count_one(list_lru, nid, memcg);
789 lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
790 nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
791 old = atomic_long_inc_return(nr_zswap_protected);
792 /*
793 * Decay to avoid overflow and adapt to changing workloads.
794 * This is based on LRU reclaim cost decaying heuristics.
795 */
796 do {
797 new = old > lru_size / 4 ? old / 2 : old;
798 } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
a65b0e76
DC
799 rcu_read_unlock();
800}
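/*
 * Worked example (illustrative): with lru_size == 1000, the protection
 * counter is bumped on every LRU insertion (and on swapin, see
 * zswap_folio_swapin() below) until it exceeds lru_size / 4 == 250; the
 * next insertion then halves it (e.g. 251 -> 125), so the "recently
 * protected" estimate adapts to the workload instead of growing without
 * bound.
 */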
801
802static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
803{
804 int nid = entry_to_nid(entry);
805 struct mem_cgroup *memcg;
806
807 rcu_read_lock();
808 memcg = mem_cgroup_from_entry(entry);
809 /* will always succeed */
810 list_lru_del(list_lru, &entry->lru, nid, memcg);
811 rcu_read_unlock();
812}
813
5182661a
JW
814void zswap_lruvec_state_init(struct lruvec *lruvec)
815{
816 atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
817}
818
819void zswap_folio_swapin(struct folio *folio)
820{
821 struct lruvec *lruvec;
822
823 if (folio) {
824 lruvec = folio_lruvec(folio);
825 atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
826 }
827}
828
829void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
830{
831 struct zswap_pool *pool;
832
833 /* lock out zswap pools list modification */
834 spin_lock(&zswap_pools_lock);
835 list_for_each_entry(pool, &zswap_pools, list) {
836 if (pool->next_shrink == memcg)
837 pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
838 }
839 spin_unlock(&zswap_pools_lock);
840}
841
2b281117
SJ
842/*********************************
843* rbtree functions
844**********************************/
845static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
846{
847 struct rb_node *node = root->rb_node;
848 struct zswap_entry *entry;
0bb48849 849 pgoff_t entry_offset;
2b281117
SJ
850
851 while (node) {
852 entry = rb_entry(node, struct zswap_entry, rbnode);
0bb48849
DC
853 entry_offset = swp_offset(entry->swpentry);
854 if (entry_offset > offset)
2b281117 855 node = node->rb_left;
0bb48849 856 else if (entry_offset < offset)
2b281117
SJ
857 node = node->rb_right;
858 else
859 return entry;
860 }
861 return NULL;
862}
863
864/*
865 * In the case that an entry with the same offset is found, a pointer to
866 * the existing entry is stored in dupentry and the function returns -EEXIST
867 */
868static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
869 struct zswap_entry **dupentry)
870{
871 struct rb_node **link = &root->rb_node, *parent = NULL;
872 struct zswap_entry *myentry;
0bb48849 873 pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);
2b281117
SJ
874
875 while (*link) {
876 parent = *link;
877 myentry = rb_entry(parent, struct zswap_entry, rbnode);
0bb48849
DC
878 myentry_offset = swp_offset(myentry->swpentry);
879 if (myentry_offset > entry_offset)
2b281117 880 link = &(*link)->rb_left;
0bb48849 881 else if (myentry_offset < entry_offset)
2b281117
SJ
882 link = &(*link)->rb_right;
883 else {
884 *dupentry = myentry;
885 return -EEXIST;
886 }
887 }
888 rb_link_node(&entry->rbnode, parent, link);
889 rb_insert_color(&entry->rbnode, root);
890 return 0;
891}
892
18a93707 893static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
0ab0abcf
WY
894{
895 if (!RB_EMPTY_NODE(&entry->rbnode)) {
896 rb_erase(&entry->rbnode, root);
897 RB_CLEAR_NODE(&entry->rbnode);
18a93707 898 return true;
0ab0abcf 899 }
18a93707 900 return false;
0ab0abcf
WY
901}
902
36034bf6
JW
903/*********************************
904* zswap entry functions
905**********************************/
906static struct kmem_cache *zswap_entry_cache;
907
908static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
909{
910 struct zswap_entry *entry;
911 entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
912 if (!entry)
913 return NULL;
914 entry->refcount = 1;
915 RB_CLEAR_NODE(&entry->rbnode);
916 return entry;
917}
918
919static void zswap_entry_cache_free(struct zswap_entry *entry)
920{
921 kmem_cache_free(zswap_entry_cache, entry);
922}
923
b8cf32dc
YA
924static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
925{
926 int i = 0;
927
928 if (ZSWAP_NR_ZPOOLS > 1)
929 i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));
930
931 return entry->pool->zpools[i];
932}
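/*
 * Note (illustrative): hashing the entry pointer spreads allocations
 * roughly evenly across the ZSWAP_NR_ZPOOLS zpools of a pool, reducing
 * contention on any single zpool's internal locks. A given entry always
 * hashes to the same zpool, so free/map operations find the allocation
 * again without storing a zpool index in the entry.
 */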
933
0ab0abcf 934/*
12d79d64 935 * Carries out the common pattern of freeing an entry's zpool allocation,
0ab0abcf
WY
936 * freeing the entry itself, and decrementing the number of stored pages.
937 */
42398be2 938static void zswap_entry_free(struct zswap_entry *entry)
0ab0abcf 939{
a85f878b
SD
940 if (!entry->length)
941 atomic_dec(&zswap_same_filled_pages);
942 else {
a65b0e76 943 zswap_lru_del(&entry->pool->list_lru, entry);
b8cf32dc 944 zpool_free(zswap_find_zpool(entry), entry->handle);
b5ba474f 945 atomic_dec(&entry->pool->nr_stored);
a85f878b
SD
946 zswap_pool_put(entry->pool);
947 }
2e601e1e
JW
948 if (entry->objcg) {
949 obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
950 obj_cgroup_put(entry->objcg);
951 }
0ab0abcf
WY
952 zswap_entry_cache_free(entry);
953 atomic_dec(&zswap_stored_pages);
f1c54846 954 zswap_update_total_size();
0ab0abcf
WY
955}
956
957/* caller must hold the tree lock */
958static void zswap_entry_get(struct zswap_entry *entry)
959{
e477559c 960 WARN_ON_ONCE(!entry->refcount);
0ab0abcf
WY
961 entry->refcount++;
962}
963
dab7711f 964/* caller must hold the tree lock */
db128f5f 965static void zswap_entry_put(struct zswap_entry *entry)
0ab0abcf 966{
dab7711f
JW
967 WARN_ON_ONCE(!entry->refcount);
968 if (--entry->refcount == 0) {
73108957 969 WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
42398be2 970 zswap_entry_free(entry);
0ab0abcf
WY
971 }
972}
973
7dd1f7f0
JW
974/*
975 * If the entry is still valid in the tree, drop the initial ref and remove it
976 * from the tree. This function must be called with an additional ref held,
977 * otherwise it may race with another invalidation freeing the entry.
978 */
979static void zswap_invalidate_entry(struct zswap_tree *tree,
980 struct zswap_entry *entry)
981{
982 if (zswap_rb_erase(&tree->rbroot, entry))
983 zswap_entry_put(entry);
984}
985
f91e81d3
JW
986/*********************************
987* compressed storage functions
988**********************************/
64f200b8
JW
989static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
990{
991 struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
992 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
993 struct crypto_acomp *acomp;
994 struct acomp_req *req;
995 int ret;
996
997 mutex_init(&acomp_ctx->mutex);
998
999 acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
1000 if (!acomp_ctx->buffer)
1001 return -ENOMEM;
1002
1003 acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
1004 if (IS_ERR(acomp)) {
1005 pr_err("could not alloc crypto acomp %s : %ld\n",
1006 pool->tfm_name, PTR_ERR(acomp));
1007 ret = PTR_ERR(acomp);
1008 goto acomp_fail;
1009 }
1010 acomp_ctx->acomp = acomp;
1011
1012 req = acomp_request_alloc(acomp_ctx->acomp);
1013 if (!req) {
1014 pr_err("could not alloc crypto acomp_request %s\n",
1015 pool->tfm_name);
1016 ret = -ENOMEM;
1017 goto req_fail;
1018 }
1019 acomp_ctx->req = req;
1020
1021 crypto_init_wait(&acomp_ctx->wait);
1022 /*
1023 * if the backend of acomp is an async zip engine, crypto_req_done() will
1024 * wake up crypto_wait_req(); if the backend of acomp is scomp, the callback
1025 * won't be called and crypto_wait_req() will return without blocking.
1026 */
1027 acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
1028 crypto_req_done, &acomp_ctx->wait);
1029
1030 return 0;
1031
1032req_fail:
1033 crypto_free_acomp(acomp_ctx->acomp);
1034acomp_fail:
1035 kfree(acomp_ctx->buffer);
1036 return ret;
1037}
1038
1039static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
1040{
1041 struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
1042 struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
1043
1044 if (!IS_ERR_OR_NULL(acomp_ctx)) {
1045 if (!IS_ERR_OR_NULL(acomp_ctx->req))
1046 acomp_request_free(acomp_ctx->req);
1047 if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
1048 crypto_free_acomp(acomp_ctx->acomp);
1049 kfree(acomp_ctx->buffer);
1050 }
1051
1052 return 0;
1053}
1054
f91e81d3
JW
1055static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
1056{
1057 struct crypto_acomp_ctx *acomp_ctx;
1058 struct scatterlist input, output;
1059 unsigned int dlen = PAGE_SIZE;
1060 unsigned long handle;
1061 struct zpool *zpool;
1062 char *buf;
1063 gfp_t gfp;
1064 int ret;
1065 u8 *dst;
1066
1067 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1068
1069 mutex_lock(&acomp_ctx->mutex);
1070
1071 dst = acomp_ctx->buffer;
1072 sg_init_table(&input, 1);
1073 sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
1074
1075 /*
1076 * We need PAGE_SIZE * 2 here because over-compression can occur, and
1077 * hardware accelerators may not check the dst buffer size, so
1078 * give the dst buffer enough length to avoid a buffer overflow.
1079 */
1080 sg_init_one(&output, dst, PAGE_SIZE * 2);
1081 acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
1082
1083 /*
1084 * It may look a little silly that we send an asynchronous request and
1085 * then wait for its completion synchronously; in effect, the process
1086 * is synchronous.
1087 * Theoretically, acomp lets users submit multiple requests on one
1088 * acomp instance and have them completed concurrently. But zswap
1089 * stores and loads pages one at a time, so a single thread doing
1090 * zswap has no way to submit a second page before the first one is
1091 * done.
1092 * However, threads running on different CPUs use different acomp
1093 * instances, so multiple threads can still (de)compress in parallel.
1094 */
1095 ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
1096 dlen = acomp_ctx->req->dlen;
1097 if (ret) {
1098 zswap_reject_compress_fail++;
1099 goto unlock;
1100 }
1101
1102 zpool = zswap_find_zpool(entry);
1103 gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1104 if (zpool_malloc_support_movable(zpool))
1105 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1106 ret = zpool_malloc(zpool, dlen, gfp, &handle);
1107 if (ret == -ENOSPC) {
1108 zswap_reject_compress_poor++;
1109 goto unlock;
1110 }
1111 if (ret) {
1112 zswap_reject_alloc_fail++;
1113 goto unlock;
1114 }
1115
1116 buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
1117 memcpy(buf, dst, dlen);
1118 zpool_unmap_handle(zpool, handle);
1119
1120 entry->handle = handle;
1121 entry->length = dlen;
1122
1123unlock:
1124 mutex_unlock(&acomp_ctx->mutex);
1125 return ret == 0;
1126}
1127
1128static void zswap_decompress(struct zswap_entry *entry, struct page *page)
1129{
1130 struct zpool *zpool = zswap_find_zpool(entry);
1131 struct scatterlist input, output;
1132 struct crypto_acomp_ctx *acomp_ctx;
1133 u8 *src;
1134
1135 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1136 mutex_lock(&acomp_ctx->mutex);
1137
1138 src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
1139 if (!zpool_can_sleep_mapped(zpool)) {
1140 memcpy(acomp_ctx->buffer, src, entry->length);
1141 src = acomp_ctx->buffer;
1142 zpool_unmap_handle(zpool, entry->handle);
1143 }
1144
1145 sg_init_one(&input, src, entry->length);
1146 sg_init_table(&output, 1);
1147 sg_set_page(&output, page, PAGE_SIZE, 0);
1148 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
1149 BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
1150 BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
1151 mutex_unlock(&acomp_ctx->mutex);
1152
1153 if (zpool_can_sleep_mapped(zpool))
1154 zpool_unmap_handle(zpool, entry->handle);
1155}
1156
9986d35d
JW
1157/*********************************
1158* writeback code
1159**********************************/
1160/*
1161 * Attempts to free an entry by adding a folio to the swap cache,
1162 * decompressing the entry data into the folio, and issuing a
1163 * bio write to write the folio back to the swap device.
1164 *
1165 * This can be thought of as a "resumed writeback" of the folio
1166 * to the swap device. We are basically resuming the same swap
1167 * writeback path that was intercepted with the zswap_store()
1168 * in the first place. After the folio has been decompressed into
1169 * the swap cache, the compressed version stored by zswap can be
1170 * freed.
1171 */
1172static int zswap_writeback_entry(struct zswap_entry *entry,
1173 swp_entry_t swpentry)
1174{
1175 struct zswap_tree *tree;
1176 struct folio *folio;
1177 struct mempolicy *mpol;
1178 bool folio_was_allocated;
1179 struct writeback_control wbc = {
1180 .sync_mode = WB_SYNC_NONE,
1181 };
1182
1183 /* try to allocate swap cache folio */
1184 mpol = get_task_policy(current);
1185 folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
1186 NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
1187 if (!folio)
1188 return -ENOMEM;
1189
1190 /*
1191 * Found an existing folio: we raced with swapin or a concurrent
1192 * shrinker. We generally write back cold folios from zswap, and
1193 * swapin means the folio just became hot, so skip this folio.
1194 * In the unlikely concurrent shrinker case, it will be unlinked
1195 * and freed when invalidated by the concurrent shrinker anyway.
1196 */
1197 if (!folio_was_allocated) {
1198 folio_put(folio);
1199 return -EEXIST;
1200 }
1201
1202 /*
1203 * folio is locked, and the swapcache is now secured against
f9c0f1c3
CZ
1204 * concurrent swapping to and from the slot, and concurrent
1205 * swapoff so we can safely dereference the zswap tree here.
1206 * Verify that the swap entry hasn't been invalidated and recycled
1207 * behind our backs, to avoid overwriting a new swap folio with
1208 * old compressed data. Only when this is successful can the entry
1209 * be dereferenced.
9986d35d
JW
1210 */
1211 tree = swap_zswap_tree(swpentry);
1212 spin_lock(&tree->lock);
1213 if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
1214 spin_unlock(&tree->lock);
1215 delete_from_swap_cache(folio);
1216 folio_unlock(folio);
1217 folio_put(folio);
1218 return -ENOMEM;
1219 }
1220
1221 /* Safe to deref entry after the entry is verified above. */
1222 zswap_entry_get(entry);
1223 spin_unlock(&tree->lock);
1224
1225 zswap_decompress(entry, &folio->page);
1226
1227 count_vm_event(ZSWPWB);
1228 if (entry->objcg)
1229 count_objcg_event(entry->objcg, ZSWPWB);
1230
1231 spin_lock(&tree->lock);
1232 zswap_invalidate_entry(tree, entry);
1233 zswap_entry_put(entry);
1234 spin_unlock(&tree->lock);
1235
1236 /* folio is up to date */
1237 folio_mark_uptodate(folio);
1238
1239 /* move it to the tail of the inactive list after end_writeback */
1240 folio_set_reclaim(folio);
1241
1242 /* start writeback */
1243 __swap_writepage(folio, &wbc);
1244 folio_put(folio);
1245
1246 return 0;
1247}
1248
b5ba474f
NP
1249/*********************************
1250* shrinker functions
1251**********************************/
1252static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
eb23ee4f
JW
1253 spinlock_t *lock, void *arg)
1254{
1255 struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
1256 bool *encountered_page_in_swapcache = (bool *)arg;
1257 swp_entry_t swpentry;
1258 enum lru_status ret = LRU_REMOVED_RETRY;
1259 int writeback_result;
1260
1261 /*
f9c0f1c3
CZ
1262 * As soon as we drop the LRU lock, the entry can be freed by
1263 * a concurrent invalidation. This means the following:
eb23ee4f 1264 *
f9c0f1c3
CZ
1265 * 1. We extract the swp_entry_t to the stack, allowing
1266 * zswap_writeback_entry() to pin the swap entry and
1267 * then validate the zswap entry against that swap entry's
1268 * tree using pointer value comparison. Only when that
1269 * is successful can the entry be dereferenced.
eb23ee4f 1270 *
f9c0f1c3
CZ
1271 * 2. Usually, objects are taken off the LRU for reclaim. In
1272 * this case this isn't possible, because if reclaim fails
1273 * for whatever reason, we have no means of knowing if the
1274 * entry is alive to put it back on the LRU.
eb23ee4f 1275 *
f9c0f1c3
CZ
1276 * So rotate it before dropping the lock. If the entry is
1277 * written back or invalidated, the free path will unlink
1278 * it. For failures, rotation is the right thing as well.
1279 *
1280 * Temporary failures, where the same entry should be tried
1281 * again immediately, almost never happen for this shrinker.
1282 * We don't do any trylocking; -ENOMEM comes closest,
1283 * but that's extremely rare and doesn't happen spuriously
1284 * either. Don't bother distinguishing this case.
eb23ee4f
JW
1285 */
1286 list_move_tail(item, &l->list);
1287
1288 /*
1289 * Once the lru lock is dropped, the entry might get freed. The
1290 * swpentry is copied to the stack, and entry isn't deref'd again
1291 * until the entry is verified to still be alive in the tree.
1292 */
1293 swpentry = entry->swpentry;
1294
1295 /*
1296 * It's safe to drop the lock here because we return either
1297 * LRU_REMOVED_RETRY or LRU_RETRY.
1298 */
1299 spin_unlock(lock);
1300
1301 writeback_result = zswap_writeback_entry(entry, swpentry);
1302
1303 if (writeback_result) {
1304 zswap_reject_reclaim_fail++;
1305 ret = LRU_RETRY;
1306
1307 /*
1308 * Encountering a page already in swap cache is a sign that we are shrinking
1309 * into the warmer region. We should terminate shrinking (if we're in the dynamic
1310 * shrinker context).
1311 */
b49547ad
CZ
1312 if (writeback_result == -EEXIST && encountered_page_in_swapcache) {
1313 ret = LRU_STOP;
eb23ee4f 1314 *encountered_page_in_swapcache = true;
b49547ad 1315 }
eb23ee4f
JW
1316 } else {
1317 zswap_written_back_pages++;
1318 }
1319
1320 spin_lock(lock);
1321 return ret;
1322}
b5ba474f
NP
1323
1324static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
1325 struct shrink_control *sc)
1326{
1327 struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
1328 unsigned long shrink_ret, nr_protected, lru_size;
1329 struct zswap_pool *pool = shrinker->private_data;
1330 bool encountered_page_in_swapcache = false;
1331
501a06fe
NP
1332 if (!zswap_shrinker_enabled ||
1333 !mem_cgroup_zswap_writeback_enabled(sc->memcg)) {
b5ba474f
NP
1334 sc->nr_scanned = 0;
1335 return SHRINK_STOP;
1336 }
1337
1338 nr_protected =
1339 atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
1340 lru_size = list_lru_shrink_count(&pool->list_lru, sc);
1341
1342 /*
1343 * Abort if we are shrinking into the protected region.
1344 *
1345 * This short-circuiting is necessary because if we have too many
1346 * concurrent reclaimers getting the freeable zswap object counts at the
1347 * same time (before any of them has made reasonable progress), the total
1348 * number of reclaimed objects might be more than the number of unprotected
1349 * objects (i.e. the reclaimers will reclaim into the protected area of the
1350 * zswap LRU).
1351 */
1352 if (nr_protected >= lru_size - sc->nr_to_scan) {
1353 sc->nr_scanned = 0;
1354 return SHRINK_STOP;
1355 }
1356
1357 shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb,
1358 &encountered_page_in_swapcache);
1359
1360 if (encountered_page_in_swapcache)
1361 return SHRINK_STOP;
1362
1363 return shrink_ret ? shrink_ret : SHRINK_STOP;
1364}
1365
1366static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
1367 struct shrink_control *sc)
1368{
1369 struct zswap_pool *pool = shrinker->private_data;
1370 struct mem_cgroup *memcg = sc->memcg;
1371 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
1372 unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;
1373
501a06fe 1374 if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
b5ba474f
NP
1375 return 0;
1376
1377#ifdef CONFIG_MEMCG_KMEM
7d7ef0a4 1378 mem_cgroup_flush_stats(memcg);
b5ba474f
NP
1379 nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
1380 nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
1381#else
1382 /* use pool stats instead of memcg stats */
1383 nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
1384 nr_stored = atomic_read(&pool->nr_stored);
1385#endif
1386
1387 if (!nr_stored)
1388 return 0;
1389
1390 nr_protected =
1391 atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
1392 nr_freeable = list_lru_shrink_count(&pool->list_lru, sc);
1393 /*
1394 * Reduce the lru size by an estimate of the number of pages
1395 * that should be protected.
1396 */
1397 nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0;
1398
1399 /*
1400 * Scale the number of freeable pages by the memory saving factor.
1401 * This ensures that the better zswap compresses memory, the fewer
1402 * pages we will evict to swap (as it will otherwise incur IO for
1403 * relatively small memory saving).
1404 */
1405 return mult_frac(nr_freeable, nr_backing, nr_stored);
1406}
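/*
 * Worked example (illustrative): if the LRU reports nr_freeable == 3000
 * unprotected objects, nr_stored == 3000 compressed pages and they occupy
 * nr_backing == 1000 pages of pool memory (a 3:1 compression ratio), the
 * count reported to the shrinker core is mult_frac(3000, 1000, 3000) ==
 * 1000, i.e. eviction pressure is proportional to the memory actually
 * freed rather than to the raw number of objects.
 */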
1407
1408static void zswap_alloc_shrinker(struct zswap_pool *pool)
1409{
1410 pool->shrinker =
1411 shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
1412 if (!pool->shrinker)
1413 return;
1414
1415 pool->shrinker->private_data = pool;
1416 pool->shrinker->scan_objects = zswap_shrinker_scan;
1417 pool->shrinker->count_objects = zswap_shrinker_count;
1418 pool->shrinker->batch = 0;
1419 pool->shrinker->seeks = DEFAULT_SEEKS;
1420}
1421
a65b0e76
DC
1422static int shrink_memcg(struct mem_cgroup *memcg)
1423{
1424 struct zswap_pool *pool;
1425 int nid, shrunk = 0;
1426
501a06fe
NP
1427 if (!mem_cgroup_zswap_writeback_enabled(memcg))
1428 return -EINVAL;
1429
a65b0e76
DC
1430 /*
1431 * Skip zombies because their LRUs are reparented and we would be
1432 * reclaiming from the parent instead of the dead memcg.
1433 */
1434 if (memcg && !mem_cgroup_online(memcg))
1435 return -ENOENT;
1436
1437 pool = zswap_pool_current_get();
1438 if (!pool)
1439 return -EINVAL;
1440
1441 for_each_node_state(nid, N_NORMAL_MEMORY) {
1442 unsigned long nr_to_walk = 1;
1443
1444 shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg,
1445 &shrink_memcg_cb, NULL, &nr_to_walk);
1446 }
1447 zswap_pool_put(pool);
1448 return shrunk ? 0 : -EAGAIN;
f999f38b
DC
1449}
1450
45190f01
VW
1451static void shrink_worker(struct work_struct *w)
1452{
1453 struct zswap_pool *pool = container_of(w, typeof(*pool),
1454 shrink_work);
a65b0e76 1455 struct mem_cgroup *memcg;
e0228d59
DC
1456 int ret, failures = 0;
1457
a65b0e76 1458 /* global reclaim will select cgroup in a round-robin fashion. */
e0228d59 1459 do {
a65b0e76
DC
1460 spin_lock(&zswap_pools_lock);
1461 pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
1462 memcg = pool->next_shrink;
1463
1464 /*
1465 * We need to retry if we have gone through a full round trip, or if we
1466 * got an offline memcg (or else we risk undoing the effect of the
1467 * zswap memcg offlining cleanup callback). This is not catastrophic
1468 * per se, but it will keep the now offlined memcg hostage for a while.
1469 *
1470 * Note that if we got an online memcg, we will keep the extra
1471 * reference in case the original reference obtained by mem_cgroup_iter
1472 * is dropped by the zswap memcg offlining callback, ensuring that the
1473 * memcg is not killed when we are reclaiming.
1474 */
1475 if (!memcg) {
1476 spin_unlock(&zswap_pools_lock);
1477 if (++failures == MAX_RECLAIM_RETRIES)
e0228d59 1478 break;
a65b0e76
DC
1479
1480 goto resched;
1481 }
1482
1483 if (!mem_cgroup_tryget_online(memcg)) {
1484 /* drop the reference from mem_cgroup_iter() */
1485 mem_cgroup_iter_break(NULL, memcg);
1486 pool->next_shrink = NULL;
1487 spin_unlock(&zswap_pools_lock);
1488
e0228d59
DC
1489 if (++failures == MAX_RECLAIM_RETRIES)
1490 break;
a65b0e76
DC
1491
1492 goto resched;
e0228d59 1493 }
a65b0e76
DC
1494 spin_unlock(&zswap_pools_lock);
1495
1496 ret = shrink_memcg(memcg);
1497 /* drop the extra reference */
1498 mem_cgroup_put(memcg);
1499
1500 if (ret == -EINVAL)
1501 break;
1502 if (ret && ++failures == MAX_RECLAIM_RETRIES)
1503 break;
1504
1505resched:
e0228d59
DC
1506 cond_resched();
1507 } while (!zswap_can_accept());
45190f01
VW
1508 zswap_pool_put(pool);
1509}
1510
a85f878b
SD
1511static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1512{
a85f878b 1513 unsigned long *page;
62bf1258
TS
1514 unsigned long val;
1515 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
a85f878b
SD
1516
1517 page = (unsigned long *)ptr;
62bf1258
TS
1518 val = page[0];
1519
1520 if (val != page[last_pos])
1521 return 0;
1522
1523 for (pos = 1; pos < last_pos; pos++) {
1524 if (val != page[pos])
a85f878b
SD
1525 return 0;
1526 }
62bf1258
TS
1527
1528 *value = val;
1529
a85f878b
SD
1530 return 1;
1531}
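/*
 * Worked example (illustrative): a page whose every unsigned long is
 * 0x00000000deadbeef passes the check above, so zswap stores only that
 * word-sized value in entry->value and sets entry->length to 0 -- no
 * zpool allocation or compression is needed. On load, zswap_fill_page()
 * below reconstructs the page with memset_l(). A page that is identical
 * except for its last word fails the early page[last_pos] check and is
 * handled by the normal compression path instead.
 */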
1532
1533static void zswap_fill_page(void *ptr, unsigned long value)
1534{
1535 unsigned long *page;
1536
1537 page = (unsigned long *)ptr;
1538 memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1539}
1540
34f4c198 1541bool zswap_store(struct folio *folio)
2b281117 1542{
3d2c9087 1543 swp_entry_t swp = folio->swap;
42c06a0e 1544 pgoff_t offset = swp_offset(swp);
44c7c734 1545 struct zswap_tree *tree = swap_zswap_tree(swp);
2b281117 1546 struct zswap_entry *entry, *dupentry;
f4840ccf 1547 struct obj_cgroup *objcg = NULL;
a65b0e76 1548 struct mem_cgroup *memcg = NULL;
be7fc97c 1549 struct zswap_pool *shrink_pool;
42c06a0e 1550
34f4c198
MWO
1551 VM_WARN_ON_ONCE(!folio_test_locked(folio));
1552 VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
2b281117 1553
34f4c198
MWO
1554 /* Large folios aren't supported */
1555 if (folio_test_large(folio))
42c06a0e 1556 return false;
7ba71669 1557
ca56489c
DC
1558 /*
1559 * If this is a duplicate, it must be removed before attempting to store
1560 * it; otherwise, if the store fails, the old page won't be removed from
1561 * the tree and might be written back, overwriting the new data.
1562 */
1563 spin_lock(&tree->lock);
be7fc97c 1564 entry = zswap_rb_search(&tree->rbroot, offset);
3b631bd0 1565 if (entry)
be7fc97c 1566 zswap_invalidate_entry(tree, entry);
ca56489c 1567 spin_unlock(&tree->lock);
678e54d4
CZ
1568
1569 if (!zswap_enabled)
1570 return false;
1571
074e3e26 1572 objcg = get_obj_cgroup_from_folio(folio);
a65b0e76
DC
1573 if (objcg && !obj_cgroup_may_zswap(objcg)) {
1574 memcg = get_mem_cgroup_from_objcg(objcg);
1575 if (shrink_memcg(memcg)) {
1576 mem_cgroup_put(memcg);
1577 goto reject;
1578 }
1579 mem_cgroup_put(memcg);
1580 }
f4840ccf 1581
2b281117
SJ
1582 /* reclaim space if needed */
1583 if (zswap_is_full()) {
1584 zswap_pool_limit_hit++;
45190f01 1585 zswap_pool_reached_full = true;
f4840ccf 1586 goto shrink;
45190f01 1587 }
16e536ef 1588
45190f01 1589 if (zswap_pool_reached_full) {
42c06a0e 1590 if (!zswap_can_accept())
e0228d59 1591 goto shrink;
42c06a0e 1592 else
45190f01 1593 zswap_pool_reached_full = false;
2b281117
SJ
1594 }
1595
1596 /* allocate entry */
be7fc97c 1597 entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
2b281117
SJ
1598 if (!entry) {
1599 zswap_reject_kmemcache_fail++;
2b281117
SJ
1600 goto reject;
1601 }
1602
a85f878b 1603 if (zswap_same_filled_pages_enabled) {
be7fc97c
JW
1604 unsigned long value;
1605 u8 *src;
1606
1607 src = kmap_local_folio(folio, 0);
a85f878b 1608 if (zswap_is_page_same_filled(src, &value)) {
003ae2fb 1609 kunmap_local(src);
a85f878b
SD
1610 entry->length = 0;
1611 entry->value = value;
1612 atomic_inc(&zswap_same_filled_pages);
1613 goto insert_entry;
1614 }
003ae2fb 1615 kunmap_local(src);
a85f878b
SD
1616 }
1617
42c06a0e 1618 if (!zswap_non_same_filled_pages_enabled)
cb325ddd 1619 goto freepage;
cb325ddd 1620
f1c54846
DS
1621 /* if entry is successfully added, it keeps the reference */
1622 entry->pool = zswap_pool_current_get();
42c06a0e 1623 if (!entry->pool)
f1c54846 1624 goto freepage;
f1c54846 1625
a65b0e76
DC
1626 if (objcg) {
1627 memcg = get_mem_cgroup_from_objcg(objcg);
1628 if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) {
1629 mem_cgroup_put(memcg);
1630 goto put_pool;
1631 }
1632 mem_cgroup_put(memcg);
1633 }
1634
fa9ad6e2
JW
1635 if (!zswap_compress(folio, entry))
1636 goto put_pool;
1ec3b5fe 1637
a85f878b 1638insert_entry:
be7fc97c 1639 entry->swpentry = swp;
f4840ccf
JW
1640 entry->objcg = objcg;
1641 if (objcg) {
1642 obj_cgroup_charge_zswap(objcg, entry->length);
1643 /* Account before objcg ref is moved to tree */
1644 count_objcg_event(objcg, ZSWPOUT);
1645 }
1646
2b281117
SJ
1647 /* map */
1648 spin_lock(&tree->lock);
ca56489c
DC
1649 /*
1650 * A duplicate entry should have been removed at the beginning of this
1651 * function. Since the swap entry should be pinned, if a duplicate is
1652 * found again here it means that something went wrong in the swap
1653 * cache.
1654 */
42c06a0e 1655 while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
ca56489c 1656 WARN_ON(1);
56c67049 1657 zswap_invalidate_entry(tree, dupentry);
42c06a0e 1658 }
35499e2b 1659 if (entry->length) {
a65b0e76
DC
1660 INIT_LIST_HEAD(&entry->lru);
1661 zswap_lru_add(&entry->pool->list_lru, entry);
b5ba474f 1662 atomic_inc(&entry->pool->nr_stored);
f999f38b 1663 }
2b281117
SJ
1664 spin_unlock(&tree->lock);
1665
1666 /* update stats */
1667 atomic_inc(&zswap_stored_pages);
f1c54846 1668 zswap_update_total_size();
f6498b77 1669 count_vm_event(ZSWPOUT);
2b281117 1670
42c06a0e 1671 return true;
2b281117 1672
a65b0e76 1673put_pool:
f1c54846
DS
1674 zswap_pool_put(entry->pool);
1675freepage:
2b281117
SJ
1676 zswap_entry_cache_free(entry);
1677reject:
f4840ccf
JW
1678 if (objcg)
1679 obj_cgroup_put(objcg);
42c06a0e 1680 return false;
f4840ccf
JW
1681
1682shrink:
be7fc97c
JW
1683 shrink_pool = zswap_pool_last_get();
1684 if (shrink_pool && !queue_work(shrink_wq, &shrink_pool->shrink_work))
1685 zswap_pool_put(shrink_pool);
f4840ccf 1686 goto reject;
2b281117
SJ
1687}
1688
ca54f6d8 1689bool zswap_load(struct folio *folio)
2b281117 1690{
3d2c9087 1691 swp_entry_t swp = folio->swap;
42c06a0e 1692 pgoff_t offset = swp_offset(swp);
ca54f6d8 1693 struct page *page = &folio->page;
44c7c734 1694 struct zswap_tree *tree = swap_zswap_tree(swp);
2b281117 1695 struct zswap_entry *entry;
32acba4c 1696 u8 *dst;
42c06a0e 1697
ca54f6d8 1698 VM_WARN_ON_ONCE(!folio_test_locked(folio));
2b281117 1699
2b281117 1700 spin_lock(&tree->lock);
5b297f70 1701 entry = zswap_rb_search(&tree->rbroot, offset);
2b281117 1702 if (!entry) {
2b281117 1703 spin_unlock(&tree->lock);
42c06a0e 1704 return false;
2b281117 1705 }
5b297f70 1706 zswap_entry_get(entry);
2b281117
SJ
1707 spin_unlock(&tree->lock);
1708
66447fd0 1709 if (entry->length)
ff2972aa 1710 zswap_decompress(entry, page);
66447fd0 1711 else {
003ae2fb 1712 dst = kmap_local_page(page);
a85f878b 1713 zswap_fill_page(dst, entry->value);
003ae2fb 1714 kunmap_local(dst);
a85f878b
SD
1715 }
1716
f6498b77 1717 count_vm_event(ZSWPIN);
f4840ccf
JW
1718 if (entry->objcg)
1719 count_objcg_event(entry->objcg, ZSWPIN);
c75f5c1e 1720
2b281117 1721 spin_lock(&tree->lock);
c2e2ba77 1722 zswap_invalidate_entry(tree, entry);
db128f5f 1723 zswap_entry_put(entry);
2b281117
SJ
1724 spin_unlock(&tree->lock);
1725
c2e2ba77
CZ
1726 folio_mark_dirty(folio);
1727
66447fd0 1728 return true;
2b281117
SJ
1729}
1730
0827a1fb 1731void zswap_invalidate(swp_entry_t swp)
2b281117 1732{
0827a1fb
CZ
1733 pgoff_t offset = swp_offset(swp);
1734 struct zswap_tree *tree = swap_zswap_tree(swp);
2b281117 1735 struct zswap_entry *entry;
2b281117 1736
2b281117
SJ
1737 spin_lock(&tree->lock);
1738 entry = zswap_rb_search(&tree->rbroot, offset);
06ed2289
JW
1739 if (entry)
1740 zswap_invalidate_entry(tree, entry);
2b281117 1741 spin_unlock(&tree->lock);
2b281117
SJ
1742}
1743
44c7c734 1744int zswap_swapon(int type, unsigned long nr_pages)
42c06a0e 1745{
44c7c734
CZ
1746 struct zswap_tree *trees, *tree;
1747 unsigned int nr, i;
42c06a0e 1748
44c7c734
CZ
1749 nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
1750 trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL);
1751 if (!trees) {
42c06a0e 1752 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
bb29fd77 1753 return -ENOMEM;
42c06a0e
JW
1754 }
1755
44c7c734
CZ
1756 for (i = 0; i < nr; i++) {
1757 tree = trees + i;
1758 tree->rbroot = RB_ROOT;
1759 spin_lock_init(&tree->lock);
1760 }
1761
1762 nr_zswap_trees[type] = nr;
1763 zswap_trees[type] = trees;
bb29fd77 1764 return 0;
42c06a0e
JW
1765}
1766
1767void zswap_swapoff(int type)
2b281117 1768{
44c7c734
CZ
1769 struct zswap_tree *trees = zswap_trees[type];
1770 unsigned int i;
2b281117 1771
44c7c734 1772 if (!trees)
2b281117
SJ
1773 return;
1774
83e68f25
YA
1775 /* try_to_unuse() invalidated all the entries already */
1776 for (i = 0; i < nr_zswap_trees[type]; i++)
1777 WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot));
44c7c734
CZ
1778
1779 kvfree(trees);
1780 nr_zswap_trees[type] = 0;
aa9bca05 1781 zswap_trees[type] = NULL;
2b281117
SJ
1782}
1783
2b281117
SJ
1784/*********************************
1785* debugfs functions
1786**********************************/
1787#ifdef CONFIG_DEBUG_FS
1788#include <linux/debugfs.h>
1789
1790static struct dentry *zswap_debugfs_root;
1791
141fdeec 1792static int zswap_debugfs_init(void)
2b281117
SJ
1793{
1794 if (!debugfs_initialized())
1795 return -ENODEV;
1796
1797 zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
2b281117 1798
0825a6f9
JP
1799 debugfs_create_u64("pool_limit_hit", 0444,
1800 zswap_debugfs_root, &zswap_pool_limit_hit);
1801 debugfs_create_u64("reject_reclaim_fail", 0444,
1802 zswap_debugfs_root, &zswap_reject_reclaim_fail);
1803 debugfs_create_u64("reject_alloc_fail", 0444,
1804 zswap_debugfs_root, &zswap_reject_alloc_fail);
1805 debugfs_create_u64("reject_kmemcache_fail", 0444,
1806 zswap_debugfs_root, &zswap_reject_kmemcache_fail);
cb61dad8
NP
1807 debugfs_create_u64("reject_compress_fail", 0444,
1808 zswap_debugfs_root, &zswap_reject_compress_fail);
0825a6f9
JP
1809 debugfs_create_u64("reject_compress_poor", 0444,
1810 zswap_debugfs_root, &zswap_reject_compress_poor);
1811 debugfs_create_u64("written_back_pages", 0444,
1812 zswap_debugfs_root, &zswap_written_back_pages);
0825a6f9
JP
1813 debugfs_create_u64("pool_total_size", 0444,
1814 zswap_debugfs_root, &zswap_pool_total_size);
1815 debugfs_create_atomic_t("stored_pages", 0444,
1816 zswap_debugfs_root, &zswap_stored_pages);
a85f878b 1817 debugfs_create_atomic_t("same_filled_pages", 0444,
0825a6f9 1818 zswap_debugfs_root, &zswap_same_filled_pages);
2b281117
SJ
1819
1820 return 0;
1821}
2b281117 1822#else
141fdeec 1823static int zswap_debugfs_init(void)
2b281117
SJ
1824{
1825 return 0;
1826}
2b281117
SJ
1827#endif
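/*
 * Illustrative usage (not part of the driver): with CONFIG_DEBUG_FS
 * enabled and debugfs mounted at /sys/kernel/debug, the counters
 * registered in zswap_debugfs_init() above can be read directly, e.g.:
 *
 *   cat /sys/kernel/debug/zswap/pool_total_size
 *   cat /sys/kernel/debug/zswap/stored_pages
 *   cat /sys/kernel/debug/zswap/written_back_pages
 *
 * The exact set of files matches the debugfs_create_*() calls above.
 */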
1828
1829/*********************************
1830* module init and exit
1831**********************************/
141fdeec 1832static int zswap_setup(void)
2b281117 1833{
f1c54846 1834 struct zswap_pool *pool;
ad7ed770 1835 int ret;
60105e12 1836
b7919122
LS
1837 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
1838 if (!zswap_entry_cache) {
2b281117 1839 pr_err("entry cache creation failed\n");
f1c54846 1840 goto cache_fail;
2b281117 1841 }
f1c54846 1842
cab7a7e5
SAS
1843 ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
1844 "mm/zswap_pool:prepare",
1845 zswap_cpu_comp_prepare,
1846 zswap_cpu_comp_dead);
1847 if (ret)
1848 goto hp_fail;
1849
f1c54846 1850 pool = __zswap_pool_create_fallback();
ae3d89a7
DS
1851 if (pool) {
1852 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
b8cf32dc 1853 zpool_get_type(pool->zpools[0]));
ae3d89a7
DS
1854 list_add(&pool->list, &zswap_pools);
1855 zswap_has_pool = true;
1856 } else {
f1c54846 1857 pr_err("pool creation failed\n");
ae3d89a7 1858 zswap_enabled = false;
2b281117 1859 }
60105e12 1860
8409a385
RM
1861 shrink_wq = alloc_workqueue("zswap-shrink",
1862 WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
45190f01
VW
1863 if (!shrink_wq)
1864 goto fallback_fail;
1865
2b281117
SJ
1866 if (zswap_debugfs_init())
1867 pr_warn("debugfs initialization failed\n");
9021ccec 1868 zswap_init_state = ZSWAP_INIT_SUCCEED;
2b281117 1869 return 0;
f1c54846 1870
45190f01 1871fallback_fail:
38aeb071
DC
1872 if (pool)
1873 zswap_pool_destroy(pool);
cab7a7e5 1874hp_fail:
b7919122 1875 kmem_cache_destroy(zswap_entry_cache);
f1c54846 1876cache_fail:
d7b028f5 1877 /* if built-in, we aren't unloaded on failure; don't allow use */
9021ccec 1878 zswap_init_state = ZSWAP_INIT_FAILED;
d7b028f5 1879 zswap_enabled = false;
2b281117
SJ
1880 return -ENOMEM;
1881}
141fdeec
LS
1882
1883static int __init zswap_init(void)
1884{
1885 if (!zswap_enabled)
1886 return 0;
1887 return zswap_setup();
1888}
2b281117 1889/* must be late so crypto has time to come up */
141fdeec 1890late_initcall(zswap_init);
2b281117 1891
68386da8 1892MODULE_AUTHOR("Seth Jennings <[email protected]>");
2b281117 1893MODULE_DESCRIPTION("Compressed cache for swap pages");