1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * zswap.c - zswap driver file
4  *
5  * zswap is a backend for frontswap that takes pages that are in the process
6  * of being swapped out and attempts to compress and store them in a
7  * RAM-based memory pool.  This can result in a significant I/O reduction on
8  * the swap device and, in the case where decompressing from RAM is faster
9  * than reading from the swap device, can also improve workload performance.
10  *
11  * Copyright (C) 2012  Seth Jennings <[email protected]>
12 */
13
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16 #include <linux/module.h>
17 #include <linux/cpu.h>
18 #include <linux/highmem.h>
19 #include <linux/slab.h>
20 #include <linux/spinlock.h>
21 #include <linux/types.h>
22 #include <linux/atomic.h>
23 #include <linux/frontswap.h>
24 #include <linux/rbtree.h>
25 #include <linux/swap.h>
26 #include <linux/crypto.h>
27 #include <linux/scatterlist.h>
28 #include <linux/mempool.h>
29 #include <linux/zpool.h>
30 #include <crypto/acompress.h>
31
32 #include <linux/mm_types.h>
33 #include <linux/page-flags.h>
34 #include <linux/swapops.h>
35 #include <linux/writeback.h>
36 #include <linux/pagemap.h>
37 #include <linux/workqueue.h>
38
39 #include "swap.h"
40 #include "internal.h"
41
42 /*********************************
43 * statistics
44 **********************************/
45 /* Total bytes used by the compressed storage */
46 u64 zswap_pool_total_size;
47 /* The number of compressed pages currently stored in zswap */
48 atomic_t zswap_stored_pages = ATOMIC_INIT(0);
49 /* The number of same-value filled pages currently stored in zswap */
50 static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);
51
52 /*
53  * The statistics below are not protected from concurrent access for
54  * performance reasons so they may not be 100% accurate.  However,
55  * they do provide useful information on roughly how many times a
56  * certain event is occurring.
57 */
58
59 /* Pool limit was hit (see zswap_max_pool_percent) */
60 static u64 zswap_pool_limit_hit;
61 /* Pages written back when pool limit was reached */
62 static u64 zswap_written_back_pages;
63 /* Store failed due to a reclaim failure after pool limit was reached */
64 static u64 zswap_reject_reclaim_fail;
65 /* Compressed page was too big for the allocator to (optimally) store */
66 static u64 zswap_reject_compress_poor;
67 /* Store failed because underlying allocator could not get memory */
68 static u64 zswap_reject_alloc_fail;
69 /* Store failed because the entry metadata could not be allocated (rare) */
70 static u64 zswap_reject_kmemcache_fail;
71 /* Duplicate store was encountered (rare) */
72 static u64 zswap_duplicate_entry;
73
74 /* Shrinker work queue */
75 static struct workqueue_struct *shrink_wq;
76 /* Pool limit was hit, we need to calm down */
77 static bool zswap_pool_reached_full;
78
79 /*********************************
80 * tunables
81 **********************************/
82
83 #define ZSWAP_PARAM_UNSET ""
84
85 static int zswap_setup(void);
86
87 /* Enable/disable zswap */
88 static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
89 static int zswap_enabled_param_set(const char *,
90                                    const struct kernel_param *);
91 static const struct kernel_param_ops zswap_enabled_param_ops = {
92         .set =          zswap_enabled_param_set,
93         .get =          param_get_bool,
94 };
95 module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
96
97 /* Crypto compressor to use */
98 static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
99 static int zswap_compressor_param_set(const char *,
100                                       const struct kernel_param *);
101 static const struct kernel_param_ops zswap_compressor_param_ops = {
102         .set =          zswap_compressor_param_set,
103         .get =          param_get_charp,
104         .free =         param_free_charp,
105 };
106 module_param_cb(compressor, &zswap_compressor_param_ops,
107                 &zswap_compressor, 0644);
108
109 /* Compressed storage zpool to use */
110 static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
111 static int zswap_zpool_param_set(const char *, const struct kernel_param *);
112 static const struct kernel_param_ops zswap_zpool_param_ops = {
113         .set =          zswap_zpool_param_set,
114         .get =          param_get_charp,
115         .free =         param_free_charp,
116 };
117 module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);
118
119 /* The maximum percentage of memory that the compressed pool can occupy */
120 static unsigned int zswap_max_pool_percent = 20;
121 module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);
122
123 /* The threshold for accepting new pages after the max_pool_percent was hit */
124 static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
125 module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
126                    uint, 0644);
127
128 /*
129  * Enable/disable handling same-value filled pages (enabled by default).
130  * If disabled, every page is considered non-same-value filled.
131  */
132 static bool zswap_same_filled_pages_enabled = true;
133 module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
134                    bool, 0644);
135
136 /* Enable/disable handling non-same-value filled pages (enabled by default) */
137 static bool zswap_non_same_filled_pages_enabled = true;
138 module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
139                    bool, 0644);
140
141 static bool zswap_exclusive_loads_enabled = IS_ENABLED(
142                 CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
143 module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);
144
145 /*********************************
146 * data structures
147 **********************************/
148
149 struct crypto_acomp_ctx {
150         struct crypto_acomp *acomp;
151         struct acomp_req *req;
152         struct crypto_wait wait;
153         u8 *dstmem;
154         struct mutex *mutex;
155 };
156
157 struct zswap_pool {
158         struct zpool *zpool;
159         struct crypto_acomp_ctx __percpu *acomp_ctx;
160         struct kref kref;
161         struct list_head list;
162         struct work_struct release_work;
163         struct work_struct shrink_work;
164         struct hlist_node node;
165         char tfm_name[CRYPTO_MAX_ALG_NAME];
166 };
167
168 /*
169  * struct zswap_entry
170  *
171  * This structure contains the metadata for tracking a single compressed
172  * page within zswap.
173  *
174  * rbnode - links the entry into red-black tree for the appropriate swap type
175  * offset - the swap offset for the entry.  Index into the red-black tree.
176  * refcount - the number of outstanding references to the entry. This is needed
177  *            to protect against premature freeing of the entry by concurrent
178  *            calls to load, invalidate, and writeback.  The lock
179  *            for the zswap_tree structure that contains the entry must
180  *            be held while changing the refcount.  Since the lock must
181  *            be held, there is no reason to also make refcount atomic.
182  * length - the length in bytes of the compressed page data.  Needed during
183  *          decompression.  For a same-value filled page, length is 0.
184  * pool - the zswap_pool the entry's data is in
185  * handle - zpool allocation handle that stores the compressed page data
186  * value - the repeated value of a same-value filled page (valid when length is 0)
187  */
188 struct zswap_entry {
189         struct rb_node rbnode;
190         pgoff_t offset;
191         int refcount;
192         unsigned int length;
193         struct zswap_pool *pool;
194         union {
195                 unsigned long handle;
196                 unsigned long value;
197         };
198         struct obj_cgroup *objcg;
199 };
200
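/*
 * When the zpool supports eviction, the compressed data is prefixed with
 * this header so that zswap_writeback_entry() can recover which swap slot
 * (and therefore which tree entry) the allocation belongs to.
 */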
201 struct zswap_header {
202         swp_entry_t swpentry;
203 };
204
205 /*
206  * The tree lock in the zswap_tree struct protects a few things:
207  * - the rbtree
208  * - the refcount field of each entry in the tree
209  */
210 struct zswap_tree {
211         struct rb_root rbroot;
212         spinlock_t lock;
213 };
214
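/* one tree per swap type, allocated by zswap_frontswap_init() when a swap area is enabled */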
215 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
216
217 /* RCU-protected iteration */
218 static LIST_HEAD(zswap_pools);
219 /* protects zswap_pools list modification */
220 static DEFINE_SPINLOCK(zswap_pools_lock);
221 /* pool counter to provide unique names to zpool */
222 static atomic_t zswap_pools_count = ATOMIC_INIT(0);
223
224 enum zswap_init_type {
225         ZSWAP_UNINIT,
226         ZSWAP_INIT_SUCCEED,
227         ZSWAP_INIT_FAILED
228 };
229
230 static enum zswap_init_type zswap_init_state;
231
232 /* used to ensure the integrity of initialization */
233 static DEFINE_MUTEX(zswap_init_lock);
234
235 /* init completed, but couldn't create the initial pool */
236 static bool zswap_has_pool;
237
238 /*********************************
239 * helpers and fwd declarations
240 **********************************/
241
242 #define zswap_pool_debug(msg, p)                                \
243         pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
244                  zpool_get_type((p)->zpool))
245
246 static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
247 static int zswap_pool_get(struct zswap_pool *pool);
248 static void zswap_pool_put(struct zswap_pool *pool);
249
250 static const struct zpool_ops zswap_zpool_ops = {
251         .evict = zswap_writeback_entry
252 };
253
254 static bool zswap_is_full(void)
255 {
256         return totalram_pages() * zswap_max_pool_percent / 100 <
257                         DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
258 }
259
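/*
 * Once the pool limit has been hit, new stores are only accepted again
 * after the pool has shrunk below accept_thr_percent of the maximum pool
 * size (see the zswap_pool_reached_full handling in zswap_frontswap_store()).
 */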
260 static bool zswap_can_accept(void)
261 {
262         return totalram_pages() * zswap_accept_thr_percent / 100 *
263                                 zswap_max_pool_percent / 100 >
264                         DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
265 }
266
267 static void zswap_update_total_size(void)
268 {
269         struct zswap_pool *pool;
270         u64 total = 0;
271
272         rcu_read_lock();
273
274         list_for_each_entry_rcu(pool, &zswap_pools, list)
275                 total += zpool_get_total_size(pool->zpool);
276
277         rcu_read_unlock();
278
279         zswap_pool_total_size = total;
280 }
281
282 /*********************************
283 * zswap entry functions
284 **********************************/
285 static struct kmem_cache *zswap_entry_cache;
286
287 static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
288 {
289         struct zswap_entry *entry;
290         entry = kmem_cache_alloc(zswap_entry_cache, gfp);
291         if (!entry)
292                 return NULL;
293         entry->refcount = 1;
294         RB_CLEAR_NODE(&entry->rbnode);
295         return entry;
296 }
297
298 static void zswap_entry_cache_free(struct zswap_entry *entry)
299 {
300         kmem_cache_free(zswap_entry_cache, entry);
301 }
302
303 /*********************************
304 * rbtree functions
305 **********************************/
306 static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
307 {
308         struct rb_node *node = root->rb_node;
309         struct zswap_entry *entry;
310
311         while (node) {
312                 entry = rb_entry(node, struct zswap_entry, rbnode);
313                 if (entry->offset > offset)
314                         node = node->rb_left;
315                 else if (entry->offset < offset)
316                         node = node->rb_right;
317                 else
318                         return entry;
319         }
320         return NULL;
321 }
322
323 /*
324  * In the case that an entry with the same offset is found, a pointer to
325  * the existing entry is stored in dupentry and the function returns -EEXIST.
326  */
327 static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
328                         struct zswap_entry **dupentry)
329 {
330         struct rb_node **link = &root->rb_node, *parent = NULL;
331         struct zswap_entry *myentry;
332
333         while (*link) {
334                 parent = *link;
335                 myentry = rb_entry(parent, struct zswap_entry, rbnode);
336                 if (myentry->offset > entry->offset)
337                         link = &(*link)->rb_left;
338                 else if (myentry->offset < entry->offset)
339                         link = &(*link)->rb_right;
340                 else {
341                         *dupentry = myentry;
342                         return -EEXIST;
343                 }
344         }
345         rb_link_node(&entry->rbnode, parent, link);
346         rb_insert_color(&entry->rbnode, root);
347         return 0;
348 }
349
350 static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
351 {
352         if (!RB_EMPTY_NODE(&entry->rbnode)) {
353                 rb_erase(&entry->rbnode, root);
354                 RB_CLEAR_NODE(&entry->rbnode);
355         }
356 }
357
358 /*
359  * Carries out the common pattern of freeing an entry's zpool allocation,
360  * freeing the entry itself, and decrementing the number of stored pages.
361  */
362 static void zswap_free_entry(struct zswap_entry *entry)
363 {
364         if (entry->objcg) {
365                 obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
366                 obj_cgroup_put(entry->objcg);
367         }
368         if (!entry->length)
369                 atomic_dec(&zswap_same_filled_pages);
370         else {
371                 zpool_free(entry->pool->zpool, entry->handle);
372                 zswap_pool_put(entry->pool);
373         }
374         zswap_entry_cache_free(entry);
375         atomic_dec(&zswap_stored_pages);
376         zswap_update_total_size();
377 }
378
379 /* caller must hold the tree lock */
380 static void zswap_entry_get(struct zswap_entry *entry)
381 {
382         entry->refcount++;
383 }
384
385 /* caller must hold the tree lock
386  * remove from the tree and free it, if nobody references the entry
387  */
388 static void zswap_entry_put(struct zswap_tree *tree,
389                         struct zswap_entry *entry)
390 {
391         int refcount = --entry->refcount;
392
393         BUG_ON(refcount < 0);
394         if (refcount == 0) {
395                 zswap_rb_erase(&tree->rbroot, entry);
396                 zswap_free_entry(entry);
397         }
398 }
399
400 /* caller must hold the tree lock */
401 static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
402                                 pgoff_t offset)
403 {
404         struct zswap_entry *entry;
405
406         entry = zswap_rb_search(root, offset);
407         if (entry)
408                 zswap_entry_get(entry);
409
410         return entry;
411 }
412
413 /*********************************
414 * per-cpu code
415 **********************************/
416 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
417 /*
418  * If users dynamically change the zpool type or compressor at runtime, i.e.
419  * while zswap is running, zswap can have more than one zpool on one cpu, but
420  * they all share dstmem.  So we need this mutex to be per-cpu.
421  */
422 static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
423
424 static int zswap_dstmem_prepare(unsigned int cpu)
425 {
426         struct mutex *mutex;
427         u8 *dst;
428
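        /*
         * Two pages per CPU so that compressed output larger than PAGE_SIZE
         * still fits; the output scatterlist in the store path is sized to
         * PAGE_SIZE * 2 to match.
         */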
429         dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
430         if (!dst)
431                 return -ENOMEM;
432
433         mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
434         if (!mutex) {
435                 kfree(dst);
436                 return -ENOMEM;
437         }
438
439         mutex_init(mutex);
440         per_cpu(zswap_dstmem, cpu) = dst;
441         per_cpu(zswap_mutex, cpu) = mutex;
442         return 0;
443 }
444
445 static int zswap_dstmem_dead(unsigned int cpu)
446 {
447         struct mutex *mutex;
448         u8 *dst;
449
450         mutex = per_cpu(zswap_mutex, cpu);
451         kfree(mutex);
452         per_cpu(zswap_mutex, cpu) = NULL;
453
454         dst = per_cpu(zswap_dstmem, cpu);
455         kfree(dst);
456         per_cpu(zswap_dstmem, cpu) = NULL;
457
458         return 0;
459 }
460
461 static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
462 {
463         struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
464         struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
465         struct crypto_acomp *acomp;
466         struct acomp_req *req;
467
468         acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
469         if (IS_ERR(acomp)) {
470                 pr_err("could not alloc crypto acomp %s : %ld\n",
471                                 pool->tfm_name, PTR_ERR(acomp));
472                 return PTR_ERR(acomp);
473         }
474         acomp_ctx->acomp = acomp;
475
476         req = acomp_request_alloc(acomp_ctx->acomp);
477         if (!req) {
478                 pr_err("could not alloc crypto acomp_request %s\n",
479                        pool->tfm_name);
480                 crypto_free_acomp(acomp_ctx->acomp);
481                 return -ENOMEM;
482         }
483         acomp_ctx->req = req;
484
485         crypto_init_wait(&acomp_ctx->wait);
486         /*
487          * if the backend of acomp is async zip, crypto_req_done() will wake up
488          * crypto_wait_req(); if the backend of acomp is scomp, the callback
489          * won't be called, crypto_wait_req() will return without blocking.
490          */
491         acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
492                                    crypto_req_done, &acomp_ctx->wait);
493
494         acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
495         acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
496
497         return 0;
498 }
499
500 static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
501 {
502         struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
503         struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
504
505         if (!IS_ERR_OR_NULL(acomp_ctx)) {
506                 if (!IS_ERR_OR_NULL(acomp_ctx->req))
507                         acomp_request_free(acomp_ctx->req);
508                 if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
509                         crypto_free_acomp(acomp_ctx->acomp);
510         }
511
512         return 0;
513 }
514
515 /*********************************
516 * pool functions
517 **********************************/
518
519 static struct zswap_pool *__zswap_pool_current(void)
520 {
521         struct zswap_pool *pool;
522
523         pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
524         WARN_ONCE(!pool && zswap_has_pool,
525                   "%s: no page storage pool!\n", __func__);
526
527         return pool;
528 }
529
530 static struct zswap_pool *zswap_pool_current(void)
531 {
532         assert_spin_locked(&zswap_pools_lock);
533
534         return __zswap_pool_current();
535 }
536
537 static struct zswap_pool *zswap_pool_current_get(void)
538 {
539         struct zswap_pool *pool;
540
541         rcu_read_lock();
542
543         pool = __zswap_pool_current();
544         if (!zswap_pool_get(pool))
545                 pool = NULL;
546
547         rcu_read_unlock();
548
549         return pool;
550 }
551
552 static struct zswap_pool *zswap_pool_last_get(void)
553 {
554         struct zswap_pool *pool, *last = NULL;
555
556         rcu_read_lock();
557
558         list_for_each_entry_rcu(pool, &zswap_pools, list)
559                 last = pool;
560         WARN_ONCE(!last && zswap_has_pool,
561                   "%s: no page storage pool!\n", __func__);
562         if (!zswap_pool_get(last))
563                 last = NULL;
564
565         rcu_read_unlock();
566
567         return last;
568 }
569
570 /* type and compressor must be null-terminated */
571 static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
572 {
573         struct zswap_pool *pool;
574
575         assert_spin_locked(&zswap_pools_lock);
576
577         list_for_each_entry_rcu(pool, &zswap_pools, list) {
578                 if (strcmp(pool->tfm_name, compressor))
579                         continue;
580                 if (strcmp(zpool_get_type(pool->zpool), type))
581                         continue;
582                 /* if we can't get it, it's about to be destroyed */
583                 if (!zswap_pool_get(pool))
584                         continue;
585                 return pool;
586         }
587
588         return NULL;
589 }
590
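/*
 * Asynchronous shrinker, queued on shrink_wq when a store hits the pool
 * limit.  It asks the zpool to evict (write back) one entry at a time until
 * the pool can accept new pages again, giving up after MAX_RECLAIM_RETRIES
 * -EAGAIN failures or on any other error.
 */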
591 static void shrink_worker(struct work_struct *w)
592 {
593         struct zswap_pool *pool = container_of(w, typeof(*pool),
594                                                 shrink_work);
595         int ret, failures = 0;
596
597         do {
598                 ret = zpool_shrink(pool->zpool, 1, NULL);
599                 if (ret) {
600                         zswap_reject_reclaim_fail++;
601                         if (ret != -EAGAIN)
602                                 break;
603                         if (++failures == MAX_RECLAIM_RETRIES)
604                                 break;
605                 }
606                 cond_resched();
607         } while (!zswap_can_accept());
608         zswap_pool_put(pool);
609 }
610
611 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
612 {
613         struct zswap_pool *pool;
614         char name[38]; /* 'zswap' + 32 char (max) num + \0 */
615         gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
616         int ret;
617
618         if (!zswap_has_pool) {
619                 /* if either is unset, pool initialization failed, and we
620                  * need both params to be set correctly before trying to
621                  * create a pool.
622                  */
623                 if (!strcmp(type, ZSWAP_PARAM_UNSET))
624                         return NULL;
625                 if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
626                         return NULL;
627         }
628
629         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
630         if (!pool)
631                 return NULL;
632
633         /* unique name for each pool specifically required by zsmalloc */
634         snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
635
636         pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
637         if (!pool->zpool) {
638                 pr_err("%s zpool not available\n", type);
639                 goto error;
640         }
641         pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
642
643         strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
644
645         pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
646         if (!pool->acomp_ctx) {
647                 pr_err("percpu alloc failed\n");
648                 goto error;
649         }
650
651         ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
652                                        &pool->node);
653         if (ret)
654                 goto error;
655         pr_debug("using %s compressor\n", pool->tfm_name);
656
657         /* being the current pool takes 1 ref; this func expects the
658          * caller to always add the new pool as the current pool
659          */
660         kref_init(&pool->kref);
661         INIT_LIST_HEAD(&pool->list);
662         INIT_WORK(&pool->shrink_work, shrink_worker);
663
664         zswap_pool_debug("created", pool);
665
666         return pool;
667
668 error:
669         if (pool->acomp_ctx)
670                 free_percpu(pool->acomp_ctx);
671         if (pool->zpool)
672                 zpool_destroy_pool(pool->zpool);
673         kfree(pool);
674         return NULL;
675 }
676
677 static struct zswap_pool *__zswap_pool_create_fallback(void)
678 {
679         bool has_comp, has_zpool;
680
681         has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
682         if (!has_comp && strcmp(zswap_compressor,
683                                 CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
684                 pr_err("compressor %s not available, using default %s\n",
685                        zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
686                 param_free_charp(&zswap_compressor);
687                 zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
688                 has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
689         }
690         if (!has_comp) {
691                 pr_err("default compressor %s not available\n",
692                        zswap_compressor);
693                 param_free_charp(&zswap_compressor);
694                 zswap_compressor = ZSWAP_PARAM_UNSET;
695         }
696
697         has_zpool = zpool_has_pool(zswap_zpool_type);
698         if (!has_zpool && strcmp(zswap_zpool_type,
699                                  CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
700                 pr_err("zpool %s not available, using default %s\n",
701                        zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
702                 param_free_charp(&zswap_zpool_type);
703                 zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
704                 has_zpool = zpool_has_pool(zswap_zpool_type);
705         }
706         if (!has_zpool) {
707                 pr_err("default zpool %s not available\n",
708                        zswap_zpool_type);
709                 param_free_charp(&zswap_zpool_type);
710                 zswap_zpool_type = ZSWAP_PARAM_UNSET;
711         }
712
713         if (!has_comp || !has_zpool)
714                 return NULL;
715
716         return zswap_pool_create(zswap_zpool_type, zswap_compressor);
717 }
718
719 static void zswap_pool_destroy(struct zswap_pool *pool)
720 {
721         zswap_pool_debug("destroying", pool);
722
723         cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
724         free_percpu(pool->acomp_ctx);
725         zpool_destroy_pool(pool->zpool);
726         kfree(pool);
727 }
728
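/* returns false, and takes no reference, if the pool is already being destroyed */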
729 static int __must_check zswap_pool_get(struct zswap_pool *pool)
730 {
731         if (!pool)
732                 return 0;
733
734         return kref_get_unless_zero(&pool->kref);
735 }
736
737 static void __zswap_pool_release(struct work_struct *work)
738 {
739         struct zswap_pool *pool = container_of(work, typeof(*pool),
740                                                 release_work);
741
742         synchronize_rcu();
743
744         /* nobody should have been able to get a kref... */
745         WARN_ON(kref_get_unless_zero(&pool->kref));
746
747         /* pool is now off zswap_pools list and has no references. */
748         zswap_pool_destroy(pool);
749 }
750
751 static void __zswap_pool_empty(struct kref *kref)
752 {
753         struct zswap_pool *pool;
754
755         pool = container_of(kref, typeof(*pool), kref);
756
757         spin_lock(&zswap_pools_lock);
758
759         WARN_ON(pool == zswap_pool_current());
760
761         list_del_rcu(&pool->list);
762
763         INIT_WORK(&pool->release_work, __zswap_pool_release);
764         schedule_work(&pool->release_work);
765
766         spin_unlock(&zswap_pools_lock);
767 }
768
769 static void zswap_pool_put(struct zswap_pool *pool)
770 {
771         kref_put(&pool->kref, __zswap_pool_empty);
772 }
773
774 /*********************************
775 * param callbacks
776 **********************************/
777
778 static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
779 {
780         /* no change required */
781         if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
782                 return false;
783         return true;
784 }
785
786 /* val must be a null-terminated string */
787 static int __zswap_param_set(const char *val, const struct kernel_param *kp,
788                              char *type, char *compressor)
789 {
790         struct zswap_pool *pool, *put_pool = NULL;
791         char *s = strstrip((char *)val);
792         int ret = 0;
793         bool new_pool = false;
794
795         mutex_lock(&zswap_init_lock);
796         switch (zswap_init_state) {
797         case ZSWAP_UNINIT:
798                 /* if this is load-time (pre-init) param setting,
799                  * don't create a pool; that's done during init.
800                  */
801                 ret = param_set_charp(s, kp);
802                 break;
803         case ZSWAP_INIT_SUCCEED:
804                 new_pool = zswap_pool_changed(s, kp);
805                 break;
806         case ZSWAP_INIT_FAILED:
807                 pr_err("can't set param, initialization failed\n");
808                 ret = -ENODEV;
809         }
810         mutex_unlock(&zswap_init_lock);
811
812         /* no need to create a new pool, return directly */
813         if (!new_pool)
814                 return ret;
815
816         if (!type) {
817                 if (!zpool_has_pool(s)) {
818                         pr_err("zpool %s not available\n", s);
819                         return -ENOENT;
820                 }
821                 type = s;
822         } else if (!compressor) {
823                 if (!crypto_has_acomp(s, 0, 0)) {
824                         pr_err("compressor %s not available\n", s);
825                         return -ENOENT;
826                 }
827                 compressor = s;
828         } else {
829                 WARN_ON(1);
830                 return -EINVAL;
831         }
832
833         spin_lock(&zswap_pools_lock);
834
835         pool = zswap_pool_find_get(type, compressor);
836         if (pool) {
837                 zswap_pool_debug("using existing", pool);
838                 WARN_ON(pool == zswap_pool_current());
839                 list_del_rcu(&pool->list);
840         }
841
842         spin_unlock(&zswap_pools_lock);
843
844         if (!pool)
845                 pool = zswap_pool_create(type, compressor);
846
847         if (pool)
848                 ret = param_set_charp(s, kp);
849         else
850                 ret = -EINVAL;
851
852         spin_lock(&zswap_pools_lock);
853
854         if (!ret) {
855                 put_pool = zswap_pool_current();
856                 list_add_rcu(&pool->list, &zswap_pools);
857                 zswap_has_pool = true;
858         } else if (pool) {
859                 /* add the possibly pre-existing pool to the end of the pools
860                  * list; if it's new (and empty) then it'll be removed and
861                  * destroyed by the put after we drop the lock
862                  */
863                 list_add_tail_rcu(&pool->list, &zswap_pools);
864                 put_pool = pool;
865         }
866
867         spin_unlock(&zswap_pools_lock);
868
869         if (!zswap_has_pool && !pool) {
870                 /* if initial pool creation failed, and this pool creation also
871                  * failed, maybe both compressor and zpool params were bad.
872                  * Allow changing this param, so pool creation will succeed
873                  * when the other param is changed. We already verified this
874                  * param is ok in the zpool_has_pool() or crypto_has_acomp()
875                  * checks above.
876                  */
877                 ret = param_set_charp(s, kp);
878         }
879
880         /* drop the ref from either the old current pool,
881          * or the new pool we failed to add
882          */
883         if (put_pool)
884                 zswap_pool_put(put_pool);
885
886         return ret;
887 }
888
889 static int zswap_compressor_param_set(const char *val,
890                                       const struct kernel_param *kp)
891 {
892         return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
893 }
894
895 static int zswap_zpool_param_set(const char *val,
896                                  const struct kernel_param *kp)
897 {
898         return __zswap_param_set(val, kp, NULL, zswap_compressor);
899 }
900
901 static int zswap_enabled_param_set(const char *val,
902                                    const struct kernel_param *kp)
903 {
904         int ret = -ENODEV;
905
906         /* if this is load-time (pre-init) param setting, only set param. */
907         if (system_state != SYSTEM_RUNNING)
908                 return param_set_bool(val, kp);
909
910         mutex_lock(&zswap_init_lock);
911         switch (zswap_init_state) {
912         case ZSWAP_UNINIT:
913                 if (zswap_setup())
914                         break;
915                 fallthrough;
916         case ZSWAP_INIT_SUCCEED:
917                 if (!zswap_has_pool)
918                         pr_err("can't enable, no pool configured\n");
919                 else
920                         ret = param_set_bool(val, kp);
921                 break;
922         case ZSWAP_INIT_FAILED:
923                 pr_err("can't enable, initialization failed\n");
924         }
925         mutex_unlock(&zswap_init_lock);
926
927         return ret;
928 }
929
930 /*********************************
931 * writeback code
932 **********************************/
933 /* return enum for zswap_get_swap_cache_page */
934 enum zswap_get_swap_ret {
935         ZSWAP_SWAPCACHE_NEW,
936         ZSWAP_SWAPCACHE_EXIST,
937         ZSWAP_SWAPCACHE_FAIL,
938 };
939
940 /*
941  * zswap_get_swap_cache_page
942  *
943  * This is an adaptation of read_swap_cache_async()
944  *
945  * This function tries to find a page with the given swap entry
946  * in the swapper_space address space (the swap cache).  If the page
947  * is found, it is returned in retpage.  Otherwise, a page is allocated,
948  * added to the swap cache, and returned in retpage.
949  *
950  * On success, the swap cache page is returned in retpage
951  * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
952  * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
953  *     the new page is added to swapcache and locked
954  * Returns ZSWAP_SWAPCACHE_FAIL on error
955  */
956 static int zswap_get_swap_cache_page(swp_entry_t entry,
957                                 struct page **retpage)
958 {
959         bool page_was_allocated;
960
961         *retpage = __read_swap_cache_async(entry, GFP_KERNEL,
962                         NULL, 0, &page_was_allocated);
963         if (page_was_allocated)
964                 return ZSWAP_SWAPCACHE_NEW;
965         if (!*retpage)
966                 return ZSWAP_SWAPCACHE_FAIL;
967         return ZSWAP_SWAPCACHE_EXIST;
968 }
969
970 /*
971  * Attempts to free an entry by adding a page to the swap cache,
972  * decompressing the entry data into the page, and issuing a
973  * bio write to write the page back to the swap device.
974  *
975  * This can be thought of as a "resumed writeback" of the page
976  * to the swap device.  We are basically resuming the same swap
977  * writeback path that was intercepted with the frontswap_store()
978  * in the first place.  After the page has been decompressed into
979  * the swap cache, the compressed version stored by zswap can be
980  * freed.
981  */
982 static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
983 {
984         struct zswap_header *zhdr;
985         swp_entry_t swpentry;
986         struct zswap_tree *tree;
987         pgoff_t offset;
988         struct zswap_entry *entry;
989         struct page *page;
990         struct scatterlist input, output;
991         struct crypto_acomp_ctx *acomp_ctx;
992
993         u8 *src, *tmp = NULL;
994         unsigned int dlen;
995         int ret;
996         struct writeback_control wbc = {
997                 .sync_mode = WB_SYNC_NONE,
998         };
999
1000         if (!zpool_can_sleep_mapped(pool)) {
1001                 tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
1002                 if (!tmp)
1003                         return -ENOMEM;
1004         }
1005
1006         /* extract swpentry from data */
1007         zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
1008         swpentry = zhdr->swpentry; /* here */
1009         tree = zswap_trees[swp_type(swpentry)];
1010         offset = swp_offset(swpentry);
1011         zpool_unmap_handle(pool, handle);
1012
1013         /* find and ref zswap entry */
1014         spin_lock(&tree->lock);
1015         entry = zswap_entry_find_get(&tree->rbroot, offset);
1016         if (!entry) {
1017                 /* entry was invalidated */
1018                 spin_unlock(&tree->lock);
1019                 kfree(tmp);
1020                 return 0;
1021         }
1022         spin_unlock(&tree->lock);
1023         BUG_ON(offset != entry->offset);
1024
1025         /* try to allocate swap cache page */
1026         switch (zswap_get_swap_cache_page(swpentry, &page)) {
1027         case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
1028                 ret = -ENOMEM;
1029                 goto fail;
1030
1031         case ZSWAP_SWAPCACHE_EXIST:
1032                 /* page is already in the swap cache, ignore for now */
1033                 put_page(page);
1034                 ret = -EEXIST;
1035                 goto fail;
1036
1037         case ZSWAP_SWAPCACHE_NEW: /* page is locked */
1038                 /*
1039                  * Having a local reference to the zswap entry doesn't exclude
1040                  * swapping from invalidating and recycling the swap slot. Once
1041                  * the swapcache is secured against concurrent swapping to and
1042                  * from the slot, recheck that the entry is still current before
1043                  * writing.
1044                  */
1045                 spin_lock(&tree->lock);
1046                 if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) {
1047                         spin_unlock(&tree->lock);
1048                         delete_from_swap_cache(page_folio(page));
1049                         ret = -ENOMEM;
1050                         goto fail;
1051                 }
1052                 spin_unlock(&tree->lock);
1053
1054                 /* decompress */
1055                 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1056                 dlen = PAGE_SIZE;
1057
1058                 zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
1059                 src = (u8 *)zhdr + sizeof(struct zswap_header);
1060                 if (!zpool_can_sleep_mapped(pool)) {
1061                         memcpy(tmp, src, entry->length);
1062                         src = tmp;
1063                         zpool_unmap_handle(pool, handle);
1064                 }
1065
1066                 mutex_lock(acomp_ctx->mutex);
1067                 sg_init_one(&input, src, entry->length);
1068                 sg_init_table(&output, 1);
1069                 sg_set_page(&output, page, PAGE_SIZE, 0);
1070                 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
1071                 ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
1072                 dlen = acomp_ctx->req->dlen;
1073                 mutex_unlock(acomp_ctx->mutex);
1074
1075                 if (!zpool_can_sleep_mapped(pool))
1076                         kfree(tmp);
1077                 else
1078                         zpool_unmap_handle(pool, handle);
1079
1080                 BUG_ON(ret);
1081                 BUG_ON(dlen != PAGE_SIZE);
1082
1083                 /* page is up to date */
1084                 SetPageUptodate(page);
1085         }
1086
1087         /* move it to the tail of the inactive list after end_writeback */
1088         SetPageReclaim(page);
1089
1090         /* start writeback */
1091         __swap_writepage(page, &wbc);
1092         put_page(page);
1093         zswap_written_back_pages++;
1094
1095         spin_lock(&tree->lock);
1096         /* drop local reference */
1097         zswap_entry_put(tree, entry);
1098
1099         /*
1100         * There are two possible situations for the entry here:
1101         * (1) refcount is 1 (normal case): the entry is valid and on the tree
1102         * (2) refcount is 0: the entry was freed and is no longer on the tree
1103         *     because an invalidate happened during writeback.
1104         * Search the tree; if the entry is still there, drop its tree reference.
1105         */
1106         if (entry == zswap_rb_search(&tree->rbroot, offset))
1107                 zswap_entry_put(tree, entry);
1108         spin_unlock(&tree->lock);
1109
1110         return ret;
1111
1112 fail:
1113         if (!zpool_can_sleep_mapped(pool))
1114                 kfree(tmp);
1115
1116         /*
1117         * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be
1118         * happening concurrently, so it is safe and okay to not free
1119         * the entry.
1120         * It is also okay to return !0, even if we end up freeing the
1121         * entry in the following put.
1122         */
1123         spin_lock(&tree->lock);
1124         zswap_entry_put(tree, entry);
1125         spin_unlock(&tree->lock);
1126
1127         return ret;
1128 }
1129
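/*
 * Check whether the page consists of one repeating word-sized value (e.g.
 * all zeroes).  The last word is compared first as a cheap early reject
 * before scanning the rest of the page.  Returns 1 and stores the repeated
 * value in *value on a match, 0 otherwise.
 */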
1130 static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
1131 {
1132         unsigned long *page;
1133         unsigned long val;
1134         unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
1135
1136         page = (unsigned long *)ptr;
1137         val = page[0];
1138
1139         if (val != page[last_pos])
1140                 return 0;
1141
1142         for (pos = 1; pos < last_pos; pos++) {
1143                 if (val != page[pos])
1144                         return 0;
1145         }
1146
1147         *value = val;
1148
1149         return 1;
1150 }
1151
1152 static void zswap_fill_page(void *ptr, unsigned long value)
1153 {
1154         unsigned long *page;
1155
1156         page = (unsigned long *)ptr;
1157         memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
1158 }
1159
1160 /*********************************
1161 * frontswap hooks
1162 **********************************/
1163 /* attempts to compress and store a single page */
1164 static int zswap_frontswap_store(unsigned type, pgoff_t offset,
1165                                 struct page *page)
1166 {
1167         struct zswap_tree *tree = zswap_trees[type];
1168         struct zswap_entry *entry, *dupentry;
1169         struct scatterlist input, output;
1170         struct crypto_acomp_ctx *acomp_ctx;
1171         struct obj_cgroup *objcg = NULL;
1172         struct zswap_pool *pool;
1173         int ret;
1174         unsigned int hlen, dlen = PAGE_SIZE;
1175         unsigned long handle, value;
1176         char *buf;
1177         u8 *src, *dst;
1178         struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
1179         gfp_t gfp;
1180
1181         /* THP isn't supported */
1182         if (PageTransHuge(page)) {
1183                 ret = -EINVAL;
1184                 goto reject;
1185         }
1186
1187         if (!zswap_enabled || !tree) {
1188                 ret = -ENODEV;
1189                 goto reject;
1190         }
1191
1192         objcg = get_obj_cgroup_from_page(page);
1193         if (objcg && !obj_cgroup_may_zswap(objcg))
1194                 goto shrink;
1195
1196         /* reclaim space if needed */
1197         if (zswap_is_full()) {
1198                 zswap_pool_limit_hit++;
1199                 zswap_pool_reached_full = true;
1200                 goto shrink;
1201         }
1202
1203         if (zswap_pool_reached_full) {
1204                if (!zswap_can_accept()) {
1205                         ret = -ENOMEM;
1206                         goto shrink;
1207                 } else
1208                         zswap_pool_reached_full = false;
1209         }
1210
1211         /* allocate entry */
1212         entry = zswap_entry_cache_alloc(GFP_KERNEL);
1213         if (!entry) {
1214                 zswap_reject_kmemcache_fail++;
1215                 ret = -ENOMEM;
1216                 goto reject;
1217         }
1218
1219         if (zswap_same_filled_pages_enabled) {
1220                 src = kmap_atomic(page);
1221                 if (zswap_is_page_same_filled(src, &value)) {
1222                         kunmap_atomic(src);
1223                         entry->offset = offset;
1224                         entry->length = 0;
1225                         entry->value = value;
1226                         atomic_inc(&zswap_same_filled_pages);
1227                         goto insert_entry;
1228                 }
1229                 kunmap_atomic(src);
1230         }
1231
1232         if (!zswap_non_same_filled_pages_enabled) {
1233                 ret = -EINVAL;
1234                 goto freepage;
1235         }
1236
1237         /* if entry is successfully added, it keeps the reference */
1238         entry->pool = zswap_pool_current_get();
1239         if (!entry->pool) {
1240                 ret = -EINVAL;
1241                 goto freepage;
1242         }
1243
1244         /* compress */
1245         acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1246
1247         mutex_lock(acomp_ctx->mutex);
1248
1249         dst = acomp_ctx->dstmem;
1250         sg_init_table(&input, 1);
1251         sg_set_page(&input, page, PAGE_SIZE, 0);
1252
1253         /* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
1254         sg_init_one(&output, dst, PAGE_SIZE * 2);
1255         acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
1256         /*
1257          * It may look a little odd that we send an asynchronous request and
1258          * then wait for its completion synchronously; in effect the process
1259          * is synchronous.
1260          * Theoretically, acomp lets users submit multiple requests to one
1261          * acomp instance and have them completed concurrently.  But in this
1262          * case frontswap stores and loads pages one at a time, so a single
1263          * thread has no way to submit a second page before the first one is
1264          * done.
1265          * Different threads running on different CPUs use different acomp
1266          * instances, though, so multiple threads can (de)compress in parallel.
1267          */
1268         ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
1269         dlen = acomp_ctx->req->dlen;
1270
1271         if (ret) {
1272                 ret = -EINVAL;
1273                 goto put_dstmem;
1274         }
1275
1276         /* store */
1277         hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
1278         gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
1279         if (zpool_malloc_support_movable(entry->pool->zpool))
1280                 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
1281         ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
1282         if (ret == -ENOSPC) {
1283                 zswap_reject_compress_poor++;
1284                 goto put_dstmem;
1285         }
1286         if (ret) {
1287                 zswap_reject_alloc_fail++;
1288                 goto put_dstmem;
1289         }
1290         buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
1291         memcpy(buf, &zhdr, hlen);
1292         memcpy(buf + hlen, dst, dlen);
1293         zpool_unmap_handle(entry->pool->zpool, handle);
1294         mutex_unlock(acomp_ctx->mutex);
1295
1296         /* populate entry */
1297         entry->offset = offset;
1298         entry->handle = handle;
1299         entry->length = dlen;
1300
1301 insert_entry:
1302         entry->objcg = objcg;
1303         if (objcg) {
1304                 obj_cgroup_charge_zswap(objcg, entry->length);
1305                 /* Account before objcg ref is moved to tree */
1306                 count_objcg_event(objcg, ZSWPOUT);
1307         }
1308
1309         /* map */
1310         spin_lock(&tree->lock);
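        /*
         * An entry for this offset may already exist if the page was stored
         * before and the stale copy was never invalidated (counted as
         * zswap_duplicate_entry).  Remove the stale entry and retry the insert.
         */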
1311         do {
1312                 ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
1313                 if (ret == -EEXIST) {
1314                         zswap_duplicate_entry++;
1315                         /* remove from rbtree */
1316                         zswap_rb_erase(&tree->rbroot, dupentry);
1317                         zswap_entry_put(tree, dupentry);
1318                 }
1319         } while (ret == -EEXIST);
1320         spin_unlock(&tree->lock);
1321
1322         /* update stats */
1323         atomic_inc(&zswap_stored_pages);
1324         zswap_update_total_size();
1325         count_vm_event(ZSWPOUT);
1326
1327         return 0;
1328
1329 put_dstmem:
1330         mutex_unlock(acomp_ctx->mutex);
1331         zswap_pool_put(entry->pool);
1332 freepage:
1333         zswap_entry_cache_free(entry);
1334 reject:
1335         if (objcg)
1336                 obj_cgroup_put(objcg);
1337         return ret;
1338
1339 shrink:
1340         pool = zswap_pool_last_get();
1341         if (pool)
1342                 queue_work(shrink_wq, &pool->shrink_work);
1343         ret = -ENOMEM;
1344         goto reject;
1345 }
1346
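/*
 * Removes the entry from the tree and drops the tree's reference to it;
 * the entry is freed once the last outstanding reference is put.
 */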
1347 static void zswap_invalidate_entry(struct zswap_tree *tree,
1348                                    struct zswap_entry *entry)
1349 {
1350         /* remove from rbtree */
1351         zswap_rb_erase(&tree->rbroot, entry);
1352
1353         /* drop the initial reference from entry creation */
1354         zswap_entry_put(tree, entry);
1355 }
1356
1357 /*
1358  * returns 0 if the page was successfully decompressed
1359  * returns -1 if the entry was not found or on error
1360  */
1361 static int zswap_frontswap_load(unsigned type, pgoff_t offset,
1362                                 struct page *page, bool *exclusive)
1363 {
1364         struct zswap_tree *tree = zswap_trees[type];
1365         struct zswap_entry *entry;
1366         struct scatterlist input, output;
1367         struct crypto_acomp_ctx *acomp_ctx;
1368         u8 *src, *dst, *tmp;
1369         unsigned int dlen;
1370         int ret;
1371
1372         /* find */
1373         spin_lock(&tree->lock);
1374         entry = zswap_entry_find_get(&tree->rbroot, offset);
1375         if (!entry) {
1376                 /* entry was written back */
1377                 spin_unlock(&tree->lock);
1378                 return -1;
1379         }
1380         spin_unlock(&tree->lock);
1381
1382         if (!entry->length) {
1383                 dst = kmap_atomic(page);
1384                 zswap_fill_page(dst, entry->value);
1385                 kunmap_atomic(dst);
1386                 ret = 0;
1387                 goto stats;
1388         }
1389
1390         if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
1391                 tmp = kmalloc(entry->length, GFP_KERNEL);
1392                 if (!tmp) {
1393                         ret = -ENOMEM;
1394                         goto freeentry;
1395                 }
1396         }
1397
1398         /* decompress */
1399         dlen = PAGE_SIZE;
1400         src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
1401         if (zpool_evictable(entry->pool->zpool))
1402                 src += sizeof(struct zswap_header);
1403
1404         if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
1405                 memcpy(tmp, src, entry->length);
1406                 src = tmp;
1407                 zpool_unmap_handle(entry->pool->zpool, entry->handle);
1408         }
1409
1410         acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1411         mutex_lock(acomp_ctx->mutex);
1412         sg_init_one(&input, src, entry->length);
1413         sg_init_table(&output, 1);
1414         sg_set_page(&output, page, PAGE_SIZE, 0);
1415         acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
1416         ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
1417         mutex_unlock(acomp_ctx->mutex);
1418
1419         if (zpool_can_sleep_mapped(entry->pool->zpool))
1420                 zpool_unmap_handle(entry->pool->zpool, entry->handle);
1421         else
1422                 kfree(tmp);
1423
1424         BUG_ON(ret);
1425 stats:
1426         count_vm_event(ZSWPIN);
1427         if (entry->objcg)
1428                 count_objcg_event(entry->objcg, ZSWPIN);
1429 freeentry:
1430         spin_lock(&tree->lock);
1431         zswap_entry_put(tree, entry);
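        /*
         * With exclusive loads enabled, drop the compressed copy after a
         * successful load: the page now lives in the swap cache, so keeping
         * the zswap entry would only duplicate the data.
         */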
1432         if (!ret && zswap_exclusive_loads_enabled) {
1433                 zswap_invalidate_entry(tree, entry);
1434                 *exclusive = true;
1435         }
1436         spin_unlock(&tree->lock);
1437
1438         return ret;
1439 }
1440
1441 /* frees an entry in zswap */
1442 static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
1443 {
1444         struct zswap_tree *tree = zswap_trees[type];
1445         struct zswap_entry *entry;
1446
1447         /* find */
1448         spin_lock(&tree->lock);
1449         entry = zswap_rb_search(&tree->rbroot, offset);
1450         if (!entry) {
1451                 /* entry was written back */
1452                 spin_unlock(&tree->lock);
1453                 return;
1454         }
1455         zswap_invalidate_entry(tree, entry);
1456         spin_unlock(&tree->lock);
1457 }
1458
1459 /* frees all zswap entries for the given swap type */
1460 static void zswap_frontswap_invalidate_area(unsigned type)
1461 {
1462         struct zswap_tree *tree = zswap_trees[type];
1463         struct zswap_entry *entry, *n;
1464
1465         if (!tree)
1466                 return;
1467
1468         /* walk the tree and free everything */
1469         spin_lock(&tree->lock);
1470         rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
1471                 zswap_free_entry(entry);
1472         tree->rbroot = RB_ROOT;
1473         spin_unlock(&tree->lock);
1474         kfree(tree);
1475         zswap_trees[type] = NULL;
1476 }
1477
1478 static void zswap_frontswap_init(unsigned type)
1479 {
1480         struct zswap_tree *tree;
1481
1482         tree = kzalloc(sizeof(*tree), GFP_KERNEL);
1483         if (!tree) {
1484                 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
1485                 return;
1486         }
1487
1488         tree->rbroot = RB_ROOT;
1489         spin_lock_init(&tree->lock);
1490         zswap_trees[type] = tree;
1491 }
1492
1493 static const struct frontswap_ops zswap_frontswap_ops = {
1494         .store = zswap_frontswap_store,
1495         .load = zswap_frontswap_load,
1496         .invalidate_page = zswap_frontswap_invalidate_page,
1497         .invalidate_area = zswap_frontswap_invalidate_area,
1498         .init = zswap_frontswap_init
1499 };
1500
1501 /*********************************
1502 * debugfs functions
1503 **********************************/
1504 #ifdef CONFIG_DEBUG_FS
1505 #include <linux/debugfs.h>
1506
1507 static struct dentry *zswap_debugfs_root;
1508
1509 static int zswap_debugfs_init(void)
1510 {
1511         if (!debugfs_initialized())
1512                 return -ENODEV;
1513
1514         zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
1515
1516         debugfs_create_u64("pool_limit_hit", 0444,
1517                            zswap_debugfs_root, &zswap_pool_limit_hit);
1518         debugfs_create_u64("reject_reclaim_fail", 0444,
1519                            zswap_debugfs_root, &zswap_reject_reclaim_fail);
1520         debugfs_create_u64("reject_alloc_fail", 0444,
1521                            zswap_debugfs_root, &zswap_reject_alloc_fail);
1522         debugfs_create_u64("reject_kmemcache_fail", 0444,
1523                            zswap_debugfs_root, &zswap_reject_kmemcache_fail);
1524         debugfs_create_u64("reject_compress_poor", 0444,
1525                            zswap_debugfs_root, &zswap_reject_compress_poor);
1526         debugfs_create_u64("written_back_pages", 0444,
1527                            zswap_debugfs_root, &zswap_written_back_pages);
1528         debugfs_create_u64("duplicate_entry", 0444,
1529                            zswap_debugfs_root, &zswap_duplicate_entry);
1530         debugfs_create_u64("pool_total_size", 0444,
1531                            zswap_debugfs_root, &zswap_pool_total_size);
1532         debugfs_create_atomic_t("stored_pages", 0444,
1533                                 zswap_debugfs_root, &zswap_stored_pages);
1534         debugfs_create_atomic_t("same_filled_pages", 0444,
1535                                 zswap_debugfs_root, &zswap_same_filled_pages);
1536
1537         return 0;
1538 }
1539 #else
1540 static int zswap_debugfs_init(void)
1541 {
1542         return 0;
1543 }
1544 #endif
1545
1546 /*********************************
1547 * module init and exit
1548 **********************************/
1549 static int zswap_setup(void)
1550 {
1551         struct zswap_pool *pool;
1552         int ret;
1553
1554         zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
1555         if (!zswap_entry_cache) {
1556                 pr_err("entry cache creation failed\n");
1557                 goto cache_fail;
1558         }
1559
1560         ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
1561                                 zswap_dstmem_prepare, zswap_dstmem_dead);
1562         if (ret) {
1563                 pr_err("dstmem alloc failed\n");
1564                 goto dstmem_fail;
1565         }
1566
1567         ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
1568                                       "mm/zswap_pool:prepare",
1569                                       zswap_cpu_comp_prepare,
1570                                       zswap_cpu_comp_dead);
1571         if (ret)
1572                 goto hp_fail;
1573
1574         pool = __zswap_pool_create_fallback();
1575         if (pool) {
1576                 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
1577                         zpool_get_type(pool->zpool));
1578                 list_add(&pool->list, &zswap_pools);
1579                 zswap_has_pool = true;
1580         } else {
1581                 pr_err("pool creation failed\n");
1582                 zswap_enabled = false;
1583         }
1584
1585         shrink_wq = create_workqueue("zswap-shrink");
1586         if (!shrink_wq)
1587                 goto fallback_fail;
1588
1589         ret = frontswap_register_ops(&zswap_frontswap_ops);
1590         if (ret)
1591                 goto destroy_wq;
1592         if (zswap_debugfs_init())
1593                 pr_warn("debugfs initialization failed\n");
1594         zswap_init_state = ZSWAP_INIT_SUCCEED;
1595         return 0;
1596
1597 destroy_wq:
1598         destroy_workqueue(shrink_wq);
1599 fallback_fail:
1600         if (pool)
1601                 zswap_pool_destroy(pool);
1602 hp_fail:
1603         cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
1604 dstmem_fail:
1605         kmem_cache_destroy(zswap_entry_cache);
1606 cache_fail:
1607         /* if built-in, we aren't unloaded on failure; don't allow use */
1608         zswap_init_state = ZSWAP_INIT_FAILED;
1609         zswap_enabled = false;
1610         return -ENOMEM;
1611 }
1612
1613 static int __init zswap_init(void)
1614 {
1615         if (!zswap_enabled)
1616                 return 0;
1617         return zswap_setup();
1618 }
1619 /* must be late so crypto has time to come up */
1620 late_initcall(zswap_init);
1621
1622 MODULE_AUTHOR("Seth Jennings <[email protected]>");
1623 MODULE_DESCRIPTION("Compressed cache for swap pages");