// SPDX-License-Identifier: GPL-2.0
/*
 * Manage cache of swap slots to be used for and returned from
 * swap.
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Author: Tim Chen <[email protected]>
 *
 * We allocate swap slots from the global pool and put
 * them into local per-cpu caches.  This has the advantage
 * of not needing to acquire the swap_info lock every time
 * we need a new slot.
 *
 * There is also the opportunity to simply return a slot
 * to the local cache without needing to acquire the
 * swap_info lock.  We do not reuse the returned slots
 * directly but move them back to the global pool in a
 * batch.  This allows the slots to coalesce, reducing
 * fragmentation.
 *
 * An allocated swap entry is marked with the SWAP_HAS_CACHE
 * flag in swap_map, which prevents it from being allocated
 * again from the global pool.
 *
 * The swap slots cache is protected by a mutex instead of
 * a spin lock because we can possibly sleep while searching
 * for slots with scan_swap_map.
 */

#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>

static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
static bool swap_slot_cache_active;
bool swap_slot_cache_enabled;
static bool swap_slot_cache_initialized;
static DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
static DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(unsigned int type);

#define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
#define SLOTS_CACHE 0x1
#define SLOTS_CACHE_RET 0x2

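/*
 * Stop handing out slots from the per-cpu caches and flush both the
 * allocation and the return caches back to the global pool.
 */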
static void deactivate_swap_slots_cache(void)
{
        mutex_lock(&swap_slots_cache_mutex);
        swap_slot_cache_active = false;
        __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
        mutex_unlock(&swap_slots_cache_mutex);
}

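/* Resume use of the per-cpu caches once free swap is plentiful again. */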
static void reactivate_swap_slots_cache(void)
{
        mutex_lock(&swap_slots_cache_mutex);
        swap_slot_cache_active = true;
        mutex_unlock(&swap_slots_cache_mutex);
}

/* Must not be called with cpu hot plug lock */
void disable_swap_slots_cache_lock(void)
{
        mutex_lock(&swap_slots_cache_enable_mutex);
        swap_slot_cache_enabled = false;
        if (swap_slot_cache_initialized) {
                /* serialize with cpu hotplug operations */
                cpus_read_lock();
                __drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
                cpus_read_unlock();
        }
}

static void __reenable_swap_slots_cache(void)
{
        swap_slot_cache_enabled = has_usable_swap();
}

void reenable_swap_slots_cache_unlock(void)
{
        __reenable_swap_slots_cache();
        mutex_unlock(&swap_slots_cache_enable_mutex);
}

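/*
 * Decide whether the per-cpu caches should be used: activate them when
 * free swap exceeds the activation threshold (scaled by the number of
 * online cpus) and deactivate them when free swap falls below the
 * deactivation threshold.
 */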
static bool check_cache_active(void)
{
        long pages;

        if (!swap_slot_cache_enabled)
                return false;

        pages = get_nr_swap_pages();
        if (!swap_slot_cache_active) {
                if (pages > num_online_cpus() *
                    THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
                        reactivate_swap_slots_cache();
                goto out;
        }

        /* if global pool of slot caches too low, deactivate cache */
        if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
                deactivate_swap_slots_cache();
out:
        return swap_slot_cache_active;
}

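/*
 * CPU hotplug "online" callback, registered via cpuhp_setup_state() in
 * enable_swap_slots_cache(): allocate the allocation and return slot
 * arrays for this cpu's cache.
 */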
static int alloc_swap_slot_cache(unsigned int cpu)
{
        struct swap_slots_cache *cache;
        swp_entry_t *slots, *slots_ret;

        /*
         * Do allocation outside swap_slots_cache_mutex
         * as kvcalloc could trigger reclaim and folio_alloc_swap,
         * which can lock swap_slots_cache_mutex.
         */
        slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
                         GFP_KERNEL);
        if (!slots)
                return -ENOMEM;

        slots_ret = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
                             GFP_KERNEL);
        if (!slots_ret) {
                kvfree(slots);
                return -ENOMEM;
        }

        mutex_lock(&swap_slots_cache_mutex);
        cache = &per_cpu(swp_slots, cpu);
        if (cache->slots || cache->slots_ret) {
                /* cache already allocated */
                mutex_unlock(&swap_slots_cache_mutex);

                kvfree(slots);
                kvfree(slots_ret);

                return 0;
        }

        if (!cache->lock_initialized) {
                mutex_init(&cache->alloc_lock);
                spin_lock_init(&cache->free_lock);
                cache->lock_initialized = true;
        }
        cache->nr = 0;
        cache->cur = 0;
        cache->n_ret = 0;
        /*
         * We initialized alloc_lock and free_lock earlier.  We use
         * !cache->slots or !cache->slots_ret to know if it is safe to acquire
         * the corresponding lock and use the cache.  The memory barrier below
         * ensures the assumption.
         */
        mb();
        cache->slots = slots;
        cache->slots_ret = slots_ret;
        mutex_unlock(&swap_slots_cache_mutex);
        return 0;
}

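/*
 * Flush one cpu's cached slots back to the global pool.  @type selects
 * the allocation cache (SLOTS_CACHE), the return cache (SLOTS_CACHE_RET),
 * or both; @free_slots additionally frees the backing arrays.
 */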
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
                                  bool free_slots)
{
        struct swap_slots_cache *cache;
        swp_entry_t *slots = NULL;

        cache = &per_cpu(swp_slots, cpu);
        if ((type & SLOTS_CACHE) && cache->slots) {
                mutex_lock(&cache->alloc_lock);
                swapcache_free_entries(cache->slots + cache->cur, cache->nr);
                cache->cur = 0;
                cache->nr = 0;
                if (free_slots && cache->slots) {
                        kvfree(cache->slots);
                        cache->slots = NULL;
                }
                mutex_unlock(&cache->alloc_lock);
        }
        if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
                spin_lock_irq(&cache->free_lock);
                swapcache_free_entries(cache->slots_ret, cache->n_ret);
                cache->n_ret = 0;
                if (free_slots && cache->slots_ret) {
                        slots = cache->slots_ret;
                        cache->slots_ret = NULL;
                }
                spin_unlock_irq(&cache->free_lock);
                kvfree(slots);
        }
}

static void __drain_swap_slots_cache(unsigned int type)
{
        unsigned int cpu;

        /*
         * This function is called during
         *      1) swapoff, when we have to make sure no
         *         leftover slots are in the cache when we remove
         *         a swap device;
         *      2) disabling of the swap slot cache, when we run low
         *         on swap slots when allocating memory and need
         *         to return swap slots to the global pool.
         *
         * We cannot acquire the cpu hot plug lock here as
         * this function can be invoked in the cpu
         * hot plug path:
         * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
         *   -> memory allocation -> direct reclaim -> folio_alloc_swap
         *      -> drain_swap_slots_cache
         *
         * Hence the loop over the currently online cpus below could miss
         * a cpu that is being brought online but not yet marked as online.
         * That is okay as we do not schedule and run anything on a
         * cpu before it has been marked online.  Hence, we will not
         * fill any swap slots in the slots cache of such a cpu.
         * There are no slots on such a cpu that need to be drained.
         */
        for_each_online_cpu(cpu)
                drain_slots_cache_cpu(cpu, type, false);
}

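/*
 * CPU hotplug "offline" callback: drain and free both of this cpu's
 * slot arrays.
 */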
static int free_slot_cache(unsigned int cpu)
{
        mutex_lock(&swap_slots_cache_mutex);
        drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
        mutex_unlock(&swap_slots_cache_mutex);
        return 0;
}

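/*
 * Register the cpu hotplug callbacks on first use, then mark the cache
 * enabled whenever usable swap is present.
 */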
void enable_swap_slots_cache(void)
{
        mutex_lock(&swap_slots_cache_enable_mutex);
        if (!swap_slot_cache_initialized) {
                int ret;

                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
                                        alloc_swap_slot_cache, free_slot_cache);
                if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
                                       "without swap slots cache.\n", __func__))
                        goto out_unlock;

                swap_slot_cache_initialized = true;
        }

        __reenable_swap_slots_cache();
out_unlock:
        mutex_unlock(&swap_slots_cache_enable_mutex);
}

/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
        if (!use_swap_slot_cache)
                return 0;

        cache->cur = 0;
        if (swap_slot_cache_active)
                cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
                                           cache->slots, 0);

        return cache->nr;
}

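/*
 * Return a single swap slot.  The slot is stashed in the per-cpu return
 * cache when possible; once that cache is full, the whole batch is handed
 * back to the global pool via swapcache_free_entries().
 */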
void free_swap_slot(swp_entry_t entry)
{
        struct swap_slots_cache *cache;

        /* Large folio swap slot is not covered. */
        zswap_invalidate(entry);

        cache = raw_cpu_ptr(&swp_slots);
        if (likely(use_swap_slot_cache && cache->slots_ret)) {
                spin_lock_irq(&cache->free_lock);
                /* Swap slots cache may be deactivated before acquiring lock */
                if (!use_swap_slot_cache || !cache->slots_ret) {
                        spin_unlock_irq(&cache->free_lock);
                        goto direct_free;
                }
                if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
                        /*
                         * Return slots to global pool.
                         * The current swap_map value is SWAP_HAS_CACHE.
                         * Set it to 0 to indicate it is available for
                         * allocation in global pool
                         */
                        swapcache_free_entries(cache->slots_ret, cache->n_ret);
                        cache->n_ret = 0;
                }
                cache->slots_ret[cache->n_ret++] = entry;
                spin_unlock_irq(&cache->free_lock);
        } else {
direct_free:
                swapcache_free_entries(&entry, 1);
        }
}

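/*
 * Allocate a swap entry for @folio.  Order-0 folios are served from the
 * per-cpu allocation cache when it is active; large folios and cache
 * misses fall back to get_swap_pages() on the global pool.  The entry
 * is charged to the memcg before it is returned.
 */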
swp_entry_t folio_alloc_swap(struct folio *folio)
{
        swp_entry_t entry;
        struct swap_slots_cache *cache;

        entry.val = 0;

        if (folio_test_large(folio)) {
                if (IS_ENABLED(CONFIG_THP_SWAP))
                        get_swap_pages(1, &entry, folio_order(folio));
                goto out;
        }

        /*
         * Preemption is allowed here, because we may sleep
         * in refill_swap_slots_cache().  But it is safe, because
         * accesses to the per-CPU data structure are protected by the
         * mutex cache->alloc_lock.
         *
         * The alloc path here does not touch cache->slots_ret
         * so cache->free_lock is not taken.
         */
        cache = raw_cpu_ptr(&swp_slots);

        if (likely(check_cache_active() && cache->slots)) {
                mutex_lock(&cache->alloc_lock);
                if (cache->slots) {
repeat:
                        if (cache->nr) {
                                entry = cache->slots[cache->cur];
                                cache->slots[cache->cur++].val = 0;
                                cache->nr--;
                        } else if (refill_swap_slots_cache(cache)) {
                                goto repeat;
                        }
                }
                mutex_unlock(&cache->alloc_lock);
                if (entry.val)
                        goto out;
        }

        get_swap_pages(1, &entry, 0);
out:
        if (mem_cgroup_try_charge_swap(folio, entry)) {
                put_swap_folio(folio, entry);
                entry.val = 0;
        }
        return entry;
}