]>
Commit | Line | Data |
---|---|---|
306b0c95 | 1 | /* |
f1e3cfff | 2 | * Compressed RAM block device |
306b0c95 | 3 | * |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
7bfb3de8 | 5 | * 2012, 2013 Minchan Kim |
306b0c95 NG |
6 | * |
7 | * This code is released using a dual license strategy: BSD/GPL | |
8 | * You can choose the licence that better fits your requirements. | |
9 | * | |
10 | * Released under the terms of 3-clause BSD License | |
11 | * Released under the terms of GNU General Public License Version 2.0 | |
12 | * | |
306b0c95 NG |
13 | */ |
14 | ||
f1e3cfff | 15 | #define KMSG_COMPONENT "zram" |
306b0c95 NG |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
8946a086 | 20 | #include <linux/bio.h> |
306b0c95 NG |
21 | #include <linux/bitops.h> |
22 | #include <linux/blkdev.h> | |
23 | #include <linux/buffer_head.h> | |
24 | #include <linux/device.h> | |
25 | #include <linux/genhd.h> | |
26 | #include <linux/highmem.h> | |
5a0e3ad6 | 27 | #include <linux/slab.h> |
b09ab054 | 28 | #include <linux/backing-dev.h> |
306b0c95 | 29 | #include <linux/string.h> |
306b0c95 | 30 | #include <linux/vmalloc.h> |
fcfa8d95 | 31 | #include <linux/err.h> |
85508ec6 | 32 | #include <linux/idr.h> |
6566d1a3 | 33 | #include <linux/sysfs.h> |
1dd6c834 | 34 | #include <linux/cpuhotplug.h> |
306b0c95 | 35 | |
16a4bfb9 | 36 | #include "zram_drv.h" |
306b0c95 | 37 | |
85508ec6 | 38 | static DEFINE_IDR(zram_index_idr); |
6566d1a3 SS |
39 | /* idr index must be protected */ |
40 | static DEFINE_MUTEX(zram_index_mutex); | |
41 | ||
f1e3cfff | 42 | static int zram_major; |
b7ca232e | 43 | static const char *default_compressor = "lzo"; |
306b0c95 | 44 | |
306b0c95 | 45 | /* Module params (documentation at end) */ |
ca3d70bd | 46 | static unsigned int num_devices = 1; |
33863c21 | 47 | |
1f7319c7 MK |
48 | static void zram_free_page(struct zram *zram, size_t index); |
49 | ||
08eee69f | 50 | static inline bool init_done(struct zram *zram) |
be2d1d56 | 51 | { |
08eee69f | 52 | return zram->disksize; |
be2d1d56 SS |
53 | } |
54 | ||
9b3bb7ab SS |
55 | static inline struct zram *dev_to_zram(struct device *dev) |
56 | { | |
57 | return (struct zram *)dev_to_disk(dev)->private_data; | |
58 | } | |
59 | ||
643ae61d MK |
60 | static unsigned long zram_get_handle(struct zram *zram, u32 index) |
61 | { | |
62 | return zram->table[index].handle; | |
63 | } | |
64 | ||
65 | static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) | |
66 | { | |
67 | zram->table[index].handle = handle; | |
68 | } | |
69 | ||
b31177f2 | 70 | /* flag operations require table entry bit_spin_lock() being held */ |
beb6602c | 71 | static int zram_test_flag(struct zram *zram, u32 index, |
522698d7 | 72 | enum zram_pageflags flag) |
99ebbd30 | 73 | { |
beb6602c | 74 | return zram->table[index].value & BIT(flag); |
522698d7 | 75 | } |
99ebbd30 | 76 | |
beb6602c | 77 | static void zram_set_flag(struct zram *zram, u32 index, |
522698d7 SS |
78 | enum zram_pageflags flag) |
79 | { | |
beb6602c | 80 | zram->table[index].value |= BIT(flag); |
522698d7 | 81 | } |
99ebbd30 | 82 | |
beb6602c | 83 | static void zram_clear_flag(struct zram *zram, u32 index, |
522698d7 SS |
84 | enum zram_pageflags flag) |
85 | { | |
beb6602c | 86 | zram->table[index].value &= ~BIT(flag); |
522698d7 | 87 | } |
99ebbd30 | 88 | |
beb6602c | 89 | static inline void zram_set_element(struct zram *zram, u32 index, |
8e19d540 | 90 | unsigned long element) |
91 | { | |
beb6602c | 92 | zram->table[index].element = element; |
8e19d540 | 93 | } |
94 | ||
643ae61d | 95 | static unsigned long zram_get_element(struct zram *zram, u32 index) |
8e19d540 | 96 | { |
643ae61d | 97 | return zram->table[index].element; |
8e19d540 | 98 | } |
99 | ||
beb6602c | 100 | static size_t zram_get_obj_size(struct zram *zram, u32 index) |
522698d7 | 101 | { |
beb6602c | 102 | return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); |
99ebbd30 AM |
103 | } |
104 | ||
beb6602c | 105 | static void zram_set_obj_size(struct zram *zram, |
522698d7 | 106 | u32 index, size_t size) |
9b3bb7ab | 107 | { |
beb6602c | 108 | unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT; |
9b3bb7ab | 109 | |
beb6602c | 110 | zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; |
522698d7 SS |
111 | } |
112 | ||
1f7319c7 | 113 | #if PAGE_SIZE != 4096 |
1c53e0d2 | 114 | static inline bool is_partial_io(struct bio_vec *bvec) |
522698d7 SS |
115 | { |
116 | return bvec->bv_len != PAGE_SIZE; | |
117 | } | |
1f7319c7 MK |
118 | #else |
119 | static inline bool is_partial_io(struct bio_vec *bvec) | |
120 | { | |
121 | return false; | |
122 | } | |
123 | #endif | |
522698d7 | 124 | |
b09ab054 MK |
125 | static void zram_revalidate_disk(struct zram *zram) |
126 | { | |
127 | revalidate_disk(zram->disk); | |
128 | /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ | |
e1735496 | 129 | zram->disk->queue->backing_dev_info->capabilities |= |
b09ab054 MK |
130 | BDI_CAP_STABLE_WRITES; |
131 | } | |
132 | ||
522698d7 SS |
133 | /* |
134 | * Check if request is within bounds and aligned on zram logical blocks. | |
135 | */ | |
1c53e0d2 | 136 | static inline bool valid_io_request(struct zram *zram, |
522698d7 SS |
137 | sector_t start, unsigned int size) |
138 | { | |
139 | u64 end, bound; | |
140 | ||
141 | /* unaligned request */ | |
142 | if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) | |
1c53e0d2 | 143 | return false; |
522698d7 | 144 | if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) |
1c53e0d2 | 145 | return false; |
522698d7 SS |
146 | |
147 | end = start + (size >> SECTOR_SHIFT); | |
148 | bound = zram->disksize >> SECTOR_SHIFT; | |
149 | /* out of range range */ | |
150 | if (unlikely(start >= bound || end > bound || start > end)) | |
1c53e0d2 | 151 | return false; |
522698d7 SS |
152 | |
153 | /* I/O request is valid */ | |
1c53e0d2 | 154 | return true; |
522698d7 SS |
155 | } |
156 | ||
157 | static void update_position(u32 *index, int *offset, struct bio_vec *bvec) | |
158 | { | |
e86942c7 | 159 | *index += (*offset + bvec->bv_len) / PAGE_SIZE; |
522698d7 SS |
160 | *offset = (*offset + bvec->bv_len) % PAGE_SIZE; |
161 | } | |
162 | ||
163 | static inline void update_used_max(struct zram *zram, | |
164 | const unsigned long pages) | |
165 | { | |
166 | unsigned long old_max, cur_max; | |
167 | ||
168 | old_max = atomic_long_read(&zram->stats.max_used_pages); | |
169 | ||
170 | do { | |
171 | cur_max = old_max; | |
172 | if (pages > cur_max) | |
173 | old_max = atomic_long_cmpxchg( | |
174 | &zram->stats.max_used_pages, cur_max, pages); | |
175 | } while (old_max != cur_max); | |
176 | } | |
177 | ||
/*
 * Fill @len bytes starting at @ptr with the repeating word @value.
 *
 * @len is expected to be a multiple of sizeof(unsigned long); a one-time
 * warning is emitted otherwise. A zero @value is handled with memset(),
 * any other value is stored word by word.
 */
static inline void zram_fill_page(char *ptr, unsigned long len,
					unsigned long value)
{
	/* unsigned index: it is compared against an unsigned word count */
	unsigned long i;
	unsigned long *page = (unsigned long *)ptr;
	const unsigned long nr_words = len / sizeof(*page);

	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));

	if (likely(value == 0)) {
		memset(ptr, 0, len);
		return;
	}

	for (i = 0; i < nr_words; i++)
		page[i] = value;
}
193 | ||
194 | static bool page_same_filled(void *ptr, unsigned long *element) | |
522698d7 SS |
195 | { |
196 | unsigned int pos; | |
197 | unsigned long *page; | |
f0fe9984 | 198 | unsigned long val; |
522698d7 SS |
199 | |
200 | page = (unsigned long *)ptr; | |
f0fe9984 | 201 | val = page[0]; |
522698d7 | 202 | |
f0fe9984 SP |
203 | for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { |
204 | if (val != page[pos]) | |
1c53e0d2 | 205 | return false; |
522698d7 SS |
206 | } |
207 | ||
f0fe9984 | 208 | *element = val; |
8e19d540 | 209 | |
1c53e0d2 | 210 | return true; |
522698d7 SS |
211 | } |
212 | ||
9b3bb7ab SS |
213 | static ssize_t initstate_show(struct device *dev, |
214 | struct device_attribute *attr, char *buf) | |
215 | { | |
a68eb3b6 | 216 | u32 val; |
9b3bb7ab SS |
217 | struct zram *zram = dev_to_zram(dev); |
218 | ||
a68eb3b6 SS |
219 | down_read(&zram->init_lock); |
220 | val = init_done(zram); | |
221 | up_read(&zram->init_lock); | |
9b3bb7ab | 222 | |
56b4e8cb | 223 | return scnprintf(buf, PAGE_SIZE, "%u\n", val); |
9b3bb7ab SS |
224 | } |
225 | ||
522698d7 SS |
226 | static ssize_t disksize_show(struct device *dev, |
227 | struct device_attribute *attr, char *buf) | |
228 | { | |
229 | struct zram *zram = dev_to_zram(dev); | |
230 | ||
231 | return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); | |
232 | } | |
233 | ||
9ada9da9 MK |
234 | static ssize_t mem_limit_store(struct device *dev, |
235 | struct device_attribute *attr, const char *buf, size_t len) | |
236 | { | |
237 | u64 limit; | |
238 | char *tmp; | |
239 | struct zram *zram = dev_to_zram(dev); | |
240 | ||
241 | limit = memparse(buf, &tmp); | |
242 | if (buf == tmp) /* no chars parsed, invalid input */ | |
243 | return -EINVAL; | |
244 | ||
245 | down_write(&zram->init_lock); | |
246 | zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | |
247 | up_write(&zram->init_lock); | |
248 | ||
249 | return len; | |
250 | } | |
251 | ||
461a8eee MK |
252 | static ssize_t mem_used_max_store(struct device *dev, |
253 | struct device_attribute *attr, const char *buf, size_t len) | |
254 | { | |
255 | int err; | |
256 | unsigned long val; | |
257 | struct zram *zram = dev_to_zram(dev); | |
461a8eee MK |
258 | |
259 | err = kstrtoul(buf, 10, &val); | |
260 | if (err || val != 0) | |
261 | return -EINVAL; | |
262 | ||
263 | down_read(&zram->init_lock); | |
5a99e95b | 264 | if (init_done(zram)) { |
461a8eee | 265 | atomic_long_set(&zram->stats.max_used_pages, |
beb6602c | 266 | zs_get_total_pages(zram->mem_pool)); |
5a99e95b | 267 | } |
461a8eee MK |
268 | up_read(&zram->init_lock); |
269 | ||
270 | return len; | |
271 | } | |
272 | ||
43209ea2 SS |
273 | /* |
274 | * We switched to per-cpu streams and this attr is not needed anymore. | |
275 | * However, we will keep it around for some time, because: | |
276 | * a) we may revert per-cpu streams in the future | |
277 | * b) it's visible to user space and we need to follow our 2 years | |
278 | * retirement rule; but we already have a number of 'soon to be | |
279 | * altered' attrs, so max_comp_streams need to wait for the next | |
280 | * layoff cycle. | |
281 | */ | |
522698d7 SS |
282 | static ssize_t max_comp_streams_show(struct device *dev, |
283 | struct device_attribute *attr, char *buf) | |
284 | { | |
43209ea2 | 285 | return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); |
522698d7 SS |
286 | } |
287 | ||
beca3ec7 SS |
288 | static ssize_t max_comp_streams_store(struct device *dev, |
289 | struct device_attribute *attr, const char *buf, size_t len) | |
290 | { | |
43209ea2 | 291 | return len; |
beca3ec7 SS |
292 | } |
293 | ||
e46b8a03 SS |
294 | static ssize_t comp_algorithm_show(struct device *dev, |
295 | struct device_attribute *attr, char *buf) | |
296 | { | |
297 | size_t sz; | |
298 | struct zram *zram = dev_to_zram(dev); | |
299 | ||
300 | down_read(&zram->init_lock); | |
301 | sz = zcomp_available_show(zram->compressor, buf); | |
302 | up_read(&zram->init_lock); | |
303 | ||
304 | return sz; | |
305 | } | |
306 | ||
307 | static ssize_t comp_algorithm_store(struct device *dev, | |
308 | struct device_attribute *attr, const char *buf, size_t len) | |
309 | { | |
310 | struct zram *zram = dev_to_zram(dev); | |
f357e345 | 311 | char compressor[ARRAY_SIZE(zram->compressor)]; |
4bbacd51 SS |
312 | size_t sz; |
313 | ||
415403be SS |
314 | strlcpy(compressor, buf, sizeof(compressor)); |
315 | /* ignore trailing newline */ | |
316 | sz = strlen(compressor); | |
317 | if (sz > 0 && compressor[sz - 1] == '\n') | |
318 | compressor[sz - 1] = 0x00; | |
319 | ||
320 | if (!zcomp_available_algorithm(compressor)) | |
1d5b43bf LH |
321 | return -EINVAL; |
322 | ||
e46b8a03 SS |
323 | down_write(&zram->init_lock); |
324 | if (init_done(zram)) { | |
325 | up_write(&zram->init_lock); | |
326 | pr_info("Can't change algorithm for initialized device\n"); | |
327 | return -EBUSY; | |
328 | } | |
4bbacd51 | 329 | |
f357e345 | 330 | strcpy(zram->compressor, compressor); |
e46b8a03 SS |
331 | up_write(&zram->init_lock); |
332 | return len; | |
333 | } | |
334 | ||
522698d7 SS |
335 | static ssize_t compact_store(struct device *dev, |
336 | struct device_attribute *attr, const char *buf, size_t len) | |
306b0c95 | 337 | { |
522698d7 | 338 | struct zram *zram = dev_to_zram(dev); |
306b0c95 | 339 | |
522698d7 SS |
340 | down_read(&zram->init_lock); |
341 | if (!init_done(zram)) { | |
342 | up_read(&zram->init_lock); | |
343 | return -EINVAL; | |
344 | } | |
306b0c95 | 345 | |
beb6602c | 346 | zs_compact(zram->mem_pool); |
522698d7 | 347 | up_read(&zram->init_lock); |
d2d5e762 | 348 | |
522698d7 | 349 | return len; |
d2d5e762 WY |
350 | } |
351 | ||
522698d7 SS |
352 | static ssize_t io_stat_show(struct device *dev, |
353 | struct device_attribute *attr, char *buf) | |
d2d5e762 | 354 | { |
522698d7 SS |
355 | struct zram *zram = dev_to_zram(dev); |
356 | ssize_t ret; | |
d2d5e762 | 357 | |
522698d7 SS |
358 | down_read(&zram->init_lock); |
359 | ret = scnprintf(buf, PAGE_SIZE, | |
360 | "%8llu %8llu %8llu %8llu\n", | |
361 | (u64)atomic64_read(&zram->stats.failed_reads), | |
362 | (u64)atomic64_read(&zram->stats.failed_writes), | |
363 | (u64)atomic64_read(&zram->stats.invalid_io), | |
364 | (u64)atomic64_read(&zram->stats.notify_free)); | |
365 | up_read(&zram->init_lock); | |
306b0c95 | 366 | |
522698d7 | 367 | return ret; |
9b3bb7ab SS |
368 | } |
369 | ||
522698d7 SS |
370 | static ssize_t mm_stat_show(struct device *dev, |
371 | struct device_attribute *attr, char *buf) | |
9b3bb7ab | 372 | { |
522698d7 | 373 | struct zram *zram = dev_to_zram(dev); |
7d3f3938 | 374 | struct zs_pool_stats pool_stats; |
522698d7 SS |
375 | u64 orig_size, mem_used = 0; |
376 | long max_used; | |
377 | ssize_t ret; | |
a539c72a | 378 | |
7d3f3938 SS |
379 | memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); |
380 | ||
522698d7 | 381 | down_read(&zram->init_lock); |
7d3f3938 | 382 | if (init_done(zram)) { |
beb6602c MK |
383 | mem_used = zs_get_total_pages(zram->mem_pool); |
384 | zs_pool_stats(zram->mem_pool, &pool_stats); | |
7d3f3938 | 385 | } |
9b3bb7ab | 386 | |
522698d7 SS |
387 | orig_size = atomic64_read(&zram->stats.pages_stored); |
388 | max_used = atomic_long_read(&zram->stats.max_used_pages); | |
9b3bb7ab | 389 | |
522698d7 | 390 | ret = scnprintf(buf, PAGE_SIZE, |
7d3f3938 | 391 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", |
522698d7 SS |
392 | orig_size << PAGE_SHIFT, |
393 | (u64)atomic64_read(&zram->stats.compr_data_size), | |
394 | mem_used << PAGE_SHIFT, | |
395 | zram->limit_pages << PAGE_SHIFT, | |
396 | max_used << PAGE_SHIFT, | |
8e19d540 | 397 | (u64)atomic64_read(&zram->stats.same_pages), |
860c707d | 398 | pool_stats.pages_compacted); |
522698d7 | 399 | up_read(&zram->init_lock); |
9b3bb7ab | 400 | |
522698d7 SS |
401 | return ret; |
402 | } | |
403 | ||
623e47fc SS |
404 | static ssize_t debug_stat_show(struct device *dev, |
405 | struct device_attribute *attr, char *buf) | |
406 | { | |
407 | int version = 1; | |
408 | struct zram *zram = dev_to_zram(dev); | |
409 | ssize_t ret; | |
410 | ||
411 | down_read(&zram->init_lock); | |
412 | ret = scnprintf(buf, PAGE_SIZE, | |
413 | "version: %d\n%8llu\n", | |
414 | version, | |
415 | (u64)atomic64_read(&zram->stats.writestall)); | |
416 | up_read(&zram->init_lock); | |
417 | ||
418 | return ret; | |
419 | } | |
420 | ||
/* Read-only statistics attributes, served by the *_show handlers above. */
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
522698d7 | 424 | |
86c49814 MK |
425 | static void zram_slot_lock(struct zram *zram, u32 index) |
426 | { | |
beb6602c | 427 | bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); |
86c49814 MK |
428 | } |
429 | ||
430 | static void zram_slot_unlock(struct zram *zram, u32 index) | |
431 | { | |
beb6602c | 432 | bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); |
86c49814 MK |
433 | } |
434 | ||
1f7319c7 MK |
435 | static bool zram_same_page_read(struct zram *zram, u32 index, |
436 | struct page *page, | |
437 | unsigned int offset, unsigned int len) | |
438 | { | |
86c49814 | 439 | zram_slot_lock(zram, index); |
643ae61d MK |
440 | if (unlikely(!zram_get_handle(zram, index) || |
441 | zram_test_flag(zram, index, ZRAM_SAME))) { | |
1f7319c7 MK |
442 | void *mem; |
443 | ||
86c49814 | 444 | zram_slot_unlock(zram, index); |
1f7319c7 | 445 | mem = kmap_atomic(page); |
643ae61d MK |
446 | zram_fill_page(mem + offset, len, |
447 | zram_get_element(zram, index)); | |
1f7319c7 MK |
448 | kunmap_atomic(mem); |
449 | return true; | |
450 | } | |
86c49814 | 451 | zram_slot_unlock(zram, index); |
1f7319c7 MK |
452 | |
453 | return false; | |
454 | } | |
455 | ||
456 | static bool zram_same_page_write(struct zram *zram, u32 index, | |
457 | struct page *page) | |
458 | { | |
459 | unsigned long element; | |
460 | void *mem = kmap_atomic(page); | |
461 | ||
462 | if (page_same_filled(mem, &element)) { | |
1f7319c7 MK |
463 | kunmap_atomic(mem); |
464 | /* Free memory associated with this sector now. */ | |
86c49814 | 465 | zram_slot_lock(zram, index); |
1f7319c7 | 466 | zram_free_page(zram, index); |
beb6602c MK |
467 | zram_set_flag(zram, index, ZRAM_SAME); |
468 | zram_set_element(zram, index, element); | |
86c49814 | 469 | zram_slot_unlock(zram, index); |
1f7319c7 MK |
470 | |
471 | atomic64_inc(&zram->stats.same_pages); | |
51f9f82c | 472 | atomic64_inc(&zram->stats.pages_stored); |
1f7319c7 MK |
473 | return true; |
474 | } | |
475 | kunmap_atomic(mem); | |
476 | ||
477 | return false; | |
478 | } | |
479 | ||
beb6602c | 480 | static void zram_meta_free(struct zram *zram, u64 disksize) |
522698d7 SS |
481 | { |
482 | size_t num_pages = disksize >> PAGE_SHIFT; | |
483 | size_t index; | |
1fec1172 GM |
484 | |
485 | /* Free all pages that are still in this zram device */ | |
302128dc MK |
486 | for (index = 0; index < num_pages; index++) |
487 | zram_free_page(zram, index); | |
1fec1172 | 488 | |
beb6602c MK |
489 | zs_destroy_pool(zram->mem_pool); |
490 | vfree(zram->table); | |
9b3bb7ab SS |
491 | } |
492 | ||
beb6602c | 493 | static bool zram_meta_alloc(struct zram *zram, u64 disksize) |
9b3bb7ab SS |
494 | { |
495 | size_t num_pages; | |
9b3bb7ab | 496 | |
9b3bb7ab | 497 | num_pages = disksize >> PAGE_SHIFT; |
beb6602c MK |
498 | zram->table = vzalloc(num_pages * sizeof(*zram->table)); |
499 | if (!zram->table) | |
500 | return false; | |
9b3bb7ab | 501 | |
beb6602c MK |
502 | zram->mem_pool = zs_create_pool(zram->disk->disk_name); |
503 | if (!zram->mem_pool) { | |
504 | vfree(zram->table); | |
505 | return false; | |
9b3bb7ab SS |
506 | } |
507 | ||
beb6602c | 508 | return true; |
9b3bb7ab SS |
509 | } |
510 | ||
d2d5e762 WY |
511 | /* |
512 | * To protect concurrent access to the same index entry, | |
513 | * caller should hold this table index entry's bit_spinlock to | |
514 | * indicate this index entry is accessing. | |
515 | */ | |
f1e3cfff | 516 | static void zram_free_page(struct zram *zram, size_t index) |
306b0c95 | 517 | { |
643ae61d | 518 | unsigned long handle = zram_get_handle(zram, index); |
306b0c95 | 519 | |
8e19d540 | 520 | /* |
521 | * No memory is allocated for same element filled pages. | |
522 | * Simply clear same page flag. | |
523 | */ | |
beb6602c MK |
524 | if (zram_test_flag(zram, index, ZRAM_SAME)) { |
525 | zram_clear_flag(zram, index, ZRAM_SAME); | |
643ae61d | 526 | zram_set_element(zram, index, 0); |
8e19d540 | 527 | atomic64_dec(&zram->stats.same_pages); |
51f9f82c | 528 | atomic64_dec(&zram->stats.pages_stored); |
306b0c95 NG |
529 | return; |
530 | } | |
531 | ||
8e19d540 | 532 | if (!handle) |
533 | return; | |
534 | ||
beb6602c | 535 | zs_free(zram->mem_pool, handle); |
306b0c95 | 536 | |
beb6602c | 537 | atomic64_sub(zram_get_obj_size(zram, index), |
d2d5e762 | 538 | &zram->stats.compr_data_size); |
90a7806e | 539 | atomic64_dec(&zram->stats.pages_stored); |
306b0c95 | 540 | |
643ae61d | 541 | zram_set_handle(zram, index, 0); |
beb6602c | 542 | zram_set_obj_size(zram, index, 0); |
306b0c95 NG |
543 | } |
544 | ||
1f7319c7 | 545 | static int zram_decompress_page(struct zram *zram, struct page *page, u32 index) |
306b0c95 | 546 | { |
1f7319c7 | 547 | int ret; |
92967471 | 548 | unsigned long handle; |
ebaf9ab5 | 549 | unsigned int size; |
1f7319c7 | 550 | void *src, *dst; |
1f7319c7 MK |
551 | |
552 | if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE)) | |
553 | return 0; | |
92967471 | 554 | |
86c49814 | 555 | zram_slot_lock(zram, index); |
643ae61d | 556 | handle = zram_get_handle(zram, index); |
beb6602c | 557 | size = zram_get_obj_size(zram, index); |
306b0c95 | 558 | |
beb6602c | 559 | src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); |
ebaf9ab5 | 560 | if (size == PAGE_SIZE) { |
1f7319c7 MK |
561 | dst = kmap_atomic(page); |
562 | memcpy(dst, src, PAGE_SIZE); | |
563 | kunmap_atomic(dst); | |
564 | ret = 0; | |
ebaf9ab5 SS |
565 | } else { |
566 | struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
567 | ||
1f7319c7 MK |
568 | dst = kmap_atomic(page); |
569 | ret = zcomp_decompress(zstrm, src, size, dst); | |
570 | kunmap_atomic(dst); | |
ebaf9ab5 SS |
571 | zcomp_stream_put(zram->comp); |
572 | } | |
beb6602c | 573 | zs_unmap_object(zram->mem_pool, handle); |
86c49814 | 574 | zram_slot_unlock(zram, index); |
a1dd52af | 575 | |
8c921b2b | 576 | /* Should NEVER happen. Return bio error if it does. */ |
1f7319c7 | 577 | if (unlikely(ret)) |
8c921b2b | 578 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); |
306b0c95 | 579 | |
1f7319c7 | 580 | return ret; |
306b0c95 NG |
581 | } |
582 | ||
37b51fdd | 583 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
1f7319c7 | 584 | u32 index, int offset) |
924bd88d JM |
585 | { |
586 | int ret; | |
37b51fdd | 587 | struct page *page; |
37b51fdd | 588 | |
1f7319c7 MK |
589 | page = bvec->bv_page; |
590 | if (is_partial_io(bvec)) { | |
591 | /* Use a temporary buffer to decompress the page */ | |
592 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
593 | if (!page) | |
594 | return -ENOMEM; | |
924bd88d JM |
595 | } |
596 | ||
1f7319c7 MK |
597 | ret = zram_decompress_page(zram, page, index); |
598 | if (unlikely(ret)) | |
599 | goto out; | |
7e5a5104 | 600 | |
1f7319c7 MK |
601 | if (is_partial_io(bvec)) { |
602 | void *dst = kmap_atomic(bvec->bv_page); | |
603 | void *src = kmap_atomic(page); | |
37b51fdd | 604 | |
1f7319c7 MK |
605 | memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); |
606 | kunmap_atomic(src); | |
607 | kunmap_atomic(dst); | |
37b51fdd | 608 | } |
1f7319c7 | 609 | out: |
37b51fdd | 610 | if (is_partial_io(bvec)) |
1f7319c7 | 611 | __free_page(page); |
37b51fdd | 612 | |
37b51fdd | 613 | return ret; |
924bd88d JM |
614 | } |
615 | ||
1f7319c7 MK |
616 | static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm, |
617 | struct page *page, | |
618 | unsigned long *out_handle, unsigned int *out_comp_len) | |
306b0c95 | 619 | { |
1f7319c7 MK |
620 | int ret; |
621 | unsigned int comp_len; | |
622 | void *src; | |
623 | unsigned long alloced_pages; | |
da9556a2 | 624 | unsigned long handle = 0; |
924bd88d | 625 | |
da9556a2 | 626 | compress_again: |
1f7319c7 MK |
627 | src = kmap_atomic(page); |
628 | ret = zcomp_compress(*zstrm, src, &comp_len); | |
629 | kunmap_atomic(src); | |
306b0c95 | 630 | |
b7ca232e | 631 | if (unlikely(ret)) { |
8c921b2b | 632 | pr_err("Compression failed! err=%d\n", ret); |
1f7319c7 | 633 | if (handle) |
beb6602c | 634 | zs_free(zram->mem_pool, handle); |
1f7319c7 | 635 | return ret; |
8c921b2b | 636 | } |
da9556a2 | 637 | |
1f7319c7 MK |
638 | if (unlikely(comp_len > max_zpage_size)) |
639 | comp_len = PAGE_SIZE; | |
a1dd52af | 640 | |
da9556a2 SS |
641 | /* |
642 | * handle allocation has 2 paths: | |
643 | * a) fast path is executed with preemption disabled (for | |
644 | * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, | |
645 | * since we can't sleep; | |
646 | * b) slow path enables preemption and attempts to allocate | |
647 | * the page with __GFP_DIRECT_RECLAIM bit set. we have to | |
648 | * put per-cpu compression stream and, thus, to re-do | |
649 | * the compression once handle is allocated. | |
650 | * | |
651 | * if we have a 'non-null' handle here then we are coming | |
652 | * from the slow path and handle has already been allocated. | |
653 | */ | |
654 | if (!handle) | |
beb6602c | 655 | handle = zs_malloc(zram->mem_pool, comp_len, |
da9556a2 SS |
656 | __GFP_KSWAPD_RECLAIM | |
657 | __GFP_NOWARN | | |
9bc482d3 MK |
658 | __GFP_HIGHMEM | |
659 | __GFP_MOVABLE); | |
fd1a30de | 660 | if (!handle) { |
2aea8493 | 661 | zcomp_stream_put(zram->comp); |
623e47fc | 662 | atomic64_inc(&zram->stats.writestall); |
beb6602c | 663 | handle = zs_malloc(zram->mem_pool, comp_len, |
9bc482d3 MK |
664 | GFP_NOIO | __GFP_HIGHMEM | |
665 | __GFP_MOVABLE); | |
1f7319c7 | 666 | *zstrm = zcomp_stream_get(zram->comp); |
da9556a2 SS |
667 | if (handle) |
668 | goto compress_again; | |
1f7319c7 | 669 | return -ENOMEM; |
8c921b2b | 670 | } |
9ada9da9 | 671 | |
beb6602c | 672 | alloced_pages = zs_get_total_pages(zram->mem_pool); |
12372755 SS |
673 | update_used_max(zram, alloced_pages); |
674 | ||
461a8eee | 675 | if (zram->limit_pages && alloced_pages > zram->limit_pages) { |
beb6602c | 676 | zs_free(zram->mem_pool, handle); |
1f7319c7 MK |
677 | return -ENOMEM; |
678 | } | |
679 | ||
680 | *out_handle = handle; | |
681 | *out_comp_len = comp_len; | |
682 | return 0; | |
683 | } | |
684 | ||
685 | static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index) | |
686 | { | |
687 | int ret; | |
688 | unsigned long handle; | |
689 | unsigned int comp_len; | |
690 | void *src, *dst; | |
691 | struct zcomp_strm *zstrm; | |
1f7319c7 MK |
692 | struct page *page = bvec->bv_page; |
693 | ||
694 | if (zram_same_page_write(zram, index, page)) | |
695 | return 0; | |
696 | ||
697 | zstrm = zcomp_stream_get(zram->comp); | |
698 | ret = zram_compress(zram, &zstrm, page, &handle, &comp_len); | |
699 | if (ret) { | |
700 | zcomp_stream_put(zram->comp); | |
701 | return ret; | |
9ada9da9 MK |
702 | } |
703 | ||
beb6602c | 704 | dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); |
1f7319c7 MK |
705 | |
706 | src = zstrm->buffer; | |
707 | if (comp_len == PAGE_SIZE) | |
397c6066 | 708 | src = kmap_atomic(page); |
1f7319c7 MK |
709 | memcpy(dst, src, comp_len); |
710 | if (comp_len == PAGE_SIZE) | |
397c6066 | 711 | kunmap_atomic(src); |
306b0c95 | 712 | |
2aea8493 | 713 | zcomp_stream_put(zram->comp); |
beb6602c | 714 | zs_unmap_object(zram->mem_pool, handle); |
fd1a30de | 715 | |
f40ac2ae SS |
716 | /* |
717 | * Free memory associated with this sector | |
718 | * before overwriting unused sectors. | |
719 | */ | |
86c49814 | 720 | zram_slot_lock(zram, index); |
f40ac2ae | 721 | zram_free_page(zram, index); |
643ae61d | 722 | zram_set_handle(zram, index, handle); |
beb6602c | 723 | zram_set_obj_size(zram, index, comp_len); |
86c49814 | 724 | zram_slot_unlock(zram, index); |
306b0c95 | 725 | |
8c921b2b | 726 | /* Update stats */ |
1f7319c7 | 727 | atomic64_add(comp_len, &zram->stats.compr_data_size); |
90a7806e | 728 | atomic64_inc(&zram->stats.pages_stored); |
1f7319c7 MK |
729 | return 0; |
730 | } | |
731 | ||
732 | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, | |
733 | u32 index, int offset) | |
734 | { | |
735 | int ret; | |
736 | struct page *page = NULL; | |
737 | void *src; | |
738 | struct bio_vec vec; | |
739 | ||
740 | vec = *bvec; | |
741 | if (is_partial_io(bvec)) { | |
742 | void *dst; | |
743 | /* | |
744 | * This is a partial IO. We need to read the full page | |
745 | * before to write the changes. | |
746 | */ | |
747 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
748 | if (!page) | |
749 | return -ENOMEM; | |
750 | ||
751 | ret = zram_decompress_page(zram, page, index); | |
752 | if (ret) | |
753 | goto out; | |
754 | ||
755 | src = kmap_atomic(bvec->bv_page); | |
756 | dst = kmap_atomic(page); | |
757 | memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); | |
758 | kunmap_atomic(dst); | |
759 | kunmap_atomic(src); | |
760 | ||
761 | vec.bv_page = page; | |
762 | vec.bv_len = PAGE_SIZE; | |
763 | vec.bv_offset = 0; | |
764 | } | |
765 | ||
766 | ret = __zram_bvec_write(zram, &vec, index); | |
924bd88d | 767 | out: |
397c6066 | 768 | if (is_partial_io(bvec)) |
1f7319c7 | 769 | __free_page(page); |
924bd88d | 770 | return ret; |
8c921b2b JM |
771 | } |
772 | ||
f4659d8e JK |
773 | /* |
774 | * zram_bio_discard - handler on discard request | |
775 | * @index: physical block index in PAGE_SIZE units | |
776 | * @offset: byte offset within physical block | |
777 | */ | |
778 | static void zram_bio_discard(struct zram *zram, u32 index, | |
779 | int offset, struct bio *bio) | |
780 | { | |
781 | size_t n = bio->bi_iter.bi_size; | |
782 | ||
783 | /* | |
784 | * zram manages data in physical block size units. Because logical block | |
785 | * size isn't identical with physical block size on some arch, we | |
786 | * could get a discard request pointing to a specific offset within a | |
787 | * certain physical block. Although we can handle this request by | |
788 | * reading that physiclal block and decompressing and partially zeroing | |
789 | * and re-compressing and then re-storing it, this isn't reasonable | |
790 | * because our intent with a discard request is to save memory. So | |
791 | * skipping this logical block is appropriate here. | |
792 | */ | |
793 | if (offset) { | |
38515c73 | 794 | if (n <= (PAGE_SIZE - offset)) |
f4659d8e JK |
795 | return; |
796 | ||
38515c73 | 797 | n -= (PAGE_SIZE - offset); |
f4659d8e JK |
798 | index++; |
799 | } | |
800 | ||
801 | while (n >= PAGE_SIZE) { | |
86c49814 | 802 | zram_slot_lock(zram, index); |
f4659d8e | 803 | zram_free_page(zram, index); |
86c49814 | 804 | zram_slot_unlock(zram, index); |
015254da | 805 | atomic64_inc(&zram->stats.notify_free); |
f4659d8e JK |
806 | index++; |
807 | n -= PAGE_SIZE; | |
808 | } | |
809 | } | |
810 | ||
522698d7 | 811 | static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, |
c11f0c0b | 812 | int offset, bool is_write) |
9b3bb7ab | 813 | { |
522698d7 | 814 | unsigned long start_time = jiffies; |
c11f0c0b | 815 | int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; |
9b3bb7ab | 816 | int ret; |
9b3bb7ab | 817 | |
c11f0c0b | 818 | generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT, |
522698d7 | 819 | &zram->disk->part0); |
46a51c80 | 820 | |
c11f0c0b | 821 | if (!is_write) { |
522698d7 SS |
822 | atomic64_inc(&zram->stats.num_reads); |
823 | ret = zram_bvec_read(zram, bvec, index, offset); | |
1f7319c7 | 824 | flush_dcache_page(bvec->bv_page); |
522698d7 SS |
825 | } else { |
826 | atomic64_inc(&zram->stats.num_writes); | |
827 | ret = zram_bvec_write(zram, bvec, index, offset); | |
1b672224 | 828 | } |
9b3bb7ab | 829 | |
c11f0c0b | 830 | generic_end_io_acct(rw_acct, &zram->disk->part0, start_time); |
9b3bb7ab | 831 | |
522698d7 | 832 | if (unlikely(ret)) { |
c11f0c0b | 833 | if (!is_write) |
522698d7 SS |
834 | atomic64_inc(&zram->stats.failed_reads); |
835 | else | |
836 | atomic64_inc(&zram->stats.failed_writes); | |
1b672224 | 837 | } |
9b3bb7ab | 838 | |
1b672224 | 839 | return ret; |
8c921b2b JM |
840 | } |
841 | ||
be257c61 | 842 | static void __zram_make_request(struct zram *zram, struct bio *bio) |
8c921b2b | 843 | { |
abf54548 | 844 | int offset; |
8c921b2b | 845 | u32 index; |
7988613b KO |
846 | struct bio_vec bvec; |
847 | struct bvec_iter iter; | |
8c921b2b | 848 | |
4f024f37 KO |
849 | index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; |
850 | offset = (bio->bi_iter.bi_sector & | |
851 | (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; | |
8c921b2b | 852 | |
31edeacd CH |
853 | switch (bio_op(bio)) { |
854 | case REQ_OP_DISCARD: | |
855 | case REQ_OP_WRITE_ZEROES: | |
f4659d8e | 856 | zram_bio_discard(zram, index, offset, bio); |
4246a0b6 | 857 | bio_endio(bio); |
f4659d8e | 858 | return; |
31edeacd CH |
859 | default: |
860 | break; | |
f4659d8e JK |
861 | } |
862 | ||
7988613b | 863 | bio_for_each_segment(bvec, bio, iter) { |
e86942c7 MK |
864 | struct bio_vec bv = bvec; |
865 | unsigned int unwritten = bvec.bv_len; | |
924bd88d | 866 | |
e86942c7 MK |
867 | do { |
868 | bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, | |
869 | unwritten); | |
abf54548 | 870 | if (zram_bvec_rw(zram, &bv, index, offset, |
e86942c7 | 871 | op_is_write(bio_op(bio))) < 0) |
924bd88d JM |
872 | goto out; |
873 | ||
e86942c7 MK |
874 | bv.bv_offset += bv.bv_len; |
875 | unwritten -= bv.bv_len; | |
924bd88d | 876 | |
e86942c7 MK |
877 | update_position(&index, &offset, &bv); |
878 | } while (unwritten); | |
a1dd52af | 879 | } |
306b0c95 | 880 | |
4246a0b6 | 881 | bio_endio(bio); |
7d7854b4 | 882 | return; |
306b0c95 NG |
883 | |
884 | out: | |
306b0c95 | 885 | bio_io_error(bio); |
306b0c95 NG |
886 | } |
887 | ||
306b0c95 | 888 | /* |
f1e3cfff | 889 | * Handler function for all zram I/O requests. |
306b0c95 | 890 | */ |
dece1635 | 891 | static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) |
306b0c95 | 892 | { |
f1e3cfff | 893 | struct zram *zram = queue->queuedata; |
306b0c95 | 894 | |
54850e73 | 895 | if (!valid_io_request(zram, bio->bi_iter.bi_sector, |
896 | bio->bi_iter.bi_size)) { | |
da5cc7d3 | 897 | atomic64_inc(&zram->stats.invalid_io); |
a09759ac | 898 | goto error; |
6642a67c JM |
899 | } |
900 | ||
be257c61 | 901 | __zram_make_request(zram, bio); |
dece1635 | 902 | return BLK_QC_T_NONE; |
a09759ac | 903 | |
0900beae JM |
904 | error: |
905 | bio_io_error(bio); | |
dece1635 | 906 | return BLK_QC_T_NONE; |
306b0c95 NG |
907 | } |
908 | ||
2ccbec05 NG |
909 | static void zram_slot_free_notify(struct block_device *bdev, |
910 | unsigned long index) | |
107c161b | 911 | { |
f1e3cfff | 912 | struct zram *zram; |
107c161b | 913 | |
f1e3cfff | 914 | zram = bdev->bd_disk->private_data; |
a0c516cb | 915 | |
86c49814 | 916 | zram_slot_lock(zram, index); |
f614a9f4 | 917 | zram_free_page(zram, index); |
86c49814 | 918 | zram_slot_unlock(zram, index); |
f614a9f4 | 919 | atomic64_inc(&zram->stats.notify_free); |
107c161b NG |
920 | } |
921 | ||
8c7f0102 | 922 | static int zram_rw_page(struct block_device *bdev, sector_t sector, |
c11f0c0b | 923 | struct page *page, bool is_write) |
8c7f0102 | 924 | { |
08eee69f | 925 | int offset, err = -EIO; |
8c7f0102 | 926 | u32 index; |
927 | struct zram *zram; | |
928 | struct bio_vec bv; | |
929 | ||
930 | zram = bdev->bd_disk->private_data; | |
08eee69f | 931 | |
8c7f0102 | 932 | if (!valid_io_request(zram, sector, PAGE_SIZE)) { |
933 | atomic64_inc(&zram->stats.invalid_io); | |
08eee69f | 934 | err = -EINVAL; |
a09759ac | 935 | goto out; |
8c7f0102 | 936 | } |
937 | ||
938 | index = sector >> SECTORS_PER_PAGE_SHIFT; | |
4ca82dab | 939 | offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; |
8c7f0102 | 940 | |
941 | bv.bv_page = page; | |
942 | bv.bv_len = PAGE_SIZE; | |
943 | bv.bv_offset = 0; | |
944 | ||
c11f0c0b | 945 | err = zram_bvec_rw(zram, &bv, index, offset, is_write); |
08eee69f | 946 | out: |
8c7f0102 | 947 | /* |
948 | * If I/O fails, just return error(ie, non-zero) without | |
949 | * calling page_endio. | |
950 | * It causes resubmit the I/O with bio request by upper functions | |
951 | * of rw_page(e.g., swap_readpage, __swap_writepage) and | |
952 | * bio->bi_end_io does things to handle the error | |
953 | * (e.g., SetPageError, set_page_dirty and extra works). | |
954 | */ | |
955 | if (err == 0) | |
c11f0c0b | 956 | page_endio(page, is_write, 0); |
8c7f0102 | 957 | return err; |
958 | } | |
959 | ||
522698d7 SS |
960 | static void zram_reset_device(struct zram *zram) |
961 | { | |
522698d7 SS |
962 | struct zcomp *comp; |
963 | u64 disksize; | |
306b0c95 | 964 | |
522698d7 | 965 | down_write(&zram->init_lock); |
9b3bb7ab | 966 | |
522698d7 SS |
967 | zram->limit_pages = 0; |
968 | ||
969 | if (!init_done(zram)) { | |
970 | up_write(&zram->init_lock); | |
971 | return; | |
972 | } | |
973 | ||
522698d7 SS |
974 | comp = zram->comp; |
975 | disksize = zram->disksize; | |
522698d7 | 976 | zram->disksize = 0; |
522698d7 SS |
977 | |
978 | set_capacity(zram->disk, 0); | |
979 | part_stat_set_all(&zram->disk->part0, 0); | |
980 | ||
981 | up_write(&zram->init_lock); | |
982 | /* I/O operation under all of CPU are done so let's free */ | |
beb6602c | 983 | zram_meta_free(zram, disksize); |
302128dc | 984 | memset(&zram->stats, 0, sizeof(zram->stats)); |
522698d7 SS |
985 | zcomp_destroy(comp); |
986 | } | |
987 | ||
988 | static ssize_t disksize_store(struct device *dev, | |
989 | struct device_attribute *attr, const char *buf, size_t len) | |
2f6a3bed | 990 | { |
522698d7 SS |
991 | u64 disksize; |
992 | struct zcomp *comp; | |
2f6a3bed | 993 | struct zram *zram = dev_to_zram(dev); |
522698d7 | 994 | int err; |
2f6a3bed | 995 | |
522698d7 SS |
996 | disksize = memparse(buf, NULL); |
997 | if (!disksize) | |
998 | return -EINVAL; | |
2f6a3bed | 999 | |
beb6602c MK |
1000 | down_write(&zram->init_lock); |
1001 | if (init_done(zram)) { | |
1002 | pr_info("Cannot change disksize for initialized device\n"); | |
1003 | err = -EBUSY; | |
1004 | goto out_unlock; | |
1005 | } | |
1006 | ||
522698d7 | 1007 | disksize = PAGE_ALIGN(disksize); |
beb6602c MK |
1008 | if (!zram_meta_alloc(zram, disksize)) { |
1009 | err = -ENOMEM; | |
1010 | goto out_unlock; | |
1011 | } | |
522698d7 | 1012 | |
da9556a2 | 1013 | comp = zcomp_create(zram->compressor); |
522698d7 | 1014 | if (IS_ERR(comp)) { |
70864969 | 1015 | pr_err("Cannot initialise %s compressing backend\n", |
522698d7 SS |
1016 | zram->compressor); |
1017 | err = PTR_ERR(comp); | |
1018 | goto out_free_meta; | |
1019 | } | |
1020 | ||
522698d7 SS |
1021 | zram->comp = comp; |
1022 | zram->disksize = disksize; | |
1023 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); | |
b09ab054 | 1024 | zram_revalidate_disk(zram); |
e7ccfc4c | 1025 | up_write(&zram->init_lock); |
522698d7 SS |
1026 | |
1027 | return len; | |
1028 | ||
522698d7 | 1029 | out_free_meta: |
beb6602c MK |
1030 | zram_meta_free(zram, disksize); |
1031 | out_unlock: | |
1032 | up_write(&zram->init_lock); | |
522698d7 | 1033 | return err; |
2f6a3bed SS |
1034 | } |
1035 | ||
522698d7 SS |
1036 | static ssize_t reset_store(struct device *dev, |
1037 | struct device_attribute *attr, const char *buf, size_t len) | |
4f2109f6 | 1038 | { |
522698d7 SS |
1039 | int ret; |
1040 | unsigned short do_reset; | |
1041 | struct zram *zram; | |
1042 | struct block_device *bdev; | |
4f2109f6 | 1043 | |
f405c445 SS |
1044 | ret = kstrtou16(buf, 10, &do_reset); |
1045 | if (ret) | |
1046 | return ret; | |
1047 | ||
1048 | if (!do_reset) | |
1049 | return -EINVAL; | |
1050 | ||
522698d7 SS |
1051 | zram = dev_to_zram(dev); |
1052 | bdev = bdget_disk(zram->disk, 0); | |
522698d7 SS |
1053 | if (!bdev) |
1054 | return -ENOMEM; | |
4f2109f6 | 1055 | |
522698d7 | 1056 | mutex_lock(&bdev->bd_mutex); |
f405c445 SS |
1057 | /* Do not reset an active device or claimed device */ |
1058 | if (bdev->bd_openers || zram->claim) { | |
1059 | mutex_unlock(&bdev->bd_mutex); | |
1060 | bdput(bdev); | |
1061 | return -EBUSY; | |
522698d7 SS |
1062 | } |
1063 | ||
f405c445 SS |
1064 | /* From now on, anyone can't open /dev/zram[0-9] */ |
1065 | zram->claim = true; | |
1066 | mutex_unlock(&bdev->bd_mutex); | |
522698d7 | 1067 | |
f405c445 | 1068 | /* Make sure all the pending I/O are finished */ |
522698d7 SS |
1069 | fsync_bdev(bdev); |
1070 | zram_reset_device(zram); | |
b09ab054 | 1071 | zram_revalidate_disk(zram); |
522698d7 SS |
1072 | bdput(bdev); |
1073 | ||
f405c445 SS |
1074 | mutex_lock(&bdev->bd_mutex); |
1075 | zram->claim = false; | |
1076 | mutex_unlock(&bdev->bd_mutex); | |
1077 | ||
522698d7 | 1078 | return len; |
f405c445 SS |
1079 | } |
1080 | ||
1081 | static int zram_open(struct block_device *bdev, fmode_t mode) | |
1082 | { | |
1083 | int ret = 0; | |
1084 | struct zram *zram; | |
1085 | ||
1086 | WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); | |
1087 | ||
1088 | zram = bdev->bd_disk->private_data; | |
1089 | /* zram was claimed to reset so open request fails */ | |
1090 | if (zram->claim) | |
1091 | ret = -EBUSY; | |
4f2109f6 SS |
1092 | |
1093 | return ret; | |
1094 | } | |
1095 | ||
522698d7 | 1096 | static const struct block_device_operations zram_devops = { |
f405c445 | 1097 | .open = zram_open, |
522698d7 SS |
1098 | .swap_slot_free_notify = zram_slot_free_notify, |
1099 | .rw_page = zram_rw_page, | |
1100 | .owner = THIS_MODULE | |
1101 | }; | |
1102 | ||
/*
 * Per-device sysfs attributes, grouped by access mode.  Each macro defines
 * a dev_attr_<name> object wired to the <name>_show and/or <name>_store
 * handlers of this file.
 */

/* write-only */
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);

/* read-only */
static DEVICE_ATTR_RO(initstate);

/* read-write */
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
a68eb3b6 | 1111 | |
9b3bb7ab SS |
1112 | static struct attribute *zram_disk_attrs[] = { |
1113 | &dev_attr_disksize.attr, | |
1114 | &dev_attr_initstate.attr, | |
1115 | &dev_attr_reset.attr, | |
99ebbd30 | 1116 | &dev_attr_compact.attr, |
9ada9da9 | 1117 | &dev_attr_mem_limit.attr, |
461a8eee | 1118 | &dev_attr_mem_used_max.attr, |
beca3ec7 | 1119 | &dev_attr_max_comp_streams.attr, |
e46b8a03 | 1120 | &dev_attr_comp_algorithm.attr, |
2f6a3bed | 1121 | &dev_attr_io_stat.attr, |
4f2109f6 | 1122 | &dev_attr_mm_stat.attr, |
623e47fc | 1123 | &dev_attr_debug_stat.attr, |
9b3bb7ab SS |
1124 | NULL, |
1125 | }; | |
1126 | ||
bc1bb362 | 1127 | static const struct attribute_group zram_disk_attr_group = { |
9b3bb7ab SS |
1128 | .attrs = zram_disk_attrs, |
1129 | }; | |
1130 | ||
92ff1528 SS |
1131 | /* |
1132 | * Allocate and initialize new zram device. the function returns | |
1133 | * '>= 0' device_id upon success, and negative value otherwise. | |
1134 | */ | |
1135 | static int zram_add(void) | |
306b0c95 | 1136 | { |
85508ec6 | 1137 | struct zram *zram; |
ee980160 | 1138 | struct request_queue *queue; |
92ff1528 | 1139 | int ret, device_id; |
85508ec6 SS |
1140 | |
1141 | zram = kzalloc(sizeof(struct zram), GFP_KERNEL); | |
1142 | if (!zram) | |
1143 | return -ENOMEM; | |
1144 | ||
92ff1528 | 1145 | ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); |
85508ec6 SS |
1146 | if (ret < 0) |
1147 | goto out_free_dev; | |
92ff1528 | 1148 | device_id = ret; |
de1a21a0 | 1149 | |
0900beae | 1150 | init_rwsem(&zram->init_lock); |
306b0c95 | 1151 | |
ee980160 SS |
1152 | queue = blk_alloc_queue(GFP_KERNEL); |
1153 | if (!queue) { | |
306b0c95 NG |
1154 | pr_err("Error allocating disk queue for device %d\n", |
1155 | device_id); | |
85508ec6 SS |
1156 | ret = -ENOMEM; |
1157 | goto out_free_idr; | |
306b0c95 NG |
1158 | } |
1159 | ||
ee980160 | 1160 | blk_queue_make_request(queue, zram_make_request); |
306b0c95 | 1161 | |
85508ec6 | 1162 | /* gendisk structure */ |
f1e3cfff NG |
1163 | zram->disk = alloc_disk(1); |
1164 | if (!zram->disk) { | |
70864969 | 1165 | pr_err("Error allocating disk structure for device %d\n", |
306b0c95 | 1166 | device_id); |
201c7b72 | 1167 | ret = -ENOMEM; |
39a9b8ac | 1168 | goto out_free_queue; |
306b0c95 NG |
1169 | } |
1170 | ||
f1e3cfff NG |
1171 | zram->disk->major = zram_major; |
1172 | zram->disk->first_minor = device_id; | |
1173 | zram->disk->fops = &zram_devops; | |
ee980160 SS |
1174 | zram->disk->queue = queue; |
1175 | zram->disk->queue->queuedata = zram; | |
f1e3cfff NG |
1176 | zram->disk->private_data = zram; |
1177 | snprintf(zram->disk->disk_name, 16, "zram%d", device_id); | |
306b0c95 | 1178 | |
33863c21 | 1179 | /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ |
f1e3cfff | 1180 | set_capacity(zram->disk, 0); |
b67d1ec1 SS |
1181 | /* zram devices sort of resembles non-rotational disks */ |
1182 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); | |
b277da0a | 1183 | queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); |
a1dd52af NG |
1184 | /* |
1185 | * To ensure that we always get PAGE_SIZE aligned | |
1186 | * and n*PAGE_SIZED sized I/O requests. | |
1187 | */ | |
f1e3cfff | 1188 | blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); |
7b19b8d4 RJ |
1189 | blk_queue_logical_block_size(zram->disk->queue, |
1190 | ZRAM_LOGICAL_BLOCK_SIZE); | |
f1e3cfff NG |
1191 | blk_queue_io_min(zram->disk->queue, PAGE_SIZE); |
1192 | blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); | |
f4659d8e | 1193 | zram->disk->queue->limits.discard_granularity = PAGE_SIZE; |
2bb4cd5c | 1194 | blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); |
31edeacd CH |
1195 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); |
1196 | ||
f4659d8e JK |
1197 | /* |
1198 | * zram_bio_discard() will clear all logical blocks if logical block | |
1199 | * size is identical with physical block size(PAGE_SIZE). But if it is | |
1200 | * different, we will skip discarding some parts of logical blocks in | |
1201 | * the part of the request range which isn't aligned to physical block | |
1202 | * size. So we can't ensure that all discarded logical blocks are | |
1203 | * zeroed. | |
1204 | */ | |
1205 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | |
31edeacd | 1206 | blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); |
5d83d5a0 | 1207 | |
f1e3cfff | 1208 | add_disk(zram->disk); |
306b0c95 | 1209 | |
33863c21 NG |
1210 | ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, |
1211 | &zram_disk_attr_group); | |
1212 | if (ret < 0) { | |
70864969 SS |
1213 | pr_err("Error creating sysfs group for device %d\n", |
1214 | device_id); | |
39a9b8ac | 1215 | goto out_free_disk; |
33863c21 | 1216 | } |
e46b8a03 | 1217 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
d12b63c9 SS |
1218 | |
1219 | pr_info("Added device: %s\n", zram->disk->disk_name); | |
92ff1528 | 1220 | return device_id; |
de1a21a0 | 1221 | |
39a9b8ac JL |
1222 | out_free_disk: |
1223 | del_gendisk(zram->disk); | |
1224 | put_disk(zram->disk); | |
1225 | out_free_queue: | |
ee980160 | 1226 | blk_cleanup_queue(queue); |
85508ec6 SS |
1227 | out_free_idr: |
1228 | idr_remove(&zram_index_idr, device_id); | |
1229 | out_free_dev: | |
1230 | kfree(zram); | |
de1a21a0 | 1231 | return ret; |
306b0c95 NG |
1232 | } |
1233 | ||
6566d1a3 | 1234 | static int zram_remove(struct zram *zram) |
306b0c95 | 1235 | { |
6566d1a3 SS |
1236 | struct block_device *bdev; |
1237 | ||
1238 | bdev = bdget_disk(zram->disk, 0); | |
1239 | if (!bdev) | |
1240 | return -ENOMEM; | |
1241 | ||
1242 | mutex_lock(&bdev->bd_mutex); | |
1243 | if (bdev->bd_openers || zram->claim) { | |
1244 | mutex_unlock(&bdev->bd_mutex); | |
1245 | bdput(bdev); | |
1246 | return -EBUSY; | |
1247 | } | |
1248 | ||
1249 | zram->claim = true; | |
1250 | mutex_unlock(&bdev->bd_mutex); | |
1251 | ||
85508ec6 SS |
1252 | /* |
1253 | * Remove sysfs first, so no one will perform a disksize | |
6566d1a3 SS |
1254 | * store while we destroy the devices. This also helps during |
1255 | * hot_remove -- zram_reset_device() is the last holder of | |
1256 | * ->init_lock, no later/concurrent disksize_store() or any | |
1257 | * other sysfs handlers are possible. | |
85508ec6 SS |
1258 | */ |
1259 | sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, | |
1260 | &zram_disk_attr_group); | |
306b0c95 | 1261 | |
6566d1a3 SS |
1262 | /* Make sure all the pending I/O are finished */ |
1263 | fsync_bdev(bdev); | |
85508ec6 | 1264 | zram_reset_device(zram); |
6566d1a3 SS |
1265 | bdput(bdev); |
1266 | ||
1267 | pr_info("Removed device: %s\n", zram->disk->disk_name); | |
1268 | ||
85508ec6 SS |
1269 | blk_cleanup_queue(zram->disk->queue); |
1270 | del_gendisk(zram->disk); | |
1271 | put_disk(zram->disk); | |
1272 | kfree(zram); | |
6566d1a3 SS |
1273 | return 0; |
1274 | } | |
1275 | ||
1276 | /* zram-control sysfs attributes */ | |
27104a53 GKH |
1277 | |
1278 | /* | |
1279 | * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a | |
1280 | * sense that reading from this file does alter the state of your system -- it | |
1281 | * creates a new un-initialized zram device and returns back this device's | |
1282 | * device_id (or an error code if it fails to create a new device). | |
1283 | */ | |
6566d1a3 SS |
1284 | static ssize_t hot_add_show(struct class *class, |
1285 | struct class_attribute *attr, | |
1286 | char *buf) | |
1287 | { | |
1288 | int ret; | |
1289 | ||
1290 | mutex_lock(&zram_index_mutex); | |
1291 | ret = zram_add(); | |
1292 | mutex_unlock(&zram_index_mutex); | |
1293 | ||
1294 | if (ret < 0) | |
1295 | return ret; | |
1296 | return scnprintf(buf, PAGE_SIZE, "%d\n", ret); | |
1297 | } | |
f40609d1 | 1298 | static CLASS_ATTR_RO(hot_add); |
6566d1a3 SS |
1299 | |
1300 | static ssize_t hot_remove_store(struct class *class, | |
1301 | struct class_attribute *attr, | |
1302 | const char *buf, | |
1303 | size_t count) | |
1304 | { | |
1305 | struct zram *zram; | |
1306 | int ret, dev_id; | |
1307 | ||
1308 | /* dev_id is gendisk->first_minor, which is `int' */ | |
1309 | ret = kstrtoint(buf, 10, &dev_id); | |
1310 | if (ret) | |
1311 | return ret; | |
1312 | if (dev_id < 0) | |
1313 | return -EINVAL; | |
1314 | ||
1315 | mutex_lock(&zram_index_mutex); | |
1316 | ||
1317 | zram = idr_find(&zram_index_idr, dev_id); | |
17ec4cd9 | 1318 | if (zram) { |
6566d1a3 | 1319 | ret = zram_remove(zram); |
529e71e1 TI |
1320 | if (!ret) |
1321 | idr_remove(&zram_index_idr, dev_id); | |
17ec4cd9 | 1322 | } else { |
6566d1a3 | 1323 | ret = -ENODEV; |
17ec4cd9 | 1324 | } |
6566d1a3 SS |
1325 | |
1326 | mutex_unlock(&zram_index_mutex); | |
1327 | return ret ? ret : count; | |
85508ec6 | 1328 | } |
27104a53 | 1329 | static CLASS_ATTR_WO(hot_remove); |
a096cafc | 1330 | |
27104a53 GKH |
1331 | static struct attribute *zram_control_class_attrs[] = { |
1332 | &class_attr_hot_add.attr, | |
1333 | &class_attr_hot_remove.attr, | |
1334 | NULL, | |
6566d1a3 | 1335 | }; |
27104a53 | 1336 | ATTRIBUTE_GROUPS(zram_control_class); |
6566d1a3 SS |
1337 | |
1338 | static struct class zram_control_class = { | |
1339 | .name = "zram-control", | |
1340 | .owner = THIS_MODULE, | |
27104a53 | 1341 | .class_groups = zram_control_class_groups, |
6566d1a3 SS |
1342 | }; |
1343 | ||
/*
 * idr_for_each() callback used by destroy_devices(): remove the device
 * stored at @ptr.  The result of zram_remove() is ignored and 0 is always
 * returned.
 */
static int zram_remove_cb(int id, void *ptr, void *data)
{
	struct zram *zram = ptr;

	zram_remove(zram);
	return 0;
}
a096cafc | 1349 | |
85508ec6 SS |
1350 | static void destroy_devices(void) |
1351 | { | |
6566d1a3 | 1352 | class_unregister(&zram_control_class); |
85508ec6 SS |
1353 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
1354 | idr_destroy(&zram_index_idr); | |
a096cafc | 1355 | unregister_blkdev(zram_major, "zram"); |
1dd6c834 | 1356 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
306b0c95 NG |
1357 | } |
1358 | ||
f1e3cfff | 1359 | static int __init zram_init(void) |
306b0c95 | 1360 | { |
92ff1528 | 1361 | int ret; |
306b0c95 | 1362 | |
1dd6c834 AMG |
1363 | ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", |
1364 | zcomp_cpu_up_prepare, zcomp_cpu_dead); | |
1365 | if (ret < 0) | |
1366 | return ret; | |
1367 | ||
6566d1a3 SS |
1368 | ret = class_register(&zram_control_class); |
1369 | if (ret) { | |
70864969 | 1370 | pr_err("Unable to register zram-control class\n"); |
1dd6c834 | 1371 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
6566d1a3 SS |
1372 | return ret; |
1373 | } | |
1374 | ||
f1e3cfff NG |
1375 | zram_major = register_blkdev(0, "zram"); |
1376 | if (zram_major <= 0) { | |
70864969 | 1377 | pr_err("Unable to get major number\n"); |
6566d1a3 | 1378 | class_unregister(&zram_control_class); |
1dd6c834 | 1379 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
a096cafc | 1380 | return -EBUSY; |
306b0c95 NG |
1381 | } |
1382 | ||
92ff1528 | 1383 | while (num_devices != 0) { |
6566d1a3 | 1384 | mutex_lock(&zram_index_mutex); |
92ff1528 | 1385 | ret = zram_add(); |
6566d1a3 | 1386 | mutex_unlock(&zram_index_mutex); |
92ff1528 | 1387 | if (ret < 0) |
a096cafc | 1388 | goto out_error; |
92ff1528 | 1389 | num_devices--; |
de1a21a0 NG |
1390 | } |
1391 | ||
306b0c95 | 1392 | return 0; |
de1a21a0 | 1393 | |
a096cafc | 1394 | out_error: |
85508ec6 | 1395 | destroy_devices(); |
306b0c95 NG |
1396 | return ret; |
1397 | } | |
1398 | ||
f1e3cfff | 1399 | static void __exit zram_exit(void) |
306b0c95 | 1400 | { |
85508ec6 | 1401 | destroy_devices(); |
306b0c95 NG |
1402 | } |
1403 | ||
f1e3cfff NG |
1404 | module_init(zram_init); |
1405 | module_exit(zram_exit); | |
306b0c95 | 1406 | |
9b3bb7ab | 1407 | module_param(num_devices, uint, 0); |
c3cdb40e | 1408 | MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); |
9b3bb7ab | 1409 | |
306b0c95 NG |
1410 | MODULE_LICENSE("Dual BSD/GPL"); |
1411 | MODULE_AUTHOR("Nitin Gupta <[email protected]>"); | |
f1e3cfff | 1412 | MODULE_DESCRIPTION("Compressed RAM Block Device"); |