1 // SPDX-License-Identifier: GPL-2.0
11 #include <linux/lz4.h>
12 #include <linux/zlib.h>
13 #include <linux/zstd.h>
/*
 * Translate an on-disk compression type into the compression option that
 * handles it. Visible mappings: none and incompressible -> OPT_none,
 * lz4_old and lz4 -> OPT_lz4, gzip -> OPT_gzip, zstd -> OPT_zstd.
 * NOTE(review): the switch header and any default arm are not visible in
 * this excerpt; confirm how unknown types are handled.
 */
15 static inline enum bch_compression_opts bch2_compression_type_to_opt(enum bch_compression_type type)
/* Neither of these two types has a compressor associated with it. */
18 case BCH_COMPRESSION_TYPE_none:
19 case BCH_COMPRESSION_TYPE_incompressible:
20 return BCH_COMPRESSION_OPT_none;
/* lz4_old and lz4 share a single option. */
21 case BCH_COMPRESSION_TYPE_lz4_old:
22 case BCH_COMPRESSION_TYPE_lz4:
23 return BCH_COMPRESSION_OPT_lz4;
24 case BCH_COMPRESSION_TYPE_gzip:
25 return BCH_COMPRESSION_OPT_gzip;
26 case BCH_COMPRESSION_TYPE_zstd:
27 return BCH_COMPRESSION_OPT_zstd;
/*
 * Allocate a bounce buffer of @size bytes for I/O direction @rw.
 * The returned bbuf records how the memory was obtained (BB_KMALLOC or
 * BB_MEMPOOL) together with @rw; bio_unmap_or_unbounce() uses the same @rw
 * index when handing a buffer back to c->compression_bounce[].
 */
45 static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
/* Callers must never ask for more than one maximally sized encoded extent. */
49 BUG_ON(size > c->opts.encoded_extent_max);
/* First choice: plain kmalloc, with the allocation-failure warning suppressed. */
51 b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
/* NOTE(review): the success check guarding this return is not visible in this excerpt. */
53 return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
/* Otherwise take a buffer from the per-direction bounce mempool. */
55 b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
57 return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
/*
 * Check whether the bvecs of @bio, walked from @start, form one contiguous
 * range of kernel addresses: each bvec has to begin at the address where the
 * previous one ended. Addresses are computed with page_address(), so the
 * comparison is done on kernel virtual addresses.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
62 static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
65 struct bvec_iter iter;
/* End address of the previous bvec; NULL until the first bvec has been seen. */
66 void *expected_start = NULL;
68 __bio_for_each_bvec(bv, bio, iter, start) {
/* Mismatch between the expected address and where this bvec really starts. */
70 expected_start != page_address(bv.bv_page) + bv.bv_offset)
/* Remember where this bvec ends for the next iteration. */
73 expected_start = page_address(bv.bv_page) +
74 bv.bv_offset + bv.bv_len;
/*
 * Produce a linear buffer covering the data of @bio from @start onward.
 * Three strategies are visible, tried in this order:
 *   1. BB_NONE: the data is already contiguous in lowmem, so point at it.
 *   2. BB_VMAP: collect the pages and vmap() them into one linear range.
 *   3. bounce:  allocate with __bounce_alloc() and copy the bio data in.
 * The returned bbuf must later be passed to bio_unmap_or_unbounce().
 * NOTE(review): the jumps between the strategies and the condition guarding
 * the final copy are not visible in this excerpt.
 */
80 static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
81 struct bvec_iter start, int rw)
85 struct bvec_iter iter;
86 unsigned nr_pages = 0;
/* Small on-stack page array; larger requests use kmalloc_array() below. */
87 struct page *stack_pages[16];
88 struct page **pages = NULL;
91 BUG_ON(start.bi_size > c->opts.encoded_extent_max);
/* Strategy 1: first page is not highmem and the whole range is contiguous. */
93 if (!PageHighMem(bio_iter_page(bio, start)) &&
94 bio_phys_contig(bio, start))
95 return (struct bbuf) {
96 .b = page_address(bio_iter_page(bio, start)) +
97 bio_iter_offset(bio, start),
98 .type = BB_NONE, .rw = rw
101 /* check if we can map the pages contiguously: */
102 __bio_for_each_segment(bv, bio, iter, start) {
/*
 * iter.bi_size != start.bi_size identifies every segment except the first.
 * NOTE(review): the second half of this condition is not visible here.
 */
103 if (iter.bi_size != start.bi_size &&
/*
 * A segment that is not the last one (more data remains after it) and that
 * stops short of the end of its page.
 * NOTE(review): presumably this leaves a hole in a vmap and forces the
 * bounce path; the action taken is not visible here.
 */
107 if (bv.bv_len < iter.bi_size &&
108 bv.bv_offset + bv.bv_len < PAGE_SIZE)
/* The counted pages must be enough to cover the requested byte count. */
114 BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
/* Strategy 2: build the page array, on the heap only when the stack array is too small. */
116 pages = nr_pages > ARRAY_SIZE(stack_pages)
117 ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
123 __bio_for_each_segment(bv, bio, iter, start)
124 pages[nr_pages++] = bv.bv_page;
126 data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
/* The page array is only needed for the vmap() call itself. */
127 if (pages != stack_pages)
/* The mapping starts at a page boundary; add the offset of the first segment. */
131 return (struct bbuf) {
132 .b = data + bio_iter_offset(bio, start),
133 .type = BB_VMAP, .rw = rw
/* Strategy 3: separate buffer, filled from the bio. */
136 ret = __bounce_alloc(c, start.bi_size, rw);
139 memcpy_from_bio(ret.b, bio, start);
/*
 * Convenience wrapper around __bio_map_or_bounce() that starts at the current
 * iterator position of @bio (bio->bi_iter).
 */
144 static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
146 return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
/*
 * Release a buffer obtained from the map-or-bounce helpers.
 * NOTE(review): the dispatch on buf.type is not visible in this excerpt; only
 * the vmap and mempool release calls are shown.
 */
149 static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
/* Mask off the in-page offset so that vunmap() gets a page-aligned address. */
155 vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
/* Hand the buffer back to the bounce pool of the direction recorded in buf.rw. */
161 mempool_free(buf.b, &c->compression_bounce[buf.rw]);
/*
 * Attach a caller-provided workspace buffer to a zlib stream by storing it in
 * strm->workspace.
 * NOTE(review): any preprocessor guard around the assignment is not visible
 * in this excerpt.
 */
166 static inline void zlib_set_workspace(z_stream *strm, void *workspace)
169 strm->workspace = workspace;
/*
 * Decompress the contents of @src into @dst_data as described by @crc.
 * The input length is src->bi_iter.bi_size and the output length is
 * crc.uncompressed_size << 9 bytes. The lz4 (old and current), gzip and zstd
 * compression types are handled below.
 * NOTE(review): return statements and error labels are not visible in this
 * excerpt, so the exact error code of each path should be confirmed.
 */
173 static int __bio_uncompress(struct bch_fs *c, struct bio *src,
174 void *dst_data, struct bch_extent_crc_unpacked crc)
176 struct bbuf src_data = { NULL };
177 size_t src_len = src->bi_iter.bi_size;
178 size_t dst_len = crc.uncompressed_size << 9;
/*
 * Every compression option has its own workspace mempool. If the pool for
 * this extent was never initialized, report the inconsistency through
 * fsck_err() and try to set things up with
 * bch2_check_set_has_compressed_data().
 * NOTE(review): the branch structure around the two ret assignments is only
 * partly visible here.
 */
182 enum bch_compression_opts opt = bch2_compression_type_to_opt(crc.compression_type);
183 mempool_t *workspace_pool = &c->compress_workspace[opt];
184 if (unlikely(!mempool_initialized(workspace_pool))) {
185 if (fsck_err(c, compression_type_not_marked_in_sb,
186 "compression type %s set but not marked in superblock",
187 __bch2_compression_types[crc.compression_type]))
188 ret = bch2_check_set_has_compressed_data(c, opt);
190 ret = -BCH_ERR_compression_workspace_not_initialized;
/* Get a linear view of the compressed input. */
195 src_data = bio_map_or_bounce(c, src, READ);
197 switch (crc.compression_type) {
198 case BCH_COMPRESSION_TYPE_lz4_old:
199 case BCH_COMPRESSION_TYPE_lz4:
/* dst_len is passed both as the requested output size and as the capacity. */
200 ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
201 src_len, dst_len, dst_len);
205 case BCH_COMPRESSION_TYPE_gzip: {
207 .next_in = src_data.b,
209 .next_out = dst_data,
210 .avail_out = dst_len,
213 workspace = mempool_alloc(workspace_pool, GFP_NOFS);
215 zlib_set_workspace(&strm, workspace);
/*
 * A negative window-bits value selects raw deflate data without a zlib
 * header. The return value of the init call is not checked.
 */
216 zlib_inflateInit2(&strm, -MAX_WBITS);
217 ret = zlib_inflate(&strm, Z_FINISH);
219 mempool_free(workspace, workspace_pool);
/* Anything other than a cleanly finished stream is treated as a failure. */
221 if (ret != Z_STREAM_END)
225 case BCH_COMPRESSION_TYPE_zstd: {
/*
 * The first 4 bytes of the input hold the exact compressed length as a
 * little-endian 32-bit value; the zstd frame itself starts right after.
 * NOTE(review): src_len - 4 is size_t arithmetic and would wrap around for
 * an input shorter than 4 bytes - confirm that callers rule this out.
 */
227 size_t real_src_len = le32_to_cpup(src_data.b);
229 if (real_src_len > src_len - 4)
232 workspace = mempool_alloc(workspace_pool, GFP_NOFS);
233 ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
235 ret = zstd_decompress_dctx(ctx,
237 src_data.b + 4, real_src_len);
239 mempool_free(workspace, workspace_pool);
/* Drop the mapping or bounce buffer taken for the input. */
251 bio_unmap_or_unbounce(c, src_data);
/*
 * Replace the compressed payload carried by the bio of a write op with its
 * uncompressed live range, and rewrite op->crc so that it describes plain
 * data of live_size sectors with compression type 0 and a zeroed checksum.
 * NOTE(review): the second parameter line (the bio) and the return
 * statements are not visible in this excerpt.
 */
258 int bch2_bio_uncompress_inplace(struct bch_write_op *op,
261 struct bch_fs *c = op->c;
262 struct bch_extent_crc_unpacked *crc = &op->crc;
263 struct bbuf data = { NULL };
264 size_t dst_len = crc->uncompressed_size << 9;
267 /* bio must own its pages: */
268 BUG_ON(!bio->bi_vcnt);
/* The bio needs enough vector slots to hold the live data once uncompressed. */
269 BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
/* Refuse extents whose compressed or uncompressed size exceeds the configured limit. */
271 if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
272 crc->compressed_size << 9 > c->opts.encoded_extent_max) {
273 struct printbuf buf = PRINTBUF;
274 bch2_write_op_error(&buf, op);
275 prt_printf(&buf, "error rewriting existing data: extent too big");
276 bch_err_ratelimited(c, "%s", buf.buf);
/* Decompress the whole extent into a temporary buffer first. */
281 data = __bounce_alloc(c, dst_len, WRITE);
283 if (__bio_uncompress(c, bio, data.b, *crc)) {
/* The failure is only logged when the no_data_io option is not set. */
284 if (!c->opts.no_data_io) {
285 struct printbuf buf = PRINTBUF;
286 bch2_write_op_error(&buf, op);
287 prt_printf(&buf, "error rewriting existing data: decompression error");
288 bch_err_ratelimited(c, "%s", buf.buf);
296 * XXX: don't have a good way to assert that the bio was allocated with
297 * enough space, we depend on bch2_move_extent doing the right thing
/* Resize the bio to the live range, given in sectors and converted to bytes. */
299 bio->bi_iter.bi_size = crc->live_size << 9;
/* Copy only the live part: skip crc->offset sectors of the uncompressed data. */
301 memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
/* The data is now uncompressed and exactly live_size sectors long. */
304 crc->compression_type = 0;
305 crc->compressed_size = crc->live_size;
306 crc->uncompressed_size = crc->live_size;
308 crc->csum = (struct bch_csum) { 0, 0 };
310 bio_unmap_or_unbounce(c, data);
/*
 * Decompress @src, described by @crc, into @dst at position @dst_iter.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
314 int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
315 struct bio *dst, struct bvec_iter dst_iter,
316 struct bch_extent_crc_unpacked crc)
318 struct bbuf dst_data = { NULL };
319 size_t dst_len = crc.uncompressed_size << 9;
/* Refuse extents whose compressed or uncompressed size exceeds the configured limit. */
322 if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
323 crc.compressed_size << 9 > c->opts.encoded_extent_max)
/*
 * When the destination range has exactly the size of the uncompressed
 * extent, map it and decompress into it; otherwise decompress into a
 * separate bounce buffer of the full uncompressed size.
 */
326 dst_data = dst_len == dst_iter.bi_size
327 ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
328 : __bounce_alloc(c, dst_len, WRITE);
330 ret = __bio_uncompress(c, src, dst_data.b, crc);
/*
 * BB_NONE and BB_VMAP buffers alias the destination pages, so only the
 * other buffer types need a copy. The copy starts crc.offset sectors into
 * the uncompressed data.
 */
334 if (dst_data.type != BB_NONE &&
335 dst_data.type != BB_VMAP)
336 memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
338 bio_unmap_or_unbounce(c, dst_data);
/*
 * Run one compression attempt of @src (@src_len bytes) into @dst (@dst_len
 * bytes of capacity) with the algorithm and level given by @compression.
 * The gzip path returns the number of bytes produced (strm.total_out).
 * NOTE(review): one parameter line, most return statements and the failure
 * values are not visible in this excerpt; __bio_compress() treats a negative
 * result as a hint about how much input would fit.
 */
342 static int attempt_compress(struct bch_fs *c,
344 void *dst, size_t dst_len,
345 void *src, size_t src_len,
346 struct bch_compression_opt compression)
348 enum bch_compression_type compression_type =
349 __bch2_compression_opt_to_type[compression.type];
351 switch (compression_type) {
352 case BCH_COMPRESSION_TYPE_lz4:
/* Levels below the HC minimum use the destSize variant, the rest use LZ4 HC. */
353 if (compression.level < LZ4HC_MIN_CLEVEL) {
355 int ret = LZ4_compress_destSize(
364 int ret = LZ4_compress_HC(
372 case BCH_COMPRESSION_TYPE_gzip: {
377 .avail_out = dst_len,
380 zlib_set_workspace(&strm, workspace);
/*
 * The level is clamped into the range accepted by zlib, with
 * Z_DEFAULT_COMPRESSION as the alternative (the selecting condition is not
 * visible here). Negative window bits request raw deflate output.
 */
381 zlib_deflateInit2(&strm,
383 ? clamp_t(unsigned, compression.level,
384 Z_BEST_SPEED, Z_BEST_COMPRESSION)
385 : Z_DEFAULT_COMPRESSION,
386 Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
/* The whole input has to be consumed and flushed in a single call. */
389 if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
392 if (zlib_deflateEnd(&strm) != Z_OK)
395 return strm.total_out;
397 case BCH_COMPRESSION_TYPE_zstd: {
400 * zstd max compression level is 22, our max level is 15
/* Scale the level by 3/2 and cap it at the maximum zstd level. */
402 unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
403 ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
404 ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
407 * ZSTD requires that when we decompress we pass in the exact
408 * compressed size - rounding it up to the nearest sector
409 * doesn't work, so we use the first 4 bytes of the buffer for
412 * Additionally, the ZSTD code seems to have a bug where it will
413 * write just past the end of the buffer - so subtract a fudge
414 * factor (7 bytes) from the dst buffer size to account for
/*
 * Output starts after the 4-byte length header.
 * NOTE(review): dst_len - 4 - 7 is size_t arithmetic and would wrap around
 * for dst_len below 11 - confirm that callers always pass a larger buffer.
 */
417 size_t len = zstd_compress_cctx(ctx,
418 dst + 4, dst_len - 4 - 7,
421 if (zstd_is_error(len))
/* Store the exact compressed length, little-endian, in the header. */
424 *((__le32 *) dst) = cpu_to_le32(len);
/*
 * Compress data from @src into @dst with the option @compression.
 * On return *src_len and *dst_len describe how much input was consumed and
 * how much output was produced. The visible results are the compression type
 * that was used, or BCH_COMPRESSION_TYPE_incompressible when compression is
 * skipped or abandoned.
 * NOTE(review): the retry loop structure, the labels and several early exits
 * are not visible in this excerpt.
 */
432 static unsigned __bio_compress(struct bch_fs *c,
433 struct bio *dst, size_t *dst_len,
434 struct bio *src, size_t *src_len,
435 struct bch_compression_opt compression)
437 struct bbuf src_data = { NULL }, dst_data = { NULL };
439 enum bch_compression_type compression_type =
440 __bch2_compression_opt_to_type[compression.type];
444 /* bch2_compression_decode catches unknown compression types: */
445 BUG_ON(compression.type >= BCH_COMPRESSION_OPT_NR);
/*
 * Same workspace check as on the decompression side: an uninitialized pool
 * is reported through fsck_err() and initialization is attempted.
 */
447 mempool_t *workspace_pool = &c->compress_workspace[compression.type];
448 if (unlikely(!mempool_initialized(workspace_pool))) {
449 if (fsck_err(c, compression_opt_not_marked_in_sb,
450 "compression opt %s set but not marked in superblock",
451 bch2_compression_opts[compression.type])) {
452 ret = bch2_check_set_has_compressed_data(c, compression.type);
453 if (ret) /* memory allocation failure, don't compress */
460 /* If it's only one block, don't bother trying to compress: */
461 if (src->bi_iter.bi_size <= c->opts.block_size)
462 return BCH_COMPRESSION_TYPE_incompressible;
/* Linear views of both bios: output first, then input. */
464 dst_data = bio_map_or_bounce(c, dst, WRITE);
465 src_data = bio_map_or_bounce(c, src, READ);
467 workspace = mempool_alloc(workspace_pool, GFP_NOFS);
/* Start with the full input and the full output capacity. */
469 *src_len = src->bi_iter.bi_size;
470 *dst_len = dst->bi_iter.bi_size;
473 * XXX: this algorithm sucks when the compression code doesn't tell us
474 * how much would fit, like LZ4 does:
/* Input already shrunk down to a single block. */
477 if (*src_len <= block_bytes(c)) {
482 ret = attempt_compress(c, workspace,
483 dst_data.b, *dst_len,
484 src_data.b, *src_len,
492 /* Didn't fit: should we retry with a smaller amount? */
493 if (*src_len <= *dst_len) {
499 * If ret is negative, it's a hint as to how much data would fit
/* A hint has to be smaller than what was just tried. */
501 BUG_ON(-ret >= *src_len);
/* Shrink the input by half of its excess over the output capacity, block aligned. */
506 *src_len -= (*src_len - *dst_len) / 2;
507 *src_len = round_down(*src_len, block_bytes(c));
510 mempool_free(workspace, workspace_pool);
515 /* Didn't get smaller: */
516 if (round_up(*dst_len, block_bytes(c)) >= *src_len)
/* Zero the tail so that the output ends on a block boundary. */
519 pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
521 memset(dst_data.b + *dst_len, 0, pad);
/* BB_NONE and BB_VMAP buffers alias the bio pages; other types need a copy back. */
524 if (dst_data.type != BB_NONE &&
525 dst_data.type != BB_VMAP)
526 memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
/* Both lengths must be non-zero, within their bios and block aligned. */
528 BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
529 BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
530 BUG_ON(*dst_len & (block_bytes(c) - 1));
531 BUG_ON(*src_len & (block_bytes(c) - 1));
532 ret = compression_type;
534 bio_unmap_or_unbounce(c, src_data);
535 bio_unmap_or_unbounce(c, dst_data);
/* Result for the paths on which compression was given up. */
538 ret = BCH_COMPRESSION_TYPE_incompressible;
/*
 * Public entry point for compression. Temporarily clamps the sizes of @src
 * and @dst, runs __bio_compress() with the decoded form of @compression_opt,
 * restores the original bio sizes and returns compression_type.
 * NOTE(review): the line assigning the result of __bio_compress() is not
 * visible in this excerpt.
 */
545 unsigned bch2_bio_compress(struct bch_fs *c,
546 struct bio *dst, size_t *dst_len,
547 struct bio *src, size_t *src_len,
548 unsigned compression_opt)
/* Saved so that the caller sees its bios unchanged afterwards. */
550 unsigned orig_dst = dst->bi_iter.bi_size;
551 unsigned orig_src = src->bi_iter.bi_size;
552 unsigned compression_type;
554 /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
555 src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
556 c->opts.encoded_extent_max);
557 /* Don't generate a bigger output than input: */
558 dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
561 __bio_compress(c, dst, dst_len, src, src_len,
562 bch2_compression_decode(compression_opt));
564 dst->bi_iter.bi_size = orig_dst;
565 src->bi_iter.bi_size = orig_src;
566 return compression_type;
/* Forward declaration; __bch2_check_set_has_compressed_data() calls it before the definition. */
569 static int __bch2_fs_compress_init(struct bch_fs *, u64);
/*
 * Table mapping each compression option to the index of its superblock
 * feature bit, generated from the BCH_COMPRESSION_OPTS() x-macro.
 * BCH_FEATURE_none is defined as 0 only while the table is expanded and is
 * undefined again right after; presumably this is needed because the option
 * list contains a none entry that has no feature bit of its own.
 */
571 #define BCH_FEATURE_none 0
573 static const unsigned bch2_compression_opt_to_feature[] = {
574 #define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
575 BCH_COMPRESSION_OPTS()
579 #undef BCH_FEATURE_none
/*
 * Make sure the feature bits in @f are set in the superblock and that the
 * matching compression state has been initialized.
 * NOTE(review): return statements and the write-out of the superblock are
 * not visible in this excerpt.
 */
581 static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
/* Check without the lock first: nothing to do when all bits are already set. */
585 if ((c->sb.features & f) == f)
588 mutex_lock(&c->sb_lock);
/* Check again now that sb_lock is held. */
590 if ((c->sb.features & f) == f) {
591 mutex_unlock(&c->sb_lock);
/* Set up pools for the union of the existing and the new features. */
595 ret = __bch2_fs_compress_init(c, c->sb.features|f);
597 mutex_unlock(&c->sb_lock);
/* Record the new feature bits in the on-disk superblock copy. */
601 c->disk_sb.sb->features[0] |= cpu_to_le64(f);
603 mutex_unlock(&c->sb_lock);
/*
 * Public wrapper: decode @compression_opt, look up its feature bit index and
 * pass the resulting single-bit mask to
 * __bch2_check_set_has_compressed_data().
 * NOTE(review): the alternative taken when the decoded type is 0 is not
 * visible in this excerpt.
 */
608 int bch2_check_set_has_compressed_data(struct bch_fs *c,
609 unsigned compression_opt)
611 unsigned compression_type = bch2_compression_decode(compression_opt).type;
/* The decoded type is used as a table index below, so it has to be in range. */
613 BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
615 return compression_type
616 ? __bch2_check_set_has_compressed_data(c,
617 1ULL << bch2_compression_opt_to_feature[compression_type])
/*
 * Tear down the compression state of a filesystem: every per-option
 * workspace mempool and both bounce-buffer mempools.
 */
621 void bch2_fs_compress_exit(struct bch_fs *c)
625 for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
626 mempool_exit(&c->compress_workspace[i]);
627 mempool_exit(&c->compression_bounce[WRITE]);
628 mempool_exit(&c->compression_bounce[READ]);
/*
 * Initialize the mempools needed for the compression features present in
 * the bitmask @features: the two bounce-buffer pools plus one workspace pool
 * per enabled algorithm. Pools that are already initialized are left alone.
 * NOTE(review): the struct header of the table, loop increments and the
 * success return are not visible in this excerpt.
 */
631 static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
/* Size the zstd compression workspace for the maximum level and extent size. */
633 ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
634 c->opts.encoded_extent_max);
/*
 * NOTE(review): the argument on the next line looks mis-encoded. It is
 * presumably the address of params.cParams, with the ampersand sequence
 * rendered as a pilcrow character - confirm against the original file.
 */
636 c->zstd_workspace_size = zstd_cctx_workspace_bound(¶ms.cParams);
/*
 * One table row per algorithm: feature bit index, compression option and
 * the workspace size, taken as the larger of the compression and
 * decompression requirement.
 */
640 enum bch_compression_opts type;
641 size_t compress_workspace;
642 } compression_types[] = {
643 { BCH_FEATURE_lz4, BCH_COMPRESSION_OPT_lz4,
644 max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
645 { BCH_FEATURE_gzip, BCH_COMPRESSION_OPT_gzip,
646 max(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
647 zlib_inflate_workspacesize()) },
648 { BCH_FEATURE_zstd, BCH_COMPRESSION_OPT_zstd,
649 max(c->zstd_workspace_size,
650 zstd_dctx_workspace_bound()) },
652 bool have_compressed = false;
/*
 * First pass: is any compression feature requested at all?
 * NOTE(review): 1 << i->feature is an int shift tested against a u64 mask,
 * while other places in this file use 1ULL << and BIT_ULL(); this is only
 * equivalent for small bit indices - confirm.
 */
654 for (i = compression_types;
655 i < compression_types + ARRAY_SIZE(compression_types);
657 have_compressed |= (features & (1 << i->feature)) != 0;
659 if (!have_compressed)
/* Bounce pools: one element of encoded_extent_max bytes per direction. */
662 if (!mempool_initialized(&c->compression_bounce[READ]) &&
663 mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
664 1, c->opts.encoded_extent_max))
665 return -BCH_ERR_ENOMEM_compression_bounce_read_init;
667 if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
668 mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
669 1, c->opts.encoded_extent_max))
670 return -BCH_ERR_ENOMEM_compression_bounce_write_init;
/* Second pass: one workspace pool per algorithm whose feature bit is set. */
672 for (i = compression_types;
673 i < compression_types + ARRAY_SIZE(compression_types);
675 if (!(features & (1 << i->feature)))
678 if (mempool_initialized(&c->compress_workspace[i->type]))
681 if (mempool_init_kvmalloc_pool(
682 &c->compress_workspace[i->type],
683 1, i->compress_workspace))
684 return -BCH_ERR_ENOMEM_compression_workspace_init;
/*
 * Convert an encoded compression option @v into a single-bit feature mask by
 * decoding its type and looking up the bit index in
 * bch2_compression_opt_to_feature[].
 * NOTE(review): no bounds check on the decoded type is visible here, unlike
 * in bch2_check_set_has_compressed_data() - confirm that @v is validated
 * before it reaches this helper.
 */
690 static u64 compression_opt_to_feature(unsigned v)
692 unsigned type = bch2_compression_decode(v).type;
694 return BIT_ULL(bch2_compression_opt_to_feature[type]);
/*
 * Initialize compression state for a filesystem. The feature mask is the set
 * of features recorded in the superblock plus the features implied by the
 * compression and background_compression options.
 */
697 int bch2_fs_compress_init(struct bch_fs *c)
699 u64 f = c->sb.features;
701 f |= compression_opt_to_feature(c->opts.compression);
702 f |= compression_opt_to_feature(c->opts.background_compression);
704 return __bch2_fs_compress_init(c, f);
/*
 * Parse a compression option string of the form type or type:level and
 * store its encoded value in *res. Error descriptions are written to @err.
 * NOTE(review): the NULL check after kstrdup(), the error assignments and
 * the release of the duplicated string are not visible in this excerpt.
 */
707 int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
708 struct printbuf *err)
/* Work on a private copy because strsep() modifies the string it splits. */
710 char *val = kstrdup(_val, GFP_KERNEL);
711 char *p = val, *type_str, *level_str;
712 struct bch_compression_opt opt = { 0 };
/* Everything before the first colon is the type name. */
718 type_str = strsep(&p, ":");
/* Look the type name up in the table of known compression options. */
721 ret = match_string(bch2_compression_opts, -1, type_str);
723 prt_str(err, "invalid compression type");
/* The level is parsed as an unsigned decimal number. */
732 ret = kstrtouint(level_str, 10, &level);
/* A non-zero level combined with type 0 is singled out here. */
733 if (!ret && !opt.type && level)
/* So is any level above 15. */
735 if (!ret && level > 15)
738 prt_str(err, "invalid compression level");
745 *res = bch2_compression_encode(opt);
/*
 * Print the encoded compression option @v to @out: the option name when the
 * decoded type is known, otherwise a placeholder containing the numeric
 * type, followed by a colon and the level.
 * NOTE(review): any condition guarding the level output is not visible in
 * this excerpt.
 */
751 void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
753 struct bch_compression_opt opt = bch2_compression_decode(v);
755 if (opt.type < BCH_COMPRESSION_OPT_NR)
756 prt_str(out, bch2_compression_opts[opt.type]);
758 prt_printf(out, "(unknown compression opt %u)", opt.type);
760 prt_printf(out, ":%u", opt.level);
/*
 * Thin wrapper that forwards @out and v to bch2_compression_opt_to_text().
 * NOTE(review): the remaining parameter lines are not visible in this
 * excerpt.
 */
763 void bch2_opt_compression_to_text(struct printbuf *out,
768 return bch2_compression_opt_to_text(out, v);
/*
 * Validate the encoded compression option @v. An invalid value is described
 * in @err and rejected with -BCH_ERR_invalid_sb_opt_compression.
 * NOTE(review): the success return is not visible in this excerpt.
 */
771 int bch2_opt_compression_validate(u64 v, struct printbuf *err)
773 if (!bch2_compression_opt_valid(v)) {
774 prt_printf(err, "invalid compression opt %llu", v);
775 return -BCH_ERR_invalid_sb_opt_compression;