2 * Cryptographic API for the NX-842 hardware compression.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * Copyright (C) IBM Corporation, 2011-2015
16 * Designer of the Power data compression engine:
24 * This is an interface to the NX-842 compression hardware in PowerPC
25 * processors. Most of the complexity of this driver is due to the fact that
26 * the NX-842 compression hardware requires the input and output data buffers
27 * to be specifically aligned, to be a specific multiple in length, and within
28 * specific minimum and maximum lengths. Those restrictions, provided by the
29 * nx-842 driver via nx842_constraints, mean this driver must use bounce
30 * buffers and headers to correct misaligned in or out buffers, and to split
31 * input buffers that are too large.
33 * This driver will fall back to software decompression if the hardware
34 * decompression fails, so this driver's decompression should never fail as
35 * long as the provided compressed buffer is valid. Any compressed buffer
36 * created by this driver will have a header (except ones where the input
37 * perfectly matches the constraints); so users of this driver cannot simply
38 * pass a compressed buffer created by this driver over to the 842 software
39 * decompression library. Instead, users must use this driver to decompress;
40 * if the hardware fails or is unavailable, the compressed buffer will be
41 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
42 * software decompression library.
44 * This does not fall back to software compression, however, since the caller
45 * of this function is specifically requesting hardware compression; if the
46 * hardware compression fails, the caller can fall back to software
47 * compression, and the raw 842 compressed buffer that the software compressor
48 * creates can be passed to this driver for hardware decompression; any
49 * buffer without our specific header magic is assumed to be a raw 842 buffer
50 * and passed directly to the hardware. Note that the software compression
51 * library will produce a compressed buffer that is incompatible with the
52 * hardware decompressor if the original input buffer length is not a multiple
53 * of 8; if such a compressed buffer is passed to this driver for
54 * decompression, the hardware will reject it and this driver will then pass
55 * it over to the software library for decompression.
58 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
60 #include <linux/vmalloc.h>
61 #include <linux/sw842.h>
62 #include <linux/spinlock.h>
66 /* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
67 * template (see lib/842/842.h), so this magic number will never appear at
68 * the start of a raw 842 compressed buffer. That is important, as any buffer
69 * passed to us without this magic is assumed to be a raw 842 compressed
70 * buffer, and passed directly to the hardware to decompress.
// Magic value written (as big-endian 16 bit) into the header's magic field
// and checked on decompression to tell headered buffers from raw 842 data.
72 #define NX842_CRYPTO_MAGIC (0xf842)
// Size in bytes of a header followed by g group descriptors.
73 #define NX842_CRYPTO_HEADER_SIZE(g) \
74 (sizeof(struct nx842_crypto_header) + \
75 sizeof(struct nx842_crypto_header_group) * (g))
// Size of a header carrying the largest permitted number of groups.
76 #define NX842_CRYPTO_HEADER_MAX_SIZE \
77 NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
79 /* bounce buffer size */
// The order is the page-allocation order handed to __get_free_pages(), so
// each bounce buffer spans PAGE_SIZE << 2 bytes (four pages).
80 #define BOUNCE_BUFFER_ORDER (2)
81 #define BOUNCE_BUFFER_SIZE \
82 ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
84 /* wait longer on comp; decomp can fall back to sw decomp if hw is busy */
// Both values are millisecond budgets for retrying a driver call that
// keeps returning -EBUSY.
85 #define COMP_BUSY_TIMEOUT (250) /* ms */
86 #define DECOMP_BUSY_TIMEOUT (50) /* ms */
// Parameter block passed to update_param(), compress() and decompress().
// NOTE(review): only the opening line of this definition is visible in this
// view; the code below accesses members named in, iremain, out and oremain.
88 struct nx842_crypto_param {
// Account for one finished step against the remaining input/output in p.
// p:    parameter block tracking the remaining input (iremain) and
//       output (oremain)
// slen: input length handled by the step
// dlen: output length used by the step
// Called as the final statement of compress() and decompress().
// NOTE(review): only the two bounds checks are visible in this view; the
// statements they guard and the rest of the body are missing.
96 static int update_param(struct nx842_crypto_param *p,
97 unsigned int slen, unsigned int dlen)
// the step must not claim more input than remains
99 if (p->iremain < slen)
// nor more output space than remains
101 if (p->oremain < dlen)
// Set up the per-transform context for use with the given NX-842 driver.
// tfm:    crypto transform; its context (crypto_tfm_ctx) is initialised
// driver: backend driver, stored in ctx->driver; its workmem_size sizes
//         the working-memory allocation
// Initialises the context lock, then allocates zeroed working memory and
// the source and destination bounce buffers (order BOUNCE_BUFFER_ORDER
// page allocations).
// NOTE(review): the return statements and part of the error path are not
// visible in this view.
113 int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
115 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
117 spin_lock_init(&ctx->lock);
118 ctx->driver = driver;
119 ctx->wmem = kzalloc(driver->workmem_size, GFP_KERNEL);
120 ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
121 ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
122 if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
// Release with the same order used for the allocation; free_page() frees
// an order-0 page only and would leak the rest of each bounce buffer.
// free_pages() ignores a zero address, so a failed allocation is fine here.
124 free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER);
125 free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER);
131 EXPORT_SYMBOL_GPL(nx842_crypto_init);
// Tear down the per-transform context of tfm.
// tfm: crypto transform whose context owns the bounce buffers
// NOTE(review): not every line of this function is visible in this view.
133 void nx842_crypto_exit(struct crypto_tfm *tfm)
135 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
// The bounce buffers come from __get_free_pages() with order
// BOUNCE_BUFFER_ORDER, so they must be released with that same order;
// free_page() frees an order-0 page only and would leak the remainder.
138 free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER);
139 free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER);
141 EXPORT_SYMBOL_GPL(nx842_crypto_exit);
// Adjust driver-reported constraints to what this file can service.
// c: constraints, modified in place (the callers pass a local copy of the
//    driver's constraints)
// Caps c->maximum at BOUNCE_BUFFER_SIZE.
143 static void check_constraints(struct nx842_constraints *c)
145 /* limit maximum, to always have enough bounce buffer to decompress */
146 if (c->maximum > BOUNCE_BUFFER_SIZE)
147 c->maximum = BOUNCE_BUFFER_SIZE;
// Place the compression header described by hdr at buf.
// hdr: header to emit; hdr->groups determines its size
// buf: destination; the caller passes the start of its output buffer
// The padding recorded for the first group must be at least as large as
// the header, otherwise an internal error is logged. The header bytes at
// buf are hex-dumped at debug level.
// NOTE(review): the copy into buf and the return statements are not
// visible in this view.
150 static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
152 int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
154 /* compress should have added space for header */
155 if (s > be16_to_cpu(hdr->group[0].padding)) {
156 pr_err("Internal error: no space for header\n");
162 print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
// Compress one group: the next piece of p->in into p->out.
// ctx:     transform context (driver, working memory, bounce buffers)
// p:       current input/output pointers and remaining lengths
// g:       header group entry; receives padding and lengths (big-endian)
// c:       driver constraints (alignment, multiple, minimum, maximum)
// hdrsize: header size that must also fit in the output (the caller passes
//          0 for every group except a first group that gets a header)
// The function also takes an "ignore" out-parameter, used below through
// *ignore; the caller passes &ignore as the fifth argument.
// NOTE(review): a number of lines (part of the signature, some
// declarations, error returns, bounce-buffer pointer switches) are not
// visible in this view.
167 static int compress(struct nx842_crypto_ctx *ctx,
168 struct nx842_crypto_param *p,
169 struct nx842_crypto_header_group *g,
170 struct nx842_constraints *c,
172 unsigned int hdrsize)
174 unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
175 unsigned int adj_slen = slen;
176 u8 *src = p->in, *dst = p->out;
// the output must have room for the header plus a minimum-size result
183 if (p->oremain == 0 || hdrsize + c->minimum > dlen)
// adj_slen is the input length adjusted to the length constraints; an
// input above the maximum is cut down to the maximum instead
186 if (slen % c->multiple)
187 adj_slen = round_up(slen, c->multiple);
188 if (slen < c->minimum)
189 adj_slen = c->minimum;
190 if (slen > c->maximum)
191 adj_slen = slen = c->maximum;
// input needs padding or is misaligned: stage a zero-padded copy in the
// source bounce buffer, limited to the bounce buffer size
192 if (adj_slen > slen || (u64)src % c->alignment) {
193 adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
194 slen = min(slen, BOUNCE_BUFFER_SIZE);
196 memset(ctx->sbounce + slen, 0, adj_slen - slen);
197 memcpy(ctx->sbounce, src, slen);
200 pr_debug("using comp sbounce buffer, len %x\n", slen);
// misaligned output: dskip is the distance to the next aligned address
206 if ((u64)dst % c->alignment) {
207 dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
// trim the output length to the required multiple; when it ends up below
// the minimum, size the request for the destination bounce buffer
211 if (dlen % c->multiple)
212 dlen = round_down(dlen, c->multiple);
213 if (dlen < c->minimum) {
216 dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
217 dlen = round_down(dlen, c->multiple);
219 pr_debug("using comp dbounce buffer, len %x\n", dlen);
221 if (dlen > c->maximum)
// call the driver, retrying on -EBUSY until COMP_BUSY_TIMEOUT ms elapse
225 timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
227 dlen = tmplen; /* reset dlen, if we're retrying */
228 ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
229 /* possibly we should reduce the slen here, instead of
230 * retrying with the dbounce buffer?
232 if (ret == -ENOSPC && dst != ctx->dbounce)
234 } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
// result was produced in the bounce buffer: copy it behind the skipped
// bytes in the caller's output
240 if (dst == ctx->dbounce)
241 memcpy(p->out + dskip, dst, dlen);
// record this group in the header
243 g->padding = cpu_to_be16(dskip);
244 g->compressed_length = cpu_to_be32(dlen);
245 g->uncompressed_length = cpu_to_be32(slen);
// more bytes were compressed than input remained: report the excess
247 if (p->iremain < slen) {
248 *ignore = slen - p->iremain;
252 pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
253 slen, *ignore, dlen, dskip);
255 return update_param(p, slen, dskip + dlen);
// Compress slen bytes at src into dst through the NX-842 driver, splitting
// the input into groups and prepending a header when needed.
// tfm:  crypto transform; its context supplies driver, lock and header
// src:  input data
// slen: input length in bytes
// dst:  output buffer
// dlen: pointer to the output length
//       NOTE(review): presumably capacity on entry and bytes produced on
//       return; the assignments are not visible in this view
// NOTE(review): declarations, the setup of p, error handling and the
// return statement are not visible in this view.
258 int nx842_crypto_compress(struct crypto_tfm *tfm,
259 const u8 *src, unsigned int slen,
260 u8 *dst, unsigned int *dlen)
262 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
263 struct nx842_crypto_header *hdr = &ctx->header;
264 struct nx842_crypto_param p;
// work on a private copy of the driver constraints so it can be adjusted
265 struct nx842_constraints c = *ctx->driver->constraints;
266 unsigned int groups, hdrsize, h;
271 check_constraints(&c);
// one group per c.maximum bytes of input, capped at the group limit; the
// header is sized for that many groups
281 groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
282 DIV_ROUND_UP(p.iremain, c.maximum));
283 hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
// the work below uses ctx->header, which lives in the shared context
285 spin_lock_bh(&ctx->lock);
287 /* skip adding header if the buffers meet all constraints */
288 add_header = (p.iremain % c.multiple ||
289 p.iremain < c.minimum ||
290 p.iremain > c.maximum ||
291 (u64)p.in % c.alignment ||
292 p.oremain % c.multiple ||
293 p.oremain < c.minimum ||
294 p.oremain > c.maximum ||
295 (u64)p.out % c.alignment);
297 hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
// compress group by group until the input is used up
301 while (p.iremain > 0) {
304 if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
307 /* header goes before first group */
308 h = !n && add_header ? hdrsize : 0;
311 pr_warn("internal error, ignore is set %x\n", ignore);
313 ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
// without a header there is nowhere to describe more than one group
318 if (!add_header && hdr->groups > 1) {
319 pr_err("Internal error: No header but multiple groups\n");
324 /* ignore indicates the input stream needed to be padded */
325 hdr->ignore = cpu_to_be16(ignore);
327 pr_debug("marked %d bytes as ignore\n", ignore);
330 ret = nx842_crypto_add_header(hdr, dst);
336 pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
339 spin_unlock_bh(&ctx->lock);
342 EXPORT_SYMBOL_GPL(nx842_crypto_compress);
// Decompress one group described by g from p->in into p->out, first via
// the hardware driver and, per the visible fallback section, via
// sw842_decompress().
// ctx: transform context (driver, working memory, bounce buffers)
// p:   current input/output pointers and remaining lengths
// g:   header group entry (padding, compressed and uncompressed length,
//      all big-endian)
// c:   driver constraints (alignment, multiple, minimum, maximum)
// The function also takes an "ignore" value, used below; callers pass it
// as the fifth argument.
// NOTE(review): a number of lines (part of the signature, error returns,
// bounce-buffer pointer switches, the condition that selects the software
// path) are not visible in this view.
344 static int decompress(struct nx842_crypto_ctx *ctx,
345 struct nx842_crypto_param *p,
346 struct nx842_crypto_header_group *g,
347 struct nx842_constraints *c,
350 unsigned int slen = be32_to_cpu(g->compressed_length);
351 unsigned int required_len = be32_to_cpu(g->uncompressed_length);
352 unsigned int dlen = p->oremain, tmplen;
353 unsigned int adj_slen = slen;
354 u8 *src = p->in, *dst = p->out;
355 u16 padding = be16_to_cpu(g->padding);
356 int ret, spadding = 0, dpadding = 0;
// a group with a zero length on either side is checked for first
359 if (!slen || !required_len)
// the input must hold the group's padding plus its compressed bytes
362 if (p->iremain <= 0 || padding + slen > p->iremain)
// the output must hold the uncompressed bytes minus the ignored tail
365 if (p->oremain <= 0 || required_len - ignore > p->oremain)
// adj_slen is the compressed length adjusted to the length constraints
370 if (slen % c->multiple)
371 adj_slen = round_up(slen, c->multiple);
372 if (slen < c->minimum)
373 adj_slen = c->minimum;
374 if (slen > c->maximum)
// input needs padding or is misaligned: stage a zero-padded copy in the
// source bounce buffer
376 if (slen < adj_slen || (u64)src % c->alignment) {
377 /* we can append padding bytes because the 842 format defines
378 * an "end" template (see lib/842/842_decompress.c) and will
379 * ignore any bytes following it.
382 memset(ctx->sbounce + slen, 0, adj_slen - slen);
383 memcpy(ctx->sbounce, src, slen);
385 spadding = adj_slen - slen;
387 pr_debug("using decomp sbounce buffer, len %x\n", slen);
// trim the output length to the required multiple; an output that is too
// short or misaligned is replaced by a bounce-buffer sized request
390 if (dlen % c->multiple)
391 dlen = round_down(dlen, c->multiple);
392 if (dlen < required_len || (u64)dst % c->alignment) {
394 dlen = min(required_len, BOUNCE_BUFFER_SIZE);
395 pr_debug("using decomp dbounce buffer, len %x\n", dlen);
397 if (dlen < c->minimum)
399 if (dlen > c->maximum)
// call the driver, retrying on -EBUSY until DECOMP_BUSY_TIMEOUT ms elapse
403 timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
405 dlen = tmplen; /* reset dlen, if we're retrying */
406 ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
407 } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
// software path: go back to the unmodified group data in the caller's
// input, just past its padding
410 /* reset everything, sw doesn't have constraints */
411 src = p->in + padding;
412 slen = be32_to_cpu(g->compressed_length);
417 if (dlen < required_len) { /* have ignore bytes */
419 dlen = BOUNCE_BUFFER_SIZE;
421 pr_info_ratelimited("using software 842 decompression\n");
422 ret = sw842_decompress(src, slen, dst, &dlen);
431 pr_debug("ignoring last %x bytes\n", ignore);
// result was produced in the bounce buffer: copy it to the caller's output
433 if (dst == ctx->dbounce)
434 memcpy(p->out, dst, dlen);
436 pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
437 slen, padding, dlen, ignore);
439 return update_param(p, slen + padding, dlen);
// Decompress slen bytes at src into dst. A buffer that starts with this
// driver's header magic is processed group by group; anything else is
// treated as a single raw 842 buffer.
// tfm:  crypto transform; its context supplies driver, lock and header
// src:  compressed input
// slen: input length in bytes
// dst:  output buffer
// dlen: pointer to the output length
//       NOTE(review): presumably capacity on entry and bytes produced on
//       return; the assignments are not visible in this view
// NOTE(review): declarations, the setup of p, error handling and the
// return statement are not visible in this view.
442 int nx842_crypto_decompress(struct crypto_tfm *tfm,
443 const u8 *src, unsigned int slen,
444 u8 *dst, unsigned int *dlen)
446 struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
447 struct nx842_crypto_header *hdr;
448 struct nx842_crypto_param p;
// work on a private copy of the driver constraints so it can be adjusted
449 struct nx842_constraints c = *ctx->driver->constraints;
453 check_constraints(&c);
// view the start of the input as a header in order to test its magic
463 hdr = (struct nx842_crypto_header *)src;
465 spin_lock_bh(&ctx->lock);
467 /* If it doesn't start with our header magic number, assume it's a raw
468 * 842 compressed buffer and pass it directly to the hardware driver
470 if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
// describe the whole input and output as one group, nothing ignored
471 struct nx842_crypto_header_group g = {
473 .compressed_length = cpu_to_be32(p.iremain),
474 .uncompressed_length = cpu_to_be32(p.oremain),
477 ret = decompress(ctx, &p, &g, &c, 0);
485 pr_err("header has no groups\n");
489 if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
490 pr_err("header has too many groups %x, max %x\n",
491 hdr->groups, NX842_CRYPTO_GROUP_MAX);
// the complete header, including all group entries, must fit in the input
496 hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
497 if (hdr_len > slen) {
// keep a copy of the header in the context
502 memcpy(&ctx->header, src, hdr_len);
505 for (n = 0; n < hdr->groups; n++) {
506 /* ignore applies to last group */
507 if (n + 1 == hdr->groups)
508 ignore = be16_to_cpu(hdr->ignore);
510 ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
518 pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
523 spin_unlock_bh(&ctx->lock);
527 EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
529 MODULE_LICENSE("GPL");
530 MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");