/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 * <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

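/*
 * Entry points into the AVX assembler implementation; each call processes
 * CAST5_PARALLEL_BLOCKS (16) blocks of CAST5_BLOCK_SIZE (8) bytes at once.
 */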
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

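/*
 * glue_fpu_begin() only issues kernel_fpu_begin() (and thus a save of the
 * current FPU state) once at least CAST5_PARALLEL_BLOCKS blocks' worth of
 * data is pending; smaller requests fall back to the scalar C implementation
 * and never pay the FPU save/restore cost.
 */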
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

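/*
 * Walk the scatterlist and encrypt/decrypt in two phases: full 16-block
 * batches through the AVX routine while enough data remains, then any
 * remaining whole blocks one at a time through the scalar C routine.
 */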
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

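/*
 * CBC encryption is inherently serial (each block is chained off the
 * previous ciphertext block), so there is no AVX fast path here; blocks
 * are XORed with the running IV and encrypted one at a time.
 */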
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

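/*
 * CBC decryption parallelizes: each plaintext block needs only its own
 * ciphertext block and the one before it.  The walk therefore starts at
 * the *last* block and moves backwards, so 16 blocks can be decrypted
 * in place per AVX call before the chaining XOR is applied; last_iv
 * preserves the final ciphertext block as the IV for the next segment.
 */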
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

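/*
 * Handle a final partial block (nbytes < CAST5_BLOCK_SIZE): encrypt the
 * counter block into a keystream buffer and XOR only the bytes that are
 * actually present.
 */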
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

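/*
 * CTR keystream blocks are independent, so full 16-block batches go
 * through the AVX routine (which also advances the big-endian counter in
 * walk->iv); leftover whole blocks encrypt the counter with the scalar
 * routine and bump it via be64_add_cpu().
 */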
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}


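/*
 * The first three algorithms ("__"-prefixed, priority 0) are the internal
 * synchronous implementations and may only run where the FPU is usable.
 * The latter three are the user-visible async wrappers: through the
 * ablk/cryptd helpers they call the internal version directly when the
 * FPU is available and defer to a cryptd worker thread otherwise.
 */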
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

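/*
 * Registration requires both the AVX and OSXSAVE CPUID bits, plus XCR0
 * confirming that the OS actually saves/restores the SSE and YMM state.
 */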
static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");
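
/*
 * A minimal in-kernel usage sketch for the registered "ctr(cast5)" mode,
 * using the legacy ablkcipher API of this kernel generation.  Caller code
 * is hypothetical and error handling is elided:
 *
 *	struct crypto_ablkcipher *tfm;
 *
 *	tfm = crypto_alloc_ablkcipher("ctr(cast5)", 0, 0);
 *	if (IS_ERR(tfm))
 *		return PTR_ERR(tfm);
 *	crypto_ablkcipher_setkey(tfm, key, CAST5_MAX_KEY_SIZE);
 *	... set up an ablkcipher_request and call crypto_ablkcipher_encrypt() ...
 *	crypto_free_ablkcipher(tfm);
 */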