Git Repo - cpuminer-multi.git/blame - sha3/sph_blake.c
Avoid fetching too much work when LP is off
[cpuminer-multi.git] / sha3 / sph_blake.c
CommitLineData
b089cc9f
LJ
1/* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
2/*
3 * BLAKE implementation.
4 *
5 * ==========================(LICENSE BEGIN)============================
6 *
7 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be
18 * included in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 * ===========================(LICENSE END)=============================
29 *
30 * @author Thomas Pornin <[email protected]>
31 */
32
33#include <stddef.h>
34#include <string.h>
35#include <limits.h>
36
37#include "sph_blake.h"
38
39#ifdef __cplusplus
40extern "C"{
41#endif
42
/*
 * Build-time selection of the code paths used below.
 *
 * SPH_SMALL_FOOTPRINT, SPH_64 and SPH_64_TRUE are not defined in this
 * file; they are expected to come from the included sph headers or from
 * the build command line.
 */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BLAKE
#define SPH_SMALL_FOOTPRINT_BLAKE 1
#endif

/* Small footprint: use the loop/table driven 32-bit compression code. */
#if SPH_SMALL_FOOTPRINT_BLAKE
#define SPH_COMPACT_BLAKE_32 1
#endif

/*
 * Use the loop/table driven 64-bit compression code when a small
 * footprint is requested or when SPH_64_TRUE is not set.
 */
#if SPH_64 && (SPH_SMALL_FOOTPRINT_BLAKE || !SPH_64_TRUE)
#define SPH_COMPACT_BLAKE_64 1
#endif

/*
 * NOTE(review): MSVC warning 4146 is presumably silenced because of the
 * unary minus applied to an unsigned value ("-z") in the close
 * routines -- confirm against the MSVC documentation.
 */
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
58
59static const sph_u32 IV224[8] = {
60 SPH_C32(0xC1059ED8), SPH_C32(0x367CD507),
61 SPH_C32(0x3070DD17), SPH_C32(0xF70E5939),
62 SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
63 SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
64};
65
66static const sph_u32 IV256[8] = {
67 SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85),
68 SPH_C32(0x3C6EF372), SPH_C32(0xA54FF53A),
69 SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
70 SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
71};
72
73#if SPH_64
74
75static const sph_u64 IV384[8] = {
76 SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507),
77 SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939),
78 SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511),
79 SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4)
80};
81
82static const sph_u64 IV512[8] = {
83 SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
84 SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
85 SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
86 SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
87};
88
89#endif
90
91#if SPH_COMPACT_BLAKE_32 || SPH_COMPACT_BLAKE_64
92
/*
 * Per-round index schedule used by the compact ROUND_S / ROUND_B macros:
 * sigma[r][i] selects which message word M[] and which constant
 * CS[] / CB[] feed the i-th input slot of round r.
 *
 * Every row is a permutation of 0..15. Only ten distinct rows exist;
 * rows 10..15 repeat rows 0..5 so that a 16-round loop can index the
 * table directly with the round number.
 */
static const unsigned sigma[16][16] = {
	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
	{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
	{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
	{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
	{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 }
};
111
112/*
113 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
114 14 10 4 8 9 15 13 6 1 12 0 2 11 7 5 3
115 11 8 12 0 5 2 15 13 10 14 3 6 7 1 9 4
116 7 9 3 1 13 12 11 14 2 6 5 10 4 0 15 8
117 9 0 5 7 2 4 10 15 14 1 11 12 6 8 3 13
118 2 12 6 10 0 11 8 3 4 13 7 5 15 14 1 9
119 12 5 1 15 14 13 4 10 0 7 6 3 9 2 8 11
120 13 11 7 14 12 1 3 9 5 0 15 4 8 6 2 10
121 6 15 14 9 11 3 0 8 12 2 13 7 1 4 10 5
122 10 2 8 4 7 6 1 5 15 11 9 14 3 12 13 0
123*/
124#endif
125
/*
 * Preprocessor copy of the round schedule for the unrolled code paths.
 *
 * Z<r><i> expands to a single hexadecimal digit (0..9, A..F): the
 * schedule entry for round r (0..9), slot i (0..F). The Mx / CSx / CBx
 * macros paste that digit onto "M", "CS" or "CB" so the selection is
 * resolved entirely at preprocessing time.
 */
#define Z00 0
#define Z01 1
#define Z02 2
#define Z03 3
#define Z04 4
#define Z05 5
#define Z06 6
#define Z07 7
#define Z08 8
#define Z09 9
#define Z0A A
#define Z0B B
#define Z0C C
#define Z0D D
#define Z0E E
#define Z0F F

#define Z10 E
#define Z11 A
#define Z12 4
#define Z13 8
#define Z14 9
#define Z15 F
#define Z16 D
#define Z17 6
#define Z18 1
#define Z19 C
#define Z1A 0
#define Z1B 2
#define Z1C B
#define Z1D 7
#define Z1E 5
#define Z1F 3

#define Z20 B
#define Z21 8
#define Z22 C
#define Z23 0
#define Z24 5
#define Z25 2
#define Z26 F
#define Z27 D
#define Z28 A
#define Z29 E
#define Z2A 3
#define Z2B 6
#define Z2C 7
#define Z2D 1
#define Z2E 9
#define Z2F 4

#define Z30 7
#define Z31 9
#define Z32 3
#define Z33 1
#define Z34 D
#define Z35 C
#define Z36 B
#define Z37 E
#define Z38 2
#define Z39 6
#define Z3A 5
#define Z3B A
#define Z3C 4
#define Z3D 0
#define Z3E F
#define Z3F 8

#define Z40 9
#define Z41 0
#define Z42 5
#define Z43 7
#define Z44 2
#define Z45 4
#define Z46 A
#define Z47 F
#define Z48 E
#define Z49 1
#define Z4A B
#define Z4B C
#define Z4C 6
#define Z4D 8
#define Z4E 3
#define Z4F D

#define Z50 2
#define Z51 C
#define Z52 6
#define Z53 A
#define Z54 0
#define Z55 B
#define Z56 8
#define Z57 3
#define Z58 4
#define Z59 D
#define Z5A 7
#define Z5B 5
#define Z5C F
#define Z5D E
#define Z5E 1
#define Z5F 9

#define Z60 C
#define Z61 5
#define Z62 1
#define Z63 F
#define Z64 E
#define Z65 D
#define Z66 4
#define Z67 A
#define Z68 0
#define Z69 7
#define Z6A 6
#define Z6B 3
#define Z6C 9
#define Z6D 2
#define Z6E 8
#define Z6F B

#define Z70 D
#define Z71 B
#define Z72 7
#define Z73 E
#define Z74 C
#define Z75 1
#define Z76 3
#define Z77 9
#define Z78 5
#define Z79 0
#define Z7A F
#define Z7B 4
#define Z7C 8
#define Z7D 6
#define Z7E 2
#define Z7F A

#define Z80 6
#define Z81 F
#define Z82 E
#define Z83 9
#define Z84 B
#define Z85 3
#define Z86 0
#define Z87 8
#define Z88 C
#define Z89 2
#define Z8A D
#define Z8B 7
#define Z8C 1
#define Z8D 4
#define Z8E A
#define Z8F 5

#define Z90 A
#define Z91 2
#define Z92 8
#define Z93 4
#define Z94 7
#define Z95 6
#define Z96 1
#define Z97 5
#define Z98 F
#define Z99 B
#define Z9A 9
#define Z9B E
#define Z9C 3
#define Z9D C
#define Z9E D
#define Z9F 0
295
/*
 * Mx(r, i) names the message-word variable for round r, slot i in the
 * unrolled code: Z<r><i> is expanded to its hex digit, which is then
 * pasted onto "M" (e.g. a digit of 4 yields M4). The two helper levels
 * force the Z macro to be expanded before the final paste.
 */
#define Mx(r, i) Mx_(Z ## r ## i)
#define Mx_(n) Mx__(n)
#define Mx__(n) M ## n
299
/*
 * CSx(r, i) names the 32-bit round constant for round r, slot i in the
 * unrolled code: Z<r><i> is expanded to its hex digit, which is then
 * pasted onto "CS" to select one of the CS0..CSF macros.
 */
#define CSx(r, i) CSx_(Z ## r ## i)
#define CSx_(n) CSx__(n)
#define CSx__(n) CS ## n
303
/*
 * The sixteen 32-bit round constants, one macro per hex digit so that
 * CSx() can select them by token pasting. CS0..CS7 are also XORed with
 * the salt and counter words when COMPRESS32 sets up V8..VF.
 */
#define CS0 SPH_C32(0x243F6A88)
#define CS1 SPH_C32(0x85A308D3)
#define CS2 SPH_C32(0x13198A2E)
#define CS3 SPH_C32(0x03707344)
#define CS4 SPH_C32(0xA4093822)
#define CS5 SPH_C32(0x299F31D0)
#define CS6 SPH_C32(0x082EFA98)
#define CS7 SPH_C32(0xEC4E6C89)
#define CS8 SPH_C32(0x452821E6)
#define CS9 SPH_C32(0x38D01377)
#define CSA SPH_C32(0xBE5466CF)
#define CSB SPH_C32(0x34E90C6C)
#define CSC SPH_C32(0xC0AC29B7)
#define CSD SPH_C32(0xC97C50DD)
#define CSE SPH_C32(0x3F84D5B5)
#define CSF SPH_C32(0xB5470917)
320
321#if SPH_COMPACT_BLAKE_32
322
323static const sph_u32 CS[16] = {
324 SPH_C32(0x243F6A88), SPH_C32(0x85A308D3),
325 SPH_C32(0x13198A2E), SPH_C32(0x03707344),
326 SPH_C32(0xA4093822), SPH_C32(0x299F31D0),
327 SPH_C32(0x082EFA98), SPH_C32(0xEC4E6C89),
328 SPH_C32(0x452821E6), SPH_C32(0x38D01377),
329 SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C),
330 SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD),
331 SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917)
332};
333
334#endif
335
336#if SPH_64
337
/*
 * CBx(r, i) names the 64-bit round constant for round r, slot i in the
 * unrolled code: Z<r><i> is expanded to its hex digit, which is then
 * pasted onto "CB" to select one of the CB0..CBF macros.
 */
#define CBx(r, i) CBx_(Z ## r ## i)
#define CBx_(n) CBx__(n)
#define CBx__(n) CB ## n
341
/*
 * The sixteen 64-bit round constants, one macro per hex digit so that
 * CBx() can select them by token pasting. CB0..CB7 are also XORed with
 * the salt and counter words when COMPRESS64 sets up V8..VF.
 */
#define CB0 SPH_C64(0x243F6A8885A308D3)
#define CB1 SPH_C64(0x13198A2E03707344)
#define CB2 SPH_C64(0xA4093822299F31D0)
#define CB3 SPH_C64(0x082EFA98EC4E6C89)
#define CB4 SPH_C64(0x452821E638D01377)
#define CB5 SPH_C64(0xBE5466CF34E90C6C)
#define CB6 SPH_C64(0xC0AC29B7C97C50DD)
#define CB7 SPH_C64(0x3F84D5B5B5470917)
#define CB8 SPH_C64(0x9216D5D98979FB1B)
#define CB9 SPH_C64(0xD1310BA698DFB5AC)
#define CBA SPH_C64(0x2FFD72DBD01ADFB7)
#define CBB SPH_C64(0xB8E1AFED6A267E96)
#define CBC SPH_C64(0xBA7C9045F12C7F99)
#define CBD SPH_C64(0x24A19947B3916CF7)
#define CBE SPH_C64(0x0801F2E2858EFC16)
#define CBF SPH_C64(0x636920D871574E69)
358
359#if SPH_COMPACT_BLAKE_64
360
361static const sph_u64 CB[16] = {
362 SPH_C64(0x243F6A8885A308D3), SPH_C64(0x13198A2E03707344),
363 SPH_C64(0xA4093822299F31D0), SPH_C64(0x082EFA98EC4E6C89),
364 SPH_C64(0x452821E638D01377), SPH_C64(0xBE5466CF34E90C6C),
365 SPH_C64(0xC0AC29B7C97C50DD), SPH_C64(0x3F84D5B5B5470917),
366 SPH_C64(0x9216D5D98979FB1B), SPH_C64(0xD1310BA698DFB5AC),
367 SPH_C64(0x2FFD72DBD01ADFB7), SPH_C64(0xB8E1AFED6A267E96),
368 SPH_C64(0xBA7C9045F12C7F99), SPH_C64(0x24A19947B3916CF7),
369 SPH_C64(0x0801F2E2858EFC16), SPH_C64(0x636920D871574E69)
370};
371
372#endif
373
374#endif
375
/*
 * 32-bit mixing step on four state words.
 *
 *   m0, m1      the two message words for this step
 *   c0, c1      the two round constants for this step; note the cross
 *               pairing: m0 is XORed with c1 and m1 with c0
 *   a, b, c, d  state words, updated in place (must be lvalues)
 *
 * Rotation amounts are 16, 12, 8 and 7 bits to the right. Additions are
 * truncated with SPH_T32.
 */
#define GS(m0, m1, c0, c1, a, b, c, d) do { \
		a = SPH_T32(a + b + (m0 ^ c1)); \
		d = SPH_ROTR32(d ^ a, 16); \
		c = SPH_T32(c + d); \
		b = SPH_ROTR32(b ^ c, 12); \
		a = SPH_T32(a + b + (m1 ^ c0)); \
		d = SPH_ROTR32(d ^ a, 8); \
		c = SPH_T32(c + d); \
		b = SPH_ROTR32(b ^ c, 7); \
	} while (0)
386
387#if SPH_COMPACT_BLAKE_32
388
/*
 * One 32-bit round, compact form. r may be a run-time value; message
 * words and constants are fetched from M[] and CS[] through sigma[r][].
 *
 * The first four GS calls mix (V0,V4,V8,VC) (V1,V5,V9,VD) (V2,V6,VA,VE)
 * (V3,V7,VB,VF); the last four mix (V0,V5,VA,VF) (V1,V6,VB,VC)
 * (V2,V7,V8,VD) (V3,V4,V9,VE). V0..VF and M[] must be in scope.
 */
#define ROUND_S(r) do { \
		GS(M[sigma[r][0x0]], M[sigma[r][0x1]], \
			CS[sigma[r][0x0]], CS[sigma[r][0x1]], V0, V4, V8, VC); \
		GS(M[sigma[r][0x2]], M[sigma[r][0x3]], \
			CS[sigma[r][0x2]], CS[sigma[r][0x3]], V1, V5, V9, VD); \
		GS(M[sigma[r][0x4]], M[sigma[r][0x5]], \
			CS[sigma[r][0x4]], CS[sigma[r][0x5]], V2, V6, VA, VE); \
		GS(M[sigma[r][0x6]], M[sigma[r][0x7]], \
			CS[sigma[r][0x6]], CS[sigma[r][0x7]], V3, V7, VB, VF); \
		GS(M[sigma[r][0x8]], M[sigma[r][0x9]], \
			CS[sigma[r][0x8]], CS[sigma[r][0x9]], V0, V5, VA, VF); \
		GS(M[sigma[r][0xA]], M[sigma[r][0xB]], \
			CS[sigma[r][0xA]], CS[sigma[r][0xB]], V1, V6, VB, VC); \
		GS(M[sigma[r][0xC]], M[sigma[r][0xD]], \
			CS[sigma[r][0xC]], CS[sigma[r][0xD]], V2, V7, V8, VD); \
		GS(M[sigma[r][0xE]], M[sigma[r][0xF]], \
			CS[sigma[r][0xE]], CS[sigma[r][0xF]], V3, V4, V9, VE); \
	} while (0)
407
408#else
409
/*
 * One 32-bit round, unrolled form. r must be a literal digit 0..9: it
 * is token-pasted by Mx() / CSx() into a Z<r><i> name, so the message
 * variables M0..MF and constants CS0..CSF are chosen at preprocessing
 * time. V0..VF and M0..MF must be in scope.
 */
#define ROUND_S(r) do { \
		GS(Mx(r, 0), Mx(r, 1), CSx(r, 0), CSx(r, 1), V0, V4, V8, VC); \
		GS(Mx(r, 2), Mx(r, 3), CSx(r, 2), CSx(r, 3), V1, V5, V9, VD); \
		GS(Mx(r, 4), Mx(r, 5), CSx(r, 4), CSx(r, 5), V2, V6, VA, VE); \
		GS(Mx(r, 6), Mx(r, 7), CSx(r, 6), CSx(r, 7), V3, V7, VB, VF); \
		GS(Mx(r, 8), Mx(r, 9), CSx(r, 8), CSx(r, 9), V0, V5, VA, VF); \
		GS(Mx(r, A), Mx(r, B), CSx(r, A), CSx(r, B), V1, V6, VB, VC); \
		GS(Mx(r, C), Mx(r, D), CSx(r, C), CSx(r, D), V2, V7, V8, VD); \
		GS(Mx(r, E), Mx(r, F), CSx(r, E), CSx(r, F), V3, V4, V9, VE); \
	} while (0)
420
421#endif
422
423#if SPH_64
424
/*
 * 64-bit mixing step on four state words.
 *
 *   m0, m1      the two message words for this step
 *   c0, c1      the two round constants for this step; note the cross
 *               pairing: m0 is XORed with c1 and m1 with c0
 *   a, b, c, d  state words, updated in place (must be lvalues)
 *
 * Rotation amounts are 32, 25, 16 and 11 bits to the right. Additions
 * are truncated with SPH_T64.
 */
#define GB(m0, m1, c0, c1, a, b, c, d) do { \
		a = SPH_T64(a + b + (m0 ^ c1)); \
		d = SPH_ROTR64(d ^ a, 32); \
		c = SPH_T64(c + d); \
		b = SPH_ROTR64(b ^ c, 25); \
		a = SPH_T64(a + b + (m1 ^ c0)); \
		d = SPH_ROTR64(d ^ a, 16); \
		c = SPH_T64(c + d); \
		b = SPH_ROTR64(b ^ c, 11); \
	} while (0)
435
436#if SPH_COMPACT_BLAKE_64
437
/*
 * One 64-bit round, compact form. r may be a run-time value; message
 * words and constants are fetched from M[] and CB[] through sigma[r][].
 *
 * The first four GB calls mix (V0,V4,V8,VC) (V1,V5,V9,VD) (V2,V6,VA,VE)
 * (V3,V7,VB,VF); the last four mix (V0,V5,VA,VF) (V1,V6,VB,VC)
 * (V2,V7,V8,VD) (V3,V4,V9,VE). V0..VF and M[] must be in scope.
 */
#define ROUND_B(r) do { \
		GB(M[sigma[r][0x0]], M[sigma[r][0x1]], \
			CB[sigma[r][0x0]], CB[sigma[r][0x1]], V0, V4, V8, VC); \
		GB(M[sigma[r][0x2]], M[sigma[r][0x3]], \
			CB[sigma[r][0x2]], CB[sigma[r][0x3]], V1, V5, V9, VD); \
		GB(M[sigma[r][0x4]], M[sigma[r][0x5]], \
			CB[sigma[r][0x4]], CB[sigma[r][0x5]], V2, V6, VA, VE); \
		GB(M[sigma[r][0x6]], M[sigma[r][0x7]], \
			CB[sigma[r][0x6]], CB[sigma[r][0x7]], V3, V7, VB, VF); \
		GB(M[sigma[r][0x8]], M[sigma[r][0x9]], \
			CB[sigma[r][0x8]], CB[sigma[r][0x9]], V0, V5, VA, VF); \
		GB(M[sigma[r][0xA]], M[sigma[r][0xB]], \
			CB[sigma[r][0xA]], CB[sigma[r][0xB]], V1, V6, VB, VC); \
		GB(M[sigma[r][0xC]], M[sigma[r][0xD]], \
			CB[sigma[r][0xC]], CB[sigma[r][0xD]], V2, V7, V8, VD); \
		GB(M[sigma[r][0xE]], M[sigma[r][0xF]], \
			CB[sigma[r][0xE]], CB[sigma[r][0xF]], V3, V4, V9, VE); \
	} while (0)
456
457#else
458
/*
 * One 64-bit round, unrolled form. r must be a literal digit 0..9: it
 * is token-pasted by Mx() / CBx() into a Z<r><i> name, so the message
 * variables M0..MF and constants CB0..CBF are chosen at preprocessing
 * time. V0..VF and M0..MF must be in scope.
 */
#define ROUND_B(r) do { \
		GB(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \
		GB(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \
		GB(Mx(r, 4), Mx(r, 5), CBx(r, 4), CBx(r, 5), V2, V6, VA, VE); \
		GB(Mx(r, 6), Mx(r, 7), CBx(r, 6), CBx(r, 7), V3, V7, VB, VF); \
		GB(Mx(r, 8), Mx(r, 9), CBx(r, 8), CBx(r, 9), V0, V5, VA, VF); \
		GB(Mx(r, A), Mx(r, B), CBx(r, A), CBx(r, B), V1, V6, VB, VC); \
		GB(Mx(r, C), Mx(r, D), CBx(r, C), CBx(r, D), V2, V7, V8, VD); \
		GB(Mx(r, E), Mx(r, F), CBx(r, E), CBx(r, F), V3, V4, V9, VE); \
	} while (0)
469
470#endif
471
472#endif
473
/*
 * Local working copy of a 32-bit context.
 *
 * DECL_STATE32 declares the locals H0..H7 (chaining value), S0..S3
 * (salt) and T0, T1 (counter words). It already ends in a semicolon, so
 * it is used without one.
 */
#define DECL_STATE32 \
	sph_u32 H0, H1, H2, H3, H4, H5, H6, H7; \
	sph_u32 S0, S1, S2, S3, T0, T1;

/* Load H[], S[], T0 and T1 from *state into the DECL_STATE32 locals. */
#define READ_STATE32(state) do { \
		H0 = (state)->H[0]; \
		H1 = (state)->H[1]; \
		H2 = (state)->H[2]; \
		H3 = (state)->H[3]; \
		H4 = (state)->H[4]; \
		H5 = (state)->H[5]; \
		H6 = (state)->H[6]; \
		H7 = (state)->H[7]; \
		S0 = (state)->S[0]; \
		S1 = (state)->S[1]; \
		S2 = (state)->S[2]; \
		S3 = (state)->S[3]; \
		T0 = (state)->T0; \
		T1 = (state)->T1; \
	} while (0)

/* Store the DECL_STATE32 locals back into *state. */
#define WRITE_STATE32(state) do { \
		(state)->H[0] = H0; \
		(state)->H[1] = H1; \
		(state)->H[2] = H2; \
		(state)->H[3] = H3; \
		(state)->H[4] = H4; \
		(state)->H[5] = H5; \
		(state)->H[6] = H6; \
		(state)->H[7] = H7; \
		(state)->S[0] = S0; \
		(state)->S[1] = S1; \
		(state)->S[2] = S2; \
		(state)->S[3] = S3; \
		(state)->T0 = T0; \
		(state)->T1 = T1; \
	} while (0)
511
512#if SPH_COMPACT_BLAKE_32
513
/*
 * 32-bit compression function, compact (looped) form.
 *
 * Requires in scope: the DECL_STATE32 locals (H0..H7, S0..S3, T0, T1)
 * and a byte pointer "buf" to the 64-byte block. The sixteen message
 * words are loaded from buf with sph_dec32be_aligned(). On exit H0..H7
 * hold the new chaining value (old H XOR salt XOR both halves of V).
 *
 * NOTE(review): the loop runs 8 rounds, matching the unrolled variant
 * of this macro. The published BLAKE-256 specification uses more rounds,
 * so this looks like a deliberate reduced-round build -- confirm before
 * relying on it for standard BLAKE-224/256 digests.
 */
#define COMPRESS32 do { \
		sph_u32 M[16]; \
		sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \
		sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \
		unsigned r; \
		V0 = H0; \
		V1 = H1; \
		V2 = H2; \
		V3 = H3; \
		V4 = H4; \
		V5 = H5; \
		V6 = H6; \
		V7 = H7; \
		V8 = S0 ^ CS0; \
		V9 = S1 ^ CS1; \
		VA = S2 ^ CS2; \
		VB = S3 ^ CS3; \
		VC = T0 ^ CS4; \
		VD = T0 ^ CS5; \
		VE = T1 ^ CS6; \
		VF = T1 ^ CS7; \
		M[0x0] = sph_dec32be_aligned(buf + 0); \
		M[0x1] = sph_dec32be_aligned(buf + 4); \
		M[0x2] = sph_dec32be_aligned(buf + 8); \
		M[0x3] = sph_dec32be_aligned(buf + 12); \
		M[0x4] = sph_dec32be_aligned(buf + 16); \
		M[0x5] = sph_dec32be_aligned(buf + 20); \
		M[0x6] = sph_dec32be_aligned(buf + 24); \
		M[0x7] = sph_dec32be_aligned(buf + 28); \
		M[0x8] = sph_dec32be_aligned(buf + 32); \
		M[0x9] = sph_dec32be_aligned(buf + 36); \
		M[0xA] = sph_dec32be_aligned(buf + 40); \
		M[0xB] = sph_dec32be_aligned(buf + 44); \
		M[0xC] = sph_dec32be_aligned(buf + 48); \
		M[0xD] = sph_dec32be_aligned(buf + 52); \
		M[0xE] = sph_dec32be_aligned(buf + 56); \
		M[0xF] = sph_dec32be_aligned(buf + 60); \
		for (r = 0; r < 8; r ++) \
			ROUND_S(r); \
		H0 ^= S0 ^ V0 ^ V8; \
		H1 ^= S1 ^ V1 ^ V9; \
		H2 ^= S2 ^ V2 ^ VA; \
		H3 ^= S3 ^ V3 ^ VB; \
		H4 ^= S0 ^ V4 ^ VC; \
		H5 ^= S1 ^ V5 ^ VD; \
		H6 ^= S2 ^ V6 ^ VE; \
		H7 ^= S3 ^ V7 ^ VF; \
	} while (0)
562
563#else
564
/*
 * 32-bit compression function, fully unrolled form.
 *
 * Requires in scope: the DECL_STATE32 locals (H0..H7, S0..S3, T0, T1)
 * and a byte pointer "buf" to the 64-byte block. The sixteen message
 * words are loaded from buf with sph_dec32be_aligned() into M0..MF,
 * which ROUND_S() selects by token pasting. On exit H0..H7 hold the new
 * chaining value (old H XOR salt XOR both halves of V).
 *
 * NOTE(review): eight rounds (ROUND_S(0)..ROUND_S(7)) are applied,
 * matching the compact variant of this macro. The published BLAKE-256
 * specification uses more rounds, so this looks like a deliberate
 * reduced-round build -- confirm before relying on it for standard
 * BLAKE-224/256 digests.
 */
#define COMPRESS32 do { \
		sph_u32 M0, M1, M2, M3, M4, M5, M6, M7; \
		sph_u32 M8, M9, MA, MB, MC, MD, ME, MF; \
		sph_u32 V0, V1, V2, V3, V4, V5, V6, V7; \
		sph_u32 V8, V9, VA, VB, VC, VD, VE, VF; \
		V0 = H0; \
		V1 = H1; \
		V2 = H2; \
		V3 = H3; \
		V4 = H4; \
		V5 = H5; \
		V6 = H6; \
		V7 = H7; \
		V8 = S0 ^ CS0; \
		V9 = S1 ^ CS1; \
		VA = S2 ^ CS2; \
		VB = S3 ^ CS3; \
		VC = T0 ^ CS4; \
		VD = T0 ^ CS5; \
		VE = T1 ^ CS6; \
		VF = T1 ^ CS7; \
		M0 = sph_dec32be_aligned(buf + 0); \
		M1 = sph_dec32be_aligned(buf + 4); \
		M2 = sph_dec32be_aligned(buf + 8); \
		M3 = sph_dec32be_aligned(buf + 12); \
		M4 = sph_dec32be_aligned(buf + 16); \
		M5 = sph_dec32be_aligned(buf + 20); \
		M6 = sph_dec32be_aligned(buf + 24); \
		M7 = sph_dec32be_aligned(buf + 28); \
		M8 = sph_dec32be_aligned(buf + 32); \
		M9 = sph_dec32be_aligned(buf + 36); \
		MA = sph_dec32be_aligned(buf + 40); \
		MB = sph_dec32be_aligned(buf + 44); \
		MC = sph_dec32be_aligned(buf + 48); \
		MD = sph_dec32be_aligned(buf + 52); \
		ME = sph_dec32be_aligned(buf + 56); \
		MF = sph_dec32be_aligned(buf + 60); \
		ROUND_S(0); \
		ROUND_S(1); \
		ROUND_S(2); \
		ROUND_S(3); \
		ROUND_S(4); \
		ROUND_S(5); \
		ROUND_S(6); \
		ROUND_S(7); \
		H0 ^= S0 ^ V0 ^ V8; \
		H1 ^= S1 ^ V1 ^ V9; \
		H2 ^= S2 ^ V2 ^ VA; \
		H3 ^= S3 ^ V3 ^ VB; \
		H4 ^= S0 ^ V4 ^ VC; \
		H5 ^= S1 ^ V5 ^ VD; \
		H6 ^= S2 ^ V6 ^ VE; \
		H7 ^= S3 ^ V7 ^ VF; \
	} while (0)
619
620#endif
621
622#if SPH_64
623
/*
 * Local working copy of a 64-bit context.
 *
 * DECL_STATE64 declares the locals H0..H7 (chaining value), S0..S3
 * (salt) and T0, T1 (counter words). It already ends in a semicolon, so
 * it is used without one.
 */
#define DECL_STATE64 \
	sph_u64 H0, H1, H2, H3, H4, H5, H6, H7; \
	sph_u64 S0, S1, S2, S3, T0, T1;

/* Load H[], S[], T0 and T1 from *state into the DECL_STATE64 locals. */
#define READ_STATE64(state) do { \
		H0 = (state)->H[0]; \
		H1 = (state)->H[1]; \
		H2 = (state)->H[2]; \
		H3 = (state)->H[3]; \
		H4 = (state)->H[4]; \
		H5 = (state)->H[5]; \
		H6 = (state)->H[6]; \
		H7 = (state)->H[7]; \
		S0 = (state)->S[0]; \
		S1 = (state)->S[1]; \
		S2 = (state)->S[2]; \
		S3 = (state)->S[3]; \
		T0 = (state)->T0; \
		T1 = (state)->T1; \
	} while (0)

/* Store the DECL_STATE64 locals back into *state. */
#define WRITE_STATE64(state) do { \
		(state)->H[0] = H0; \
		(state)->H[1] = H1; \
		(state)->H[2] = H2; \
		(state)->H[3] = H3; \
		(state)->H[4] = H4; \
		(state)->H[5] = H5; \
		(state)->H[6] = H6; \
		(state)->H[7] = H7; \
		(state)->S[0] = S0; \
		(state)->S[1] = S1; \
		(state)->S[2] = S2; \
		(state)->S[3] = S3; \
		(state)->T0 = T0; \
		(state)->T1 = T1; \
	} while (0)
661
662#if SPH_COMPACT_BLAKE_64
663
/*
 * 64-bit compression function, compact (looped) form.
 *
 * Requires in scope: the DECL_STATE64 locals (H0..H7, S0..S3, T0, T1)
 * and a byte pointer "buf" to the 128-byte block. The sixteen message
 * words are loaded from buf with sph_dec64be_aligned(). Sixteen rounds
 * are applied, indexing sigma[][] directly with the round number. On
 * exit H0..H7 hold the new chaining value (old H XOR salt XOR both
 * halves of V).
 */
#define COMPRESS64 do { \
		sph_u64 M[16]; \
		sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; \
		sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; \
		unsigned r; \
		V0 = H0; \
		V1 = H1; \
		V2 = H2; \
		V3 = H3; \
		V4 = H4; \
		V5 = H5; \
		V6 = H6; \
		V7 = H7; \
		V8 = S0 ^ CB0; \
		V9 = S1 ^ CB1; \
		VA = S2 ^ CB2; \
		VB = S3 ^ CB3; \
		VC = T0 ^ CB4; \
		VD = T0 ^ CB5; \
		VE = T1 ^ CB6; \
		VF = T1 ^ CB7; \
		M[0x0] = sph_dec64be_aligned(buf + 0); \
		M[0x1] = sph_dec64be_aligned(buf + 8); \
		M[0x2] = sph_dec64be_aligned(buf + 16); \
		M[0x3] = sph_dec64be_aligned(buf + 24); \
		M[0x4] = sph_dec64be_aligned(buf + 32); \
		M[0x5] = sph_dec64be_aligned(buf + 40); \
		M[0x6] = sph_dec64be_aligned(buf + 48); \
		M[0x7] = sph_dec64be_aligned(buf + 56); \
		M[0x8] = sph_dec64be_aligned(buf + 64); \
		M[0x9] = sph_dec64be_aligned(buf + 72); \
		M[0xA] = sph_dec64be_aligned(buf + 80); \
		M[0xB] = sph_dec64be_aligned(buf + 88); \
		M[0xC] = sph_dec64be_aligned(buf + 96); \
		M[0xD] = sph_dec64be_aligned(buf + 104); \
		M[0xE] = sph_dec64be_aligned(buf + 112); \
		M[0xF] = sph_dec64be_aligned(buf + 120); \
		for (r = 0; r < 16; r ++) \
			ROUND_B(r); \
		H0 ^= S0 ^ V0 ^ V8; \
		H1 ^= S1 ^ V1 ^ V9; \
		H2 ^= S2 ^ V2 ^ VA; \
		H3 ^= S3 ^ V3 ^ VB; \
		H4 ^= S0 ^ V4 ^ VC; \
		H5 ^= S1 ^ V5 ^ VD; \
		H6 ^= S2 ^ V6 ^ VE; \
		H7 ^= S3 ^ V7 ^ VF; \
	} while (0)
712
713#else
714
/*
 * 64-bit compression function, fully unrolled form.
 *
 * Requires in scope: the DECL_STATE64 locals (H0..H7, S0..S3, T0, T1)
 * and a byte pointer "buf" to the 128-byte block. The sixteen message
 * words are loaded from buf with sph_dec64be_aligned() into M0..MF,
 * which ROUND_B() selects by token pasting.
 *
 * Sixteen rounds are applied. The Z table only defines schedules 0..9,
 * so rounds 10..15 are written as ROUND_B(0)..ROUND_B(5) again. On exit
 * H0..H7 hold the new chaining value (old H XOR salt XOR both halves
 * of V).
 */
#define COMPRESS64 do { \
		sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; \
		sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; \
		sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; \
		sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; \
		V0 = H0; \
		V1 = H1; \
		V2 = H2; \
		V3 = H3; \
		V4 = H4; \
		V5 = H5; \
		V6 = H6; \
		V7 = H7; \
		V8 = S0 ^ CB0; \
		V9 = S1 ^ CB1; \
		VA = S2 ^ CB2; \
		VB = S3 ^ CB3; \
		VC = T0 ^ CB4; \
		VD = T0 ^ CB5; \
		VE = T1 ^ CB6; \
		VF = T1 ^ CB7; \
		M0 = sph_dec64be_aligned(buf + 0); \
		M1 = sph_dec64be_aligned(buf + 8); \
		M2 = sph_dec64be_aligned(buf + 16); \
		M3 = sph_dec64be_aligned(buf + 24); \
		M4 = sph_dec64be_aligned(buf + 32); \
		M5 = sph_dec64be_aligned(buf + 40); \
		M6 = sph_dec64be_aligned(buf + 48); \
		M7 = sph_dec64be_aligned(buf + 56); \
		M8 = sph_dec64be_aligned(buf + 64); \
		M9 = sph_dec64be_aligned(buf + 72); \
		MA = sph_dec64be_aligned(buf + 80); \
		MB = sph_dec64be_aligned(buf + 88); \
		MC = sph_dec64be_aligned(buf + 96); \
		MD = sph_dec64be_aligned(buf + 104); \
		ME = sph_dec64be_aligned(buf + 112); \
		MF = sph_dec64be_aligned(buf + 120); \
		ROUND_B(0); \
		ROUND_B(1); \
		ROUND_B(2); \
		ROUND_B(3); \
		ROUND_B(4); \
		ROUND_B(5); \
		ROUND_B(6); \
		ROUND_B(7); \
		ROUND_B(8); \
		ROUND_B(9); \
		ROUND_B(0); \
		ROUND_B(1); \
		ROUND_B(2); \
		ROUND_B(3); \
		ROUND_B(4); \
		ROUND_B(5); \
		H0 ^= S0 ^ V0 ^ V8; \
		H1 ^= S1 ^ V1 ^ V9; \
		H2 ^= S2 ^ V2 ^ VA; \
		H3 ^= S3 ^ V3 ^ VB; \
		H4 ^= S0 ^ V4 ^ VC; \
		H5 ^= S1 ^ V5 ^ VD; \
		H6 ^= S2 ^ V6 ^ VE; \
		H7 ^= S3 ^ V7 ^ VF; \
	} while (0)
777
778#endif
779
780#endif
781
782static const sph_u32 salt_zero_small[4] = { 0, 0, 0, 0 };
783
784static void
785blake32_init(sph_blake_small_context *sc,
786 const sph_u32 *iv, const sph_u32 *salt)
787{
788 memcpy(sc->H, iv, 8 * sizeof(sph_u32));
789 memcpy(sc->S, salt, 4 * sizeof(sph_u32));
790 sc->T0 = sc->T1 = 0;
791 sc->ptr = 0;
792}
793
794static void
795blake32(sph_blake_small_context *sc, const void *data, size_t len)
796{
797 unsigned char *buf;
798 size_t ptr;
799 DECL_STATE32
800
801 buf = sc->buf;
802 ptr = sc->ptr;
803 if (len < (sizeof sc->buf) - ptr) {
804 memcpy(buf + ptr, data, len);
805 ptr += len;
806 sc->ptr = ptr;
807 return;
808 }
809
810 READ_STATE32(sc);
811 while (len > 0) {
812 size_t clen;
813
814 clen = (sizeof sc->buf) - ptr;
815 if (clen > len)
816 clen = len;
817 memcpy(buf + ptr, data, clen);
818 ptr += clen;
819 data = (const unsigned char *)data + clen;
820 len -= clen;
821 if (ptr == sizeof sc->buf) {
822 if ((T0 = SPH_T32(T0 + 512)) < 512)
823 T1 = SPH_T32(T1 + 1);
824 COMPRESS32;
825 ptr = 0;
826 }
827 }
828 WRITE_STATE32(sc);
829 sc->ptr = ptr;
830}
831
832static void
833blake32_close(sph_blake_small_context *sc,
834 unsigned ub, unsigned n, void *dst, size_t out_size_w32)
835{
836 union {
837 unsigned char buf[64];
838 sph_u32 dummy;
839 } u;
840 size_t ptr, k;
841 unsigned bit_len;
842 unsigned z;
843 sph_u32 th, tl;
844 unsigned char *out;
845
846 ptr = sc->ptr;
847 bit_len = ((unsigned)ptr << 3) + n;
848 z = 0x80 >> n;
849 u.buf[ptr] = ((ub & -z) | z) & 0xFF;
850 tl = sc->T0 + bit_len;
851 th = sc->T1;
852 if (ptr == 0 && n == 0) {
853 sc->T0 = SPH_C32(0xFFFFFE00);
854 sc->T1 = SPH_C32(0xFFFFFFFF);
855 } else if (sc->T0 == 0) {
856 sc->T0 = SPH_C32(0xFFFFFE00) + bit_len;
857 sc->T1 = SPH_T32(sc->T1 - 1);
858 } else {
859 sc->T0 -= 512 - bit_len;
860 }
861 if (bit_len <= 446) {
862 memset(u.buf + ptr + 1, 0, 55 - ptr);
863 if (out_size_w32 == 8)
864 u.buf[55] |= 1;
865 sph_enc32be_aligned(u.buf + 56, th);
866 sph_enc32be_aligned(u.buf + 60, tl);
867 blake32(sc, u.buf + ptr, 64 - ptr);
868 } else {
869 memset(u.buf + ptr + 1, 0, 63 - ptr);
870 blake32(sc, u.buf + ptr, 64 - ptr);
871 sc->T0 = SPH_C32(0xFFFFFE00);
872 sc->T1 = SPH_C32(0xFFFFFFFF);
873 memset(u.buf, 0, 56);
874 if (out_size_w32 == 8)
875 u.buf[55] = 1;
876 sph_enc32be_aligned(u.buf + 56, th);
877 sph_enc32be_aligned(u.buf + 60, tl);
878 blake32(sc, u.buf, 64);
879 }
880 out = dst;
881 for (k = 0; k < out_size_w32; k ++)
882 sph_enc32be(out + (k << 2), sc->H[k]);
883}
884
885#if SPH_64
886
887static const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
888
889static void
890blake64_init(sph_blake_big_context *sc,
891 const sph_u64 *iv, const sph_u64 *salt)
892{
893 memcpy(sc->H, iv, 8 * sizeof(sph_u64));
894 memcpy(sc->S, salt, 4 * sizeof(sph_u64));
895 sc->T0 = sc->T1 = 0;
896 sc->ptr = 0;
897}
898
899static void
900blake64(sph_blake_big_context *sc, const void *data, size_t len)
901{
902 unsigned char *buf;
903 size_t ptr;
904 DECL_STATE64
905
906 buf = sc->buf;
907 ptr = sc->ptr;
908 if (len < (sizeof sc->buf) - ptr) {
909 memcpy(buf + ptr, data, len);
910 ptr += len;
911 sc->ptr = ptr;
912 return;
913 }
914
915 READ_STATE64(sc);
916 while (len > 0) {
917 size_t clen;
918
919 clen = (sizeof sc->buf) - ptr;
920 if (clen > len)
921 clen = len;
922 memcpy(buf + ptr, data, clen);
923 ptr += clen;
924 data = (const unsigned char *)data + clen;
925 len -= clen;
926 if (ptr == sizeof sc->buf) {
927 if ((T0 = SPH_T64(T0 + 1024)) < 1024)
928 T1 = SPH_T64(T1 + 1);
929 COMPRESS64;
930 ptr = 0;
931 }
932 }
933 WRITE_STATE64(sc);
934 sc->ptr = ptr;
935}
936
937static void
938blake64_close(sph_blake_big_context *sc,
939 unsigned ub, unsigned n, void *dst, size_t out_size_w64)
940{
941 union {
942 unsigned char buf[128];
943 sph_u64 dummy;
944 } u;
945 size_t ptr, k;
946 unsigned bit_len;
947 unsigned z;
948 sph_u64 th, tl;
949 unsigned char *out;
950
951 ptr = sc->ptr;
952 bit_len = ((unsigned)ptr << 3) + n;
953 z = 0x80 >> n;
954 u.buf[ptr] = ((ub & -z) | z) & 0xFF;
955 tl = sc->T0 + bit_len;
956 th = sc->T1;
957 if (ptr == 0 && n == 0) {
958 sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
959 sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
960 } else if (sc->T0 == 0) {
961 sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + bit_len;
962 sc->T1 = SPH_T64(sc->T1 - 1);
963 } else {
964 sc->T0 -= 1024 - bit_len;
965 }
966 if (bit_len <= 894) {
967 memset(u.buf + ptr + 1, 0, 111 - ptr);
968 if (out_size_w64 == 8)
969 u.buf[111] |= 1;
970 sph_enc64be_aligned(u.buf + 112, th);
971 sph_enc64be_aligned(u.buf + 120, tl);
972 blake64(sc, u.buf + ptr, 128 - ptr);
973 } else {
974 memset(u.buf + ptr + 1, 0, 127 - ptr);
975 blake64(sc, u.buf + ptr, 128 - ptr);
976 sc->T0 = SPH_C64(0xFFFFFFFFFFFFFC00);
977 sc->T1 = SPH_C64(0xFFFFFFFFFFFFFFFF);
978 memset(u.buf, 0, 112);
979 if (out_size_w64 == 8)
980 u.buf[111] = 1;
981 sph_enc64be_aligned(u.buf + 112, th);
982 sph_enc64be_aligned(u.buf + 120, tl);
983 blake64(sc, u.buf, 128);
984 }
985 out = dst;
986 for (k = 0; k < out_size_w64; k ++)
987 sph_enc64be(out + (k << 3), sc->H[k]);
988}
989
990#endif
991
992/* see sph_blake.h */
993void
994sph_blake224_init(void *cc)
995{
996 blake32_init(cc, IV224, salt_zero_small);
997}
998
/*
 * see sph_blake.h
 *
 * Feed len bytes from data into the 224-bit hash context at cc.
 */
void
sph_blake224(void *cc, const void *data, size_t len)
{
	blake32(cc, data, len);
}
1005
/*
 * see sph_blake.h
 *
 * Finish the 224-bit hash with no extra bits and write the digest to
 * dst; the context is re-initialized by the callee.
 */
void
sph_blake224_close(void *cc, void *dst)
{
	sph_blake224_addbits_and_close(cc, 0, 0, dst);
}
1012
/*
 * see sph_blake.h
 *
 * Append n extra bits taken from ub, finish the 224-bit hash, write
 * seven 32-bit words to dst, then re-initialize the context so it can
 * be used for a new 224-bit computation.
 */
void
sph_blake224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	blake32_close(cc, ub, n, dst, 7);
	sph_blake224_init(cc);
}
1020
1021/* see sph_blake.h */
1022void
1023sph_blake256_init(void *cc)
1024{
1025 blake32_init(cc, IV256, salt_zero_small);
1026}
1027
/*
 * see sph_blake.h
 *
 * Feed len bytes from data into the 256-bit hash context at cc.
 */
void
sph_blake256(void *cc, const void *data, size_t len)
{
	blake32(cc, data, len);
}
1034
/*
 * see sph_blake.h
 *
 * Finish the 256-bit hash with no extra bits and write the digest to
 * dst; the context is re-initialized by the callee.
 */
void
sph_blake256_close(void *cc, void *dst)
{
	sph_blake256_addbits_and_close(cc, 0, 0, dst);
}
1041
/*
 * see sph_blake.h
 *
 * Append n extra bits taken from ub, finish the 256-bit hash, write
 * eight 32-bit words to dst, then re-initialize the context so it can
 * be used for a new 256-bit computation.
 */
void
sph_blake256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	blake32_close(cc, ub, n, dst, 8);
	sph_blake256_init(cc);
}
1049
1050#if SPH_64
1051
1052/* see sph_blake.h */
1053void
1054sph_blake384_init(void *cc)
1055{
1056 blake64_init(cc, IV384, salt_zero_big);
1057}
1058
/*
 * see sph_blake.h
 *
 * Feed len bytes from data into the 384-bit hash context at cc.
 */
void
sph_blake384(void *cc, const void *data, size_t len)
{
	blake64(cc, data, len);
}
1065
/*
 * see sph_blake.h
 *
 * Finish the 384-bit hash with no extra bits and write the digest to
 * dst; the context is re-initialized by the callee.
 */
void
sph_blake384_close(void *cc, void *dst)
{
	sph_blake384_addbits_and_close(cc, 0, 0, dst);
}
1072
/*
 * see sph_blake.h
 *
 * Append n extra bits taken from ub, finish the 384-bit hash, write
 * six 64-bit words to dst, then re-initialize the context so it can be
 * used for a new 384-bit computation.
 */
void
sph_blake384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	blake64_close(cc, ub, n, dst, 6);
	sph_blake384_init(cc);
}
1080
1081/* see sph_blake.h */
1082void
1083sph_blake512_init(void *cc)
1084{
1085 blake64_init(cc, IV512, salt_zero_big);
1086}
1087
/*
 * see sph_blake.h
 *
 * Feed len bytes from data into the 512-bit hash context at cc.
 */
void
sph_blake512(void *cc, const void *data, size_t len)
{
	blake64(cc, data, len);
}
1094
/*
 * see sph_blake.h
 *
 * Finish the 512-bit hash with no extra bits and write the digest to
 * dst; the context is re-initialized by the callee.
 */
void
sph_blake512_close(void *cc, void *dst)
{
	sph_blake512_addbits_and_close(cc, 0, 0, dst);
}
1101
/*
 * see sph_blake.h
 *
 * Append n extra bits taken from ub, finish the 512-bit hash, write
 * eight 64-bit words to dst, then re-initialize the context so it can
 * be used for a new 512-bit computation.
 */
void
sph_blake512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	blake64_close(cc, ub, n, dst, 8);
	sph_blake512_init(cc);
}
1109
1110#endif
1111
1112#ifdef __cplusplus
1113}
1114#endif
This page took 0.138193 seconds and 4 git commands to generate.