1 /* $Id: shavite.c 227 2010-06-16 17:28:38Z tp $ */
3 * SHAvite-3 implementation.
5 * ==========================(LICENSE BEGIN)============================
7 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
17 * The above copyright notice and this permission notice shall be
18 * included in all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * ===========================(LICENSE END)=============================
36 #include "sph_shavite.h"
42 #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHAVITE
43 #define SPH_SMALL_FOOTPRINT_SHAVITE 1
47 #pragma warning (disable: 4146)
53 * As of round 2 of the SHA-3 competition, the published reference
54 * implementation and test vectors are wrong, because they use
55 * big-endian AES tables while the internal decoding uses little-endian.
56 * The code below follows the specification. To turn it into a code
57 * which follows the reference implementation (the one called "BugFix"
58 * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
59 * the code below (from the '#define AES_BIG_ENDIAN...' to the definition
60 * of the AES_ROUND_NOKEY macro) and replace it with the version which
61 * is commented out afterwards.
64 #define AES_BIG_ENDIAN 0
65 #include "aes_helper.c"
/*
 * Initial values (IVs) for the four SHAvite-3 output sizes, for the
 * little-endian AES variant selected above (AES_BIG_ENDIAN == 0).
 * NOTE(review): this listing is an excerpt — the embedded original line
 * numbers jump (69 -> 72, 74 -> 77, 81 -> 84), so the closing "};" of
 * each array has been elided. Fragment is incomplete; do not compile as-is.
 */
/* IV for the 224-bit output variant (8 x 32-bit words). */
67 static const sph_u32 IV224[] = {
68 C32(0x6774F31C), C32(0x990AE210), C32(0xC87D4274), C32(0xC9546371),
69 C32(0x62B2AEA8), C32(0x4B5801D8), C32(0x1B702860), C32(0x842F3017)
/* IV for the 256-bit output variant (8 x 32-bit words). */
72 static const sph_u32 IV256[] = {
73 C32(0x49BB3E47), C32(0x2674860D), C32(0xA8B392AC), C32(0x021AC4E6),
74 C32(0x409283CF), C32(0x620E5D86), C32(0x6D929DCB), C32(0x96CC2A8B)
/* IV for the 384-bit output variant (16 x 32-bit words — wide state). */
77 static const sph_u32 IV384[] = {
78 C32(0x83DF1545), C32(0xF9AAEC13), C32(0xF4803CB0), C32(0x11FE1F47),
79 C32(0xDA6CD269), C32(0x4F53FCD7), C32(0x950529A2), C32(0x97908147),
80 C32(0xB0A4D7AF), C32(0x2B9132BF), C32(0x226E607D), C32(0x3C0F8D7C),
81 C32(0x487B3F0F), C32(0x04363E22), C32(0x0155C99C), C32(0xEC2E20D3)
/* IV for the 512-bit output variant (16 x 32-bit words — wide state). */
84 static const sph_u32 IV512[] = {
85 C32(0x72FCCDD8), C32(0x79CA4727), C32(0x128A077B), C32(0x40D55AEC),
86 C32(0xD1901A06), C32(0x430AE307), C32(0xB29F5CD1), C32(0xDF07FBFC),
87 C32(0x8E45D73D), C32(0x681AB538), C32(0xBDE86578), C32(0xDD577E47),
88 C32(0xE275EADE), C32(0x502D9FCD), C32(0xB9357178), C32(0x022A4B9A)
/*
 * One keyless AES round on the four 32-bit words (x0..x3), delegating to
 * the little-endian helper AES_ROUND_NOKEY_LE from aes_helper.c.
 * NOTE(review): continuation lines 92-95 of the original macro (presumably
 * the t0..t3 temporaries and argument setup — TODO confirm against upstream
 * sphlib) are elided from this excerpt; fragment is incomplete.
 */
91 #define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
96 AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
// NOTE(review): per the explanatory comment earlier in the file (original
// lines 53-61), this section is the alternative "BugFix"/reference-
// implementation variant (big-endian AES tables, different IVs), which is
// kept *commented out* in upstream sphlib and only swapped in by hand.
// This excerpt shows it with most lines elided (numbering jumps 101 -> 104,
// 109 -> 112, etc.); the array closers and the surrounding comment
// delimiters are not visible. Do not treat as live code.
100 * This is the code needed to match the "reference implementation" as
101 * published on Nov 23rd, 2009, instead of the published specification.
104 #define AES_BIG_ENDIAN 1
105 #include "aes_helper.c"
107 static const sph_u32 IV224[] = {
108 C32(0xC4C67795), C32(0xC0B1817F), C32(0xEAD88924), C32(0x1ABB1BB0),
109 C32(0xE0C29152), C32(0xBDE046BA), C32(0xAEEECF99), C32(0x58D509D8)
112 static const sph_u32 IV256[] = {
113 C32(0x3EECF551), C32(0xBF10819B), C32(0xE6DC8559), C32(0xF3E23FD5),
114 C32(0x431AEC73), C32(0x79E3F731), C32(0x98325F05), C32(0xA92A31F1)
117 static const sph_u32 IV384[] = {
118 C32(0x71F48510), C32(0xA903A8AC), C32(0xFE3216DD), C32(0x0B2D2AD4),
119 C32(0x6672900A), C32(0x41032819), C32(0x15A7D780), C32(0xB3CAB8D9),
120 C32(0x34EF4711), C32(0xDE019FE8), C32(0x4D674DC4), C32(0xE056D96B),
121 C32(0xA35C016B), C32(0xDD903BA7), C32(0x8C1B09B4), C32(0x2C3E9F25)
124 static const sph_u32 IV512[] = {
125 C32(0xD5652B63), C32(0x25F1E6EA), C32(0xB18F48FA), C32(0xA1EE3A47),
126 C32(0xC8B67B07), C32(0xBDCE48D3), C32(0xE3937B78), C32(0x05DB5186),
127 C32(0x613BE326), C32(0xA11FA303), C32(0x90C833D4), C32(0x79CEE316),
128 C32(0x1E1AF00F), C32(0x2829B165), C32(0x23B25F80), C32(0x21E11499)
131 #define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
136 AES_ROUND_NOKEY_BE(t0, t1, t2, t3, x0, x1, x2, x3); \
/*
 * One element of the SHAvite-3 key-schedule expansion: applies a keyless
 * AES round to the rotated word order (k1, k2, k3, k0).
 * NOTE(review): original line 142 and the macro's remaining continuation
 * lines are elided from this excerpt; fragment is incomplete.
 */
141 #define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
143 AES_ROUND_NOKEY(k1, k2, k3, k0); \
/*
 * c256 (small-footprint build): compression function for the 256-bit
 * SHAvite-3 branch. Expands the 512-bit message block into a round-key
 * array rk[] (loaded little-endian), XOR-injects the 64-bit bit counter
 * (count0/count1, partly complemented) at fixed schedule positions, then
 * runs the AES-based round loop over the state words p0..p7.
 * NOTE(review): this is an incomplete excerpt — the embedded original line
 * numbers jump throughout (e.g. 157 -> 159, 175 -> 179, 223 -> 237), so
 * declarations of u/r/s/rk[], loop closers, the state load/feed-forward,
 * and many round statements are elided. Do not compile as-is.
 */
151 #if SPH_SMALL_FOOTPRINT_SHAVITE
154 * This function assumes that "msg" is aligned for 32-bit access.
157 c256(sph_shavite_small_context *sc, const void *msg)
159 sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
/* Load the 64-byte message block into rk[0..15], little-endian. */
164 #if SPH_LITTLE_ENDIAN
167 for (u = 0; u < 16; u += 4) {
168 rk[u + 0] = sph_dec32le_aligned(
169 (const unsigned char *)msg + (u << 2) + 0);
170 rk[u + 1] = sph_dec32le_aligned(
171 (const unsigned char *)msg + (u << 2) + 4);
172 rk[u + 2] = sph_dec32le_aligned(
173 (const unsigned char *)msg + (u << 2) + 8);
174 rk[u + 3] = sph_dec32le_aligned(
175 (const unsigned char *)msg + (u << 2) + 12);
/* Key schedule: AES-round-based expansion with counter injection at
 * fixed rk[] indices (16/17, 57/58, 86/87, 124/127 — note the
 * complemented counter halves). */
179 for (r = 0; r < 4; r ++) {
180 for (s = 0; s < 2; s ++) {
181 sph_u32 x0, x1, x2, x3;
187 AES_ROUND_NOKEY(x0, x1, x2, x3);
188 rk[u + 0] = x0 ^ rk[u - 4];
189 rk[u + 1] = x1 ^ rk[u - 3];
190 rk[u + 2] = x2 ^ rk[u - 2];
191 rk[u + 3] = x3 ^ rk[u - 1];
193 rk[ 16] ^= sc->count0;
194 rk[ 17] ^= SPH_T32(~sc->count1);
195 } else if (u == 56) {
196 rk[ 57] ^= sc->count1;
197 rk[ 58] ^= SPH_T32(~sc->count0);
205 AES_ROUND_NOKEY(x0, x1, x2, x3);
206 rk[u + 0] = x0 ^ rk[u - 4];
207 rk[u + 1] = x1 ^ rk[u - 3];
208 rk[u + 2] = x2 ^ rk[u - 2];
209 rk[u + 3] = x3 ^ rk[u - 1];
211 rk[ 86] ^= sc->count1;
212 rk[ 87] ^= SPH_T32(~sc->count0);
213 } else if (u == 124) {
214 rk[124] ^= sc->count0;
215 rk[127] ^= SPH_T32(~sc->count1);
/* Linear expansion phase: each new word is the XOR of two earlier ones. */
219 for (s = 0; s < 4; s ++) {
220 rk[u + 0] = rk[u - 16] ^ rk[u - 3];
221 rk[u + 1] = rk[u - 15] ^ rk[u - 2];
222 rk[u + 2] = rk[u - 14] ^ rk[u - 1];
223 rk[u + 3] = rk[u - 13] ^ rk[u - 0];
/* Main round loop: 6 iterations, each applying keyless AES rounds to
 * halves of the state (intervening XORs with rk[] are elided here). */
237 for (r = 0; r < 6; r ++) {
238 sph_u32 x0, x1, x2, x3;
244 AES_ROUND_NOKEY(x0, x1, x2, x3);
249 AES_ROUND_NOKEY(x0, x1, x2, x3);
254 AES_ROUND_NOKEY(x0, x1, x2, x3);
264 AES_ROUND_NOKEY(x0, x1, x2, x3);
269 AES_ROUND_NOKEY(x0, x1, x2, x3);
274 AES_ROUND_NOKEY(x0, x1, x2, x3);
/*
 * c256 (fully-unrolled build): same compression function as the
 * small-footprint c256 above, but with the round keys held in the locals
 * rk0..rkF and every round written out inline — selected when
 * SPH_SMALL_FOOTPRINT_SHAVITE is not set.
 * NOTE(review): incomplete excerpt — the original line numbers jump
 * throughout (e.g. 301 -> 312, 355 -> 362), so the state initialization
 * from sc->h, most x0..x3 <- p ^ rk assignments, the feed-forward back
 * into sc->h, and the function braces are elided. Do not compile as-is.
 */
293 * This function assumes that "msg" is aligned for 32-bit access.
296 c256(sph_shavite_small_context *sc, const void *msg)
298 sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
299 sph_u32 x0, x1, x2, x3;
300 sph_u32 rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7;
301 sph_u32 rk8, rk9, rkA, rkB, rkC, rkD, rkE, rkF;
/* Round 0: load the 64-byte message block little-endian into rk0..rkF,
 * interleaved with the first four keyless AES rounds. */
312 rk0 = sph_dec32le_aligned((const unsigned char *)msg + 0);
314 rk1 = sph_dec32le_aligned((const unsigned char *)msg + 4);
316 rk2 = sph_dec32le_aligned((const unsigned char *)msg + 8);
318 rk3 = sph_dec32le_aligned((const unsigned char *)msg + 12);
320 AES_ROUND_NOKEY(x0, x1, x2, x3);
321 rk4 = sph_dec32le_aligned((const unsigned char *)msg + 16);
323 rk5 = sph_dec32le_aligned((const unsigned char *)msg + 20);
325 rk6 = sph_dec32le_aligned((const unsigned char *)msg + 24);
327 rk7 = sph_dec32le_aligned((const unsigned char *)msg + 28);
329 AES_ROUND_NOKEY(x0, x1, x2, x3);
330 rk8 = sph_dec32le_aligned((const unsigned char *)msg + 32);
332 rk9 = sph_dec32le_aligned((const unsigned char *)msg + 36);
334 rkA = sph_dec32le_aligned((const unsigned char *)msg + 40);
336 rkB = sph_dec32le_aligned((const unsigned char *)msg + 44);
338 AES_ROUND_NOKEY(x0, x1, x2, x3);
344 rkC = sph_dec32le_aligned((const unsigned char *)msg + 48);
346 rkD = sph_dec32le_aligned((const unsigned char *)msg + 52);
348 rkE = sph_dec32le_aligned((const unsigned char *)msg + 56);
350 rkF = sph_dec32le_aligned((const unsigned char *)msg + 60);
352 AES_ROUND_NOKEY(x0, x1, x2, x3);
/* Key-schedule expansion; the bit counter (count0/count1, one half
 * complemented) is folded in at four distinct points below. */
353 KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
354 rk0 ^= rkC ^ sc->count0;
355 rk1 ^= rkD ^ SPH_T32(~sc->count1);
362 AES_ROUND_NOKEY(x0, x1, x2, x3);
363 KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
372 AES_ROUND_NOKEY(x0, x1, x2, x3);
378 KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
387 AES_ROUND_NOKEY(x0, x1, x2, x3);
388 KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
397 AES_ROUND_NOKEY(x0, x1, x2, x3);
406 AES_ROUND_NOKEY(x0, x1, x2, x3);
420 AES_ROUND_NOKEY(x0, x1, x2, x3);
429 AES_ROUND_NOKEY(x0, x1, x2, x3);
438 AES_ROUND_NOKEY(x0, x1, x2, x3);
444 KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
453 AES_ROUND_NOKEY(x0, x1, x2, x3);
454 KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
463 AES_ROUND_NOKEY(x0, x1, x2, x3);
464 KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
466 rk9 ^= rk5 ^ sc->count1;
467 rkA ^= rk6 ^ SPH_T32(~sc->count0);
473 AES_ROUND_NOKEY(x0, x1, x2, x3);
479 KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
488 AES_ROUND_NOKEY(x0, x1, x2, x3);
497 AES_ROUND_NOKEY(x0, x1, x2, x3);
506 AES_ROUND_NOKEY(x0, x1, x2, x3);
520 AES_ROUND_NOKEY(x0, x1, x2, x3);
529 AES_ROUND_NOKEY(x0, x1, x2, x3);
530 KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
539 AES_ROUND_NOKEY(x0, x1, x2, x3);
545 KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
548 rk6 ^= rk2 ^ sc->count1;
549 rk7 ^= rk3 ^ SPH_T32(~sc->count0);
554 AES_ROUND_NOKEY(x0, x1, x2, x3);
555 KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
564 AES_ROUND_NOKEY(x0, x1, x2, x3);
565 KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
574 AES_ROUND_NOKEY(x0, x1, x2, x3);
588 AES_ROUND_NOKEY(x0, x1, x2, x3);
597 AES_ROUND_NOKEY(x0, x1, x2, x3);
606 AES_ROUND_NOKEY(x0, x1, x2, x3);
620 AES_ROUND_NOKEY(x0, x1, x2, x3);
621 KEY_EXPAND_ELT(rk0, rk1, rk2, rk3);
630 AES_ROUND_NOKEY(x0, x1, x2, x3);
631 KEY_EXPAND_ELT(rk4, rk5, rk6, rk7);
640 AES_ROUND_NOKEY(x0, x1, x2, x3);
646 KEY_EXPAND_ELT(rk8, rk9, rkA, rkB);
655 AES_ROUND_NOKEY(x0, x1, x2, x3);
656 KEY_EXPAND_ELT(rkC, rkD, rkE, rkF);
657 rkC ^= rk8 ^ sc->count0;
660 rkF ^= rkB ^ SPH_T32(~sc->count1);
665 AES_ROUND_NOKEY(x0, x1, x2, x3);
674 AES_ROUND_NOKEY(x0, x1, x2, x3);
688 AES_ROUND_NOKEY(x0, x1, x2, x3);
697 AES_ROUND_NOKEY(x0, x1, x2, x3);
706 AES_ROUND_NOKEY(x0, x1, x2, x3);
/*
 * c512 (small-footprint build): compression function for the 512-bit
 * SHAvite-3 branch. Expands the 1024-bit message block into rk[]
 * (memcpy on little-endian hosts, explicit LE decode otherwise), injects
 * the 128-bit counter (count0..count3, one word complemented) at fixed
 * schedule positions, then runs 14 rounds via the C512_ELT/WROT macros.
 * NOTE(review): incomplete excerpt — original line numbers jump (e.g.
 * 732 -> 737, 806 -> 828), so rk[]/u/r/s declarations, loop and function
 * braces, the state load/feed-forward, and parts of both macros are
 * elided. Do not compile as-is.
 */
723 #if SPH_SMALL_FOOTPRINT_SHAVITE
726 * This function assumes that "msg" is aligned for 32-bit access.
729 c512(sph_shavite_big_context *sc, const void *msg)
731 sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
732 sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
/* Message load: raw memcpy when the host is little-endian, otherwise a
 * per-word little-endian decode into rk[0..31]. */
737 #if SPH_LITTLE_ENDIAN
738 memcpy(rk, msg, 128);
740 for (u = 0; u < 32; u += 4) {
741 rk[u + 0] = sph_dec32le_aligned(
742 (const unsigned char *)msg + (u << 2) + 0);
743 rk[u + 1] = sph_dec32le_aligned(
744 (const unsigned char *)msg + (u << 2) + 4);
745 rk[u + 2] = sph_dec32le_aligned(
746 (const unsigned char *)msg + (u << 2) + 8);
747 rk[u + 3] = sph_dec32le_aligned(
748 (const unsigned char *)msg + (u << 2) + 12);
/* AES-based key-schedule expansion; the four counter words are XORed in
 * (with one word complemented) at rk indices 32-35, 164-167, 316-319 and
 * 440-443 — note the varying counter-word permutation at each site. */
753 for (s = 0; s < 4; s ++) {
754 sph_u32 x0, x1, x2, x3;
760 AES_ROUND_NOKEY(x0, x1, x2, x3);
761 rk[u + 0] = x0 ^ rk[u - 4];
762 rk[u + 1] = x1 ^ rk[u - 3];
763 rk[u + 2] = x2 ^ rk[u - 2];
764 rk[u + 3] = x3 ^ rk[u - 1];
766 rk[ 32] ^= sc->count0;
767 rk[ 33] ^= sc->count1;
768 rk[ 34] ^= sc->count2;
769 rk[ 35] ^= SPH_T32(~sc->count3);
770 } else if (u == 440) {
771 rk[440] ^= sc->count1;
772 rk[441] ^= sc->count0;
773 rk[442] ^= sc->count3;
774 rk[443] ^= SPH_T32(~sc->count2);
782 AES_ROUND_NOKEY(x0, x1, x2, x3);
783 rk[u + 0] = x0 ^ rk[u - 4];
784 rk[u + 1] = x1 ^ rk[u - 3];
785 rk[u + 2] = x2 ^ rk[u - 2];
786 rk[u + 3] = x3 ^ rk[u - 1];
788 rk[164] ^= sc->count3;
789 rk[165] ^= sc->count2;
790 rk[166] ^= sc->count1;
791 rk[167] ^= SPH_T32(~sc->count0);
792 } else if (u == 316) {
793 rk[316] ^= sc->count2;
794 rk[317] ^= sc->count3;
795 rk[318] ^= sc->count0;
796 rk[319] ^= SPH_T32(~sc->count1);
/* Linear expansion phase: new words as XORs of earlier schedule words. */
802 for (s = 0; s < 8; s ++) {
803 rk[u + 0] = rk[u - 32] ^ rk[u - 7];
804 rk[u + 1] = rk[u - 31] ^ rk[u - 6];
805 rk[u + 2] = rk[u - 30] ^ rk[u - 5];
806 rk[u + 3] = rk[u - 29] ^ rk[u - 4];
/* 14 main rounds; C512_ELT applies four keyed AES rounds to one half of
 * the 512-bit state, WROT rotates the state words between rounds. */
828 for (r = 0; r < 14; r ++) {
829 #define C512_ELT(l0, l1, l2, l3, r0, r1, r2, r3) do { \
830 sph_u32 x0, x1, x2, x3; \
831 x0 = r0 ^ rk[u ++]; \
832 x1 = r1 ^ rk[u ++]; \
833 x2 = r2 ^ rk[u ++]; \
834 x3 = r3 ^ rk[u ++]; \
835 AES_ROUND_NOKEY(x0, x1, x2, x3); \
840 AES_ROUND_NOKEY(x0, x1, x2, x3); \
845 AES_ROUND_NOKEY(x0, x1, x2, x3); \
850 AES_ROUND_NOKEY(x0, x1, x2, x3); \
857 #define WROT(a, b, c, d) do { \
865 C512_ELT(p0, p1, p2, p3, p4, p5, p6, p7);
866 C512_ELT(p8, p9, pA, pB, pC, pD, pE, pF);
868 WROT(p0, p4, p8, pC);
869 WROT(p1, p5, p9, pD);
870 WROT(p2, p6, pA, pE);
871 WROT(p3, p7, pB, pF);
/*
 * c512 (fully-unrolled build): same compression function as the
 * small-footprint c512 above, with the 32 round-key words held in locals
 * rk00..rk1F and the 14 rounds written out inline (rounds 1/5/9, 2/6/10,
 * 3/7/11 and 4/8/12 share code via the r < 3 loop below).
 * NOTE(review): incomplete excerpt — original line numbers jump (e.g.
 * 908 -> 928, 1011 -> 1020), so state initialization from sc->h, most
 * x ^= rk assignments, several counter-injection lines, loop closers, the
 * final feed-forward, and the function braces are elided. Do not compile
 * as-is.
 */
897 * This function assumes that "msg" is aligned for 32-bit access.
900 c512(sph_shavite_big_context *sc, const void *msg)
902 sph_u32 p0, p1, p2, p3, p4, p5, p6, p7;
903 sph_u32 p8, p9, pA, pB, pC, pD, pE, pF;
904 sph_u32 x0, x1, x2, x3;
905 sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;
906 sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
907 sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
908 sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
/* Round 0: load the 128-byte message block little-endian into
 * rk00..rk1F, interleaved with the first eight keyless AES rounds. */
928 rk00 = sph_dec32le_aligned((const unsigned char *)msg + 0);
930 rk01 = sph_dec32le_aligned((const unsigned char *)msg + 4);
932 rk02 = sph_dec32le_aligned((const unsigned char *)msg + 8);
934 rk03 = sph_dec32le_aligned((const unsigned char *)msg + 12);
936 AES_ROUND_NOKEY(x0, x1, x2, x3);
937 rk04 = sph_dec32le_aligned((const unsigned char *)msg + 16);
939 rk05 = sph_dec32le_aligned((const unsigned char *)msg + 20);
941 rk06 = sph_dec32le_aligned((const unsigned char *)msg + 24);
943 rk07 = sph_dec32le_aligned((const unsigned char *)msg + 28);
945 AES_ROUND_NOKEY(x0, x1, x2, x3);
946 rk08 = sph_dec32le_aligned((const unsigned char *)msg + 32);
948 rk09 = sph_dec32le_aligned((const unsigned char *)msg + 36);
950 rk0A = sph_dec32le_aligned((const unsigned char *)msg + 40);
952 rk0B = sph_dec32le_aligned((const unsigned char *)msg + 44);
954 AES_ROUND_NOKEY(x0, x1, x2, x3);
955 rk0C = sph_dec32le_aligned((const unsigned char *)msg + 48);
957 rk0D = sph_dec32le_aligned((const unsigned char *)msg + 52);
959 rk0E = sph_dec32le_aligned((const unsigned char *)msg + 56);
961 rk0F = sph_dec32le_aligned((const unsigned char *)msg + 60);
963 AES_ROUND_NOKEY(x0, x1, x2, x3);
968 rk10 = sph_dec32le_aligned((const unsigned char *)msg + 64);
970 rk11 = sph_dec32le_aligned((const unsigned char *)msg + 68);
972 rk12 = sph_dec32le_aligned((const unsigned char *)msg + 72);
974 rk13 = sph_dec32le_aligned((const unsigned char *)msg + 76);
976 AES_ROUND_NOKEY(x0, x1, x2, x3);
977 rk14 = sph_dec32le_aligned((const unsigned char *)msg + 80);
979 rk15 = sph_dec32le_aligned((const unsigned char *)msg + 84);
981 rk16 = sph_dec32le_aligned((const unsigned char *)msg + 88);
983 rk17 = sph_dec32le_aligned((const unsigned char *)msg + 92);
985 AES_ROUND_NOKEY(x0, x1, x2, x3);
986 rk18 = sph_dec32le_aligned((const unsigned char *)msg + 96);
988 rk19 = sph_dec32le_aligned((const unsigned char *)msg + 100);
990 rk1A = sph_dec32le_aligned((const unsigned char *)msg + 104);
992 rk1B = sph_dec32le_aligned((const unsigned char *)msg + 108);
994 AES_ROUND_NOKEY(x0, x1, x2, x3);
995 rk1C = sph_dec32le_aligned((const unsigned char *)msg + 112);
997 rk1D = sph_dec32le_aligned((const unsigned char *)msg + 116);
999 rk1E = sph_dec32le_aligned((const unsigned char *)msg + 120);
1001 rk1F = sph_dec32le_aligned((const unsigned char *)msg + 124);
1003 AES_ROUND_NOKEY(x0, x1, x2, x3);
/* Rounds 1-12, three loop iterations of four rounds each. The 128-bit
 * counter (count0..count3, one word complemented per site) is folded
 * into the key schedule at the ^= lines below. */
1009 for (r = 0; r < 3; r ++) {
1011 KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
1020 rk03 ^= SPH_T32(~sc->count3);
1026 AES_ROUND_NOKEY(x0, x1, x2, x3);
1027 KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
1036 rk07 ^= SPH_T32(~sc->count0);
1042 AES_ROUND_NOKEY(x0, x1, x2, x3);
1043 KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
1052 AES_ROUND_NOKEY(x0, x1, x2, x3);
1053 KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
1062 AES_ROUND_NOKEY(x0, x1, x2, x3);
1067 KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
1076 AES_ROUND_NOKEY(x0, x1, x2, x3);
1077 KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
1086 AES_ROUND_NOKEY(x0, x1, x2, x3);
1087 KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
1096 AES_ROUND_NOKEY(x0, x1, x2, x3);
1097 KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
1106 rk1F ^= SPH_T32(~sc->count1);
1112 AES_ROUND_NOKEY(x0, x1, x2, x3);
1117 /* round 2, 6, 10 */
1126 AES_ROUND_NOKEY(x0, x1, x2, x3);
1135 AES_ROUND_NOKEY(x0, x1, x2, x3);
1144 AES_ROUND_NOKEY(x0, x1, x2, x3);
1153 AES_ROUND_NOKEY(x0, x1, x2, x3);
1166 AES_ROUND_NOKEY(x0, x1, x2, x3);
1175 AES_ROUND_NOKEY(x0, x1, x2, x3);
1184 AES_ROUND_NOKEY(x0, x1, x2, x3);
1193 AES_ROUND_NOKEY(x0, x1, x2, x3);
1198 /* round 3, 7, 11 */
1199 KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
1208 AES_ROUND_NOKEY(x0, x1, x2, x3);
1209 KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
1218 AES_ROUND_NOKEY(x0, x1, x2, x3);
1219 KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
1228 AES_ROUND_NOKEY(x0, x1, x2, x3);
1229 KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
1238 AES_ROUND_NOKEY(x0, x1, x2, x3);
1243 KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
1252 AES_ROUND_NOKEY(x0, x1, x2, x3);
1253 KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
1262 AES_ROUND_NOKEY(x0, x1, x2, x3);
1263 KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
1272 AES_ROUND_NOKEY(x0, x1, x2, x3);
1273 KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
1282 AES_ROUND_NOKEY(x0, x1, x2, x3);
1287 /* round 4, 8, 12 */
1296 AES_ROUND_NOKEY(x0, x1, x2, x3);
1305 AES_ROUND_NOKEY(x0, x1, x2, x3);
1314 AES_ROUND_NOKEY(x0, x1, x2, x3);
1323 AES_ROUND_NOKEY(x0, x1, x2, x3);
1336 AES_ROUND_NOKEY(x0, x1, x2, x3);
1345 AES_ROUND_NOKEY(x0, x1, x2, x3);
1354 AES_ROUND_NOKEY(x0, x1, x2, x3);
1363 AES_ROUND_NOKEY(x0, x1, x2, x3);
/* Round 13 (post-loop): final key-schedule pass with the last counter
 * injection at rk18..rk1B. */
1370 KEY_EXPAND_ELT(rk00, rk01, rk02, rk03);
1379 AES_ROUND_NOKEY(x0, x1, x2, x3);
1380 KEY_EXPAND_ELT(rk04, rk05, rk06, rk07);
1389 AES_ROUND_NOKEY(x0, x1, x2, x3);
1390 KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B);
1399 AES_ROUND_NOKEY(x0, x1, x2, x3);
1400 KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F);
1409 AES_ROUND_NOKEY(x0, x1, x2, x3);
1414 KEY_EXPAND_ELT(rk10, rk11, rk12, rk13);
1423 AES_ROUND_NOKEY(x0, x1, x2, x3);
1424 KEY_EXPAND_ELT(rk14, rk15, rk16, rk17);
1433 AES_ROUND_NOKEY(x0, x1, x2, x3);
1434 KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B);
1435 rk18 ^= rk14 ^ sc->count1;
1436 rk19 ^= rk15 ^ sc->count0;
1437 rk1A ^= rk16 ^ sc->count3;
1438 rk1B ^= rk17 ^ SPH_T32(~sc->count2);
1443 AES_ROUND_NOKEY(x0, x1, x2, x3);
1444 KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F);
1453 AES_ROUND_NOKEY(x0, x1, x2, x3);
/*
 * Initialize a small-state (224/256-bit) context: copy the chosen IV
 * into sc->h. NOTE(review): return type, braces and the counter reset
 * lines are elided from this excerpt (numbering jumps 1479 -> 1481).
 */
1479 shavite_small_init(sph_shavite_small_context *sc, const sph_u32 *iv)
1481 memcpy(sc->h, iv, sizeof sc->h);
/*
 * Absorb input for the small (224/256-bit) variants: buffer incoming data
 * in sc->buf and, on each full 64-byte block, bump the 64-bit bit counter
 * by 512 with carry into count1 (the call to c256 and the surrounding
 * loop are elided from this excerpt — numbering jumps 1488 -> 1498, etc.).
 * Fragment is incomplete.
 */
1488 shavite_small_core(sph_shavite_small_context *sc, const void *data, size_t len)
1498 clen = (sizeof sc->buf) - ptr;
1501 memcpy(buf + ptr, data, clen);
1502 data = (const unsigned char *)data + clen;
1505 if (ptr == sizeof sc->buf) {
1506 if ((sc->count0 = SPH_T32(sc->count0 + 512)) == 0)
1507 sc->count1 = SPH_T32(sc->count1 + 1);
/*
 * Finalize the small (224/256-bit) variants: fold the last partial block
 * plus n extra bits (ub) into the bit counter, apply the 0x80-style
 * padding byte via z, zero-pad to byte 54, append the 64-bit bit count
 * (little-endian at offsets 54/58) and the output size in bits (bytes
 * 62/63), then emit out_size_w32 little-endian words of sc->h into dst.
 * NOTE(review): incomplete excerpt — ptr/buf/z/u declarations, the
 * buf[ptr++] padding writes, the c256 calls and braces are elided
 * (numbering jumps 1517 -> 1522, 1530 -> 1532, etc.).
 */
1516 shavite_small_close(sph_shavite_small_context *sc,
1517 unsigned ub, unsigned n, void *dst, size_t out_size_w32)
1522 sph_u32 count0, count1;
/* Snapshot the final bit count before the padding blocks reset it. */
1526 count0 = (sc->count0 += SPH_T32(ptr << 3) + n);
1527 count1 = sc->count1;
/* Build the padding byte: the n extra bits of ub followed by a 1 bit. */
1529 z = ((ub & -z) | z) & 0xFF;
1530 if (ptr == 0 && n == 0) {
1532 memset(buf + 1, 0, 53);
1533 sc->count0 = sc->count1 = 0;
1534 } else if (ptr < 54) {
1536 memset(buf + ptr, 0, 54 - ptr);
/* Padding spills into an extra block: fill this one, compress, restart. */
1539 memset(buf + ptr, 0, 64 - ptr);
1542 sc->count0 = sc->count1 = 0;
/* Trailer: bit count (LE) at 54/58, digest length in bits at 62/63. */
1544 sph_enc32le(buf + 54, count0);
1545 sph_enc32le(buf + 58, count1);
1546 buf[62] = (unsigned char) (out_size_w32 << 5);
1547 buf[63] = (unsigned char) (out_size_w32 >> 3);
/* Emit the digest: out_size_w32 state words, little-endian. */
1549 for (u = 0; u < out_size_w32; u ++)
1550 sph_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);
/*
 * Initialize a big-state (384/512-bit) context: copy the chosen IV into
 * sc->h. NOTE(review): return type, braces and counter resets are elided
 * from this excerpt (numbering jumps 1554 -> 1556).
 */
1554 shavite_big_init(sph_shavite_big_context *sc, const sph_u32 *iv)
1556 memcpy(sc->h, iv, sizeof sc->h);
/*
 * Absorb input for the big (384/512-bit) variants: buffer incoming data in
 * sc->buf and, on each full 128-byte block, bump the 128-bit bit counter
 * by 1024 with carry propagation through count1..count3 (the c512 call,
 * the surrounding loop and the closing braces are elided from this
 * excerpt — numbering jumps 1565 -> 1575, 1588 cuts off mid-statement).
 * Fragment is incomplete.
 */
1565 shavite_big_core(sph_shavite_big_context *sc, const void *data, size_t len)
1575 clen = (sizeof sc->buf) - ptr;
1578 memcpy(buf + ptr, data, clen);
1579 data = (const unsigned char *)data + clen;
1582 if (ptr == sizeof sc->buf) {
1583 if ((sc->count0 = SPH_T32(sc->count0 + 1024)) == 0) {
1584 sc->count1 = SPH_T32(sc->count1 + 1);
1585 if (sc->count1 == 0) {
1586 sc->count2 = SPH_T32(sc->count2 + 1);
1587 if (sc->count2 == 0) {
1588 sc->count3 = SPH_T32(
/*
 * Finalize the big (384/512-bit) variants: fold the last partial block
 * plus n extra bits (ub) into the 128-bit counter, pad with z, zero-fill
 * to byte 110, append the four counter words (little-endian at offsets
 * 110/114/118/122) and the output size in bits (bytes 126/127), then emit
 * out_size_w32 little-endian words of sc->h into dst.
 * NOTE(review): incomplete excerpt — ptr/buf/z/u declarations, the
 * buf[ptr++] padding writes, the c512 calls and braces are elided
 * (numbering jumps 1602 -> 1607, 1617 -> 1619, etc.).
 */
1601 shavite_big_close(sph_shavite_big_context *sc,
1602 unsigned ub, unsigned n, void *dst, size_t out_size_w32)
1607 sph_u32 count0, count1, count2, count3;
/* Snapshot the final bit count before the padding blocks reset it. */
1611 count0 = (sc->count0 += SPH_T32(ptr << 3) + n);
1612 count1 = sc->count1;
1613 count2 = sc->count2;
1614 count3 = sc->count3;
/* Build the padding byte: the n extra bits of ub followed by a 1 bit. */
1616 z = ((ub & -z) | z) & 0xFF;
1617 if (ptr == 0 && n == 0) {
1619 memset(buf + 1, 0, 109);
1620 sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;
1621 } else if (ptr < 110) {
1623 memset(buf + ptr, 0, 110 - ptr);
/* Padding spills into an extra block: fill, compress, restart cleanly. */
1626 memset(buf + ptr, 0, 128 - ptr);
1628 memset(buf, 0, 110);
1629 sc->count0 = sc->count1 = sc->count2 = sc->count3 = 0;
/* Trailer: 128-bit count (LE) at 110..125, digest bits at 126/127. */
1631 sph_enc32le(buf + 110, count0);
1632 sph_enc32le(buf + 114, count1);
1633 sph_enc32le(buf + 118, count2);
1634 sph_enc32le(buf + 122, count3);
1635 buf[126] = (unsigned char) (out_size_w32 << 5);
1636 buf[127] = (unsigned char) (out_size_w32 >> 3);
/* Emit the digest: out_size_w32 state words, little-endian. */
1638 for (u = 0; u < out_size_w32; u ++)
1639 sph_enc32le((unsigned char *)dst + (u << 2), sc->h[u]);
/*
 * Public SHAvite-224 API: thin wrappers over the small-state helpers.
 * Output is 7 x 32-bit words = 224 bits; close() re-initializes the
 * context with IV224 so it can be reused. NOTE(review): return types and
 * braces are elided from this excerpt (numbering jumps 1644 -> 1646 etc.);
 * fragments are incomplete.
 */
1642 /* see sph_shavite.h */
1644 sph_shavite224_init(void *cc)
1646 shavite_small_init(cc, IV224);
1649 /* see sph_shavite.h */
1651 sph_shavite224(void *cc, const void *data, size_t len)
1653 shavite_small_core(cc, data, len);
1656 /* see sph_shavite.h */
1658 sph_shavite224_close(void *cc, void *dst)
1660 shavite_small_close(cc, 0, 0, dst, 7);
1661 shavite_small_init(cc, IV224);
1664 /* see sph_shavite.h */
1666 sph_shavite224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1668 shavite_small_close(cc, ub, n, dst, 7);
1669 shavite_small_init(cc, IV224);
/*
 * Public SHAvite-256 API: thin wrappers over the small-state helpers.
 * Output is 8 x 32-bit words = 256 bits; close() re-initializes with
 * IV256 for reuse. NOTE(review): return types and braces are elided from
 * this excerpt; fragments are incomplete.
 */
1672 /* see sph_shavite.h */
1674 sph_shavite256_init(void *cc)
1676 shavite_small_init(cc, IV256);
1679 /* see sph_shavite.h */
1681 sph_shavite256(void *cc, const void *data, size_t len)
1683 shavite_small_core(cc, data, len);
1686 /* see sph_shavite.h */
1688 sph_shavite256_close(void *cc, void *dst)
1690 shavite_small_close(cc, 0, 0, dst, 8);
1691 shavite_small_init(cc, IV256);
1694 /* see sph_shavite.h */
1696 sph_shavite256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1698 shavite_small_close(cc, ub, n, dst, 8);
1699 shavite_small_init(cc, IV256);
/*
 * Public SHAvite-384 API: thin wrappers over the big-state helpers.
 * Output is 12 x 32-bit words = 384 bits; close() re-initializes with
 * IV384 for reuse. NOTE(review): return types and braces are elided from
 * this excerpt; fragments are incomplete.
 */
1702 /* see sph_shavite.h */
1704 sph_shavite384_init(void *cc)
1706 shavite_big_init(cc, IV384);
1709 /* see sph_shavite.h */
1711 sph_shavite384(void *cc, const void *data, size_t len)
1713 shavite_big_core(cc, data, len);
1716 /* see sph_shavite.h */
1718 sph_shavite384_close(void *cc, void *dst)
1720 shavite_big_close(cc, 0, 0, dst, 12);
1721 shavite_big_init(cc, IV384);
1724 /* see sph_shavite.h */
1726 sph_shavite384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1728 shavite_big_close(cc, ub, n, dst, 12);
1729 shavite_big_init(cc, IV384);
/*
 * Public SHAvite-512 API: thin wrappers over the big-state helpers.
 * Output is 16 x 32-bit words = 512 bits; close() re-initializes with
 * IV512 for reuse. NOTE(review): return types and braces are elided from
 * this excerpt; fragments are incomplete.
 */
1732 /* see sph_shavite.h */
1734 sph_shavite512_init(void *cc)
1736 shavite_big_init(cc, IV512);
1739 /* see sph_shavite.h */
1741 sph_shavite512(void *cc, const void *data, size_t len)
1743 shavite_big_core(cc, data, len);
1746 /* see sph_shavite.h */
1748 sph_shavite512_close(void *cc, void *dst)
1750 shavite_big_close(cc, 0, 0, dst, 16);
1751 shavite_big_init(cc, IV512);
1754 /* see sph_shavite.h */
1756 sph_shavite512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1758 shavite_big_close(cc, ub, n, dst, 16);
1759 shavite_big_init(cc, IV512);