/**********************************************************************
 * Copyright (c) 2014 Pieter Wuille                                   *
 * Distributed under the MIT software license, see the accompanying   *
 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
 **********************************************************************/

#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
#define _SECP256K1_SCALAR_REPR_IMPL_H_

/* Limbs of the secp256k1 order. */
#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)

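/* Written out most significant limb first, the order is
 *   n = FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE BAAEDCE6 AF48A03B BFD25E8C D0364141,
 * so SECP256K1_N_7 is the top limb and SECP256K1_N_0 the bottom one. */
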
/* Limbs of 2^256 minus the secp256k1 order. */
#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
#define SECP256K1_N_C_1 (~SECP256K1_N_1)
#define SECP256K1_N_C_2 (~SECP256K1_N_2)
#define SECP256K1_N_C_3 (~SECP256K1_N_3)
#define SECP256K1_N_C_4 (1)

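/* Only five limbs are needed: limbs 5..7 of n are all ones, so the
 * corresponding limbs of 2^256 - n = ~n + 1 are zero, and limb 4 is
 * ~0xFFFFFFFE = 1. The "+ 1" cannot carry out of limb 0 because
 * ~SECP256K1_N_0 is not all ones. */
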
/* Limbs of half the secp256k1 order. */
#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)

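/* n is odd, so n >> 1 equals (n-1)/2; secp256k1_scalar_is_high compares
 * against these limbs to decide whether a scalar exceeds half the order. */
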
SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) {
    r->d[0] = 0;
    r->d[1] = 0;
    r->d[2] = 0;
    r->d[3] = 0;
    r->d[4] = 0;
    r->d[5] = 0;
    r->d[6] = 0;
    r->d[7] = 0;
}

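/* Bits [offset, offset + count) must lie within a single 32-bit limb;
 * the VERIFY_CHECK enforces this. */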
SECP256K1_INLINE static int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, int offset, int count) {
    VERIFY_CHECK((offset + count - 1) / 32 == offset / 32);
    return (a->d[offset / 32] >> (offset % 32)) & ((1 << count) - 1);
}

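/* Branch-free lexicographic comparison of a against n, scanning from the
 * most significant limb down. Once a higher limb has decided "no" (a < n)
 * or "yes" (a > n), the lower limbs cannot change the verdict; the & ~yes
 * and & ~no masks encode exactly that. */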
SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) {
    int yes = 0;
    int no = 0;
    no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check. */
    no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */
    no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. */
    no |= (a->d[4] < SECP256K1_N_4);
    yes |= (a->d[4] > SECP256K1_N_4) & ~no;
    no |= (a->d[3] < SECP256K1_N_3) & ~yes;
    yes |= (a->d[3] > SECP256K1_N_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_2) & ~yes;
    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
    yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
    return yes;
}

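/* Adding overflow * (2^256 - n) modulo 2^256 is the same as subtracting
 * overflow * n, so one carry chain conditionally reduces the scalar. */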
SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
    VERIFY_CHECK(overflow <= 1);
    uint64_t t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
    r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
    r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
    r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
    r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
    r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[5];
    r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[6];
    r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[7];
    r->d[7] = t & 0xFFFFFFFFUL;
    return overflow;
}

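/* For inputs below n, at most one of the final carry and the overflow
 * check can be set: a + b < 2n < 2^256 + n, so when the sum carries out
 * of 256 bits the remaining low part is already below n. The argument
 * passed to secp256k1_scalar_reduce is therefore at most 1, as required. */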
static void secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
    uint64_t t = (uint64_t)a->d[0] + b->d[0];
    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[1] + b->d[1];
    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[2] + b->d[2];
    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[3] + b->d[3];
    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[4] + b->d[4];
    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[5] + b->d[5];
    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[6] + b->d[6];
    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[7] + b->d[7];
    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
    secp256k1_scalar_reduce(r, t + secp256k1_scalar_check_overflow(r));
}

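/* The caller must ensure that adding 2^bit does not take the scalar to or
 * beyond the group order; the VERIFY checks at the end assert that neither
 * a 256-bit carry-out nor a modular overflow occurred. */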
static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
    VERIFY_CHECK(bit < 256);
    uint64_t t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F));
    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F));
    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F));
    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F));
    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F));
    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F));
    r->d[7] = t & 0xFFFFFFFFULL;
#ifdef VERIFY
    VERIFY_CHECK((t >> 32) == 0);
    VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
#endif
}

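/* Scalars are serialized as 32 big-endian bytes: b32[0] feeds the most
 * significant byte of d[7], and b32[31] the least significant byte of d[0]. */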
static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
    r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24;
    r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
    r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
    r->d[3] = (uint32_t)b32[19] | (uint32_t)b32[18] << 8 | (uint32_t)b32[17] << 16 | (uint32_t)b32[16] << 24;
    r->d[4] = (uint32_t)b32[15] | (uint32_t)b32[14] << 8 | (uint32_t)b32[13] << 16 | (uint32_t)b32[12] << 24;
    r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
    r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
    r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
    if (overflow) {
        *overflow = over;
    }
}

static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) {
    bin[0] = a->d[7] >> 24; bin[1] = a->d[7] >> 16; bin[2] = a->d[7] >> 8; bin[3] = a->d[7];
    bin[4] = a->d[6] >> 24; bin[5] = a->d[6] >> 16; bin[6] = a->d[6] >> 8; bin[7] = a->d[6];
    bin[8] = a->d[5] >> 24; bin[9] = a->d[5] >> 16; bin[10] = a->d[5] >> 8; bin[11] = a->d[5];
    bin[12] = a->d[4] >> 24; bin[13] = a->d[4] >> 16; bin[14] = a->d[4] >> 8; bin[15] = a->d[4];
    bin[16] = a->d[3] >> 24; bin[17] = a->d[3] >> 16; bin[18] = a->d[3] >> 8; bin[19] = a->d[3];
    bin[20] = a->d[2] >> 24; bin[21] = a->d[2] >> 16; bin[22] = a->d[2] >> 8; bin[23] = a->d[2];
    bin[24] = a->d[1] >> 24; bin[25] = a->d[1] >> 16; bin[26] = a->d[1] >> 8; bin[27] = a->d[1];
    bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
}

SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) {
    return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
}

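/* Branch-free modular negation: ~a + n + 1 equals 2^256 + n - a modulo
 * 2^256, i.e. n - a. The "nonzero" mask forces the result to zero when
 * a == 0, since the negation of zero must stay zero rather than become n. */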
static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0);
    uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1;
    r->d[0] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
    r->d[1] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
    r->d[2] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
    r->d[3] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
    r->d[4] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
    r->d[5] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
    r->d[6] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
    r->d[7] = t & nonzero;
}

SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) {
    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
}

static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
    int yes = 0;
    int no = 0;
    no |= (a->d[7] < SECP256K1_N_H_7);
    yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
    no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check. */
    no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */
    no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. */
    no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
    yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
    return yes;
}

/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */

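/* The macros below operate on an implicit 96-bit accumulator held in the
 * three uint32_t variables (c0,c1,c2), with c0 the least significant word.
 * Products are accumulated column by column: all partial products that
 * belong to limb position k are summed, the low 32 bits are extracted as
 * output limb k, and the accumulator is shifted down by one word. */
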
/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;         /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
    c1 += th;                 /* overflow is handled on the next line */ \
    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
}

/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
#define muladd_fast(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;         /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK(c1 >= th); \
}

/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd2(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;               /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    uint32_t th2 = th + th;         /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
    uint32_t tl2 = tl + tl;         /* at most 0xFFFFFFFE (in case the lowest 31 bits of tl were 0x7FFFFFFF) */ \
    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFF */ \
    c0 += tl2;                      /* overflow is handled on the next line */ \
    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
    c1 += th2;                      /* overflow is handled on the next line */ \
    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
}

/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
    c0 += (a);                 /* overflow is handled on the next line */ \
    unsigned int over = (c0 < (a)) ? 1 : 0; \
    c1 += over;                /* overflow is handled on the next line */ \
    c2 += (c1 < over) ? 1 : 0; /* never overflows by contract */ \
}

/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
#define sumadd_fast(a) { \
    c0 += (a);                 /* overflow is handled on the next line */ \
    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
    VERIFY_CHECK(c2 == 0); \
}

/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the accumulator down 32 bits ((c0,c1,c2) becomes (c1,c2,0)). */
#define extract(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = c2; \
    c2 = 0; \
}

/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the accumulator down 32 bits. c2 is required to be zero. */
#define extract_fast(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = 0; \
    VERIFY_CHECK(c2 == 0); \
}

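/* Modulo n, 2^256 is congruent to N_C = 2^256 - n, so a 512-bit value
 * low + 2^256 * high reduces to low + N_C * high. Because N_C is only 129
 * bits wide, each fold shrinks the operand: 512 bits become 385, then 258,
 * then 256, and a final conditional subtraction brings the result below n. */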
static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
    uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];

    /* 96 bit accumulator. */
    uint32_t c0, c1, c2;

    /* Reduce 512 bits into 385. */
    /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
    c0 = l[0]; c1 = 0; c2 = 0;
    muladd_fast(n0, SECP256K1_N_C_0);
    uint32_t m0; extract_fast(m0);
    sumadd_fast(l[1]);
    muladd(n1, SECP256K1_N_C_0);
    muladd(n0, SECP256K1_N_C_1);
    uint32_t m1; extract(m1);
    sumadd(l[2]);
    muladd(n2, SECP256K1_N_C_0);
    muladd(n1, SECP256K1_N_C_1);
    muladd(n0, SECP256K1_N_C_2);
    uint32_t m2; extract(m2);
    sumadd(l[3]);
    muladd(n3, SECP256K1_N_C_0);
    muladd(n2, SECP256K1_N_C_1);
    muladd(n1, SECP256K1_N_C_2);
    muladd(n0, SECP256K1_N_C_3);
    uint32_t m3; extract(m3);
    sumadd(l[4]);
    muladd(n4, SECP256K1_N_C_0);
    muladd(n3, SECP256K1_N_C_1);
    muladd(n2, SECP256K1_N_C_2);
    muladd(n1, SECP256K1_N_C_3);
    sumadd(n0);
    uint32_t m4; extract(m4);
    sumadd(l[5]);
    muladd(n5, SECP256K1_N_C_0);
    muladd(n4, SECP256K1_N_C_1);
    muladd(n3, SECP256K1_N_C_2);
    muladd(n2, SECP256K1_N_C_3);
    sumadd(n1);
    uint32_t m5; extract(m5);
    sumadd(l[6]);
    muladd(n6, SECP256K1_N_C_0);
    muladd(n5, SECP256K1_N_C_1);
    muladd(n4, SECP256K1_N_C_2);
    muladd(n3, SECP256K1_N_C_3);
    sumadd(n2);
    uint32_t m6; extract(m6);
    sumadd(l[7]);
    muladd(n7, SECP256K1_N_C_0);
    muladd(n6, SECP256K1_N_C_1);
    muladd(n5, SECP256K1_N_C_2);
    muladd(n4, SECP256K1_N_C_3);
    sumadd(n3);
    uint32_t m7; extract(m7);
    muladd(n7, SECP256K1_N_C_1);
    muladd(n6, SECP256K1_N_C_2);
    muladd(n5, SECP256K1_N_C_3);
    sumadd(n4);
    uint32_t m8; extract(m8);
    muladd(n7, SECP256K1_N_C_2);
    muladd(n6, SECP256K1_N_C_3);
    sumadd(n5);
    uint32_t m9; extract(m9);
    muladd(n7, SECP256K1_N_C_3);
    sumadd(n6);
    uint32_t m10; extract(m10);
    sumadd_fast(n7);
    uint32_t m11; extract_fast(m11);
    VERIFY_CHECK(c0 <= 1);
    uint32_t m12 = c0;

    /* Reduce 385 bits into 258. */
    /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
    c0 = m0; c1 = 0; c2 = 0;
    muladd_fast(m8, SECP256K1_N_C_0);
    uint32_t p0; extract_fast(p0);
    sumadd_fast(m1);
    muladd(m9, SECP256K1_N_C_0);
    muladd(m8, SECP256K1_N_C_1);
    uint32_t p1; extract(p1);
    sumadd(m2);
    muladd(m10, SECP256K1_N_C_0);
    muladd(m9, SECP256K1_N_C_1);
    muladd(m8, SECP256K1_N_C_2);
    uint32_t p2; extract(p2);
    sumadd(m3);
    muladd(m11, SECP256K1_N_C_0);
    muladd(m10, SECP256K1_N_C_1);
    muladd(m9, SECP256K1_N_C_2);
    muladd(m8, SECP256K1_N_C_3);
    uint32_t p3; extract(p3);
    sumadd(m4);
    muladd(m12, SECP256K1_N_C_0);
    muladd(m11, SECP256K1_N_C_1);
    muladd(m10, SECP256K1_N_C_2);
    muladd(m9, SECP256K1_N_C_3);
    sumadd(m8);
    uint32_t p4; extract(p4);
    sumadd(m5);
    muladd(m12, SECP256K1_N_C_1);
    muladd(m11, SECP256K1_N_C_2);
    muladd(m10, SECP256K1_N_C_3);
    sumadd(m9);
    uint32_t p5; extract(p5);
    sumadd(m6);
    muladd(m12, SECP256K1_N_C_2);
    muladd(m11, SECP256K1_N_C_3);
    sumadd(m10);
    uint32_t p6; extract(p6);
    sumadd_fast(m7);
    muladd_fast(m12, SECP256K1_N_C_3);
    sumadd_fast(m11);
    uint32_t p7; extract_fast(p7);
    uint32_t p8 = c0 + m12;
    VERIFY_CHECK(p8 <= 2);

    /* Reduce 258 bits into 256. */
    /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
    uint64_t c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
    r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
    r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
    r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
    r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p4 + (uint64_t)p8;
    r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p5;
    r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p6;
    r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p7;
    r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;

    /* Final reduction of r. */
    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
}

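/* Schoolbook multiplication: column k accumulates every partial product
 * a->d[i] * b->d[j] with i + j == k before one limb of l is extracted. */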
static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
    /* 96 bit accumulator. */
    uint32_t c0 = 0, c1 = 0, c2 = 0;

    uint32_t l[16];

    /* l[0..15] = a[0..7] * b[0..7]. */
    muladd_fast(a->d[0], b->d[0]);
    extract_fast(l[0]);
    muladd(a->d[0], b->d[1]);
    muladd(a->d[1], b->d[0]);
    extract(l[1]);
    muladd(a->d[0], b->d[2]);
    muladd(a->d[1], b->d[1]);
    muladd(a->d[2], b->d[0]);
    extract(l[2]);
    muladd(a->d[0], b->d[3]);
    muladd(a->d[1], b->d[2]);
    muladd(a->d[2], b->d[1]);
    muladd(a->d[3], b->d[0]);
    extract(l[3]);
    muladd(a->d[0], b->d[4]);
    muladd(a->d[1], b->d[3]);
    muladd(a->d[2], b->d[2]);
    muladd(a->d[3], b->d[1]);
    muladd(a->d[4], b->d[0]);
    extract(l[4]);
    muladd(a->d[0], b->d[5]);
    muladd(a->d[1], b->d[4]);
    muladd(a->d[2], b->d[3]);
    muladd(a->d[3], b->d[2]);
    muladd(a->d[4], b->d[1]);
    muladd(a->d[5], b->d[0]);
    extract(l[5]);
    muladd(a->d[0], b->d[6]);
    muladd(a->d[1], b->d[5]);
    muladd(a->d[2], b->d[4]);
    muladd(a->d[3], b->d[3]);
    muladd(a->d[4], b->d[2]);
    muladd(a->d[5], b->d[1]);
    muladd(a->d[6], b->d[0]);
    extract(l[6]);
    muladd(a->d[0], b->d[7]);
    muladd(a->d[1], b->d[6]);
    muladd(a->d[2], b->d[5]);
    muladd(a->d[3], b->d[4]);
    muladd(a->d[4], b->d[3]);
    muladd(a->d[5], b->d[2]);
    muladd(a->d[6], b->d[1]);
    muladd(a->d[7], b->d[0]);
    extract(l[7]);
    muladd(a->d[1], b->d[7]);
    muladd(a->d[2], b->d[6]);
    muladd(a->d[3], b->d[5]);
    muladd(a->d[4], b->d[4]);
    muladd(a->d[5], b->d[3]);
    muladd(a->d[6], b->d[2]);
    muladd(a->d[7], b->d[1]);
    extract(l[8]);
    muladd(a->d[2], b->d[7]);
    muladd(a->d[3], b->d[6]);
    muladd(a->d[4], b->d[5]);
    muladd(a->d[5], b->d[4]);
    muladd(a->d[6], b->d[3]);
    muladd(a->d[7], b->d[2]);
    extract(l[9]);
    muladd(a->d[3], b->d[7]);
    muladd(a->d[4], b->d[6]);
    muladd(a->d[5], b->d[5]);
    muladd(a->d[6], b->d[4]);
    muladd(a->d[7], b->d[3]);
    extract(l[10]);
    muladd(a->d[4], b->d[7]);
    muladd(a->d[5], b->d[6]);
    muladd(a->d[6], b->d[5]);
    muladd(a->d[7], b->d[4]);
    extract(l[11]);
    muladd(a->d[5], b->d[7]);
    muladd(a->d[6], b->d[6]);
    muladd(a->d[7], b->d[5]);
    extract(l[12]);
    muladd(a->d[6], b->d[7]);
    muladd(a->d[7], b->d[6]);
    extract(l[13]);
    muladd_fast(a->d[7], b->d[7]);
    extract_fast(l[14]);
    VERIFY_CHECK(c1 == 0);
    l[15] = c0;

    secp256k1_scalar_reduce_512(r, l);
}

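/* Squaring exploits the symmetry a->d[i]*a->d[j] == a->d[j]*a->d[i]:
 * each off-diagonal pair is handled once by muladd2, which adds the
 * product twice, so 36 limb multiplications suffice instead of 64. */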
static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
    /* 96 bit accumulator. */
    uint32_t c0 = 0, c1 = 0, c2 = 0;

    uint32_t l[16];

    /* l[0..15] = a[0..7]^2. */
    muladd_fast(a->d[0], a->d[0]);
    extract_fast(l[0]);
    muladd2(a->d[0], a->d[1]);
    extract(l[1]);
    muladd2(a->d[0], a->d[2]);
    muladd(a->d[1], a->d[1]);
    extract(l[2]);
    muladd2(a->d[0], a->d[3]);
    muladd2(a->d[1], a->d[2]);
    extract(l[3]);
    muladd2(a->d[0], a->d[4]);
    muladd2(a->d[1], a->d[3]);
    muladd(a->d[2], a->d[2]);
    extract(l[4]);
    muladd2(a->d[0], a->d[5]);
    muladd2(a->d[1], a->d[4]);
    muladd2(a->d[2], a->d[3]);
    extract(l[5]);
    muladd2(a->d[0], a->d[6]);
    muladd2(a->d[1], a->d[5]);
    muladd2(a->d[2], a->d[4]);
    muladd(a->d[3], a->d[3]);
    extract(l[6]);
    muladd2(a->d[0], a->d[7]);
    muladd2(a->d[1], a->d[6]);
    muladd2(a->d[2], a->d[5]);
    muladd2(a->d[3], a->d[4]);
    extract(l[7]);
    muladd2(a->d[1], a->d[7]);
    muladd2(a->d[2], a->d[6]);
    muladd2(a->d[3], a->d[5]);
    muladd(a->d[4], a->d[4]);
    extract(l[8]);
    muladd2(a->d[2], a->d[7]);
    muladd2(a->d[3], a->d[6]);
    muladd2(a->d[4], a->d[5]);
    extract(l[9]);
    muladd2(a->d[3], a->d[7]);
    muladd2(a->d[4], a->d[6]);
    muladd(a->d[5], a->d[5]);
    extract(l[10]);
    muladd2(a->d[4], a->d[7]);
    muladd2(a->d[5], a->d[6]);
    extract(l[11]);
    muladd2(a->d[5], a->d[7]);
    muladd(a->d[6], a->d[6]);
    extract(l[12]);
    muladd2(a->d[6], a->d[7]);
    extract(l[13]);
    muladd_fast(a->d[7], a->d[7]);
    extract_fast(l[14]);
    VERIFY_CHECK(c1 == 0);
    l[15] = c0;

    secp256k1_scalar_reduce_512(r, l);
}

#undef sumadd
#undef sumadd_fast
#undef muladd
#undef muladd_fast
#undef muladd2
#undef extract
#undef extract_fast

#endif