/* src/scalar_8x32_impl.h */
1/**********************************************************************
2 * Copyright (c) 2014 Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
1d52a8b1
PW
6
7#ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
8#define _SECP256K1_SCALAR_REPR_IMPL_H_
9
/* Limbs of the secp256k1 order n, least-significant limb first:
 * n = FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE BAAEDCE6 AF48A03B BFD25E8C D0364141. */
#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)

/* Limbs of 2^256 minus the secp256k1 order (the reduction constant
 * c = 2^256 - n). Since n's top three limbs are all-ones, c's limbs 5..7
 * are zero and are not defined here. */
#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
#define SECP256K1_N_C_1 (~SECP256K1_N_1)
#define SECP256K1_N_C_2 (~SECP256K1_N_2)
#define SECP256K1_N_C_3 (~SECP256K1_N_3)
#define SECP256K1_N_C_4 (1)

/* Limbs of half the secp256k1 order, used to decide whether a scalar is
 * in the "high" half of the range (see secp256k1_scalar_is_high). */
#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)
36
a4a43d75 37SECP256K1_INLINE static void secp256k1_scalar_clear(secp256k1_scalar_t *r) {
1d52a8b1
PW
38 r->d[0] = 0;
39 r->d[1] = 0;
40 r->d[2] = 0;
41 r->d[3] = 0;
42 r->d[4] = 0;
43 r->d[5] = 0;
44 r->d[6] = 0;
45 r->d[7] = 0;
46}
47
1e6c77c3
PW
48SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar_t *r, unsigned int v) {
49 r->d[0] = v;
50 r->d[1] = 0;
51 r->d[2] = 0;
52 r->d[3] = 0;
53 r->d[4] = 0;
54 r->d[5] = 0;
55 r->d[6] = 0;
56 r->d[7] = 0;
57}
58
59SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) {
60 VERIFY_CHECK((offset + count - 1) >> 5 == offset >> 5);
61 return (a->d[offset >> 5] >> (offset & 0x1F)) & ((1 << count) - 1);
62}
63
/** Return count bits of the scalar, starting at bit position offset; unlike
 *  secp256k1_scalar_get_bits the requested range may span a limb boundary.
 *  Requires count < 32 and offset + count <= 256 (checked under VERIFY). */
SECP256K1_INLINE static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar_t *a, unsigned int offset, unsigned int count) {
    VERIFY_CHECK(count < 32);
    VERIFY_CHECK(offset + count <= 256);
    if ((offset + count - 1) >> 5 == offset >> 5) {
        /* Fast path: all requested bits lie in one limb. */
        return secp256k1_scalar_get_bits(a, offset, count);
    } else {
        /* Slow path: combine the high bits of one limb with the low bits of
         * the next limb, then mask down to count bits. */
        VERIFY_CHECK((offset >> 5) + 1 < 8);
        return ((a->d[offset >> 5] >> (offset & 0x1F)) | (a->d[(offset >> 5) + 1] << (32 - (offset & 0x1F)))) & ((((uint32_t)1) << count) - 1);
    }
}
74
/** Return 1 if the scalar, read as a 256-bit integer, is >= the group order n;
 *  0 otherwise. Branch-free: the comparison runs from the most significant
 *  limb down, accumulating "definitely below" into no and "definitely above"
 *  into yes, so no data-dependent branches are taken. */
SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) {
    int yes = 0;
    int no = 0;
    /* Limbs 5..7 of n are all-ones, so a limb can never exceed them. */
    no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check. */
    no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */
    no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. */
    no |= (a->d[4] < SECP256K1_N_4);
    yes |= (a->d[4] > SECP256K1_N_4) & ~no;
    no |= (a->d[3] < SECP256K1_N_3) & ~yes;
    yes |= (a->d[3] > SECP256K1_N_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_2) & ~yes;
    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
    /* Equality on the lowest limb means a == n, which also overflows. */
    yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
    return yes;
}
92
a4a43d75 93SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
1d52a8b1
PW
94 VERIFY_CHECK(overflow <= 1);
95 uint64_t t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
96 r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
97 t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
98 r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
99 t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
100 r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
101 t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
102 r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
103 t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
104 r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
105 t += (uint64_t)r->d[5];
106 r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
107 t += (uint64_t)r->d[6];
108 r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
109 t += (uint64_t)r->d[7];
110 r->d[7] = t & 0xFFFFFFFFUL;
111 return overflow;
112}
113
/** Set r = (a + b) mod n. Adds limbwise with a 64-bit carry accumulator, then
 *  performs one conditional subtraction of the order: the final carry out of
 *  bit 256 plus the >= n check together select whether to reduce. */
static void secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
    uint64_t t = (uint64_t)a->d[0] + b->d[0];
    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[1] + b->d[1];
    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[2] + b->d[2];
    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[3] + b->d[3];
    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[4] + b->d[4];
    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[5] + b->d[5];
    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[6] + b->d[6];
    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[7] + b->d[7];
    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
    /* t now holds the carry out of bit 256 (0 or 1); since a and b were each
     * below n, at most one subtraction of n is ever needed. */
    secp256k1_scalar_reduce(r, t + secp256k1_scalar_check_overflow(r));
}
133
52132078
PW
134static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
135 VERIFY_CHECK(bit < 256);
136 uint64_t t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << bit);
137 r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
138 t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
139 r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
140 t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F));
141 r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
142 t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F));
143 r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
144 t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F));
145 r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
146 t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F));
147 r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
148 t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F));
149 r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
150 t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F));
151 r->d[7] = t & 0xFFFFFFFFULL;
152#ifdef VERIFY
153 VERIFY_CHECK((t >> 32) == 0);
154 VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
155#endif
156}
157
a4a43d75 158static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
1d52a8b1
PW
159 r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24;
160 r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
161 r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
162 r->d[3] = (uint32_t)b32[19] | (uint32_t)b32[18] << 8 | (uint32_t)b32[17] << 16 | (uint32_t)b32[16] << 24;
163 r->d[4] = (uint32_t)b32[15] | (uint32_t)b32[14] << 8 | (uint32_t)b32[13] << 16 | (uint32_t)b32[12] << 24;
164 r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
165 r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
166 r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
167 int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
168 if (overflow) {
169 *overflow = over;
170 }
171}
172
a4a43d75 173static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) {
1d52a8b1
PW
174 bin[0] = a->d[7] >> 24; bin[1] = a->d[7] >> 16; bin[2] = a->d[7] >> 8; bin[3] = a->d[7];
175 bin[4] = a->d[6] >> 24; bin[5] = a->d[6] >> 16; bin[6] = a->d[6] >> 8; bin[7] = a->d[6];
176 bin[8] = a->d[5] >> 24; bin[9] = a->d[5] >> 16; bin[10] = a->d[5] >> 8; bin[11] = a->d[5];
177 bin[12] = a->d[4] >> 24; bin[13] = a->d[4] >> 16; bin[14] = a->d[4] >> 8; bin[15] = a->d[4];
178 bin[16] = a->d[3] >> 24; bin[17] = a->d[3] >> 16; bin[18] = a->d[3] >> 8; bin[19] = a->d[3];
179 bin[20] = a->d[2] >> 24; bin[21] = a->d[2] >> 16; bin[22] = a->d[2] >> 8; bin[23] = a->d[2];
180 bin[24] = a->d[1] >> 24; bin[25] = a->d[1] >> 16; bin[26] = a->d[1] >> 8; bin[27] = a->d[1];
181 bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
182}
183
a4a43d75 184SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) {
1d52a8b1
PW
185 return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
186}
187
/** Set r = -a mod n (the additive inverse). For a != 0 this computes n - a as
 *  the limbwise sum (~a + n + 1); for a == 0 the all-zero mask forces the
 *  result to zero instead of n. Branch-free. */
static void secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
    /* All-ones when a is nonzero, all-zeros when a is zero. */
    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0);
    /* ~a + 1 is the two's complement of a; adding n's limbs yields n - a. */
    uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1;
    r->d[0] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
    r->d[1] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
    r->d[2] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
    r->d[3] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
    r->d[4] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
    r->d[5] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
    r->d[6] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
    r->d[7] = t & nonzero;
}
207
a4a43d75 208SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a) {
1d52a8b1
PW
209 return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
210}
211
/** Return 1 if the scalar is strictly greater than the half-order constants
 *  (SECP256K1_N_H), i.e. lies in the "high" half of the scalar range; 0
 *  otherwise. Same branch-free comparison ladder as
 *  secp256k1_scalar_check_overflow, most significant limb first. */
static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
    int yes = 0;
    int no = 0;
    no |= (a->d[7] < SECP256K1_N_H_7);
    yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
    /* Limbs 4..6 of N_H are all-ones, so a limb can never exceed them. */
    no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check. */
    no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */
    no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. */
    no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
    yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
    /* Strict > on the lowest limb: equality to N_H is not "high". */
    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
    return yes;
}
229
/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c.
 *
 * These macros implement a 96-bit accumulator held in three uint32_t
 * variables c0 (low), c1 (middle), c2 (high), which must be in scope at every
 * expansion site.
 *
 * NOTE(review): the macros expand to plain brace blocks (not do { } while(0))
 * and may evaluate their arguments more than once — call them only as full
 * statements with simple expressions as arguments, as the code below does. */

/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;         /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
    c1 += th;                 /* overflow is handled on the next line */ \
    c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
}

/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
#define muladd_fast(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;         /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl;                 /* overflow is handled on the next line */ \
    th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
    c1 += th;                 /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK(c1 >= th); \
}

/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow.
 *  Used for the symmetric cross terms when squaring. */
#define muladd2(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;               /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    uint32_t th2 = th + th;         /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
    c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
    uint32_t tl2 = tl + tl;         /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
    th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFF */ \
    c0 += tl2;                      /* overflow is handled on the next line */ \
    th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
    c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
    c1 += th2;                      /* overflow is handled on the next line */ \
    c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
}

/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
    c0 += (a);                  /* overflow is handled on the next line */ \
    unsigned int over = (c0 < (a)) ? 1 : 0; \
    c1 += over;                 /* overflow is handled on the next line */ \
    c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
}

/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
#define sumadd_fast(a) { \
    c0 += (a);                 /* overflow is handled on the next line */ \
    c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
    VERIFY_CHECK(c2 == 0); \
}

/** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. */
#define extract(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = c2; \
    c2 = 0; \
}

/** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. c2 is required to be zero. */
#define extract_fast(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = 0; \
    VERIFY_CHECK(c2 == 0); \
}
314
/** Reduce a 512-bit number l (16 x 32-bit limbs, least significant first)
 *  modulo the group order n, writing the result to r. Uses the identity
 *  2^256 == 2^256 - n == SECP256K1_N_C (mod n) to fold the high limbs down
 *  in stages: 512 -> 385 bits, 385 -> 258 bits, 258 -> 256 bits, followed by
 *  one final conditional subtraction of n. The _fast accumulator variants
 *  are used only where the carry bounds permit (verified under VERIFY). */
static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
    /* n[0..7] are the high 256 bits of the input. */
    uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];

    /* 96 bit accumulator. */
    uint32_t c0, c1, c2;

    /* Reduce 512 bits into 385. */
    /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
    c0 = l[0]; c1 = 0; c2 = 0;
    muladd_fast(n0, SECP256K1_N_C_0);
    uint32_t m0; extract_fast(m0);
    sumadd_fast(l[1]);
    muladd(n1, SECP256K1_N_C_0);
    muladd(n0, SECP256K1_N_C_1);
    uint32_t m1; extract(m1);
    sumadd(l[2]);
    muladd(n2, SECP256K1_N_C_0);
    muladd(n1, SECP256K1_N_C_1);
    muladd(n0, SECP256K1_N_C_2);
    uint32_t m2; extract(m2);
    sumadd(l[3]);
    muladd(n3, SECP256K1_N_C_0);
    muladd(n2, SECP256K1_N_C_1);
    muladd(n1, SECP256K1_N_C_2);
    muladd(n0, SECP256K1_N_C_3);
    uint32_t m3; extract(m3);
    sumadd(l[4]);
    muladd(n4, SECP256K1_N_C_0);
    muladd(n3, SECP256K1_N_C_1);
    muladd(n2, SECP256K1_N_C_2);
    muladd(n1, SECP256K1_N_C_3);
    /* N_C_4 == 1, so the n0 * N_C_4 term is a plain addition. */
    sumadd(n0);
    uint32_t m4; extract(m4);
    sumadd(l[5]);
    muladd(n5, SECP256K1_N_C_0);
    muladd(n4, SECP256K1_N_C_1);
    muladd(n3, SECP256K1_N_C_2);
    muladd(n2, SECP256K1_N_C_3);
    sumadd(n1);
    uint32_t m5; extract(m5);
    sumadd(l[6]);
    muladd(n6, SECP256K1_N_C_0);
    muladd(n5, SECP256K1_N_C_1);
    muladd(n4, SECP256K1_N_C_2);
    muladd(n3, SECP256K1_N_C_3);
    sumadd(n2);
    uint32_t m6; extract(m6);
    sumadd(l[7]);
    muladd(n7, SECP256K1_N_C_0);
    muladd(n6, SECP256K1_N_C_1);
    muladd(n5, SECP256K1_N_C_2);
    muladd(n4, SECP256K1_N_C_3);
    sumadd(n3);
    uint32_t m7; extract(m7);
    muladd(n7, SECP256K1_N_C_1);
    muladd(n6, SECP256K1_N_C_2);
    muladd(n5, SECP256K1_N_C_3);
    sumadd(n4);
    uint32_t m8; extract(m8);
    muladd(n7, SECP256K1_N_C_2);
    muladd(n6, SECP256K1_N_C_3);
    sumadd(n5);
    uint32_t m9; extract(m9);
    muladd(n7, SECP256K1_N_C_3);
    sumadd(n6);
    uint32_t m10; extract(m10);
    sumadd_fast(n7);
    uint32_t m11; extract_fast(m11);
    VERIFY_CHECK(c0 <= 1);
    uint32_t m12 = c0;

    /* Reduce 385 bits into 258. */
    /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
    c0 = m0; c1 = 0; c2 = 0;
    muladd_fast(m8, SECP256K1_N_C_0);
    uint32_t p0; extract_fast(p0);
    sumadd_fast(m1);
    muladd(m9, SECP256K1_N_C_0);
    muladd(m8, SECP256K1_N_C_1);
    uint32_t p1; extract(p1);
    sumadd(m2);
    muladd(m10, SECP256K1_N_C_0);
    muladd(m9, SECP256K1_N_C_1);
    muladd(m8, SECP256K1_N_C_2);
    uint32_t p2; extract(p2);
    sumadd(m3);
    muladd(m11, SECP256K1_N_C_0);
    muladd(m10, SECP256K1_N_C_1);
    muladd(m9, SECP256K1_N_C_2);
    muladd(m8, SECP256K1_N_C_3);
    uint32_t p3; extract(p3);
    sumadd(m4);
    muladd(m12, SECP256K1_N_C_0);
    muladd(m11, SECP256K1_N_C_1);
    muladd(m10, SECP256K1_N_C_2);
    muladd(m9, SECP256K1_N_C_3);
    sumadd(m8);
    uint32_t p4; extract(p4);
    sumadd(m5);
    muladd(m12, SECP256K1_N_C_1);
    muladd(m11, SECP256K1_N_C_2);
    muladd(m10, SECP256K1_N_C_3);
    sumadd(m9);
    uint32_t p5; extract(p5);
    sumadd(m6);
    muladd(m12, SECP256K1_N_C_2);
    muladd(m11, SECP256K1_N_C_3);
    sumadd(m10);
    uint32_t p6; extract(p6);
    sumadd_fast(m7);
    muladd_fast(m12, SECP256K1_N_C_3);
    sumadd_fast(m11);
    uint32_t p7; extract_fast(p7);
    uint32_t p8 = c0 + m12;
    VERIFY_CHECK(p8 <= 2);

    /* Reduce 258 bits into 256. */
    /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
    uint64_t c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
    r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
    r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
    r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
    r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p4 + (uint64_t)p8;
    r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p5;
    r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p6;
    r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p7;
    r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;

    /* Final reduction of r. */
    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
}
453
/** Set r = (a * b) mod n. Computes the full 512-bit schoolbook product into
 *  l[0..15] column by column — each extract() emits one 32-bit output limb
 *  from the 96-bit accumulator — then reduces with
 *  secp256k1_scalar_reduce_512. r may alias a or b, since both are fully read
 *  into l before r is written. */
static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
    /* 96 bit accumulator. */
    uint32_t c0 = 0, c1 = 0, c2 = 0;

    uint32_t l[16];

    /* l[0..15] = a[0..7] * b[0..7]. */
    muladd_fast(a->d[0], b->d[0]);
    extract_fast(l[0]);
    muladd(a->d[0], b->d[1]);
    muladd(a->d[1], b->d[0]);
    extract(l[1]);
    muladd(a->d[0], b->d[2]);
    muladd(a->d[1], b->d[1]);
    muladd(a->d[2], b->d[0]);
    extract(l[2]);
    muladd(a->d[0], b->d[3]);
    muladd(a->d[1], b->d[2]);
    muladd(a->d[2], b->d[1]);
    muladd(a->d[3], b->d[0]);
    extract(l[3]);
    muladd(a->d[0], b->d[4]);
    muladd(a->d[1], b->d[3]);
    muladd(a->d[2], b->d[2]);
    muladd(a->d[3], b->d[1]);
    muladd(a->d[4], b->d[0]);
    extract(l[4]);
    muladd(a->d[0], b->d[5]);
    muladd(a->d[1], b->d[4]);
    muladd(a->d[2], b->d[3]);
    muladd(a->d[3], b->d[2]);
    muladd(a->d[4], b->d[1]);
    muladd(a->d[5], b->d[0]);
    extract(l[5]);
    muladd(a->d[0], b->d[6]);
    muladd(a->d[1], b->d[5]);
    muladd(a->d[2], b->d[4]);
    muladd(a->d[3], b->d[3]);
    muladd(a->d[4], b->d[2]);
    muladd(a->d[5], b->d[1]);
    muladd(a->d[6], b->d[0]);
    extract(l[6]);
    muladd(a->d[0], b->d[7]);
    muladd(a->d[1], b->d[6]);
    muladd(a->d[2], b->d[5]);
    muladd(a->d[3], b->d[4]);
    muladd(a->d[4], b->d[3]);
    muladd(a->d[5], b->d[2]);
    muladd(a->d[6], b->d[1]);
    muladd(a->d[7], b->d[0]);
    extract(l[7]);
    muladd(a->d[1], b->d[7]);
    muladd(a->d[2], b->d[6]);
    muladd(a->d[3], b->d[5]);
    muladd(a->d[4], b->d[4]);
    muladd(a->d[5], b->d[3]);
    muladd(a->d[6], b->d[2]);
    muladd(a->d[7], b->d[1]);
    extract(l[8]);
    muladd(a->d[2], b->d[7]);
    muladd(a->d[3], b->d[6]);
    muladd(a->d[4], b->d[5]);
    muladd(a->d[5], b->d[4]);
    muladd(a->d[6], b->d[3]);
    muladd(a->d[7], b->d[2]);
    extract(l[9]);
    muladd(a->d[3], b->d[7]);
    muladd(a->d[4], b->d[6]);
    muladd(a->d[5], b->d[5]);
    muladd(a->d[6], b->d[4]);
    muladd(a->d[7], b->d[3]);
    extract(l[10]);
    muladd(a->d[4], b->d[7]);
    muladd(a->d[5], b->d[6]);
    muladd(a->d[6], b->d[5]);
    muladd(a->d[7], b->d[4]);
    extract(l[11]);
    muladd(a->d[5], b->d[7]);
    muladd(a->d[6], b->d[6]);
    muladd(a->d[7], b->d[5]);
    extract(l[12]);
    muladd(a->d[6], b->d[7]);
    muladd(a->d[7], b->d[6]);
    extract(l[13]);
    muladd_fast(a->d[7], b->d[7]);
    extract_fast(l[14]);
    VERIFY_CHECK(c1 == 0);
    l[15] = c0;

    secp256k1_scalar_reduce_512(r, l);
}
545
/** Set r = (a * a) mod n. Like secp256k1_scalar_mul but exploits symmetry:
 *  each off-diagonal product a[i]*a[j] (i != j) appears twice, so muladd2
 *  adds it doubled while plain muladd handles the diagonal squares. The
 *  512-bit result in l[0..15] is then reduced with
 *  secp256k1_scalar_reduce_512. r may alias a. */
static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
    /* 96 bit accumulator. */
    uint32_t c0 = 0, c1 = 0, c2 = 0;

    uint32_t l[16];

    /* l[0..15] = a[0..7]^2. */
    muladd_fast(a->d[0], a->d[0]);
    extract_fast(l[0]);
    muladd2(a->d[0], a->d[1]);
    extract(l[1]);
    muladd2(a->d[0], a->d[2]);
    muladd(a->d[1], a->d[1]);
    extract(l[2]);
    muladd2(a->d[0], a->d[3]);
    muladd2(a->d[1], a->d[2]);
    extract(l[3]);
    muladd2(a->d[0], a->d[4]);
    muladd2(a->d[1], a->d[3]);
    muladd(a->d[2], a->d[2]);
    extract(l[4]);
    muladd2(a->d[0], a->d[5]);
    muladd2(a->d[1], a->d[4]);
    muladd2(a->d[2], a->d[3]);
    extract(l[5]);
    muladd2(a->d[0], a->d[6]);
    muladd2(a->d[1], a->d[5]);
    muladd2(a->d[2], a->d[4]);
    muladd(a->d[3], a->d[3]);
    extract(l[6]);
    muladd2(a->d[0], a->d[7]);
    muladd2(a->d[1], a->d[6]);
    muladd2(a->d[2], a->d[5]);
    muladd2(a->d[3], a->d[4]);
    extract(l[7]);
    muladd2(a->d[1], a->d[7]);
    muladd2(a->d[2], a->d[6]);
    muladd2(a->d[3], a->d[5]);
    muladd(a->d[4], a->d[4]);
    extract(l[8]);
    muladd2(a->d[2], a->d[7]);
    muladd2(a->d[3], a->d[6]);
    muladd2(a->d[4], a->d[5]);
    extract(l[9]);
    muladd2(a->d[3], a->d[7]);
    muladd2(a->d[4], a->d[6]);
    muladd(a->d[5], a->d[5]);
    extract(l[10]);
    muladd2(a->d[4], a->d[7]);
    muladd2(a->d[5], a->d[6]);
    extract(l[11]);
    muladd2(a->d[5], a->d[7]);
    muladd(a->d[6], a->d[6]);
    extract(l[12]);
    muladd2(a->d[6], a->d[7]);
    extract(l[13]);
    muladd_fast(a->d[7], a->d[7]);
    extract_fast(l[14]);
    VERIFY_CHECK(c1 == 0);
    l[15] = c0;

    secp256k1_scalar_reduce_512(r, l);
}
609
610#undef sumadd
611#undef sumadd_fast
612#undef muladd
613#undef muladd_fast
614#undef muladd2
615#undef extract
616#undef extract_fast
617
6794be60
PW
618static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
619 r1->d[0] = a->d[0];
620 r1->d[1] = a->d[1];
621 r1->d[2] = a->d[2];
622 r1->d[3] = a->d[3];
623 r1->d[4] = 0;
624 r1->d[5] = 0;
625 r1->d[6] = 0;
626 r1->d[7] = 0;
627 r2->d[0] = a->d[4];
628 r2->d[1] = a->d[5];
629 r2->d[2] = a->d[6];
630 r2->d[3] = a->d[7];
631 r2->d[4] = 0;
632 r2->d[5] = 0;
633 r2->d[6] = 0;
634 r2->d[7] = 0;
635}
636
f24041d6
PW
637SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
638 return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
639}
640
1d52a8b1 641#endif