/**********************************************************************
 * Copyright (c) 2013, 2014 Pieter Wuille                             *
 * Distributed under the MIT software license, see the accompanying   *
 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
 **********************************************************************/
#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
#define _SECP256K1_FIELD_REPR_IMPL_H_

#include "util.h"
#include "num.h"
#include "field.h"

#ifdef VERIFY
static void secp256k1_fe_verify(const secp256k1_fe *a) {
    const uint32_t *d = a->n;
    int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
    r &= (d[0] <= 0x3FFFFFFUL * m);
    r &= (d[1] <= 0x3FFFFFFUL * m);
    r &= (d[2] <= 0x3FFFFFFUL * m);
    r &= (d[3] <= 0x3FFFFFFUL * m);
    r &= (d[4] <= 0x3FFFFFFUL * m);
    r &= (d[5] <= 0x3FFFFFFUL * m);
    r &= (d[6] <= 0x3FFFFFFUL * m);
    r &= (d[7] <= 0x3FFFFFFUL * m);
    r &= (d[8] <= 0x3FFFFFFUL * m);
    r &= (d[9] <= 0x03FFFFFUL * m);
    r &= (a->magnitude >= 0);
    r &= (a->magnitude <= 32);
    if (a->normalized) {
        r &= (a->magnitude <= 1);
        if (r && (d[9] == 0x03FFFFFUL)) {
            uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
            if (mid == 0x3FFFFFFUL) {
                r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
            }
        }
    }
    VERIFY_CHECK(r == 1);
}
#else
static void secp256k1_fe_verify(const secp256k1_fe *a) {
    (void)a;
}
#endif

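/* Informal sketch of the representation, for orientation: a field element
 * stores its value as
 *   sum(n[i] * 2^(26*i), i=0..9),
 * with n[0..8] nominally 26 bits and n[9] 22 bits, modulo
 *   p = 2^256 - 0x1000003D1,
 * whose limbs are {0x3FFFC2F, 0x3FFFFBF, 0x3FFFFFF (x7), 0x03FFFFF}.
 * Magnitude m guarantees each limb is at most 2*m times its nominal maximum,
 * and "normalized" means the value is the canonical representative in [0, p).
 * A hypothetical helper (not part of this file) reading back the low 64 bits
 * could look like:
 *
 *   static void fe_value_lo64(const secp256k1_fe *a, uint64_t *lo) {
 *       *lo = (uint64_t)a->n[0]
 *           | ((uint64_t)a->n[1] << 26)
 *           | ((uint64_t)a->n[2] << 52);
 *   }
 */
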
static void secp256k1_fe_normalize(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t m;
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));

    /* Apply the final reduction (for constant-time behaviour, we do it always) */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

    /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
    VERIFY_CHECK(t9 >> 22 == x);

    /* Mask off the possible multiple of 2^256 from the final reduction */
    t9 &= 0x03FFFFFUL;

    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;

#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
}

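/* Worked example (informal, matching the code above): the first pass folds the
 * carry x from bit 256 back in as x*0x3D1 into t0 and x<<6 into t1 because
 *   2^256 == 0x1000003D1 (mod p)   and   0x1000003D1 = (0x40 << 26) + 0x3D1,
 * so x * 2^256 == (x * 0x3D1) * 2^0 + (x * 0x40) * 2^26 (mod p).
 * Typical use (sketch): after arithmetic that may leave magnitude up to 32,
 *   secp256k1_fe_normalize(&x);
 * leaves x with magnitude 1 and normalized, i.e. the unique value in [0, p).
 */
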
static void secp256k1_fe_normalize_weak(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;

#ifdef VERIFY
    r->magnitude = 1;
    secp256k1_fe_verify(r);
#endif
}

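/* Informal note: unlike secp256k1_fe_normalize above, the weak variant stops
 * after the first pass, so the result is only guaranteed to have magnitude 1;
 * the raw value may still be >= p and is not marked normalized.  Sketch of the
 * distinction:
 *   secp256k1_fe_normalize_weak(&x);   limbs back within 26 bits, x may be >= p
 *   secp256k1_fe_normalize(&x);        additionally reduced into [0, p)
 */
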
static void secp256k1_fe_normalize_var(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t m;
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
        & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));

    if (x) {
        t0 += 0x3D1UL; t1 += (x << 6);
        t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
        t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
        t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
        t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
        t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
        t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
        t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
        t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
        t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;

        /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
        VERIFY_CHECK(t9 >> 22 == x);

        /* Mask off the possible multiple of 2^256 from the final reduction */
        t9 &= 0x03FFFFFUL;
    }

    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;

#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
}

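/* Informal note on the check above: x is nonzero exactly when the weakly
 * normalized value is >= p, i.e. either a carry reached bit 256 (t9 >> 22), or
 * every limb matches the corresponding limb of p: t9 == 0x03FFFFF, t2..t8 all
 * equal 0x3FFFFFF (tracked via m), and the two low limbs overflow 26 bits once
 * 0x3D1 and 0x40 are added.  Subtracting p is then the same as adding
 * 0x1000003D1 and discarding bit 256, which is what the conditional block
 * performs; the branch is acceptable here because this is the variable-time
 * variant.
 */
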
static int secp256k1_fe_normalizes_to_zero(secp256k1_fe *r) {
    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];

    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
    uint32_t z0, z1;

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL; t1 += (x << 6);
    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
                                         z0 |= t9; z1 &= t9 ^ 0x3C00000UL;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    return (z0 == 0) | (z1 == 0x3FFFFFFUL);
}

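/* Informal note: after the first pass the value has magnitude 1, so it is
 * congruent to zero iff the raw limbs are exactly 0 or exactly p.  z0 ORs all
 * limbs together (zero iff the value is 0); z1 ANDs each limb XORed with the
 * matching limb of p, using constants chosen so that equality yields all ones,
 * e.g. for limb 0: p mod 2^26 = 0x3FFFC2F and 0x3FFFC2F ^ 0x3D0 = 0x3FFFFFF.
 */
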
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe *r) {
    uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    uint32_t z0, z1;
    uint32_t x;

    t0 = r->n[0];
    t9 = r->n[9];

    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    x = t9 >> 22;

    /* The first pass ensures the magnitude is 1, ... */
    t0 += x * 0x3D1UL;

    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
    z0 = t0 & 0x3FFFFFFUL;
    z1 = z0 ^ 0x3D0UL;

    /* Fast return path should catch the majority of cases */
    if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
        return 0;
    }

    t1 = r->n[1]; t2 = r->n[2]; t3 = r->n[3]; t4 = r->n[4];
    t5 = r->n[5]; t6 = r->n[6]; t7 = r->n[7]; t8 = r->n[8];

    t9 &= 0x03FFFFFUL;
    t1 += (x << 6);

    t1 += (t0 >> 26);
    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
                                         z0 |= t9; z1 &= t9 ^ 0x3C00000UL;

    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    VERIFY_CHECK(t9 >> 23 == 0);

    return (z0 == 0) | (z1 == 0x3FFFFFFUL);
}

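/* Informal note: the fast return path above only folds the bit-256 carry into
 * t0 and inspects that single limb; if it is neither 0 nor p's low limb value,
 * the element cannot normalize to zero, so the remaining limbs never need to
 * be loaded.  Only the rare ambiguous case falls through to the full check,
 * which mirrors secp256k1_fe_normalizes_to_zero.
 */
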
SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe *r, int a) {
    r->n[0] = a;
    r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
}

SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe *a) {
    const uint32_t *t = a->n;
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    secp256k1_fe_verify(a);
#endif
    return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
}

SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    secp256k1_fe_verify(a);
#endif
    return a->n[0] & 1;
}

SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe *a) {
    int i;
#ifdef VERIFY
    a->magnitude = 0;
    a->normalized = 1;
#endif
    for (i=0; i<10; i++) {
        a->n[i] = 0;
    }
}

static int secp256k1_fe_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) {
    int i;
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    VERIFY_CHECK(b->normalized);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
#endif
    for (i = 9; i >= 0; i--) {
        if (a->n[i] > b->n[i]) {
            return 1;
        }
        if (a->n[i] < b->n[i]) {
            return -1;
        }
    }
    return 0;
}

static int secp256k1_fe_set_b32(secp256k1_fe *r, const unsigned char *a) {
    int i;
    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
    r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
    for (i=0; i<32; i++) {
        int j;
        for (j=0; j<4; j++) {
            int limb = (8*i+2*j)/26;
            int shift = (8*i+2*j)%26;
            r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
        }
    }
    if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
        return 0;
    }
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
    secp256k1_fe_verify(r);
#endif
    return 1;
}

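/* Informal note: the loop walks the big-endian input from its least
 * significant byte (a[31]) upward, two bits at a time; bit 8*i+2*j of the
 * integer lands in limb (8*i+2*j)/26 at offset (8*i+2*j)%26.  For example,
 * i=3, j=1 is bit 26, i.e. limb 1, shift 0.  The final comparison mirrors the
 * overflow test in the normalization code and rejects encodings >= p, so a
 * return value of 1 means the result is canonical and normalized.
 */
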
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe *a) {
    int i;
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
    secp256k1_fe_verify(a);
#endif
    for (i=0; i<32; i++) {
        int j;
        int c = 0;
        for (j=0; j<4; j++) {
            int limb = (8*i+2*j)/26;
            int shift = (8*i+2*j)%26;
            c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
        }
        r[31-i] = c;
    }
}

SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe *r, const secp256k1_fe *a, int m) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= m);
    secp256k1_fe_verify(a);
#endif
    r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
    r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
    r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
    r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
    r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
    r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
    r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
    r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
    r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
    r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
#ifdef VERIFY
    r->magnitude = m + 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

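/* Informal note: the per-limb constants above are the limbs of p, so the
 * function computes 2*(m+1)*p - a, which is congruent to -a (mod p).  The
 * factor 2*(m+1) makes every minuend limb at least as large as any limb of an
 * input of magnitude at most m, so no limb underflows, and the result is
 * reported with magnitude m + 1.
 */
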
SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe *r, int a) {
    r->n[0] *= a;
    r->n[1] *= a;
    r->n[2] *= a;
    r->n[3] *= a;
    r->n[4] *= a;
    r->n[5] *= a;
    r->n[6] *= a;
    r->n[7] *= a;
    r->n[8] *= a;
    r->n[9] *= a;
#ifdef VERIFY
    r->magnitude *= a;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe *r, const secp256k1_fe *a) {
#ifdef VERIFY
    secp256k1_fe_verify(a);
#endif
    r->n[0] += a->n[0];
    r->n[1] += a->n[1];
    r->n[2] += a->n[2];
    r->n[3] += a->n[3];
    r->n[4] += a->n[4];
    r->n[5] += a->n[5];
    r->n[6] += a->n[6];
    r->n[7] += a->n[7];
    r->n[8] += a->n[8];
    r->n[9] += a->n[9];
#ifdef VERIFY
    r->magnitude += a->magnitude;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

#if defined(USE_EXTERNAL_ASM)

/* External assembler implementation */
void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a);

#else

#ifdef VERIFY
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#else
#define VERIFY_BITS(x, n) do { } while(0)
#endif

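/* Informal note for the two inner routines below: they reduce on the fly using
 * 2^256 == 0x1000003D1 (mod p), but shifted by four bits because the top limb
 * holds only 22 of its 26 bits.  A carry x at limb position 10 stands for
 * x * 2^260 = x * 2^4 * 2^256 == x * 0x1000003D10 (mod p), and with
 * R0 = 0x3D10, R1 = 0x400, M = 2^26 - 1:
 *   0x1000003D10 = R1 * 2^26 + R0,
 * which is the "[x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]" shorthand used in the
 * comments.
 */
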
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
    uint64_t c, d;
    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;

    VERIFY_BITS(a[0], 30);
    VERIFY_BITS(a[1], 30);
    VERIFY_BITS(a[2], 30);
    VERIFY_BITS(a[3], 30);
    VERIFY_BITS(a[4], 30);
    VERIFY_BITS(a[5], 30);
    VERIFY_BITS(a[6], 30);
    VERIFY_BITS(a[7], 30);
    VERIFY_BITS(a[8], 30);
    VERIFY_BITS(a[9], 26);
    VERIFY_BITS(b[0], 30);
    VERIFY_BITS(b[1], 30);
    VERIFY_BITS(b[2], 30);
    VERIFY_BITS(b[3], 30);
    VERIFY_BITS(b[4], 30);
    VERIFY_BITS(b[5], 30);
    VERIFY_BITS(b[6], 30);
    VERIFY_BITS(b[7], 30);
    VERIFY_BITS(b[8], 30);
    VERIFY_BITS(b[9], 26);

    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
     */
478 d = (uint64_t)a[0] * b[9]
479 + (uint64_t)a[1] * b[8]
480 + (uint64_t)a[2] * b[7]
481 + (uint64_t)a[3] * b[6]
482 + (uint64_t)a[4] * b[5]
483 + (uint64_t)a[5] * b[4]
484 + (uint64_t)a[6] * b[3]
485 + (uint64_t)a[7] * b[2]
486 + (uint64_t)a[8] * b[1]
487 + (uint64_t)a[9] * b[0];
488 /* VERIFY_BITS(d, 64); */
489 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
490 t9 = d & M; d >>= 26;
493 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
495 c = (uint64_t)a[0] * b[0];
497 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
498 d += (uint64_t)a[1] * b[9]
499 + (uint64_t)a[2] * b[8]
500 + (uint64_t)a[3] * b[7]
501 + (uint64_t)a[4] * b[6]
502 + (uint64_t)a[5] * b[5]
503 + (uint64_t)a[6] * b[4]
504 + (uint64_t)a[7] * b[3]
505 + (uint64_t)a[8] * b[2]
506 + (uint64_t)a[9] * b[1];
508 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
509 u0 = d & M; d >>= 26; c += u0 * R0;
513 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
514 t0 = c & M; c >>= 26; c += u0 * R1;
517 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
518 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
520 c += (uint64_t)a[0] * b[1]
521 + (uint64_t)a[1] * b[0];
523 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
524 d += (uint64_t)a[2] * b[9]
525 + (uint64_t)a[3] * b[8]
526 + (uint64_t)a[4] * b[7]
527 + (uint64_t)a[5] * b[6]
528 + (uint64_t)a[6] * b[5]
529 + (uint64_t)a[7] * b[4]
530 + (uint64_t)a[8] * b[3]
531 + (uint64_t)a[9] * b[2];
533 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
534 u1 = d & M; d >>= 26; c += u1 * R0;
538 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
539 t1 = c & M; c >>= 26; c += u1 * R1;
542 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
543 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
545 c += (uint64_t)a[0] * b[2]
546 + (uint64_t)a[1] * b[1]
547 + (uint64_t)a[2] * b[0];
549 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
550 d += (uint64_t)a[3] * b[9]
551 + (uint64_t)a[4] * b[8]
552 + (uint64_t)a[5] * b[7]
553 + (uint64_t)a[6] * b[6]
554 + (uint64_t)a[7] * b[5]
555 + (uint64_t)a[8] * b[4]
556 + (uint64_t)a[9] * b[3];
558 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
559 u2 = d & M; d >>= 26; c += u2 * R0;
563 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
564 t2 = c & M; c >>= 26; c += u2 * R1;
567 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
568 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
570 c += (uint64_t)a[0] * b[3]
571 + (uint64_t)a[1] * b[2]
572 + (uint64_t)a[2] * b[1]
573 + (uint64_t)a[3] * b[0];
575 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
576 d += (uint64_t)a[4] * b[9]
577 + (uint64_t)a[5] * b[8]
578 + (uint64_t)a[6] * b[7]
579 + (uint64_t)a[7] * b[6]
580 + (uint64_t)a[8] * b[5]
581 + (uint64_t)a[9] * b[4];
583 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
584 u3 = d & M; d >>= 26; c += u3 * R0;
587 /* VERIFY_BITS(c, 64); */
588 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
589 t3 = c & M; c >>= 26; c += u3 * R1;
592 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
593 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
595 c += (uint64_t)a[0] * b[4]
596 + (uint64_t)a[1] * b[3]
597 + (uint64_t)a[2] * b[2]
598 + (uint64_t)a[3] * b[1]
599 + (uint64_t)a[4] * b[0];
601 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
602 d += (uint64_t)a[5] * b[9]
603 + (uint64_t)a[6] * b[8]
604 + (uint64_t)a[7] * b[7]
605 + (uint64_t)a[8] * b[6]
606 + (uint64_t)a[9] * b[5];
608 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
609 u4 = d & M; d >>= 26; c += u4 * R0;
612 /* VERIFY_BITS(c, 64); */
613 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
614 t4 = c & M; c >>= 26; c += u4 * R1;
617 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
618 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
620 c += (uint64_t)a[0] * b[5]
621 + (uint64_t)a[1] * b[4]
622 + (uint64_t)a[2] * b[3]
623 + (uint64_t)a[3] * b[2]
624 + (uint64_t)a[4] * b[1]
625 + (uint64_t)a[5] * b[0];
627 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
628 d += (uint64_t)a[6] * b[9]
629 + (uint64_t)a[7] * b[8]
630 + (uint64_t)a[8] * b[7]
631 + (uint64_t)a[9] * b[6];
633 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
634 u5 = d & M; d >>= 26; c += u5 * R0;
637 /* VERIFY_BITS(c, 64); */
638 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
639 t5 = c & M; c >>= 26; c += u5 * R1;
642 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
643 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
645 c += (uint64_t)a[0] * b[6]
646 + (uint64_t)a[1] * b[5]
647 + (uint64_t)a[2] * b[4]
648 + (uint64_t)a[3] * b[3]
649 + (uint64_t)a[4] * b[2]
650 + (uint64_t)a[5] * b[1]
651 + (uint64_t)a[6] * b[0];
653 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
654 d += (uint64_t)a[7] * b[9]
655 + (uint64_t)a[8] * b[8]
656 + (uint64_t)a[9] * b[7];
658 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
659 u6 = d & M; d >>= 26; c += u6 * R0;
662 /* VERIFY_BITS(c, 64); */
663 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
664 t6 = c & M; c >>= 26; c += u6 * R1;
667 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
668 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
670 c += (uint64_t)a[0] * b[7]
671 + (uint64_t)a[1] * b[6]
672 + (uint64_t)a[2] * b[5]
673 + (uint64_t)a[3] * b[4]
674 + (uint64_t)a[4] * b[3]
675 + (uint64_t)a[5] * b[2]
676 + (uint64_t)a[6] * b[1]
677 + (uint64_t)a[7] * b[0];
678 /* VERIFY_BITS(c, 64); */
679 VERIFY_CHECK(c <= 0x8000007C00000007ULL);
680 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
681 d += (uint64_t)a[8] * b[9]
682 + (uint64_t)a[9] * b[8];
684 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
685 u7 = d & M; d >>= 26; c += u7 * R0;
688 /* VERIFY_BITS(c, 64); */
689 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
690 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
691 t7 = c & M; c >>= 26; c += u7 * R1;
694 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
695 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
697 c += (uint64_t)a[0] * b[8]
698 + (uint64_t)a[1] * b[7]
699 + (uint64_t)a[2] * b[6]
700 + (uint64_t)a[3] * b[5]
701 + (uint64_t)a[4] * b[4]
702 + (uint64_t)a[5] * b[3]
703 + (uint64_t)a[6] * b[2]
704 + (uint64_t)a[7] * b[1]
705 + (uint64_t)a[8] * b[0];
706 /* VERIFY_BITS(c, 64); */
707 VERIFY_CHECK(c <= 0x9000007B80000008ULL);
708 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
709 d += (uint64_t)a[9] * b[9];
711 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
712 u8 = d & M; d >>= 26; c += u8 * R0;
715 /* VERIFY_BITS(c, 64); */
716 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
717 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[3] = t3;
    VERIFY_BITS(r[3], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[4] = t4;
    VERIFY_BITS(r[4], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[5] = t5;
    VERIFY_BITS(r[5], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[6] = t6;
    VERIFY_BITS(r[6], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[7] = t7;
    VERIFY_BITS(r[7], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[8] = c & M; c >>= 26; c += u8 * R1;
    VERIFY_BITS(r[8], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    c   += d * R0 + t9;
    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    VERIFY_BITS(r[9], 22);
746 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
747 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
748 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
750 d = c * (R0 >> 4) + t0;
752 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
753 r[0] = d & M; d >>= 26;
754 VERIFY_BITS(r[0], 26);
756 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
757 d += c * (R1 >> 4) + t1;
759 VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
760 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
761 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
762 r[1] = d & M; d >>= 26;
763 VERIFY_BITS(r[1], 26);
765 VERIFY_CHECK(d <= 0x4000000ULL);
    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d   += t2;
    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[2] = d;
    VERIFY_BITS(r[2], 27);
    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}

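/* Informal note on the tail above: r[9] keeps only 22 bits (c & (M >> 4)); the
 * bits shifted out represent a multiple of 2^256 and are folded back into the
 * low limbs using the unshifted constants (R0 >> 4) = 0x3D1 and
 * (R1 >> 4) = 0x40, the same identity used by the normalization code.
 */
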
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
    uint64_t c, d;
    uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
    const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;

    VERIFY_BITS(a[0], 30);
    VERIFY_BITS(a[1], 30);
    VERIFY_BITS(a[2], 30);
    VERIFY_BITS(a[3], 30);
    VERIFY_BITS(a[4], 30);
    VERIFY_BITS(a[5], 30);
    VERIFY_BITS(a[6], 30);
    VERIFY_BITS(a[7], 30);
    VERIFY_BITS(a[8], 30);
    VERIFY_BITS(a[9], 26);

    /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
     *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
     *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
     */
797 d = (uint64_t)(a[0]*2) * a[9]
798 + (uint64_t)(a[1]*2) * a[8]
799 + (uint64_t)(a[2]*2) * a[7]
800 + (uint64_t)(a[3]*2) * a[6]
801 + (uint64_t)(a[4]*2) * a[5];
802 /* VERIFY_BITS(d, 64); */
803 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
804 t9 = d & M; d >>= 26;
807 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
809 c = (uint64_t)a[0] * a[0];
811 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
812 d += (uint64_t)(a[1]*2) * a[9]
813 + (uint64_t)(a[2]*2) * a[8]
814 + (uint64_t)(a[3]*2) * a[7]
815 + (uint64_t)(a[4]*2) * a[6]
816 + (uint64_t)a[5] * a[5];
818 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
819 u0 = d & M; d >>= 26; c += u0 * R0;
823 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
824 t0 = c & M; c >>= 26; c += u0 * R1;
827 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
828 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
830 c += (uint64_t)(a[0]*2) * a[1];
832 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
833 d += (uint64_t)(a[2]*2) * a[9]
834 + (uint64_t)(a[3]*2) * a[8]
835 + (uint64_t)(a[4]*2) * a[7]
836 + (uint64_t)(a[5]*2) * a[6];
838 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
839 u1 = d & M; d >>= 26; c += u1 * R0;
843 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
844 t1 = c & M; c >>= 26; c += u1 * R1;
847 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
848 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
850 c += (uint64_t)(a[0]*2) * a[2]
851 + (uint64_t)a[1] * a[1];
853 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
854 d += (uint64_t)(a[3]*2) * a[9]
855 + (uint64_t)(a[4]*2) * a[8]
856 + (uint64_t)(a[5]*2) * a[7]
857 + (uint64_t)a[6] * a[6];
859 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
860 u2 = d & M; d >>= 26; c += u2 * R0;
864 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
865 t2 = c & M; c >>= 26; c += u2 * R1;
868 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
869 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
871 c += (uint64_t)(a[0]*2) * a[3]
872 + (uint64_t)(a[1]*2) * a[2];
874 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
875 d += (uint64_t)(a[4]*2) * a[9]
876 + (uint64_t)(a[5]*2) * a[8]
877 + (uint64_t)(a[6]*2) * a[7];
879 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
880 u3 = d & M; d >>= 26; c += u3 * R0;
883 /* VERIFY_BITS(c, 64); */
884 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
885 t3 = c & M; c >>= 26; c += u3 * R1;
888 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
889 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
891 c += (uint64_t)(a[0]*2) * a[4]
892 + (uint64_t)(a[1]*2) * a[3]
893 + (uint64_t)a[2] * a[2];
895 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
896 d += (uint64_t)(a[5]*2) * a[9]
897 + (uint64_t)(a[6]*2) * a[8]
898 + (uint64_t)a[7] * a[7];
900 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
901 u4 = d & M; d >>= 26; c += u4 * R0;
904 /* VERIFY_BITS(c, 64); */
905 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
906 t4 = c & M; c >>= 26; c += u4 * R1;
909 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
910 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
912 c += (uint64_t)(a[0]*2) * a[5]
913 + (uint64_t)(a[1]*2) * a[4]
914 + (uint64_t)(a[2]*2) * a[3];
916 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
917 d += (uint64_t)(a[6]*2) * a[9]
918 + (uint64_t)(a[7]*2) * a[8];
920 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
921 u5 = d & M; d >>= 26; c += u5 * R0;
924 /* VERIFY_BITS(c, 64); */
925 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
926 t5 = c & M; c >>= 26; c += u5 * R1;
929 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
930 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
932 c += (uint64_t)(a[0]*2) * a[6]
933 + (uint64_t)(a[1]*2) * a[5]
934 + (uint64_t)(a[2]*2) * a[4]
935 + (uint64_t)a[3] * a[3];
937 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
938 d += (uint64_t)(a[7]*2) * a[9]
939 + (uint64_t)a[8] * a[8];
941 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
942 u6 = d & M; d >>= 26; c += u6 * R0;
945 /* VERIFY_BITS(c, 64); */
946 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
947 t6 = c & M; c >>= 26; c += u6 * R1;
950 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
951 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
953 c += (uint64_t)(a[0]*2) * a[7]
954 + (uint64_t)(a[1]*2) * a[6]
955 + (uint64_t)(a[2]*2) * a[5]
956 + (uint64_t)(a[3]*2) * a[4];
957 /* VERIFY_BITS(c, 64); */
958 VERIFY_CHECK(c <= 0x8000007C00000007ULL);
959 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
960 d += (uint64_t)(a[8]*2) * a[9];
962 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
963 u7 = d & M; d >>= 26; c += u7 * R0;
966 /* VERIFY_BITS(c, 64); */
967 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
968 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
969 t7 = c & M; c >>= 26; c += u7 * R1;
972 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
973 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
975 c += (uint64_t)(a[0]*2) * a[8]
976 + (uint64_t)(a[1]*2) * a[7]
977 + (uint64_t)(a[2]*2) * a[6]
978 + (uint64_t)(a[3]*2) * a[5]
979 + (uint64_t)a[4] * a[4];
980 /* VERIFY_BITS(c, 64); */
981 VERIFY_CHECK(c <= 0x9000007B80000008ULL);
982 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
983 d += (uint64_t)a[9] * a[9];
985 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
986 u8 = d & M; d >>= 26; c += u8 * R0;
989 /* VERIFY_BITS(c, 64); */
990 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
991 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[3] = t3;
    VERIFY_BITS(r[3], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[4] = t4;
    VERIFY_BITS(r[4], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[5] = t5;
    VERIFY_BITS(r[5], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[6] = t6;
    VERIFY_BITS(r[6], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[7] = t7;
    VERIFY_BITS(r[7], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[8] = c & M; c >>= 26; c += u8 * R1;
    VERIFY_BITS(r[8], 26);
    /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    c   += d * R0 + t9;
    /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    VERIFY_BITS(r[9], 22);
1020 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1021 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1022 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1024 d = c * (R0 >> 4) + t0;
1026 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1027 r[0] = d & M; d >>= 26;
1028 VERIFY_BITS(r[0], 26);
1030 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1031 d += c * (R1 >> 4) + t1;
1033 VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
1034 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1035 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1036 r[1] = d & M; d >>= 26;
1037 VERIFY_BITS(r[1], 26);
1039 VERIFY_CHECK(d <= 0x4000000ULL);
    /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    d   += t2;
    /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    r[2] = d;
    VERIFY_BITS(r[2], 27);
    /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}

#endif

static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    VERIFY_CHECK(b->magnitude <= 8);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
    VERIFY_CHECK(r != b);
#endif
    secp256k1_fe_mul_inner(r->n, a->n, b->n);
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

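/* Usage sketch (informal): inputs may have magnitude up to 8, the output has
 * magnitude 1 and is not normalized, and r may alias a but must not alias b
 * (b is declared restrict and checked with VERIFY_CHECK(r != b)):
 *   secp256k1_fe_mul(&t, &t, &x);    aliasing r == a is fine
 *   secp256k1_fe_mul(&t, &x, &t);    aliasing r == b is not allowed
 */
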
static void secp256k1_fe_sqr(secp256k1_fe *r, const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    secp256k1_fe_verify(a);
#endif
    secp256k1_fe_sqr_inner(r->n, a->n);
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 0;
    secp256k1_fe_verify(r);
#endif
}

static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
#ifdef VERIFY
    if (a->magnitude > r->magnitude) {
        r->magnitude = a->magnitude;
    }
    r->normalized &= a->normalized;
#endif
}

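/* Informal note: mask0 = flag + ~0 is flag - 1 modulo 2^32, so
 *   flag == 1  ->  mask0 = 0x00000000, mask1 = 0xFFFFFFFF  (take a's limbs)
 *   flag == 0  ->  mask0 = 0xFFFFFFFF, mask1 = 0x00000000  (keep r's limbs)
 * giving a conditional move with no data-dependent branch.
 */
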
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
}

static void secp256k1_fe_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) {
#ifdef VERIFY
    VERIFY_CHECK(a->normalized);
#endif
    r->n[0] = a->n[0] | a->n[1] << 26;
    r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
    r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
    r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
    r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
    r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
    r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
    r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
}

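/* Informal note: storage form repacks the ten 26-bit limbs into eight full
 * 32-bit words, dropping the redundancy (hence the normalized-input
 * requirement).  For the first word, n[0] supplies bits 0..25 and n[1] bits
 * 26..31, hence a->n[0] | a->n[1] << 26; the remaining 20 bits of n[1] begin
 * the next word, and so on.  secp256k1_fe_from_storage below performs the
 * inverse split.
 */
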
static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) {
    r->n[0] = a->n[0] & 0x3FFFFFFUL;
    r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
    r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
    r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
    r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
    r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
    r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
    r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
    r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
    r->n[9] = a->n[7] >> 10;
#ifdef VERIFY
    r->magnitude = 1;
    r->normalized = 1;
#endif
}

#endif