]> Git Repo - secp256k1.git/blame - src/field_10x26_impl.h
Implement endomorphism optimization for secp256k1_ecmult_const
[secp256k1.git] / src / field_10x26_impl.h
CommitLineData
71712b27
GM
1/**********************************************************************
2 * Copyright (c) 2013, 2014 Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
0a433ea2 6
7a4b7691
PW
7#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
8#define _SECP256K1_FIELD_REPR_IMPL_H_
9
3231676b 10#include <stdio.h>
3231676b 11#include <string.h>
1c7fa133 12#include "util.h"
11ab5622
PW
13#include "num.h"
14#include "field.h"
3231676b 15
7d681ac6 16#ifdef VERIFY
a4a43d75 17static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
7d681ac6
PD
18 const uint32_t *d = a->n;
19 int m = a->normalized ? 1 : 2 * a->magnitude, r = 1;
20 r &= (d[0] <= 0x3FFFFFFUL * m);
21 r &= (d[1] <= 0x3FFFFFFUL * m);
22 r &= (d[2] <= 0x3FFFFFFUL * m);
23 r &= (d[3] <= 0x3FFFFFFUL * m);
24 r &= (d[4] <= 0x3FFFFFFUL * m);
25 r &= (d[5] <= 0x3FFFFFFUL * m);
26 r &= (d[6] <= 0x3FFFFFFUL * m);
27 r &= (d[7] <= 0x3FFFFFFUL * m);
28 r &= (d[8] <= 0x3FFFFFFUL * m);
29 r &= (d[9] <= 0x03FFFFFUL * m);
30 r &= (a->magnitude >= 0);
7688e341 31 r &= (a->magnitude <= 32);
7d681ac6
PD
32 if (a->normalized) {
33 r &= (a->magnitude <= 1);
34 if (r && (d[9] == 0x03FFFFFUL)) {
35 uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
36 if (mid == 0x3FFFFFFUL) {
37 r &= ((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
38 }
39 }
40 }
41 VERIFY_CHECK(r == 1);
42}
43#else
2cad067a
GM
44static void secp256k1_fe_verify(const secp256k1_fe_t *a) {
45 (void)a;
46}
7d681ac6
PD
47#endif
48
a4a43d75 49static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
42822baa
PD
50 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
51 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
52
71712b27 53 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
42822baa 54 uint32_t m;
25b35c7e 55 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
42822baa 56
71712b27 57 /* The first pass ensures the magnitude is 1, ... */
42822baa
PD
58 t0 += x * 0x3D1UL; t1 += (x << 6);
59 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
60 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
61 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
62 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
63 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
64 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
65 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
66 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
67 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
68
71712b27 69 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
1c7fa133 70 VERIFY_CHECK(t9 >> 23 == 0);
42822baa 71
71712b27 72 /* At most a single final reduction is needed; check if the value is >= the field characteristic */
7d681ac6
PD
73 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
74 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
42822baa 75
71712b27 76 /* Apply the final reduction (for constant-time behaviour, we do it always) */
42822baa
PD
77 t0 += x * 0x3D1UL; t1 += (x << 6);
78 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
79 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
80 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
81 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
82 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
83 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
84 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
85 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
86 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
87
71712b27 88 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
1c7fa133 89 VERIFY_CHECK(t9 >> 22 == x);
42822baa 90
71712b27 91 /* Mask off the possible multiple of 2^256 from the final reduction */
42822baa
PD
92 t9 &= 0x03FFFFFUL;
93
94 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
3231676b 95 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
39bd94d8
PW
96
97#ifdef VERIFY
98 r->magnitude = 1;
99 r->normalized = 1;
100 secp256k1_fe_verify(r);
101#endif
102}
103
0295f0a3
PW
104static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) {
105 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
106 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
107
108 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
109 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
110
111 /* The first pass ensures the magnitude is 1, ... */
112 t0 += x * 0x3D1UL; t1 += (x << 6);
113 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
114 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
115 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
116 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
117 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
118 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
119 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
120 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
121 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
122
eed599dd
PD
123 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
124 VERIFY_CHECK(t9 >> 23 == 0);
125
0295f0a3
PW
126 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
127 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
128
129#ifdef VERIFY
130 r->magnitude = 1;
131 secp256k1_fe_verify(r);
132#endif
133}
134
39bd94d8
PW
135static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
136 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
137 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
138
139 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
39bd94d8 140 uint32_t m;
25b35c7e 141 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
39bd94d8
PW
142
143 /* The first pass ensures the magnitude is 1, ... */
144 t0 += x * 0x3D1UL; t1 += (x << 6);
145 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
146 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
147 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
148 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
149 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
150 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
151 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
152 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
153 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
154
155 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
156 VERIFY_CHECK(t9 >> 23 == 0);
157
158 /* At most a single final reduction is needed; check if the value is >= the field characteristic */
159 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
160 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
161
162 if (x) {
163 t0 += 0x3D1UL; t1 += (x << 6);
164 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
165 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
166 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
167 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
168 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
169 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
170 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
171 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
172 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
173
174 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
175 VERIFY_CHECK(t9 >> 22 == x);
176
177 /* Mask off the possible multiple of 2^256 from the final reduction */
178 t9 &= 0x03FFFFFUL;
179 }
180
181 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
182 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
3231676b
PW
183
184#ifdef VERIFY
185 r->magnitude = 1;
186 r->normalized = 1;
7d681ac6 187 secp256k1_fe_verify(r);
3231676b
PW
188#endif
189}
190
eed599dd
PD
191static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
192 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
193 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
194
eed599dd
PD
195 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
196 uint32_t z0, z1;
197
25b35c7e
GM
198 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
199 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
200
eed599dd
PD
201 /* The first pass ensures the magnitude is 1, ... */
202 t0 += x * 0x3D1UL; t1 += (x << 6);
203 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
204 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
205 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
206 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
49ee0dbe
PD
207 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
208 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
209 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
210 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
211 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
212 z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
213
214 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
215 VERIFY_CHECK(t9 >> 23 == 0);
216
217 return (z0 == 0) | (z1 == 0x3FFFFFFUL);
218}
219
220static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) {
25b35c7e
GM
221 uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
222 uint32_t z0, z1;
223 uint32_t x;
224
225 t0 = r->n[0];
226 t9 = r->n[9];
49ee0dbe
PD
227
228 /* Reduce t9 at the start so there will be at most a single carry from the first pass */
25b35c7e 229 x = t9 >> 22;
49ee0dbe
PD
230
231 /* The first pass ensures the magnitude is 1, ... */
232 t0 += x * 0x3D1UL;
233
234 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
25b35c7e
GM
235 z0 = t0 & 0x3FFFFFFUL;
236 z1 = z0 ^ 0x3D0UL;
49ee0dbe
PD
237
238 /* Fast return path should catch the majority of cases */
26320197 239 if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
49ee0dbe 240 return 0;
26320197 241 }
49ee0dbe 242
25b35c7e
GM
243 t1 = r->n[1];
244 t2 = r->n[2];
245 t3 = r->n[3];
246 t4 = r->n[4];
247 t5 = r->n[5];
248 t6 = r->n[6];
249 t7 = r->n[7];
250 t8 = r->n[8];
251
49ee0dbe
PD
252 t9 &= 0x03FFFFFUL;
253 t1 += (x << 6);
254
255 t1 += (t0 >> 26); t0 = z0;
256 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
257 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
258 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
eed599dd
PD
259 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
260 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
261 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
262 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
263 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
264 z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
265
266 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
267 VERIFY_CHECK(t9 >> 23 == 0);
268
269 return (z0 == 0) | (z1 == 0x3FFFFFFUL);
270}
271
a4a43d75 272SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
3231676b
PW
273 r->n[0] = a;
274 r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
275#ifdef VERIFY
276 r->magnitude = 1;
277 r->normalized = 1;
7d681ac6 278 secp256k1_fe_verify(r);
3231676b
PW
279#endif
280}
281
a4a43d75 282SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
25b35c7e 283 const uint32_t *t = a->n;
3231676b 284#ifdef VERIFY
1c7fa133 285 VERIFY_CHECK(a->normalized);
7d681ac6 286 secp256k1_fe_verify(a);
3231676b 287#endif
137e77af 288 return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
3231676b
PW
289}
290
a4a43d75 291SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
3231676b 292#ifdef VERIFY
1c7fa133 293 VERIFY_CHECK(a->normalized);
7d681ac6 294 secp256k1_fe_verify(a);
3231676b
PW
295#endif
296 return a->n[0] & 1;
297}
298
a4a43d75 299SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
25b35c7e 300 int i;
2f6c8019 301#ifdef VERIFY
2f6c8019 302 a->magnitude = 0;
7d681ac6 303 a->normalized = 1;
2f6c8019 304#endif
25b35c7e 305 for (i=0; i<10; i++) {
2f6c8019
GM
306 a->n[i] = 0;
307 }
308}
309
f24041d6 310static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
25b35c7e 311 int i;
f24041d6
PW
312#ifdef VERIFY
313 VERIFY_CHECK(a->normalized);
314 VERIFY_CHECK(b->normalized);
315 secp256k1_fe_verify(a);
316 secp256k1_fe_verify(b);
317#endif
25b35c7e 318 for (i = 9; i >= 0; i--) {
26320197
GM
319 if (a->n[i] > b->n[i]) {
320 return 1;
321 }
322 if (a->n[i] < b->n[i]) {
323 return -1;
324 }
f24041d6
PW
325 }
326 return 0;
327}
328
d907ebc0 329static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
25b35c7e 330 int i;
3231676b
PW
331 r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
332 r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
25b35c7e
GM
333 for (i=0; i<32; i++) {
334 int j;
335 for (j=0; j<4; j++) {
3231676b
PW
336 int limb = (8*i+2*j)/26;
337 int shift = (8*i+2*j)%26;
338 r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
339 }
340 }
d907ebc0
PW
341 if (r->n[9] == 0x3FFFFFUL && (r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL && (r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL) {
342 return 0;
343 }
3231676b
PW
344#ifdef VERIFY
345 r->magnitude = 1;
346 r->normalized = 1;
7d681ac6 347 secp256k1_fe_verify(r);
3231676b 348#endif
d907ebc0 349 return 1;
3231676b
PW
350}
351
784e62f3 352/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
a4a43d75 353static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
25b35c7e 354 int i;
3231676b 355#ifdef VERIFY
1c7fa133 356 VERIFY_CHECK(a->normalized);
7d681ac6 357 secp256k1_fe_verify(a);
3231676b 358#endif
25b35c7e
GM
359 for (i=0; i<32; i++) {
360 int j;
3231676b 361 int c = 0;
25b35c7e 362 for (j=0; j<4; j++) {
3231676b
PW
363 int limb = (8*i+2*j)/26;
364 int shift = (8*i+2*j)%26;
365 c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
366 }
367 r[31-i] = c;
368 }
369}
370
a4a43d75 371SECP256K1_INLINE static void secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) {
3231676b 372#ifdef VERIFY
1c7fa133 373 VERIFY_CHECK(a->magnitude <= m);
7d681ac6 374 secp256k1_fe_verify(a);
3231676b 375#endif
7a8e385d
PW
376 r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
377 r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
378 r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
379 r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
380 r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
381 r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
382 r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
383 r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
384 r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
385 r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
3231676b 386#ifdef VERIFY
7d681ac6 387 r->magnitude = m + 1;
3231676b 388 r->normalized = 0;
7d681ac6 389 secp256k1_fe_verify(r);
3231676b 390#endif
7d681ac6
PD
391}
392
a4a43d75 393SECP256K1_INLINE static void secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
3231676b
PW
394 r->n[0] *= a;
395 r->n[1] *= a;
396 r->n[2] *= a;
397 r->n[3] *= a;
398 r->n[4] *= a;
399 r->n[5] *= a;
400 r->n[6] *= a;
401 r->n[7] *= a;
402 r->n[8] *= a;
403 r->n[9] *= a;
7d681ac6
PD
404#ifdef VERIFY
405 r->magnitude *= a;
406 r->normalized = 0;
407 secp256k1_fe_verify(r);
408#endif
3231676b
PW
409}
410
a4a43d75 411SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
3231676b 412#ifdef VERIFY
7d681ac6 413 secp256k1_fe_verify(a);
3231676b
PW
414#endif
415 r->n[0] += a->n[0];
416 r->n[1] += a->n[1];
417 r->n[2] += a->n[2];
418 r->n[3] += a->n[3];
419 r->n[4] += a->n[4];
420 r->n[5] += a->n[5];
421 r->n[6] += a->n[6];
422 r->n[7] += a->n[7];
423 r->n[8] += a->n[8];
424 r->n[9] += a->n[9];
7d681ac6
PD
425#ifdef VERIFY
426 r->magnitude += a->magnitude;
427 r->normalized = 0;
428 secp256k1_fe_verify(r);
429#endif
3231676b
PW
430}
431
f8cce956
PW
/* VERIFY_BITS(x, n): in VERIFY builds, check that x has no bits set at position n
 * or above; otherwise it expands to an empty statement. */
#ifndef VERIFY
#define VERIFY_BITS(x, n) do { } while(0)
#else
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#endif
437
b2c9681c 438SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
25b35c7e
GM
439 uint64_t c, d;
440 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
441 uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
442 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
443
f8cce956
PW
444 VERIFY_BITS(a[0], 30);
445 VERIFY_BITS(a[1], 30);
446 VERIFY_BITS(a[2], 30);
447 VERIFY_BITS(a[3], 30);
448 VERIFY_BITS(a[4], 30);
449 VERIFY_BITS(a[5], 30);
450 VERIFY_BITS(a[6], 30);
451 VERIFY_BITS(a[7], 30);
452 VERIFY_BITS(a[8], 30);
453 VERIFY_BITS(a[9], 26);
454 VERIFY_BITS(b[0], 30);
455 VERIFY_BITS(b[1], 30);
456 VERIFY_BITS(b[2], 30);
457 VERIFY_BITS(b[3], 30);
458 VERIFY_BITS(b[4], 30);
459 VERIFY_BITS(b[5], 30);
460 VERIFY_BITS(b[6], 30);
461 VERIFY_BITS(b[7], 30);
462 VERIFY_BITS(b[8], 30);
463 VERIFY_BITS(b[9], 26);
5dd421ba 464
71712b27
GM
465 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
466 * px is a shorthand for sum(a[i]*b[x-i], i=0..x).
467 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
468 */
5dd421ba 469
5dd421ba
PD
470 d = (uint64_t)a[0] * b[9]
471 + (uint64_t)a[1] * b[8]
472 + (uint64_t)a[2] * b[7]
473 + (uint64_t)a[3] * b[6]
474 + (uint64_t)a[4] * b[5]
475 + (uint64_t)a[5] * b[4]
476 + (uint64_t)a[6] * b[3]
477 + (uint64_t)a[7] * b[2]
478 + (uint64_t)a[8] * b[1]
479 + (uint64_t)a[9] * b[0];
71712b27
GM
480 /* VERIFY_BITS(d, 64); */
481 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
25b35c7e 482 t9 = d & M; d >>= 26;
f8cce956
PW
483 VERIFY_BITS(t9, 26);
484 VERIFY_BITS(d, 38);
71712b27 485 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
5dd421ba
PD
486
487 c = (uint64_t)a[0] * b[0];
f8cce956 488 VERIFY_BITS(c, 60);
71712b27 489 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
5dd421ba
PD
490 d += (uint64_t)a[1] * b[9]
491 + (uint64_t)a[2] * b[8]
492 + (uint64_t)a[3] * b[7]
493 + (uint64_t)a[4] * b[6]
494 + (uint64_t)a[5] * b[5]
495 + (uint64_t)a[6] * b[4]
496 + (uint64_t)a[7] * b[3]
497 + (uint64_t)a[8] * b[2]
498 + (uint64_t)a[9] * b[1];
f8cce956 499 VERIFY_BITS(d, 63);
71712b27 500 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
25b35c7e 501 u0 = d & M; d >>= 26; c += u0 * R0;
f8cce956
PW
502 VERIFY_BITS(u0, 26);
503 VERIFY_BITS(d, 37);
504 VERIFY_BITS(c, 61);
71712b27 505 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
25b35c7e 506 t0 = c & M; c >>= 26; c += u0 * R1;
f8cce956
PW
507 VERIFY_BITS(t0, 26);
508 VERIFY_BITS(c, 37);
71712b27
GM
509 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
510 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
5dd421ba
PD
511
512 c += (uint64_t)a[0] * b[1]
513 + (uint64_t)a[1] * b[0];
f8cce956 514 VERIFY_BITS(c, 62);
71712b27 515 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
5dd421ba
PD
516 d += (uint64_t)a[2] * b[9]
517 + (uint64_t)a[3] * b[8]
518 + (uint64_t)a[4] * b[7]
519 + (uint64_t)a[5] * b[6]
520 + (uint64_t)a[6] * b[5]
521 + (uint64_t)a[7] * b[4]
522 + (uint64_t)a[8] * b[3]
523 + (uint64_t)a[9] * b[2];
f8cce956 524 VERIFY_BITS(d, 63);
71712b27 525 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
25b35c7e 526 u1 = d & M; d >>= 26; c += u1 * R0;
f8cce956
PW
527 VERIFY_BITS(u1, 26);
528 VERIFY_BITS(d, 37);
529 VERIFY_BITS(c, 63);
71712b27 530 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
25b35c7e 531 t1 = c & M; c >>= 26; c += u1 * R1;
f8cce956
PW
532 VERIFY_BITS(t1, 26);
533 VERIFY_BITS(c, 38);
71712b27
GM
534 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
535 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
5dd421ba
PD
536
537 c += (uint64_t)a[0] * b[2]
538 + (uint64_t)a[1] * b[1]
539 + (uint64_t)a[2] * b[0];
f8cce956 540 VERIFY_BITS(c, 62);
71712b27 541 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
5dd421ba
PD
542 d += (uint64_t)a[3] * b[9]
543 + (uint64_t)a[4] * b[8]
544 + (uint64_t)a[5] * b[7]
545 + (uint64_t)a[6] * b[6]
546 + (uint64_t)a[7] * b[5]
547 + (uint64_t)a[8] * b[4]
548 + (uint64_t)a[9] * b[3];
f8cce956 549 VERIFY_BITS(d, 63);
71712b27 550 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
25b35c7e 551 u2 = d & M; d >>= 26; c += u2 * R0;
f8cce956
PW
552 VERIFY_BITS(u2, 26);
553 VERIFY_BITS(d, 37);
554 VERIFY_BITS(c, 63);
71712b27 555 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
25b35c7e 556 t2 = c & M; c >>= 26; c += u2 * R1;
f8cce956
PW
557 VERIFY_BITS(t2, 26);
558 VERIFY_BITS(c, 38);
71712b27
GM
559 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
560 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
5dd421ba
PD
561
562 c += (uint64_t)a[0] * b[3]
563 + (uint64_t)a[1] * b[2]
564 + (uint64_t)a[2] * b[1]
565 + (uint64_t)a[3] * b[0];
f8cce956 566 VERIFY_BITS(c, 63);
71712b27 567 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
5dd421ba
PD
568 d += (uint64_t)a[4] * b[9]
569 + (uint64_t)a[5] * b[8]
570 + (uint64_t)a[6] * b[7]
571 + (uint64_t)a[7] * b[6]
572 + (uint64_t)a[8] * b[5]
573 + (uint64_t)a[9] * b[4];
f8cce956 574 VERIFY_BITS(d, 63);
71712b27 575 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
25b35c7e 576 u3 = d & M; d >>= 26; c += u3 * R0;
f8cce956
PW
577 VERIFY_BITS(u3, 26);
578 VERIFY_BITS(d, 37);
71712b27
GM
579 /* VERIFY_BITS(c, 64); */
580 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
25b35c7e 581 t3 = c & M; c >>= 26; c += u3 * R1;
f8cce956
PW
582 VERIFY_BITS(t3, 26);
583 VERIFY_BITS(c, 39);
71712b27
GM
584 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
585 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
5dd421ba
PD
586
587 c += (uint64_t)a[0] * b[4]
588 + (uint64_t)a[1] * b[3]
589 + (uint64_t)a[2] * b[2]
590 + (uint64_t)a[3] * b[1]
591 + (uint64_t)a[4] * b[0];
f8cce956 592 VERIFY_BITS(c, 63);
71712b27 593 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
5dd421ba
PD
594 d += (uint64_t)a[5] * b[9]
595 + (uint64_t)a[6] * b[8]
596 + (uint64_t)a[7] * b[7]
597 + (uint64_t)a[8] * b[6]
598 + (uint64_t)a[9] * b[5];
f8cce956 599 VERIFY_BITS(d, 62);
71712b27 600 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
25b35c7e 601 u4 = d & M; d >>= 26; c += u4 * R0;
f8cce956
PW
602 VERIFY_BITS(u4, 26);
603 VERIFY_BITS(d, 36);
71712b27
GM
604 /* VERIFY_BITS(c, 64); */
605 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
25b35c7e 606 t4 = c & M; c >>= 26; c += u4 * R1;
f8cce956
PW
607 VERIFY_BITS(t4, 26);
608 VERIFY_BITS(c, 39);
71712b27
GM
609 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
610 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
5dd421ba
PD
611
612 c += (uint64_t)a[0] * b[5]
613 + (uint64_t)a[1] * b[4]
614 + (uint64_t)a[2] * b[3]
615 + (uint64_t)a[3] * b[2]
616 + (uint64_t)a[4] * b[1]
617 + (uint64_t)a[5] * b[0];
f8cce956 618 VERIFY_BITS(c, 63);
71712b27 619 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
620 d += (uint64_t)a[6] * b[9]
621 + (uint64_t)a[7] * b[8]
622 + (uint64_t)a[8] * b[7]
623 + (uint64_t)a[9] * b[6];
f8cce956 624 VERIFY_BITS(d, 62);
71712b27 625 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
25b35c7e 626 u5 = d & M; d >>= 26; c += u5 * R0;
f8cce956
PW
627 VERIFY_BITS(u5, 26);
628 VERIFY_BITS(d, 36);
71712b27
GM
629 /* VERIFY_BITS(c, 64); */
630 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
25b35c7e 631 t5 = c & M; c >>= 26; c += u5 * R1;
f8cce956
PW
632 VERIFY_BITS(t5, 26);
633 VERIFY_BITS(c, 39);
71712b27
GM
634 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
635 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
636
637 c += (uint64_t)a[0] * b[6]
638 + (uint64_t)a[1] * b[5]
639 + (uint64_t)a[2] * b[4]
640 + (uint64_t)a[3] * b[3]
641 + (uint64_t)a[4] * b[2]
642 + (uint64_t)a[5] * b[1]
643 + (uint64_t)a[6] * b[0];
f8cce956 644 VERIFY_BITS(c, 63);
71712b27 645 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
646 d += (uint64_t)a[7] * b[9]
647 + (uint64_t)a[8] * b[8]
648 + (uint64_t)a[9] * b[7];
f8cce956 649 VERIFY_BITS(d, 61);
71712b27 650 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 651 u6 = d & M; d >>= 26; c += u6 * R0;
f8cce956
PW
652 VERIFY_BITS(u6, 26);
653 VERIFY_BITS(d, 35);
71712b27
GM
654 /* VERIFY_BITS(c, 64); */
655 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 656 t6 = c & M; c >>= 26; c += u6 * R1;
f8cce956
PW
657 VERIFY_BITS(t6, 26);
658 VERIFY_BITS(c, 39);
71712b27
GM
659 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
660 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
661
662 c += (uint64_t)a[0] * b[7]
663 + (uint64_t)a[1] * b[6]
664 + (uint64_t)a[2] * b[5]
665 + (uint64_t)a[3] * b[4]
666 + (uint64_t)a[4] * b[3]
667 + (uint64_t)a[5] * b[2]
668 + (uint64_t)a[6] * b[1]
669 + (uint64_t)a[7] * b[0];
71712b27 670 /* VERIFY_BITS(c, 64); */
f8cce956 671 VERIFY_CHECK(c <= 0x8000007C00000007ULL);
71712b27 672 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
673 d += (uint64_t)a[8] * b[9]
674 + (uint64_t)a[9] * b[8];
f8cce956 675 VERIFY_BITS(d, 58);
71712b27 676 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 677 u7 = d & M; d >>= 26; c += u7 * R0;
f8cce956
PW
678 VERIFY_BITS(u7, 26);
679 VERIFY_BITS(d, 32);
71712b27 680 /* VERIFY_BITS(c, 64); */
f8cce956 681 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
71712b27 682 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 683 t7 = c & M; c >>= 26; c += u7 * R1;
f8cce956
PW
684 VERIFY_BITS(t7, 26);
685 VERIFY_BITS(c, 38);
71712b27
GM
686 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
687 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
688
689 c += (uint64_t)a[0] * b[8]
690 + (uint64_t)a[1] * b[7]
691 + (uint64_t)a[2] * b[6]
692 + (uint64_t)a[3] * b[5]
693 + (uint64_t)a[4] * b[4]
694 + (uint64_t)a[5] * b[3]
695 + (uint64_t)a[6] * b[2]
696 + (uint64_t)a[7] * b[1]
697 + (uint64_t)a[8] * b[0];
71712b27 698 /* VERIFY_BITS(c, 64); */
f8cce956 699 VERIFY_CHECK(c <= 0x9000007B80000008ULL);
71712b27 700 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 701 d += (uint64_t)a[9] * b[9];
f8cce956 702 VERIFY_BITS(d, 57);
71712b27 703 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 704 u8 = d & M; d >>= 26; c += u8 * R0;
f8cce956
PW
705 VERIFY_BITS(u8, 26);
706 VERIFY_BITS(d, 31);
71712b27 707 /* VERIFY_BITS(c, 64); */
f8cce956 708 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
71712b27 709 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
710
711 r[3] = t3;
f8cce956 712 VERIFY_BITS(r[3], 26);
71712b27 713 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 714 r[4] = t4;
f8cce956 715 VERIFY_BITS(r[4], 26);
71712b27 716 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 717 r[5] = t5;
f8cce956 718 VERIFY_BITS(r[5], 26);
71712b27 719 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 720 r[6] = t6;
f8cce956 721 VERIFY_BITS(r[6], 26);
71712b27 722 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 723 r[7] = t7;
f8cce956 724 VERIFY_BITS(r[7], 26);
71712b27 725 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
726
727 r[8] = c & M; c >>= 26; c += u8 * R1;
f8cce956
PW
728 VERIFY_BITS(r[8], 26);
729 VERIFY_BITS(c, 39);
71712b27
GM
730 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
731 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 732 c += d * R0 + t9;
f8cce956 733 VERIFY_BITS(c, 45);
71712b27 734 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 735 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
f8cce956
PW
736 VERIFY_BITS(r[9], 22);
737 VERIFY_BITS(c, 46);
71712b27
GM
738 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
739 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
740 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
741
742 d = c * (R0 >> 4) + t0;
f8cce956 743 VERIFY_BITS(d, 56);
71712b27 744 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 745 r[0] = d & M; d >>= 26;
f8cce956
PW
746 VERIFY_BITS(r[0], 26);
747 VERIFY_BITS(d, 30);
71712b27 748 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 749 d += c * (R1 >> 4) + t1;
f8cce956
PW
750 VERIFY_BITS(d, 53);
751 VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
71712b27
GM
752 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
753 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 754 r[1] = d & M; d >>= 26;
f8cce956
PW
755 VERIFY_BITS(r[1], 26);
756 VERIFY_BITS(d, 27);
757 VERIFY_CHECK(d <= 0x4000000ULL);
71712b27 758 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 759 d += t2;
f8cce956 760 VERIFY_BITS(d, 27);
71712b27 761 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 762 r[2] = d;
f8cce956 763 VERIFY_BITS(r[2], 27);
71712b27 764 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
3231676b
PW
765}
766
b2c9681c 767SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
25b35c7e
GM
768 uint64_t c, d;
769 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
770 uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
771 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
772
f8cce956
PW
773 VERIFY_BITS(a[0], 30);
774 VERIFY_BITS(a[1], 30);
775 VERIFY_BITS(a[2], 30);
776 VERIFY_BITS(a[3], 30);
777 VERIFY_BITS(a[4], 30);
778 VERIFY_BITS(a[5], 30);
779 VERIFY_BITS(a[6], 30);
780 VERIFY_BITS(a[7], 30);
781 VERIFY_BITS(a[8], 30);
782 VERIFY_BITS(a[9], 26);
5dd421ba 783
71712b27
GM
784 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
785 * px is a shorthand for sum(a[i]*a[x-i], i=0..x).
786 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
787 */
5dd421ba 788
5dd421ba
PD
789 d = (uint64_t)(a[0]*2) * a[9]
790 + (uint64_t)(a[1]*2) * a[8]
791 + (uint64_t)(a[2]*2) * a[7]
792 + (uint64_t)(a[3]*2) * a[6]
793 + (uint64_t)(a[4]*2) * a[5];
71712b27
GM
794 /* VERIFY_BITS(d, 64); */
795 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
25b35c7e 796 t9 = d & M; d >>= 26;
f8cce956
PW
797 VERIFY_BITS(t9, 26);
798 VERIFY_BITS(d, 38);
71712b27 799 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
5dd421ba
PD
800
801 c = (uint64_t)a[0] * a[0];
f8cce956 802 VERIFY_BITS(c, 60);
71712b27 803 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
5dd421ba
PD
804 d += (uint64_t)(a[1]*2) * a[9]
805 + (uint64_t)(a[2]*2) * a[8]
806 + (uint64_t)(a[3]*2) * a[7]
807 + (uint64_t)(a[4]*2) * a[6]
808 + (uint64_t)a[5] * a[5];
f8cce956 809 VERIFY_BITS(d, 63);
71712b27 810 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
25b35c7e 811 u0 = d & M; d >>= 26; c += u0 * R0;
f8cce956
PW
812 VERIFY_BITS(u0, 26);
813 VERIFY_BITS(d, 37);
814 VERIFY_BITS(c, 61);
71712b27 815 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
25b35c7e 816 t0 = c & M; c >>= 26; c += u0 * R1;
f8cce956
PW
817 VERIFY_BITS(t0, 26);
818 VERIFY_BITS(c, 37);
71712b27
GM
819 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
820 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
5dd421ba
PD
821
822 c += (uint64_t)(a[0]*2) * a[1];
f8cce956 823 VERIFY_BITS(c, 62);
71712b27 824 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
5dd421ba
PD
825 d += (uint64_t)(a[2]*2) * a[9]
826 + (uint64_t)(a[3]*2) * a[8]
827 + (uint64_t)(a[4]*2) * a[7]
828 + (uint64_t)(a[5]*2) * a[6];
f8cce956 829 VERIFY_BITS(d, 63);
71712b27 830 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
25b35c7e 831 u1 = d & M; d >>= 26; c += u1 * R0;
f8cce956
PW
832 VERIFY_BITS(u1, 26);
833 VERIFY_BITS(d, 37);
834 VERIFY_BITS(c, 63);
71712b27 835 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
25b35c7e 836 t1 = c & M; c >>= 26; c += u1 * R1;
f8cce956
PW
837 VERIFY_BITS(t1, 26);
838 VERIFY_BITS(c, 38);
71712b27
GM
839 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
840 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
5dd421ba
PD
841
842 c += (uint64_t)(a[0]*2) * a[2]
843 + (uint64_t)a[1] * a[1];
f8cce956 844 VERIFY_BITS(c, 62);
71712b27 845 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
5dd421ba
PD
846 d += (uint64_t)(a[3]*2) * a[9]
847 + (uint64_t)(a[4]*2) * a[8]
848 + (uint64_t)(a[5]*2) * a[7]
849 + (uint64_t)a[6] * a[6];
f8cce956 850 VERIFY_BITS(d, 63);
71712b27 851 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
25b35c7e 852 u2 = d & M; d >>= 26; c += u2 * R0;
f8cce956
PW
853 VERIFY_BITS(u2, 26);
854 VERIFY_BITS(d, 37);
855 VERIFY_BITS(c, 63);
71712b27 856 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
25b35c7e 857 t2 = c & M; c >>= 26; c += u2 * R1;
f8cce956
PW
858 VERIFY_BITS(t2, 26);
859 VERIFY_BITS(c, 38);
71712b27
GM
860 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
861 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
5dd421ba
PD
862
863 c += (uint64_t)(a[0]*2) * a[3]
864 + (uint64_t)(a[1]*2) * a[2];
f8cce956 865 VERIFY_BITS(c, 63);
71712b27 866 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
5dd421ba
PD
867 d += (uint64_t)(a[4]*2) * a[9]
868 + (uint64_t)(a[5]*2) * a[8]
869 + (uint64_t)(a[6]*2) * a[7];
f8cce956 870 VERIFY_BITS(d, 63);
71712b27 871 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
25b35c7e 872 u3 = d & M; d >>= 26; c += u3 * R0;
f8cce956
PW
873 VERIFY_BITS(u3, 26);
874 VERIFY_BITS(d, 37);
71712b27
GM
875 /* VERIFY_BITS(c, 64); */
876 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
25b35c7e 877 t3 = c & M; c >>= 26; c += u3 * R1;
f8cce956
PW
878 VERIFY_BITS(t3, 26);
879 VERIFY_BITS(c, 39);
71712b27
GM
880 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
881 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
5dd421ba
PD
882
883 c += (uint64_t)(a[0]*2) * a[4]
884 + (uint64_t)(a[1]*2) * a[3]
885 + (uint64_t)a[2] * a[2];
f8cce956 886 VERIFY_BITS(c, 63);
71712b27 887 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
5dd421ba
PD
888 d += (uint64_t)(a[5]*2) * a[9]
889 + (uint64_t)(a[6]*2) * a[8]
890 + (uint64_t)a[7] * a[7];
f8cce956 891 VERIFY_BITS(d, 62);
71712b27 892 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
25b35c7e 893 u4 = d & M; d >>= 26; c += u4 * R0;
f8cce956
PW
894 VERIFY_BITS(u4, 26);
895 VERIFY_BITS(d, 36);
71712b27
GM
896 /* VERIFY_BITS(c, 64); */
897 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
25b35c7e 898 t4 = c & M; c >>= 26; c += u4 * R1;
f8cce956
PW
899 VERIFY_BITS(t4, 26);
900 VERIFY_BITS(c, 39);
71712b27
GM
901 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
902 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
5dd421ba
PD
903
904 c += (uint64_t)(a[0]*2) * a[5]
905 + (uint64_t)(a[1]*2) * a[4]
906 + (uint64_t)(a[2]*2) * a[3];
f8cce956 907 VERIFY_BITS(c, 63);
71712b27 908 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
909 d += (uint64_t)(a[6]*2) * a[9]
910 + (uint64_t)(a[7]*2) * a[8];
f8cce956 911 VERIFY_BITS(d, 62);
71712b27 912 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
25b35c7e 913 u5 = d & M; d >>= 26; c += u5 * R0;
f8cce956
PW
914 VERIFY_BITS(u5, 26);
915 VERIFY_BITS(d, 36);
71712b27
GM
916 /* VERIFY_BITS(c, 64); */
917 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
25b35c7e 918 t5 = c & M; c >>= 26; c += u5 * R1;
f8cce956
PW
919 VERIFY_BITS(t5, 26);
920 VERIFY_BITS(c, 39);
71712b27
GM
921 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
922 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
923
924 c += (uint64_t)(a[0]*2) * a[6]
925 + (uint64_t)(a[1]*2) * a[5]
926 + (uint64_t)(a[2]*2) * a[4]
927 + (uint64_t)a[3] * a[3];
f8cce956 928 VERIFY_BITS(c, 63);
71712b27 929 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
930 d += (uint64_t)(a[7]*2) * a[9]
931 + (uint64_t)a[8] * a[8];
f8cce956 932 VERIFY_BITS(d, 61);
71712b27 933 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 934 u6 = d & M; d >>= 26; c += u6 * R0;
f8cce956
PW
935 VERIFY_BITS(u6, 26);
936 VERIFY_BITS(d, 35);
71712b27
GM
937 /* VERIFY_BITS(c, 64); */
938 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 939 t6 = c & M; c >>= 26; c += u6 * R1;
f8cce956
PW
940 VERIFY_BITS(t6, 26);
941 VERIFY_BITS(c, 39);
71712b27
GM
942 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
943 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
944
945 c += (uint64_t)(a[0]*2) * a[7]
946 + (uint64_t)(a[1]*2) * a[6]
947 + (uint64_t)(a[2]*2) * a[5]
948 + (uint64_t)(a[3]*2) * a[4];
71712b27 949 /* VERIFY_BITS(c, 64); */
f8cce956 950 VERIFY_CHECK(c <= 0x8000007C00000007ULL);
71712b27 951 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 952 d += (uint64_t)(a[8]*2) * a[9];
f8cce956 953 VERIFY_BITS(d, 58);
71712b27 954 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 955 u7 = d & M; d >>= 26; c += u7 * R0;
f8cce956
PW
956 VERIFY_BITS(u7, 26);
957 VERIFY_BITS(d, 32);
71712b27 958 /* VERIFY_BITS(c, 64); */
f8cce956 959 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
71712b27 960 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 961 t7 = c & M; c >>= 26; c += u7 * R1;
f8cce956
PW
962 VERIFY_BITS(t7, 26);
963 VERIFY_BITS(c, 38);
71712b27
GM
964 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
965 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
966
967 c += (uint64_t)(a[0]*2) * a[8]
968 + (uint64_t)(a[1]*2) * a[7]
969 + (uint64_t)(a[2]*2) * a[6]
970 + (uint64_t)(a[3]*2) * a[5]
971 + (uint64_t)a[4] * a[4];
71712b27 972 /* VERIFY_BITS(c, 64); */
f8cce956 973 VERIFY_CHECK(c <= 0x9000007B80000008ULL);
71712b27 974 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 975 d += (uint64_t)a[9] * a[9];
f8cce956 976 VERIFY_BITS(d, 57);
71712b27 977 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
25b35c7e 978 u8 = d & M; d >>= 26; c += u8 * R0;
f8cce956
PW
979 VERIFY_BITS(u8, 26);
980 VERIFY_BITS(d, 31);
71712b27 981 /* VERIFY_BITS(c, 64); */
f8cce956 982 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
71712b27 983 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
984
985 r[3] = t3;
f8cce956 986 VERIFY_BITS(r[3], 26);
71712b27 987 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 988 r[4] = t4;
f8cce956 989 VERIFY_BITS(r[4], 26);
71712b27 990 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 991 r[5] = t5;
f8cce956 992 VERIFY_BITS(r[5], 26);
71712b27 993 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 994 r[6] = t6;
f8cce956 995 VERIFY_BITS(r[6], 26);
71712b27 996 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 997 r[7] = t7;
f8cce956 998 VERIFY_BITS(r[7], 26);
71712b27 999 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
1000
1001 r[8] = c & M; c >>= 26; c += u8 * R1;
f8cce956
PW
1002 VERIFY_BITS(r[8], 26);
1003 VERIFY_BITS(c, 39);
71712b27
GM
1004 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1005 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1006 c += d * R0 + t9;
f8cce956 1007 VERIFY_BITS(c, 45);
71712b27 1008 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1009 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
f8cce956
PW
1010 VERIFY_BITS(r[9], 22);
1011 VERIFY_BITS(c, 46);
71712b27
GM
1012 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1013 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1014 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
1015
1016 d = c * (R0 >> 4) + t0;
f8cce956 1017 VERIFY_BITS(d, 56);
71712b27 1018 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1019 r[0] = d & M; d >>= 26;
f8cce956
PW
1020 VERIFY_BITS(r[0], 26);
1021 VERIFY_BITS(d, 30);
71712b27 1022 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1023 d += c * (R1 >> 4) + t1;
f8cce956
PW
1024 VERIFY_BITS(d, 53);
1025 VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
71712b27
GM
1026 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
1027 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1028 r[1] = d & M; d >>= 26;
f8cce956
PW
1029 VERIFY_BITS(r[1], 26);
1030 VERIFY_BITS(d, 27);
1031 VERIFY_CHECK(d <= 0x4000000ULL);
71712b27 1032 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1033 d += t2;
f8cce956 1034 VERIFY_BITS(d, 27);
71712b27 1035 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 1036 r[2] = d;
f8cce956 1037 VERIFY_BITS(r[2], 27);
71712b27 1038 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
3231676b
PW
1039}
1040
1041
be82e92f 1042static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) {
3231676b 1043#ifdef VERIFY
1c7fa133
PW
1044 VERIFY_CHECK(a->magnitude <= 8);
1045 VERIFY_CHECK(b->magnitude <= 8);
7d681ac6
PD
1046 secp256k1_fe_verify(a);
1047 secp256k1_fe_verify(b);
be82e92f 1048 VERIFY_CHECK(r != b);
7d681ac6 1049#endif
b2c9681c 1050 secp256k1_fe_mul_inner(r->n, a->n, b->n);
7d681ac6 1051#ifdef VERIFY
3231676b
PW
1052 r->magnitude = 1;
1053 r->normalized = 0;
7d681ac6 1054 secp256k1_fe_verify(r);
3231676b 1055#endif
3231676b
PW
1056}
1057
a4a43d75 1058static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
3231676b 1059#ifdef VERIFY
1c7fa133 1060 VERIFY_CHECK(a->magnitude <= 8);
7d681ac6
PD
1061 secp256k1_fe_verify(a);
1062#endif
b2c9681c 1063 secp256k1_fe_sqr_inner(r->n, a->n);
7d681ac6 1064#ifdef VERIFY
3231676b
PW
1065 r->magnitude = 1;
1066 r->normalized = 0;
7d681ac6 1067 secp256k1_fe_verify(r);
3231676b 1068#endif
3231676b 1069}
7a4b7691 1070
bb0ea50d
GM
1071static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) {
1072 uint32_t mask0, mask1;
1073 mask0 = flag + ~((uint32_t)0);
1074 mask1 = ~mask0;
1075 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
1076 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
1077 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
1078 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
1079 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
1080 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
1081 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
1082 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
1083 r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
1084 r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
1085#ifdef VERIFY
a0601cd7
PD
1086 if (a->magnitude > r->magnitude) {
1087 r->magnitude = a->magnitude;
1088 }
1089 r->normalized &= a->normalized;
bb0ea50d
GM
1090#endif
1091}
1092
bf2e1ac7 1093static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
27bc1311
GM
1094 uint32_t mask0, mask1;
1095 mask0 = flag + ~((uint32_t)0);
1096 mask1 = ~mask0;
ff889f7d
PW
1097 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
1098 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
1099 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
1100 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
1101 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
1102 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
1103 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
1104 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
1105}
1106
1107static void secp256k1_fe_to_storage(secp256k1_fe_storage_t *r, const secp256k1_fe_t *a) {
1108#ifdef VERIFY
1109 VERIFY_CHECK(a->normalized);
1110#endif
1111 r->n[0] = a->n[0] | a->n[1] << 26;
1112 r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
1113 r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
1114 r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
1115 r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
1116 r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
1117 r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
1118 r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
1119}
1120
bf2e1ac7 1121static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_storage_t *a) {
ff889f7d
PW
1122 r->n[0] = a->n[0] & 0x3FFFFFFUL;
1123 r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
1124 r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
1125 r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
1126 r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
1127 r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
1128 r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
1129 r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
1130 r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
1131 r->n[9] = a->n[7] >> 10;
1132#ifdef VERIFY
1133 r->magnitude = 1;
1134 r->normalized = 1;
1135#endif
1136}
1137
7a4b7691 1138#endif
This page took 0.195696 seconds and 4 git commands to generate.