]> Git Repo - secp256k1.git/blame - src/field_5x52_int128_impl.h
Implement endomorphism optimization for secp256k1_ecmult_const
[secp256k1.git] / src / field_5x52_int128_impl.h
CommitLineData
71712b27
GM
1/**********************************************************************
2 * Copyright (c) 2013, 2014 Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
0a433ea2 6
7a4b7691
PW
7#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
8#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
9
10#include <stdint.h>
938d3c27 11
a5185987
PW
/* VERIFY_BITS(x, n) checks that x fits in n bits, i.e. that (x >> n) == 0.
 * The check is routed through VERIFY_CHECK and is only compiled in when
 * VERIFY is defined; otherwise the macro expands to an empty statement and
 * its arguments are not evaluated. */
#ifdef VERIFY
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#else
#define VERIFY_BITS(x, n) do { } while(0)
#endif
17
b2c9681c 18SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
4be8d6fc 19 uint128_t c, d;
25b35c7e
GM
20 uint64_t t3, t4, tx, u0;
21 uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
22 const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
23
a5185987
PW
24 VERIFY_BITS(a[0], 56);
25 VERIFY_BITS(a[1], 56);
26 VERIFY_BITS(a[2], 56);
27 VERIFY_BITS(a[3], 56);
28 VERIFY_BITS(a[4], 52);
29 VERIFY_BITS(b[0], 56);
30 VERIFY_BITS(b[1], 56);
31 VERIFY_BITS(b[2], 56);
32 VERIFY_BITS(b[3], 56);
33 VERIFY_BITS(b[4], 52);
be82e92f 34 VERIFY_CHECK(r != b);
938d3c27 35
71712b27
GM
36 /* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
37 * px is a shorthand for sum(a[i]*b[x-i], i=0..x).
38 * Note that [x 0 0 0 0 0] = [x*R].
39 */
5dd421ba 40
4be8d6fc
GM
41 d = (uint128_t)a0 * b[3]
42 + (uint128_t)a1 * b[2]
43 + (uint128_t)a2 * b[1]
44 + (uint128_t)a3 * b[0];
a5185987 45 VERIFY_BITS(d, 114);
71712b27 46 /* [d 0 0 0] = [p3 0 0 0] */
4be8d6fc 47 c = (uint128_t)a4 * b[4];
a5185987 48 VERIFY_BITS(c, 112);
71712b27 49 /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
5dd421ba 50 d += (c & M) * R; c >>= 52;
a5185987
PW
51 VERIFY_BITS(d, 115);
52 VERIFY_BITS(c, 60);
71712b27 53 /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
25b35c7e 54 t3 = d & M; d >>= 52;
a5185987
PW
55 VERIFY_BITS(t3, 52);
56 VERIFY_BITS(d, 63);
71712b27 57 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
5dd421ba 58
4be8d6fc
GM
59 d += (uint128_t)a0 * b[4]
60 + (uint128_t)a1 * b[3]
61 + (uint128_t)a2 * b[2]
62 + (uint128_t)a3 * b[1]
63 + (uint128_t)a4 * b[0];
a5185987 64 VERIFY_BITS(d, 115);
71712b27 65 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
5dd421ba 66 d += c * R;
a5185987 67 VERIFY_BITS(d, 116);
71712b27 68 /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
25b35c7e 69 t4 = d & M; d >>= 52;
a5185987
PW
70 VERIFY_BITS(t4, 52);
71 VERIFY_BITS(d, 64);
71712b27 72 /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
25b35c7e 73 tx = (t4 >> 48); t4 &= (M >> 4);
a5185987
PW
74 VERIFY_BITS(tx, 4);
75 VERIFY_BITS(t4, 48);
71712b27 76 /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
5dd421ba 77
4be8d6fc 78 c = (uint128_t)a0 * b[0];
a5185987 79 VERIFY_BITS(c, 112);
71712b27 80 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
4be8d6fc
GM
81 d += (uint128_t)a1 * b[4]
82 + (uint128_t)a2 * b[3]
83 + (uint128_t)a3 * b[2]
84 + (uint128_t)a4 * b[1];
a5185987 85 VERIFY_BITS(d, 115);
71712b27 86 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
25b35c7e 87 u0 = d & M; d >>= 52;
a5185987
PW
88 VERIFY_BITS(u0, 52);
89 VERIFY_BITS(d, 63);
71712b27
GM
90 /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
91 /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
5dd421ba 92 u0 = (u0 << 4) | tx;
a5185987 93 VERIFY_BITS(u0, 56);
71712b27 94 /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
4be8d6fc 95 c += (uint128_t)u0 * (R >> 4);
a5185987 96 VERIFY_BITS(c, 115);
71712b27 97 /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
4d4eeea4
PW
98 r[0] = c & M; c >>= 52;
99 VERIFY_BITS(r[0], 52);
a5185987 100 VERIFY_BITS(c, 61);
4d4eeea4 101 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
5dd421ba 102
4be8d6fc
GM
103 c += (uint128_t)a0 * b[1]
104 + (uint128_t)a1 * b[0];
a5185987 105 VERIFY_BITS(c, 114);
4d4eeea4 106 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
4be8d6fc
GM
107 d += (uint128_t)a2 * b[4]
108 + (uint128_t)a3 * b[3]
109 + (uint128_t)a4 * b[2];
a5185987 110 VERIFY_BITS(d, 114);
4d4eeea4 111 /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
5dd421ba 112 c += (d & M) * R; d >>= 52;
a5185987
PW
113 VERIFY_BITS(c, 115);
114 VERIFY_BITS(d, 62);
4d4eeea4
PW
115 /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
116 r[1] = c & M; c >>= 52;
117 VERIFY_BITS(r[1], 52);
a5185987 118 VERIFY_BITS(c, 63);
4d4eeea4 119 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
5dd421ba 120
4be8d6fc
GM
121 c += (uint128_t)a0 * b[2]
122 + (uint128_t)a1 * b[1]
123 + (uint128_t)a2 * b[0];
a5185987 124 VERIFY_BITS(c, 114);
4d4eeea4 125 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
4be8d6fc
GM
126 d += (uint128_t)a3 * b[4]
127 + (uint128_t)a4 * b[3];
a5185987 128 VERIFY_BITS(d, 114);
4d4eeea4 129 /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 130 c += (d & M) * R; d >>= 52;
a5185987
PW
131 VERIFY_BITS(c, 115);
132 VERIFY_BITS(d, 62);
4d4eeea4 133 /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 134
71712b27 135 /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 136 r[2] = c & M; c >>= 52;
a5185987
PW
137 VERIFY_BITS(r[2], 52);
138 VERIFY_BITS(c, 63);
71712b27 139 /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 140 c += d * R + t3;;
a5185987 141 VERIFY_BITS(c, 100);
71712b27 142 /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 143 r[3] = c & M; c >>= 52;
a5185987
PW
144 VERIFY_BITS(r[3], 52);
145 VERIFY_BITS(c, 48);
71712b27 146 /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 147 c += t4;
a5185987 148 VERIFY_BITS(c, 49);
71712b27
GM
149 /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
150 r[4] = c;
a5185987 151 VERIFY_BITS(r[4], 49);
71712b27 152 /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
938d3c27
PW
153}
154
b2c9681c 155SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
4be8d6fc 156 uint128_t c, d;
25b35c7e
GM
157 uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
158 int64_t t3, t4, tx, u0;
159 const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
160
a5185987
PW
161 VERIFY_BITS(a[0], 56);
162 VERIFY_BITS(a[1], 56);
163 VERIFY_BITS(a[2], 56);
164 VERIFY_BITS(a[3], 56);
165 VERIFY_BITS(a[4], 52);
938d3c27 166
71712b27
GM
167 /** [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
168 * px is a shorthand for sum(a[i]*a[x-i], i=0..x).
169 * Note that [x 0 0 0 0 0] = [x*R].
170 */
5dd421ba 171
4be8d6fc
GM
172 d = (uint128_t)(a0*2) * a3
173 + (uint128_t)(a1*2) * a2;
a5185987 174 VERIFY_BITS(d, 114);
71712b27 175 /* [d 0 0 0] = [p3 0 0 0] */
4be8d6fc 176 c = (uint128_t)a4 * a4;
a5185987 177 VERIFY_BITS(c, 112);
71712b27 178 /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
5dd421ba 179 d += (c & M) * R; c >>= 52;
a5185987
PW
180 VERIFY_BITS(d, 115);
181 VERIFY_BITS(c, 60);
71712b27 182 /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
25b35c7e 183 t3 = d & M; d >>= 52;
a5185987
PW
184 VERIFY_BITS(t3, 52);
185 VERIFY_BITS(d, 63);
71712b27 186 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
5dd421ba
PD
187
188 a4 *= 2;
4be8d6fc
GM
189 d += (uint128_t)a0 * a4
190 + (uint128_t)(a1*2) * a3
191 + (uint128_t)a2 * a2;
a5185987 192 VERIFY_BITS(d, 115);
71712b27 193 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
5dd421ba 194 d += c * R;
a5185987 195 VERIFY_BITS(d, 116);
71712b27 196 /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
25b35c7e 197 t4 = d & M; d >>= 52;
a5185987
PW
198 VERIFY_BITS(t4, 52);
199 VERIFY_BITS(d, 64);
71712b27 200 /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
25b35c7e 201 tx = (t4 >> 48); t4 &= (M >> 4);
a5185987
PW
202 VERIFY_BITS(tx, 4);
203 VERIFY_BITS(t4, 48);
71712b27 204 /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
5dd421ba 205
4be8d6fc 206 c = (uint128_t)a0 * a0;
a5185987 207 VERIFY_BITS(c, 112);
71712b27 208 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
4be8d6fc
GM
209 d += (uint128_t)a1 * a4
210 + (uint128_t)(a2*2) * a3;
a5185987 211 VERIFY_BITS(d, 114);
71712b27 212 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
25b35c7e 213 u0 = d & M; d >>= 52;
a5185987
PW
214 VERIFY_BITS(u0, 52);
215 VERIFY_BITS(d, 62);
71712b27
GM
216 /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
217 /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
5dd421ba 218 u0 = (u0 << 4) | tx;
a5185987 219 VERIFY_BITS(u0, 56);
71712b27 220 /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
4be8d6fc 221 c += (uint128_t)u0 * (R >> 4);
a5185987 222 VERIFY_BITS(c, 113);
71712b27 223 /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
5dd421ba 224 r[0] = c & M; c >>= 52;
a5185987
PW
225 VERIFY_BITS(r[0], 52);
226 VERIFY_BITS(c, 61);
71712b27 227 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
5dd421ba
PD
228
229 a0 *= 2;
4be8d6fc 230 c += (uint128_t)a0 * a1;
a5185987 231 VERIFY_BITS(c, 114);
71712b27 232 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
4be8d6fc
GM
233 d += (uint128_t)a2 * a4
234 + (uint128_t)a3 * a3;
a5185987 235 VERIFY_BITS(d, 114);
71712b27 236 /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
5dd421ba 237 c += (d & M) * R; d >>= 52;
a5185987
PW
238 VERIFY_BITS(c, 115);
239 VERIFY_BITS(d, 62);
71712b27 240 /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
5dd421ba 241 r[1] = c & M; c >>= 52;
a5185987
PW
242 VERIFY_BITS(r[1], 52);
243 VERIFY_BITS(c, 63);
71712b27 244 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
5dd421ba 245
4be8d6fc
GM
246 c += (uint128_t)a0 * a2
247 + (uint128_t)a1 * a1;
a5185987 248 VERIFY_BITS(c, 114);
71712b27 249 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
4be8d6fc 250 d += (uint128_t)a3 * a4;
a5185987 251 VERIFY_BITS(d, 114);
71712b27 252 /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 253 c += (d & M) * R; d >>= 52;
a5185987
PW
254 VERIFY_BITS(c, 115);
255 VERIFY_BITS(d, 62);
71712b27 256 /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 257 r[2] = c & M; c >>= 52;
a5185987
PW
258 VERIFY_BITS(r[2], 52);
259 VERIFY_BITS(c, 63);
71712b27 260 /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba
PD
261
262 c += d * R + t3;;
a5185987 263 VERIFY_BITS(c, 100);
71712b27 264 /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 265 r[3] = c & M; c >>= 52;
a5185987
PW
266 VERIFY_BITS(r[3], 52);
267 VERIFY_BITS(c, 48);
71712b27 268 /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
5dd421ba 269 c += t4;
a5185987 270 VERIFY_BITS(c, 49);
71712b27
GM
271 /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
272 r[4] = c;
a5185987 273 VERIFY_BITS(r[4], 49);
71712b27 274 /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
938d3c27 275}
7a4b7691
PW
276
277#endif
This page took 0.069984 seconds and 4 git commands to generate.