Implement endomorphism optimization for secp256k1_ecmult_const
[secp256k1.git] / src / ecmult_impl.h
CommitLineData
71712b27
GM
1/**********************************************************************
2 * Copyright (c) 2013, 2014 Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
0a433ea2 6
7a4b7691
PW
7#ifndef _SECP256K1_ECMULT_IMPL_H_
8#define _SECP256K1_ECMULT_IMPL_H_
9
11ab5622 10#include "group.h"
0b730597 11#include "scalar.h"
11ab5622 12#include "ecmult.h"
607884fc 13
71712b27 14/* optimal for 128-bit and 256-bit exponents. */
607884fc
PW
15#define WINDOW_A 5
16
71712b27 17/** larger numbers may result in slightly better performance, at the cost of
3ce74b12 18 exponentially larger precomputed tables. */
665775b2 19#ifdef USE_ENDOMORPHISM
3ce74b12 20/** Two tables for window size 15: 1.375 MiB. */
665775b2 21#define WINDOW_G 15
3ce74b12
PW
22#else
23/** One table for window size 16: 1.375 MiB. */
24#define WINDOW_G 16
665775b2 25#endif
607884fc 26
4f9791ab
PD
27/** The number of entries a table with precomputed multiples needs to have. */
28#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
29
30/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
31 * the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
32 * contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
2d5a186c 33 * Prej's Z values are undefined, except for the last value.
b1483f87 34 */
4f9791ab 35static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) {
f735446c 36 secp256k1_gej_t d;
2d5a186c 37 secp256k1_ge_t a_ge, d_ge;
f735446c 38 int i;
4f9791ab
PD
39
40 VERIFY_CHECK(!a->infinity);
41
2d5a186c
PD
42 secp256k1_gej_double_var(&d, a, NULL);
43
44 /*
45 * Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate
46 * of 'd', and scale the 1P starting value's x/y coordinates without changing its z.
47 */
48 d_ge.x = d.x;
49 d_ge.y = d.y;
50 d_ge.infinity = 0;
51
52 secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
53 prej[0].x = a_ge.x;
54 prej[0].y = a_ge.y;
55 prej[0].z = a->z;
56 prej[0].infinity = 0;
57
58 zr[0] = d.z;
4f9791ab 59 for (i = 1; i < n; i++) {
2d5a186c 60 secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
26320197 61 }
2d5a186c
PD
62
63 /*
64 * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
65 * the final point's z coordinate is actually used though, so just update that.
66 */
67 secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
b1483f87 68}
f11ff5be 69
4f9791ab
PD
70/** Fill a table 'pre' with precomputed odd multiples of a.
71 *
72 * There are two versions of this function:
73 * - secp256k1_ecmult_odd_multiples_table_globalz_windowa which brings its
74 * resulting point set to a single constant Z denominator, stores the X and Y
75 * coordinates as ge_storage points in pre, and stores the global Z in rz.
76 * It only operates on tables sized for WINDOW_A wnaf multiples.
77 * - secp256k1_ecmult_odd_multiples_table_storage_var, which converts its
78 * resulting point set to actually affine points, and stores those in pre.
79 * It operates on tables of any size, but uses heap-allocated temporaries.
80 *
81 * To compute a*P + b*G, we compute a table for P using the first function,
82 * and for G using the second (which requires an inverse, but it only needs to
83 * happen once).
84 */
85static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge_t *pre, secp256k1_fe_t *globalz, const secp256k1_gej_t *a) {
86 secp256k1_gej_t prej[ECMULT_TABLE_SIZE(WINDOW_A)];
87 secp256k1_fe_t zr[ECMULT_TABLE_SIZE(WINDOW_A)];
88
89 /* Compute the odd multiples in Jacobian form. */
90 secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
91 /* Bring them to the same Z denominator. */
92 secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
93}
94
995c5487
PW
95static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage_t *pre, const secp256k1_gej_t *a, const callback_t *cb) {
96 secp256k1_gej_t *prej = (secp256k1_gej_t*)checked_malloc(cb, sizeof(secp256k1_gej_t) * n);
97 secp256k1_ge_t *prea = (secp256k1_ge_t*)checked_malloc(cb, sizeof(secp256k1_ge_t) * n);
98 secp256k1_fe_t *zr = (secp256k1_fe_t*)checked_malloc(cb, sizeof(secp256k1_fe_t) * n);
f735446c 99 int i;
4f9791ab
PD
100
101 /* Compute the odd multiples in Jacobian form. */
102 secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
103 /* Convert them in batch to affine coordinates. */
104 secp256k1_ge_set_table_gej_var(n, prea, prej, zr);
105 /* Convert them to compact storage form. */
106 for (i = 0; i < n; i++) {
41f84554
PW
107 secp256k1_ge_to_storage(&pre[i], &prea[i]);
108 }
4f9791ab 109
41f84554 110 free(prea);
4f9791ab
PD
111 free(prej);
112 free(zr);
b1483f87 113}
f11ff5be 114
b1483f87
PW
115/** The following two macro retrieves a particular odd multiple from a table
116 * of precomputed multiples. */
4f9791ab 117#define ECMULT_TABLE_GET_GE(r,pre,n,w) do { \
1c7fa133
PW
118 VERIFY_CHECK(((n) & 1) == 1); \
119 VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
120 VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \
26320197 121 if ((n) > 0) { \
b1483f87 122 *(r) = (pre)[((n)-1)/2]; \
26320197 123 } else { \
4f9791ab 124 secp256k1_ge_neg((r), &(pre)[(-(n)-1)/2]); \
26320197 125 } \
41f84554 126} while(0)
4f9791ab 127
41f84554
PW
128#define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
129 VERIFY_CHECK(((n) & 1) == 1); \
130 VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
131 VERIFY_CHECK((n) <= ((1 << ((w)-1)) - 1)); \
26320197 132 if ((n) > 0) { \
41f84554 133 secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \
26320197 134 } else { \
41f84554
PW
135 secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \
136 secp256k1_ge_neg((r), (r)); \
137 } \
b1483f87 138} while(0)
b1483f87 139
a9b6595e
PW
140static void secp256k1_ecmult_context_init(secp256k1_ecmult_context_t *ctx) {
141 ctx->pre_g = NULL;
665775b2 142#ifdef USE_ENDOMORPHISM
a9b6595e 143 ctx->pre_g_128 = NULL;
665775b2 144#endif
a9b6595e 145}
b1483f87 146
995c5487 147static void secp256k1_ecmult_context_build(secp256k1_ecmult_context_t *ctx, const callback_t *cb) {
f735446c 148 secp256k1_gej_t gj;
a9b6595e
PW
149
150 if (ctx->pre_g != NULL) {
b1483f87 151 return;
26320197 152 }
b1483f87 153
71712b27 154 /* get the generator */
f735446c 155 secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
b1483f87 156
995c5487 157 ctx->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
b1483f87 158
71712b27 159 /* precompute the tables with odd multiples */
995c5487 160 secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb);
f735446c 161
665775b2 162#ifdef USE_ENDOMORPHISM
f735446c
GM
163 {
164 secp256k1_gej_t g_128j;
165 int i;
a9b6595e 166
995c5487 167 ctx->pre_g_128 = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, sizeof((*ctx->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
a9b6595e 168
f735446c
GM
169 /* calculate 2^128*generator */
170 g_128j = gj;
26320197 171 for (i = 0; i < 128; i++) {
4f9791ab 172 secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
26320197 173 }
995c5487 174 secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb);
f735446c 175 }
665775b2 176#endif
04e34d18
PW
177}
178
d899b5b6 179static void secp256k1_ecmult_context_clone(secp256k1_ecmult_context_t *dst,
995c5487 180 const secp256k1_ecmult_context_t *src, const callback_t *cb) {
d899b5b6
AP
181 if (src->pre_g == NULL) {
182 dst->pre_g = NULL;
183 } else {
184 size_t size = sizeof((*dst->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G);
995c5487 185 dst->pre_g = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, size);
d899b5b6
AP
186 memcpy(dst->pre_g, src->pre_g, size);
187 }
188#ifdef USE_ENDOMORPHISM
189 if (src->pre_g_128 == NULL) {
190 dst->pre_g_128 = NULL;
191 } else {
192 size_t size = sizeof((*dst->pre_g_128)[0]) * ECMULT_TABLE_SIZE(WINDOW_G);
995c5487 193 dst->pre_g_128 = (secp256k1_ge_storage_t (*)[])checked_malloc(cb, size);
d899b5b6
AP
194 memcpy(dst->pre_g_128, src->pre_g_128, size);
195 }
196#endif
197}
198
a9b6595e
PW
199static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context_t *ctx) {
200 return ctx->pre_g != NULL;
201}
607884fc 202
a9b6595e
PW
203static void secp256k1_ecmult_context_clear(secp256k1_ecmult_context_t *ctx) {
204 free(ctx->pre_g);
205#ifdef USE_ENDOMORPHISM
206 free(ctx->pre_g_128);
207#endif
208 secp256k1_ecmult_context_init(ctx);
b1483f87 209}
607884fc 210
b1483f87
PW
211/** Convert a number to WNAF notation. The number becomes represented by sum(2^i * wnaf[i], i=0..bits),
212 * with the following guarantees:
213 * - each wnaf[i] is either 0, or an odd integer between -(1<<(w-1) - 1) and (1<<(w-1) - 1)
214 * - two non-zero entries in wnaf are separated by at least w-1 zeroes.
0b730597 215 * - the number of set values in wnaf is returned. This number is at most 256, and at most one more
55399c23 216 * than the number of bits in the (absolute value) of the input.
b1483f87 217 */
55399c23 218static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar_t *a, int w) {
f24041d6 219 secp256k1_scalar_t s = *a;
55399c23 220 int last_set_bit = -1;
f735446c 221 int bit = 0;
f24041d6 222 int sign = 1;
145cc6ea 223 int carry = 0;
f735446c 224
55399c23
PD
225 VERIFY_CHECK(wnaf != NULL);
226 VERIFY_CHECK(0 <= len && len <= 256);
227 VERIFY_CHECK(a != NULL);
228 VERIFY_CHECK(2 <= w && w <= 31);
229
230 memset(wnaf, 0, len * sizeof(wnaf[0]));
231
0b730597
PW
232 if (secp256k1_scalar_get_bits(&s, 255, 1)) {
233 secp256k1_scalar_negate(&s, &s);
f24041d6 234 sign = -1;
0b730597
PW
235 }
236
55399c23 237 while (bit < len) {
f735446c
GM
238 int now;
239 int word;
145cc6ea 240 if (secp256k1_scalar_get_bits(&s, bit, 1) == (unsigned int)carry) {
0b730597
PW
241 bit++;
242 continue;
243 }
145cc6ea 244
f735446c 245 now = w;
55399c23
PD
246 if (now > len - bit) {
247 now = len - bit;
607884fc 248 }
145cc6ea
PD
249
250 word = secp256k1_scalar_get_bits_var(&s, bit, now) + carry;
251
252 carry = (word >> (w-1)) & 1;
253 word -= carry << w;
254
55399c23
PD
255 wnaf[bit] = sign * word;
256 last_set_bit = bit;
257
0b730597 258 bit += now;
607884fc 259 }
55399c23
PD
260#ifdef VERIFY
261 CHECK(carry == 0);
262 while (bit < 256) {
263 CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0);
264 }
265#endif
266 return last_set_bit + 1;
607884fc
PW
267}
268
a9b6595e 269static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_scalar_t *na, const secp256k1_scalar_t *ng) {
4f9791ab 270 secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
f735446c 271 secp256k1_ge_t tmpa;
4f9791ab 272 secp256k1_fe_t Z;
399c03f2 273#ifdef USE_ENDOMORPHISM
4f9791ab 274 secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
f24041d6 275 secp256k1_scalar_t na_1, na_lam;
f735446c
GM
276 /* Splitted G factors. */
277 secp256k1_scalar_t ng_1, ng_128;
278 int wnaf_na_1[130];
279 int wnaf_na_lam[130];
280 int bits_na_1;
281 int bits_na_lam;
282 int wnaf_ng_1[129];
283 int bits_ng_1;
284 int wnaf_ng_128[129];
285 int bits_ng_128;
286#else
287 int wnaf_na[256];
288 int bits_na;
55399c23 289 int wnaf_ng[256];
f735446c
GM
290 int bits_ng;
291#endif
292 int i;
293 int bits;
294
295#ifdef USE_ENDOMORPHISM
71712b27 296 /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
ed35d43a 297 secp256k1_scalar_split_lambda(&na_1, &na_lam, na);
b1483f87 298
71712b27 299 /* build wnaf representation for na_1 and na_lam. */
55399c23
PD
300 bits_na_1 = secp256k1_ecmult_wnaf(wnaf_na_1, 130, &na_1, WINDOW_A);
301 bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, 130, &na_lam, WINDOW_A);
c35ff1ea
PW
302 VERIFY_CHECK(bits_na_1 <= 130);
303 VERIFY_CHECK(bits_na_lam <= 130);
f735446c 304 bits = bits_na_1;
26320197
GM
305 if (bits_na_lam > bits) {
306 bits = bits_na_lam;
307 }
399c03f2 308#else
71712b27 309 /* build wnaf representation for na. */
55399c23 310 bits_na = secp256k1_ecmult_wnaf(wnaf_na, 256, na, WINDOW_A);
f735446c 311 bits = bits_na;
399c03f2 312#endif
b1483f87 313
4f9791ab
PD
314 /* Calculate odd multiples of a.
315 * All multiples are brought to the same Z 'denominator', which is stored
316 * in Z. Due to secp256k1' isomorphism we can do all operations pretending
317 * that the Z coordinate was 1, use affine addition formulae, and correct
318 * the Z coordinate of the result once at the end.
319 * The exception is the precomputed G table points, which are actually
320 * affine. Compared to the base used for other points, they have a Z ratio
321 * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
322 * isomorphism to efficiently add with a known Z inverse.
323 */
324 secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a);
399c03f2 325
d7fd4d0f 326#ifdef USE_ENDOMORPHISM
26320197 327 for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
4f9791ab 328 secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
26320197 329 }
d7fd4d0f 330
71712b27 331 /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
f24041d6 332 secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
399c03f2 333
71712b27 334 /* Build wnaf representation for ng_1 and ng_128 */
55399c23
PD
335 bits_ng_1 = secp256k1_ecmult_wnaf(wnaf_ng_1, 129, &ng_1, WINDOW_G);
336 bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G);
26320197
GM
337 if (bits_ng_1 > bits) {
338 bits = bits_ng_1;
339 }
340 if (bits_ng_128 > bits) {
341 bits = bits_ng_128;
342 }
665775b2 343#else
55399c23 344 bits_ng = secp256k1_ecmult_wnaf(wnaf_ng, 256, ng, WINDOW_G);
26320197
GM
345 if (bits_ng > bits) {
346 bits = bits_ng;
347 }
665775b2 348#endif
b1483f87
PW
349
350 secp256k1_gej_set_infinity(r);
607884fc 351
4f9791ab 352 for (i = bits - 1; i >= 0; i--) {
b1483f87 353 int n;
4f9791ab 354 secp256k1_gej_double_var(r, r, NULL);
399c03f2 355#ifdef USE_ENDOMORPHISM
b1483f87 356 if (i < bits_na_1 && (n = wnaf_na_1[i])) {
4f9791ab 357 ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
2d5a186c 358 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
607884fc 359 }
b1483f87 360 if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
4f9791ab 361 ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
2d5a186c 362 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
607884fc 363 }
b1483f87 364 if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
a9b6595e 365 ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
4f9791ab 366 secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
607884fc 367 }
b1483f87 368 if (i < bits_ng_128 && (n = wnaf_ng_128[i])) {
a9b6595e 369 ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G);
4f9791ab 370 secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
607884fc 371 }
665775b2
PW
372#else
373 if (i < bits_na && (n = wnaf_na[i])) {
4f9791ab 374 ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
2d5a186c 375 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
665775b2
PW
376 }
377 if (i < bits_ng && (n = wnaf_ng[i])) {
a9b6595e 378 ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
4f9791ab 379 secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
665775b2
PW
380 }
381#endif
607884fc 382 }
4f9791ab
PD
383
384 if (!r->infinity) {
385 secp256k1_fe_mul(&r->z, &r->z, &Z);
386 }
607884fc 387}
7a4b7691
PW
388
389#endif
This page took 0.091804 seconds and 4 git commands to generate.