src/scalar_8x32_impl.h

   1 // Copyright (c) 2014 Pieter Wuille
   2 // Distributed under the MIT software license, see the accompanying
   3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
   4
   5 #ifndef _SECP256K1_SCALAR_REPR_IMPL_H_
   6 #define _SECP256K1_SCALAR_REPR_IMPL_H_
   7
   8 // Limbs of the secp256k1 order.
   9 #define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
  10 #define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
  11 #define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
  12 #define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
  13 #define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
  14 #define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
  15 #define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
  16 #define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)
  17
  18 // Limbs of 2^256 minus the secp256k1 order.
  19 #define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
  20 #define SECP256K1_N_C_1 (~SECP256K1_N_1)
  21 #define SECP256K1_N_C_2 (~SECP256K1_N_2)
  22 #define SECP256K1_N_C_3 (~SECP256K1_N_3)
  23 #define SECP256K1_N_C_4 (1)
  24
  25 // Limbs of half the secp256k1 order.
  26 #define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
  27 #define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
  28 #define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
  29 #define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
  30 #define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
  31 #define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
  32 #define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
  33 #define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)
  34
  35 void static inline secp256k1_scalar_clear(secp256k1_scalar_t *r) {
  36     r->d[0] = 0;
  37     r->d[1] = 0;
  38     r->d[2] = 0;
  39     r->d[3] = 0;
  40     r->d[4] = 0;
  41     r->d[5] = 0;
  42     r->d[6] = 0;
  43     r->d[7] = 0;
  44 }
  45
  46 int static inline secp256k1_scalar_get_bits(const secp256k1_scalar_t *a, int offset, int count) {
  47     VERIFY_CHECK((offset + count - 1) / 32 == offset / 32);
  48     return (a->d[offset / 32] >> (offset % 32)) & ((1 << count) - 1);
  49 }
  50
  51 int static inline secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) {
  52     int yes = 0;
  53     int no = 0;
  54     no |= (a->d[7] < SECP256K1_N_7); // No need for a > check.
  55     no |= (a->d[6] < SECP256K1_N_6); // No need for a > check.
  56     no |= (a->d[5] < SECP256K1_N_5); // No need for a > check.
  57     no |= (a->d[4] < SECP256K1_N_4);
  58     yes |= (a->d[4] > SECP256K1_N_4) & ~no;
  59     no |= (a->d[3] < SECP256K1_N_3) & ~yes;
  60     yes |= (a->d[3] > SECP256K1_N_3) & ~no;
  61     no |= (a->d[2] < SECP256K1_N_2) & ~yes;
  62     yes |= (a->d[2] > SECP256K1_N_2) & ~no;
  63     no |= (a->d[1] < SECP256K1_N_1) & ~yes;
  64     yes |= (a->d[1] > SECP256K1_N_1) & ~no;
  65     yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
  66     return yes;
  67 }
  68
  69 int static inline secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
  70     VERIFY_CHECK(overflow <= 1);
  71     uint64_t t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
  72     r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
  73     t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
  74     r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
  75     t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
  76     r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
  77     t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
  78     r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
  79     t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
  80     r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
  81     t += (uint64_t)r->d[5];
  82     r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
  83     t += (uint64_t)r->d[6];
  84     r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
  85     t += (uint64_t)r->d[7];
  86     r->d[7] = t & 0xFFFFFFFFUL;
  87     return overflow;
  88 }
  89
  90 void static secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
  91     uint64_t t = (uint64_t)a->d[0] + b->d[0];
  92     r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
  93     t += (uint64_t)a->d[1] + b->d[1];
  94     r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
  95     t += (uint64_t)a->d[2] + b->d[2];
  96     r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
  97     t += (uint64_t)a->d[3] + b->d[3];
  98     r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
  99     t += (uint64_t)a->d[4] + b->d[4];
 100     r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
 101     t += (uint64_t)a->d[5] + b->d[5];
 102     r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
 103     t += (uint64_t)a->d[6] + b->d[6];
 104     r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
 105     t += (uint64_t)a->d[7] + b->d[7];
 106     r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
 107     secp256k1_scalar_reduce(r, t + secp256k1_scalar_check_overflow(r));
 108 }
 109
 110 void static secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
 111     r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24;
 112     r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
 113     r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
 114     r->d[3] = (uint32_t)b32[19] | (uint32_t)b32[18] << 8 | (uint32_t)b32[17] << 16 | (uint32_t)b32[16] << 24;
 115     r->d[4] = (uint32_t)b32[15] | (uint32_t)b32[14] << 8 | (uint32_t)b32[13] << 16 | (uint32_t)b32[12] << 24;
 116     r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
 117     r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
 118     r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
 119     int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
 120     if (overflow) {
 121         *overflow = over;
 122     }
 123 }
 124
 125 void static secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_t* a) {
 126     bin[0] = a->d[7] >> 24; bin[1] = a->d[7] >> 16; bin[2] = a->d[7] >> 8; bin[3] = a->d[7];
 127     bin[4] = a->d[6] >> 24; bin[5] = a->d[6] >> 16; bin[6] = a->d[6] >> 8; bin[7] = a->d[6];
 128     bin[8] = a->d[5] >> 24; bin[9] = a->d[5] >> 16; bin[10] = a->d[5] >> 8; bin[11] = a->d[5];
 129     bin[12] = a->d[4] >> 24; bin[13] = a->d[4] >> 16; bin[14] = a->d[4] >> 8; bin[15] = a->d[4];
 130     bin[16] = a->d[3] >> 24; bin[17] = a->d[3] >> 16; bin[18] = a->d[3] >> 8; bin[19] = a->d[3];
 131     bin[20] = a->d[2] >> 24; bin[21] = a->d[2] >> 16; bin[22] = a->d[2] >> 8; bin[23] = a->d[2];
 132     bin[24] = a->d[1] >> 24; bin[25] = a->d[1] >> 16; bin[26] = a->d[1] >> 8; bin[27] = a->d[1];
 133     bin[28] = a->d[0] >> 24; bin[29] = a->d[0] >> 16; bin[30] = a->d[0] >> 8; bin[31] = a->d[0];
 134 }
 135
 136 int static inline secp256k1_scalar_is_zero(const secp256k1_scalar_t *a) {
 137     return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
 138 }
 139
 140 void static secp256k1_scalar_negate(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
 141     uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0);
 142     uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1;
 143     r->d[0] = t & nonzero; t >>= 32;
 144     t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
 145     r->d[1] = t & nonzero; t >>= 32;
 146     t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
 147     r->d[2] = t & nonzero; t >>= 32;
 148     t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
 149     r->d[3] = t & nonzero; t >>= 32;
 150     t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
 151     r->d[4] = t & nonzero; t >>= 32;
 152     t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
 153     r->d[5] = t & nonzero; t >>= 32;
 154     t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
 155     r->d[6] = t & nonzero; t >>= 32;
 156     t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
 157     r->d[7] = t & nonzero;
 158 }
 159
 160 int static inline secp256k1_scalar_is_one(const secp256k1_scalar_t *a) {
 161     return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
 162 }
 163
 164 int static secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 165     int yes = 0;
 166     int no = 0;
 167     no |= (a->d[7] < SECP256K1_N_H_7);
 168     yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
 169     no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; // No need for a > check.
 170     no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; // No need for a > check.
 171     no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; // No need for a > check.
 172     no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
 173     yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
 174     no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
 175     yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
 176     no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
 177     yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
 178     yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
 179     return yes;
 180 }
 181
 182 // Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c.
 183
 184 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 185 #define muladd(a,b) { \
 186     uint32_t tl, th; \
 187     { \
 188         uint64_t t = (uint64_t)a * b; \
 189         th = t >> 32;         /* at most 0xFFFFFFFE */ \
 190         tl = t; \
 191     } \
 192     c0 += tl;                 /* overflow is handled on the next line */ \
 193     th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
 194     c1 += th;                 /* overflow is handled on the next line */ \
 195     c2 += (c1 < th) ? 1 : 0;  /* never overflows by contract (verified in the next line) */ \
 196     VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
 197 }
 198
 199 /** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
 200 #define muladd_fast(a,b) { \
 201     uint32_t tl, th; \
 202     { \
 203         uint64_t t = (uint64_t)a * b; \
 204         th = t >> 32;         /* at most 0xFFFFFFFE */ \
 205         tl = t; \
 206     } \
 207     c0 += tl;                 /* overflow is handled on the next line */ \
 208     th += (c0 < tl) ? 1 : 0;  /* at most 0xFFFFFFFF */ \
 209     c1 += th;                 /* never overflows by contract (verified in the next line) */ \
 210     VERIFY_CHECK(c1 >= th); \
 211 }
 212
 213 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 214 #define muladd2(a,b) { \
 215     uint32_t tl, th; \
 216     { \
 217         uint64_t t = (uint64_t)a * b; \
 218         th = t >> 32;               /* at most 0xFFFFFFFE */ \
 219         tl = t; \
 220     } \
 221     uint32_t th2 = th + th;         /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
 222     c2 += (th2 < th) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
 223     VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
 224     uint32_t tl2 = tl + tl;         /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
 225     th2 += (tl2 < tl) ? 1 : 0;      /* at most 0xFFFFFFFF */ \
 226     c0 += tl2;                      /* overflow is handled on the next line */ \
 227     th2 += (c0 < tl2) ? 1 : 0;      /* second overflow is handled on the next line */ \
 228     c2 += (c0 < tl2) & (th2 == 0);  /* never overflows by contract (verified the next line) */ \
 229     VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
 230     c1 += th2;                      /* overflow is handled on the next line */ \
 231     c2 += (c1 < th2) ? 1 : 0;       /* never overflows by contract (verified the next line) */ \
 232     VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
 233 }
 234
 235 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
 236 #define sumadd(a) { \
 237     c0 += (a);                  /* overflow is handled on the next line */ \
 238     int over = (c0 < (a)) ? 1 : 0; \
 239     c1 += over;                 /* overflow is handled on the next line */ \
 240     c2 += (c1 < over) ? 1 : 0;  /* never overflows by contract */ \
 241 }
 242
 243 /** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
 244 #define sumadd_fast(a) { \
 245     c0 += (a);                 /* overflow is handled on the next line */ \
 246     c1 += (c0 < (a)) ? 1 : 0;  /* never overflows by contract (verified the next line) */ \
 247     VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
 248     VERIFY_CHECK(c2 == 0); \
 249 }
 250
 251 /** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. */
 252 #define extract(n) { \
 253     (n) = c0; \
 254     c0 = c1; \
 255     c1 = c2; \
 256     c2 = 0; \
 257 }
 258
 259 /** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. c2 is required to be zero. */
 260 #define extract_fast(n) { \
 261     (n) = c0; \
 262     c0 = c1; \
 263     c1 = 0; \
 264     VERIFY_CHECK(c2 == 0); \
 265 }
 266
 267 void static secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
 268     uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];
 269
 270     // 96 bit accumulator.
 271     uint32_t c0, c1, c2;
 272
 273     // Reduce 512 bits into 385.
 274     // m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C.
 275     c0 = l[0]; c1 = 0; c2 = 0;
 276     muladd_fast(n0, SECP256K1_N_C_0);
 277     uint32_t m0; extract_fast(m0);
 278     sumadd_fast(l[1]);
 279     muladd(n1, SECP256K1_N_C_0);
 280     muladd(n0, SECP256K1_N_C_1);
 281     uint32_t m1; extract(m1);
 282     sumadd(l[2]);
 283     muladd(n2, SECP256K1_N_C_0);
 284     muladd(n1, SECP256K1_N_C_1);
 285     muladd(n0, SECP256K1_N_C_2);
 286     uint32_t m2; extract(m2);
 287     sumadd(l[3]);
 288     muladd(n3, SECP256K1_N_C_0);
 289     muladd(n2, SECP256K1_N_C_1);
 290     muladd(n1, SECP256K1_N_C_2);
 291     muladd(n0, SECP256K1_N_C_3);
 292     uint32_t m3; extract(m3);
 293     sumadd(l[4]);
 294     muladd(n4, SECP256K1_N_C_0);
 295     muladd(n3, SECP256K1_N_C_1);
 296     muladd(n2, SECP256K1_N_C_2);
 297     muladd(n1, SECP256K1_N_C_3);
 298     sumadd(n0);
 299     uint32_t m4; extract(m4);
 300     sumadd(l[5]);
 301     muladd(n5, SECP256K1_N_C_0);
 302     muladd(n4, SECP256K1_N_C_1);
 303     muladd(n3, SECP256K1_N_C_2);
 304     muladd(n2, SECP256K1_N_C_3);
 305     sumadd(n1);
 306     uint32_t m5; extract(m5);
 307     sumadd(l[6]);
 308     muladd(n6, SECP256K1_N_C_0);
 309     muladd(n5, SECP256K1_N_C_1);
 310     muladd(n4, SECP256K1_N_C_2);
 311     muladd(n3, SECP256K1_N_C_3);
 312     sumadd(n2);
 313     uint32_t m6; extract(m6);
 314     sumadd(l[7]);
 315     muladd(n7, SECP256K1_N_C_0);
 316     muladd(n6, SECP256K1_N_C_1);
 317     muladd(n5, SECP256K1_N_C_2);
 318     muladd(n4, SECP256K1_N_C_3);
 319     sumadd(n3);
 320     uint32_t m7; extract(m7);
 321     muladd(n7, SECP256K1_N_C_1);
 322     muladd(n6, SECP256K1_N_C_2);
 323     muladd(n5, SECP256K1_N_C_3);
 324     sumadd(n4);
 325     uint32_t m8; extract(m8);
 326     muladd(n7, SECP256K1_N_C_2);
 327     muladd(n6, SECP256K1_N_C_3);
 328     sumadd(n5);
 329     uint32_t m9; extract(m9);
 330     muladd(n7, SECP256K1_N_C_3);
 331     sumadd(n6);
 332     uint32_t m10; extract(m10);
 333     sumadd_fast(n7);
 334     uint32_t m11; extract_fast(m11);
 335     VERIFY_CHECK(c0 <= 1);
 336     uint32_t m12 = c0;
 337
 338     // Reduce 385 bits into 258.
 339     // p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C.
 340     c0 = m0; c1 = 0; c2 = 0;
 341     muladd_fast(m8, SECP256K1_N_C_0);
 342     uint32_t p0; extract_fast(p0);
 343     sumadd_fast(m1);
 344     muladd(m9, SECP256K1_N_C_0);
 345     muladd(m8, SECP256K1_N_C_1);
 346     uint32_t p1; extract(p1);
 347     sumadd(m2);
 348     muladd(m10, SECP256K1_N_C_0);
 349     muladd(m9, SECP256K1_N_C_1);
 350     muladd(m8, SECP256K1_N_C_2);
 351     uint32_t p2; extract(p2);
 352     sumadd(m3);
 353     muladd(m11, SECP256K1_N_C_0);
 354     muladd(m10, SECP256K1_N_C_1);
 355     muladd(m9, SECP256K1_N_C_2);
 356     muladd(m8, SECP256K1_N_C_3);
 357     uint32_t p3; extract(p3);
 358     sumadd(m4);
 359     muladd(m12, SECP256K1_N_C_0);
 360     muladd(m11, SECP256K1_N_C_1);
 361     muladd(m10, SECP256K1_N_C_2);
 362     muladd(m9, SECP256K1_N_C_3);
 363     sumadd(m8);
 364     uint32_t p4; extract(p4);
 365     sumadd(m5);
 366     muladd(m12, SECP256K1_N_C_1);
 367     muladd(m11, SECP256K1_N_C_2);
 368     muladd(m10, SECP256K1_N_C_3);
 369     sumadd(m9);
 370     uint32_t p5; extract(p5);
 371     sumadd(m6);
 372     muladd(m12, SECP256K1_N_C_2);
 373     muladd(m11, SECP256K1_N_C_3);
 374     sumadd(m10);
 375     uint32_t p6; extract(p6);
 376     sumadd_fast(m7);
 377     muladd_fast(m12, SECP256K1_N_C_3);
 378     sumadd_fast(m11);
 379     uint32_t p7; extract_fast(p7);
 380     uint32_t p8 = c0 + m12;
 381     VERIFY_CHECK(p8 <= 2);
 382
 383     // Reduce 258 bits into 256.
 384     // r[0..7] = p[0..7] + p[8] * SECP256K1_N_C.
 385     uint64_t c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
 386     r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
 387     c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
 388     r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
 389     c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
 390     r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
 391     c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
 392     r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
 393     c += p4 + (uint64_t)p8;
 394     r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
 395     c += p5;
 396     r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
 397     c += p6;
 398     r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
 399     c += p7;
 400     r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;
 401
 402     // Final reduction of r.
 403     secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
 404 }
 405
 406 void static secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
 407     // 96 bit accumulator.
 408     uint32_t c0 = 0, c1 = 0, c2 = 0;
 409
 410     uint32_t l[16];
 411
 412     // l[0..15] = a[0..7] * b[0..7].
 413     muladd_fast(a->d[0], b->d[0]);
 414     extract_fast(l[0]);
 415     muladd(a->d[0], b->d[1]);
 416     muladd(a->d[1], b->d[0]);
 417     extract(l[1]);
 418     muladd(a->d[0], b->d[2]);
 419     muladd(a->d[1], b->d[1]);
 420     muladd(a->d[2], b->d[0]);
 421     extract(l[2]);
 422     muladd(a->d[0], b->d[3]);
 423     muladd(a->d[1], b->d[2]);
 424     muladd(a->d[2], b->d[1]);
 425     muladd(a->d[3], b->d[0]);
 426     extract(l[3]);
 427     muladd(a->d[0], b->d[4]);
 428     muladd(a->d[1], b->d[3]);
 429     muladd(a->d[2], b->d[2]);
 430     muladd(a->d[3], b->d[1]);
 431     muladd(a->d[4], b->d[0]);
 432     extract(l[4]);
 433     muladd(a->d[0], b->d[5]);
 434     muladd(a->d[1], b->d[4]);
 435     muladd(a->d[2], b->d[3]);
 436     muladd(a->d[3], b->d[2]);
 437     muladd(a->d[4], b->d[1]);
 438     muladd(a->d[5], b->d[0]);
 439     extract(l[5]);
 440     muladd(a->d[0], b->d[6]);
 441     muladd(a->d[1], b->d[5]);
 442     muladd(a->d[2], b->d[4]);
 443     muladd(a->d[3], b->d[3]);
 444     muladd(a->d[4], b->d[2]);
 445     muladd(a->d[5], b->d[1]);
 446     muladd(a->d[6], b->d[0]);
 447     extract(l[6]);
 448     muladd(a->d[0], b->d[7]);
 449     muladd(a->d[1], b->d[6]);
 450     muladd(a->d[2], b->d[5]);
 451     muladd(a->d[3], b->d[4]);
 452     muladd(a->d[4], b->d[3]);
 453     muladd(a->d[5], b->d[2]);
 454     muladd(a->d[6], b->d[1]);
 455     muladd(a->d[7], b->d[0]);
 456     extract(l[7]);
 457     muladd(a->d[1], b->d[7]);
 458     muladd(a->d[2], b->d[6]);
 459     muladd(a->d[3], b->d[5]);
 460     muladd(a->d[4], b->d[4]);
 461     muladd(a->d[5], b->d[3]);
 462     muladd(a->d[6], b->d[2]);
 463     muladd(a->d[7], b->d[1]);
 464     extract(l[8]);
 465     muladd(a->d[2], b->d[7]);
 466     muladd(a->d[3], b->d[6]);
 467     muladd(a->d[4], b->d[5]);
 468     muladd(a->d[5], b->d[4]);
 469     muladd(a->d[6], b->d[3]);
 470     muladd(a->d[7], b->d[2]);
 471     extract(l[9]);
 472     muladd(a->d[3], b->d[7]);
 473     muladd(a->d[4], b->d[6]);
 474     muladd(a->d[5], b->d[5]);
 475     muladd(a->d[6], b->d[4]);
 476     muladd(a->d[7], b->d[3]);
 477     extract(l[10]);
 478     muladd(a->d[4], b->d[7]);
 479     muladd(a->d[5], b->d[6]);
 480     muladd(a->d[6], b->d[5]);
 481     muladd(a->d[7], b->d[4]);
 482     extract(l[11]);
 483     muladd(a->d[5], b->d[7]);
 484     muladd(a->d[6], b->d[6]);
 485     muladd(a->d[7], b->d[5]);
 486     extract(l[12]);
 487     muladd(a->d[6], b->d[7]);
 488     muladd(a->d[7], b->d[6]);
 489     extract(l[13]);
 490     muladd_fast(a->d[7], b->d[7]);
 491     extract_fast(l[14]);
 492     VERIFY_CHECK(c1 == 0);
 493     l[15] = c0;
 494
 495     secp256k1_scalar_reduce_512(r, l);
 496 }
 497
 498 void static secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
 499     // 96 bit accumulator.
 500     uint32_t c0 = 0, c1 = 0, c2 = 0;
 501
 502     uint32_t l[16];
 503
 504     // l[0..15] = a[0..7]^2.
 505     muladd_fast(a->d[0], a->d[0]);
 506     extract_fast(l[0]);
 507     muladd2(a->d[0], a->d[1]);
 508     extract(l[1]);
 509     muladd2(a->d[0], a->d[2]);
 510     muladd(a->d[1], a->d[1]);
 511     extract(l[2]);
 512     muladd2(a->d[0], a->d[3]);
 513     muladd2(a->d[1], a->d[2]);
 514     extract(l[3]);
 515     muladd2(a->d[0], a->d[4]);
 516     muladd2(a->d[1], a->d[3]);
 517     muladd(a->d[2], a->d[2]);
 518     extract(l[4]);
 519     muladd2(a->d[0], a->d[5]);
 520     muladd2(a->d[1], a->d[4]);
 521     muladd2(a->d[2], a->d[3]);
 522     extract(l[5]);
 523     muladd2(a->d[0], a->d[6]);
 524     muladd2(a->d[1], a->d[5]);
 525     muladd2(a->d[2], a->d[4]);
 526     muladd(a->d[3], a->d[3]);
 527     extract(l[6]);
 528     muladd2(a->d[0], a->d[7]);
 529     muladd2(a->d[1], a->d[6]);
 530     muladd2(a->d[2], a->d[5]);
 531     muladd2(a->d[3], a->d[4]);
 532     extract(l[7]);
 533     muladd2(a->d[1], a->d[7]);
 534     muladd2(a->d[2], a->d[6]);
 535     muladd2(a->d[3], a->d[5]);
 536     muladd(a->d[4], a->d[4]);
 537     extract(l[8]);
 538     muladd2(a->d[2], a->d[7]);
 539     muladd2(a->d[3], a->d[6]);
 540     muladd2(a->d[4], a->d[5]);
 541     extract(l[9]);
 542     muladd2(a->d[3], a->d[7]);
 543     muladd2(a->d[4], a->d[6]);
 544     muladd(a->d[5], a->d[5]);
 545     extract(l[10]);
 546     muladd2(a->d[4], a->d[7]);
 547     muladd2(a->d[5], a->d[6]);
 548     extract(l[11]);
 549     muladd2(a->d[5], a->d[7]);
 550     muladd(a->d[6], a->d[6]);
 551     extract(l[12]);
 552     muladd2(a->d[6], a->d[7]);
 553     extract(l[13]);
 554     muladd_fast(a->d[7], a->d[7]);
 555     extract_fast(l[14]);
 556     VERIFY_CHECK(c1 == 0);
 557     l[15] = c0;
 558
 559     secp256k1_scalar_reduce_512(r, l);
 560 }
 561
 562 #undef sumadd
 563 #undef sumadd_fast
 564 #undef muladd
 565 #undef muladd_fast
 566 #undef muladd2
 567 #undef extract
 568 #undef extract_fast
 569
 570 #endif