]>
Commit | Line | Data |
---|---|---|
cdec9cb5 DK |
1 | /* mpihelp-mul.c - MPI helper functions |
2 | * Copyright (C) 1994, 1996, 1998, 1999, | |
3 | * 2000 Free Software Foundation, Inc. | |
4 | * | |
5 | * This file is part of GnuPG. | |
6 | * | |
7 | * GnuPG is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * GnuPG is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA | |
20 | * | |
21 | * Note: This code is heavily based on the GNU MP Library. | |
22 | * Actually it's the same code with only minor changes in the | |
23 | * way the data is stored; this is to support the abstraction | |
24 | * of an optional secure memory allocation which may be used | |
25 | * to avoid revealing of sensitive data due to paging etc. | |
26 | * The GNU MP Library itself is published under the LGPL; | |
27 | * however I decided to publish this code under the plain GPL. | |
28 | */ | |
29 | ||
30 | #include <linux/string.h> | |
31 | #include "mpi-internal.h" | |
32 | #include "longlong.h" | |
33 | ||
/*
 * Dispatch an n-limb x n-limb multiply: schoolbook below the Karatsuba
 * cutoff, recursive Karatsuba (mul_n) at or above it.
 *
 * No semicolon after "while (0)": the caller supplies it, so the macro
 * expands to a single statement and stays safe in unbraced if/else.
 */
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
	do { \
		if ((size) < KARATSUBA_THRESHOLD) \
			mul_n_basecase(prodp, up, vp, size); \
		else \
			mul_n(prodp, up, vp, size, tspace); \
	} while (0)
41 | ||
/*
 * Dispatch an n-limb squaring: schoolbook below the Karatsuba cutoff,
 * recursive Karatsuba (mpih_sqr_n) at or above it.
 *
 * No semicolon after "while (0)": the caller supplies it, so the macro
 * expands to a single statement and stays safe in unbraced if/else.
 */
#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
	do { \
		if ((size) < KARATSUBA_THRESHOLD) \
			mpih_sqr_n_basecase(prodp, up, size); \
		else \
			mpih_sqr_n(prodp, up, size, tspace); \
	} while (0)
49 | ||
50 | /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), | |
51 | * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are | |
52 | * always stored. Return the most significant limb. | |
53 | * | |
54 | * Argument constraints: | |
55 | * 1. PRODP != UP and PRODP != VP, i.e. the destination | |
56 | * must be distinct from the multiplier and the multiplicand. | |
57 | * | |
58 | * | |
59 | * Handle simple cases with traditional multiplication. | |
60 | * | |
61 | * This is the most critical code of multiplication. All multiplies rely | |
62 | * on this, both small and huge. Small ones arrive here immediately. Huge | |
63 | * ones arrive here as this is the base case for Karatsuba's recursive | |
64 | * algorithm below. | |
65 | */ | |
66 | ||
67 | static mpi_limb_t | |
68 | mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) | |
69 | { | |
70 | mpi_size_t i; | |
71 | mpi_limb_t cy; | |
72 | mpi_limb_t v_limb; | |
73 | ||
74 | /* Multiply by the first limb in V separately, as the result can be | |
75 | * stored (not added) to PROD. We also avoid a loop for zeroing. */ | |
76 | v_limb = vp[0]; | |
77 | if (v_limb <= 1) { | |
78 | if (v_limb == 1) | |
79 | MPN_COPY(prodp, up, size); | |
80 | else | |
81 | MPN_ZERO(prodp, size); | |
82 | cy = 0; | |
83 | } else | |
84 | cy = mpihelp_mul_1(prodp, up, size, v_limb); | |
85 | ||
86 | prodp[size] = cy; | |
87 | prodp++; | |
88 | ||
89 | /* For each iteration in the outer loop, multiply one limb from | |
90 | * U with one limb from V, and add it to PROD. */ | |
91 | for (i = 1; i < size; i++) { | |
92 | v_limb = vp[i]; | |
93 | if (v_limb <= 1) { | |
94 | cy = 0; | |
95 | if (v_limb == 1) | |
96 | cy = mpihelp_add_n(prodp, prodp, up, size); | |
97 | } else | |
98 | cy = mpihelp_addmul_1(prodp, up, size, v_limb); | |
99 | ||
100 | prodp[size] = cy; | |
101 | prodp++; | |
102 | } | |
103 | ||
104 | return cy; | |
105 | } | |
106 | ||
/* Karatsuba multiplication of two SIZE-limb numbers into a 2*SIZE-limb
 * product at PRODP, using TSPACE (2*SIZE limbs) as scratch.
 *
 * The low half of PRODP is deliberately reused as temporary storage for
 * the |U1-U0| and |V1-V0| operands before the final products are
 * assembled, so the exact statement order below is load-bearing.
 */
static void
mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
		mpi_size_t size, mpi_ptr_t tspace)
{
	if (size & 1) {
		/* The size is odd, and the code below doesn't handle that.
		 * Multiply the least significant (size - 1) limbs with a recursive
		 * call, and handle the most significant limb of S1 and S2
		 * separately.
		 * A slightly faster way to do this would be to make the Karatsuba
		 * code below behave as if the size were even, and let it check for
		 * odd size in the end.  I.e., in essence move this code to the end.
		 * Doing so would save us a recursive call, and potentially make the
		 * stack grow a lot less.
		 */
		mpi_size_t esize = size - 1;	/* even size */
		mpi_limb_t cy_limb;

		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
		/* Add the row up[0..esize) * vp[esize] ... */
		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
		prodp[esize + esize] = cy_limb;
		/* ... then the full-width row vp[0..size) * up[esize]
		 * (size, not esize, so the limb just stored is folded in). */
		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
		prodp[esize + size] = cy_limb;
	} else {
		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
		 *
		 * Split U in two pieces, U1 and U0, such that
		 * U = U0 + U1*(B**n),
		 * and V in V1 and V0, such that
		 * V = V0 + V1*(B**n).
		 *
		 * UV is then computed recursively using the identity
		 *
		 *        2n   n          n                     n
		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
		 *                1 1        1  0   0  1              0 0
		 *
		 * Where B = 2**BITS_PER_MP_LIMB.
		 */
		mpi_size_t hsize = size >> 1;
		mpi_limb_t cy;
		int negflg;

		/* Product H.      ________________  ________________
		 *                |_____U1 x V1____||____U0 x V0_____|
		 * Put result in upper part of PROD and pass low part of TSPACE
		 * as new TSPACE.
		 */
		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
				  tspace);

		/* Product M.      ________________
		 *                |_(U1-U0)(V0-V1)_|
		 * Compute |U1-U0| into the low half of PROD and |V1-V0| into
		 * the next half; NEGFLG tracks the sign of the product M.
		 */
		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
			mpihelp_sub_n(prodp, up + hsize, up, hsize);
			negflg = 0;
		} else {
			mpihelp_sub_n(prodp, up, up + hsize, hsize);
			negflg = 1;
		}
		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
			negflg ^= 1;
		} else {
			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
			/* No change of NEGFLG. */
		}
		/* Read temporary operands from low part of PROD.
		 * Put result in low part of TSPACE using upper part of TSPACE
		 * as new TSPACE.
		 */
		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
				  tspace + size);

		/* Add/copy product H. */
		MPN_COPY(prodp + hsize, prodp + size, hsize);
		cy = mpihelp_add_n(prodp + size, prodp + size,
				   prodp + size + hsize, hsize);

		/* Add product M (if NEGFLG M is a negative number) */
		if (negflg)
			cy -=
			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
					  size);
		else
			cy +=
			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
					  size);

		/* Product L.      ________________  ________________
		 *                |________________||____U0 x V0_____|
		 * Read temporary operands from low part of PROD.
		 * Put result in low part of TSPACE using upper part of TSPACE
		 * as new TSPACE.
		 */
		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);

		/* Add/copy Product L (twice) */

		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
		if (cy)
			mpihelp_add_1(prodp + hsize + size,
				      prodp + hsize + size, hsize, cy);

		MPN_COPY(prodp, tspace, hsize);
		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
				   hsize);
		if (cy)
			mpihelp_add_1(prodp + size, prodp + size, size, 1);
	}
}
219 | ||
220 | void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) | |
221 | { | |
222 | mpi_size_t i; | |
223 | mpi_limb_t cy_limb; | |
224 | mpi_limb_t v_limb; | |
225 | ||
226 | /* Multiply by the first limb in V separately, as the result can be | |
227 | * stored (not added) to PROD. We also avoid a loop for zeroing. */ | |
228 | v_limb = up[0]; | |
229 | if (v_limb <= 1) { | |
230 | if (v_limb == 1) | |
231 | MPN_COPY(prodp, up, size); | |
232 | else | |
233 | MPN_ZERO(prodp, size); | |
234 | cy_limb = 0; | |
235 | } else | |
236 | cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); | |
237 | ||
238 | prodp[size] = cy_limb; | |
239 | prodp++; | |
240 | ||
241 | /* For each iteration in the outer loop, multiply one limb from | |
242 | * U with one limb from V, and add it to PROD. */ | |
243 | for (i = 1; i < size; i++) { | |
244 | v_limb = up[i]; | |
245 | if (v_limb <= 1) { | |
246 | cy_limb = 0; | |
247 | if (v_limb == 1) | |
248 | cy_limb = mpihelp_add_n(prodp, prodp, up, size); | |
249 | } else | |
250 | cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); | |
251 | ||
252 | prodp[size] = cy_limb; | |
253 | prodp++; | |
254 | } | |
255 | } | |
256 | ||
/* Karatsuba squaring of the SIZE-limb number at UP into a 2*SIZE-limb
 * result at PRODP, using TSPACE (2*SIZE limbs) as scratch.
 *
 * Same structure as mul_n() above, but the middle product
 * (U1-U0)(U0-U1) = -(U1-U0)^2 is never positive, so it is always
 * subtracted (no NEGFLG needed).  The low half of PRODP is reused as
 * temporary storage, so statement order is load-bearing.
 */
void
mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
	if (size & 1) {
		/* The size is odd, and the code below doesn't handle that.
		 * Multiply the least significant (size - 1) limbs with a recursive
		 * call, and handle the most significant limb of S1 and S2
		 * separately.
		 * A slightly faster way to do this would be to make the Karatsuba
		 * code below behave as if the size were even, and let it check for
		 * odd size in the end.  I.e., in essence move this code to the end.
		 * Doing so would save us a recursive call, and potentially make the
		 * stack grow a lot less.
		 */
		mpi_size_t esize = size - 1;	/* even size */
		mpi_limb_t cy_limb;

		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
		/* Add the row up[0..esize) * up[esize] ... */
		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
		prodp[esize + esize] = cy_limb;
		/* ... then the full-width row up[0..size) * up[esize]
		 * (size, not esize, so the limb just stored is folded in). */
		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);

		prodp[esize + size] = cy_limb;
	} else {
		mpi_size_t hsize = size >> 1;
		mpi_limb_t cy;

		/* Product H.      ________________  ________________
		 *                |_____U1 x U1____||____U0 x U0_____|
		 * Put result in upper part of PROD and pass low part of TSPACE
		 * as new TSPACE.
		 */
		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);

		/* Product M.      ________________
		 *                |_(U1-U0)(U0-U1)_|
		 * Only |U1-U0| is needed; the sign is known (see above).
		 */
		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
			mpihelp_sub_n(prodp, up + hsize, up, hsize);
		else
			mpihelp_sub_n(prodp, up, up + hsize, hsize);

		/* Read temporary operands from low part of PROD.
		 * Put result in low part of TSPACE using upper part of TSPACE
		 * as new TSPACE. */
		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);

		/* Add/copy product H */
		MPN_COPY(prodp + hsize, prodp + size, hsize);
		cy = mpihelp_add_n(prodp + size, prodp + size,
				   prodp + size + hsize, hsize);

		/* Product M is -(U1-U0)^2, always <= 0: subtract it. */
		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);

		/* Product L.      ________________  ________________
		 *                |________________||____U0 x U0_____|
		 * Read temporary operands from low part of PROD.
		 * Put result in low part of TSPACE using upper part of TSPACE
		 * as new TSPACE. */
		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);

		/* Add/copy Product L (twice). */
		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
		if (cy)
			mpihelp_add_1(prodp + hsize + size,
				      prodp + hsize + size, hsize, cy);

		MPN_COPY(prodp, tspace, hsize);
		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
				   hsize);
		if (cy)
			mpihelp_add_1(prodp + size, prodp + size, size, 1);
	}
}
332 | ||
cdec9cb5 DK |
/* Unbalanced multiply UP[0..usize) * VP[0..vsize) into PRODP using
 * Karatsuba for vsize-limb chunks of U, accumulating chunk products.
 * Scratch buffers are cached in CTX across calls (and grown on demand);
 * the caller must zero-initialize CTX and later free it with
 * mpihelp_release_karatsuba_ctx().
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int
mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
			   mpi_ptr_t up, mpi_size_t usize,
			   mpi_ptr_t vp, mpi_size_t vsize,
			   struct karatsuba_ctx *ctx)
{
	mpi_limb_t cy;

	/* (Re)allocate the 2*vsize-limb product scratch if absent or
	 * too small for this vsize. */
	if (!ctx->tspace || ctx->tspace_size < vsize) {
		if (ctx->tspace)
			mpi_free_limb_space(ctx->tspace);
		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
		if (!ctx->tspace)
			return -ENOMEM;
		ctx->tspace_size = vsize;
	}

	/* First vsize-limb chunk of U: product can be stored directly. */
	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);

	prodp += vsize;
	up += vsize;
	usize -= vsize;
	if (usize >= vsize) {
		/* Further full chunks need a second scratch buffer so the
		 * chunk product can be added into PRODP afterwards. */
		if (!ctx->tp || ctx->tp_size < vsize) {
			if (ctx->tp)
				mpi_free_limb_space(ctx->tp);
			ctx->tp = mpi_alloc_limb_space(2 * vsize);
			if (!ctx->tp) {
				if (ctx->tspace)
					mpi_free_limb_space(ctx->tspace);
				ctx->tspace = NULL;
				return -ENOMEM;
			}
			ctx->tp_size = vsize;
		}

		do {
			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
				      cy);
			prodp += vsize;
			up += vsize;
			usize -= vsize;
		} while (usize >= vsize);
	}

	/* Tail chunk shorter than vsize: multiply with operands swapped
	 * (V is now the longer one) and add into the product. */
	if (usize) {
		if (usize < KARATSUBA_THRESHOLD) {
			/* tmp only receives the top limb; the full product
			 * already sits in ctx->tspace, so it is discarded. */
			mpi_limb_t tmp;
			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
			    < 0)
				return -ENOMEM;
		} else {
			/* Recurse with a chained context for the smaller
			 * problem, allocating the link on first use. */
			if (!ctx->next) {
				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
				if (!ctx->next)
					return -ENOMEM;
			}
			if (mpihelp_mul_karatsuba_case(ctx->tspace,
						       vp, vsize,
						       up, usize,
						       ctx->next) < 0)
				return -ENOMEM;
		}

		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
	}

	return 0;
}
405 | ||
406 | void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) | |
407 | { | |
408 | struct karatsuba_ctx *ctx2; | |
409 | ||
410 | if (ctx->tp) | |
411 | mpi_free_limb_space(ctx->tp); | |
412 | if (ctx->tspace) | |
413 | mpi_free_limb_space(ctx->tspace); | |
414 | for (ctx = ctx->next; ctx; ctx = ctx2) { | |
415 | ctx2 = ctx->next; | |
416 | if (ctx->tp) | |
417 | mpi_free_limb_space(ctx->tp); | |
418 | if (ctx->tspace) | |
419 | mpi_free_limb_space(ctx->tspace); | |
420 | kfree(ctx); | |
421 | } | |
422 | } | |
423 | ||
424 | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) | |
425 | * and v (pointed to by VP, with VSIZE limbs), and store the result at | |
426 | * PRODP. USIZE + VSIZE limbs are always stored, but if the input | |
427 | * operands are normalized. Return the most significant limb of the | |
428 | * result. | |
429 | * | |
430 | * NOTE: The space pointed to by PRODP is overwritten before finished | |
431 | * with U and V, so overlap is an error. | |
432 | * | |
433 | * Argument constraints: | |
434 | * 1. USIZE >= VSIZE. | |
435 | * 2. PRODP != UP and PRODP != VP, i.e. the destination | |
436 | * must be distinct from the multiplier and the multiplicand. | |
437 | */ | |
438 | ||
439 | int | |
440 | mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, | |
441 | mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) | |
442 | { | |
443 | mpi_ptr_t prod_endp = prodp + usize + vsize - 1; | |
444 | mpi_limb_t cy; | |
445 | struct karatsuba_ctx ctx; | |
446 | ||
447 | if (vsize < KARATSUBA_THRESHOLD) { | |
448 | mpi_size_t i; | |
449 | mpi_limb_t v_limb; | |
450 | ||
451 | if (!vsize) { | |
452 | *_result = 0; | |
453 | return 0; | |
454 | } | |
455 | ||
456 | /* Multiply by the first limb in V separately, as the result can be | |
457 | * stored (not added) to PROD. We also avoid a loop for zeroing. */ | |
458 | v_limb = vp[0]; | |
459 | if (v_limb <= 1) { | |
460 | if (v_limb == 1) | |
461 | MPN_COPY(prodp, up, usize); | |
462 | else | |
463 | MPN_ZERO(prodp, usize); | |
464 | cy = 0; | |
465 | } else | |
466 | cy = mpihelp_mul_1(prodp, up, usize, v_limb); | |
467 | ||
468 | prodp[usize] = cy; | |
469 | prodp++; | |
470 | ||
471 | /* For each iteration in the outer loop, multiply one limb from | |
472 | * U with one limb from V, and add it to PROD. */ | |
473 | for (i = 1; i < vsize; i++) { | |
474 | v_limb = vp[i]; | |
475 | if (v_limb <= 1) { | |
476 | cy = 0; | |
477 | if (v_limb == 1) | |
478 | cy = mpihelp_add_n(prodp, prodp, up, | |
479 | usize); | |
480 | } else | |
481 | cy = mpihelp_addmul_1(prodp, up, usize, v_limb); | |
482 | ||
483 | prodp[usize] = cy; | |
484 | prodp++; | |
485 | } | |
486 | ||
487 | *_result = cy; | |
488 | return 0; | |
489 | } | |
490 | ||
491 | memset(&ctx, 0, sizeof ctx); | |
492 | if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) | |
493 | return -ENOMEM; | |
494 | mpihelp_release_karatsuba_ctx(&ctx); | |
495 | *_result = *prod_endp; | |
496 | return 0; | |
497 | } |