1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to.  The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  *    must display the following acknowledgement:
32  *    "This product includes cryptographic software written by
33  *     Eric Young (eay@cryptsoft.com)"
34  *    The word 'cryptographic' can be left out if the rouines from the library
35  *    being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  *    the apps directory (application code) you must include an acknowledgement:
38  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.]
56  */
57 /* ====================================================================
58  * Copyright (c) 1998-2006 The OpenSSL Project.  All rights reserved.
59  *
60  * Redistribution and use in source and binary forms, with or without
61  * modification, are permitted provided that the following conditions
62  * are met:
63  *
64  * 1. Redistributions of source code must retain the above copyright
65  *    notice, this list of conditions and the following disclaimer.
66  *
67  * 2. Redistributions in binary form must reproduce the above copyright
68  *    notice, this list of conditions and the following disclaimer in
69  *    the documentation and/or other materials provided with the
70  *    distribution.
71  *
72  * 3. All advertising materials mentioning features or use of this
73  *    software must display the following acknowledgment:
74  *    "This product includes software developed by the OpenSSL Project
75  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
76  *
77  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
78  *    endorse or promote products derived from this software without
79  *    prior written permission. For written permission, please contact
80  *    openssl-core@openssl.org.
81  *
82  * 5. Products derived from this software may not be called "OpenSSL"
83  *    nor may "OpenSSL" appear in their names without prior written
84  *    permission of the OpenSSL Project.
85  *
86  * 6. Redistributions of any form whatsoever must retain the following
87  *    acknowledgment:
88  *    "This product includes software developed by the OpenSSL Project
89  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
90  *
91  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
92  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
94  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
95  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
96  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
97  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
98  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
100  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
101  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
102  * OF THE POSSIBILITY OF SUCH DAMAGE.
103  * ====================================================================
104  *
105  * This product includes cryptographic software written by Eric Young
106  * (eay@cryptsoft.com).  This product includes software written by Tim
107  * Hudson (tjh@cryptsoft.com). */
108 
109 #include <openssl/bn.h>
110 
111 #include <assert.h>
112 #include <string.h>
113 
114 #include <openssl/err.h>
115 #include <openssl/mem.h>
116 #include <openssl/thread.h>
117 #include <openssl/type_check.h>
118 
119 #include "internal.h"
120 #include "../../internal.h"
121 
122 
// |OPENSSL_BN_ASM_MONT| is defined when assembly is enabled and the target is
// x86, x86-64, ARM or AArch64. It gates the |bn_mul_mont| code paths in this
// file.
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
    defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define OPENSSL_BN_ASM_MONT
#endif
#endif
128 
129 
BN_MONT_CTX_new(void)130 BN_MONT_CTX *BN_MONT_CTX_new(void) {
131   BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX));
132 
133   if (ret == NULL) {
134     return NULL;
135   }
136 
137   OPENSSL_memset(ret, 0, sizeof(BN_MONT_CTX));
138   BN_init(&ret->RR);
139   BN_init(&ret->N);
140 
141   return ret;
142 }
143 
// BN_MONT_CTX_free releases the |RR| and |N| members of |mont| and then |mont|
// itself. Passing NULL is a no-op.
void BN_MONT_CTX_free(BN_MONT_CTX *mont) {
  if (mont != NULL) {
    BN_free(&mont->RR);
    BN_free(&mont->N);
    OPENSSL_free(mont);
  }
}
153 
// BN_MONT_CTX_copy copies |RR|, |N| and both |n0| words from |from| into |to|.
// Copying a context onto itself is a no-op. It returns |to| on success and
// NULL if either |BN_copy| call fails.
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, const BN_MONT_CTX *from) {
  if (to != from) {
    if (!BN_copy(&to->RR, &from->RR)) {
      return NULL;
    }
    if (!BN_copy(&to->N, &from->N)) {
      return NULL;
    }
    to->n0[0] = from->n0[0];
    to->n0[1] = from->n0[1];
  }
  return to;
}
167 
168 OPENSSL_COMPILE_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
169                        BN_MONT_CTX_N0_LIMBS_VALUE_INVALID);
170 OPENSSL_COMPILE_ASSERT(sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS ==
171                        sizeof(uint64_t), BN_MONT_CTX_set_64_bit_mismatch);
172 
// BN_MONT_CTX_set initialises |mont| for the modulus |mod|: it stores a
// minimal-width copy of |mod| in |mont->N|, fills |mont->n0| from
// |bn_mont_n0| and stores R^2 mod N in |mont->RR|. |mod| must be non-zero,
// odd and non-negative. |ctx| is not used by this function. It returns one on
// success and zero on error.
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) {
  // Validate the modulus before touching |mont|.
  if (BN_is_zero(mod)) {
    OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
    return 0;
  } else if (!BN_is_odd(mod)) {
    OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    return 0;
  } else if (BN_is_negative(mod)) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
  }

  // Keep our own copy of the modulus.
  if (!BN_copy(&mont->N, mod)) {
    OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
    return 0;
  }
  // |mont->N| is kept at minimal width. Computing RR efficiently reveals the
  // size of the modulus; in RSA the modulus may be a private prime, but prime
  // sizes are public, so this is acceptable.
  bn_correct_top(&mont->N);

  // Compute n0 with n0 * N == -1 (mod r).
  //
  // Only some BN_BITS2<=32 platforms actually consume n0[1]. Elsewhere a
  // shorter R and |BN_ULONG| arithmetic would be cheaper than the
  // double-precision |uint64_t| arithmetic, but only the assembly files know
  // which platforms are which.
  const uint64_t n0_wide = bn_mont_n0(&mont->N);
  mont->n0[0] = (BN_ULONG)n0_wide;
#if BN_MONT_CTX_N0_LIMBS == 2
  mont->n0[1] = (BN_ULONG)(n0_wide >> BN_BITS2);
#else
  mont->n0[1] = 0;
#endif

  // Store RR = R**2 (mod N), where R is the smallest power of 2**BN_BITS2 with
  // R > mod. Some 32-bit assembly works on 64-bit values, but using |BN_BITS2|
  // rather than |BN_MONT_CTX_N0_LIMBS * BN_BITS2| is still correct: R**2 is a
  // multiple of the latter because |BN_MONT_CTX_N0_LIMBS| is one or two.
  //
  // XXX: This is not constant time with respect to |mont->N|, but it should be.
  const unsigned r_bits = mont->N.top * BN_BITS2;
  if (!bn_mod_exp_base_2_vartime(&mont->RR, r_bits * 2, &mont->N)) {
    return 0;
  }

  return 1;
}
225 
BN_MONT_CTX_new_for_modulus(const BIGNUM * mod,BN_CTX * ctx)226 BN_MONT_CTX *BN_MONT_CTX_new_for_modulus(const BIGNUM *mod, BN_CTX *ctx) {
227   BN_MONT_CTX *mont = BN_MONT_CTX_new();
228   if (mont == NULL ||
229       !BN_MONT_CTX_set(mont, mod, ctx)) {
230     BN_MONT_CTX_free(mont);
231     return NULL;
232   }
233   return mont;
234 }
235 
// BN_MONT_CTX_set_locked makes sure |*pmont| is populated. |*pmont| is first
// read under the read side of |lock|; if it is already non-NULL, one is
// returned immediately. Otherwise the write side of |lock| is taken, |*pmont|
// is re-checked and, if still NULL, set to a new context for |mod|. It returns
// one if |*pmont| is non-NULL afterwards and zero otherwise.
int BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_MUTEX *lock,
                           const BIGNUM *mod, BN_CTX *bn_ctx) {
  CRYPTO_MUTEX_lock_read(lock);
  const int already_set = (*pmont != NULL);
  CRYPTO_MUTEX_unlock_read(lock);
  if (already_set) {
    return 1;
  }

  CRYPTO_MUTEX_lock_write(lock);
  BN_MONT_CTX *mont = *pmont;
  if (mont == NULL) {
    // Nothing was stored between dropping the read lock and taking the write
    // lock, so build the context now.
    mont = BN_MONT_CTX_new_for_modulus(mod, bn_ctx);
    *pmont = mont;
  }
  const int ok = (mont != NULL);
  CRYPTO_MUTEX_unlock_write(lock);
  return ok;
}
254 
// BN_to_montgomery sets |ret| to the result of |BN_mod_mul_montgomery| applied
// to |a| and |mont->RR|, and returns that call's result.
int BN_to_montgomery(BIGNUM *ret, const BIGNUM *a, const BN_MONT_CTX *mont,
                     BN_CTX *ctx) {
  const BIGNUM *rr = &mont->RR;
  return BN_mod_mul_montgomery(ret, a, rr, mont, ctx);
}
259 
// bn_from_montgomery_in_place Montgomery-reduces the |num_a|-word value in |a|
// and writes the |num_r|-word result to |r|. |num_r| must equal the width of
// the modulus and |num_a| must be exactly twice that. |a| is used as scratch
// space: its upper half is zeroed before returning. It returns one on success
// and zero if the sizes do not match.
static int bn_from_montgomery_in_place(BN_ULONG *r, size_t num_r, BN_ULONG *a,
                                       size_t num_a, const BN_MONT_CTX *mont) {
  const BN_ULONG *n = mont->N.d;
  const size_t num_n = mont->N.top;
  if (num_r != num_n || num_a != 2 * num_n) {
    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    return 0;
  }

  // Add multiples of |n| to |a| until R = 2^(num_n * BN_BITS2) divides it. On
  // input |a| < |n| * R, so afterwards |a| < 2 * |n| * R, where the value of
  // |a| includes |carry|, which is tracked separately.
  const BN_ULONG n0 = mont->n0[0];
  BN_ULONG *const high = a + num_n;
  BN_ULONG carry = 0;
  for (size_t i = 0; i < num_n; i++) {
    BN_ULONG sum = bn_mul_add_words(a + i, n, num_n, a[i] * n0);
    const BN_ULONG prev = high[i];
    sum += carry + prev;
    // Update |carry| without branching on the values involved.
    carry |= (sum != prev);
    carry &= (sum <= prev);
    high[i] = sum;
  }

  // Dividing by R is a shift by |num_n| words, i.e. the result lives in
  // |high|. That value (including |carry|) is < 2 * |n|, so at most one
  // subtraction of |n| is needed. Subtract and pick the answer in constant
  // time.
  OPENSSL_COMPILE_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
                         crypto_word_t_too_small);
  const BN_ULONG borrow = bn_sub_words(r, high, n, num_n) - carry;
  // |borrow| is one if |high| - |n| underflowed and zero otherwise. It cannot
  // be -1: that would mean the difference did not fit in |num_n| words, but at
  // most one subtraction is ever required.
  const BN_ULONG mask = 0u - borrow;
  for (size_t i = 0; i < num_n; i++) {
    r[i] = constant_time_select_w(mask, high[i], r[i]);
    high[i] = 0;
  }
  return 1;
}
301 
// BN_from_montgomery_word Montgomery-reduces |r| into |ret|. |r| must be
// non-negative; it is resized to twice the width of the modulus and consumed
// as scratch space. If the modulus has no words, |ret| is set to have zero
// words. It returns one on success and zero on error.
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,
                                   const BN_MONT_CTX *mont) {
  if (r->neg) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
  }

  const BIGNUM *modulus = &mont->N;
  if (modulus->top == 0) {
    ret->top = 0;
    return 1;
  }

  // The input needs twice the modulus width; the carry is stored separately.
  const int input_words = 2 * modulus->top;
  if (!bn_resize_words(r, input_words)) {
    return 0;
  }
  if (!bn_wexpand(ret, modulus->top)) {
    return 0;
  }
  ret->top = modulus->top;

  if (!bn_from_montgomery_in_place(ret->d, ret->top, r->d, r->top, mont)) {
    return 0;
  }
  ret->neg = 0;

  bn_correct_top(r);
  bn_correct_top(ret);
  return 1;
}
331 
// BN_from_montgomery Montgomery-reduces |a| into |r|. |a| is left untouched:
// the reduction runs on a temporary copy taken from |ctx|. It returns one on
// success and zero on error.
int BN_from_montgomery(BIGNUM *r, const BIGNUM *a, const BN_MONT_CTX *mont,
                       BN_CTX *ctx) {
  int ok = 0;

  BN_CTX_start(ctx);
  BIGNUM *scratch = BN_CTX_get(ctx);
  if (scratch != NULL && BN_copy(scratch, a)) {
    ok = BN_from_montgomery_word(r, scratch, mont);
  }
  BN_CTX_end(ctx);

  return ok;
}
351 
// bn_one_to_montgomery sets |r| to one in Montgomery form for |mont|. It
// returns one on success and zero on error.
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx) {
  const BIGNUM *n = &mont->N;
  const int top_bit_set =
      n->top > 0 && (n->d[n->top - 1] >> (BN_BITS2 - 1)) != 0;
  if (!top_bit_set) {
    // General case: Montgomery-reduce RR.
    return BN_from_montgomery(r, &mont->RR, mont, ctx);
  }

  // With the high bit of |n| set, R = 2^(top*BN_BITS2) < 2 * |n|, so R - |n|
  // can be computed directly instead of performing a Montgomery reduction.
  if (!bn_wexpand(r, n->top)) {
    return 0;
  }
  r->d[0] = 0 - n->d[0];
  for (int i = 1; i < n->top; i++) {
    r->d[i] = ~n->d[i];
  }
  r->top = n->top;
  r->neg = 0;
  // Upper words come out as zero wherever |n| had all-ones words, so trim the
  // result with |bn_correct_top|.
  bn_correct_top(r);
  return 1;
}
374 
// bn_mod_mul_montgomery_fallback computes the full product of |a| and |b| in a
// temporary from |ctx| (using |BN_sqr| when |a| and |b| are the same object)
// and then Montgomery-reduces it into |r|. It returns one on success and zero
// on error.
static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
                                          const BIGNUM *b,
                                          const BN_MONT_CTX *mont,
                                          BN_CTX *ctx) {
  int ok = 0;

  BN_CTX_start(ctx);
  BIGNUM *product = BN_CTX_get(ctx);
  if (product != NULL) {
    const int multiplied =
        (a == b) ? BN_sqr(product, a, ctx) : BN_mul(product, a, b, ctx);
    // Reduce from aRR to aR.
    if (multiplied && BN_from_montgomery_word(r, product, mont)) {
      ok = 1;
    }
  }
  BN_CTX_end(ctx);

  return ok;
}
408 
// BN_mod_mul_montgomery multiplies |a| and |b| and Montgomery-reduces the
// product into |r| using |mont|. Negative operands are rejected with
// |BN_R_NEGATIVE_NUMBER|. It returns one on success and zero on error.
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                          const BN_MONT_CTX *mont, BN_CTX *ctx) {
  if (a->neg || b->neg) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
  }

#if defined(OPENSSL_BN_ASM_MONT)
  // Use the assembly routine only when the modulus has at least 128 bits of
  // limbs (a |bn_mul_mont| requirement, at least for x86) and both operands
  // are exactly as wide as the modulus.
  const int words = mont->N.top;
  const int min_words = 128 / BN_BITS2;
  if (words >= min_words && a->top == words && b->top == words) {
    if (!bn_wexpand(r, words)) {
      return 0;
    }
    if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, words)) {
      // The width checks above are meant to make this unreachable.
      assert(0);
      OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
      return 0;
    }
    r->neg = 0;
    r->top = words;
    bn_correct_top(r);

    return 1;
  }
#endif

  // Otherwise multiply and reduce as separate steps.
  return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx);
}
441 
bn_less_than_montgomery_R(const BIGNUM * bn,const BN_MONT_CTX * mont)442 int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont) {
443   return !BN_is_negative(bn) &&
444          bn_fits_in_words(bn, mont->N.top);
445 }
446 
// bn_to_montgomery_small sets |r| to the result of
// |bn_mod_mul_montgomery_small| applied to |a| and the words of |mont->RR|,
// and returns that call's result.
int bn_to_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
                           size_t num_a, const BN_MONT_CTX *mont) {
  const BIGNUM *rr = &mont->RR;
  return bn_mod_mul_montgomery_small(r, num_r, a, num_a, rr->d, rr->top, mont);
}
452 
// bn_from_montgomery_small Montgomery-reduces the |num_a|-word value |a| into
// the |num_r|-word buffer |r|. |num_r| must equal the modulus width, |num_a|
// may be at most twice that, and the modulus may have at most
// |BN_SMALL_MAX_WORDS| words. It returns one on success and zero on error.
int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
                             size_t num_a, const BN_MONT_CTX *mont) {
  const size_t num_n = mont->N.top;
  if (num_r != num_n || num_n > BN_SMALL_MAX_WORDS || num_a > 2 * num_n) {
    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    return 0;
  }

  // The in-place reduction overwrites its input, so run it on a zero-extended
  // stack copy of |a| and wipe the copy afterwards.
  BN_ULONG scratch[BN_SMALL_MAX_WORDS * 2];
  const size_t num_scratch = 2 * num_n;
  OPENSSL_memcpy(scratch, a, num_a * sizeof(BN_ULONG));
  OPENSSL_memset(scratch + num_a, 0,
                 (num_scratch - num_a) * sizeof(BN_ULONG));
  const int ok =
      bn_from_montgomery_in_place(r, num_r, scratch, num_scratch, mont);
  OPENSSL_cleanse(scratch, num_scratch * sizeof(BN_ULONG));
  return ok;
}
468 
// bn_one_to_montgomery_small writes one in Montgomery form for |mont| to the
// |num_r|-word buffer |r|. The modulus must be non-empty and |num_r| must
// equal its width. It returns one on success and zero on error.
int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r,
                               const BN_MONT_CTX *mont) {
  const size_t num_n = mont->N.top;
  if (num_n == 0 || num_r != num_n) {
    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    return 0;
  }

  // |num_n| is non-zero here, so the top word exists.
  const BN_ULONG *n = mont->N.d;
  const BN_ULONG top_bit = n[num_n - 1] >> (BN_BITS2 - 1);
  if (top_bit == 0) {
    // General case: Montgomery-reduce RR.
    return bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.top, mont);
  }

  // With the high bit of |n| set, R = 2^(num_n*BN_BITS2) < 2 * |n|, so R - |n|
  // can be computed directly instead of performing a Montgomery reduction.
  r[0] = 0 - n[0];
  for (size_t i = 1; i < num_n; i++) {
    r[i] = ~n[i];
  }
  return 1;
}
490 
// bn_mod_mul_montgomery_small multiplies the |num_a|-word value |a| by the
// |num_b|-word value |b| and Montgomery-reduces the product into the
// |num_r|-word buffer |r|.
//
// |num_r| must equal the modulus width, the modulus may have at most
// |BN_SMALL_MAX_WORDS| words, and |num_a| + |num_b| may be at most twice the
// modulus width. It returns one on success and zero on error. The stack
// temporary holding the product is wiped on every path that may have written
// to it.
int bn_mod_mul_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
                                size_t num_a, const BN_ULONG *b, size_t num_b,
                                const BN_MONT_CTX *mont) {
  const size_t num_n = mont->N.top;
  // Bound |num_n| first, then check |num_a| and |num_b| individually rather
  // than via |num_a + num_b|: the sum could wrap around in |size_t| and let
  // oversized operands through to the fixed-size buffer below.
  if (num_r != num_n || num_n > BN_SMALL_MAX_WORDS ||
      num_a > 2 * num_n || num_b > 2 * num_n - num_a) {
    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    return 0;
  }

#if defined(OPENSSL_BN_ASM_MONT)
  // |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
  if (num_n >= (128 / BN_BITS2) &&
      num_a == num_n &&
      num_b == num_n) {
    if (!bn_mul_mont(r, a, b, mont->N.d, mont->n0, num_n)) {
      assert(0);  // The check above ensures this won't happen.
      OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
      return 0;
    }
    return 1;
  }
#endif

  // Compute the product into a stack temporary. The checks above guarantee
  // |num_ab| <= |num_tmp| <= 2 * BN_SMALL_MAX_WORDS.
  BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
  const size_t num_tmp = 2 * num_n;
  const size_t num_ab = num_a + num_b;
  int ret;
  if (a == b && num_a == num_b) {
    ret = bn_sqr_small(tmp, num_ab, a, num_a);
  } else {
    ret = bn_mul_small(tmp, num_ab, a, num_a, b, num_b);
  }

  if (ret) {
    // Zero-extend to full width and reduce.
    OPENSSL_memset(tmp + num_ab, 0, (num_tmp - num_ab) * sizeof(BN_ULONG));
    ret = bn_from_montgomery_in_place(r, num_r, tmp, num_tmp, mont);
  }

  // Wipe the temporary even if the multiplication failed part-way, so that no
  // partial product is left on the stack.
  OPENSSL_cleanse(tmp, num_tmp * sizeof(BN_ULONG));
  return ret;
}
533