1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to.  The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  *    must display the following acknowledgement:
32  *    "This product includes cryptographic software written by
33  *     Eric Young (eay@cryptsoft.com)"
34  *    The word 'cryptographic' can be left out if the rouines from the library
35  *    being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  *    the apps directory (application code) you must include an acknowledgement:
38  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.] */
56 
57 #include <openssl/bn.h>
58 
59 #include <assert.h>
60 
61 #include "internal.h"
62 
63 
64 // This file has two other implementations: x86 assembly language in
65 // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
66 #if defined(OPENSSL_NO_ASM) || \
67     !(defined(OPENSSL_X86) ||  \
68       (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
69 
#ifdef BN_ULLONG

// Double-width variants. The whole computation is carried out in a BN_ULLONG
// and then split with Lw()/Hw(), which are declared outside this section
// (presumably they select the low and high BN_ULONG halves -- confirm in
// internal.h).

// mul_add(r, a, w, c) evaluates w * a + r + c, stores the low word back into r
// and the high word into c. r and c must be lvalues. For word base B the sum
// is at most (B-1)^2 + 2*(B-1) = B^2 - 1, so it fits in a double word.
#define mul_add(r, a, w, c)               \
  do {                                    \
    BN_ULLONG t;                          \
    t = (BN_ULLONG)(w) * (a) + (r) + (c); \
    (r) = Lw(t);                          \
    (c) = Hw(t);                          \
  } while (0)

// mul(r, a, w, c) evaluates w * a + c, stores the low word into r and the high
// word into c. r and c must be lvalues.
#define mul(r, a, w, c)             \
  do {                              \
    BN_ULLONG t;                    \
    t = (BN_ULLONG)(w) * (a) + (c); \
    (r) = Lw(t);                    \
    (c) = Hw(t);                    \
  } while (0)

// sqr(r0, r1, a) evaluates a * a and stores the low word into r0 and the high
// word into r1.
#define sqr(r0, r1, a)        \
  do {                        \
    BN_ULLONG t;              \
    t = (BN_ULLONG)(a) * (a); \
    (r0) = Lw(t);             \
    (r1) = Hw(t);             \
  } while (0)

#else

// Single-width variants. These rely on BN_UMULT_LOHI(low, high, x, y), which
// is declared outside this section (presumably it writes the low and high
// words of x * y into its first two arguments -- confirm in internal.h).
// Carries are recovered with the "sum < addend" unsigned wrap-around test.

// mul_add(r, a, w, c) evaluates w * a + r + c, stores the low word back into r
// and the high word into c. r and c must be lvalues.
#define mul_add(r, a, w, c)             \
  do {                                  \
    BN_ULONG high, low, ret, tmp = (a); \
    ret = (r);                          \
    BN_UMULT_LOHI(low, high, w, tmp);   \
    ret += (c);                         \
    (c) = (ret < (c)) ? 1 : 0;          \
    (c) += high;                        \
    ret += low;                         \
    (c) += (ret < low) ? 1 : 0;         \
    (r) = ret;                          \
  } while (0)

// mul(r, a, w, c) evaluates w * a + c, stores the low word into r and the high
// word into c. r and c must be lvalues.
#define mul(r, a, w, c)                \
  do {                                 \
    BN_ULONG high, low, ret, ta = (a); \
    BN_UMULT_LOHI(low, high, w, ta);   \
    ret = low + (c);                   \
    (c) = high;                        \
    (c) += (ret < low) ? 1 : 0;        \
    (r) = ret;                         \
  } while (0)

// sqr(r0, r1, a) evaluates a * a and stores the low word into r0 and the high
// word into r1. The argument is copied first so it is evaluated only once.
#define sqr(r0, r1, a)               \
  do {                               \
    BN_ULONG tmp = (a);              \
    BN_UMULT_LOHI(r0, r1, tmp, tmp); \
  } while (0)

#endif  // !BN_ULLONG
127 
// bn_mul_add_words multiplies each of the num words of ap by w and adds the
// product into the corresponding word of rp in place, feeding the carry of each
// word into the next one. It returns the carry out of the last word, which is
// zero when num is zero.
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                          BN_ULONG w) {
  BN_ULONG carry = 0;

  // Bulk of the work, unrolled four words per iteration.
  while (num >= 4) {
    mul_add(rp[0], ap[0], w, carry);
    mul_add(rp[1], ap[1], w, carry);
    mul_add(rp[2], ap[2], w, carry);
    mul_add(rp[3], ap[3], w, carry);
    ap += 4;
    rp += 4;
    num -= 4;
  }

  // Tail: the remaining zero to three words.
  while (num != 0) {
    mul_add(rp[0], ap[0], w, carry);
    ap++;
    rp++;
    num--;
  }

  return carry;
}
155 
// bn_mul_words multiplies each of the num words of ap by w and writes the
// result to rp, feeding the high word of each product into the next word as a
// carry. Unlike bn_mul_add_words, the previous contents of rp are not added
// in. It returns the carry out of the last word, which is zero when num is
// zero.
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                      BN_ULONG w) {
  BN_ULONG carry = 0;

  // Bulk of the work, unrolled four words per iteration.
  while (num >= 4) {
    mul(rp[0], ap[0], w, carry);
    mul(rp[1], ap[1], w, carry);
    mul(rp[2], ap[2], w, carry);
    mul(rp[3], ap[3], w, carry);
    ap += 4;
    rp += 4;
    num -= 4;
  }

  // Tail: the remaining zero to three words.
  while (num != 0) {
    mul(rp[0], ap[0], w, carry);
    ap++;
    rp++;
    num--;
  }

  return carry;
}
181 
// bn_sqr_words squares each of the n words of a independently. The double-word
// square of a[i] is stored as r[2*i] (low word) and r[2*i + 1] (high word), so
// r must have room for 2*n words. No carries pass between words.
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
  // Bulk of the work, unrolled four input words (eight output words) per
  // iteration.
  while (n >= 4) {
    sqr(r[0], r[1], a[0]);
    sqr(r[2], r[3], a[1]);
    sqr(r[4], r[5], a[2]);
    sqr(r[6], r[7], a[3]);
    a += 4;
    r += 8;
    n -= 4;
  }

  // Tail: the remaining zero to three input words.
  while (n != 0) {
    sqr(r[0], r[1], a[0]);
    a++;
    r += 2;
    n--;
  }
}
203 
204 #ifdef BN_ULLONG
bn_add_words(BN_ULONG * r,const BN_ULONG * a,const BN_ULONG * b,size_t n)205 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
206                       size_t n) {
207   BN_ULLONG ll = 0;
208 
209   if (n == 0) {
210     return 0;
211   }
212 
213   while (n & ~3) {
214     ll += (BN_ULLONG)a[0] + b[0];
215     r[0] = (BN_ULONG)ll;
216     ll >>= BN_BITS2;
217     ll += (BN_ULLONG)a[1] + b[1];
218     r[1] = (BN_ULONG)ll;
219     ll >>= BN_BITS2;
220     ll += (BN_ULLONG)a[2] + b[2];
221     r[2] = (BN_ULONG)ll;
222     ll >>= BN_BITS2;
223     ll += (BN_ULLONG)a[3] + b[3];
224     r[3] = (BN_ULONG)ll;
225     ll >>= BN_BITS2;
226     a += 4;
227     b += 4;
228     r += 4;
229     n -= 4;
230   }
231   while (n) {
232     ll += (BN_ULLONG)a[0] + b[0];
233     r[0] = (BN_ULONG)ll;
234     ll >>= BN_BITS2;
235     a++;
236     b++;
237     r++;
238     n--;
239   }
240   return (BN_ULONG)ll;
241 }
242 
243 #else  // !BN_ULLONG
244 
bn_add_words(BN_ULONG * r,const BN_ULONG * a,const BN_ULONG * b,size_t n)245 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
246                       size_t n) {
247   BN_ULONG c, l, t;
248 
249   if (n == 0) {
250     return (BN_ULONG)0;
251   }
252 
253   c = 0;
254   while (n & ~3) {
255     t = a[0];
256     t += c;
257     c = (t < c);
258     l = t + b[0];
259     c += (l < t);
260     r[0] = l;
261     t = a[1];
262     t += c;
263     c = (t < c);
264     l = t + b[1];
265     c += (l < t);
266     r[1] = l;
267     t = a[2];
268     t += c;
269     c = (t < c);
270     l = t + b[2];
271     c += (l < t);
272     r[2] = l;
273     t = a[3];
274     t += c;
275     c = (t < c);
276     l = t + b[3];
277     c += (l < t);
278     r[3] = l;
279     a += 4;
280     b += 4;
281     r += 4;
282     n -= 4;
283   }
284   while (n) {
285     t = a[0];
286     t += c;
287     c = (t < c);
288     l = t + b[0];
289     c += (l < t);
290     r[0] = l;
291     a++;
292     b++;
293     r++;
294     n--;
295   }
296   return (BN_ULONG)c;
297 }
298 
299 #endif  // !BN_ULLONG
300 
// bn_sub_words subtracts the n-word number b from the n-word number a word by
// word, writes the n-word difference (with unsigned wrap-around) to r and
// returns the borrow out of the last word: 1 if a borrow was needed, 0
// otherwise (and 0 when n is zero).
//
// Borrow rule for each word: if the two input words differ, the outgoing
// borrow is simply whether a[i] < b[i], regardless of the incoming borrow. If
// they are equal, the word difference is 0 minus the incoming borrow, so the
// incoming borrow passes through unchanged.
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      size_t n) {
  BN_ULONG x, y;
  int borrow = 0;

  // Bulk of the work, unrolled four words per iteration.
  while (n >= 4) {
    x = a[0];
    y = b[0];
    r[0] = x - y - borrow;
    if (x != y) {
      borrow = (x < y);
    }

    x = a[1];
    y = b[1];
    r[1] = x - y - borrow;
    if (x != y) {
      borrow = (x < y);
    }

    x = a[2];
    y = b[2];
    r[2] = x - y - borrow;
    if (x != y) {
      borrow = (x < y);
    }

    x = a[3];
    y = b[3];
    r[3] = x - y - borrow;
    if (x != y) {
      borrow = (x < y);
    }

    a += 4;
    b += 4;
    r += 4;
    n -= 4;
  }

  // Tail: the remaining zero to three words.
  while (n != 0) {
    x = a[0];
    y = b[0];
    r[0] = x - y - borrow;
    if (x != y) {
      borrow = (x < y);
    }
    a++;
    b++;
    r++;
    n--;
  }

  return borrow;
}
354 
// Column-accumulation helpers for the comba routines below. In each of them
// (c2,c1,c0) is a three-word accumulator, least significant word c0, and all
// three must be lvalues.
//
// mul_add_c(a,b,c0,c1,c2)    -- c += a*b
// mul_add_c2(a,b,c0,c1,c2)   -- c += 2*a*b
// sqr_add_c(a,i,c0,c1,c2)    -- c += a[i]^2
// sqr_add_c2(a,i,j,c0,c1,c2) -- c += 2*a[i]*a[j]

#ifdef BN_ULLONG

// Keep in mind that additions to multiplication result can not overflow,
// because its high half cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2)     \
  do {                                  \
    BN_ULONG hi;                        \
    BN_ULLONG t = (BN_ULLONG)(a) * (b); \
    t += (c0); /* no carry */           \
    (c0) = (BN_ULONG)Lw(t);             \
    hi = (BN_ULONG)Hw(t);               \
    (c1) += (hi);                       \
    if ((c1) < hi) {                    \
      (c2)++;                           \
    }                                   \
  } while (0)

// The product is computed once and added into the accumulator twice, rather
// than being doubled first, so the double-width temporary never overflows.
#define mul_add_c2(a, b, c0, c1, c2)        \
  do {                                      \
    BN_ULONG hi;                            \
    BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
    BN_ULLONG tt = t + (c0); /* no carry */ \
    (c0) = (BN_ULONG)Lw(tt);                \
    hi = (BN_ULONG)Hw(tt);                  \
    (c1) += hi;                             \
    if ((c1) < hi) {                        \
      (c2)++;                               \
    }                                       \
    t += (c0); /* no carry */               \
    (c0) = (BN_ULONG)Lw(t);                 \
    hi = (BN_ULONG)Hw(t);                   \
    (c1) += hi;                             \
    if ((c1) < hi) {                        \
      (c2)++;                               \
    }                                       \
  } while (0)

#define sqr_add_c(a, i, c0, c1, c2)           \
  do {                                        \
    BN_ULONG hi;                              \
    BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
    t += (c0); /* no carry */                 \
    (c0) = (BN_ULONG)Lw(t);                   \
    hi = (BN_ULONG)Hw(t);                     \
    (c1) += hi;                               \
    if ((c1) < hi) {                          \
      (c2)++;                                 \
    }                                         \
  } while (0)

#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)

#else

// Keep in mind that additions to hi can not overflow, because the high word of
// a multiplication result cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a), tb = (b);    \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, tb);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)

// The product is computed once and added into the accumulator twice. The
// first pass uses a copy (tt) of the high word so that hi is still the
// unmodified high word for the second pass.
#define mul_add_c2(a, b, c0, c1, c2) \
  do {                               \
    BN_ULONG ta = (a), tb = (b);     \
    BN_ULONG lo, hi, tt;             \
    BN_UMULT_LOHI(lo, hi, ta, tb);   \
    (c0) += lo;                      \
    tt = hi + (((c0) < lo) ? 1 : 0); \
    (c1) += tt;                      \
    (c2) += ((c1) < tt) ? 1 : 0;     \
    (c0) += lo;                      \
    hi += ((c0) < lo) ? 1 : 0;       \
    (c1) += hi;                      \
    (c2) += ((c1) < hi) ? 1 : 0;     \
  } while (0)

#define sqr_add_c(a, i, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a)[i];           \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, ta);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)

#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)

#endif  // !BN_ULLONG
456 
// bn_mul_comba8 computes the 16-word product r = a * b of two 8-word numbers
// using column-wise (comba) accumulation.
//
// Output word r[k] is produced by summing every partial product a[i] * b[j]
// with i + j == k into a three-word accumulator. The three variables c1, c2
// and c3 rotate roles from column to column: once a column is finished its low
// word is written to r[k] and reset to zero, the remaining two words shift
// down, and the freshly cleared variable becomes the top word of the next
// column.
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  // Column 0.
  mul_add_c(a[0], b[0], c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  // Column 1.
  mul_add_c(a[0], b[1], c2, c3, c1);
  mul_add_c(a[1], b[0], c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  // Column 2.
  mul_add_c(a[2], b[0], c3, c1, c2);
  mul_add_c(a[1], b[1], c3, c1, c2);
  mul_add_c(a[0], b[2], c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  // Column 3.
  mul_add_c(a[0], b[3], c1, c2, c3);
  mul_add_c(a[1], b[2], c1, c2, c3);
  mul_add_c(a[2], b[1], c1, c2, c3);
  mul_add_c(a[3], b[0], c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  // Column 4.
  mul_add_c(a[4], b[0], c2, c3, c1);
  mul_add_c(a[3], b[1], c2, c3, c1);
  mul_add_c(a[2], b[2], c2, c3, c1);
  mul_add_c(a[1], b[3], c2, c3, c1);
  mul_add_c(a[0], b[4], c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  // Column 5.
  mul_add_c(a[0], b[5], c3, c1, c2);
  mul_add_c(a[1], b[4], c3, c1, c2);
  mul_add_c(a[2], b[3], c3, c1, c2);
  mul_add_c(a[3], b[2], c3, c1, c2);
  mul_add_c(a[4], b[1], c3, c1, c2);
  mul_add_c(a[5], b[0], c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  // Column 6.
  mul_add_c(a[6], b[0], c1, c2, c3);
  mul_add_c(a[5], b[1], c1, c2, c3);
  mul_add_c(a[4], b[2], c1, c2, c3);
  mul_add_c(a[3], b[3], c1, c2, c3);
  mul_add_c(a[2], b[4], c1, c2, c3);
  mul_add_c(a[1], b[5], c1, c2, c3);
  mul_add_c(a[0], b[6], c1, c2, c3);
  r[6] = c1;
  c1 = 0;

  // Column 7 (the longest: eight partial products).
  mul_add_c(a[0], b[7], c2, c3, c1);
  mul_add_c(a[1], b[6], c2, c3, c1);
  mul_add_c(a[2], b[5], c2, c3, c1);
  mul_add_c(a[3], b[4], c2, c3, c1);
  mul_add_c(a[4], b[3], c2, c3, c1);
  mul_add_c(a[5], b[2], c2, c3, c1);
  mul_add_c(a[6], b[1], c2, c3, c1);
  mul_add_c(a[7], b[0], c2, c3, c1);
  r[7] = c2;
  c2 = 0;

  // Column 8.
  mul_add_c(a[7], b[1], c3, c1, c2);
  mul_add_c(a[6], b[2], c3, c1, c2);
  mul_add_c(a[5], b[3], c3, c1, c2);
  mul_add_c(a[4], b[4], c3, c1, c2);
  mul_add_c(a[3], b[5], c3, c1, c2);
  mul_add_c(a[2], b[6], c3, c1, c2);
  mul_add_c(a[1], b[7], c3, c1, c2);
  r[8] = c3;
  c3 = 0;

  // Column 9.
  mul_add_c(a[2], b[7], c1, c2, c3);
  mul_add_c(a[3], b[6], c1, c2, c3);
  mul_add_c(a[4], b[5], c1, c2, c3);
  mul_add_c(a[5], b[4], c1, c2, c3);
  mul_add_c(a[6], b[3], c1, c2, c3);
  mul_add_c(a[7], b[2], c1, c2, c3);
  r[9] = c1;
  c1 = 0;

  // Column 10.
  mul_add_c(a[7], b[3], c2, c3, c1);
  mul_add_c(a[6], b[4], c2, c3, c1);
  mul_add_c(a[5], b[5], c2, c3, c1);
  mul_add_c(a[4], b[6], c2, c3, c1);
  mul_add_c(a[3], b[7], c2, c3, c1);
  r[10] = c2;
  c2 = 0;

  // Column 11.
  mul_add_c(a[4], b[7], c3, c1, c2);
  mul_add_c(a[5], b[6], c3, c1, c2);
  mul_add_c(a[6], b[5], c3, c1, c2);
  mul_add_c(a[7], b[4], c3, c1, c2);
  r[11] = c3;
  c3 = 0;

  // Column 12.
  mul_add_c(a[7], b[5], c1, c2, c3);
  mul_add_c(a[6], b[6], c1, c2, c3);
  mul_add_c(a[5], b[7], c1, c2, c3);
  r[12] = c1;
  c1 = 0;

  // Column 13.
  mul_add_c(a[6], b[7], c2, c3, c1);
  mul_add_c(a[7], b[6], c2, c3, c1);
  r[13] = c2;
  c2 = 0;

  // Column 14; the word above it is the final output word r[15].
  mul_add_c(a[7], b[7], c3, c1, c2);
  r[14] = c3;
  r[15] = c1;
}
558 
// bn_mul_comba4 computes the 8-word product r = a * b of two 4-word numbers
// using column-wise (comba) accumulation.
//
// Output word r[k] is produced by summing every partial product a[i] * b[j]
// with i + j == k into a three-word accumulator. The variables c1, c2 and c3
// rotate roles from column to column: the finished column's low word is
// written to r[k] and cleared, then reused as the top word of the next column.
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  // Column 0.
  mul_add_c(a[0], b[0], c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  // Column 1.
  mul_add_c(a[0], b[1], c2, c3, c1);
  mul_add_c(a[1], b[0], c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  // Column 2.
  mul_add_c(a[2], b[0], c3, c1, c2);
  mul_add_c(a[1], b[1], c3, c1, c2);
  mul_add_c(a[0], b[2], c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  // Column 3 (the longest: four partial products).
  mul_add_c(a[0], b[3], c1, c2, c3);
  mul_add_c(a[1], b[2], c1, c2, c3);
  mul_add_c(a[2], b[1], c1, c2, c3);
  mul_add_c(a[3], b[0], c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  // Column 4.
  mul_add_c(a[3], b[1], c2, c3, c1);
  mul_add_c(a[2], b[2], c2, c3, c1);
  mul_add_c(a[1], b[3], c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  // Column 5.
  mul_add_c(a[2], b[3], c3, c1, c2);
  mul_add_c(a[3], b[2], c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  // Column 6; the word above it is the final output word r[7].
  mul_add_c(a[3], b[3], c1, c2, c3);
  r[6] = c1;
  r[7] = c2;
}
596 
// bn_sqr_comba8 computes the 16-word square r = a * a of an 8-word number using
// column-wise (comba) accumulation.
//
// Output word r[k] sums every a[i] * a[j] with i + j == k. Because the two
// operands are the same, each off-diagonal pair (i != j) is added once with a
// factor of two via sqr_add_c2, and the diagonal term a[k/2]^2, present only
// in even columns, is added via sqr_add_c. The variables c1, c2 and c3 rotate
// roles from column to column: the finished column's low word is written to
// r[k] and cleared, then reused as the top word of the next column.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  // Column 0.
  sqr_add_c(a, 0, c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  // Column 1.
  sqr_add_c2(a, 1, 0, c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  // Column 2.
  sqr_add_c(a, 1, c3, c1, c2);
  sqr_add_c2(a, 2, 0, c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  // Column 3.
  sqr_add_c2(a, 3, 0, c1, c2, c3);
  sqr_add_c2(a, 2, 1, c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  // Column 4.
  sqr_add_c(a, 2, c2, c3, c1);
  sqr_add_c2(a, 3, 1, c2, c3, c1);
  sqr_add_c2(a, 4, 0, c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  // Column 5.
  sqr_add_c2(a, 5, 0, c3, c1, c2);
  sqr_add_c2(a, 4, 1, c3, c1, c2);
  sqr_add_c2(a, 3, 2, c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  // Column 6.
  sqr_add_c(a, 3, c1, c2, c3);
  sqr_add_c2(a, 4, 2, c1, c2, c3);
  sqr_add_c2(a, 5, 1, c1, c2, c3);
  sqr_add_c2(a, 6, 0, c1, c2, c3);
  r[6] = c1;
  c1 = 0;

  // Column 7.
  sqr_add_c2(a, 7, 0, c2, c3, c1);
  sqr_add_c2(a, 6, 1, c2, c3, c1);
  sqr_add_c2(a, 5, 2, c2, c3, c1);
  sqr_add_c2(a, 4, 3, c2, c3, c1);
  r[7] = c2;
  c2 = 0;

  // Column 8.
  sqr_add_c(a, 4, c3, c1, c2);
  sqr_add_c2(a, 5, 3, c3, c1, c2);
  sqr_add_c2(a, 6, 2, c3, c1, c2);
  sqr_add_c2(a, 7, 1, c3, c1, c2);
  r[8] = c3;
  c3 = 0;

  // Column 9.
  sqr_add_c2(a, 7, 2, c1, c2, c3);
  sqr_add_c2(a, 6, 3, c1, c2, c3);
  sqr_add_c2(a, 5, 4, c1, c2, c3);
  r[9] = c1;
  c1 = 0;

  // Column 10.
  sqr_add_c(a, 5, c2, c3, c1);
  sqr_add_c2(a, 6, 4, c2, c3, c1);
  sqr_add_c2(a, 7, 3, c2, c3, c1);
  r[10] = c2;
  c2 = 0;

  // Column 11.
  sqr_add_c2(a, 7, 4, c3, c1, c2);
  sqr_add_c2(a, 6, 5, c3, c1, c2);
  r[11] = c3;
  c3 = 0;

  // Column 12.
  sqr_add_c(a, 6, c1, c2, c3);
  sqr_add_c2(a, 7, 5, c1, c2, c3);
  r[12] = c1;
  c1 = 0;

  // Column 13.
  sqr_add_c2(a, 7, 6, c2, c3, c1);
  r[13] = c2;
  c2 = 0;

  // Column 14; the word above it is the final output word r[15].
  sqr_add_c(a, 7, c3, c1, c2);
  r[14] = c3;
  r[15] = c1;
}
670 
// bn_sqr_comba4 computes the 8-word square r = a * a of a 4-word number using
// column-wise (comba) accumulation.
//
// Output word r[k] sums every a[i] * a[j] with i + j == k. Each off-diagonal
// pair (i != j) is added once with a factor of two via sqr_add_c2, and the
// diagonal term a[k/2]^2, present only in even columns, is added via
// sqr_add_c. The variables c1, c2 and c3 rotate roles from column to column:
// the finished column's low word is written to r[k] and cleared, then reused
// as the top word of the next column.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  // Column 0.
  sqr_add_c(a, 0, c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  // Column 1.
  sqr_add_c2(a, 1, 0, c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  // Column 2.
  sqr_add_c(a, 1, c3, c1, c2);
  sqr_add_c2(a, 2, 0, c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  // Column 3.
  sqr_add_c2(a, 3, 0, c1, c2, c3);
  sqr_add_c2(a, 2, 1, c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  // Column 4.
  sqr_add_c(a, 2, c2, c3, c1);
  sqr_add_c2(a, 3, 1, c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  // Column 5.
  sqr_add_c2(a, 3, 2, c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  // Column 6; the word above it is the final output word r[7].
  sqr_add_c(a, 3, c1, c2, c3);
  r[6] = c1;
  r[7] = c2;
}
702 
// Remove the helper macros defined earlier in this file so that their short,
// generic names are not visible to anything that follows.
#undef mul_add
#undef mul
#undef sqr
#undef mul_add_c
#undef mul_add_c2
#undef sqr_add_c
#undef sqr_add_c2
710 
711 #endif
712