1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, include
; boringssl_prefix_symbols_nasm.inc before any symbol is declared. The
; include file is not part of this listing; presumably it supplies the
; prefixed symbol names - confirm against that file.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Choose the code section declaration according to the assembler's output
; format: "obj", "win32", or anything else.
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
; On win32 output, Yasm older than 1.1.0 is rejected with an error.
10%ifdef __YASM_VERSION_ID__
11%if __YASM_VERSION_ID__ < 01010000h
12%error yasm version 1.1.0 or later needed.
13%endif
14; Yasm automatically includes @feat.00 and complains about redefining it.
15; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
16%else
; Not Yasm: define the @feat.00 symbol explicitly with value 1 (see the
; SafeSEH page linked above for what the symbol means).
17$@feat.00 equ 1
18%endif
19section	.text	code align=64
20%else
21section	.text	code
22%endif
; ----------------------------------------------------------------------
; _bn_mul_comba8
;
; Multiplies the eight 32-bit words a[0..7] by the eight 32-bit words
; b[0..7] and stores the sixteen-word product to r[0..15], least
; significant word first.
;
; Arguments are read from the stack. Relative to esp at entry:
;   [esp+4]   r - destination, 16 dwords written (offsets 0..60)
;   [esp+8]   a - first operand, 8 dwords read (held in esi)
;   [esp+12]  b - second operand, 8 dwords read (held in edi)
;
; esi, edi, ebp and ebx are pushed on entry and popped before ret.
; eax, ecx, edx and the flags are overwritten. eax holds the r pointer at
; ret; no separate return value is set up.
;
; Method: the result is produced one word ("column") at a time. Column k
; adds every product a[i]*b[j] with i+j == k into a three-register
; accumulator (low, middle, top). ebx, ecx and ebp rotate through those
; roles: the low register is stored as r[k], the other two carry over
; into column k+1, and the register that was just stored is zeroed to
; serve as the new top word.
;
; Scheduling: the operands for the next `mul` are loaded between the
; add/adc instructions of the current one; `mov` does not modify flags,
; so the carry chain is unaffected. Because `mul` needs eax, the r
; pointer is re-read from [20+esp] before every store.
;
; NOTE(review): r[k] is stored while a[] and b[] are still being read, so
; as written the inputs would be seen modified if r overlaps a or b.
; ----------------------------------------------------------------------
23global	_bn_mul_comba8
24align	16
25_bn_mul_comba8:
26L$_bn_mul_comba8_begin:
27	push	esi
	; one push so far: [12+esp] is the second stack argument (a)
28	mov	esi,DWORD [12+esp]
29	push	edi
	; two pushes so far: [20+esp] is the third stack argument (b)
30	mov	edi,DWORD [20+esp]
31	push	ebp
32	push	ebx
	; four pushes: from here on [20+esp] is the first stack argument (r)
	; accumulator starts at zero: ebx = low, ecx = middle, ebp = top
33	xor	ebx,ebx
34	mov	eax,DWORD [esi]
35	xor	ecx,ecx
36	mov	edx,DWORD [edi]
37	; ################## Calculate word 0
38	xor	ebp,ebp
39	; mul a[0]*b[0]
40	mul	edx
41	add	ebx,eax
42	mov	eax,DWORD [20+esp]
43	adc	ecx,edx
44	mov	edx,DWORD [edi]
45	adc	ebp,0
46	mov	DWORD [eax],ebx
47	mov	eax,DWORD [4+esi]
48	; saved r[0]
49	; ################## Calculate word 1
50	xor	ebx,ebx
51	; mul a[1]*b[0]
52	mul	edx
53	add	ecx,eax
54	mov	eax,DWORD [esi]
55	adc	ebp,edx
56	mov	edx,DWORD [4+edi]
57	adc	ebx,0
58	; mul a[0]*b[1]
59	mul	edx
60	add	ecx,eax
61	mov	eax,DWORD [20+esp]
62	adc	ebp,edx
63	mov	edx,DWORD [edi]
64	adc	ebx,0
65	mov	DWORD [4+eax],ecx
66	mov	eax,DWORD [8+esi]
67	; saved r[1]
68	; ################## Calculate word 2
69	xor	ecx,ecx
70	; mul a[2]*b[0]
71	mul	edx
72	add	ebp,eax
73	mov	eax,DWORD [4+esi]
74	adc	ebx,edx
75	mov	edx,DWORD [4+edi]
76	adc	ecx,0
77	; mul a[1]*b[1]
78	mul	edx
79	add	ebp,eax
80	mov	eax,DWORD [esi]
81	adc	ebx,edx
82	mov	edx,DWORD [8+edi]
83	adc	ecx,0
84	; mul a[0]*b[2]
85	mul	edx
86	add	ebp,eax
87	mov	eax,DWORD [20+esp]
88	adc	ebx,edx
89	mov	edx,DWORD [edi]
90	adc	ecx,0
91	mov	DWORD [8+eax],ebp
92	mov	eax,DWORD [12+esi]
93	; saved r[2]
94	; ################## Calculate word 3
95	xor	ebp,ebp
96	; mul a[3]*b[0]
97	mul	edx
98	add	ebx,eax
99	mov	eax,DWORD [8+esi]
100	adc	ecx,edx
101	mov	edx,DWORD [4+edi]
102	adc	ebp,0
103	; mul a[2]*b[1]
104	mul	edx
105	add	ebx,eax
106	mov	eax,DWORD [4+esi]
107	adc	ecx,edx
108	mov	edx,DWORD [8+edi]
109	adc	ebp,0
110	; mul a[1]*b[2]
111	mul	edx
112	add	ebx,eax
113	mov	eax,DWORD [esi]
114	adc	ecx,edx
115	mov	edx,DWORD [12+edi]
116	adc	ebp,0
117	; mul a[0]*b[3]
118	mul	edx
119	add	ebx,eax
120	mov	eax,DWORD [20+esp]
121	adc	ecx,edx
122	mov	edx,DWORD [edi]
123	adc	ebp,0
124	mov	DWORD [12+eax],ebx
125	mov	eax,DWORD [16+esi]
126	; saved r[3]
127	; ################## Calculate word 4
128	xor	ebx,ebx
129	; mul a[4]*b[0]
130	mul	edx
131	add	ecx,eax
132	mov	eax,DWORD [12+esi]
133	adc	ebp,edx
134	mov	edx,DWORD [4+edi]
135	adc	ebx,0
136	; mul a[3]*b[1]
137	mul	edx
138	add	ecx,eax
139	mov	eax,DWORD [8+esi]
140	adc	ebp,edx
141	mov	edx,DWORD [8+edi]
142	adc	ebx,0
143	; mul a[2]*b[2]
144	mul	edx
145	add	ecx,eax
146	mov	eax,DWORD [4+esi]
147	adc	ebp,edx
148	mov	edx,DWORD [12+edi]
149	adc	ebx,0
150	; mul a[1]*b[3]
151	mul	edx
152	add	ecx,eax
153	mov	eax,DWORD [esi]
154	adc	ebp,edx
155	mov	edx,DWORD [16+edi]
156	adc	ebx,0
157	; mul a[0]*b[4]
158	mul	edx
159	add	ecx,eax
160	mov	eax,DWORD [20+esp]
161	adc	ebp,edx
162	mov	edx,DWORD [edi]
163	adc	ebx,0
164	mov	DWORD [16+eax],ecx
165	mov	eax,DWORD [20+esi]
166	; saved r[4]
167	; ################## Calculate word 5
168	xor	ecx,ecx
169	; mul a[5]*b[0]
170	mul	edx
171	add	ebp,eax
172	mov	eax,DWORD [16+esi]
173	adc	ebx,edx
174	mov	edx,DWORD [4+edi]
175	adc	ecx,0
176	; mul a[4]*b[1]
177	mul	edx
178	add	ebp,eax
179	mov	eax,DWORD [12+esi]
180	adc	ebx,edx
181	mov	edx,DWORD [8+edi]
182	adc	ecx,0
183	; mul a[3]*b[2]
184	mul	edx
185	add	ebp,eax
186	mov	eax,DWORD [8+esi]
187	adc	ebx,edx
188	mov	edx,DWORD [12+edi]
189	adc	ecx,0
190	; mul a[2]*b[3]
191	mul	edx
192	add	ebp,eax
193	mov	eax,DWORD [4+esi]
194	adc	ebx,edx
195	mov	edx,DWORD [16+edi]
196	adc	ecx,0
197	; mul a[1]*b[4]
198	mul	edx
199	add	ebp,eax
200	mov	eax,DWORD [esi]
201	adc	ebx,edx
202	mov	edx,DWORD [20+edi]
203	adc	ecx,0
204	; mul a[0]*b[5]
205	mul	edx
206	add	ebp,eax
207	mov	eax,DWORD [20+esp]
208	adc	ebx,edx
209	mov	edx,DWORD [edi]
210	adc	ecx,0
211	mov	DWORD [20+eax],ebp
212	mov	eax,DWORD [24+esi]
213	; saved r[5]
214	; ################## Calculate word 6
215	xor	ebp,ebp
216	; mul a[6]*b[0]
217	mul	edx
218	add	ebx,eax
219	mov	eax,DWORD [20+esi]
220	adc	ecx,edx
221	mov	edx,DWORD [4+edi]
222	adc	ebp,0
223	; mul a[5]*b[1]
224	mul	edx
225	add	ebx,eax
226	mov	eax,DWORD [16+esi]
227	adc	ecx,edx
228	mov	edx,DWORD [8+edi]
229	adc	ebp,0
230	; mul a[4]*b[2]
231	mul	edx
232	add	ebx,eax
233	mov	eax,DWORD [12+esi]
234	adc	ecx,edx
235	mov	edx,DWORD [12+edi]
236	adc	ebp,0
237	; mul a[3]*b[3]
238	mul	edx
239	add	ebx,eax
240	mov	eax,DWORD [8+esi]
241	adc	ecx,edx
242	mov	edx,DWORD [16+edi]
243	adc	ebp,0
244	; mul a[2]*b[4]
245	mul	edx
246	add	ebx,eax
247	mov	eax,DWORD [4+esi]
248	adc	ecx,edx
249	mov	edx,DWORD [20+edi]
250	adc	ebp,0
251	; mul a[1]*b[5]
252	mul	edx
253	add	ebx,eax
254	mov	eax,DWORD [esi]
255	adc	ecx,edx
256	mov	edx,DWORD [24+edi]
257	adc	ebp,0
258	; mul a[0]*b[6]
259	mul	edx
260	add	ebx,eax
261	mov	eax,DWORD [20+esp]
262	adc	ecx,edx
263	mov	edx,DWORD [edi]
264	adc	ebp,0
265	mov	DWORD [24+eax],ebx
266	mov	eax,DWORD [28+esi]
267	; saved r[6]
268	; ################## Calculate word 7
269	xor	ebx,ebx
270	; mul a[7]*b[0]
271	mul	edx
272	add	ecx,eax
273	mov	eax,DWORD [24+esi]
274	adc	ebp,edx
275	mov	edx,DWORD [4+edi]
276	adc	ebx,0
277	; mul a[6]*b[1]
278	mul	edx
279	add	ecx,eax
280	mov	eax,DWORD [20+esi]
281	adc	ebp,edx
282	mov	edx,DWORD [8+edi]
283	adc	ebx,0
284	; mul a[5]*b[2]
285	mul	edx
286	add	ecx,eax
287	mov	eax,DWORD [16+esi]
288	adc	ebp,edx
289	mov	edx,DWORD [12+edi]
290	adc	ebx,0
291	; mul a[4]*b[3]
292	mul	edx
293	add	ecx,eax
294	mov	eax,DWORD [12+esi]
295	adc	ebp,edx
296	mov	edx,DWORD [16+edi]
297	adc	ebx,0
298	; mul a[3]*b[4]
299	mul	edx
300	add	ecx,eax
301	mov	eax,DWORD [8+esi]
302	adc	ebp,edx
303	mov	edx,DWORD [20+edi]
304	adc	ebx,0
305	; mul a[2]*b[5]
306	mul	edx
307	add	ecx,eax
308	mov	eax,DWORD [4+esi]
309	adc	ebp,edx
310	mov	edx,DWORD [24+edi]
311	adc	ebx,0
312	; mul a[1]*b[6]
313	mul	edx
314	add	ecx,eax
315	mov	eax,DWORD [esi]
316	adc	ebp,edx
317	mov	edx,DWORD [28+edi]
318	adc	ebx,0
319	; mul a[0]*b[7]
320	mul	edx
321	add	ecx,eax
322	mov	eax,DWORD [20+esp]
323	adc	ebp,edx
324	mov	edx,DWORD [4+edi]
325	adc	ebx,0
326	mov	DWORD [28+eax],ecx
327	mov	eax,DWORD [28+esi]
328	; saved r[7]
	; From word 8 on, each column starts at a[7] and the first b index
	; increases by one per column.
329	; ################## Calculate word 8
330	xor	ecx,ecx
331	; mul a[7]*b[1]
332	mul	edx
333	add	ebp,eax
334	mov	eax,DWORD [24+esi]
335	adc	ebx,edx
336	mov	edx,DWORD [8+edi]
337	adc	ecx,0
338	; mul a[6]*b[2]
339	mul	edx
340	add	ebp,eax
341	mov	eax,DWORD [20+esi]
342	adc	ebx,edx
343	mov	edx,DWORD [12+edi]
344	adc	ecx,0
345	; mul a[5]*b[3]
346	mul	edx
347	add	ebp,eax
348	mov	eax,DWORD [16+esi]
349	adc	ebx,edx
350	mov	edx,DWORD [16+edi]
351	adc	ecx,0
352	; mul a[4]*b[4]
353	mul	edx
354	add	ebp,eax
355	mov	eax,DWORD [12+esi]
356	adc	ebx,edx
357	mov	edx,DWORD [20+edi]
358	adc	ecx,0
359	; mul a[3]*b[5]
360	mul	edx
361	add	ebp,eax
362	mov	eax,DWORD [8+esi]
363	adc	ebx,edx
364	mov	edx,DWORD [24+edi]
365	adc	ecx,0
366	; mul a[2]*b[6]
367	mul	edx
368	add	ebp,eax
369	mov	eax,DWORD [4+esi]
370	adc	ebx,edx
371	mov	edx,DWORD [28+edi]
372	adc	ecx,0
373	; mul a[1]*b[7]
374	mul	edx
375	add	ebp,eax
376	mov	eax,DWORD [20+esp]
377	adc	ebx,edx
378	mov	edx,DWORD [8+edi]
379	adc	ecx,0
380	mov	DWORD [32+eax],ebp
381	mov	eax,DWORD [28+esi]
382	; saved r[8]
383	; ################## Calculate word 9
384	xor	ebp,ebp
385	; mul a[7]*b[2]
386	mul	edx
387	add	ebx,eax
388	mov	eax,DWORD [24+esi]
389	adc	ecx,edx
390	mov	edx,DWORD [12+edi]
391	adc	ebp,0
392	; mul a[6]*b[3]
393	mul	edx
394	add	ebx,eax
395	mov	eax,DWORD [20+esi]
396	adc	ecx,edx
397	mov	edx,DWORD [16+edi]
398	adc	ebp,0
399	; mul a[5]*b[4]
400	mul	edx
401	add	ebx,eax
402	mov	eax,DWORD [16+esi]
403	adc	ecx,edx
404	mov	edx,DWORD [20+edi]
405	adc	ebp,0
406	; mul a[4]*b[5]
407	mul	edx
408	add	ebx,eax
409	mov	eax,DWORD [12+esi]
410	adc	ecx,edx
411	mov	edx,DWORD [24+edi]
412	adc	ebp,0
413	; mul a[3]*b[6]
414	mul	edx
415	add	ebx,eax
416	mov	eax,DWORD [8+esi]
417	adc	ecx,edx
418	mov	edx,DWORD [28+edi]
419	adc	ebp,0
420	; mul a[2]*b[7]
421	mul	edx
422	add	ebx,eax
423	mov	eax,DWORD [20+esp]
424	adc	ecx,edx
425	mov	edx,DWORD [12+edi]
426	adc	ebp,0
427	mov	DWORD [36+eax],ebx
428	mov	eax,DWORD [28+esi]
429	; saved r[9]
430	; ################## Calculate word 10
431	xor	ebx,ebx
432	; mul a[7]*b[3]
433	mul	edx
434	add	ecx,eax
435	mov	eax,DWORD [24+esi]
436	adc	ebp,edx
437	mov	edx,DWORD [16+edi]
438	adc	ebx,0
439	; mul a[6]*b[4]
440	mul	edx
441	add	ecx,eax
442	mov	eax,DWORD [20+esi]
443	adc	ebp,edx
444	mov	edx,DWORD [20+edi]
445	adc	ebx,0
446	; mul a[5]*b[5]
447	mul	edx
448	add	ecx,eax
449	mov	eax,DWORD [16+esi]
450	adc	ebp,edx
451	mov	edx,DWORD [24+edi]
452	adc	ebx,0
453	; mul a[4]*b[6]
454	mul	edx
455	add	ecx,eax
456	mov	eax,DWORD [12+esi]
457	adc	ebp,edx
458	mov	edx,DWORD [28+edi]
459	adc	ebx,0
460	; mul a[3]*b[7]
461	mul	edx
462	add	ecx,eax
463	mov	eax,DWORD [20+esp]
464	adc	ebp,edx
465	mov	edx,DWORD [16+edi]
466	adc	ebx,0
467	mov	DWORD [40+eax],ecx
468	mov	eax,DWORD [28+esi]
469	; saved r[10]
470	; ################## Calculate word 11
471	xor	ecx,ecx
472	; mul a[7]*b[4]
473	mul	edx
474	add	ebp,eax
475	mov	eax,DWORD [24+esi]
476	adc	ebx,edx
477	mov	edx,DWORD [20+edi]
478	adc	ecx,0
479	; mul a[6]*b[5]
480	mul	edx
481	add	ebp,eax
482	mov	eax,DWORD [20+esi]
483	adc	ebx,edx
484	mov	edx,DWORD [24+edi]
485	adc	ecx,0
486	; mul a[5]*b[6]
487	mul	edx
488	add	ebp,eax
489	mov	eax,DWORD [16+esi]
490	adc	ebx,edx
491	mov	edx,DWORD [28+edi]
492	adc	ecx,0
493	; mul a[4]*b[7]
494	mul	edx
495	add	ebp,eax
496	mov	eax,DWORD [20+esp]
497	adc	ebx,edx
498	mov	edx,DWORD [20+edi]
499	adc	ecx,0
500	mov	DWORD [44+eax],ebp
501	mov	eax,DWORD [28+esi]
502	; saved r[11]
503	; ################## Calculate word 12
504	xor	ebp,ebp
505	; mul a[7]*b[5]
506	mul	edx
507	add	ebx,eax
508	mov	eax,DWORD [24+esi]
509	adc	ecx,edx
510	mov	edx,DWORD [24+edi]
511	adc	ebp,0
512	; mul a[6]*b[6]
513	mul	edx
514	add	ebx,eax
515	mov	eax,DWORD [20+esi]
516	adc	ecx,edx
517	mov	edx,DWORD [28+edi]
518	adc	ebp,0
519	; mul a[5]*b[7]
520	mul	edx
521	add	ebx,eax
522	mov	eax,DWORD [20+esp]
523	adc	ecx,edx
524	mov	edx,DWORD [24+edi]
525	adc	ebp,0
526	mov	DWORD [48+eax],ebx
527	mov	eax,DWORD [28+esi]
528	; saved r[12]
529	; ################## Calculate word 13
530	xor	ebx,ebx
531	; mul a[7]*b[6]
532	mul	edx
533	add	ecx,eax
534	mov	eax,DWORD [24+esi]
535	adc	ebp,edx
536	mov	edx,DWORD [28+edi]
537	adc	ebx,0
538	; mul a[6]*b[7]
539	mul	edx
540	add	ecx,eax
541	mov	eax,DWORD [20+esp]
542	adc	ebp,edx
543	mov	edx,DWORD [28+edi]
544	adc	ebx,0
545	mov	DWORD [52+eax],ecx
546	mov	eax,DWORD [28+esi]
547	; saved r[13]
548	; ################## Calculate word 14
549	xor	ecx,ecx
550	; mul a[7]*b[7]
551	mul	edx
552	add	ebp,eax
553	mov	eax,DWORD [20+esp]
554	adc	ebx,edx
555	adc	ecx,0
556	mov	DWORD [56+eax],ebp
557	; saved r[14]
558	; save r[15]
	; ebx (high half of the last column) is the final word written; the
	; carry collected in ecx above is not stored anywhere.
559	mov	DWORD [60+eax],ebx
	; restore the registers pushed on entry, in reverse order
560	pop	ebx
561	pop	ebp
562	pop	edi
563	pop	esi
564	ret
; ----------------------------------------------------------------------
; _bn_mul_comba4
;
; Multiplies the four 32-bit words a[0..3] by the four 32-bit words
; b[0..3] and stores the eight-word product to r[0..7], least significant
; word first.
;
; Arguments are read from the stack. Relative to esp at entry:
;   [esp+4]   r - destination, 8 dwords written (offsets 0..28)
;   [esp+8]   a - first operand, 4 dwords read (held in esi)
;   [esp+12]  b - second operand, 4 dwords read (held in edi)
;
; esi, edi, ebp and ebx are pushed on entry and popped before ret.
; eax, ecx, edx and the flags are overwritten. eax holds the r pointer at
; ret; no separate return value is set up.
;
; Method: one result word ("column") at a time. Column k adds every
; product a[i]*b[j] with i+j == k into a three-register accumulator whose
; low/middle/top roles rotate through ebx, ecx and ebp. The low register
; is stored as r[k] and then zeroed to become the next top word.
;
; Scheduling: operands for the next `mul` are loaded between the add/adc
; instructions (`mov` leaves the flags alone). The r pointer is re-read
; from [20+esp] before each store because `mul` uses eax.
;
; NOTE(review): r[k] is stored while a[] and b[] are still being read, so
; as written the inputs would be seen modified if r overlaps a or b.
; ----------------------------------------------------------------------
565global	_bn_mul_comba4
566align	16
567_bn_mul_comba4:
568L$_bn_mul_comba4_begin:
569	push	esi
	; one push so far: [12+esp] is the second stack argument (a)
570	mov	esi,DWORD [12+esp]
571	push	edi
	; two pushes so far: [20+esp] is the third stack argument (b)
572	mov	edi,DWORD [20+esp]
573	push	ebp
574	push	ebx
	; four pushes: from here on [20+esp] is the first stack argument (r)
	; accumulator starts at zero: ebx = low, ecx = middle, ebp = top
575	xor	ebx,ebx
576	mov	eax,DWORD [esi]
577	xor	ecx,ecx
578	mov	edx,DWORD [edi]
579	; ################## Calculate word 0
580	xor	ebp,ebp
581	; mul a[0]*b[0]
582	mul	edx
583	add	ebx,eax
584	mov	eax,DWORD [20+esp]
585	adc	ecx,edx
586	mov	edx,DWORD [edi]
587	adc	ebp,0
588	mov	DWORD [eax],ebx
589	mov	eax,DWORD [4+esi]
590	; saved r[0]
591	; ################## Calculate word 1
592	xor	ebx,ebx
593	; mul a[1]*b[0]
594	mul	edx
595	add	ecx,eax
596	mov	eax,DWORD [esi]
597	adc	ebp,edx
598	mov	edx,DWORD [4+edi]
599	adc	ebx,0
600	; mul a[0]*b[1]
601	mul	edx
602	add	ecx,eax
603	mov	eax,DWORD [20+esp]
604	adc	ebp,edx
605	mov	edx,DWORD [edi]
606	adc	ebx,0
607	mov	DWORD [4+eax],ecx
608	mov	eax,DWORD [8+esi]
609	; saved r[1]
610	; ################## Calculate word 2
611	xor	ecx,ecx
612	; mul a[2]*b[0]
613	mul	edx
614	add	ebp,eax
615	mov	eax,DWORD [4+esi]
616	adc	ebx,edx
617	mov	edx,DWORD [4+edi]
618	adc	ecx,0
619	; mul a[1]*b[1]
620	mul	edx
621	add	ebp,eax
622	mov	eax,DWORD [esi]
623	adc	ebx,edx
624	mov	edx,DWORD [8+edi]
625	adc	ecx,0
626	; mul a[0]*b[2]
627	mul	edx
628	add	ebp,eax
629	mov	eax,DWORD [20+esp]
630	adc	ebx,edx
631	mov	edx,DWORD [edi]
632	adc	ecx,0
633	mov	DWORD [8+eax],ebp
634	mov	eax,DWORD [12+esi]
635	; saved r[2]
636	; ################## Calculate word 3
637	xor	ebp,ebp
638	; mul a[3]*b[0]
639	mul	edx
640	add	ebx,eax
641	mov	eax,DWORD [8+esi]
642	adc	ecx,edx
643	mov	edx,DWORD [4+edi]
644	adc	ebp,0
645	; mul a[2]*b[1]
646	mul	edx
647	add	ebx,eax
648	mov	eax,DWORD [4+esi]
649	adc	ecx,edx
650	mov	edx,DWORD [8+edi]
651	adc	ebp,0
652	; mul a[1]*b[2]
653	mul	edx
654	add	ebx,eax
655	mov	eax,DWORD [esi]
656	adc	ecx,edx
657	mov	edx,DWORD [12+edi]
658	adc	ebp,0
659	; mul a[0]*b[3]
660	mul	edx
661	add	ebx,eax
662	mov	eax,DWORD [20+esp]
663	adc	ecx,edx
664	mov	edx,DWORD [4+edi]
665	adc	ebp,0
666	mov	DWORD [12+eax],ebx
667	mov	eax,DWORD [12+esi]
668	; saved r[3]
	; From word 4 on, each column starts at a[3] and the first b index
	; increases by one per column.
669	; ################## Calculate word 4
670	xor	ebx,ebx
671	; mul a[3]*b[1]
672	mul	edx
673	add	ecx,eax
674	mov	eax,DWORD [8+esi]
675	adc	ebp,edx
676	mov	edx,DWORD [8+edi]
677	adc	ebx,0
678	; mul a[2]*b[2]
679	mul	edx
680	add	ecx,eax
681	mov	eax,DWORD [4+esi]
682	adc	ebp,edx
683	mov	edx,DWORD [12+edi]
684	adc	ebx,0
685	; mul a[1]*b[3]
686	mul	edx
687	add	ecx,eax
688	mov	eax,DWORD [20+esp]
689	adc	ebp,edx
690	mov	edx,DWORD [8+edi]
691	adc	ebx,0
692	mov	DWORD [16+eax],ecx
693	mov	eax,DWORD [12+esi]
694	; saved r[4]
695	; ################## Calculate word 5
696	xor	ecx,ecx
697	; mul a[3]*b[2]
698	mul	edx
699	add	ebp,eax
700	mov	eax,DWORD [8+esi]
701	adc	ebx,edx
702	mov	edx,DWORD [12+edi]
703	adc	ecx,0
704	; mul a[2]*b[3]
705	mul	edx
706	add	ebp,eax
707	mov	eax,DWORD [20+esp]
708	adc	ebx,edx
709	mov	edx,DWORD [12+edi]
710	adc	ecx,0
711	mov	DWORD [20+eax],ebp
712	mov	eax,DWORD [12+esi]
713	; saved r[5]
714	; ################## Calculate word 6
715	xor	ebp,ebp
716	; mul a[3]*b[3]
717	mul	edx
718	add	ebx,eax
719	mov	eax,DWORD [20+esp]
720	adc	ecx,edx
721	adc	ebp,0
722	mov	DWORD [24+eax],ebx
723	; saved r[6]
724	; save r[7]
	; ecx (high half of the last column) is the final word written; the
	; carry collected in ebp above is not stored anywhere.
725	mov	DWORD [28+eax],ecx
	; restore the registers pushed on entry, in reverse order
726	pop	ebx
727	pop	ebp
728	pop	edi
729	pop	esi
730	ret
; ----------------------------------------------------------------------
; _bn_sqr_comba8
;
; Squares the eight 32-bit words a[0..7] and stores the sixteen-word
; result to r[0..15], least significant word first.
;
; Arguments are read from the stack. Relative to esp at entry:
;   [esp+4]  r - destination, 16 dwords written (offsets 0..60; in edi)
;   [esp+8]  a - operand, 8 dwords read (in esi)
;
; esi, edi, ebp and ebx are pushed on entry and popped before ret.
; eax, ecx, edx and the flags are overwritten; no return value is set up.
;
; Method: one result word ("column") at a time, using a three-register
; accumulator whose low/middle/top roles rotate through ebx, ecx and ebp.
; For column k:
;   * each cross product a[i]*a[j] with i > j and i+j == k is computed
;     once and doubled in place (add eax,eax / adc edx,edx), with the bit
;     shifted out of edx added to the top register, before the doubled
;     value is accumulated;
;   * when k is even, the square a[k/2]*a[k/2] (`mul eax`) is added once.
; The low register is then stored as r[k] and zeroed to become the next
; top word.
;
; Unlike the mul routines, the r pointer stays in edi for the whole
; function, so it is never reloaded from the stack.
;
; NOTE(review): r[k] is stored while a[] is still being read, so as
; written the input would be seen modified if r overlaps a.
; ----------------------------------------------------------------------
731global	_bn_sqr_comba8
732align	16
733_bn_sqr_comba8:
734L$_bn_sqr_comba8_begin:
735	push	esi
736	push	edi
737	push	ebp
738	push	ebx
	; four pushes: [20+esp] is the first stack argument (r),
	; [24+esp] is the second (a)
739	mov	edi,DWORD [20+esp]
740	mov	esi,DWORD [24+esp]
	; accumulator starts at zero: ebx = low, ecx = middle, ebp = top
741	xor	ebx,ebx
742	xor	ecx,ecx
743	mov	eax,DWORD [esi]
744	; ############### Calculate word 0
745	xor	ebp,ebp
746	; sqr a[0]*a[0]
747	mul	eax
748	add	ebx,eax
749	adc	ecx,edx
750	mov	edx,DWORD [esi]
751	adc	ebp,0
752	mov	DWORD [edi],ebx
753	mov	eax,DWORD [4+esi]
754	; saved r[0]
755	; ############### Calculate word 1
756	xor	ebx,ebx
757	; sqr a[1]*a[0]
758	mul	edx
	; double the 64-bit product edx:eax; the bit shifted out goes to the
	; top accumulator word
759	add	eax,eax
760	adc	edx,edx
761	adc	ebx,0
	; accumulate the doubled product
762	add	ecx,eax
763	adc	ebp,edx
764	mov	eax,DWORD [8+esi]
765	adc	ebx,0
766	mov	DWORD [4+edi],ecx
767	mov	edx,DWORD [esi]
768	; saved r[1]
769	; ############### Calculate word 2
770	xor	ecx,ecx
771	; sqr a[2]*a[0]
772	mul	edx
773	add	eax,eax
774	adc	edx,edx
775	adc	ecx,0
776	add	ebp,eax
777	adc	ebx,edx
778	mov	eax,DWORD [4+esi]
779	adc	ecx,0
780	; sqr a[1]*a[1]
781	mul	eax
782	add	ebp,eax
783	adc	ebx,edx
784	mov	edx,DWORD [esi]
785	adc	ecx,0
786	mov	DWORD [8+edi],ebp
787	mov	eax,DWORD [12+esi]
788	; saved r[2]
789	; ############### Calculate word 3
790	xor	ebp,ebp
791	; sqr a[3]*a[0]
792	mul	edx
793	add	eax,eax
794	adc	edx,edx
795	adc	ebp,0
796	add	ebx,eax
797	adc	ecx,edx
798	mov	eax,DWORD [8+esi]
799	adc	ebp,0
800	mov	edx,DWORD [4+esi]
801	; sqr a[2]*a[1]
802	mul	edx
803	add	eax,eax
804	adc	edx,edx
805	adc	ebp,0
806	add	ebx,eax
807	adc	ecx,edx
808	mov	eax,DWORD [16+esi]
809	adc	ebp,0
810	mov	DWORD [12+edi],ebx
811	mov	edx,DWORD [esi]
812	; saved r[3]
813	; ############### Calculate word 4
814	xor	ebx,ebx
815	; sqr a[4]*a[0]
816	mul	edx
817	add	eax,eax
818	adc	edx,edx
819	adc	ebx,0
820	add	ecx,eax
821	adc	ebp,edx
822	mov	eax,DWORD [12+esi]
823	adc	ebx,0
824	mov	edx,DWORD [4+esi]
825	; sqr a[3]*a[1]
826	mul	edx
827	add	eax,eax
828	adc	edx,edx
829	adc	ebx,0
830	add	ecx,eax
831	adc	ebp,edx
832	mov	eax,DWORD [8+esi]
833	adc	ebx,0
834	; sqr a[2]*a[2]
835	mul	eax
836	add	ecx,eax
837	adc	ebp,edx
838	mov	edx,DWORD [esi]
839	adc	ebx,0
840	mov	DWORD [16+edi],ecx
841	mov	eax,DWORD [20+esi]
842	; saved r[4]
843	; ############### Calculate word 5
844	xor	ecx,ecx
845	; sqr a[5]*a[0]
846	mul	edx
847	add	eax,eax
848	adc	edx,edx
849	adc	ecx,0
850	add	ebp,eax
851	adc	ebx,edx
852	mov	eax,DWORD [16+esi]
853	adc	ecx,0
854	mov	edx,DWORD [4+esi]
855	; sqr a[4]*a[1]
856	mul	edx
857	add	eax,eax
858	adc	edx,edx
859	adc	ecx,0
860	add	ebp,eax
861	adc	ebx,edx
862	mov	eax,DWORD [12+esi]
863	adc	ecx,0
864	mov	edx,DWORD [8+esi]
865	; sqr a[3]*a[2]
866	mul	edx
867	add	eax,eax
868	adc	edx,edx
869	adc	ecx,0
870	add	ebp,eax
871	adc	ebx,edx
872	mov	eax,DWORD [24+esi]
873	adc	ecx,0
874	mov	DWORD [20+edi],ebp
875	mov	edx,DWORD [esi]
876	; saved r[5]
877	; ############### Calculate word 6
878	xor	ebp,ebp
879	; sqr a[6]*a[0]
880	mul	edx
881	add	eax,eax
882	adc	edx,edx
883	adc	ebp,0
884	add	ebx,eax
885	adc	ecx,edx
886	mov	eax,DWORD [20+esi]
887	adc	ebp,0
888	mov	edx,DWORD [4+esi]
889	; sqr a[5]*a[1]
890	mul	edx
891	add	eax,eax
892	adc	edx,edx
893	adc	ebp,0
894	add	ebx,eax
895	adc	ecx,edx
896	mov	eax,DWORD [16+esi]
897	adc	ebp,0
898	mov	edx,DWORD [8+esi]
899	; sqr a[4]*a[2]
900	mul	edx
901	add	eax,eax
902	adc	edx,edx
903	adc	ebp,0
904	add	ebx,eax
905	adc	ecx,edx
906	mov	eax,DWORD [12+esi]
907	adc	ebp,0
908	; sqr a[3]*a[3]
909	mul	eax
910	add	ebx,eax
911	adc	ecx,edx
912	mov	edx,DWORD [esi]
913	adc	ebp,0
914	mov	DWORD [24+edi],ebx
915	mov	eax,DWORD [28+esi]
916	; saved r[6]
917	; ############### Calculate word 7
918	xor	ebx,ebx
919	; sqr a[7]*a[0]
920	mul	edx
921	add	eax,eax
922	adc	edx,edx
923	adc	ebx,0
924	add	ecx,eax
925	adc	ebp,edx
926	mov	eax,DWORD [24+esi]
927	adc	ebx,0
928	mov	edx,DWORD [4+esi]
929	; sqr a[6]*a[1]
930	mul	edx
931	add	eax,eax
932	adc	edx,edx
933	adc	ebx,0
934	add	ecx,eax
935	adc	ebp,edx
936	mov	eax,DWORD [20+esi]
937	adc	ebx,0
938	mov	edx,DWORD [8+esi]
939	; sqr a[5]*a[2]
940	mul	edx
941	add	eax,eax
942	adc	edx,edx
943	adc	ebx,0
944	add	ecx,eax
945	adc	ebp,edx
946	mov	eax,DWORD [16+esi]
947	adc	ebx,0
948	mov	edx,DWORD [12+esi]
949	; sqr a[4]*a[3]
950	mul	edx
951	add	eax,eax
952	adc	edx,edx
953	adc	ebx,0
954	add	ecx,eax
955	adc	ebp,edx
956	mov	eax,DWORD [28+esi]
957	adc	ebx,0
958	mov	DWORD [28+edi],ecx
959	mov	edx,DWORD [4+esi]
960	; saved r[7]
961	; ############### Calculate word 8
962	xor	ecx,ecx
963	; sqr a[7]*a[1]
964	mul	edx
965	add	eax,eax
966	adc	edx,edx
967	adc	ecx,0
968	add	ebp,eax
969	adc	ebx,edx
970	mov	eax,DWORD [24+esi]
971	adc	ecx,0
972	mov	edx,DWORD [8+esi]
973	; sqr a[6]*a[2]
974	mul	edx
975	add	eax,eax
976	adc	edx,edx
977	adc	ecx,0
978	add	ebp,eax
979	adc	ebx,edx
980	mov	eax,DWORD [20+esi]
981	adc	ecx,0
982	mov	edx,DWORD [12+esi]
983	; sqr a[5]*a[3]
984	mul	edx
985	add	eax,eax
986	adc	edx,edx
987	adc	ecx,0
988	add	ebp,eax
989	adc	ebx,edx
990	mov	eax,DWORD [16+esi]
991	adc	ecx,0
992	; sqr a[4]*a[4]
993	mul	eax
994	add	ebp,eax
995	adc	ebx,edx
996	mov	edx,DWORD [8+esi]
997	adc	ecx,0
998	mov	DWORD [32+edi],ebp
999	mov	eax,DWORD [28+esi]
1000	; saved r[8]
1001	; ############### Calculate word 9
1002	xor	ebp,ebp
1003	; sqr a[7]*a[2]
1004	mul	edx
1005	add	eax,eax
1006	adc	edx,edx
1007	adc	ebp,0
1008	add	ebx,eax
1009	adc	ecx,edx
1010	mov	eax,DWORD [24+esi]
1011	adc	ebp,0
1012	mov	edx,DWORD [12+esi]
1013	; sqr a[6]*a[3]
1014	mul	edx
1015	add	eax,eax
1016	adc	edx,edx
1017	adc	ebp,0
1018	add	ebx,eax
1019	adc	ecx,edx
1020	mov	eax,DWORD [20+esi]
1021	adc	ebp,0
1022	mov	edx,DWORD [16+esi]
1023	; sqr a[5]*a[4]
1024	mul	edx
1025	add	eax,eax
1026	adc	edx,edx
1027	adc	ebp,0
1028	add	ebx,eax
1029	adc	ecx,edx
1030	mov	eax,DWORD [28+esi]
1031	adc	ebp,0
1032	mov	DWORD [36+edi],ebx
1033	mov	edx,DWORD [12+esi]
1034	; saved r[9]
1035	; ############### Calculate word 10
1036	xor	ebx,ebx
1037	; sqr a[7]*a[3]
1038	mul	edx
1039	add	eax,eax
1040	adc	edx,edx
1041	adc	ebx,0
1042	add	ecx,eax
1043	adc	ebp,edx
1044	mov	eax,DWORD [24+esi]
1045	adc	ebx,0
1046	mov	edx,DWORD [16+esi]
1047	; sqr a[6]*a[4]
1048	mul	edx
1049	add	eax,eax
1050	adc	edx,edx
1051	adc	ebx,0
1052	add	ecx,eax
1053	adc	ebp,edx
1054	mov	eax,DWORD [20+esi]
1055	adc	ebx,0
1056	; sqr a[5]*a[5]
1057	mul	eax
1058	add	ecx,eax
1059	adc	ebp,edx
1060	mov	edx,DWORD [16+esi]
1061	adc	ebx,0
1062	mov	DWORD [40+edi],ecx
1063	mov	eax,DWORD [28+esi]
1064	; saved r[10]
1065	; ############### Calculate word 11
1066	xor	ecx,ecx
1067	; sqr a[7]*a[4]
1068	mul	edx
1069	add	eax,eax
1070	adc	edx,edx
1071	adc	ecx,0
1072	add	ebp,eax
1073	adc	ebx,edx
1074	mov	eax,DWORD [24+esi]
1075	adc	ecx,0
1076	mov	edx,DWORD [20+esi]
1077	; sqr a[6]*a[5]
1078	mul	edx
1079	add	eax,eax
1080	adc	edx,edx
1081	adc	ecx,0
1082	add	ebp,eax
1083	adc	ebx,edx
1084	mov	eax,DWORD [28+esi]
1085	adc	ecx,0
1086	mov	DWORD [44+edi],ebp
1087	mov	edx,DWORD [20+esi]
1088	; saved r[11]
1089	; ############### Calculate word 12
1090	xor	ebp,ebp
1091	; sqr a[7]*a[5]
1092	mul	edx
1093	add	eax,eax
1094	adc	edx,edx
1095	adc	ebp,0
1096	add	ebx,eax
1097	adc	ecx,edx
1098	mov	eax,DWORD [24+esi]
1099	adc	ebp,0
1100	; sqr a[6]*a[6]
1101	mul	eax
1102	add	ebx,eax
1103	adc	ecx,edx
1104	mov	edx,DWORD [24+esi]
1105	adc	ebp,0
1106	mov	DWORD [48+edi],ebx
1107	mov	eax,DWORD [28+esi]
1108	; saved r[12]
1109	; ############### Calculate word 13
1110	xor	ebx,ebx
1111	; sqr a[7]*a[6]
1112	mul	edx
1113	add	eax,eax
1114	adc	edx,edx
1115	adc	ebx,0
1116	add	ecx,eax
1117	adc	ebp,edx
1118	mov	eax,DWORD [28+esi]
1119	adc	ebx,0
1120	mov	DWORD [52+edi],ecx
1121	; saved r[13]
1122	; ############### Calculate word 14
1123	xor	ecx,ecx
1124	; sqr a[7]*a[7]
1125	mul	eax
1126	add	ebp,eax
1127	adc	ebx,edx
1128	adc	ecx,0
1129	mov	DWORD [56+edi],ebp
1130	; saved r[14]
	; r[15]: ebx (high half of the last column) is the final word
	; written; the carry collected in ecx above is not stored anywhere.
1131	mov	DWORD [60+edi],ebx
	; restore the registers pushed on entry, in reverse order
1132	pop	ebx
1133	pop	ebp
1134	pop	edi
1135	pop	esi
1136	ret
; ----------------------------------------------------------------------
; _bn_sqr_comba4
;
; Squares the four 32-bit words a[0..3] and stores the eight-word result
; to r[0..7], least significant word first.
;
; Arguments are read from the stack. Relative to esp at entry:
;   [esp+4]  r - destination, 8 dwords written (offsets 0..28; in edi)
;   [esp+8]  a - operand, 4 dwords read (in esi)
;
; esi, edi, ebp and ebx are pushed on entry and popped before ret.
; eax, ecx, edx and the flags are overwritten; no return value is set up.
;
; Method: one result word ("column") at a time, using a three-register
; accumulator whose low/middle/top roles rotate through ebx, ecx and ebp.
; Cross products a[i]*a[j] (i > j) are computed once and doubled in place
; (add eax,eax / adc edx,edx, with the bit shifted out added to the top
; register) before being accumulated; squares a[i]*a[i] (`mul eax`) are
; added once. The low register is stored as r[k] and then zeroed to
; become the next top word.
;
; The r pointer stays in edi for the whole function.
;
; NOTE(review): r[k] is stored while a[] is still being read, so as
; written the input would be seen modified if r overlaps a.
; ----------------------------------------------------------------------
1137global	_bn_sqr_comba4
1138align	16
1139_bn_sqr_comba4:
1140L$_bn_sqr_comba4_begin:
1141	push	esi
1142	push	edi
1143	push	ebp
1144	push	ebx
	; four pushes: [20+esp] is the first stack argument (r),
	; [24+esp] is the second (a)
1145	mov	edi,DWORD [20+esp]
1146	mov	esi,DWORD [24+esp]
	; accumulator starts at zero: ebx = low, ecx = middle, ebp = top
1147	xor	ebx,ebx
1148	xor	ecx,ecx
1149	mov	eax,DWORD [esi]
1150	; ############### Calculate word 0
1151	xor	ebp,ebp
1152	; sqr a[0]*a[0]
1153	mul	eax
1154	add	ebx,eax
1155	adc	ecx,edx
1156	mov	edx,DWORD [esi]
1157	adc	ebp,0
1158	mov	DWORD [edi],ebx
1159	mov	eax,DWORD [4+esi]
1160	; saved r[0]
1161	; ############### Calculate word 1
1162	xor	ebx,ebx
1163	; sqr a[1]*a[0]
1164	mul	edx
	; double the 64-bit product edx:eax; the bit shifted out goes to the
	; top accumulator word
1165	add	eax,eax
1166	adc	edx,edx
1167	adc	ebx,0
	; accumulate the doubled product
1168	add	ecx,eax
1169	adc	ebp,edx
1170	mov	eax,DWORD [8+esi]
1171	adc	ebx,0
1172	mov	DWORD [4+edi],ecx
1173	mov	edx,DWORD [esi]
1174	; saved r[1]
1175	; ############### Calculate word 2
1176	xor	ecx,ecx
1177	; sqr a[2]*a[0]
1178	mul	edx
1179	add	eax,eax
1180	adc	edx,edx
1181	adc	ecx,0
1182	add	ebp,eax
1183	adc	ebx,edx
1184	mov	eax,DWORD [4+esi]
1185	adc	ecx,0
1186	; sqr a[1]*a[1]
1187	mul	eax
1188	add	ebp,eax
1189	adc	ebx,edx
1190	mov	edx,DWORD [esi]
1191	adc	ecx,0
1192	mov	DWORD [8+edi],ebp
1193	mov	eax,DWORD [12+esi]
1194	; saved r[2]
1195	; ############### Calculate word 3
1196	xor	ebp,ebp
1197	; sqr a[3]*a[0]
1198	mul	edx
1199	add	eax,eax
1200	adc	edx,edx
1201	adc	ebp,0
1202	add	ebx,eax
1203	adc	ecx,edx
1204	mov	eax,DWORD [8+esi]
1205	adc	ebp,0
1206	mov	edx,DWORD [4+esi]
1207	; sqr a[2]*a[1]
1208	mul	edx
1209	add	eax,eax
1210	adc	edx,edx
1211	adc	ebp,0
1212	add	ebx,eax
1213	adc	ecx,edx
1214	mov	eax,DWORD [12+esi]
1215	adc	ebp,0
1216	mov	DWORD [12+edi],ebx
1217	mov	edx,DWORD [4+esi]
1218	; saved r[3]
1219	; ############### Calculate word 4
1220	xor	ebx,ebx
1221	; sqr a[3]*a[1]
1222	mul	edx
1223	add	eax,eax
1224	adc	edx,edx
1225	adc	ebx,0
1226	add	ecx,eax
1227	adc	ebp,edx
1228	mov	eax,DWORD [8+esi]
1229	adc	ebx,0
1230	; sqr a[2]*a[2]
1231	mul	eax
1232	add	ecx,eax
1233	adc	ebp,edx
1234	mov	edx,DWORD [8+esi]
1235	adc	ebx,0
1236	mov	DWORD [16+edi],ecx
1237	mov	eax,DWORD [12+esi]
1238	; saved r[4]
1239	; ############### Calculate word 5
1240	xor	ecx,ecx
1241	; sqr a[3]*a[2]
1242	mul	edx
1243	add	eax,eax
1244	adc	edx,edx
1245	adc	ecx,0
1246	add	ebp,eax
1247	adc	ebx,edx
1248	mov	eax,DWORD [12+esi]
1249	adc	ecx,0
1250	mov	DWORD [20+edi],ebp
1251	; saved r[5]
1252	; ############### Calculate word 6
1253	xor	ebp,ebp
1254	; sqr a[3]*a[3]
1255	mul	eax
1256	add	ebx,eax
1257	adc	ecx,edx
1258	adc	ebp,0
1259	mov	DWORD [24+edi],ebx
1260	; saved r[6]
	; r[7]: ecx (high half of the last column) is the final word written;
	; the carry collected in ebp above is not stored anywhere.
1261	mov	DWORD [28+edi],ecx
	; restore the registers pushed on entry, in reverse order
1262	pop	ebx
1263	pop	ebp
1264	pop	edi
1265	pop	esi
1266	ret
1267