1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, the prefix
; include is pulled in before any symbol is declared.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Pick the code section declaration that matches the assembler's output
; format: "obj" gets a use32 segment, "win32" gets a 64-byte aligned .text,
; and anything else gets a plain .text.
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
; Under Yasm, versions older than 1.1.0 are rejected with an %error.
10%ifdef __YASM_VERSION_ID__
11%if __YASM_VERSION_ID__ < 01010000h
12%error yasm version 1.1.0 or later needed.
13%endif
14; Yasm automatically includes @feat.00 and complains about redefining it.
15; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
16%else
; Not Yasm: define @feat.00 here, since the assembler does not emit it
; (see the SafeSEH page linked above for what the symbol means).
17$@feat.00 equ 1
18%endif
19section	.text	code align=64
20%else
21section	.text	code
22%endif
23;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication for 32-bit x86 (identified as
; such by the ident string that follows the routine).
;
; Six dword arguments on the stack; the routine ends in a plain `ret`, so
; the caller removes them. Argument roles as the code uses them (the short
; names below are descriptive labels, they do not appear in the code):
;   arg1 rp  - destination; words 0..num-1 are written in the tail
;   arg2 ap  - first multiplicand
;   arg3 bp  - second multiplicand (one word consumed per outer row)
;   arg4 np  - modulus; subtracted from the result in the tail
;   arg5 n0  - pointer; only its first dword is read, and it is the
;              factor used to derive the per-row reduction multiplier
;   arg6 num - word count, compared as a signed value
;
; Returns eax = 0 (and does nothing else) when num < 4, otherwise eax = 1.
; ebp/ebx/esi/edi are pushed on entry and popped on exit; eax, ecx, edx
; and the flags are clobbered. The SSE2 path additionally uses mm0-mm7
; and executes emms before joining the common tail.
;
; Frame layout after the stack switch (offsets from the new esp):
;   [0]      squaring path only: num-1
;   [4]      rp
;   [8]      ap
;   [12]     bp pointer (generic path: advanced by 4 per row;
;            squaring path: reused as the row index)
;   [16]     np
;   [20]     first dword of *n0
;   [24]     value of esp before the switch, restored on exit
;   [28]     generic path only: bp + 4*num (end-of-bp pointer)
;   [32+4*i] scratch vector tp[i]; the frame reserves num+2 words for it
;-----------------------------------------------------------------------
24global	_bn_mul_mont
25align	16
26_bn_mul_mont:
27L$_bn_mul_mont_begin:
28	push	ebp
29	push	ebx
30	push	esi
31	push	edi
	; eax = 0 is the return value for the early exit below.
32	xor	eax,eax
	; Four pushes plus the return address put arg1 at [20+esp], so
	; [40+esp] is arg6 (num). Bail out when num < 4 (signed compare).
33	mov	edi,DWORD [40+esp]
34	cmp	edi,4
35	jl	NEAR L$000just_leave
	; esi = address of arg1, edx = address of arg2 (used only for the
	; frame placement arithmetic below).
36	lea	esi,[20+esp]
37	lea	edx,[24+esp]
	; ebp = esp - 32 - 4*(num+2): room for the 32-byte header and a
	; scratch vector of num+2 words. edi is negated only for the lea and
	; holds num+2 again afterwards.
38	add	edi,2
39	neg	edi
40	lea	ebp,[edi*4+esp-32]
41	neg	edi
	; Move the frame further down based on its distance from the
	; argument area modulo 2048 and on bit 11 of the two addresses, then
	; round down to a 64-byte boundary.
	; NOTE(review): presumably this avoids cache/page aliasing between the
	; frame and the caller's data -- the code itself does not say; confirm
	; against the generating Perl script.
42	mov	eax,ebp
43	sub	eax,edx
44	and	eax,2047
45	sub	ebp,eax
46	xor	edx,ebp
47	and	edx,2048
48	xor	edx,2048
49	sub	ebp,edx
50	and	ebp,-64
	; Lower esp to the new frame one 4096-byte step at a time, reading a
	; dword at every step, instead of jumping straight to ebp. edx keeps
	; the pre-switch esp so it can be stored in the frame and restored on
	; exit. First step: esp = ebp + ((esp - ebp) rounded down to 4096).
51	mov	eax,esp
52	sub	eax,ebp
53	and	eax,-4096
54	mov	edx,esp
55	lea	esp,[eax*1+ebp]
56	mov	eax,DWORD [esp]
57	cmp	esp,ebp
58	ja	NEAR L$001page_walk
59	jmp	NEAR L$002page_walk_done
60align	16
61L$001page_walk:
	; Step down one more 4096-byte block and touch it; repeat while esp
	; is still above the target (unsigned compare).
62	lea	esp,[esp-4096]
63	mov	eax,DWORD [esp]
64	cmp	esp,ebp
65	ja	NEAR L$001page_walk
66L$002page_walk_done:
	; Copy the arguments into the frame header. arg5 is dereferenced so
	; that [20+esp] holds the n0 word itself rather than the pointer.
67	mov	eax,DWORD [esi]
68	mov	ebx,DWORD [4+esi]
69	mov	ecx,DWORD [8+esi]
70	mov	ebp,DWORD [12+esi]
71	mov	esi,DWORD [16+esi]
72	mov	esi,DWORD [esi]
73	mov	DWORD [4+esp],eax
74	mov	DWORD [8+esp],ebx
75	mov	DWORD [12+esp],ecx
76	mov	DWORD [16+esp],ebp
77	mov	DWORD [20+esp],esi
	; edi still holds num+2, so ebx = num-1 (index of the top word).
78	lea	ebx,[edi-3]
	; Save the pre-switch esp for the epilogue.
79	mov	DWORD [24+esp],edx
	; Dispatch on bit 26 of the first dword of _OPENSSL_ia32cap_P: when
	; it is clear, take the integer-only path (label L$003non_sse2).
80	lea	eax,[_OPENSSL_ia32cap_P]
81	bt	DWORD [eax],26
82	jnc	NEAR L$003non_sse2
;-----------------------------------------------------------------------
; SSE2 path: 32x32->64 multiplies with pmuludq on MMX registers.
;   mm7 = 0x00000000FFFFFFFF mask      mm4 = current bp word
;   mm5 = reduction multiplier for this row (low dword is what pmuludq uses)
;   mm2 = running ap*bp accumulator    mm3 = running np*m accumulator
;   esi = ap, edi = bp, ebp = np, edx = row index, ecx = column index
;-----------------------------------------------------------------------
83	mov	eax,-1
84	movd	mm7,eax
85	mov	esi,DWORD [8+esp]
86	mov	edi,DWORD [12+esp]
87	mov	ebp,DWORD [16+esp]
88	xor	edx,edx
89	xor	ecx,ecx
	; Row 0, column 0: mm5 = ap[0]*bp[0]; keep a copy in mm2 and its low
	; dword in mm0, then mm5 = low(ap[0]*bp[0]) * n0 word and
	; mm3 = np[0]*low(mm5) + low(ap[0]*bp[0]).
90	movd	mm4,DWORD [edi]
91	movd	mm5,DWORD [esi]
92	movd	mm3,DWORD [ebp]
93	pmuludq	mm5,mm4
94	movq	mm2,mm5
95	movq	mm0,mm5
96	pand	mm0,mm7
97	pmuludq	mm5,[20+esp]
98	pmuludq	mm3,mm5
99	paddq	mm3,mm0
	; Preload column 1 operands, then drop the low dwords so both
	; accumulators hold only their carries.
100	movd	mm1,DWORD [4+ebp]
101	movd	mm0,DWORD [4+esi]
102	psrlq	mm2,32
103	psrlq	mm3,32
104	inc	ecx
105align	16
106L$0041st:
	; Row 0 inner loop, column j = ecx: accumulate ap[j]*bp[0] into mm2
	; and np[j]*m plus the low dword of mm2 into mm3, store the low dword
	; of mm3 as tp[j-1], preload the next column, shift out the low dwords.
107	pmuludq	mm0,mm4
108	pmuludq	mm1,mm5
109	paddq	mm2,mm0
110	paddq	mm3,mm1
111	movq	mm0,mm2
112	pand	mm0,mm7
113	movd	mm1,DWORD [4+ecx*4+ebp]
114	paddq	mm3,mm0
115	movd	mm0,DWORD [4+ecx*4+esi]
116	psrlq	mm2,32
117	movd	DWORD [28+ecx*4+esp],mm3
118	psrlq	mm3,32
119	lea	ecx,[1+ecx]
120	cmp	ecx,ebx
121	jl	NEAR L$0041st
	; Last column of row 0 (no preload), then write the combined carries
	; of both accumulators as a qword covering tp[num-1] and tp[num].
122	pmuludq	mm0,mm4
123	pmuludq	mm1,mm5
124	paddq	mm2,mm0
125	paddq	mm3,mm1
126	movq	mm0,mm2
127	pand	mm0,mm7
128	paddq	mm3,mm0
129	movd	DWORD [28+ecx*4+esp],mm3
130	psrlq	mm2,32
131	psrlq	mm3,32
132	paddq	mm3,mm2
133	movq	[32+ebx*4+esp],mm3
134	inc	edx
135L$005outer:
	; Rows 1..num-1: same shape as row 0, but every column also adds the
	; matching word of tp (loaded through mm6).
136	xor	ecx,ecx
137	movd	mm4,DWORD [edx*4+edi]
138	movd	mm5,DWORD [esi]
139	movd	mm6,DWORD [32+esp]
140	movd	mm3,DWORD [ebp]
	; mm5 = ap[0]*bp[row] + tp[0]; the row's reduction multiplier is
	; derived from its low dword and the n0 word, as in row 0.
141	pmuludq	mm5,mm4
142	paddq	mm5,mm6
143	movq	mm0,mm5
144	movq	mm2,mm5
145	pand	mm0,mm7
146	pmuludq	mm5,[20+esp]
147	pmuludq	mm3,mm5
148	paddq	mm3,mm0
149	movd	mm6,DWORD [36+esp]
150	movd	mm1,DWORD [4+ebp]
151	movd	mm0,DWORD [4+esi]
152	psrlq	mm2,32
153	psrlq	mm3,32
154	paddq	mm2,mm6
155	inc	ecx
	; ebx doubles as the inner-loop down counter here; it is restored
	; from ecx once the inner loop finishes.
156	dec	ebx
157L$006inner:
158	pmuludq	mm0,mm4
159	pmuludq	mm1,mm5
160	paddq	mm2,mm0
161	paddq	mm3,mm1
162	movq	mm0,mm2
163	movd	mm6,DWORD [36+ecx*4+esp]
164	pand	mm0,mm7
165	movd	mm1,DWORD [4+ecx*4+ebp]
166	paddq	mm3,mm0
167	movd	mm0,DWORD [4+ecx*4+esi]
168	psrlq	mm2,32
169	movd	DWORD [28+ecx*4+esp],mm3
170	psrlq	mm3,32
171	paddq	mm2,mm6
	; lea leaves the flags alone, so jnz tests the result of dec ebx.
172	dec	ebx
173	lea	ecx,[1+ecx]
174	jnz	NEAR L$006inner
	; ecx is num-1 here; put it back in ebx.
175	mov	ebx,ecx
	; Last column of the row, then fold both carries and the old top
	; word of tp into the qword at tp[num-1].
176	pmuludq	mm0,mm4
177	pmuludq	mm1,mm5
178	paddq	mm2,mm0
179	paddq	mm3,mm1
180	movq	mm0,mm2
181	pand	mm0,mm7
182	paddq	mm3,mm0
183	movd	DWORD [28+ecx*4+esp],mm3
184	psrlq	mm2,32
185	psrlq	mm3,32
186	movd	mm6,DWORD [36+ebx*4+esp]
187	paddq	mm3,mm2
188	paddq	mm3,mm6
189	movq	[32+ebx*4+esp],mm3
	; Next row while row index <= num-1.
190	lea	edx,[1+edx]
191	cmp	edx,ebx
192	jle	NEAR L$005outer
	; Leave MMX state before returning to integer code.
193	emms
194	jmp	NEAR L$007common_tail
195align	16
;-----------------------------------------------------------------------
; Integer-only path. esi = ap, edi = bp. The squaring variant is chosen
; when ap == bp and num is even: ebp = (num & 1) | (ap - bp) is zero only
; in that case. The mov after the `or` does not modify flags, so jz still
; tests the `or` result. eax = bp + 4*num is the end-of-bp pointer.
;-----------------------------------------------------------------------
196L$003non_sse2:
197	mov	esi,DWORD [8+esp]
198	lea	ebp,[1+ebx]
199	mov	edi,DWORD [12+esp]
200	xor	ecx,ecx
201	mov	edx,esi
202	and	ebp,1
203	sub	edx,edi
204	lea	eax,[4+ebx*4+edi]
205	or	ebp,edx
206	mov	edi,DWORD [edi]
207	jz	NEAR L$008bn_sqr_mont
	; Generic multiply: remember the end-of-bp pointer, start with
	; eax = ap[0], carry (edx) = 0, edi = bp[0].
208	mov	DWORD [28+esp],eax
209	mov	eax,DWORD [esi]
210	xor	edx,edx
211align	16
212L$009mull:
	; Row 0: tp[j] = low(ap[j]*bp[0] + carry), for j = 0..num-2.
213	mov	ebp,edx
214	mul	edi
215	add	ebp,eax
216	lea	ecx,[1+ecx]
217	adc	edx,0
218	mov	eax,DWORD [ecx*4+esi]
219	cmp	ecx,ebx
220	mov	DWORD [28+ecx*4+esp],ebp
221	jl	NEAR L$009mull
	; Last product of row 0: store tp[num-1], tp[num] = carry,
	; tp[num+1] = 0. In between, set up the reduction: edi = n0 word *
	; tp[0] (low 32 bits), esi = np.
222	mov	ebp,edx
223	mul	edi
224	mov	edi,DWORD [20+esp]
225	add	eax,ebp
226	mov	esi,DWORD [16+esp]
227	adc	edx,0
228	imul	edi,DWORD [32+esp]
229	mov	DWORD [32+ebx*4+esp],eax
230	xor	ecx,ecx
231	mov	DWORD [36+ebx*4+esp],edx
232	mov	DWORD [40+ebx*4+esp],ecx
	; Column 0 of the reduction: np[0]*m + tp[0]. Only the carry is kept
	; (eax is overwritten with np[1] right after the add).
233	mov	eax,DWORD [esi]
234	mul	edi
235	add	eax,DWORD [32+esp]
236	mov	eax,DWORD [4+esi]
237	adc	edx,0
238	inc	ecx
239	jmp	NEAR L$0102ndmadd
240align	16
241L$0111stmadd:
	; Rows >= 1, multiply-accumulate: tp[j] += ap[j]*bp[i] + carry,
	; for j = 0..num-2 (esi = ap, edi = bp[i]).
242	mov	ebp,edx
243	mul	edi
244	add	ebp,DWORD [32+ecx*4+esp]
245	lea	ecx,[1+ecx]
246	adc	edx,0
247	add	ebp,eax
248	mov	eax,DWORD [ecx*4+esi]
249	adc	edx,0
250	cmp	ecx,ebx
251	mov	DWORD [28+ecx*4+esp],ebp
252	jl	NEAR L$0111stmadd
	; Last column of the row; propagate the carry into tp[num] and
	; tp[num+1], and set up the reduction exactly as after L$009mull
	; (edi = n0 word * tp[0], esi = np).
253	mov	ebp,edx
254	mul	edi
255	add	eax,DWORD [32+ebx*4+esp]
256	mov	edi,DWORD [20+esp]
257	adc	edx,0
258	mov	esi,DWORD [16+esp]
259	add	ebp,eax
260	adc	edx,0
261	imul	edi,DWORD [32+esp]
262	xor	ecx,ecx
263	add	edx,DWORD [36+ebx*4+esp]
264	mov	DWORD [32+ebx*4+esp],ebp
265	adc	ecx,0
266	mov	eax,DWORD [esi]
267	mov	DWORD [36+ebx*4+esp],edx
268	mov	DWORD [40+ebx*4+esp],ecx
	; Column 0 of the reduction; only the carry survives.
269	mul	edi
270	add	eax,DWORD [32+esp]
271	mov	eax,DWORD [4+esi]
272	adc	edx,0
273	mov	ecx,1
274align	16
275L$0102ndmadd:
	; Reduction: tp[j-1] = low(tp[j] + np[j]*m + carry) for j = 1..num-2.
	; The store at [24+ecx*4+esp] happens after ecx was incremented, so
	; the result lands one word lower than the word that was read.
276	mov	ebp,edx
277	mul	edi
278	add	ebp,DWORD [32+ecx*4+esp]
279	lea	ecx,[1+ecx]
280	adc	edx,0
281	add	ebp,eax
282	mov	eax,DWORD [ecx*4+esi]
283	adc	edx,0
284	cmp	ecx,ebx
285	mov	DWORD [24+ecx*4+esp],ebp
286	jl	NEAR L$0102ndmadd
	; Last reduction column, then shift the two top words of tp down by
	; one position while adding the final carry.
287	mov	ebp,edx
288	mul	edi
289	add	ebp,DWORD [32+ebx*4+esp]
290	adc	edx,0
291	add	ebp,eax
292	adc	edx,0
293	mov	DWORD [28+ebx*4+esp],ebp
294	xor	eax,eax
295	mov	ecx,DWORD [12+esp]
296	add	edx,DWORD [36+ebx*4+esp]
297	adc	eax,DWORD [40+ebx*4+esp]
	; Advance the bp pointer; finished when it reaches the end pointer
	; saved at [28+esp]. The mov between cmp and je leaves flags intact.
298	lea	ecx,[4+ecx]
299	mov	DWORD [32+ebx*4+esp],edx
300	cmp	ecx,DWORD [28+esp]
301	mov	DWORD [36+ebx*4+esp],eax
302	je	NEAR L$007common_tail
	; Next row: edi = next bp word, esi = ap, column and carry reset.
303	mov	edi,DWORD [ecx]
304	mov	esi,DWORD [8+esp]
305	mov	DWORD [12+esp],ecx
306	xor	ecx,ecx
307	xor	edx,edx
308	mov	eax,DWORD [esi]
309	jmp	NEAR L$0111stmadd
310align	16
;-----------------------------------------------------------------------
; Squaring path (ap == bp, num even). [esp] = num-1 and [12+esp] = row
; index i, starting at 0. edi = ap[0] on entry. Cross products are
; doubled as they are produced; ebx carries the bit shifted out of one
; word into the next.
;-----------------------------------------------------------------------
311L$008bn_sqr_mont:
312	mov	DWORD [esp],ebx
313	mov	DWORD [12+esp],ecx
	; tp[0] = low(ap[0]^2). The high word is halved (its bit 0 kept in
	; ebx) so that the doubling applied in the loop restores it.
314	mov	eax,edi
315	mul	edi
316	mov	DWORD [32+esp],eax
317	mov	ebx,edx
318	shr	edx,1
319	and	ebx,1
320	inc	ecx
321align	16
322L$012sqr:
	; tp[j] = 2*low(ap[j]*ap[0] + carry) + bit carried in ebx, with the
	; bit shifted out of eax saved in ebx for the next word.
323	mov	eax,DWORD [ecx*4+esi]
324	mov	ebp,edx
325	mul	edi
326	add	eax,ebp
327	lea	ecx,[1+ecx]
328	adc	edx,0
329	lea	ebp,[eax*2+ebx]
330	shr	eax,31
331	cmp	ecx,DWORD [esp]
332	mov	ebx,eax
333	mov	DWORD [28+ecx*4+esp],ebp
334	jl	NEAR L$012sqr
	; Last column of row 0: write the doubled top words (three stores at
	; [32..40+ecx*4+esp]) and set up the reduction (edi = n0 word * tp[0],
	; esi = np).
335	mov	eax,DWORD [ecx*4+esi]
336	mov	ebp,edx
337	mul	edi
338	add	eax,ebp
339	mov	edi,DWORD [20+esp]
340	adc	edx,0
341	mov	esi,DWORD [16+esp]
342	lea	ebp,[eax*2+ebx]
343	imul	edi,DWORD [32+esp]
344	shr	eax,31
345	mov	DWORD [32+ecx*4+esp],ebp
346	lea	ebp,[edx*2+eax]
347	mov	eax,DWORD [esi]
348	shr	edx,31
349	mov	DWORD [36+ecx*4+esp],ebp
350	mov	DWORD [40+ecx*4+esp],edx
	; Column 0 of the reduction; only the carry is kept. ebx = num-1.
351	mul	edi
352	add	eax,DWORD [32+esp]
353	mov	ebx,ecx
354	adc	edx,0
355	mov	eax,DWORD [4+esi]
356	mov	ecx,1
357align	16
358L$0133rdmadd:
	; Reduction for the squaring path, two columns per iteration: each
	; half adds np[j]*m and the carry to a tp word and stores the low
	; word one position lower. The second store uses [24+ecx*4+esp]
	; because ecx has already been advanced by 2.
359	mov	ebp,edx
360	mul	edi
361	add	ebp,DWORD [32+ecx*4+esp]
362	adc	edx,0
363	add	ebp,eax
364	mov	eax,DWORD [4+ecx*4+esi]
365	adc	edx,0
366	mov	DWORD [28+ecx*4+esp],ebp
367	mov	ebp,edx
368	mul	edi
369	add	ebp,DWORD [36+ecx*4+esp]
370	lea	ecx,[2+ecx]
371	adc	edx,0
372	add	ebp,eax
373	mov	eax,DWORD [ecx*4+esi]
374	adc	edx,0
375	cmp	ecx,ebx
376	mov	DWORD [24+ecx*4+esp],ebp
377	jl	NEAR L$0133rdmadd
	; Last reduction column, then shift the top words of tp down while
	; adding the final carry (same shape as the end of L$0102ndmadd).
378	mov	ebp,edx
379	mul	edi
380	add	ebp,DWORD [32+ebx*4+esp]
381	adc	edx,0
382	add	ebp,eax
383	adc	edx,0
384	mov	DWORD [28+ebx*4+esp],ebp
	; ecx = row index i, esi = ap again. Finished when i == num-1; the
	; mov between cmp and je leaves the flags intact.
385	mov	ecx,DWORD [12+esp]
386	xor	eax,eax
387	mov	esi,DWORD [8+esp]
388	add	edx,DWORD [36+ebx*4+esp]
389	adc	eax,DWORD [40+ebx*4+esp]
390	mov	DWORD [32+ebx*4+esp],edx
391	cmp	ecx,ebx
392	mov	DWORD [36+ebx*4+esp],eax
393	je	NEAR L$007common_tail
	; Next row i+1: edi = ap[i+1]; add its square into tp[i+1].
394	mov	edi,DWORD [4+ecx*4+esi]
395	lea	ecx,[1+ecx]
396	mov	eax,edi
397	mov	DWORD [12+esp],ecx
398	mul	edi
399	add	eax,DWORD [32+ecx*4+esp]
400	adc	edx,0
401	mov	DWORD [32+ecx*4+esp],eax
402	xor	ebp,ebp
	; If this is the last row there are no cross products left; lea
	; does not change flags, so je tests the cmp above.
403	cmp	ecx,ebx
404	lea	ecx,[1+ecx]
405	je	NEAR L$014sqrlast
	; Halve the high word of the square, keeping its bit 0 in ebx, as
	; was done for row 0.
406	mov	ebx,edx
407	shr	edx,1
408	and	ebx,1
409align	16
410L$015sqradd:
	; tp[j] += 2*low(ap[j]*ap[i] + carry) + carried bit, for j = i+1 up
	; to num-1. eax accumulates the bit shifted out of the doubling plus
	; the carries of the two adds and becomes the next ebx.
411	mov	eax,DWORD [ecx*4+esi]
412	mov	ebp,edx
413	mul	edi
414	add	eax,ebp
415	lea	ebp,[eax*1+eax]
416	adc	edx,0
417	shr	eax,31
418	add	ebp,DWORD [32+ecx*4+esp]
419	lea	ecx,[1+ecx]
420	adc	eax,0
421	add	ebp,ebx
422	adc	eax,0
423	cmp	ecx,DWORD [esp]
424	mov	DWORD [28+ecx*4+esp],ebp
425	mov	ebx,eax
426	jle	NEAR L$015sqradd
	; Double the final carry word: edx = 2*edx + ebx, ebp = overflow.
427	mov	ebp,edx
428	add	edx,edx
429	shr	ebp,31
430	add	edx,ebx
431	adc	ebp,0
432L$014sqrlast:
	; Fold edx:ebp into the two top words of tp, set up the reduction
	; (edi = n0 word * tp[0], esi = np), do reduction column 0 (carry
	; only), restore ebx = num-1 and re-enter the reduction loop.
433	mov	edi,DWORD [20+esp]
434	mov	esi,DWORD [16+esp]
435	imul	edi,DWORD [32+esp]
436	add	edx,DWORD [32+ecx*4+esp]
437	mov	eax,DWORD [esi]
438	adc	ebp,0
439	mov	DWORD [32+ecx*4+esp],edx
440	mov	DWORD [36+ecx*4+esp],ebp
441	mul	edi
442	add	eax,DWORD [32+esp]
443	lea	ebx,[ecx-1]
444	adc	edx,0
445	mov	ecx,1
446	mov	eax,DWORD [4+esi]
447	jmp	NEAR L$0133rdmadd
448align	16
;-----------------------------------------------------------------------
; Common tail, reached from all three paths with ebx = num-1 and the
; unreduced result in tp[0..num]. Computes rp = tp - np, then keeps
; either that difference or tp itself by masking, without branching on
; the data.
;-----------------------------------------------------------------------
449L$007common_tail:
450	mov	ebp,DWORD [16+esp]
451	mov	edi,DWORD [4+esp]
452	lea	esi,[32+esp]
453	mov	eax,DWORD [esi]
454	mov	ecx,ebx
	; xor also clears CF, so the first sbb starts with no borrow.
455	xor	edx,edx
456align	16
457L$016sub:
	; rp[i] = tp[i] - np[i] - borrow, i = 0..num-1. dec, mov and lea
	; leave CF untouched, so the borrow flows from one sbb to the next;
	; jge tests the result of dec ecx and exits with ecx = -1.
458	sbb	eax,DWORD [edx*4+ebp]
459	mov	DWORD [edx*4+edi],eax
460	dec	ecx
461	mov	eax,DWORD [4+edx*4+esi]
462	lea	edx,[1+edx]
463	jge	NEAR L$016sub
	; eax = tp[num] - borrow, used as the "keep tp" mask; edx = ~eax is
	; the "keep rp" mask.
	; NOTE(review): this relies on tp[num] - borrow being 0 or all-ones;
	; that is not checked here.
464	sbb	eax,0
465	mov	edx,-1
466	xor	edx,eax
467	jmp	NEAR L$017copy
468align	16
469L$017copy:
	; For i = num-1 down to 0: rp[i] = (tp[i] & eax) | (rp[i] & edx),
	; and tp[i] is overwritten with ecx (which is -1 after the loop
	; above), so the scratch vector does not keep the intermediate value.
470	mov	esi,DWORD [32+ebx*4+esp]
471	mov	ebp,DWORD [ebx*4+edi]
472	mov	DWORD [32+ebx*4+esp],ecx
473	and	esi,eax
474	and	ebp,edx
475	or	ebp,esi
476	mov	DWORD [ebx*4+edi],ebp
477	dec	ebx
478	jge	NEAR L$017copy
	; Switch back to the stack pointer saved at [24+esp]; return 1.
479	mov	esp,DWORD [24+esp]
480	mov	eax,1
481L$000just_leave:
	; Shared epilogue; eax is 0 when reached through the num < 4 exit.
482	pop	edi
483	pop	esi
484	pop	ebx
485	pop	ebp
486	ret
; NUL-terminated ASCII identification string placed after the routine:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
487db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
488db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
489db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
490db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
491db	111,114,103,62,0
; 16-byte common symbol in .bss; _bn_mul_mont bit-tests its first dword
; to choose between the SSE2 and integer-only paths.
492segment	.bss
493common	_OPENSSL_ia32cap_P 16
494