#if defined(__i386__)
.file	"src/crypto/bn/asm/x86-mont.S"
.text
//-----------------------------------------------------------------------
// bn_mul_mont -- word-serial Montgomery multiplication, 32-bit x86.
//
// Syntax: AT&T / GNU as.   ABI: i386, all arguments on the stack.
//
// Arguments as read by this code (offsets valid after the four pushes):
//   20(%esp)  arg1  result vector, num words written        ("rp")
//   24(%esp)  arg2  first operand vector                    ("ap")
//   28(%esp)  arg3  second operand vector                   ("bp")
//   32(%esp)  arg4  modulus vector                          ("np")
//   36(%esp)  arg5  pointer; only the word it points at is used ("n0")
//   40(%esp)  arg6  vector length in 32-bit words           ("num")
// NOTE(review): the names rp/ap/bp/np/n0/num are not visible in this
// file; confirm them against the C prototype.
//
// Returns: %eax = 0 immediately (nothing written) when num < 4 (signed),
//          %eax = 1 after the result has been stored through arg1.
// Preserves %ebp, %ebx, %esi, %edi, %esp. Clobbers %eax, %ecx, %edx,
// flags, and %mm0-%mm7 on the SSE2 path (emms is executed before leaving
// that path).
//
// Private stack frame, addressed from the realigned %esp:
//    0(%esp)  num-1            (written only by the squaring path)
//    4(%esp)  rp
//    8(%esp)  ap
//   12(%esp)  bp; reused as running bp pointer (mul path) or as index i
//             (squaring path)
//   16(%esp)  np
//   20(%esp)  n0 value (*arg5)
//   24(%esp)  caller %esp, restored on exit
//   28(%esp)  &bp[num] (mul path only)
//   32(%esp)  tp[0 .. num+1], temporary accumulator vector
//
// Three code paths share one tail:
//   * SSE2 path (pmuludq on MMX registers),
//   * integer multiply path,
//   * integer squaring path, taken when ap == bp and num is even.
//-----------------------------------------------------------------------
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax		// return value for the early exit
	movl	40(%esp),%edi		// edi = num
	cmpl	$4,%edi
	jl	.L000just_leave		// num < 4 (signed): return 0, nothing touched
	leal	20(%esp),%esi		// esi = address of the argument block (arg1 slot)
	leal	24(%esp),%edx		// edx = address of the arg2 slot (an address, not ap itself)
	addl	$2,%edi			// tp needs num+2 words
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp	// ebp = esp - 32 - 4*(num+2): candidate frame base
	negl	%edi			// edi = num+2 again
	// Place the frame relative to the address in edx: first make both
	// equal modulo 2048, then force them to differ in bit 11.
	// NOTE(review): presumably a cache-aliasing measure; the intent is
	// not stated in this file.
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp		// ebp == edx (mod 2048)
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp		// now bit 11 of ebp differs from that address
	andl	$-64,%ebp		// round frame base down to a multiple of 64
	// Move %esp down to ebp one 4096-byte page at a time, reading one
	// word from every page on the way so no page is skipped.
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax		// whole pages between ebp and the old esp
	movl	%esp,%edx		// edx = caller esp, saved in the frame below
	leal	(%ebp,%eax,1),%esp	// esp = ebp + k*4096, less than a page below old esp
	movl	(%esp),%eax		// touch
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax		// touch the next page down
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	// Copy the arguments into the private frame.
	movl	(%esi),%eax		// rp
	movl	4(%esi),%ebx		// ap
	movl	8(%esi),%ecx		// bp
	movl	12(%esi),%ebp		// np
	movl	16(%esi),%esi		// pointer to n0
	movl	(%esi),%esi		// n0 value
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx		// ebx = (num+2)-3 = num-1, kept as the last index
	movl	%edx,24(%esp)		// remember caller esp
	// Position-independent address of OPENSSL_ia32cap_P via call/pop.
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	btl	$26,(%eax)		// bit 26 of the first capability word
	jnc	.L004non_sse2		// clear: use the integer paths (label implies bit = SSE2)

	//---------------------------------------------------------------
	// SSE2 path. Register roles:
	//   esi = ap, edi = bp, ebp = np, edx = i (outer), ecx = j (inner)
	//   mm7 = 0x00000000ffffffff mask, mm4 = bp[i], mm5 = m (see below)
	//   mm2 = running sum of the ap*bp[i] column (carry in upper half)
	//   mm3 = running sum of the np*m column     (carry in upper half)
	// pmuludq multiplies the low 32 bits of both operands to 64 bits.
	//---------------------------------------------------------------
	movl	$-1,%eax
	movd	%eax,%mm7		// movd zero-extends: mm7 = low-dword mask
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx		// i = 0
	xorl	%ecx,%ecx		// j = 0
	movd	(%edi),%mm4		// bp[0]
	movd	(%esi),%mm5		// ap[0]
	movd	(%ebp),%mm3		// np[0]
	pmuludq	%mm4,%mm5		// ap[0]*bp[0]
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0		// low word of ap[0]*bp[0]
	pmuludq	20(%esp),%mm5		// low dword of mm5 = m = low(ap[0]*bp[0]) * n0
	pmuludq	%mm5,%mm3		// np[0]*m
	paddq	%mm0,%mm3		// + low word; low dword of the sum is dropped below
	movd	4(%ebp),%mm1		// np[1]
	movd	4(%esi),%mm0		// ap[1]
	psrlq	$32,%mm2		// keep carries only
	psrlq	$32,%mm3
	incl	%ecx			// j = 1
.align	16
.L0051st:
	// First pass (i = 0): tp[j-1] = low(ap[j]*bp[0] + np[j]*m + carries)
	pmuludq	%mm4,%mm0		// ap[j]*bp[0]
	pmuludq	%mm5,%mm1		// np[j]*m
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1	// preload np[j+1]
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0	// preload ap[j+1]
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)	// tp[j-1]
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx		// j++
	cmpl	%ebx,%ecx
	jl	.L0051st		// while j < num-1
	// Last column, j = num-1.
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)	// tp[num-2]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3		// sum of both carries
	movq	%mm3,32(%esp,%ebx,4)	// 64-bit store: tp[num-1] and tp[num]
	incl	%edx			// i = 1
.L006outer:
	// Passes i = 1 .. num-1: same as above but tp[] is added in.
	xorl	%ecx,%ecx		// j = 0
	movd	(%edi,%edx,4),%mm4	// bp[i]
	movd	(%esi),%mm5		// ap[0]
	movd	32(%esp),%mm6		// tp[0]
	movd	(%ebp),%mm3		// np[0]
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5		// ap[0]*bp[i] + tp[0]
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5		// m = low(ap[0]*bp[i] + tp[0]) * n0
	pmuludq	%mm5,%mm3		// np[0]*m
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6		// tp[1]
	movd	4(%ebp),%mm1		// np[1]
	movd	4(%esi),%mm0		// ap[1]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2		// fold tp[1] into the ap*bp column
	incl	%ecx			// j = 1
	decl	%ebx			// ebx = num-2, reused as the inner trip count
.L007inner:
	pmuludq	%mm4,%mm0		// ap[j]*bp[i]
	pmuludq	%mm5,%mm1		// np[j]*m
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6	// tp[j+1]
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1	// np[j+1]
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0	// ap[j+1]
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)	// tp[j-1]
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx			// sets ZF for the jnz below
	leal	1(%ecx),%ecx		// j++ (lea leaves the flags from decl intact)
	jnz	.L007inner
	movl	%ecx,%ebx		// ecx == num-1 here: restore ebx = num-1
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)	// tp[num-2]
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6	// tp[num]
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)	// tp[num-1] and tp[num]
	leal	1(%edx),%edx		// i++
	cmpl	%ebx,%edx
	jle	.L006outer		// while i <= num-1
	emms				// leave MMX state before returning to the caller
	jmp	.L008common_tail

	//---------------------------------------------------------------
	// Integer paths. ebx = num-1 throughout.
	//---------------------------------------------------------------
.align	16
.L004non_sse2:
	movl	8(%esp),%esi		// ap
	leal	1(%ebx),%ebp		// num
	movl	12(%esp),%edi		// bp
	xorl	%ecx,%ecx		// j = 0
	movl	%esi,%edx
	andl	$1,%ebp			// num & 1
	subl	%edi,%edx		// ap - bp
	leal	4(%edi,%ebx,4),%eax	// &bp[num]
	orl	%edx,%ebp		// ZF set iff ap == bp and num is even
	movl	(%edi),%edi		// bp[0] (mov keeps the flags from orl)
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)		// remember &bp[num] as the end marker
	movl	(%esi),%eax		// ap[0]
	xorl	%edx,%edx		// carry = 0
.align	16
.L010mull:
	// tp[j] = low(ap[j]*bp[0] + carry), for j = 0 .. num-2
	movl	%edx,%ebp
	mull	%edi			// edx:eax = ap[j]*bp[0]
	addl	%eax,%ebp
	leal	1(%ecx),%ecx		// j++ (flags untouched)
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax	// ap[j]
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)	// tp[j-1]
	jl	.L010mull
	movl	%edx,%ebp
	mull	%edi			// ap[num-1]*bp[0]
	movl	20(%esp),%edi		// n0
	addl	%ebp,%eax
	movl	16(%esp),%esi		// np
	adcl	$0,%edx
	imull	32(%esp),%edi		// m = tp[0]*n0 (low 32 bits)
	movl	%eax,32(%esp,%ebx,4)	// tp[num-1]
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)	// tp[num]
	movl	%ecx,40(%esp,%ebx,4)	// tp[num+1] = 0
	movl	(%esi),%eax		// np[0]
	mull	%edi			// np[0]*m
	addl	32(%esp),%eax		// + tp[0]; only the carry is used, eax is reloaded next
	movl	4(%esi),%eax		// np[1]
	adcl	$0,%edx
	incl	%ecx			// j = 1
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	// tp[j] += ap[j]*bp[i] + carry (in place), for j = 0 .. num-2
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	movl	%edx,%ebp
	mull	%edi			// ap[num-1]*bp[i]
	addl	32(%esp,%ebx,4),%eax	// + tp[num-1]
	movl	20(%esp),%edi		// n0
	adcl	$0,%edx
	movl	16(%esp),%esi		// np
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi		// m = tp[0]*n0
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx	// + tp[num]
	movl	%ebp,32(%esp,%ebx,4)	// tp[num-1]
	adcl	$0,%ecx			// carry out of the top word
	movl	(%esi),%eax		// np[0]
	movl	%edx,36(%esp,%ebx,4)	// tp[num]
	movl	%ecx,40(%esp,%ebx,4)	// tp[num+1]
	mull	%edi
	addl	32(%esp),%eax		// only the carry survives
	movl	4(%esi),%eax		// np[1]
	adcl	$0,%edx
	movl	$1,%ecx			// j = 1
.align	16
.L0112ndmadd:
	// Reduction step: tp[j-1] = low(tp[j] + np[j]*m + carry),
	// i.e. the vector is shifted down by one word while m*np is added.
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi			// np[num-1]*m
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)	// tp[num-2]
	xorl	%eax,%eax
	movl	12(%esp),%ecx		// current bp pointer
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx		// advance to the next bp word
	movl	%edx,32(%esp,%ebx,4)	// tp[num-1]
	cmpl	28(%esp),%ecx		// reached &bp[num]?
	movl	%eax,36(%esp,%ebx,4)	// tp[num]
	je	.L008common_tail
	movl	(%ecx),%edi		// next bp[i]
	movl	8(%esp),%esi		// ap
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx		// j = 0
	xorl	%edx,%edx		// carry = 0
	movl	(%esi),%eax		// ap[0]
	jmp	.L0121stmadd

	//---------------------------------------------------------------
	// Squaring path (ap == bp, num even). On entry edi = ap[0], ecx = 0.
	// Cross products ap[j]*ap[i] are doubled on the fly; ebx carries
	// the bit shifted out between words ("sbit"), so num-1 is parked in
	// 0(%esp) and the index i in 12(%esp).
	//---------------------------------------------------------------
.align	16
.L009bn_sqr_mont:
	movl	%ebx,(%esp)		// 0(%esp) = num-1
	movl	%ecx,12(%esp)		// i = 0
	movl	%edi,%eax
	mull	%edi			// ap[0]^2
	movl	%eax,32(%esp)		// tp[0]
	movl	%edx,%ebx
	shrl	$1,%edx			// halve the high word; doubled again with the cross terms
	andl	$1,%ebx			// sbit = the bit just shifted out
	incl	%ecx			// j = 1
.align	16
.L013sqr:
	movl	(%esi,%ecx,4),%eax	// ap[j]
	movl	%edx,%ebp
	mull	%edi			// ap[j]*ap[0]
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp	// 2*eax + sbit
	shrl	$31,%eax		// next sbit = bit lost by the doubling
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)	// tp[j-1]
	jl	.L013sqr
	movl	(%esi,%ecx,4),%eax	// ap[num-1]
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi		// n0
	adcl	$0,%edx
	movl	16(%esp),%esi		// np
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi		// m = tp[0]*n0
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)	// tp[num-1]
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax		// np[0]
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)	// tp[num]
	movl	%edx,40(%esp,%ecx,4)	// tp[num+1]
	mull	%edi
	addl	32(%esp),%eax		// only the carry survives
	movl	%ecx,%ebx		// ebx = num-1 again
	adcl	$0,%edx
	movl	4(%esi),%eax		// np[1]
	movl	$1,%ecx			// j = 1
.align	16
.L0143rdmadd:
	// Same reduction as .L0112ndmadd, unrolled to two words per trip.
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi			// np[num-1]*m
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)	// tp[num-2]
	movl	12(%esp),%ecx		// i
	xorl	%eax,%eax
	movl	8(%esp),%esi		// ap
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)	// tp[num-1]
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)	// tp[num]
	je	.L008common_tail	// i == num-1: all rows done
	movl	4(%esi,%ecx,4),%edi	// ap[i+1]
	leal	1(%ecx),%ecx		// i++
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi			// ap[i]^2
	addl	32(%esp,%ecx,4),%eax	// + tp[i]
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx		// j = i+1 (flags from cmpl preserved)
	je	.L015sqrlast		// i == num-1: no cross terms left
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx			// sbit
.align	16
.L016sqradd:
	// tp[j] += 2*(ap[j]*ap[i] + carry) + sbit
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp	// doubled low word (lea keeps CF from addl)
	adcl	$0,%edx
	shrl	$31,%eax		// bit lost by the doubling
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx		// next sbit/carry
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx		// double the final high word
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	movl	20(%esp),%edi		// n0
	movl	16(%esp),%esi		// np
	imull	32(%esp),%edi		// m = tp[0]*n0
	addl	32(%esp,%ecx,4),%edx	// ecx == num here: + tp[num]
	movl	(%esi),%eax		// np[0]
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)	// tp[num]
	movl	%ebp,36(%esp,%ecx,4)	// tp[num+1]
	mull	%edi
	addl	32(%esp),%eax		// only the carry survives
	leal	-1(%ecx),%ebx		// ebx = num-1
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax		// np[1]
	jmp	.L0143rdmadd

	//---------------------------------------------------------------
	// Common tail. On entry ebx = num-1 and tp[0..num] holds the
	// unreduced result. Computes rp = tp - np, then keeps rp if the
	// subtraction did not borrow and tp otherwise.
	//---------------------------------------------------------------
.align	16
.L008common_tail:
	movl	16(%esp),%ebp		// np
	movl	4(%esp),%edi		// rp
	leal	32(%esp),%esi		// tp
	movl	(%esi),%eax		// tp[0]
	movl	%ebx,%ecx		// countdown from num-1
	xorl	%edx,%edx		// index = 0, and CF = 0 for the first sbbl
.align	16
.L017sub:
	sbbl	(%ebp,%edx,4),%eax	// tp[i] - np[i] - borrow
	movl	%eax,(%edi,%edx,4)	// rp[i]
	decl	%ecx			// dec does not modify CF, so the borrow chain survives
	movl	4(%esi,%edx,4),%eax	// tp[i+1]
	leal	1(%edx),%edx
	jge	.L017sub
	sbbl	$0,%eax			// eax = tp[num] - borrow: 0 = keep rp, -1 = keep tp
	// Select word by word with masks instead of choosing a source
	// pointer, so the addresses accessed do not depend on the borrow.
	movl	$-1,%edx
	xorl	%eax,%edx		// edx = ~eax
	jmp	.L018copy		// step over the alignment padding
.align	16
.L018copy:
	movl	32(%esp,%ebx,4),%esi	// tp[i]
	movl	(%edi,%ebx,4),%ebp	// rp[i] = tp[i] - np[i]
	movl	%ecx,32(%esp,%ebx,4)	// overwrite tp[i] (ecx is -1 after the loop above)
	andl	%eax,%esi
	andl	%edx,%ebp
	orl	%esi,%ebp		// exactly one of the two survives
	movl	%ebp,(%edi,%ebx,4)
	decl	%ebx
	jge	.L018copy
	movl	24(%esp),%esp		// back to the caller stack pointer
	movl	$1,%eax			// success
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
// ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif