#if defined(__i386__)
/*
 * ChaCha20 for 32-bit x86.
 *
 * Syntax:  AT&T (GAS); operands are "source, destination".
 * Symbols: Mach-O style (leading underscore, .private_extern, and a
 *          $non_lazy_ptr indirection for external data).
 * Calling: all arguments are read from the stack; %ebp, %ebx, %esi and
 *          %edi are saved and restored by each entry point.
 *
 * Everything up to the matching #endif is assembled only when __i386__
 * is defined.
 */
.file	"chacha-x86.S"
.text
/*
 * _ChaCha20_ctr32 -- XOR a buffer with the ChaCha20 keystream.
 *
 * Stack arguments, as offsets from %esp after the four pushes below:
 *   20(%esp)  out  destination buffer
 *   24(%esp)  in   source buffer
 *   28(%esp)  len  number of bytes; zero returns immediately
 *   32(%esp)  key  pointer to eight 32-bit words
 *   36(%esp)  ctr  pointer to four 32-bit words; word 0 is incremented
 *                  once per 64-byte block
 *
 * Dispatch: when bit 24 of the first dword and bit 9 of the second dword
 * of _OPENSSL_ia32cap_P are both set (by OpenSSL convention FXSR and
 * SSSE3 -- confirm against the capability vector documentation), control
 * jumps to Lssse3_shortcut with %eax holding the runtime address of
 * Lpic_point.  Otherwise the integer-only code below runs.
 *
 * Integer path frame after "subl $132,%esp":
 *     0..60(%esp)   working state words x0..x15 (word i at 4*i); several
 *                   words are kept in registers while the rounds run
 *    80..108(%esp)  copy of the eight key words (state words 4..11)
 *   112..124(%esp)  copy of the four counter words (state words 12..15)
 *   128(%esp)       remaining double-round count
 *   152(%esp)       out cursor   (the caller's argument slots, updated
 *   156(%esp)       in cursor     in place as the buffer is consumed)
 *   160(%esp)       bytes remaining
 *
 * Preserves %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx, flags.
 */
.globl	_ChaCha20_ctr32
.private_extern	_ChaCha20_ctr32
.align	4
_ChaCha20_ctr32:
L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* len == 0: nothing to do. */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	L000no_data
	/* call/pop yields the runtime address of Lpic_point in %eax (PIC base). */
	call	Lpic_point
Lpic_point:
	popl	%eax
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)
	jz	L001x86
	testl	$512,4(%ebp)
	jz	L001x86
	/* Both capability bits set: take the SSSE3 code; %eax must stay intact. */
	jmp	Lssse3_shortcut
L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	/* Copy the eight key words to 80..108(%esp). */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	/* Copy the four counter words to 112..124(%esp). */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	/* Pre-decrement word 0: L002entry increments it before every block. */
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	L002entry
.align	4,0x90
L003outer_loop:
	/* Save the advanced cursors: %ebx = in, %eax = out, %ecx = bytes left. */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
L002entry:
	/*
	 * Build the initial state for one block: the four constant words
	 * (x0 in %eax, x1..x3 on the stack), the key words and the counter
	 * words.  x4, x8, x9 and x12 start out in %ebp, %ecx, %esi and %edx.
	 */
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	/* Advance the block counter and write it back for the next block. */
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	/* Ten iterations of the loop below; each does two rounds. */
	movl	$10,%ebx
	jmp	L004loop
.align	4,0x90
L004loop:
	/*
	 * One iteration = a column round, quarter-rounds on state words
	 * (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), followed by a
	 * diagonal round on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
	 * Each quarter-round is add/xor/rotate by 16, 12, 8 and 7.  Loads and
	 * stores for the neighbouring quarter-round are interleaved, so the
	 * instruction order matters.  The iteration count is parked in
	 * 128(%esp) because %ebx is needed as a state register.
	 */
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	L004loop
	/*
	 * Rounds finished.  Live registers: %eax = x0, %ebp = x4, %ecx = x8,
	 * %esi = x9, %edx = x12, %edi = x14; the remaining words are on the
	 * stack.  Add the initial state back in (feed-forward).
	 */
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	/* Fewer than 64 bytes left: go to the byte-wise tail. */
	cmpl	$64,%ebx
	jb	L005tail
	/* Full block: %ebx = in, %eax = out; XOR keystream with input. */
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	/* Park output word 0 in (%esp): %eax is needed for the out pointer. */
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	/* Words 1, 2, 3, 5, 6. */
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	/* Words 7, 10, 11, 13, 15. */
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	/* Output word 0 is written last, after its parking slot is read back. */
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	L003outer_loop
	jmp	L006done
L005tail:
	/*
	 * Partial final block: finish the feed-forward and write the whole
	 * 64-byte keystream block to 0..63(%esp); %ebx holds the number of
	 * bytes still to process (1..63 on this path).
	 */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	/* From here: %ebp = in, %ecx = out, %esi = byte index (starts at 0). */
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
L007tail_loop:
	/* out[i] = in[i] ^ keystream[i]; the -1 compensates for the early increment. */
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	L007tail_loop
L006done:
	addl	$132,%esp
L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * _ChaCha20_ssse3 -- SSSE3 ChaCha20 keystream XOR.
 *
 * Stack arguments are the same five as _ChaCha20_ctr32, read after the
 * four pushes: 20(%esp) out, 24(%esp) in, 28(%esp) len, 32(%esp) key,
 * 36(%esp) counter.  Lssse3_shortcut is the label _ChaCha20_ctr32 jumps
 * to after performing the same four pushes itself.
 *
 * NOTE(review): the code after Lssse3_shortcut requires %eax to hold the
 * runtime address of Lpic_point (see the leal that forms the address of
 * Lssse3_data).  That value is produced in _ChaCha20_ctr32; nothing in
 * this block establishes it when entry is through the _ChaCha20_ssse3
 * symbol itself.  This block also has no len == 0 check, and with len 0
 * the tail loop counter would wrap.  Confirm that callers only arrive
 * via _ChaCha20_ctr32.
 *
 * Frame: %esp is lowered by 524 and rounded down to a 64-byte boundary.
 *   512(%esp)  caller %esp (restored at L011done)
 *   516(%esp)  key pointer      (stored on the 4x path only)
 *   520(%esp)  counter pointer  (stored on the 4x path only)
 *
 * 4x path (len >= 256): four blocks are computed in parallel.  Each
 * 16-byte row holds one state word for all four blocks.
 *   %ebp = %esp+384: initial state, rows at -128..112(%ebp)
 *       -128..-80  constants x0..x3
 *        -64..-16  key words 0..3   (x4..x7)
 *          0..48   key words 4..7   (x8..x11)
 *         64..112  counter words    (x12..x15); row 64 holds the four
 *                  per-block counters
 *   %ebx = %esp+128: working state, same row offsets relative to %ebx
 *
 * 1x path (len < 256, or the remainder after the 4x loop): one block per
 * iteration with the state rows in %xmm0..%xmm3 and the initial state at
 * 0..63(%esp).
 *
 * Preserves %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx,
 * %xmm0..%xmm7, flags.
 */
.globl	_ChaCha20_ssse3
.private_extern	_ChaCha20_ssse3
.align	4
_ChaCha20_ssse3:
L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
Lssse3_shortcut:
	/* %edi = out, %esi = in, %ecx = len, %edx = key, %ebx = counter. */
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	/* %eax: address of Lpic_point on entry, address of Lssse3_data after. */
	leal	Lssse3_data-Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	L0081x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	/* Bias len by -256 so the loop-end "subl $256 / jnc" tests for >= 256 left. */
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	/* Broadcast each counter word and each of key words 0..3 across a row. */
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	/*
	 * Per-block counters: add (0,1,2,3), then subtract (4,4,4,4) because
	 * the outer loop adds (4,4,4,4) before every pass, the first included.
	 */
	paddd	48(%eax),%xmm0
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	/* Broadcast key words 4..7 and the four constant words. */
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	/*
	 * Bias both buffer pointers by +128 so the four interleaved blocks are
	 * addressed with displacements -128, -64, 0 and 64.
	 */
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	L009outer_loop
.align	4,0x90
L009outer_loop:
	/*
	 * Copy the initial state rows into the working area at %ebx.  Rows
	 * x0, x4, x8, x9 and x12 are kept in %xmm0, %xmm3, %xmm4, %xmm5 and
	 * %xmm6 instead of being stored.
	 */
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	/* Advance the four block counters by 4 and keep them for the next pass. */
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	4,0x90
L010loop:
	/*
	 * One iteration = one column round plus one diagonal round on all
	 * four blocks.  Rotations by 16 and 8 are byte shuffles using the
	 * masks at (%eax) and 16(%eax); rotations by 12 and 7 are built from
	 * pslld/psrld/por.  Rows are spilled to and reloaded from the working
	 * area at %ebx between quarter-rounds, so the order matters.
	 */
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	L010loop
	/* Spill the rows that were still live in registers. */
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	/*
	 * Output, four rows at a time: add the initial state, transpose the
	 * 4x4 dword matrix with punpck* so each register holds 16 consecutive
	 * bytes of one block, XOR with the input and store.  Rows x0..x3 first.
	 */
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	/* Rows x4..x7. */
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	/* Rows x8..x11. */
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	/* Rows x12..x15. */
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	/* 3*16 + 208 = 256: both pointers end up one 4-block group further on. */
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	L009outer_loop
	/* Undo the bias; %ecx = bytes left (0..255). */
	addl	$256,%ecx
	jz	L011done
	/* Remainder goes through the 1x path: remove the +128 pointer bias. */
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	/*
	 * Rebuild the counter row in %xmm3: lane 0 of the per-block counter
	 * row plus 4 gives the next unused block counter; the mask
	 * (0,-1,-1,-1) keeps words 1..3 of the caller's counter block.
	 */
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
L0081x:
	/*
	 * 1x setup: %xmm0 = constants, %xmm1 = key[0..3], %xmm2 = key[4..7],
	 * %xmm3 = counter row, %xmm6 / %xmm7 = rotate-by-16 / rotate-by-8
	 * shuffle masks.  The initial state is kept at 0..63(%esp).
	 */
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	/* NOTE(review): this store is overwritten by the movdqa to 48(%esp) below. */
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	L012loop1x
.align	4,0x90
L013outer1x:
	/* Next block: reload the initial state and add (1,0,0,0) to the counter row. */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	L012loop1x
.align	4,0x90
L012loop1x:
	/*
	 * One iteration = column round, rotate rows 1..3 with pshufd so the
	 * diagonals line up as columns, diagonal round, rotate back.
	 * The register-form pshufb instructions were previously spelled as
	 * raw bytes (.byte 102,15,56,0,222 and ...,223); the mnemonics below
	 * assemble to the same encoding.
	 */
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm6,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm7,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm6,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm7,%xmm3
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	L012loop1x
	/* Feed-forward: add the initial state saved at 0..63(%esp). */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	L014tail
	/* Full 64-byte block: XOR with the input and store. */
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	L013outer1x
	jmp	L011done
L014tail:
	/*
	 * Partial final block: write the keystream over the saved state at
	 * 0..63(%esp), then XOR %ecx bytes one at a time; %ebp is the index.
	 */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	L015tail_loop
L011done:
	/* Restore the caller stack pointer saved at frame setup. */
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * Constant table for the SSSE3 code, 64-byte aligned.  It is addressed
 * through %eax as Lssse3_data + offset.
 */
.align	6,0x90
Lssse3_data:
/*   0: pshufb mask, rotates every 32-bit lane left by 16 bits */
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
/*  16: pshufb mask, rotates every 32-bit lane left by 8 bits */
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
/*  32: the four constant state words (ASCII "expand 32-byte k") */
.long	1634760805,857760878,2036477234,1797285236
/*  48: per-lane block counter offsets for the 4x path */
.long	0,1,2,3
/*  64: counter step per 4x pass (also subtracted once during setup) */
.long	4,4,4,4
/*  80: counter increment per block on the 1x path */
.long	1,0,0,0
/*  96: added to the lane-0 counter when leaving the 4x path for 1x */
.long	4,0,0,0
/* 112: mask keeping counter words 1..3 and clearing word 0 */
.long	0,-1,-1,-1
.align	6,0x90
/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
/*
 * Non-lazy symbol pointer for _OPENSSL_ia32cap_P.  The slot is emitted as
 * zero here and is expected to be bound to the symbol's address by the
 * dynamic linker.  _ChaCha20_ctr32 loads it PC-relatively to reach the
 * capability words without an absolute relocation in .text.
 */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif  /* defined(__i386__) */
970