1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#if defined(BORINGSSL_PREFIX)
6#include <boringssl_prefix_symbols_asm.h>
7#endif
8.text
# _ChaCha20_ctr32 -- scalar i386 ChaCha20 routine with a runtime dispatch to
# the SSSE3 code at Lssse3_shortcut.
#
# Stack arguments as the code reads them (offsets valid after the four
# pushes below):
#   arg at 20(%esp): destination pointer, written 64 bytes per block
#   arg at 24(%esp): source pointer, XORed with the generated keystream
#   arg at 28(%esp): byte count; zero returns immediately
#   arg at 32(%esp): pointer to eight 32-bit words used as state words 4..11
#   arg at 36(%esp): pointer to four 32-bit words used as state words 12..15;
#                    word 12 advances by one for every 64-byte block
# NOTE(review): presumably this corresponds to a C prototype of the form
# (out, in, in_len, key, counter) -- confirm against the caller.
#
# Frame layout after subl $132,%esp:
#   offsets 0..60     working state, word i at offset 4*i
#   offsets 80..124   copy of the input words 4..15
#   offset 128        spilled round counter
#   offsets 152..160  the caller arguments: destination, source, length
9.globl	_ChaCha20_ctr32
10.private_extern	_ChaCha20_ctr32
11.align	4
12_ChaCha20_ctr32:
13L_ChaCha20_ctr32_begin:
# Save %ebp, %ebx, %esi and %edi; they are popped again at L000no_data.
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
# Return straight away when the byte count is zero.
18	xorl	%eax,%eax
19	cmpl	28(%esp),%eax
20	je	L000no_data
# Position-independent lookup: call/pop leaves the address of Lpic_point in
# %eax, which is then used to load the pointer to OPENSSL_ia32cap_P.
21	call	Lpic_point
22Lpic_point:
23	popl	%eax
24	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
# Take the SSSE3 path only when bit 24 of capability word 0 and bit 9 of
# capability word 1 are both set (presumably the CPUID FXSR and SSSE3
# flags -- TODO confirm). %eax still holds the address of Lpic_point at the
# jump; the code at Lssse3_shortcut relies on that.
25	testl	$16777216,(%ebp)
26	jz	L001x86
27	testl	$512,4(%ebp)
28	jz	L001x86
29	jmp	Lssse3_shortcut
30L001x86:
# Scalar path: %esi = pointer to state words 4..11, %edi = pointer to state
# words 12..15. Allocate the 132-byte frame described above.
31	movl	32(%esp),%esi
32	movl	36(%esp),%edi
33	subl	$132,%esp
# Copy the eight words at (%esi) to 80..108(%esp).
34	movl	(%esi),%eax
35	movl	4(%esi),%ebx
36	movl	8(%esi),%ecx
37	movl	12(%esi),%edx
38	movl	%eax,80(%esp)
39	movl	%ebx,84(%esp)
40	movl	%ecx,88(%esp)
41	movl	%edx,92(%esp)
42	movl	16(%esi),%eax
43	movl	20(%esi),%ebx
44	movl	24(%esi),%ecx
45	movl	28(%esi),%edx
46	movl	%eax,96(%esp)
47	movl	%ebx,100(%esp)
48	movl	%ecx,104(%esp)
49	movl	%edx,108(%esp)
# Copy the four words at (%edi) to 112..124(%esp). Word 12 is stored minus
# one because L002entry adds one before every block, including the first.
50	movl	(%edi),%eax
51	movl	4(%edi),%ebx
52	movl	8(%edi),%ecx
53	movl	12(%edi),%edx
54	subl	$1,%eax
55	movl	%eax,112(%esp)
56	movl	%ebx,116(%esp)
57	movl	%ecx,120(%esp)
58	movl	%edx,124(%esp)
59	jmp	L002entry
60.align	4,0x90
61L003outer_loop:
# Reached after a full block with more data pending: write the advanced
# source (%ebx) and destination (%eax) pointers and the remaining length
# (%ecx) back into the argument slots.
62	movl	%ebx,156(%esp)
63	movl	%eax,152(%esp)
64	movl	%ecx,160(%esp)
65L002entry:
# Build the working state for one block. Words 0..3 are the four constants
# (their little-endian bytes spell the ASCII text: expand 32-byte k).
# Words 0, 4, 8, 9, 12 and 15 start out in %eax, %ebp, %ecx, %esi, %edx and
# %edi; the remaining words are written to their slots in 0..60(%esp).
66	movl	$1634760805,%eax
67	movl	$857760878,4(%esp)
68	movl	$2036477234,8(%esp)
69	movl	$1797285236,12(%esp)
70	movl	84(%esp),%ebx
71	movl	88(%esp),%ebp
72	movl	104(%esp),%ecx
73	movl	108(%esp),%esi
74	movl	116(%esp),%edx
75	movl	120(%esp),%edi
76	movl	%ebx,20(%esp)
77	movl	%ebp,24(%esp)
78	movl	%ecx,40(%esp)
79	movl	%esi,44(%esp)
80	movl	%edx,52(%esp)
81	movl	%edi,56(%esp)
82	movl	92(%esp),%ebx
83	movl	124(%esp),%edi
84	movl	112(%esp),%edx
85	movl	80(%esp),%ebp
86	movl	96(%esp),%ecx
87	movl	100(%esp),%esi
# Advance word 12 (the block counter) and keep the new value in the saved
# input state at 112(%esp).
88	addl	$1,%edx
89	movl	%ebx,28(%esp)
90	movl	%edi,60(%esp)
91	movl	%edx,112(%esp)
# Iteration count for the round loop.
92	movl	$10,%ebx
93	jmp	L004loop
94.align	4,0x90
95L004loop:
# Round loop, ten iterations. The iteration counter is spilled to
# 128(%esp) so %ebx can be reused as a state register. Each iteration holds
# eight add/xor/rotate quarter-round groups (rotations by 16, 12, 8 and 7),
# interleaved with loads and stores that swap state words between registers
# and the working area at 0..60(%esp).
96	addl	%ebp,%eax
97	movl	%ebx,128(%esp)
98	movl	%ebp,%ebx
99	xorl	%eax,%edx
100	roll	$16,%edx
101	addl	%edx,%ecx
102	xorl	%ecx,%ebx
103	movl	52(%esp),%edi
104	roll	$12,%ebx
105	movl	20(%esp),%ebp
106	addl	%ebx,%eax
107	xorl	%eax,%edx
108	movl	%eax,(%esp)
109	roll	$8,%edx
110	movl	4(%esp),%eax
111	addl	%edx,%ecx
112	movl	%edx,48(%esp)
113	xorl	%ecx,%ebx
114	addl	%ebp,%eax
115	roll	$7,%ebx
116	xorl	%eax,%edi
117	movl	%ecx,32(%esp)
118	roll	$16,%edi
119	movl	%ebx,16(%esp)
120	addl	%edi,%esi
121	movl	40(%esp),%ecx
122	xorl	%esi,%ebp
123	movl	56(%esp),%edx
124	roll	$12,%ebp
125	movl	24(%esp),%ebx
126	addl	%ebp,%eax
127	xorl	%eax,%edi
128	movl	%eax,4(%esp)
129	roll	$8,%edi
130	movl	8(%esp),%eax
131	addl	%edi,%esi
132	movl	%edi,52(%esp)
133	xorl	%esi,%ebp
134	addl	%ebx,%eax
135	roll	$7,%ebp
136	xorl	%eax,%edx
137	movl	%esi,36(%esp)
138	roll	$16,%edx
139	movl	%ebp,20(%esp)
140	addl	%edx,%ecx
141	movl	44(%esp),%esi
142	xorl	%ecx,%ebx
143	movl	60(%esp),%edi
144	roll	$12,%ebx
145	movl	28(%esp),%ebp
146	addl	%ebx,%eax
147	xorl	%eax,%edx
148	movl	%eax,8(%esp)
149	roll	$8,%edx
150	movl	12(%esp),%eax
151	addl	%edx,%ecx
152	movl	%edx,56(%esp)
153	xorl	%ecx,%ebx
154	addl	%ebp,%eax
155	roll	$7,%ebx
156	xorl	%eax,%edi
157	roll	$16,%edi
158	movl	%ebx,24(%esp)
159	addl	%edi,%esi
160	xorl	%esi,%ebp
161	roll	$12,%ebp
162	movl	20(%esp),%ebx
163	addl	%ebp,%eax
164	xorl	%eax,%edi
165	movl	%eax,12(%esp)
166	roll	$8,%edi
167	movl	(%esp),%eax
168	addl	%edi,%esi
169	movl	%edi,%edx
170	xorl	%esi,%ebp
171	addl	%ebx,%eax
172	roll	$7,%ebp
173	xorl	%eax,%edx
174	roll	$16,%edx
175	movl	%ebp,28(%esp)
176	addl	%edx,%ecx
177	xorl	%ecx,%ebx
178	movl	48(%esp),%edi
179	roll	$12,%ebx
180	movl	24(%esp),%ebp
181	addl	%ebx,%eax
182	xorl	%eax,%edx
183	movl	%eax,(%esp)
184	roll	$8,%edx
185	movl	4(%esp),%eax
186	addl	%edx,%ecx
187	movl	%edx,60(%esp)
188	xorl	%ecx,%ebx
189	addl	%ebp,%eax
190	roll	$7,%ebx
191	xorl	%eax,%edi
192	movl	%ecx,40(%esp)
193	roll	$16,%edi
194	movl	%ebx,20(%esp)
195	addl	%edi,%esi
196	movl	32(%esp),%ecx
197	xorl	%esi,%ebp
198	movl	52(%esp),%edx
199	roll	$12,%ebp
200	movl	28(%esp),%ebx
201	addl	%ebp,%eax
202	xorl	%eax,%edi
203	movl	%eax,4(%esp)
204	roll	$8,%edi
205	movl	8(%esp),%eax
206	addl	%edi,%esi
207	movl	%edi,48(%esp)
208	xorl	%esi,%ebp
209	addl	%ebx,%eax
210	roll	$7,%ebp
211	xorl	%eax,%edx
212	movl	%esi,44(%esp)
213	roll	$16,%edx
214	movl	%ebp,24(%esp)
215	addl	%edx,%ecx
216	movl	36(%esp),%esi
217	xorl	%ecx,%ebx
218	movl	56(%esp),%edi
219	roll	$12,%ebx
220	movl	16(%esp),%ebp
221	addl	%ebx,%eax
222	xorl	%eax,%edx
223	movl	%eax,8(%esp)
224	roll	$8,%edx
225	movl	12(%esp),%eax
226	addl	%edx,%ecx
227	movl	%edx,52(%esp)
228	xorl	%ecx,%ebx
229	addl	%ebp,%eax
230	roll	$7,%ebx
231	xorl	%eax,%edi
232	roll	$16,%edi
233	movl	%ebx,28(%esp)
234	addl	%edi,%esi
235	xorl	%esi,%ebp
236	movl	48(%esp),%edx
237	roll	$12,%ebp
238	movl	128(%esp),%ebx
239	addl	%ebp,%eax
240	xorl	%eax,%edi
241	movl	%eax,12(%esp)
242	roll	$8,%edi
243	movl	(%esp),%eax
244	addl	%edi,%esi
245	movl	%edi,56(%esp)
246	xorl	%esi,%ebp
247	roll	$7,%ebp
248	decl	%ebx
249	jnz	L004loop
# Rounds done. %ebx = remaining byte count. Begin adding the original input
# words back into the state for the words still held in registers.
250	movl	160(%esp),%ebx
251	addl	$1634760805,%eax
252	addl	80(%esp),%ebp
253	addl	96(%esp),%ecx
254	addl	100(%esp),%esi
# Fewer than 64 bytes left: handle them in the byte-wise tail.
255	cmpl	$64,%ebx
256	jb	L005tail
# Full 64-byte block. %ebx = source pointer; %eax becomes the destination
# pointer once keystream word 0 has been XORed and parked at (%esp). Each
# group below adds the input words, XORs with the source and stores to the
# destination.
257	movl	156(%esp),%ebx
258	addl	112(%esp),%edx
259	addl	120(%esp),%edi
260	xorl	(%ebx),%eax
261	xorl	16(%ebx),%ebp
262	movl	%eax,(%esp)
263	movl	152(%esp),%eax
264	xorl	32(%ebx),%ecx
265	xorl	36(%ebx),%esi
266	xorl	48(%ebx),%edx
267	xorl	56(%ebx),%edi
268	movl	%ebp,16(%eax)
269	movl	%ecx,32(%eax)
270	movl	%esi,36(%eax)
271	movl	%edx,48(%eax)
272	movl	%edi,56(%eax)
273	movl	4(%esp),%ebp
274	movl	8(%esp),%ecx
275	movl	12(%esp),%esi
276	movl	20(%esp),%edx
277	movl	24(%esp),%edi
278	addl	$857760878,%ebp
279	addl	$2036477234,%ecx
280	addl	$1797285236,%esi
281	addl	84(%esp),%edx
282	addl	88(%esp),%edi
283	xorl	4(%ebx),%ebp
284	xorl	8(%ebx),%ecx
285	xorl	12(%ebx),%esi
286	xorl	20(%ebx),%edx
287	xorl	24(%ebx),%edi
288	movl	%ebp,4(%eax)
289	movl	%ecx,8(%eax)
290	movl	%esi,12(%eax)
291	movl	%edx,20(%eax)
292	movl	%edi,24(%eax)
293	movl	28(%esp),%ebp
294	movl	40(%esp),%ecx
295	movl	44(%esp),%esi
296	movl	52(%esp),%edx
297	movl	60(%esp),%edi
298	addl	92(%esp),%ebp
299	addl	104(%esp),%ecx
300	addl	108(%esp),%esi
301	addl	116(%esp),%edx
302	addl	124(%esp),%edi
303	xorl	28(%ebx),%ebp
304	xorl	40(%ebx),%ecx
305	xorl	44(%ebx),%esi
306	xorl	52(%ebx),%edx
307	xorl	60(%ebx),%edi
# Advance the source pointer, store the last words (output word 0 comes
# from the value parked at (%esp)), advance the destination pointer and
# subtract 64 from the remaining length. Loop while bytes remain.
308	leal	64(%ebx),%ebx
309	movl	%ebp,28(%eax)
310	movl	(%esp),%ebp
311	movl	%ecx,40(%eax)
312	movl	160(%esp),%ecx
313	movl	%esi,44(%eax)
314	movl	%edx,52(%eax)
315	movl	%edi,60(%eax)
316	movl	%ebp,(%eax)
317	leal	64(%eax),%eax
318	subl	$64,%ecx
319	jnz	L003outer_loop
320	jmp	L006done
321L005tail:
# Partial final block: finish adding the input words and write the whole
# keystream block to 0..63(%esp). %ebx still holds the remaining byte count,
# which is between 1 and 63 here (the length was nonzero on entry and the
# full-block path leaves through L006done when it reaches zero).
322	addl	112(%esp),%edx
323	addl	120(%esp),%edi
324	movl	%eax,(%esp)
325	movl	%ebp,16(%esp)
326	movl	%ecx,32(%esp)
327	movl	%esi,36(%esp)
328	movl	%edx,48(%esp)
329	movl	%edi,56(%esp)
330	movl	4(%esp),%ebp
331	movl	8(%esp),%ecx
332	movl	12(%esp),%esi
333	movl	20(%esp),%edx
334	movl	24(%esp),%edi
335	addl	$857760878,%ebp
336	addl	$2036477234,%ecx
337	addl	$1797285236,%esi
338	addl	84(%esp),%edx
339	addl	88(%esp),%edi
340	movl	%ebp,4(%esp)
341	movl	%ecx,8(%esp)
342	movl	%esi,12(%esp)
343	movl	%edx,20(%esp)
344	movl	%edi,24(%esp)
345	movl	28(%esp),%ebp
346	movl	40(%esp),%ecx
347	movl	44(%esp),%esi
348	movl	52(%esp),%edx
349	movl	60(%esp),%edi
350	addl	92(%esp),%ebp
351	addl	104(%esp),%ecx
352	addl	108(%esp),%esi
353	addl	116(%esp),%edx
354	addl	124(%esp),%edi
# Set up the byte loop: %ebp = source pointer, %ecx = destination pointer,
# %esi = byte index starting at zero.
355	movl	%ebp,28(%esp)
356	movl	156(%esp),%ebp
357	movl	%ecx,40(%esp)
358	movl	152(%esp),%ecx
359	movl	%esi,44(%esp)
360	xorl	%esi,%esi
361	movl	%edx,52(%esp)
362	movl	%edi,60(%esp)
363	xorl	%eax,%eax
364	xorl	%edx,%edx
365L007tail_loop:
# XOR one source byte with one keystream byte per iteration; the -1
# displacement on the store compensates for %esi already being incremented.
366	movb	(%esi,%ebp,1),%al
367	movb	(%esp,%esi,1),%dl
368	leal	1(%esi),%esi
369	xorb	%dl,%al
370	movb	%al,-1(%ecx,%esi,1)
371	decl	%ebx
372	jnz	L007tail_loop
373L006done:
# Release the frame, restore the saved registers and return.
374	addl	$132,%esp
375L000no_data:
376	popl	%edi
377	popl	%esi
378	popl	%ebx
379	popl	%ebp
380	ret
# _ChaCha20_ssse3 -- SSSE3 implementation. It is reached by the jmp to
# Lssse3_shortcut from _ChaCha20_ctr32, which lands after the pushes below
# with the same stack layout and with %eax = address of Lpic_point.
# NOTE(review): nothing between the _ChaCha20_ssse3 label and the leal that
# forms the Lssse3_data address writes %eax, so a direct call to this symbol
# would use whatever %eax happens to hold -- confirm no caller does that.
#
# Stack arguments (offsets valid after the four pushes):
#   arg at 20(%esp): destination pointer  -> %edi
#   arg at 24(%esp): source pointer       -> %esi
#   arg at 28(%esp): byte count           -> %ecx
#   arg at 32(%esp): pointer to state words 4..11  -> %edx
#   arg at 36(%esp): pointer to state words 12..15 -> %ebx
#
# Inputs of 256 bytes or more are processed four blocks at a time; the rest
# (and any input shorter than 256 bytes) goes through the one-block code at
# L0081x.
381.globl	_ChaCha20_ssse3
382.private_extern	_ChaCha20_ssse3
383.align	4
384_ChaCha20_ssse3:
385L_ChaCha20_ssse3_begin:
386	pushl	%ebp
387	pushl	%ebx
388	pushl	%esi
389	pushl	%edi
390Lssse3_shortcut:
391	movl	20(%esp),%edi
392	movl	24(%esp),%esi
393	movl	28(%esp),%ecx
394	movl	32(%esp),%edx
395	movl	36(%esp),%ebx
# Carve out a 64-byte aligned frame. The incoming %esp is kept at
# 512(%esp) and reloaded at L011done.
396	movl	%esp,%ebp
397	subl	$524,%esp
398	andl	$-64,%esp
399	movl	%ebp,512(%esp)
# Turn the address of Lpic_point in %eax into the address of Lssse3_data.
# %eax stays the constant-table base until the byte-wise tail.
400	leal	Lssse3_data-Lpic_point(%eax),%eax
# Load state words 12..15 into %xmm3.
401	movdqu	(%ebx),%xmm3
402	cmpl	$256,%ecx
403	jb	L0081x
# Four-block path. Keep the two input pointers for the remainder code,
# bias the length by -256 and point %ebp at the middle of the broadcast
# input state: state word i lives at (16*i - 128)(%ebp), one 16-byte vector
# with one 32-bit lane per block.
404	movl	%edx,516(%esp)
405	movl	%ebx,520(%esp)
406	subl	$256,%ecx
407	leal	384(%esp),%ebp
408	movdqu	(%edx),%xmm7
# Broadcast words 12..15 into %xmm0..%xmm3. The word-12 vector gets the
# per-lane offsets 0,1,2,3 added and 4 subtracted from every lane; the top
# of L009outer_loop adds 4 back before each group of four blocks.
409	pshufd	$0,%xmm3,%xmm0
410	pshufd	$85,%xmm3,%xmm1
411	pshufd	$170,%xmm3,%xmm2
412	pshufd	$255,%xmm3,%xmm3
413	paddd	48(%eax),%xmm0
# Broadcast words 4..7 into %xmm4..%xmm7.
414	pshufd	$0,%xmm7,%xmm4
415	pshufd	$85,%xmm7,%xmm5
416	psubd	64(%eax),%xmm0
417	pshufd	$170,%xmm7,%xmm6
418	pshufd	$255,%xmm7,%xmm7
419	movdqa	%xmm0,64(%ebp)
420	movdqa	%xmm1,80(%ebp)
421	movdqa	%xmm2,96(%ebp)
422	movdqa	%xmm3,112(%ebp)
423	movdqu	16(%edx),%xmm3
424	movdqa	%xmm4,-64(%ebp)
425	movdqa	%xmm5,-48(%ebp)
426	movdqa	%xmm6,-32(%ebp)
427	movdqa	%xmm7,-16(%ebp)
# Broadcast words 8..11 (from 16(%edx)) and the four constant words 0..3
# (from the table). %ebx points at the middle of the working copy, which
# uses the same layout as the input state.
428	movdqa	32(%eax),%xmm7
429	leal	128(%esp),%ebx
430	pshufd	$0,%xmm3,%xmm0
431	pshufd	$85,%xmm3,%xmm1
432	pshufd	$170,%xmm3,%xmm2
433	pshufd	$255,%xmm3,%xmm3
434	pshufd	$0,%xmm7,%xmm4
435	pshufd	$85,%xmm7,%xmm5
436	pshufd	$170,%xmm7,%xmm6
437	pshufd	$255,%xmm7,%xmm7
438	movdqa	%xmm0,(%ebp)
439	movdqa	%xmm1,16(%ebp)
440	movdqa	%xmm2,32(%ebp)
441	movdqa	%xmm3,48(%ebp)
442	movdqa	%xmm4,-128(%ebp)
443	movdqa	%xmm5,-112(%ebp)
444	movdqa	%xmm6,-96(%ebp)
445	movdqa	%xmm7,-80(%ebp)
# Bias both data pointers by 128; the loads and stores after the round loop
# use the displacements -128, -64, 0 and 64 to reach the four blocks.
446	leal	128(%esi),%esi
447	leal	128(%edi),%edi
448	jmp	L009outer_loop
449.align	4,0x90
450L009outer_loop:
# Start of one group of four blocks: copy the input state into the working
# area at %ebx, except words 0, 4, 8 and 9 which go straight into %xmm0,
# %xmm3, %xmm4 and %xmm5.
451	movdqa	-112(%ebp),%xmm1
452	movdqa	-96(%ebp),%xmm2
453	movdqa	-80(%ebp),%xmm3
454	movdqa	-48(%ebp),%xmm5
455	movdqa	-32(%ebp),%xmm6
456	movdqa	-16(%ebp),%xmm7
457	movdqa	%xmm1,-112(%ebx)
458	movdqa	%xmm2,-96(%ebx)
459	movdqa	%xmm3,-80(%ebx)
460	movdqa	%xmm5,-48(%ebx)
461	movdqa	%xmm6,-32(%ebx)
462	movdqa	%xmm7,-16(%ebx)
463	movdqa	32(%ebp),%xmm2
464	movdqa	48(%ebp),%xmm3
465	movdqa	64(%ebp),%xmm4
466	movdqa	80(%ebp),%xmm5
467	movdqa	96(%ebp),%xmm6
468	movdqa	112(%ebp),%xmm7
# Step the four per-lane block counters (word 12) by 4; the updated vector
# is stored back to the input state at 64(%ebp) and also kept in %xmm6.
469	paddd	64(%eax),%xmm4
470	movdqa	%xmm2,32(%ebx)
471	movdqa	%xmm3,48(%ebx)
472	movdqa	%xmm4,64(%ebx)
473	movdqa	%xmm5,80(%ebx)
474	movdqa	%xmm6,96(%ebx)
475	movdqa	%xmm7,112(%ebx)
476	movdqa	%xmm4,64(%ebp)
477	movdqa	-128(%ebp),%xmm0
478	movdqa	%xmm4,%xmm6
479	movdqa	-64(%ebp),%xmm3
480	movdqa	(%ebp),%xmm4
481	movdqa	16(%ebp),%xmm5
# Iteration count for the round loop.
482	movl	$10,%edx
483	nop
484.align	4,0x90
485L010loop:
# Round loop, ten iterations over four blocks in parallel. Rotations by 16
# and by 8 use pshufb with the byte masks at (%eax) and 16(%eax); rotations
# by 12 and by 7 use a pslld/psrld pair merged with por. State vectors not
# currently in registers are exchanged with the working area at %ebx.
486	paddd	%xmm3,%xmm0
487	movdqa	%xmm3,%xmm2
488	pxor	%xmm0,%xmm6
489	pshufb	(%eax),%xmm6
490	paddd	%xmm6,%xmm4
491	pxor	%xmm4,%xmm2
492	movdqa	-48(%ebx),%xmm3
493	movdqa	%xmm2,%xmm1
494	pslld	$12,%xmm2
495	psrld	$20,%xmm1
496	por	%xmm1,%xmm2
497	movdqa	-112(%ebx),%xmm1
498	paddd	%xmm2,%xmm0
499	movdqa	80(%ebx),%xmm7
500	pxor	%xmm0,%xmm6
501	movdqa	%xmm0,-128(%ebx)
502	pshufb	16(%eax),%xmm6
503	paddd	%xmm6,%xmm4
504	movdqa	%xmm6,64(%ebx)
505	pxor	%xmm4,%xmm2
506	paddd	%xmm3,%xmm1
507	movdqa	%xmm2,%xmm0
508	pslld	$7,%xmm2
509	psrld	$25,%xmm0
510	pxor	%xmm1,%xmm7
511	por	%xmm0,%xmm2
512	movdqa	%xmm4,(%ebx)
513	pshufb	(%eax),%xmm7
514	movdqa	%xmm2,-64(%ebx)
515	paddd	%xmm7,%xmm5
516	movdqa	32(%ebx),%xmm4
517	pxor	%xmm5,%xmm3
518	movdqa	-32(%ebx),%xmm2
519	movdqa	%xmm3,%xmm0
520	pslld	$12,%xmm3
521	psrld	$20,%xmm0
522	por	%xmm0,%xmm3
523	movdqa	-96(%ebx),%xmm0
524	paddd	%xmm3,%xmm1
525	movdqa	96(%ebx),%xmm6
526	pxor	%xmm1,%xmm7
527	movdqa	%xmm1,-112(%ebx)
528	pshufb	16(%eax),%xmm7
529	paddd	%xmm7,%xmm5
530	movdqa	%xmm7,80(%ebx)
531	pxor	%xmm5,%xmm3
532	paddd	%xmm2,%xmm0
533	movdqa	%xmm3,%xmm1
534	pslld	$7,%xmm3
535	psrld	$25,%xmm1
536	pxor	%xmm0,%xmm6
537	por	%xmm1,%xmm3
538	movdqa	%xmm5,16(%ebx)
539	pshufb	(%eax),%xmm6
540	movdqa	%xmm3,-48(%ebx)
541	paddd	%xmm6,%xmm4
542	movdqa	48(%ebx),%xmm5
543	pxor	%xmm4,%xmm2
544	movdqa	-16(%ebx),%xmm3
545	movdqa	%xmm2,%xmm1
546	pslld	$12,%xmm2
547	psrld	$20,%xmm1
548	por	%xmm1,%xmm2
549	movdqa	-80(%ebx),%xmm1
550	paddd	%xmm2,%xmm0
551	movdqa	112(%ebx),%xmm7
552	pxor	%xmm0,%xmm6
553	movdqa	%xmm0,-96(%ebx)
554	pshufb	16(%eax),%xmm6
555	paddd	%xmm6,%xmm4
556	movdqa	%xmm6,96(%ebx)
557	pxor	%xmm4,%xmm2
558	paddd	%xmm3,%xmm1
559	movdqa	%xmm2,%xmm0
560	pslld	$7,%xmm2
561	psrld	$25,%xmm0
562	pxor	%xmm1,%xmm7
563	por	%xmm0,%xmm2
564	pshufb	(%eax),%xmm7
565	movdqa	%xmm2,-32(%ebx)
566	paddd	%xmm7,%xmm5
567	pxor	%xmm5,%xmm3
568	movdqa	-48(%ebx),%xmm2
569	movdqa	%xmm3,%xmm0
570	pslld	$12,%xmm3
571	psrld	$20,%xmm0
572	por	%xmm0,%xmm3
573	movdqa	-128(%ebx),%xmm0
574	paddd	%xmm3,%xmm1
575	pxor	%xmm1,%xmm7
576	movdqa	%xmm1,-80(%ebx)
577	pshufb	16(%eax),%xmm7
578	paddd	%xmm7,%xmm5
579	movdqa	%xmm7,%xmm6
580	pxor	%xmm5,%xmm3
581	paddd	%xmm2,%xmm0
582	movdqa	%xmm3,%xmm1
583	pslld	$7,%xmm3
584	psrld	$25,%xmm1
585	pxor	%xmm0,%xmm6
586	por	%xmm1,%xmm3
587	pshufb	(%eax),%xmm6
588	movdqa	%xmm3,-16(%ebx)
589	paddd	%xmm6,%xmm4
590	pxor	%xmm4,%xmm2
591	movdqa	-32(%ebx),%xmm3
592	movdqa	%xmm2,%xmm1
593	pslld	$12,%xmm2
594	psrld	$20,%xmm1
595	por	%xmm1,%xmm2
596	movdqa	-112(%ebx),%xmm1
597	paddd	%xmm2,%xmm0
598	movdqa	64(%ebx),%xmm7
599	pxor	%xmm0,%xmm6
600	movdqa	%xmm0,-128(%ebx)
601	pshufb	16(%eax),%xmm6
602	paddd	%xmm6,%xmm4
603	movdqa	%xmm6,112(%ebx)
604	pxor	%xmm4,%xmm2
605	paddd	%xmm3,%xmm1
606	movdqa	%xmm2,%xmm0
607	pslld	$7,%xmm2
608	psrld	$25,%xmm0
609	pxor	%xmm1,%xmm7
610	por	%xmm0,%xmm2
611	movdqa	%xmm4,32(%ebx)
612	pshufb	(%eax),%xmm7
613	movdqa	%xmm2,-48(%ebx)
614	paddd	%xmm7,%xmm5
615	movdqa	(%ebx),%xmm4
616	pxor	%xmm5,%xmm3
617	movdqa	-16(%ebx),%xmm2
618	movdqa	%xmm3,%xmm0
619	pslld	$12,%xmm3
620	psrld	$20,%xmm0
621	por	%xmm0,%xmm3
622	movdqa	-96(%ebx),%xmm0
623	paddd	%xmm3,%xmm1
624	movdqa	80(%ebx),%xmm6
625	pxor	%xmm1,%xmm7
626	movdqa	%xmm1,-112(%ebx)
627	pshufb	16(%eax),%xmm7
628	paddd	%xmm7,%xmm5
629	movdqa	%xmm7,64(%ebx)
630	pxor	%xmm5,%xmm3
631	paddd	%xmm2,%xmm0
632	movdqa	%xmm3,%xmm1
633	pslld	$7,%xmm3
634	psrld	$25,%xmm1
635	pxor	%xmm0,%xmm6
636	por	%xmm1,%xmm3
637	movdqa	%xmm5,48(%ebx)
638	pshufb	(%eax),%xmm6
639	movdqa	%xmm3,-32(%ebx)
640	paddd	%xmm6,%xmm4
641	movdqa	16(%ebx),%xmm5
642	pxor	%xmm4,%xmm2
643	movdqa	-64(%ebx),%xmm3
644	movdqa	%xmm2,%xmm1
645	pslld	$12,%xmm2
646	psrld	$20,%xmm1
647	por	%xmm1,%xmm2
648	movdqa	-80(%ebx),%xmm1
649	paddd	%xmm2,%xmm0
650	movdqa	96(%ebx),%xmm7
651	pxor	%xmm0,%xmm6
652	movdqa	%xmm0,-96(%ebx)
653	pshufb	16(%eax),%xmm6
654	paddd	%xmm6,%xmm4
655	movdqa	%xmm6,80(%ebx)
656	pxor	%xmm4,%xmm2
657	paddd	%xmm3,%xmm1
658	movdqa	%xmm2,%xmm0
659	pslld	$7,%xmm2
660	psrld	$25,%xmm0
661	pxor	%xmm1,%xmm7
662	por	%xmm0,%xmm2
663	pshufb	(%eax),%xmm7
664	movdqa	%xmm2,-16(%ebx)
665	paddd	%xmm7,%xmm5
666	pxor	%xmm5,%xmm3
667	movdqa	%xmm3,%xmm0
668	pslld	$12,%xmm3
669	psrld	$20,%xmm0
670	por	%xmm0,%xmm3
671	movdqa	-128(%ebx),%xmm0
672	paddd	%xmm3,%xmm1
673	movdqa	64(%ebx),%xmm6
674	pxor	%xmm1,%xmm7
675	movdqa	%xmm1,-80(%ebx)
676	pshufb	16(%eax),%xmm7
677	paddd	%xmm7,%xmm5
678	movdqa	%xmm7,96(%ebx)
679	pxor	%xmm5,%xmm3
680	movdqa	%xmm3,%xmm1
681	pslld	$7,%xmm3
682	psrld	$25,%xmm1
683	por	%xmm1,%xmm3
684	decl	%edx
685	jnz	L010loop
# Rounds done: flush the vectors still held in registers to the working
# area. The output is then produced in four passes of four state words
# each. Every pass adds the input state, transposes the 4x4 dword matrix
# with punpck* so that each register holds 16 contiguous bytes of one block,
# XORs with the source and stores to the destination. The displacements
# -128, -64, 0 and 64 select the four blocks.
686	movdqa	%xmm3,-64(%ebx)
687	movdqa	%xmm4,(%ebx)
688	movdqa	%xmm5,16(%ebx)
689	movdqa	%xmm6,64(%ebx)
690	movdqa	%xmm7,96(%ebx)
# Pass 1: state words 0..3.
691	movdqa	-112(%ebx),%xmm1
692	movdqa	-96(%ebx),%xmm2
693	movdqa	-80(%ebx),%xmm3
694	paddd	-128(%ebp),%xmm0
695	paddd	-112(%ebp),%xmm1
696	paddd	-96(%ebp),%xmm2
697	paddd	-80(%ebp),%xmm3
698	movdqa	%xmm0,%xmm6
699	punpckldq	%xmm1,%xmm0
700	movdqa	%xmm2,%xmm7
701	punpckldq	%xmm3,%xmm2
702	punpckhdq	%xmm1,%xmm6
703	punpckhdq	%xmm3,%xmm7
704	movdqa	%xmm0,%xmm1
705	punpcklqdq	%xmm2,%xmm0
706	movdqa	%xmm6,%xmm3
707	punpcklqdq	%xmm7,%xmm6
708	punpckhqdq	%xmm2,%xmm1
709	punpckhqdq	%xmm7,%xmm3
710	movdqu	-128(%esi),%xmm4
711	movdqu	-64(%esi),%xmm5
712	movdqu	(%esi),%xmm2
713	movdqu	64(%esi),%xmm7
714	leal	16(%esi),%esi
715	pxor	%xmm0,%xmm4
716	movdqa	-64(%ebx),%xmm0
717	pxor	%xmm1,%xmm5
718	movdqa	-48(%ebx),%xmm1
719	pxor	%xmm2,%xmm6
720	movdqa	-32(%ebx),%xmm2
721	pxor	%xmm3,%xmm7
722	movdqa	-16(%ebx),%xmm3
723	movdqu	%xmm4,-128(%edi)
724	movdqu	%xmm5,-64(%edi)
725	movdqu	%xmm6,(%edi)
726	movdqu	%xmm7,64(%edi)
727	leal	16(%edi),%edi
# Pass 2: state words 4..7.
728	paddd	-64(%ebp),%xmm0
729	paddd	-48(%ebp),%xmm1
730	paddd	-32(%ebp),%xmm2
731	paddd	-16(%ebp),%xmm3
732	movdqa	%xmm0,%xmm6
733	punpckldq	%xmm1,%xmm0
734	movdqa	%xmm2,%xmm7
735	punpckldq	%xmm3,%xmm2
736	punpckhdq	%xmm1,%xmm6
737	punpckhdq	%xmm3,%xmm7
738	movdqa	%xmm0,%xmm1
739	punpcklqdq	%xmm2,%xmm0
740	movdqa	%xmm6,%xmm3
741	punpcklqdq	%xmm7,%xmm6
742	punpckhqdq	%xmm2,%xmm1
743	punpckhqdq	%xmm7,%xmm3
744	movdqu	-128(%esi),%xmm4
745	movdqu	-64(%esi),%xmm5
746	movdqu	(%esi),%xmm2
747	movdqu	64(%esi),%xmm7
748	leal	16(%esi),%esi
749	pxor	%xmm0,%xmm4
750	movdqa	(%ebx),%xmm0
751	pxor	%xmm1,%xmm5
752	movdqa	16(%ebx),%xmm1
753	pxor	%xmm2,%xmm6
754	movdqa	32(%ebx),%xmm2
755	pxor	%xmm3,%xmm7
756	movdqa	48(%ebx),%xmm3
757	movdqu	%xmm4,-128(%edi)
758	movdqu	%xmm5,-64(%edi)
759	movdqu	%xmm6,(%edi)
760	movdqu	%xmm7,64(%edi)
761	leal	16(%edi),%edi
# Pass 3: state words 8..11.
762	paddd	(%ebp),%xmm0
763	paddd	16(%ebp),%xmm1
764	paddd	32(%ebp),%xmm2
765	paddd	48(%ebp),%xmm3
766	movdqa	%xmm0,%xmm6
767	punpckldq	%xmm1,%xmm0
768	movdqa	%xmm2,%xmm7
769	punpckldq	%xmm3,%xmm2
770	punpckhdq	%xmm1,%xmm6
771	punpckhdq	%xmm3,%xmm7
772	movdqa	%xmm0,%xmm1
773	punpcklqdq	%xmm2,%xmm0
774	movdqa	%xmm6,%xmm3
775	punpcklqdq	%xmm7,%xmm6
776	punpckhqdq	%xmm2,%xmm1
777	punpckhqdq	%xmm7,%xmm3
778	movdqu	-128(%esi),%xmm4
779	movdqu	-64(%esi),%xmm5
780	movdqu	(%esi),%xmm2
781	movdqu	64(%esi),%xmm7
782	leal	16(%esi),%esi
783	pxor	%xmm0,%xmm4
784	movdqa	64(%ebx),%xmm0
785	pxor	%xmm1,%xmm5
786	movdqa	80(%ebx),%xmm1
787	pxor	%xmm2,%xmm6
788	movdqa	96(%ebx),%xmm2
789	pxor	%xmm3,%xmm7
790	movdqa	112(%ebx),%xmm3
791	movdqu	%xmm4,-128(%edi)
792	movdqu	%xmm5,-64(%edi)
793	movdqu	%xmm6,(%edi)
794	movdqu	%xmm7,64(%edi)
795	leal	16(%edi),%edi
# Pass 4: state words 12..15. The final pointer step is 208 so that the
# four passes together advance both pointers by 256.
796	paddd	64(%ebp),%xmm0
797	paddd	80(%ebp),%xmm1
798	paddd	96(%ebp),%xmm2
799	paddd	112(%ebp),%xmm3
800	movdqa	%xmm0,%xmm6
801	punpckldq	%xmm1,%xmm0
802	movdqa	%xmm2,%xmm7
803	punpckldq	%xmm3,%xmm2
804	punpckhdq	%xmm1,%xmm6
805	punpckhdq	%xmm3,%xmm7
806	movdqa	%xmm0,%xmm1
807	punpcklqdq	%xmm2,%xmm0
808	movdqa	%xmm6,%xmm3
809	punpcklqdq	%xmm7,%xmm6
810	punpckhqdq	%xmm2,%xmm1
811	punpckhqdq	%xmm7,%xmm3
812	movdqu	-128(%esi),%xmm4
813	movdqu	-64(%esi),%xmm5
814	movdqu	(%esi),%xmm2
815	movdqu	64(%esi),%xmm7
816	leal	208(%esi),%esi
817	pxor	%xmm0,%xmm4
818	pxor	%xmm1,%xmm5
819	pxor	%xmm2,%xmm6
820	pxor	%xmm3,%xmm7
821	movdqu	%xmm4,-128(%edi)
822	movdqu	%xmm5,-64(%edi)
823	movdqu	%xmm6,(%edi)
824	movdqu	%xmm7,64(%edi)
825	leal	208(%edi),%edi
# The length is kept biased by -256: keep looping while at least 256 more
# bytes remain (no borrow), then undo the bias. Zero left means done.
826	subl	$256,%ecx
827	jnc	L009outer_loop
828	addl	$256,%ecx
829	jz	L011done
# Hand the remaining 1..255 bytes to the one-block code: reload the two
# input pointers, remove the 128-byte pointer bias and rebuild the
# words-12..15 vector in %xmm3. Its dword 0 becomes lane 0 of the current
# counter vector plus 4; dwords 1..3 are reloaded from the caller block.
830	movl	520(%esp),%ebx
831	leal	-128(%esi),%esi
832	movl	516(%esp),%edx
833	leal	-128(%edi),%edi
834	movd	64(%ebp),%xmm2
835	movdqu	(%ebx),%xmm3
836	paddd	96(%eax),%xmm2
837	pand	112(%eax),%xmm3
838	por	%xmm2,%xmm3
839L0081x:
# One-block path. Row layout: %xmm0 = words 0..3 (constants), %xmm1 = words
# 4..7, %xmm2 = words 8..11, %xmm3 = words 12..15. %xmm6 and %xmm7 hold the
# pshufb masks for the rotations by 16 and by 8. The input state is kept at
# 0..63(%esp).
840	movdqa	32(%eax),%xmm0
841	movdqu	(%edx),%xmm1
842	movdqu	16(%edx),%xmm2
843	movdqa	(%eax),%xmm6
844	movdqa	16(%eax),%xmm7
# NOTE(review): this scalar store to 48(%esp) is overwritten by the movdqa
# of %xmm3 to the same address four instructions below.
845	movl	%ebp,48(%esp)
846	movdqa	%xmm0,(%esp)
847	movdqa	%xmm1,16(%esp)
848	movdqa	%xmm2,32(%esp)
849	movdqa	%xmm3,48(%esp)
850	movl	$10,%edx
851	jmp	L012loop1x
852.align	4,0x90
853L013outer1x:
# Next block: reload the input state and add 1 to dword 0 of the last row
# (the block counter), writing the new row back to 48(%esp).
854	movdqa	80(%eax),%xmm3
855	movdqa	(%esp),%xmm0
856	movdqa	16(%esp),%xmm1
857	movdqa	32(%esp),%xmm2
858	paddd	48(%esp),%xmm3
859	movl	$10,%edx
860	movdqa	%xmm3,48(%esp)
861	jmp	L012loop1x
862.align	4,0x90
863L012loop1x:
# Round loop, ten iterations. Each iteration runs the quarter-round on all
# four columns at once, rotates rows 1..3 with pshufd so the next
# quarter-round works on the diagonals, then rotates the rows back.
# The .byte sequences encode pshufb %xmm6,%xmm3 (rotate by 16) and
# pshufb %xmm7,%xmm3 (rotate by 8).
864	paddd	%xmm1,%xmm0
865	pxor	%xmm0,%xmm3
866.byte	102,15,56,0,222
867	paddd	%xmm3,%xmm2
868	pxor	%xmm2,%xmm1
869	movdqa	%xmm1,%xmm4
870	psrld	$20,%xmm1
871	pslld	$12,%xmm4
872	por	%xmm4,%xmm1
873	paddd	%xmm1,%xmm0
874	pxor	%xmm0,%xmm3
875.byte	102,15,56,0,223
876	paddd	%xmm3,%xmm2
877	pxor	%xmm2,%xmm1
878	movdqa	%xmm1,%xmm4
879	psrld	$25,%xmm1
880	pslld	$7,%xmm4
881	por	%xmm4,%xmm1
882	pshufd	$78,%xmm2,%xmm2
883	pshufd	$57,%xmm1,%xmm1
884	pshufd	$147,%xmm3,%xmm3
885	nop
886	paddd	%xmm1,%xmm0
887	pxor	%xmm0,%xmm3
888.byte	102,15,56,0,222
889	paddd	%xmm3,%xmm2
890	pxor	%xmm2,%xmm1
891	movdqa	%xmm1,%xmm4
892	psrld	$20,%xmm1
893	pslld	$12,%xmm4
894	por	%xmm4,%xmm1
895	paddd	%xmm1,%xmm0
896	pxor	%xmm0,%xmm3
897.byte	102,15,56,0,223
898	paddd	%xmm3,%xmm2
899	pxor	%xmm2,%xmm1
900	movdqa	%xmm1,%xmm4
901	psrld	$25,%xmm1
902	pslld	$7,%xmm4
903	por	%xmm4,%xmm1
904	pshufd	$78,%xmm2,%xmm2
905	pshufd	$147,%xmm1,%xmm1
906	pshufd	$57,%xmm3,%xmm3
907	decl	%edx
908	jnz	L012loop1x
# Add the input state to get the keystream block in %xmm0..%xmm3.
909	paddd	(%esp),%xmm0
910	paddd	16(%esp),%xmm1
911	paddd	32(%esp),%xmm2
912	paddd	48(%esp),%xmm3
# Fewer than 64 bytes left: finish byte by byte.
913	cmpl	$64,%ecx
914	jb	L014tail
# Full block: XOR 64 source bytes with the keystream, store, advance both
# pointers and loop while bytes remain.
915	movdqu	(%esi),%xmm4
916	movdqu	16(%esi),%xmm5
917	pxor	%xmm4,%xmm0
918	movdqu	32(%esi),%xmm4
919	pxor	%xmm5,%xmm1
920	movdqu	48(%esi),%xmm5
921	pxor	%xmm4,%xmm2
922	pxor	%xmm5,%xmm3
923	leal	64(%esi),%esi
924	movdqu	%xmm0,(%edi)
925	movdqu	%xmm1,16(%edi)
926	movdqu	%xmm2,32(%edi)
927	movdqu	%xmm3,48(%edi)
928	leal	64(%edi),%edi
929	subl	$64,%ecx
930	jnz	L013outer1x
931	jmp	L011done
932L014tail:
# Partial final block: spill the keystream to 0..63(%esp) (overwriting the
# saved input state, which is no longer needed) and XOR %ecx bytes one at a
# time. %ebp is the byte index; %eax is reused as a scratch register here.
933	movdqa	%xmm0,(%esp)
934	movdqa	%xmm1,16(%esp)
935	movdqa	%xmm2,32(%esp)
936	movdqa	%xmm3,48(%esp)
937	xorl	%eax,%eax
938	xorl	%edx,%edx
939	xorl	%ebp,%ebp
940L015tail_loop:
941	movb	(%esp,%ebp,1),%al
942	movb	(%esi,%ebp,1),%dl
943	leal	1(%ebp),%ebp
944	xorb	%dl,%al
945	movb	%al,-1(%edi,%ebp,1)
946	decl	%ecx
947	jnz	L015tail_loop
948L011done:
# Restore the stack pointer saved at 512(%esp), pop the saved registers and
# return.
949	movl	512(%esp),%esp
950	popl	%edi
951	popl	%esi
952	popl	%ebx
953	popl	%ebp
954	ret
# Constant table for the SSSE3 code, which addresses it through %eax. It is
# read with aligned movdqa/paddd/pand memory operands, hence the alignment
# directive. Offsets from Lssse3_data:
#   offset 0:   pshufb mask rotating every 32-bit lane left by 16 bits
#   offset 16:  pshufb mask rotating every 32-bit lane left by 8 bits
#   offset 32:  the four constant state words (ASCII: expand 32-byte k)
#   offset 48:  per-lane counter offsets 0,1,2,3 for the four-block path
#   offset 64:  counter step of 4 in every lane (four-block path)
#   offset 80:  counter step of 1 in dword 0 (one-block path)
#   offset 96:  counter step of 4 in dword 0 (four-block to one-block handoff)
#   offset 112: mask that clears dword 0 and keeps dwords 1..3
955.align	6,0x90
956Lssse3_data:
957.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
958.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
959.long	1634760805,857760878,2036477234,1797285236
960.long	0,1,2,3
961.long	4,4,4,4
962.long	1,0,0,0
963.long	4,0,0,0
964.long	0,-1,-1,-1
# NUL-terminated ASCII identification string:
# ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
965.align	6,0x90
966.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
967.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
968.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
969.byte	114,103,62,0
# Mach-O non-lazy symbol pointer for _OPENSSL_ia32cap_P. The slot is emitted
# as zero and tied to the symbol by .indirect_symbol; presumably the dynamic
# linker fills it in at load time -- confirm against the Mach-O toolchain
# documentation. _ChaCha20_ctr32 loads this pointer relative to Lpic_point
# before testing the capability words.
970.section __IMPORT,__pointers,non_lazy_symbol_pointers
971L_OPENSSL_ia32cap_P$non_lazy_ptr:
972.indirect_symbol	_OPENSSL_ia32cap_P
973.long	0
974#endif
975