1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#if defined(BORINGSSL_PREFIX)
6#include <boringssl_prefix_symbols_asm.h>
7#endif
8.text
# GFp_ChaCha20_ctr32 -- ChaCha20 XOR routine for 32-bit x86, AT&T syntax.
#
# Stack arguments as this code reads them once the four pushes are done:
#   arg0 at 20(%esp)  pointer the result bytes are stored through
#   arg1 at 24(%esp)  pointer the source bytes are loaded through
#   arg2 at 28(%esp)  byte count; a count of zero returns immediately
#   arg3 at 32(%esp)  pointer to eight 32-bit words (presumably the key;
#                     confirm against the C prototype)
#   arg4 at 36(%esp)  pointer to four 32-bit words; the first one is used as
#                     the block counter (presumably followed by the nonce)
# ebp, ebx, esi and edi are saved and restored. eax, ecx and edx are
# overwritten and not restored.
#
# When both feature bits tested below are set in GFp_ia32cap_P, control
# jumps to .Lssse3_shortcut inside _ChaCha20_ssse3. Otherwise the scalar
# code in this function runs, using this frame after subl $132,%esp:
#   off 0..60        working state words x0..x15, word i at 4*i(%esp), for
#                    the words that are not currently held in registers;
#                    in the tail path the finished key stream block
#   off 80..108      the eight words copied from arg3 (state words 4..11)
#   off 112..124     the four words copied from arg4 (state words 12..15,
#                    112 is the counter)
#   off 128          spilled round counter
#   off 152/156/160  arg0/arg1/arg2, rewritten once per 64-byte block
9.globl	GFp_ChaCha20_ctr32
10.hidden	GFp_ChaCha20_ctr32
11.type	GFp_ChaCha20_ctr32,@function
12.align	16
13GFp_ChaCha20_ctr32:
14.L_GFp_ChaCha20_ctr32_begin:
# Save the four registers that are popped again before every return.
15	pushl	%ebp
16	pushl	%ebx
17	pushl	%esi
18	pushl	%edi
# Return at once when arg2 (the byte count) is zero.
19	xorl	%eax,%eax
20	cmpl	28(%esp),%eax
21	je	.L000no_data
# call/pop leaves the run-time address of .Lpic_point in eax so that the
# addresses below can be formed position independently. The SSSE3 path
# depends on eax still holding this value when it is entered.
22	call	.Lpic_point
23.Lpic_point:
24	popl	%eax
25	leal	GFp_ia32cap_P-.Lpic_point(%eax),%ebp
# Use the SSSE3 path only when bit 24 of word 0 and bit 9 of word 1 of
# GFp_ia32cap_P are both set (presumably the CPUID FXSR and SSSE3 flags).
26	testl	$16777216,(%ebp)
27	jz	.L001x86
28	testl	$512,4(%ebp)
29	jz	.L001x86
30	jmp	.Lssse3_shortcut
# Scalar path. esi = arg3 pointer, edi = arg4 pointer, then the 132-byte
# frame described in the header is allocated.
31.L001x86:
32	movl	32(%esp),%esi
33	movl	36(%esp),%edi
34	subl	$132,%esp
# Copy the eight words at arg3 to 80..108(%esp).
35	movl	(%esi),%eax
36	movl	4(%esi),%ebx
37	movl	8(%esi),%ecx
38	movl	12(%esi),%edx
39	movl	%eax,80(%esp)
40	movl	%ebx,84(%esp)
41	movl	%ecx,88(%esp)
42	movl	%edx,92(%esp)
43	movl	16(%esi),%eax
44	movl	20(%esi),%ebx
45	movl	24(%esi),%ecx
46	movl	28(%esi),%edx
47	movl	%eax,96(%esp)
48	movl	%ebx,100(%esp)
49	movl	%ecx,104(%esp)
50	movl	%edx,108(%esp)
# Copy the four words at arg4 to 112..124(%esp). The counter word is stored
# minus one because .L002entry adds one before every block, the first
# block included.
51	movl	(%edi),%eax
52	movl	4(%edi),%ebx
53	movl	8(%edi),%ecx
54	movl	12(%edi),%edx
55	subl	$1,%eax
56	movl	%eax,112(%esp)
57	movl	%ebx,116(%esp)
58	movl	%ecx,120(%esp)
59	movl	%edx,124(%esp)
60	jmp	.L002entry
61.align	16
# Reached after a full 64-byte block: ebx = advanced source pointer,
# eax = advanced destination pointer, ecx = remaining byte count. They are
# written back to the argument slots.
62.L003outer_loop:
63	movl	%ebx,156(%esp)
64	movl	%eax,152(%esp)
65	movl	%ecx,160(%esp)
# Build the working state for one block. The four constant words are
# immediates (their bytes are the ASCII text: expand 32-byte k). x0 stays
# in eax, x1..x3 go to 4..12(%esp).
66.L002entry:
67	movl	$1634760805,%eax
68	movl	$857760878,4(%esp)
69	movl	$2036477234,8(%esp)
70	movl	$1797285236,12(%esp)
# Words x5, x6, x10, x11, x13, x14 are copied from the saved input words to
# their working slots.
71	movl	84(%esp),%ebx
72	movl	88(%esp),%ebp
73	movl	104(%esp),%ecx
74	movl	108(%esp),%esi
75	movl	116(%esp),%edx
76	movl	120(%esp),%edi
77	movl	%ebx,20(%esp)
78	movl	%ebp,24(%esp)
79	movl	%ecx,40(%esp)
80	movl	%esi,44(%esp)
81	movl	%edx,52(%esp)
82	movl	%edi,56(%esp)
# x7 and x15 go to their slots. x4, x8, x9 and x12 start out in ebp, ecx,
# esi and edx. The counter (x12) is incremented and the new value is also
# written back to 112(%esp), so the same value is added after the rounds.
83	movl	92(%esp),%ebx
84	movl	124(%esp),%edi
85	movl	112(%esp),%edx
86	movl	80(%esp),%ebp
87	movl	96(%esp),%ecx
88	movl	100(%esp),%esi
89	addl	$1,%edx
90	movl	%ebx,28(%esp)
91	movl	%edi,60(%esp)
92	movl	%edx,112(%esp)
# ebx = number of loop iterations; it is spilled to 128(%esp) at the top
# of the loop because ebx is needed as a work register.
93	movl	$10,%ebx
94	jmp	.L004loop
95.align	16
# Round loop, ten iterations. Each iteration performs eight quarter rounds
# (add, xor, rotate left by 16, 12, 8, 7), first on the index groups
# (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15) and then on
# (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14). Loads and stores for the
# neighbouring quarter rounds are interleaved with the arithmetic.
# On entry to every iteration: eax=x0 ebp=x4 ecx=x8 esi=x9 edx=x12.
96.L004loop:
# Quarter round (x0,x4,x8,x12) in eax, ebx (copied from ebp), ecx, edx.
97	addl	%ebp,%eax
98	movl	%ebx,128(%esp)
99	movl	%ebp,%ebx
100	xorl	%eax,%edx
101	roll	$16,%edx
102	addl	%edx,%ecx
103	xorl	%ecx,%ebx
104	movl	52(%esp),%edi
105	roll	$12,%ebx
106	movl	20(%esp),%ebp
107	addl	%ebx,%eax
108	xorl	%eax,%edx
109	movl	%eax,(%esp)
110	roll	$8,%edx
111	movl	4(%esp),%eax
112	addl	%edx,%ecx
113	movl	%edx,48(%esp)
114	xorl	%ecx,%ebx
# Quarter round (x1,x5,x9,x13) in eax, ebp, esi, edi; the remaining steps
# of the previous quarter round are mixed in.
115	addl	%ebp,%eax
116	roll	$7,%ebx
117	xorl	%eax,%edi
118	movl	%ecx,32(%esp)
119	roll	$16,%edi
120	movl	%ebx,16(%esp)
121	addl	%edi,%esi
122	movl	40(%esp),%ecx
123	xorl	%esi,%ebp
124	movl	56(%esp),%edx
125	roll	$12,%ebp
126	movl	24(%esp),%ebx
127	addl	%ebp,%eax
128	xorl	%eax,%edi
129	movl	%eax,4(%esp)
130	roll	$8,%edi
131	movl	8(%esp),%eax
132	addl	%edi,%esi
133	movl	%edi,52(%esp)
134	xorl	%esi,%ebp
# Quarter round (x2,x6,x10,x14) in eax, ebx, ecx, edx.
135	addl	%ebx,%eax
136	roll	$7,%ebp
137	xorl	%eax,%edx
138	movl	%esi,36(%esp)
139	roll	$16,%edx
140	movl	%ebp,20(%esp)
141	addl	%edx,%ecx
142	movl	44(%esp),%esi
143	xorl	%ecx,%ebx
144	movl	60(%esp),%edi
145	roll	$12,%ebx
146	movl	28(%esp),%ebp
147	addl	%ebx,%eax
148	xorl	%eax,%edx
149	movl	%eax,8(%esp)
150	roll	$8,%edx
151	movl	12(%esp),%eax
152	addl	%edx,%ecx
153	movl	%edx,56(%esp)
154	xorl	%ecx,%ebx
# Quarter round (x3,x7,x11,x15) in eax, ebp, esi, edi. x10 stays in ecx
# and the new x15 is moved to edx for the next quarter round.
155	addl	%ebp,%eax
156	roll	$7,%ebx
157	xorl	%eax,%edi
158	roll	$16,%edi
159	movl	%ebx,24(%esp)
160	addl	%edi,%esi
161	xorl	%esi,%ebp
162	roll	$12,%ebp
163	movl	20(%esp),%ebx
164	addl	%ebp,%eax
165	xorl	%eax,%edi
166	movl	%eax,12(%esp)
167	roll	$8,%edi
168	movl	(%esp),%eax
169	addl	%edi,%esi
170	movl	%edi,%edx
171	xorl	%esi,%ebp
# Quarter round (x0,x5,x10,x15) in eax, ebx, ecx, edx.
172	addl	%ebx,%eax
173	roll	$7,%ebp
174	xorl	%eax,%edx
175	roll	$16,%edx
176	movl	%ebp,28(%esp)
177	addl	%edx,%ecx
178	xorl	%ecx,%ebx
179	movl	48(%esp),%edi
180	roll	$12,%ebx
181	movl	24(%esp),%ebp
182	addl	%ebx,%eax
183	xorl	%eax,%edx
184	movl	%eax,(%esp)
185	roll	$8,%edx
186	movl	4(%esp),%eax
187	addl	%edx,%ecx
188	movl	%edx,60(%esp)
189	xorl	%ecx,%ebx
# Quarter round (x1,x6,x11,x12) in eax, ebp, esi, edi.
190	addl	%ebp,%eax
191	roll	$7,%ebx
192	xorl	%eax,%edi
193	movl	%ecx,40(%esp)
194	roll	$16,%edi
195	movl	%ebx,20(%esp)
196	addl	%edi,%esi
197	movl	32(%esp),%ecx
198	xorl	%esi,%ebp
199	movl	52(%esp),%edx
200	roll	$12,%ebp
201	movl	28(%esp),%ebx
202	addl	%ebp,%eax
203	xorl	%eax,%edi
204	movl	%eax,4(%esp)
205	roll	$8,%edi
206	movl	8(%esp),%eax
207	addl	%edi,%esi
208	movl	%edi,48(%esp)
209	xorl	%esi,%ebp
# Quarter round (x2,x7,x8,x13) in eax, ebx, ecx, edx.
210	addl	%ebx,%eax
211	roll	$7,%ebp
212	xorl	%eax,%edx
213	movl	%esi,44(%esp)
214	roll	$16,%edx
215	movl	%ebp,24(%esp)
216	addl	%edx,%ecx
217	movl	36(%esp),%esi
218	xorl	%ecx,%ebx
219	movl	56(%esp),%edi
220	roll	$12,%ebx
221	movl	16(%esp),%ebp
222	addl	%ebx,%eax
223	xorl	%eax,%edx
224	movl	%eax,8(%esp)
225	roll	$8,%edx
226	movl	12(%esp),%eax
227	addl	%edx,%ecx
228	movl	%edx,52(%esp)
229	xorl	%ecx,%ebx
# Quarter round (x3,x4,x9,x14) in eax, ebp, esi, edi. Along the way edx
# is reloaded with x12, ebx with the round counter and eax with x0, which
# restores the register assignment expected at the top of the loop.
230	addl	%ebp,%eax
231	roll	$7,%ebx
232	xorl	%eax,%edi
233	roll	$16,%edi
234	movl	%ebx,28(%esp)
235	addl	%edi,%esi
236	xorl	%esi,%ebp
237	movl	48(%esp),%edx
238	roll	$12,%ebp
239	movl	128(%esp),%ebx
240	addl	%ebp,%eax
241	xorl	%eax,%edi
242	movl	%eax,12(%esp)
243	roll	$8,%edi
244	movl	(%esp),%eax
245	addl	%edi,%esi
246	movl	%edi,56(%esp)
247	xorl	%esi,%ebp
248	roll	$7,%ebp
249	decl	%ebx
250	jnz	.L004loop
# Rounds finished: eax=x0 ebp=x4 ecx=x8 esi=x9 edx=x12 edi=x14. Add the
# corresponding input words to the ones held in registers and branch to
# the tail code when fewer than 64 bytes remain (ebx = remaining count).
251	movl	160(%esp),%ebx
252	addl	$1634760805,%eax
253	addl	80(%esp),%ebp
254	addl	96(%esp),%ecx
255	addl	100(%esp),%esi
256	cmpl	$64,%ebx
257	jb	.L005tail
# Full 64-byte block. ebx = source pointer. Words 0, 4, 8, 9, 12, 14 are
# XORed with the source first. The result for word 0 is parked at (%esp)
# because eax is then reloaded with the destination pointer.
258	movl	156(%esp),%ebx
259	addl	112(%esp),%edx
260	addl	120(%esp),%edi
261	xorl	(%ebx),%eax
262	xorl	16(%ebx),%ebp
263	movl	%eax,(%esp)
264	movl	152(%esp),%eax
265	xorl	32(%ebx),%ecx
266	xorl	36(%ebx),%esi
267	xorl	48(%ebx),%edx
268	xorl	56(%ebx),%edi
269	movl	%ebp,16(%eax)
270	movl	%ecx,32(%eax)
271	movl	%esi,36(%eax)
272	movl	%edx,48(%eax)
273	movl	%edi,56(%eax)
# Words 1, 2, 3, 5, 6: reload, add the input words, XOR, store.
274	movl	4(%esp),%ebp
275	movl	8(%esp),%ecx
276	movl	12(%esp),%esi
277	movl	20(%esp),%edx
278	movl	24(%esp),%edi
279	addl	$857760878,%ebp
280	addl	$2036477234,%ecx
281	addl	$1797285236,%esi
282	addl	84(%esp),%edx
283	addl	88(%esp),%edi
284	xorl	4(%ebx),%ebp
285	xorl	8(%ebx),%ecx
286	xorl	12(%ebx),%esi
287	xorl	20(%ebx),%edx
288	xorl	24(%ebx),%edi
289	movl	%ebp,4(%eax)
290	movl	%ecx,8(%eax)
291	movl	%esi,12(%eax)
292	movl	%edx,20(%eax)
293	movl	%edi,24(%eax)
# Words 7, 10, 11, 13, 15: same treatment. Then the parked word 0 is
# stored, both pointers advance by 64 and the remaining count in ecx is
# reduced by 64; a non-zero remainder starts another block.
294	movl	28(%esp),%ebp
295	movl	40(%esp),%ecx
296	movl	44(%esp),%esi
297	movl	52(%esp),%edx
298	movl	60(%esp),%edi
299	addl	92(%esp),%ebp
300	addl	104(%esp),%ecx
301	addl	108(%esp),%esi
302	addl	116(%esp),%edx
303	addl	124(%esp),%edi
304	xorl	28(%ebx),%ebp
305	xorl	40(%ebx),%ecx
306	xorl	44(%ebx),%esi
307	xorl	52(%ebx),%edx
308	xorl	60(%ebx),%edi
309	leal	64(%ebx),%ebx
310	movl	%ebp,28(%eax)
311	movl	(%esp),%ebp
312	movl	%ecx,40(%eax)
313	movl	160(%esp),%ecx
314	movl	%esi,44(%eax)
315	movl	%edx,52(%eax)
316	movl	%edi,60(%eax)
317	movl	%ebp,(%eax)
318	leal	64(%eax),%eax
319	subl	$64,%ecx
320	jnz	.L003outer_loop
321	jmp	.L006done
# Partial final block (ebx = remaining byte count, between 1 and 63).
# The complete key stream block is first assembled at 0..60(%esp).
322.L005tail:
323	addl	112(%esp),%edx
324	addl	120(%esp),%edi
325	movl	%eax,(%esp)
326	movl	%ebp,16(%esp)
327	movl	%ecx,32(%esp)
328	movl	%esi,36(%esp)
329	movl	%edx,48(%esp)
330	movl	%edi,56(%esp)
331	movl	4(%esp),%ebp
332	movl	8(%esp),%ecx
333	movl	12(%esp),%esi
334	movl	20(%esp),%edx
335	movl	24(%esp),%edi
336	addl	$857760878,%ebp
337	addl	$2036477234,%ecx
338	addl	$1797285236,%esi
339	addl	84(%esp),%edx
340	addl	88(%esp),%edi
341	movl	%ebp,4(%esp)
342	movl	%ecx,8(%esp)
343	movl	%esi,12(%esp)
344	movl	%edx,20(%esp)
345	movl	%edi,24(%esp)
346	movl	28(%esp),%ebp
347	movl	40(%esp),%ecx
348	movl	44(%esp),%esi
349	movl	52(%esp),%edx
350	movl	60(%esp),%edi
351	addl	92(%esp),%ebp
352	addl	104(%esp),%ecx
353	addl	108(%esp),%esi
354	addl	116(%esp),%edx
355	addl	124(%esp),%edi
# ebp = source pointer, ecx = destination pointer, esi = byte index (0).
# eax and edx are cleared before being used as byte registers.
356	movl	%ebp,28(%esp)
357	movl	156(%esp),%ebp
358	movl	%ecx,40(%esp)
359	movl	152(%esp),%ecx
360	movl	%esi,44(%esp)
361	xorl	%esi,%esi
362	movl	%edx,52(%esp)
363	movl	%edi,60(%esp)
364	xorl	%eax,%eax
365	xorl	%edx,%edx
# Byte loop: destination[i] = source[i] XOR keystream[i]. esi is advanced
# before the store, hence the -1 displacement.
366.L007tail_loop:
367	movb	(%esi,%ebp,1),%al
368	movb	(%esp,%esi,1),%dl
369	leal	1(%esi),%esi
370	xorb	%dl,%al
371	movb	%al,-1(%ecx,%esi,1)
372	decl	%ebx
373	jnz	.L007tail_loop
# Release the 132-byte frame (scalar path only).
374.L006done:
375	addl	$132,%esp
# Common exit, also used by the zero-length early return.
376.L000no_data:
377	popl	%edi
378	popl	%esi
379	popl	%ebx
380	popl	%ebp
381	ret
382.size	GFp_ChaCha20_ctr32,.-.L_GFp_ChaCha20_ctr32_begin
# _ChaCha20_ssse3 -- SSE2/SSSE3 implementation of the same operation.
#
# Within this file the code is reached through .Lssse3_shortcut, which
# GFp_ChaCha20_ctr32 jumps to after pushing the same four registers and
# with eax holding the address of .Lpic_point.
# NOTE(review): nothing between the _ChaCha20_ssse3 label and the leal
# that forms the .Lssse3_data address sets eax, so entering through the
# label itself looks unsupported -- confirm there are no such callers.
#
# Register roles after the loads below: edi = arg0 (destination),
# esi = arg1 (source), ecx = arg2 (byte count), edx = arg3 pointer,
# ebx = arg4 pointer, eax = address of .Lssse3_data.
#
# Frame, after esp is lowered by 524 and rounded down to a multiple of 64:
#   off 0..255    four-block path: working state, base ebx = esp+128
#                 one-block path: off 0..63 hold the 64-byte input state
#                 and later the key stream for a partial block
#   off 256..511  four-block path: input state, base ebp = esp+384
#   off 512       caller esp, reloaded at .L011done
#   off 516, 520  saved arg3 and arg4 pointers (four-block path only)
# In both 256-byte areas word i of the state occupies 16 bytes at
# base + 16*i - 128, one dword lane per block.
383.hidden	_ChaCha20_ssse3
384.type	_ChaCha20_ssse3,@function
385.align	16
386_ChaCha20_ssse3:
387	pushl	%ebp
388	pushl	%ebx
389	pushl	%esi
390	pushl	%edi
391.Lssse3_shortcut:
392	movl	20(%esp),%edi
393	movl	24(%esp),%esi
394	movl	28(%esp),%ecx
395	movl	32(%esp),%edx
396	movl	36(%esp),%ebx
# Keep the incoming esp in ebp, carve out the aligned frame and remember
# the old esp at 512(%esp).
397	movl	%esp,%ebp
398	subl	$524,%esp
399	andl	$-64,%esp
400	movl	%ebp,512(%esp)
# eax = address of the constant table; xmm3 = the 16 bytes at arg4.
401	leal	.Lssse3_data-.Lpic_point(%eax),%eax
402	movdqu	(%ebx),%xmm3
# Fewer than 256 bytes: go straight to the one-block code.
403	cmpl	$256,%ecx
404	jb	.L0081x
# Four-block path setup. The pointers are saved for the later fall-through
# into the one-block code. ecx is biased by -256 so that the subl/jnc pair
# at the bottom of the outer loop continues while 256 or more bytes remain.
405	movl	%edx,516(%esp)
406	movl	%ebx,520(%esp)
407	subl	$256,%ecx
408	leal	384(%esp),%ebp
409	movdqu	(%edx),%xmm7
# Broadcast each dword of the arg4 block (words 12..15) to all four lanes.
# The counter lanes get the offsets at table+48 added and the step at
# table+64 subtracted, because the outer loop adds that step before use.
410	pshufd	$0,%xmm3,%xmm0
411	pshufd	$85,%xmm3,%xmm1
412	pshufd	$170,%xmm3,%xmm2
413	pshufd	$255,%xmm3,%xmm3
414	paddd	48(%eax),%xmm0
# Broadcast the first four words at arg3 (words 4..7).
415	pshufd	$0,%xmm7,%xmm4
416	pshufd	$85,%xmm7,%xmm5
417	psubd	64(%eax),%xmm0
418	pshufd	$170,%xmm7,%xmm6
419	pshufd	$255,%xmm7,%xmm7
420	movdqa	%xmm0,64(%ebp)
421	movdqa	%xmm1,80(%ebp)
422	movdqa	%xmm2,96(%ebp)
423	movdqa	%xmm3,112(%ebp)
424	movdqu	16(%edx),%xmm3
425	movdqa	%xmm4,-64(%ebp)
426	movdqa	%xmm5,-48(%ebp)
427	movdqa	%xmm6,-32(%ebp)
428	movdqa	%xmm7,-16(%ebp)
# Broadcast the second four words at arg3 (words 8..11) and the four
# constant words from table+32 (words 0..3). ebx becomes the base of the
# working area.
429	movdqa	32(%eax),%xmm7
430	leal	128(%esp),%ebx
431	pshufd	$0,%xmm3,%xmm0
432	pshufd	$85,%xmm3,%xmm1
433	pshufd	$170,%xmm3,%xmm2
434	pshufd	$255,%xmm3,%xmm3
435	pshufd	$0,%xmm7,%xmm4
436	pshufd	$85,%xmm7,%xmm5
437	pshufd	$170,%xmm7,%xmm6
438	pshufd	$255,%xmm7,%xmm7
439	movdqa	%xmm0,(%ebp)
440	movdqa	%xmm1,16(%ebp)
441	movdqa	%xmm2,32(%ebp)
442	movdqa	%xmm3,48(%ebp)
443	movdqa	%xmm4,-128(%ebp)
444	movdqa	%xmm5,-112(%ebp)
445	movdqa	%xmm6,-96(%ebp)
446	movdqa	%xmm7,-80(%ebp)
# Bias both data pointers by +128; the load and store code after the
# rounds addresses the four blocks with displacements -128, -64, 0, 64.
447	leal	128(%esi),%esi
448	leal	128(%edi),%edi
449	jmp	.L009outer_loop
450.align	16
# One iteration per 256 bytes. The input state is copied from the ebp
# area to the ebx working area, except for the words that start out in
# registers: xmm0 = word 0, xmm3 = word 4, xmm4 = word 8, xmm5 = word 9,
# xmm6 = word 12.
451.L009outer_loop:
452	movdqa	-112(%ebp),%xmm1
453	movdqa	-96(%ebp),%xmm2
454	movdqa	-80(%ebp),%xmm3
455	movdqa	-48(%ebp),%xmm5
456	movdqa	-32(%ebp),%xmm6
457	movdqa	-16(%ebp),%xmm7
458	movdqa	%xmm1,-112(%ebx)
459	movdqa	%xmm2,-96(%ebx)
460	movdqa	%xmm3,-80(%ebx)
461	movdqa	%xmm5,-48(%ebx)
462	movdqa	%xmm6,-32(%ebx)
463	movdqa	%xmm7,-16(%ebx)
464	movdqa	32(%ebp),%xmm2
465	movdqa	48(%ebp),%xmm3
466	movdqa	64(%ebp),%xmm4
467	movdqa	80(%ebp),%xmm5
468	movdqa	96(%ebp),%xmm6
469	movdqa	112(%ebp),%xmm7
# Advance the four lane counters by the step at table+64 and write them
# back to the input area as well, so the same values are added after the
# rounds.
470	paddd	64(%eax),%xmm4
471	movdqa	%xmm2,32(%ebx)
472	movdqa	%xmm3,48(%ebx)
473	movdqa	%xmm4,64(%ebx)
474	movdqa	%xmm5,80(%ebx)
475	movdqa	%xmm6,96(%ebx)
476	movdqa	%xmm7,112(%ebx)
477	movdqa	%xmm4,64(%ebp)
478	movdqa	-128(%ebp),%xmm0
479	movdqa	%xmm4,%xmm6
480	movdqa	-64(%ebp),%xmm3
481	movdqa	(%ebp),%xmm4
482	movdqa	16(%ebp),%xmm5
483	movl	$10,%edx
484	nop
485.align	16
# Round loop, ten iterations (edx), eight quarter rounds each, on the same
# index groups and in the same order as the scalar code. Rotations by 16
# and 8 use pshufb with the masks at table+0 and table+16; rotations by
# 12 and 7 use a shift pair combined with por. Loads and stores for the
# neighbouring quarter rounds are interleaved with the arithmetic.
486.L010loop:
# Quarter round on words (0,4,8,12): xmm0, xmm2 (copy of xmm3), xmm4, xmm6.
487	paddd	%xmm3,%xmm0
488	movdqa	%xmm3,%xmm2
489	pxor	%xmm0,%xmm6
490	pshufb	(%eax),%xmm6
491	paddd	%xmm6,%xmm4
492	pxor	%xmm4,%xmm2
493	movdqa	-48(%ebx),%xmm3
494	movdqa	%xmm2,%xmm1
495	pslld	$12,%xmm2
496	psrld	$20,%xmm1
497	por	%xmm1,%xmm2
498	movdqa	-112(%ebx),%xmm1
499	paddd	%xmm2,%xmm0
500	movdqa	80(%ebx),%xmm7
501	pxor	%xmm0,%xmm6
502	movdqa	%xmm0,-128(%ebx)
503	pshufb	16(%eax),%xmm6
504	paddd	%xmm6,%xmm4
505	movdqa	%xmm6,64(%ebx)
506	pxor	%xmm4,%xmm2
# Quarter round on words (1,5,9,13): xmm1, xmm3, xmm5, xmm7.
507	paddd	%xmm3,%xmm1
508	movdqa	%xmm2,%xmm0
509	pslld	$7,%xmm2
510	psrld	$25,%xmm0
511	pxor	%xmm1,%xmm7
512	por	%xmm0,%xmm2
513	movdqa	%xmm4,(%ebx)
514	pshufb	(%eax),%xmm7
515	movdqa	%xmm2,-64(%ebx)
516	paddd	%xmm7,%xmm5
517	movdqa	32(%ebx),%xmm4
518	pxor	%xmm5,%xmm3
519	movdqa	-32(%ebx),%xmm2
520	movdqa	%xmm3,%xmm0
521	pslld	$12,%xmm3
522	psrld	$20,%xmm0
523	por	%xmm0,%xmm3
524	movdqa	-96(%ebx),%xmm0
525	paddd	%xmm3,%xmm1
526	movdqa	96(%ebx),%xmm6
527	pxor	%xmm1,%xmm7
528	movdqa	%xmm1,-112(%ebx)
529	pshufb	16(%eax),%xmm7
530	paddd	%xmm7,%xmm5
531	movdqa	%xmm7,80(%ebx)
532	pxor	%xmm5,%xmm3
# Quarter round on words (2,6,10,14): xmm0, xmm2, xmm4, xmm6.
533	paddd	%xmm2,%xmm0
534	movdqa	%xmm3,%xmm1
535	pslld	$7,%xmm3
536	psrld	$25,%xmm1
537	pxor	%xmm0,%xmm6
538	por	%xmm1,%xmm3
539	movdqa	%xmm5,16(%ebx)
540	pshufb	(%eax),%xmm6
541	movdqa	%xmm3,-48(%ebx)
542	paddd	%xmm6,%xmm4
543	movdqa	48(%ebx),%xmm5
544	pxor	%xmm4,%xmm2
545	movdqa	-16(%ebx),%xmm3
546	movdqa	%xmm2,%xmm1
547	pslld	$12,%xmm2
548	psrld	$20,%xmm1
549	por	%xmm1,%xmm2
550	movdqa	-80(%ebx),%xmm1
551	paddd	%xmm2,%xmm0
552	movdqa	112(%ebx),%xmm7
553	pxor	%xmm0,%xmm6
554	movdqa	%xmm0,-96(%ebx)
555	pshufb	16(%eax),%xmm6
556	paddd	%xmm6,%xmm4
557	movdqa	%xmm6,96(%ebx)
558	pxor	%xmm4,%xmm2
# Quarter round on words (3,7,11,15): xmm1, xmm3, xmm5, xmm7. Word 10
# stays in xmm4 and the new word 15 is copied to xmm6 for the next
# quarter round.
559	paddd	%xmm3,%xmm1
560	movdqa	%xmm2,%xmm0
561	pslld	$7,%xmm2
562	psrld	$25,%xmm0
563	pxor	%xmm1,%xmm7
564	por	%xmm0,%xmm2
565	pshufb	(%eax),%xmm7
566	movdqa	%xmm2,-32(%ebx)
567	paddd	%xmm7,%xmm5
568	pxor	%xmm5,%xmm3
569	movdqa	-48(%ebx),%xmm2
570	movdqa	%xmm3,%xmm0
571	pslld	$12,%xmm3
572	psrld	$20,%xmm0
573	por	%xmm0,%xmm3
574	movdqa	-128(%ebx),%xmm0
575	paddd	%xmm3,%xmm1
576	pxor	%xmm1,%xmm7
577	movdqa	%xmm1,-80(%ebx)
578	pshufb	16(%eax),%xmm7
579	paddd	%xmm7,%xmm5
580	movdqa	%xmm7,%xmm6
581	pxor	%xmm5,%xmm3
# Quarter round on words (0,5,10,15): xmm0, xmm2, xmm4, xmm6.
582	paddd	%xmm2,%xmm0
583	movdqa	%xmm3,%xmm1
584	pslld	$7,%xmm3
585	psrld	$25,%xmm1
586	pxor	%xmm0,%xmm6
587	por	%xmm1,%xmm3
588	pshufb	(%eax),%xmm6
589	movdqa	%xmm3,-16(%ebx)
590	paddd	%xmm6,%xmm4
591	pxor	%xmm4,%xmm2
592	movdqa	-32(%ebx),%xmm3
593	movdqa	%xmm2,%xmm1
594	pslld	$12,%xmm2
595	psrld	$20,%xmm1
596	por	%xmm1,%xmm2
597	movdqa	-112(%ebx),%xmm1
598	paddd	%xmm2,%xmm0
599	movdqa	64(%ebx),%xmm7
600	pxor	%xmm0,%xmm6
601	movdqa	%xmm0,-128(%ebx)
602	pshufb	16(%eax),%xmm6
603	paddd	%xmm6,%xmm4
604	movdqa	%xmm6,112(%ebx)
605	pxor	%xmm4,%xmm2
# Quarter round on words (1,6,11,12): xmm1, xmm3, xmm5, xmm7.
606	paddd	%xmm3,%xmm1
607	movdqa	%xmm2,%xmm0
608	pslld	$7,%xmm2
609	psrld	$25,%xmm0
610	pxor	%xmm1,%xmm7
611	por	%xmm0,%xmm2
612	movdqa	%xmm4,32(%ebx)
613	pshufb	(%eax),%xmm7
614	movdqa	%xmm2,-48(%ebx)
615	paddd	%xmm7,%xmm5
616	movdqa	(%ebx),%xmm4
617	pxor	%xmm5,%xmm3
618	movdqa	-16(%ebx),%xmm2
619	movdqa	%xmm3,%xmm0
620	pslld	$12,%xmm3
621	psrld	$20,%xmm0
622	por	%xmm0,%xmm3
623	movdqa	-96(%ebx),%xmm0
624	paddd	%xmm3,%xmm1
625	movdqa	80(%ebx),%xmm6
626	pxor	%xmm1,%xmm7
627	movdqa	%xmm1,-112(%ebx)
628	pshufb	16(%eax),%xmm7
629	paddd	%xmm7,%xmm5
630	movdqa	%xmm7,64(%ebx)
631	pxor	%xmm5,%xmm3
# Quarter round on words (2,7,8,13): xmm0, xmm2, xmm4, xmm6.
632	paddd	%xmm2,%xmm0
633	movdqa	%xmm3,%xmm1
634	pslld	$7,%xmm3
635	psrld	$25,%xmm1
636	pxor	%xmm0,%xmm6
637	por	%xmm1,%xmm3
638	movdqa	%xmm5,48(%ebx)
639	pshufb	(%eax),%xmm6
640	movdqa	%xmm3,-32(%ebx)
641	paddd	%xmm6,%xmm4
642	movdqa	16(%ebx),%xmm5
643	pxor	%xmm4,%xmm2
644	movdqa	-64(%ebx),%xmm3
645	movdqa	%xmm2,%xmm1
646	pslld	$12,%xmm2
647	psrld	$20,%xmm1
648	por	%xmm1,%xmm2
649	movdqa	-80(%ebx),%xmm1
650	paddd	%xmm2,%xmm0
651	movdqa	96(%ebx),%xmm7
652	pxor	%xmm0,%xmm6
653	movdqa	%xmm0,-96(%ebx)
654	pshufb	16(%eax),%xmm6
655	paddd	%xmm6,%xmm4
656	movdqa	%xmm6,80(%ebx)
657	pxor	%xmm4,%xmm2
# Quarter round on words (3,4,9,14): xmm1, xmm3, xmm5, xmm7. Words 0 and
# 12 are reloaded into xmm0 and xmm6 so that the register assignment
# matches the top of the loop again.
658	paddd	%xmm3,%xmm1
659	movdqa	%xmm2,%xmm0
660	pslld	$7,%xmm2
661	psrld	$25,%xmm0
662	pxor	%xmm1,%xmm7
663	por	%xmm0,%xmm2
664	pshufb	(%eax),%xmm7
665	movdqa	%xmm2,-16(%ebx)
666	paddd	%xmm7,%xmm5
667	pxor	%xmm5,%xmm3
668	movdqa	%xmm3,%xmm0
669	pslld	$12,%xmm3
670	psrld	$20,%xmm0
671	por	%xmm0,%xmm3
672	movdqa	-128(%ebx),%xmm0
673	paddd	%xmm3,%xmm1
674	movdqa	64(%ebx),%xmm6
675	pxor	%xmm1,%xmm7
676	movdqa	%xmm1,-80(%ebx)
677	pshufb	16(%eax),%xmm7
678	paddd	%xmm7,%xmm5
679	movdqa	%xmm7,96(%ebx)
680	pxor	%xmm5,%xmm3
681	movdqa	%xmm3,%xmm1
682	pslld	$7,%xmm3
683	psrld	$25,%xmm1
684	por	%xmm1,%xmm3
685	decl	%edx
686	jnz	.L010loop
# Flush the words still held in registers (4, 8, 9, 12, 14) to the
# working area, then process words 0..3: add the input words and
# transpose the 4x4 dword matrix so that xmm0, xmm1, xmm6 and xmm3 hold
# key stream words 0..3 of blocks 0, 1, 2 and 3.
687	movdqa	%xmm3,-64(%ebx)
688	movdqa	%xmm4,(%ebx)
689	movdqa	%xmm5,16(%ebx)
690	movdqa	%xmm6,64(%ebx)
691	movdqa	%xmm7,96(%ebx)
692	movdqa	-112(%ebx),%xmm1
693	movdqa	-96(%ebx),%xmm2
694	movdqa	-80(%ebx),%xmm3
695	paddd	-128(%ebp),%xmm0
696	paddd	-112(%ebp),%xmm1
697	paddd	-96(%ebp),%xmm2
698	paddd	-80(%ebp),%xmm3
699	movdqa	%xmm0,%xmm6
700	punpckldq	%xmm1,%xmm0
701	movdqa	%xmm2,%xmm7
702	punpckldq	%xmm3,%xmm2
703	punpckhdq	%xmm1,%xmm6
704	punpckhdq	%xmm3,%xmm7
705	movdqa	%xmm0,%xmm1
706	punpcklqdq	%xmm2,%xmm0
707	movdqa	%xmm6,%xmm3
708	punpcklqdq	%xmm7,%xmm6
709	punpckhqdq	%xmm2,%xmm1
710	punpckhqdq	%xmm7,%xmm3
# XOR with the matching 16 source bytes of each of the four blocks (the
# blocks are 64 bytes apart) and store. Both pointers then advance by 16
# to the next group of four words. Loads of words 4..7 are interleaved.
711	movdqu	-128(%esi),%xmm4
712	movdqu	-64(%esi),%xmm5
713	movdqu	(%esi),%xmm2
714	movdqu	64(%esi),%xmm7
715	leal	16(%esi),%esi
716	pxor	%xmm0,%xmm4
717	movdqa	-64(%ebx),%xmm0
718	pxor	%xmm1,%xmm5
719	movdqa	-48(%ebx),%xmm1
720	pxor	%xmm2,%xmm6
721	movdqa	-32(%ebx),%xmm2
722	pxor	%xmm3,%xmm7
723	movdqa	-16(%ebx),%xmm3
724	movdqu	%xmm4,-128(%edi)
725	movdqu	%xmm5,-64(%edi)
726	movdqu	%xmm6,(%edi)
727	movdqu	%xmm7,64(%edi)
728	leal	16(%edi),%edi
# Words 4..7: add input, transpose, XOR, store.
729	paddd	-64(%ebp),%xmm0
730	paddd	-48(%ebp),%xmm1
731	paddd	-32(%ebp),%xmm2
732	paddd	-16(%ebp),%xmm3
733	movdqa	%xmm0,%xmm6
734	punpckldq	%xmm1,%xmm0
735	movdqa	%xmm2,%xmm7
736	punpckldq	%xmm3,%xmm2
737	punpckhdq	%xmm1,%xmm6
738	punpckhdq	%xmm3,%xmm7
739	movdqa	%xmm0,%xmm1
740	punpcklqdq	%xmm2,%xmm0
741	movdqa	%xmm6,%xmm3
742	punpcklqdq	%xmm7,%xmm6
743	punpckhqdq	%xmm2,%xmm1
744	punpckhqdq	%xmm7,%xmm3
745	movdqu	-128(%esi),%xmm4
746	movdqu	-64(%esi),%xmm5
747	movdqu	(%esi),%xmm2
748	movdqu	64(%esi),%xmm7
749	leal	16(%esi),%esi
750	pxor	%xmm0,%xmm4
751	movdqa	(%ebx),%xmm0
752	pxor	%xmm1,%xmm5
753	movdqa	16(%ebx),%xmm1
754	pxor	%xmm2,%xmm6
755	movdqa	32(%ebx),%xmm2
756	pxor	%xmm3,%xmm7
757	movdqa	48(%ebx),%xmm3
758	movdqu	%xmm4,-128(%edi)
759	movdqu	%xmm5,-64(%edi)
760	movdqu	%xmm6,(%edi)
761	movdqu	%xmm7,64(%edi)
762	leal	16(%edi),%edi
# Words 8..11: add input, transpose, XOR, store.
763	paddd	(%ebp),%xmm0
764	paddd	16(%ebp),%xmm1
765	paddd	32(%ebp),%xmm2
766	paddd	48(%ebp),%xmm3
767	movdqa	%xmm0,%xmm6
768	punpckldq	%xmm1,%xmm0
769	movdqa	%xmm2,%xmm7
770	punpckldq	%xmm3,%xmm2
771	punpckhdq	%xmm1,%xmm6
772	punpckhdq	%xmm3,%xmm7
773	movdqa	%xmm0,%xmm1
774	punpcklqdq	%xmm2,%xmm0
775	movdqa	%xmm6,%xmm3
776	punpcklqdq	%xmm7,%xmm6
777	punpckhqdq	%xmm2,%xmm1
778	punpckhqdq	%xmm7,%xmm3
779	movdqu	-128(%esi),%xmm4
780	movdqu	-64(%esi),%xmm5
781	movdqu	(%esi),%xmm2
782	movdqu	64(%esi),%xmm7
783	leal	16(%esi),%esi
784	pxor	%xmm0,%xmm4
785	movdqa	64(%ebx),%xmm0
786	pxor	%xmm1,%xmm5
787	movdqa	80(%ebx),%xmm1
788	pxor	%xmm2,%xmm6
789	movdqa	96(%ebx),%xmm2
790	pxor	%xmm3,%xmm7
791	movdqa	112(%ebx),%xmm3
792	movdqu	%xmm4,-128(%edi)
793	movdqu	%xmm5,-64(%edi)
794	movdqu	%xmm6,(%edi)
795	movdqu	%xmm7,64(%edi)
796	leal	16(%edi),%edi
# Words 12..15: add input (including the per-lane counters), transpose,
# XOR, store. The final pointer step of 208 completes the advance to 256
# bytes per iteration (three steps of 16 plus 208).
797	paddd	64(%ebp),%xmm0
798	paddd	80(%ebp),%xmm1
799	paddd	96(%ebp),%xmm2
800	paddd	112(%ebp),%xmm3
801	movdqa	%xmm0,%xmm6
802	punpckldq	%xmm1,%xmm0
803	movdqa	%xmm2,%xmm7
804	punpckldq	%xmm3,%xmm2
805	punpckhdq	%xmm1,%xmm6
806	punpckhdq	%xmm3,%xmm7
807	movdqa	%xmm0,%xmm1
808	punpcklqdq	%xmm2,%xmm0
809	movdqa	%xmm6,%xmm3
810	punpcklqdq	%xmm7,%xmm6
811	punpckhqdq	%xmm2,%xmm1
812	punpckhqdq	%xmm7,%xmm3
813	movdqu	-128(%esi),%xmm4
814	movdqu	-64(%esi),%xmm5
815	movdqu	(%esi),%xmm2
816	movdqu	64(%esi),%xmm7
817	leal	208(%esi),%esi
818	pxor	%xmm0,%xmm4
819	pxor	%xmm1,%xmm5
820	pxor	%xmm2,%xmm6
821	pxor	%xmm3,%xmm7
822	movdqu	%xmm4,-128(%edi)
823	movdqu	%xmm5,-64(%edi)
824	movdqu	%xmm6,(%edi)
825	movdqu	%xmm7,64(%edi)
826	leal	208(%edi),%edi
# Loop while no borrow occurs, which means another 256 bytes are
# available. Afterwards undo the bias; a remainder of zero is done.
827	subl	$256,%ecx
828	jnc	.L009outer_loop
829	addl	$256,%ecx
830	jz	.L011done
# Between 1 and 255 bytes are left: prepare for the one-block code.
# Restore the arg4 and arg3 pointers, remove the +128 pointer bias, and
# rebuild the counter block in xmm3: lane 0 counter of the last batch plus
# the value at table+96, merged with the arg4 block whose dword 0 is
# cleared by the mask at table+112.
831	movl	520(%esp),%ebx
832	leal	-128(%esi),%esi
833	movl	516(%esp),%edx
834	leal	-128(%edi),%edi
835	movd	64(%ebp),%xmm2
836	movdqu	(%ebx),%xmm3
837	paddd	96(%eax),%xmm2
838	pand	112(%eax),%xmm3
839	por	%xmm2,%xmm3
# One-block path. xmm0 = constant words, xmm1 and xmm2 = the two halves
# of the arg3 data, xmm3 = counter block, xmm6 and xmm7 = the pshufb masks
# from table+0 and table+16. The four rows are also kept at 0..63(%esp).
# NOTE(review): the movl to 48(%esp) below is overwritten by the movdqa to
# the same address a few instructions later and is never read in between.
840.L0081x:
841	movdqa	32(%eax),%xmm0
842	movdqu	(%edx),%xmm1
843	movdqu	16(%edx),%xmm2
844	movdqa	(%eax),%xmm6
845	movdqa	16(%eax),%xmm7
846	movl	%ebp,48(%esp)
847	movdqa	%xmm0,(%esp)
848	movdqa	%xmm1,16(%esp)
849	movdqa	%xmm2,32(%esp)
850	movdqa	%xmm3,48(%esp)
851	movl	$10,%edx
852	jmp	.L012loop1x
853.align	16
# Next 64-byte block: reload rows 0..2 from the stack and add the step at
# table+80 to the saved counter row, which is written back.
854.L013outer1x:
855	movdqa	80(%eax),%xmm3
856	movdqa	(%esp),%xmm0
857	movdqa	16(%esp),%xmm1
858	movdqa	32(%esp),%xmm2
859	paddd	48(%esp),%xmm3
860	movl	$10,%edx
861	movdqa	%xmm3,48(%esp)
862	jmp	.L012loop1x
863.align	16
# Round loop for one block, ten iterations. xmm0..xmm3 are the four rows
# and every instruction acts on four quarter rounds at once. The .byte
# sequences encode pshufb %xmm6,%xmm3 (222) and pshufb %xmm7,%xmm3 (223).
864.L012loop1x:
865	paddd	%xmm1,%xmm0
866	pxor	%xmm0,%xmm3
867.byte	102,15,56,0,222
868	paddd	%xmm3,%xmm2
869	pxor	%xmm2,%xmm1
870	movdqa	%xmm1,%xmm4
871	psrld	$20,%xmm1
872	pslld	$12,%xmm4
873	por	%xmm4,%xmm1
874	paddd	%xmm1,%xmm0
875	pxor	%xmm0,%xmm3
876.byte	102,15,56,0,223
877	paddd	%xmm3,%xmm2
878	pxor	%xmm2,%xmm1
879	movdqa	%xmm1,%xmm4
880	psrld	$25,%xmm1
881	pslld	$7,%xmm4
882	por	%xmm4,%xmm1
# Rotate the dword lanes of rows 2, 1 and 3 by two, one and three
# positions so that the second half operates on the diagonals.
883	pshufd	$78,%xmm2,%xmm2
884	pshufd	$57,%xmm1,%xmm1
885	pshufd	$147,%xmm3,%xmm3
886	nop
887	paddd	%xmm1,%xmm0
888	pxor	%xmm0,%xmm3
889.byte	102,15,56,0,222
890	paddd	%xmm3,%xmm2
891	pxor	%xmm2,%xmm1
892	movdqa	%xmm1,%xmm4
893	psrld	$20,%xmm1
894	pslld	$12,%xmm4
895	por	%xmm4,%xmm1
896	paddd	%xmm1,%xmm0
897	pxor	%xmm0,%xmm3
898.byte	102,15,56,0,223
899	paddd	%xmm3,%xmm2
900	pxor	%xmm2,%xmm1
901	movdqa	%xmm1,%xmm4
902	psrld	$25,%xmm1
903	pslld	$7,%xmm4
904	por	%xmm4,%xmm1
# Undo the lane rotation of rows 2, 1 and 3.
905	pshufd	$78,%xmm2,%xmm2
906	pshufd	$147,%xmm1,%xmm1
907	pshufd	$57,%xmm3,%xmm3
908	decl	%edx
909	jnz	.L012loop1x
# Add the saved input rows, then either handle a full 64-byte block or
# branch to the byte-wise tail when fewer than 64 bytes remain.
910	paddd	(%esp),%xmm0
911	paddd	16(%esp),%xmm1
912	paddd	32(%esp),%xmm2
913	paddd	48(%esp),%xmm3
914	cmpl	$64,%ecx
915	jb	.L014tail
916	movdqu	(%esi),%xmm4
917	movdqu	16(%esi),%xmm5
918	pxor	%xmm4,%xmm0
919	movdqu	32(%esi),%xmm4
920	pxor	%xmm5,%xmm1
921	movdqu	48(%esi),%xmm5
922	pxor	%xmm4,%xmm2
923	pxor	%xmm5,%xmm3
924	leal	64(%esi),%esi
925	movdqu	%xmm0,(%edi)
926	movdqu	%xmm1,16(%edi)
927	movdqu	%xmm2,32(%edi)
928	movdqu	%xmm3,48(%edi)
929	leal	64(%edi),%edi
930	subl	$64,%ecx
931	jnz	.L013outer1x
932	jmp	.L011done
# Partial final block: spill the key stream to 0..63(%esp) and XOR byte
# by byte. ebp = byte index, ecx = bytes left; eax and edx are cleared
# before being used as byte registers.
933.L014tail:
934	movdqa	%xmm0,(%esp)
935	movdqa	%xmm1,16(%esp)
936	movdqa	%xmm2,32(%esp)
937	movdqa	%xmm3,48(%esp)
938	xorl	%eax,%eax
939	xorl	%edx,%edx
940	xorl	%ebp,%ebp
941.L015tail_loop:
942	movb	(%esp,%ebp,1),%al
943	movb	(%esi,%ebp,1),%dl
944	leal	1(%ebp),%ebp
945	xorb	%dl,%al
946	movb	%al,-1(%edi,%ebp,1)
947	decl	%ecx
948	jnz	.L015tail_loop
# Restore the esp value saved at 512(%esp), then pop the four registers
# pushed on entry and return.
949.L011done:
950	movl	512(%esp),%esp
951	popl	%edi
952	popl	%esi
953	popl	%ebx
954	popl	%ebp
955	ret
956.size	_ChaCha20_ssse3,.-_ChaCha20_ssse3
# Constant table for the SSSE3 code, which addresses it through eax with
# the byte offsets noted below. It is aligned to 64 bytes and read with
# movdqa and with memory operands of paddd, psubd, pand and pshufb.
957.align	64
958.Lssse3_data:
# offset 0: pshufb mask that rotates every dword left by 16 bits
959.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
# offset 16: pshufb mask that rotates every dword left by 8 bits
960.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
# offset 32: the four constant state words (ASCII text: expand 32-byte k)
961.long	1634760805,857760878,2036477234,1797285236
# offset 48: per-lane counter offsets for the four-block path
962.long	0,1,2,3
# offset 64: counter step applied once per four-block iteration
963.long	4,4,4,4
# offset 80: counter step applied once per one-block iteration
964.long	1,0,0,0
# offset 96: added to the lane 0 counter when the four-block path hands
# over to the one-block path
965.long	4,0,0,0
# offset 112: mask that clears dword 0 and keeps dwords 1..3
966.long	0,-1,-1,-1
967.align	64
# NUL-terminated ASCII: ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
968.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
969.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
970.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
971.byte	114,103,62,0
972#endif
973.section	.note.GNU-stack,"",@progbits
974