1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#if defined(BORINGSSL_PREFIX)
6#include <boringssl_prefix_symbols_asm.h>
7#endif
8.text
# Entry point _GFp_ChaCha20_ctr32 (32-bit x86, all arguments on the stack).
# Once the four pushes below are done the arguments are read from:
#   - 20(%esp): destination pointer
#   - 24(%esp): source pointer (XORed with the generated keystream)
#   - 28(%esp): byte count; a zero count returns immediately
#   - 32(%esp): pointer to the key, read as eight 32-bit words
#   - 36(%esp): pointer to the counter block, read as four 32-bit words
# NOTE(review): presumably the C prototype is (out, in, in_len, key, counter);
# confirm against the declaring header.
# Preserves %ebp, %ebx, %esi, %edi; %eax, %ecx, %edx and flags are clobbered.
# Jumps to Lssse3_shortcut when both capability tests pass, otherwise
# runs the integer-only implementation at L001x86.
9.globl	_GFp_ChaCha20_ctr32
10.private_extern	_GFp_ChaCha20_ctr32
11.align	4
12_GFp_ChaCha20_ctr32:
13L_GFp_ChaCha20_ctr32_begin:
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
18	xorl	%eax,%eax
19	cmpl	28(%esp),%eax	# byte count == 0 ?
20	je	L000no_data
# Call/pop pair: leaves the run-time address of Lpic_point in %eax so that
# data can be addressed relative to it (position-independent code).
21	call	Lpic_point
22Lpic_point:
23	popl	%eax
24	movl	L_GFp_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp	# %ebp = address of _GFp_ia32cap_P
25	testl	$16777216,(%ebp)	# bit 24 of capability word 0 (presumably FXSR - confirm)
26	jz	L001x86
27	testl	$512,4(%ebp)	# bit 9 of capability word 1 (presumably SSSE3 - confirm)
28	jz	L001x86
29	jmp	Lssse3_shortcut	# %eax still holds the address of Lpic_point
# Integer-only path. Frame layout after the subl below:
#   - Working state: word x[i] lives at 4*i(%esp), i.e. offsets 0..60
#   - Key copy: eight words at offsets 80..108
#   - Counter block copy: four words at offsets 112..124; the first of them
#     is bumped by one for every block
#   - Round-loop counter spill slot at offset 128
#   - Caller argument slots for destination, source and byte count at
#     offsets 152, 156 and 160, reused to hold the running values
30L001x86:
31	movl	32(%esp),%esi	# key pointer
32	movl	36(%esp),%edi	# counter block pointer
33	subl	$132,%esp
34	movl	(%esi),%eax
35	movl	4(%esi),%ebx
36	movl	8(%esi),%ecx
37	movl	12(%esi),%edx
38	movl	%eax,80(%esp)
39	movl	%ebx,84(%esp)
40	movl	%ecx,88(%esp)
41	movl	%edx,92(%esp)
42	movl	16(%esi),%eax
43	movl	20(%esi),%ebx
44	movl	24(%esi),%ecx
45	movl	28(%esi),%edx
46	movl	%eax,96(%esp)
47	movl	%ebx,100(%esp)
48	movl	%ecx,104(%esp)
49	movl	%edx,108(%esp)
50	movl	(%edi),%eax
51	movl	4(%edi),%ebx
52	movl	8(%edi),%ecx
53	movl	12(%edi),%edx
54	subl	$1,%eax	# pre-decrement: L002entry adds 1 back before every block
55	movl	%eax,112(%esp)
56	movl	%ebx,116(%esp)
57	movl	%ecx,120(%esp)
58	movl	%edx,124(%esp)
59	jmp	L002entry
60.align	4,0x90
# Re-entered once per additional 64-byte block: write back the advanced
# source pointer, destination pointer and remaining byte count.
61L003outer_loop:
62	movl	%ebx,156(%esp)
63	movl	%eax,152(%esp)
64	movl	%ecx,160(%esp)
# Build the initial state for one block. The four immediates are the words
# 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (ASCII "expand 32-byte k").
# Words x0, x4, x8, x9, x12 start in registers; the rest go to the frame.
65L002entry:
66	movl	$1634760805,%eax
67	movl	$857760878,4(%esp)
68	movl	$2036477234,8(%esp)
69	movl	$1797285236,12(%esp)
70	movl	84(%esp),%ebx
71	movl	88(%esp),%ebp
72	movl	104(%esp),%ecx
73	movl	108(%esp),%esi
74	movl	116(%esp),%edx
75	movl	120(%esp),%edi
76	movl	%ebx,20(%esp)
77	movl	%ebp,24(%esp)
78	movl	%ecx,40(%esp)
79	movl	%esi,44(%esp)
80	movl	%edx,52(%esp)
81	movl	%edi,56(%esp)
82	movl	92(%esp),%ebx
83	movl	124(%esp),%edi
84	movl	112(%esp),%edx
85	movl	80(%esp),%ebp
86	movl	96(%esp),%ecx
87	movl	100(%esp),%esi
88	addl	$1,%edx	# advance the block counter word
89	movl	%ebx,28(%esp)
90	movl	%edi,60(%esp)
91	movl	%edx,112(%esp)
92	movl	$10,%ebx	# ten passes of the loop below
93	jmp	L004loop
94.align	4,0x90
# One pass = four column quarter-rounds followed by four diagonal
# quarter-rounds (rotate amounts 16, 12, 8, 7), interleaved with the spills
# and reloads of the state words that do not fit in registers.
# On entry to every pass: %eax = x0, %ebp = x4, %ecx = x8, %esi = x9,
# %edx = x12, %ebx = remaining pass count (parked at 128(%esp) meanwhile).
95L004loop:
96	addl	%ebp,%eax	# column quarter-round (x0,x4,x8,x12) starts
97	movl	%ebx,128(%esp)
98	movl	%ebp,%ebx
99	xorl	%eax,%edx
100	roll	$16,%edx
101	addl	%edx,%ecx
102	xorl	%ecx,%ebx
103	movl	52(%esp),%edi
104	roll	$12,%ebx
105	movl	20(%esp),%ebp
106	addl	%ebx,%eax
107	xorl	%eax,%edx
108	movl	%eax,(%esp)
109	roll	$8,%edx
110	movl	4(%esp),%eax
111	addl	%edx,%ecx
112	movl	%edx,48(%esp)
113	xorl	%ecx,%ebx
114	addl	%ebp,%eax	# column quarter-round (x1,x5,x9,x13) starts
115	roll	$7,%ebx
116	xorl	%eax,%edi
117	movl	%ecx,32(%esp)
118	roll	$16,%edi
119	movl	%ebx,16(%esp)
120	addl	%edi,%esi
121	movl	40(%esp),%ecx
122	xorl	%esi,%ebp
123	movl	56(%esp),%edx
124	roll	$12,%ebp
125	movl	24(%esp),%ebx
126	addl	%ebp,%eax
127	xorl	%eax,%edi
128	movl	%eax,4(%esp)
129	roll	$8,%edi
130	movl	8(%esp),%eax
131	addl	%edi,%esi
132	movl	%edi,52(%esp)
133	xorl	%esi,%ebp
134	addl	%ebx,%eax	# column quarter-round (x2,x6,x10,x14) starts
135	roll	$7,%ebp
136	xorl	%eax,%edx
137	movl	%esi,36(%esp)
138	roll	$16,%edx
139	movl	%ebp,20(%esp)
140	addl	%edx,%ecx
141	movl	44(%esp),%esi
142	xorl	%ecx,%ebx
143	movl	60(%esp),%edi
144	roll	$12,%ebx
145	movl	28(%esp),%ebp
146	addl	%ebx,%eax
147	xorl	%eax,%edx
148	movl	%eax,8(%esp)
149	roll	$8,%edx
150	movl	12(%esp),%eax
151	addl	%edx,%ecx
152	movl	%edx,56(%esp)
153	xorl	%ecx,%ebx
154	addl	%ebp,%eax	# column quarter-round (x3,x7,x11,x15) starts
155	roll	$7,%ebx
156	xorl	%eax,%edi
157	roll	$16,%edi
158	movl	%ebx,24(%esp)
159	addl	%edi,%esi
160	xorl	%esi,%ebp
161	roll	$12,%ebp
162	movl	20(%esp),%ebx
163	addl	%ebp,%eax
164	xorl	%eax,%edi
165	movl	%eax,12(%esp)
166	roll	$8,%edi
167	movl	(%esp),%eax
168	addl	%edi,%esi
169	movl	%edi,%edx	# x15 stays in a register for the next quarter-round
170	xorl	%esi,%ebp
171	addl	%ebx,%eax	# diagonal quarter-round (x0,x5,x10,x15) starts
172	roll	$7,%ebp
173	xorl	%eax,%edx
174	roll	$16,%edx
175	movl	%ebp,28(%esp)
176	addl	%edx,%ecx
177	xorl	%ecx,%ebx
178	movl	48(%esp),%edi
179	roll	$12,%ebx
180	movl	24(%esp),%ebp
181	addl	%ebx,%eax
182	xorl	%eax,%edx
183	movl	%eax,(%esp)
184	roll	$8,%edx
185	movl	4(%esp),%eax
186	addl	%edx,%ecx
187	movl	%edx,60(%esp)
188	xorl	%ecx,%ebx
189	addl	%ebp,%eax	# diagonal quarter-round (x1,x6,x11,x12) starts
190	roll	$7,%ebx
191	xorl	%eax,%edi
192	movl	%ecx,40(%esp)
193	roll	$16,%edi
194	movl	%ebx,20(%esp)
195	addl	%edi,%esi
196	movl	32(%esp),%ecx
197	xorl	%esi,%ebp
198	movl	52(%esp),%edx
199	roll	$12,%ebp
200	movl	28(%esp),%ebx
201	addl	%ebp,%eax
202	xorl	%eax,%edi
203	movl	%eax,4(%esp)
204	roll	$8,%edi
205	movl	8(%esp),%eax
206	addl	%edi,%esi
207	movl	%edi,48(%esp)
208	xorl	%esi,%ebp
209	addl	%ebx,%eax	# diagonal quarter-round (x2,x7,x8,x13) starts
210	roll	$7,%ebp
211	xorl	%eax,%edx
212	movl	%esi,44(%esp)
213	roll	$16,%edx
214	movl	%ebp,24(%esp)
215	addl	%edx,%ecx
216	movl	36(%esp),%esi
217	xorl	%ecx,%ebx
218	movl	56(%esp),%edi
219	roll	$12,%ebx
220	movl	16(%esp),%ebp
221	addl	%ebx,%eax
222	xorl	%eax,%edx
223	movl	%eax,8(%esp)
224	roll	$8,%edx
225	movl	12(%esp),%eax
226	addl	%edx,%ecx
227	movl	%edx,52(%esp)
228	xorl	%ecx,%ebx
229	addl	%ebp,%eax	# diagonal quarter-round (x3,x4,x9,x14) starts
230	roll	$7,%ebx
231	xorl	%eax,%edi
232	roll	$16,%edi
233	movl	%ebx,28(%esp)
234	addl	%edi,%esi
235	xorl	%esi,%ebp
236	movl	48(%esp),%edx
237	roll	$12,%ebp
238	movl	128(%esp),%ebx	# reload the pass counter
239	addl	%ebp,%eax
240	xorl	%eax,%edi
241	movl	%eax,12(%esp)
242	roll	$8,%edi
243	movl	(%esp),%eax
244	addl	%edi,%esi
245	movl	%edi,56(%esp)
246	xorl	%esi,%ebp
247	roll	$7,%ebp
248	decl	%ebx
249	jnz	L004loop
# Rounds done. Add the initial state back in (feed-forward); here
# %eax = x0, %ebp = x4, %ecx = x8, %esi = x9, %edx = x12, %edi = x14.
250	movl	160(%esp),%ebx	# remaining byte count
251	addl	$1634760805,%eax
252	addl	80(%esp),%ebp
253	addl	96(%esp),%ecx
254	addl	100(%esp),%esi
255	cmpl	$64,%ebx
256	jb	L005tail	# fewer than 64 bytes left: byte-wise tail
# Full block: XOR 64 source bytes with the keystream words and store them,
# five or six words at a time because of the register shortage.
257	movl	156(%esp),%ebx	# source pointer
258	addl	112(%esp),%edx
259	addl	120(%esp),%edi
260	xorl	(%ebx),%eax
261	xorl	16(%ebx),%ebp
262	movl	%eax,(%esp)	# park output word 0; %eax is needed for the destination pointer
263	movl	152(%esp),%eax	# destination pointer
264	xorl	32(%ebx),%ecx
265	xorl	36(%ebx),%esi
266	xorl	48(%ebx),%edx
267	xorl	56(%ebx),%edi
268	movl	%ebp,16(%eax)
269	movl	%ecx,32(%eax)
270	movl	%esi,36(%eax)
271	movl	%edx,48(%eax)
272	movl	%edi,56(%eax)
273	movl	4(%esp),%ebp
274	movl	8(%esp),%ecx
275	movl	12(%esp),%esi
276	movl	20(%esp),%edx
277	movl	24(%esp),%edi
278	addl	$857760878,%ebp
279	addl	$2036477234,%ecx
280	addl	$1797285236,%esi
281	addl	84(%esp),%edx
282	addl	88(%esp),%edi
283	xorl	4(%ebx),%ebp
284	xorl	8(%ebx),%ecx
285	xorl	12(%ebx),%esi
286	xorl	20(%ebx),%edx
287	xorl	24(%ebx),%edi
288	movl	%ebp,4(%eax)
289	movl	%ecx,8(%eax)
290	movl	%esi,12(%eax)
291	movl	%edx,20(%eax)
292	movl	%edi,24(%eax)
293	movl	28(%esp),%ebp
294	movl	40(%esp),%ecx
295	movl	44(%esp),%esi
296	movl	52(%esp),%edx
297	movl	60(%esp),%edi
298	addl	92(%esp),%ebp
299	addl	104(%esp),%ecx
300	addl	108(%esp),%esi
301	addl	116(%esp),%edx
302	addl	124(%esp),%edi
303	xorl	28(%ebx),%ebp
304	xorl	40(%ebx),%ecx
305	xorl	44(%ebx),%esi
306	xorl	52(%ebx),%edx
307	xorl	60(%ebx),%edi
308	leal	64(%ebx),%ebx	# advance source pointer by one block
309	movl	%ebp,28(%eax)
310	movl	(%esp),%ebp	# fetch the parked output word 0
311	movl	%ecx,40(%eax)
312	movl	160(%esp),%ecx	# remaining byte count
313	movl	%esi,44(%eax)
314	movl	%edx,52(%eax)
315	movl	%edi,60(%eax)
316	movl	%ebp,(%eax)
317	leal	64(%eax),%eax	# advance destination pointer by one block
318	subl	$64,%ecx
319	jnz	L003outer_loop
320	jmp	L006done
# Partial final block (%ebx = 1..63 bytes left): finish the feed-forward,
# write the whole 64-byte keystream block to 0..60(%esp), then XOR it into
# the output one byte at a time.
321L005tail:
322	addl	112(%esp),%edx
323	addl	120(%esp),%edi
324	movl	%eax,(%esp)
325	movl	%ebp,16(%esp)
326	movl	%ecx,32(%esp)
327	movl	%esi,36(%esp)
328	movl	%edx,48(%esp)
329	movl	%edi,56(%esp)
330	movl	4(%esp),%ebp
331	movl	8(%esp),%ecx
332	movl	12(%esp),%esi
333	movl	20(%esp),%edx
334	movl	24(%esp),%edi
335	addl	$857760878,%ebp
336	addl	$2036477234,%ecx
337	addl	$1797285236,%esi
338	addl	84(%esp),%edx
339	addl	88(%esp),%edi
340	movl	%ebp,4(%esp)
341	movl	%ecx,8(%esp)
342	movl	%esi,12(%esp)
343	movl	%edx,20(%esp)
344	movl	%edi,24(%esp)
345	movl	28(%esp),%ebp
346	movl	40(%esp),%ecx
347	movl	44(%esp),%esi
348	movl	52(%esp),%edx
349	movl	60(%esp),%edi
350	addl	92(%esp),%ebp
351	addl	104(%esp),%ecx
352	addl	108(%esp),%esi
353	addl	116(%esp),%edx
354	addl	124(%esp),%edi
355	movl	%ebp,28(%esp)
356	movl	156(%esp),%ebp	# source pointer
357	movl	%ecx,40(%esp)
358	movl	152(%esp),%ecx	# destination pointer
359	movl	%esi,44(%esp)
360	xorl	%esi,%esi	# byte index = 0
361	movl	%edx,52(%esp)
362	movl	%edi,60(%esp)
363	xorl	%eax,%eax
364	xorl	%edx,%edx
365L007tail_loop:
366	movb	(%esi,%ebp,1),%al	# source byte
367	movb	(%esp,%esi,1),%dl	# keystream byte
368	leal	1(%esi),%esi
369	xorb	%dl,%al
370	movb	%al,-1(%ecx,%esi,1)	# -1 compensates for the index increment above
371	decl	%ebx
372	jnz	L007tail_loop
373L006done:
374	addl	$132,%esp	# release the frame
375L000no_data:
376	popl	%edi
377	popl	%esi
378	popl	%ebx
379	popl	%ebp
380	ret
# Routine __ChaCha20_ssse3 with inner entry Lssse3_shortcut: SSSE3 code path.
# The four pushes at __ChaCha20_ssse3 mirror the prologue of
# _GFp_ChaCha20_ctr32, which jumps straight to Lssse3_shortcut with its own
# pushes already done. The code after Lssse3_shortcut expects %eax to hold
# the address of Lpic_point (used below to locate Lssse3_data); the
# __ChaCha20_ssse3 entry itself does not set %eax.
# Register roles after the loads below: %edi = destination, %esi = source,
# %ecx = bytes remaining, %edx = key pointer, %ebx = counter block pointer.
# Inputs of 256 bytes or more go through a loop that computes four 64-byte
# blocks at once (one block per 32-bit lane); whatever is left, and any
# input shorter than 256 bytes, goes through the one-block code at L0081x.
# Uses %xmm0-%xmm7; restores %ebp, %ebx, %esi, %edi and %esp on exit.
381.private_extern	__ChaCha20_ssse3
382.align	4
383__ChaCha20_ssse3:
384	pushl	%ebp
385	pushl	%ebx
386	pushl	%esi
387	pushl	%edi
388Lssse3_shortcut:
389	movl	20(%esp),%edi
390	movl	24(%esp),%esi
391	movl	28(%esp),%ecx
392	movl	32(%esp),%edx
393	movl	36(%esp),%ebx
# Carve out a 64-byte aligned frame. The original %esp is kept at 512(%esp),
# the key pointer at 516(%esp) and the counter pointer at 520(%esp).
394	movl	%esp,%ebp
395	subl	$524,%esp
396	andl	$-64,%esp
397	movl	%ebp,512(%esp)
398	leal	Lssse3_data-Lpic_point(%eax),%eax	# %eax = address of Lssse3_data
399	movdqu	(%ebx),%xmm3	# counter block (also consumed by the L0081x path)
400	cmpl	$256,%ecx
401	jb	L0081x
# Four-block setup. %ebp = %esp+384 addresses sixteen 16-byte vectors of
# initial state at -128..112(%ebp); %ebx = %esp+128 addresses the sixteen
# working vectors at -128..112(%ebx). Vector i sits at displacement
# 16*i-128 and holds state word x[i] of four consecutive blocks.
402	movl	%edx,516(%esp)
403	movl	%ebx,520(%esp)
404	subl	$256,%ecx	# count is kept biased by -256 while in the 4x loop
405	leal	384(%esp),%ebp
406	movdqu	(%edx),%xmm7	# key words 0..3
407	pshufd	$0,%xmm3,%xmm0	# broadcast each counter-block word to all lanes
408	pshufd	$85,%xmm3,%xmm1
409	pshufd	$170,%xmm3,%xmm2
410	pshufd	$255,%xmm3,%xmm3
411	paddd	48(%eax),%xmm0	# lanes get counter+0, +1, +2, +3
412	pshufd	$0,%xmm7,%xmm4
413	pshufd	$85,%xmm7,%xmm5
414	psubd	64(%eax),%xmm0	# pre-subtract 4; the outer loop adds 4 before each use
415	pshufd	$170,%xmm7,%xmm6
416	pshufd	$255,%xmm7,%xmm7
417	movdqa	%xmm0,64(%ebp)
418	movdqa	%xmm1,80(%ebp)
419	movdqa	%xmm2,96(%ebp)
420	movdqa	%xmm3,112(%ebp)
421	movdqu	16(%edx),%xmm3	# key words 4..7
422	movdqa	%xmm4,-64(%ebp)
423	movdqa	%xmm5,-48(%ebp)
424	movdqa	%xmm6,-32(%ebp)
425	movdqa	%xmm7,-16(%ebp)
426	movdqa	32(%eax),%xmm7	# the four constant words
427	leal	128(%esp),%ebx
428	pshufd	$0,%xmm3,%xmm0
429	pshufd	$85,%xmm3,%xmm1
430	pshufd	$170,%xmm3,%xmm2
431	pshufd	$255,%xmm3,%xmm3
432	pshufd	$0,%xmm7,%xmm4
433	pshufd	$85,%xmm7,%xmm5
434	pshufd	$170,%xmm7,%xmm6
435	pshufd	$255,%xmm7,%xmm7
436	movdqa	%xmm0,(%ebp)
437	movdqa	%xmm1,16(%ebp)
438	movdqa	%xmm2,32(%ebp)
439	movdqa	%xmm3,48(%ebp)
440	movdqa	%xmm4,-128(%ebp)
441	movdqa	%xmm5,-112(%ebp)
442	movdqa	%xmm6,-96(%ebp)
443	movdqa	%xmm7,-80(%ebp)
# Bias both data pointers by +128 so the four blocks are reachable with the
# displacements -128, -64, 0 and 64 used in the store sequence further down.
444	leal	128(%esi),%esi
445	leal	128(%edi),%edi
446	jmp	L009outer_loop
447.align	4,0x90
# One iteration = four blocks (256 bytes). Copy the initial state into the
# working area, keeping x0, x4, x8, x9 and x12 in registers, and advance
# the per-lane block counters by 4.
448L009outer_loop:
449	movdqa	-112(%ebp),%xmm1
450	movdqa	-96(%ebp),%xmm2
451	movdqa	-80(%ebp),%xmm3
452	movdqa	-48(%ebp),%xmm5
453	movdqa	-32(%ebp),%xmm6
454	movdqa	-16(%ebp),%xmm7
455	movdqa	%xmm1,-112(%ebx)
456	movdqa	%xmm2,-96(%ebx)
457	movdqa	%xmm3,-80(%ebx)
458	movdqa	%xmm5,-48(%ebx)
459	movdqa	%xmm6,-32(%ebx)
460	movdqa	%xmm7,-16(%ebx)
461	movdqa	32(%ebp),%xmm2
462	movdqa	48(%ebp),%xmm3
463	movdqa	64(%ebp),%xmm4
464	movdqa	80(%ebp),%xmm5
465	movdqa	96(%ebp),%xmm6
466	movdqa	112(%ebp),%xmm7
467	paddd	64(%eax),%xmm4	# block counters += 4 in every lane
468	movdqa	%xmm2,32(%ebx)
469	movdqa	%xmm3,48(%ebx)
470	movdqa	%xmm4,64(%ebx)
471	movdqa	%xmm5,80(%ebx)
472	movdqa	%xmm6,96(%ebx)
473	movdqa	%xmm7,112(%ebx)
474	movdqa	%xmm4,64(%ebp)	# remember the advanced counters as the new initial state
475	movdqa	-128(%ebp),%xmm0
476	movdqa	%xmm4,%xmm6
477	movdqa	-64(%ebp),%xmm3
478	movdqa	(%ebp),%xmm4
479	movdqa	16(%ebp),%xmm5
480	movl	$10,%edx	# ten passes of the loop below
481	nop
482.align	4,0x90
# One pass = four column then four diagonal quarter-rounds on all four
# blocks. Rotations by 16 and 8 use pshufb with the masks at (%eax) and
# 16(%eax); rotations by 12 and 7 use a pslld/psrld/por triple.
# On entry: %xmm0 = x0, %xmm3 = x4, %xmm4 = x8, %xmm5 = x9, %xmm6 = x12.
483L010loop:
484	paddd	%xmm3,%xmm0	# column quarter-round (x0,x4,x8,x12) starts
485	movdqa	%xmm3,%xmm2
486	pxor	%xmm0,%xmm6
487	pshufb	(%eax),%xmm6
488	paddd	%xmm6,%xmm4
489	pxor	%xmm4,%xmm2
490	movdqa	-48(%ebx),%xmm3
491	movdqa	%xmm2,%xmm1
492	pslld	$12,%xmm2
493	psrld	$20,%xmm1
494	por	%xmm1,%xmm2
495	movdqa	-112(%ebx),%xmm1
496	paddd	%xmm2,%xmm0
497	movdqa	80(%ebx),%xmm7
498	pxor	%xmm0,%xmm6
499	movdqa	%xmm0,-128(%ebx)
500	pshufb	16(%eax),%xmm6
501	paddd	%xmm6,%xmm4
502	movdqa	%xmm6,64(%ebx)
503	pxor	%xmm4,%xmm2
504	paddd	%xmm3,%xmm1	# column quarter-round (x1,x5,x9,x13) starts
505	movdqa	%xmm2,%xmm0
506	pslld	$7,%xmm2
507	psrld	$25,%xmm0
508	pxor	%xmm1,%xmm7
509	por	%xmm0,%xmm2
510	movdqa	%xmm4,(%ebx)
511	pshufb	(%eax),%xmm7
512	movdqa	%xmm2,-64(%ebx)
513	paddd	%xmm7,%xmm5
514	movdqa	32(%ebx),%xmm4
515	pxor	%xmm5,%xmm3
516	movdqa	-32(%ebx),%xmm2
517	movdqa	%xmm3,%xmm0
518	pslld	$12,%xmm3
519	psrld	$20,%xmm0
520	por	%xmm0,%xmm3
521	movdqa	-96(%ebx),%xmm0
522	paddd	%xmm3,%xmm1
523	movdqa	96(%ebx),%xmm6
524	pxor	%xmm1,%xmm7
525	movdqa	%xmm1,-112(%ebx)
526	pshufb	16(%eax),%xmm7
527	paddd	%xmm7,%xmm5
528	movdqa	%xmm7,80(%ebx)
529	pxor	%xmm5,%xmm3
530	paddd	%xmm2,%xmm0	# column quarter-round (x2,x6,x10,x14) starts
531	movdqa	%xmm3,%xmm1
532	pslld	$7,%xmm3
533	psrld	$25,%xmm1
534	pxor	%xmm0,%xmm6
535	por	%xmm1,%xmm3
536	movdqa	%xmm5,16(%ebx)
537	pshufb	(%eax),%xmm6
538	movdqa	%xmm3,-48(%ebx)
539	paddd	%xmm6,%xmm4
540	movdqa	48(%ebx),%xmm5
541	pxor	%xmm4,%xmm2
542	movdqa	-16(%ebx),%xmm3
543	movdqa	%xmm2,%xmm1
544	pslld	$12,%xmm2
545	psrld	$20,%xmm1
546	por	%xmm1,%xmm2
547	movdqa	-80(%ebx),%xmm1
548	paddd	%xmm2,%xmm0
549	movdqa	112(%ebx),%xmm7
550	pxor	%xmm0,%xmm6
551	movdqa	%xmm0,-96(%ebx)
552	pshufb	16(%eax),%xmm6
553	paddd	%xmm6,%xmm4
554	movdqa	%xmm6,96(%ebx)
555	pxor	%xmm4,%xmm2
556	paddd	%xmm3,%xmm1	# column quarter-round (x3,x7,x11,x15) starts
557	movdqa	%xmm2,%xmm0
558	pslld	$7,%xmm2
559	psrld	$25,%xmm0
560	pxor	%xmm1,%xmm7
561	por	%xmm0,%xmm2
562	pshufb	(%eax),%xmm7
563	movdqa	%xmm2,-32(%ebx)
564	paddd	%xmm7,%xmm5
565	pxor	%xmm5,%xmm3
566	movdqa	-48(%ebx),%xmm2
567	movdqa	%xmm3,%xmm0
568	pslld	$12,%xmm3
569	psrld	$20,%xmm0
570	por	%xmm0,%xmm3
571	movdqa	-128(%ebx),%xmm0
572	paddd	%xmm3,%xmm1
573	pxor	%xmm1,%xmm7
574	movdqa	%xmm1,-80(%ebx)
575	pshufb	16(%eax),%xmm7
576	paddd	%xmm7,%xmm5
577	movdqa	%xmm7,%xmm6	# x15 stays in a register for the next quarter-round
578	pxor	%xmm5,%xmm3
579	paddd	%xmm2,%xmm0	# diagonal quarter-round (x0,x5,x10,x15) starts
580	movdqa	%xmm3,%xmm1
581	pslld	$7,%xmm3
582	psrld	$25,%xmm1
583	pxor	%xmm0,%xmm6
584	por	%xmm1,%xmm3
585	pshufb	(%eax),%xmm6
586	movdqa	%xmm3,-16(%ebx)
587	paddd	%xmm6,%xmm4
588	pxor	%xmm4,%xmm2
589	movdqa	-32(%ebx),%xmm3
590	movdqa	%xmm2,%xmm1
591	pslld	$12,%xmm2
592	psrld	$20,%xmm1
593	por	%xmm1,%xmm2
594	movdqa	-112(%ebx),%xmm1
595	paddd	%xmm2,%xmm0
596	movdqa	64(%ebx),%xmm7
597	pxor	%xmm0,%xmm6
598	movdqa	%xmm0,-128(%ebx)
599	pshufb	16(%eax),%xmm6
600	paddd	%xmm6,%xmm4
601	movdqa	%xmm6,112(%ebx)
602	pxor	%xmm4,%xmm2
603	paddd	%xmm3,%xmm1	# diagonal quarter-round (x1,x6,x11,x12) starts
604	movdqa	%xmm2,%xmm0
605	pslld	$7,%xmm2
606	psrld	$25,%xmm0
607	pxor	%xmm1,%xmm7
608	por	%xmm0,%xmm2
609	movdqa	%xmm4,32(%ebx)
610	pshufb	(%eax),%xmm7
611	movdqa	%xmm2,-48(%ebx)
612	paddd	%xmm7,%xmm5
613	movdqa	(%ebx),%xmm4
614	pxor	%xmm5,%xmm3
615	movdqa	-16(%ebx),%xmm2
616	movdqa	%xmm3,%xmm0
617	pslld	$12,%xmm3
618	psrld	$20,%xmm0
619	por	%xmm0,%xmm3
620	movdqa	-96(%ebx),%xmm0
621	paddd	%xmm3,%xmm1
622	movdqa	80(%ebx),%xmm6
623	pxor	%xmm1,%xmm7
624	movdqa	%xmm1,-112(%ebx)
625	pshufb	16(%eax),%xmm7
626	paddd	%xmm7,%xmm5
627	movdqa	%xmm7,64(%ebx)
628	pxor	%xmm5,%xmm3
629	paddd	%xmm2,%xmm0	# diagonal quarter-round (x2,x7,x8,x13) starts
630	movdqa	%xmm3,%xmm1
631	pslld	$7,%xmm3
632	psrld	$25,%xmm1
633	pxor	%xmm0,%xmm6
634	por	%xmm1,%xmm3
635	movdqa	%xmm5,48(%ebx)
636	pshufb	(%eax),%xmm6
637	movdqa	%xmm3,-32(%ebx)
638	paddd	%xmm6,%xmm4
639	movdqa	16(%ebx),%xmm5
640	pxor	%xmm4,%xmm2
641	movdqa	-64(%ebx),%xmm3
642	movdqa	%xmm2,%xmm1
643	pslld	$12,%xmm2
644	psrld	$20,%xmm1
645	por	%xmm1,%xmm2
646	movdqa	-80(%ebx),%xmm1
647	paddd	%xmm2,%xmm0
648	movdqa	96(%ebx),%xmm7
649	pxor	%xmm0,%xmm6
650	movdqa	%xmm0,-96(%ebx)
651	pshufb	16(%eax),%xmm6
652	paddd	%xmm6,%xmm4
653	movdqa	%xmm6,80(%ebx)
654	pxor	%xmm4,%xmm2
655	paddd	%xmm3,%xmm1	# diagonal quarter-round (x3,x4,x9,x14) starts
656	movdqa	%xmm2,%xmm0
657	pslld	$7,%xmm2
658	psrld	$25,%xmm0
659	pxor	%xmm1,%xmm7
660	por	%xmm0,%xmm2
661	pshufb	(%eax),%xmm7
662	movdqa	%xmm2,-16(%ebx)
663	paddd	%xmm7,%xmm5
664	pxor	%xmm5,%xmm3
665	movdqa	%xmm3,%xmm0
666	pslld	$12,%xmm3
667	psrld	$20,%xmm0
668	por	%xmm0,%xmm3
669	movdqa	-128(%ebx),%xmm0
670	paddd	%xmm3,%xmm1
671	movdqa	64(%ebx),%xmm6
672	pxor	%xmm1,%xmm7
673	movdqa	%xmm1,-80(%ebx)
674	pshufb	16(%eax),%xmm7
675	paddd	%xmm7,%xmm5
676	movdqa	%xmm7,96(%ebx)
677	pxor	%xmm5,%xmm3
678	movdqa	%xmm3,%xmm1
679	pslld	$7,%xmm3
680	psrld	$25,%xmm1
681	por	%xmm1,%xmm3
682	decl	%edx
683	jnz	L010loop
# Rounds done: flush the register-resident words to the working area.
684	movdqa	%xmm3,-64(%ebx)
685	movdqa	%xmm4,(%ebx)
686	movdqa	%xmm5,16(%ebx)
687	movdqa	%xmm6,64(%ebx)
688	movdqa	%xmm7,96(%ebx)
# Output, four state words at a time (x0..x3 here, then x4..x7, x8..x11 and
# x12..x15): add the initial state, transpose the 4x4 dword matrix so each
# register holds 16 contiguous bytes of a single block, XOR with the source
# and store. Blocks 0..3 sit at displacements -128, -64, 0, 64 from the
# biased pointers, which step by 16 bytes between groups.
689	movdqa	-112(%ebx),%xmm1
690	movdqa	-96(%ebx),%xmm2
691	movdqa	-80(%ebx),%xmm3
692	paddd	-128(%ebp),%xmm0
693	paddd	-112(%ebp),%xmm1
694	paddd	-96(%ebp),%xmm2
695	paddd	-80(%ebp),%xmm3
696	movdqa	%xmm0,%xmm6
697	punpckldq	%xmm1,%xmm0
698	movdqa	%xmm2,%xmm7
699	punpckldq	%xmm3,%xmm2
700	punpckhdq	%xmm1,%xmm6
701	punpckhdq	%xmm3,%xmm7
702	movdqa	%xmm0,%xmm1
703	punpcklqdq	%xmm2,%xmm0
704	movdqa	%xmm6,%xmm3
705	punpcklqdq	%xmm7,%xmm6
706	punpckhqdq	%xmm2,%xmm1
707	punpckhqdq	%xmm7,%xmm3
708	movdqu	-128(%esi),%xmm4
709	movdqu	-64(%esi),%xmm5
710	movdqu	(%esi),%xmm2
711	movdqu	64(%esi),%xmm7
712	leal	16(%esi),%esi
713	pxor	%xmm0,%xmm4
714	movdqa	-64(%ebx),%xmm0
715	pxor	%xmm1,%xmm5
716	movdqa	-48(%ebx),%xmm1
717	pxor	%xmm2,%xmm6
718	movdqa	-32(%ebx),%xmm2
719	pxor	%xmm3,%xmm7
720	movdqa	-16(%ebx),%xmm3
721	movdqu	%xmm4,-128(%edi)
722	movdqu	%xmm5,-64(%edi)
723	movdqu	%xmm6,(%edi)
724	movdqu	%xmm7,64(%edi)
725	leal	16(%edi),%edi
726	paddd	-64(%ebp),%xmm0
727	paddd	-48(%ebp),%xmm1
728	paddd	-32(%ebp),%xmm2
729	paddd	-16(%ebp),%xmm3
730	movdqa	%xmm0,%xmm6
731	punpckldq	%xmm1,%xmm0
732	movdqa	%xmm2,%xmm7
733	punpckldq	%xmm3,%xmm2
734	punpckhdq	%xmm1,%xmm6
735	punpckhdq	%xmm3,%xmm7
736	movdqa	%xmm0,%xmm1
737	punpcklqdq	%xmm2,%xmm0
738	movdqa	%xmm6,%xmm3
739	punpcklqdq	%xmm7,%xmm6
740	punpckhqdq	%xmm2,%xmm1
741	punpckhqdq	%xmm7,%xmm3
742	movdqu	-128(%esi),%xmm4
743	movdqu	-64(%esi),%xmm5
744	movdqu	(%esi),%xmm2
745	movdqu	64(%esi),%xmm7
746	leal	16(%esi),%esi
747	pxor	%xmm0,%xmm4
748	movdqa	(%ebx),%xmm0
749	pxor	%xmm1,%xmm5
750	movdqa	16(%ebx),%xmm1
751	pxor	%xmm2,%xmm6
752	movdqa	32(%ebx),%xmm2
753	pxor	%xmm3,%xmm7
754	movdqa	48(%ebx),%xmm3
755	movdqu	%xmm4,-128(%edi)
756	movdqu	%xmm5,-64(%edi)
757	movdqu	%xmm6,(%edi)
758	movdqu	%xmm7,64(%edi)
759	leal	16(%edi),%edi
760	paddd	(%ebp),%xmm0
761	paddd	16(%ebp),%xmm1
762	paddd	32(%ebp),%xmm2
763	paddd	48(%ebp),%xmm3
764	movdqa	%xmm0,%xmm6
765	punpckldq	%xmm1,%xmm0
766	movdqa	%xmm2,%xmm7
767	punpckldq	%xmm3,%xmm2
768	punpckhdq	%xmm1,%xmm6
769	punpckhdq	%xmm3,%xmm7
770	movdqa	%xmm0,%xmm1
771	punpcklqdq	%xmm2,%xmm0
772	movdqa	%xmm6,%xmm3
773	punpcklqdq	%xmm7,%xmm6
774	punpckhqdq	%xmm2,%xmm1
775	punpckhqdq	%xmm7,%xmm3
776	movdqu	-128(%esi),%xmm4
777	movdqu	-64(%esi),%xmm5
778	movdqu	(%esi),%xmm2
779	movdqu	64(%esi),%xmm7
780	leal	16(%esi),%esi
781	pxor	%xmm0,%xmm4
782	movdqa	64(%ebx),%xmm0
783	pxor	%xmm1,%xmm5
784	movdqa	80(%ebx),%xmm1
785	pxor	%xmm2,%xmm6
786	movdqa	96(%ebx),%xmm2
787	pxor	%xmm3,%xmm7
788	movdqa	112(%ebx),%xmm3
789	movdqu	%xmm4,-128(%edi)
790	movdqu	%xmm5,-64(%edi)
791	movdqu	%xmm6,(%edi)
792	movdqu	%xmm7,64(%edi)
793	leal	16(%edi),%edi
794	paddd	64(%ebp),%xmm0
795	paddd	80(%ebp),%xmm1
796	paddd	96(%ebp),%xmm2
797	paddd	112(%ebp),%xmm3
798	movdqa	%xmm0,%xmm6
799	punpckldq	%xmm1,%xmm0
800	movdqa	%xmm2,%xmm7
801	punpckldq	%xmm3,%xmm2
802	punpckhdq	%xmm1,%xmm6
803	punpckhdq	%xmm3,%xmm7
804	movdqa	%xmm0,%xmm1
805	punpcklqdq	%xmm2,%xmm0
806	movdqa	%xmm6,%xmm3
807	punpcklqdq	%xmm7,%xmm6
808	punpckhqdq	%xmm2,%xmm1
809	punpckhqdq	%xmm7,%xmm3
810	movdqu	-128(%esi),%xmm4
811	movdqu	-64(%esi),%xmm5
812	movdqu	(%esi),%xmm2
813	movdqu	64(%esi),%xmm7
814	leal	208(%esi),%esi	# three steps of 16 plus 208 = 256 bytes per iteration
815	pxor	%xmm0,%xmm4
816	pxor	%xmm1,%xmm5
817	pxor	%xmm2,%xmm6
818	pxor	%xmm3,%xmm7
819	movdqu	%xmm4,-128(%edi)
820	movdqu	%xmm5,-64(%edi)
821	movdqu	%xmm6,(%edi)
822	movdqu	%xmm7,64(%edi)
823	leal	208(%edi),%edi
824	subl	$256,%ecx
825	jnc	L009outer_loop	# no borrow: at least 256 more bytes remain
# Fewer than 256 bytes left. Undo the count bias and the +128 pointer bias,
# then rebuild a one-block counter row in %xmm3: lane 0 is the first block
# counter of the last 4x batch plus 4, lanes 1..3 come from the original
# counter block in memory.
826	addl	$256,%ecx
827	jz	L011done
828	movl	520(%esp),%ebx
829	leal	-128(%esi),%esi
830	movl	516(%esp),%edx
831	leal	-128(%edi),%edi
832	movd	64(%ebp),%xmm2
833	movdqu	(%ebx),%xmm3
834	paddd	96(%eax),%xmm2
835	pand	112(%eax),%xmm3
836	por	%xmm2,%xmm3
# One-block path. Expects %xmm3 = counter row, %edx = key pointer,
# %ecx = bytes left, %esi/%edi = unbiased source/destination pointers.
# The initial state rows are kept at 0, 16, 32 and 48(%esp); the working
# rows live in %xmm0..%xmm3, with the two pshufb masks in %xmm6 and %xmm7.
837L0081x:
838	movdqa	32(%eax),%xmm0
839	movdqu	(%edx),%xmm1
840	movdqu	16(%edx),%xmm2
841	movdqa	(%eax),%xmm6
842	movdqa	16(%eax),%xmm7
843	movl	%ebp,48(%esp)	# NOTE(review): overwritten by the movdqa to 48(%esp) below; looks redundant
844	movdqa	%xmm0,(%esp)
845	movdqa	%xmm1,16(%esp)
846	movdqa	%xmm2,32(%esp)
847	movdqa	%xmm3,48(%esp)
848	movl	$10,%edx
849	jmp	L012loop1x
850.align	4,0x90
# Next block on the one-block path: reload the rows and add 1 to lane 0 of
# the counter row.
851L013outer1x:
852	movdqa	80(%eax),%xmm3
853	movdqa	(%esp),%xmm0
854	movdqa	16(%esp),%xmm1
855	movdqa	32(%esp),%xmm2
856	paddd	48(%esp),%xmm3
857	movl	$10,%edx
858	movdqa	%xmm3,48(%esp)
859	jmp	L012loop1x
860.align	4,0x90
# One pass = a round on the rows, a lane rotation of rows 1..3 (pshufd), a
# second round, and the inverse lane rotation; ten passes per block.
861L012loop1x:
862	paddd	%xmm1,%xmm0
863	pxor	%xmm0,%xmm3
864.byte	102,15,56,0,222	# pshufb %xmm6,%xmm3 (each dword rotated left by 16)
865	paddd	%xmm3,%xmm2
866	pxor	%xmm2,%xmm1
867	movdqa	%xmm1,%xmm4
868	psrld	$20,%xmm1
869	pslld	$12,%xmm4
870	por	%xmm4,%xmm1
871	paddd	%xmm1,%xmm0
872	pxor	%xmm0,%xmm3
873.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3 (each dword rotated left by 8)
874	paddd	%xmm3,%xmm2
875	pxor	%xmm2,%xmm1
876	movdqa	%xmm1,%xmm4
877	psrld	$25,%xmm1
878	pslld	$7,%xmm4
879	por	%xmm4,%xmm1
880	pshufd	$78,%xmm2,%xmm2
881	pshufd	$57,%xmm1,%xmm1
882	pshufd	$147,%xmm3,%xmm3
883	nop
884	paddd	%xmm1,%xmm0
885	pxor	%xmm0,%xmm3
886.byte	102,15,56,0,222	# pshufb %xmm6,%xmm3
887	paddd	%xmm3,%xmm2
888	pxor	%xmm2,%xmm1
889	movdqa	%xmm1,%xmm4
890	psrld	$20,%xmm1
891	pslld	$12,%xmm4
892	por	%xmm4,%xmm1
893	paddd	%xmm1,%xmm0
894	pxor	%xmm0,%xmm3
895.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3
896	paddd	%xmm3,%xmm2
897	pxor	%xmm2,%xmm1
898	movdqa	%xmm1,%xmm4
899	psrld	$25,%xmm1
900	pslld	$7,%xmm4
901	por	%xmm4,%xmm1
902	pshufd	$78,%xmm2,%xmm2
903	pshufd	$147,%xmm1,%xmm1
904	pshufd	$57,%xmm3,%xmm3
905	decl	%edx
906	jnz	L012loop1x
# Add the initial state back in, then either emit a full 64-byte block or
# fall into the byte-wise tail.
907	paddd	(%esp),%xmm0
908	paddd	16(%esp),%xmm1
909	paddd	32(%esp),%xmm2
910	paddd	48(%esp),%xmm3
911	cmpl	$64,%ecx
912	jb	L014tail
913	movdqu	(%esi),%xmm4
914	movdqu	16(%esi),%xmm5
915	pxor	%xmm4,%xmm0
916	movdqu	32(%esi),%xmm4
917	pxor	%xmm5,%xmm1
918	movdqu	48(%esi),%xmm5
919	pxor	%xmm4,%xmm2
920	pxor	%xmm5,%xmm3
921	leal	64(%esi),%esi
922	movdqu	%xmm0,(%edi)
923	movdqu	%xmm1,16(%edi)
924	movdqu	%xmm2,32(%edi)
925	movdqu	%xmm3,48(%edi)
926	leal	64(%edi),%edi
927	subl	$64,%ecx
928	jnz	L013outer1x
929	jmp	L011done
# Partial final block (%ecx = 1..63): dump the keystream block over the
# initial-state rows at 0..63(%esp) and XOR it in one byte at a time.
930L014tail:
931	movdqa	%xmm0,(%esp)
932	movdqa	%xmm1,16(%esp)
933	movdqa	%xmm2,32(%esp)
934	movdqa	%xmm3,48(%esp)
935	xorl	%eax,%eax
936	xorl	%edx,%edx
937	xorl	%ebp,%ebp	# byte index = 0
938L015tail_loop:
939	movb	(%esp,%ebp,1),%al	# keystream byte
940	movb	(%esi,%ebp,1),%dl	# source byte
941	leal	1(%ebp),%ebp
942	xorb	%dl,%al
943	movb	%al,-1(%edi,%ebp,1)	# -1 compensates for the index increment above
944	decl	%ecx
945	jnz	L015tail_loop
946L011done:
947	movl	512(%esp),%esp	# restore the stack pointer saved before alignment
948	popl	%edi
949	popl	%esi
950	popl	%ebx
951	popl	%ebp
952	ret
# Constant table for the SSSE3 code, 64-byte aligned and addressed through
# %eax with fixed displacements; the row order must not change.
953.align	6,0x90
954Lssse3_data:
955.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	# offset 0: pshufb mask, rotate each dword left by 16
956.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	# offset 16: pshufb mask, rotate each dword left by 8
957.long	1634760805,857760878,2036477234,1797285236	# offset 32: constant words, ASCII "expand 32-byte k"
958.long	0,1,2,3	# offset 48: per-lane block counter offsets for the four-block path
959.long	4,4,4,4	# offset 64: counter step per four-block iteration
960.long	1,0,0,0	# offset 80: counter step per one-block iteration
961.long	4,0,0,0	# offset 96: counter adjustment when leaving the four-block path
962.long	0,-1,-1,-1	# offset 112: mask that clears lane 0 and keeps lanes 1..3
963.align	6,0x90
# NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
964.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
965.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
966.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
967.byte	114,103,62,0
# Mach-O non-lazy symbol pointer for _GFp_ia32cap_P. The code at Lpic_point
# loads this slot to obtain the address of the capability words; the zero
# below is a placeholder (presumably bound by the dynamic linker at load
# time - standard Mach-O behaviour, confirm against the toolchain docs).
968.section __IMPORT,__pointers,non_lazy_symbol_pointers
969L_GFp_ia32cap_P$non_lazy_ptr:
970.indirect_symbol	_GFp_ia32cap_P
971.long	0
972#endif
973