1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#if defined(BORINGSSL_PREFIX)
6#include <boringssl_prefix_symbols_asm.h>
7#endif
8.text
# gcm_gmult_4bit_mmx -- MMX routine driven by a 4-bit (nibble) table lookup.
#
# Calling convention as read from the code: both arguments are on the stack.
#   arg1 (20(%esp) after the four pushes) -> %edi : 16-byte buffer, read from
#        byte 15 down to byte 0 and then overwritten with the result.
#   arg2 (24(%esp))                       -> %esi : table of 16-byte entries,
#        addressed with offsets nibble*16 (0..240).
# NOTE(review): judging by the symbol name these are presumably the GHASH
# state Xi and the precomputed Htable -- confirm against the C prototype.
#
# Register roles:
#   %eax       address of Lrem_4bit, obtained position-independently (call/pop)
#   %ebp       index of the next byte to load, counts 14 down to -1
#   %ecx/%edx  table offsets for the low / high nibble of the current byte
#   %ebx       low bits of %mm0; masked to 4 bits it indexes Lrem_4bit
#   %mm0/%mm1  running 128-bit value, %mm1 is the upper half; %mm0 pairs with
#              the qword at entry offset 8, %mm1 with the qword at offset 0
#   %mm2       moves the 4 bits shifted out of %mm1 into the top of %mm0
# %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and the
# MMX registers are clobbered. emms is executed before returning.
9.globl	_gcm_gmult_4bit_mmx
10.private_extern	_gcm_gmult_4bit_mmx
11.align	4
12_gcm_gmult_4bit_mmx:
13L_gcm_gmult_4bit_mmx_begin:
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
18	movl	20(%esp),%edi
19	movl	24(%esp),%esi
# Position-independent address of Lrem_4bit: the call pushes the address of
# L000pic_point, which is popped and adjusted by the link-time distance.
20	call	L000pic_point
21L000pic_point:
22	popl	%eax
23	leal	Lrem_4bit-L000pic_point(%eax),%eax
# Start with byte 15: %ecx = low nibble * 16, %edx = high nibble * 16, and
# seed %mm0/%mm1 with the table entry selected by the low nibble.
24	movzbl	15(%edi),%ebx
25	xorl	%ecx,%ecx
26	movl	%ebx,%edx
27	movb	%dl,%cl
28	movl	$14,%ebp
29	shlb	$4,%cl
30	andl	$240,%edx
31	movq	8(%esi,%ecx,1),%mm0
32	movq	(%esi,%ecx,1),%mm1
33	movd	%mm0,%ebx
34	jmp	L001mmx_loop
35.align	4,0x90
# Main loop. Every nibble step shifts %mm1:%mm0 right by 4 bits, xors the
# Lrem_4bit entry selected by the 4 bits shifted out into %mm1, and xors in
# the table entry for the nibble. One pass handles the high nibble of the
# byte loaded previously (%edx) and then the low nibble of the byte loaded in
# this pass (%ecx). The js below tests the flags produced by decl; the
# movd, pxor and movl in between do not modify them.
36L001mmx_loop:
37	psrlq	$4,%mm0
38	andl	$15,%ebx
39	movq	%mm1,%mm2
40	psrlq	$4,%mm1
41	pxor	8(%esi,%edx,1),%mm0
42	movb	(%edi,%ebp,1),%cl
43	psllq	$60,%mm2
44	pxor	(%eax,%ebx,8),%mm1
45	decl	%ebp
46	movd	%mm0,%ebx
47	pxor	(%esi,%edx,1),%mm1
48	movl	%ecx,%edx
49	pxor	%mm2,%mm0
50	js	L002mmx_break
51	shlb	$4,%cl
52	andl	$15,%ebx
53	psrlq	$4,%mm0
54	andl	$240,%edx
55	movq	%mm1,%mm2
56	psrlq	$4,%mm1
57	pxor	8(%esi,%ecx,1),%mm0
58	psllq	$60,%mm2
59	pxor	(%eax,%ebx,8),%mm1
60	movd	%mm0,%ebx
61	pxor	(%esi,%ecx,1),%mm1
62	pxor	%mm2,%mm0
63	jmp	L001mmx_loop
64.align	4,0x90
# Reached after byte 0 has been loaded: finish its low nibble (%ecx) and then
# its high nibble (%edx) without loading any further input.
65L002mmx_break:
66	shlb	$4,%cl
67	andl	$15,%ebx
68	psrlq	$4,%mm0
69	andl	$240,%edx
70	movq	%mm1,%mm2
71	psrlq	$4,%mm1
72	pxor	8(%esi,%ecx,1),%mm0
73	psllq	$60,%mm2
74	pxor	(%eax,%ebx,8),%mm1
75	movd	%mm0,%ebx
76	pxor	(%esi,%ecx,1),%mm1
77	pxor	%mm2,%mm0
78	psrlq	$4,%mm0
79	andl	$15,%ebx
80	movq	%mm1,%mm2
81	psrlq	$4,%mm1
82	pxor	8(%esi,%edx,1),%mm0
83	psllq	$60,%mm2
84	pxor	(%eax,%ebx,8),%mm1
85	movd	%mm0,%ebx
86	pxor	(%esi,%edx,1),%mm1
87	pxor	%mm2,%mm0
# Split %mm1:%mm0 into four dwords (%ebp, %edx, %ecx, %ebx from most to
# least significant), byte-swap each, and store them so that the buffer
# holds the 128-bit result most significant byte first.
88	psrlq	$32,%mm0
89	movd	%mm1,%edx
90	psrlq	$32,%mm1
91	movd	%mm0,%ecx
92	movd	%mm1,%ebp
93	bswap	%ebx
94	bswap	%edx
95	bswap	%ecx
96	bswap	%ebp
97	emms
98	movl	%ebx,12(%edi)
99	movl	%edx,4(%edi)
100	movl	%ecx,8(%edi)
101	movl	%ebp,(%edi)
102	popl	%edi
103	popl	%esi
104	popl	%ebx
105	popl	%ebp
106	ret
# gcm_ghash_4bit_mmx -- MMX routine that folds a run of 16-byte blocks into a
# 16-byte accumulator, consuming one input byte per table step.
#
# Calling convention as read from the code: four stack arguments.
#   arg1 (20(%esp) after the pushes) -> %eax : 16-byte accumulator, read
#        before the loop and written back once after the last block.
#   arg2 (24(%esp)) -> %ebx : table of 16 entries of 16 bytes; biased by +128
#        so that all 256 bytes are reachable with 8-bit displacements.
#   arg3 (28(%esp)) -> %ecx : input pointer, advanced by 16 per block.
#   arg4 (32(%esp)) -> %edx : byte count; arg3+arg4 is the end pointer.
# NOTE(review): presumably (Xi, Htable, inp, len) -- confirm against the C
# prototype. The loop exits only when the input pointer equals the end
# pointer exactly and the first block is processed before that test, so the
# count is assumed to be a non-zero multiple of 16.
#
# Stack frame, offsets relative to %esp after the realignment below
# (%esp is 64-byte aligned minus 16):
#   0..15     byte table: low byte of (dword at entry offset 8) << 4
#   16..143   qword at offset 8 of every entry
#   144..271  qword at offset 0 of every entry
#   272..399  (offset 8 qword >> 4) | (offset 0 qword << 60) of every entry
#   400..527  (offset 0 qword >> 4) of every entry
#   528..535  copy of %mm6 for the current block
#   536..539  copy of %ebx for the current block
#   544       arg1 pointer
#   548       current input pointer
#   552       end pointer
#   556       %esp as it was right after the four pushes
# %esi holds the address of Lrem_8bit for the whole routine.
# %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and the
# MMX registers are clobbered. emms is executed before returning.
107.globl	_gcm_ghash_4bit_mmx
108.private_extern	_gcm_ghash_4bit_mmx
109.align	4
110_gcm_ghash_4bit_mmx:
111L_gcm_ghash_4bit_mmx_begin:
112	pushl	%ebp
113	pushl	%ebx
114	pushl	%esi
115	pushl	%edi
116	movl	20(%esp),%eax
117	movl	24(%esp),%ebx
118	movl	28(%esp),%ecx
119	movl	32(%esp),%edx
120	movl	%esp,%ebp
# Position-independent address of Lrem_8bit via call/pop.
121	call	L003pic_point
122L003pic_point:
123	popl	%esi
124	leal	Lrem_8bit-L003pic_point(%esi),%esi
# Carve out the aligned frame described above and record the pointers.
125	subl	$544,%esp
126	andl	$-64,%esp
127	subl	$16,%esp
128	addl	%ecx,%edx
129	movl	%eax,544(%esp)
130	movl	%edx,552(%esp)
131	movl	%ebp,556(%esp)
# Expand the caller table into the four on-stack arrays and the byte table.
# %edi addresses the unshifted copies and %ebp the copies shifted by 4 bits,
# both biased by +128 like %ebx. The work for consecutive entries is
# interleaved (software pipelined), which is why loads, shifts and stores of
# neighbouring entries alternate.
132	addl	$128,%ebx
133	leal	144(%esp),%edi
134	leal	400(%esp),%ebp
135	movl	-120(%ebx),%edx
136	movq	-120(%ebx),%mm0
137	movq	-128(%ebx),%mm3
138	shll	$4,%edx
139	movb	%dl,(%esp)
140	movl	-104(%ebx),%edx
141	movq	-104(%ebx),%mm2
142	movq	-112(%ebx),%mm5
143	movq	%mm0,-128(%edi)
144	psrlq	$4,%mm0
145	movq	%mm3,(%edi)
146	movq	%mm3,%mm7
147	psrlq	$4,%mm3
148	shll	$4,%edx
149	movb	%dl,1(%esp)
150	movl	-88(%ebx),%edx
151	movq	-88(%ebx),%mm1
152	psllq	$60,%mm7
153	movq	-96(%ebx),%mm4
154	por	%mm7,%mm0
155	movq	%mm2,-120(%edi)
156	psrlq	$4,%mm2
157	movq	%mm5,8(%edi)
158	movq	%mm5,%mm6
159	movq	%mm0,-128(%ebp)
160	psrlq	$4,%mm5
161	movq	%mm3,(%ebp)
162	shll	$4,%edx
163	movb	%dl,2(%esp)
164	movl	-72(%ebx),%edx
165	movq	-72(%ebx),%mm0
166	psllq	$60,%mm6
167	movq	-80(%ebx),%mm3
168	por	%mm6,%mm2
169	movq	%mm1,-112(%edi)
170	psrlq	$4,%mm1
171	movq	%mm4,16(%edi)
172	movq	%mm4,%mm7
173	movq	%mm2,-120(%ebp)
174	psrlq	$4,%mm4
175	movq	%mm5,8(%ebp)
176	shll	$4,%edx
177	movb	%dl,3(%esp)
178	movl	-56(%ebx),%edx
179	movq	-56(%ebx),%mm2
180	psllq	$60,%mm7
181	movq	-64(%ebx),%mm5
182	por	%mm7,%mm1
183	movq	%mm0,-104(%edi)
184	psrlq	$4,%mm0
185	movq	%mm3,24(%edi)
186	movq	%mm3,%mm6
187	movq	%mm1,-112(%ebp)
188	psrlq	$4,%mm3
189	movq	%mm4,16(%ebp)
190	shll	$4,%edx
191	movb	%dl,4(%esp)
192	movl	-40(%ebx),%edx
193	movq	-40(%ebx),%mm1
194	psllq	$60,%mm6
195	movq	-48(%ebx),%mm4
196	por	%mm6,%mm0
197	movq	%mm2,-96(%edi)
198	psrlq	$4,%mm2
199	movq	%mm5,32(%edi)
200	movq	%mm5,%mm7
201	movq	%mm0,-104(%ebp)
202	psrlq	$4,%mm5
203	movq	%mm3,24(%ebp)
204	shll	$4,%edx
205	movb	%dl,5(%esp)
206	movl	-24(%ebx),%edx
207	movq	-24(%ebx),%mm0
208	psllq	$60,%mm7
209	movq	-32(%ebx),%mm3
210	por	%mm7,%mm2
211	movq	%mm1,-88(%edi)
212	psrlq	$4,%mm1
213	movq	%mm4,40(%edi)
214	movq	%mm4,%mm6
215	movq	%mm2,-96(%ebp)
216	psrlq	$4,%mm4
217	movq	%mm5,32(%ebp)
218	shll	$4,%edx
219	movb	%dl,6(%esp)
220	movl	-8(%ebx),%edx
221	movq	-8(%ebx),%mm2
222	psllq	$60,%mm6
223	movq	-16(%ebx),%mm5
224	por	%mm6,%mm1
225	movq	%mm0,-80(%edi)
226	psrlq	$4,%mm0
227	movq	%mm3,48(%edi)
228	movq	%mm3,%mm7
229	movq	%mm1,-88(%ebp)
230	psrlq	$4,%mm3
231	movq	%mm4,40(%ebp)
232	shll	$4,%edx
233	movb	%dl,7(%esp)
234	movl	8(%ebx),%edx
235	movq	8(%ebx),%mm1
236	psllq	$60,%mm7
237	movq	(%ebx),%mm4
238	por	%mm7,%mm0
239	movq	%mm2,-72(%edi)
240	psrlq	$4,%mm2
241	movq	%mm5,56(%edi)
242	movq	%mm5,%mm6
243	movq	%mm0,-80(%ebp)
244	psrlq	$4,%mm5
245	movq	%mm3,48(%ebp)
246	shll	$4,%edx
247	movb	%dl,8(%esp)
248	movl	24(%ebx),%edx
249	movq	24(%ebx),%mm0
250	psllq	$60,%mm6
251	movq	16(%ebx),%mm3
252	por	%mm6,%mm2
253	movq	%mm1,-64(%edi)
254	psrlq	$4,%mm1
255	movq	%mm4,64(%edi)
256	movq	%mm4,%mm7
257	movq	%mm2,-72(%ebp)
258	psrlq	$4,%mm4
259	movq	%mm5,56(%ebp)
260	shll	$4,%edx
261	movb	%dl,9(%esp)
262	movl	40(%ebx),%edx
263	movq	40(%ebx),%mm2
264	psllq	$60,%mm7
265	movq	32(%ebx),%mm5
266	por	%mm7,%mm1
267	movq	%mm0,-56(%edi)
268	psrlq	$4,%mm0
269	movq	%mm3,72(%edi)
270	movq	%mm3,%mm6
271	movq	%mm1,-64(%ebp)
272	psrlq	$4,%mm3
273	movq	%mm4,64(%ebp)
274	shll	$4,%edx
275	movb	%dl,10(%esp)
276	movl	56(%ebx),%edx
277	movq	56(%ebx),%mm1
278	psllq	$60,%mm6
279	movq	48(%ebx),%mm4
280	por	%mm6,%mm0
281	movq	%mm2,-48(%edi)
282	psrlq	$4,%mm2
283	movq	%mm5,80(%edi)
284	movq	%mm5,%mm7
285	movq	%mm0,-56(%ebp)
286	psrlq	$4,%mm5
287	movq	%mm3,72(%ebp)
288	shll	$4,%edx
289	movb	%dl,11(%esp)
290	movl	72(%ebx),%edx
291	movq	72(%ebx),%mm0
292	psllq	$60,%mm7
293	movq	64(%ebx),%mm3
294	por	%mm7,%mm2
295	movq	%mm1,-40(%edi)
296	psrlq	$4,%mm1
297	movq	%mm4,88(%edi)
298	movq	%mm4,%mm6
299	movq	%mm2,-48(%ebp)
300	psrlq	$4,%mm4
301	movq	%mm5,80(%ebp)
302	shll	$4,%edx
303	movb	%dl,12(%esp)
304	movl	88(%ebx),%edx
305	movq	88(%ebx),%mm2
306	psllq	$60,%mm6
307	movq	80(%ebx),%mm5
308	por	%mm6,%mm1
309	movq	%mm0,-32(%edi)
310	psrlq	$4,%mm0
311	movq	%mm3,96(%edi)
312	movq	%mm3,%mm7
313	movq	%mm1,-40(%ebp)
314	psrlq	$4,%mm3
315	movq	%mm4,88(%ebp)
316	shll	$4,%edx
317	movb	%dl,13(%esp)
318	movl	104(%ebx),%edx
319	movq	104(%ebx),%mm1
320	psllq	$60,%mm7
321	movq	96(%ebx),%mm4
322	por	%mm7,%mm0
323	movq	%mm2,-24(%edi)
324	psrlq	$4,%mm2
325	movq	%mm5,104(%edi)
326	movq	%mm5,%mm6
327	movq	%mm0,-32(%ebp)
328	psrlq	$4,%mm5
329	movq	%mm3,96(%ebp)
330	shll	$4,%edx
331	movb	%dl,14(%esp)
332	movl	120(%ebx),%edx
333	movq	120(%ebx),%mm0
334	psllq	$60,%mm6
335	movq	112(%ebx),%mm3
336	por	%mm6,%mm2
337	movq	%mm1,-16(%edi)
338	psrlq	$4,%mm1
339	movq	%mm4,112(%edi)
340	movq	%mm4,%mm7
341	movq	%mm2,-24(%ebp)
342	psrlq	$4,%mm4
343	movq	%mm5,104(%ebp)
344	shll	$4,%edx
345	movb	%dl,15(%esp)
346	psllq	$60,%mm7
347	por	%mm7,%mm1
348	movq	%mm0,-8(%edi)
349	psrlq	$4,%mm0
350	movq	%mm3,120(%edi)
351	movq	%mm3,%mm6
352	movq	%mm1,-16(%ebp)
353	psrlq	$4,%mm3
354	movq	%mm4,112(%ebp)
355	psllq	$60,%mm6
356	por	%mm6,%mm0
357	movq	%mm0,-8(%ebp)
358	movq	%mm3,120(%ebp)
# Load the accumulator: bytes 0..7 -> %mm6, 8..11 -> %ebx, 12..15 -> %edx.
359	movq	(%eax),%mm6
360	movl	8(%eax),%ebx
361	movl	12(%eax),%edx
362.align	4,0x90
# One pass per 16-byte input block. The block is xored into the accumulator,
# bytes 0..11 of the sum are parked at 528..539(%esp) and the input pointer
# is advanced and saved. The sum is then consumed one byte at a time, last
# byte first: roll $8 brings the next byte into %dl, and a fresh dword is
# reloaded from the parked copy after every four bytes.
#
# Per byte step:
#   %eax = low nibble, %ebp or %edi (alternating) = high nibble
#   %mm6:%mm7 is the running value (%mm6 upper half); it is shifted right by
#   8 bits with %mm3 carrying the low byte of %mm6 into the top of %mm7
#   the low-nibble entry comes from the unshifted copies (16 and 144(%esp)),
#   the high-nibble entry from the copies shifted by 4 (272 and 400(%esp))
#   the byte shifted out (%bl or %cl, corrected with the byte table at
#   0(%esp)) indexes Lrem_8bit; the 16-bit result is inserted into word 2 of
#   %mm0, %mm1 or %mm2 in rotation and xored into %mm6 two steps later
363L004outer:
364	xorl	12(%ecx),%edx
365	xorl	8(%ecx),%ebx
366	pxor	(%ecx),%mm6
367	leal	16(%ecx),%ecx
368	movl	%ebx,536(%esp)
369	movq	%mm6,528(%esp)
370	movl	%ecx,548(%esp)
371	xorl	%eax,%eax
372	roll	$8,%edx
373	movb	%dl,%al
374	movl	%eax,%ebp
375	andb	$15,%al
376	shrl	$4,%ebp
377	pxor	%mm0,%mm0
378	roll	$8,%edx
379	pxor	%mm1,%mm1
380	pxor	%mm2,%mm2
381	movq	16(%esp,%eax,8),%mm7
382	movq	144(%esp,%eax,8),%mm6
383	movb	%dl,%al
384	movd	%mm7,%ebx
385	psrlq	$8,%mm7
386	movq	%mm6,%mm3
387	movl	%eax,%edi
388	psrlq	$8,%mm6
389	pxor	272(%esp,%ebp,8),%mm7
390	andb	$15,%al
391	psllq	$56,%mm3
392	shrl	$4,%edi
393	pxor	16(%esp,%eax,8),%mm7
394	roll	$8,%edx
395	pxor	144(%esp,%eax,8),%mm6
396	pxor	%mm3,%mm7
397	pxor	400(%esp,%ebp,8),%mm6
398	xorb	(%esp,%ebp,1),%bl
399	movb	%dl,%al
400	movd	%mm7,%ecx
401	movzbl	%bl,%ebx
402	psrlq	$8,%mm7
403	movq	%mm6,%mm3
404	movl	%eax,%ebp
405	psrlq	$8,%mm6
406	pxor	272(%esp,%edi,8),%mm7
407	andb	$15,%al
408	psllq	$56,%mm3
409	shrl	$4,%ebp
410	pinsrw	$2,(%esi,%ebx,2),%mm2
411	pxor	16(%esp,%eax,8),%mm7
412	roll	$8,%edx
413	pxor	144(%esp,%eax,8),%mm6
414	pxor	%mm3,%mm7
415	pxor	400(%esp,%edi,8),%mm6
416	xorb	(%esp,%edi,1),%cl
417	movb	%dl,%al
# Bytes 12..15 are used up; continue with the parked bytes 8..11.
418	movl	536(%esp),%edx
419	movd	%mm7,%ebx
420	movzbl	%cl,%ecx
421	psrlq	$8,%mm7
422	movq	%mm6,%mm3
423	movl	%eax,%edi
424	psrlq	$8,%mm6
425	pxor	272(%esp,%ebp,8),%mm7
426	andb	$15,%al
427	psllq	$56,%mm3
428	pxor	%mm2,%mm6
429	shrl	$4,%edi
430	pinsrw	$2,(%esi,%ecx,2),%mm1
431	pxor	16(%esp,%eax,8),%mm7
432	roll	$8,%edx
433	pxor	144(%esp,%eax,8),%mm6
434	pxor	%mm3,%mm7
435	pxor	400(%esp,%ebp,8),%mm6
436	xorb	(%esp,%ebp,1),%bl
437	movb	%dl,%al
438	movd	%mm7,%ecx
439	movzbl	%bl,%ebx
440	psrlq	$8,%mm7
441	movq	%mm6,%mm3
442	movl	%eax,%ebp
443	psrlq	$8,%mm6
444	pxor	272(%esp,%edi,8),%mm7
445	andb	$15,%al
446	psllq	$56,%mm3
447	pxor	%mm1,%mm6
448	shrl	$4,%ebp
449	pinsrw	$2,(%esi,%ebx,2),%mm0
450	pxor	16(%esp,%eax,8),%mm7
451	roll	$8,%edx
452	pxor	144(%esp,%eax,8),%mm6
453	pxor	%mm3,%mm7
454	pxor	400(%esp,%edi,8),%mm6
455	xorb	(%esp,%edi,1),%cl
456	movb	%dl,%al
457	movd	%mm7,%ebx
458	movzbl	%cl,%ecx
459	psrlq	$8,%mm7
460	movq	%mm6,%mm3
461	movl	%eax,%edi
462	psrlq	$8,%mm6
463	pxor	272(%esp,%ebp,8),%mm7
464	andb	$15,%al
465	psllq	$56,%mm3
466	pxor	%mm0,%mm6
467	shrl	$4,%edi
468	pinsrw	$2,(%esi,%ecx,2),%mm2
469	pxor	16(%esp,%eax,8),%mm7
470	roll	$8,%edx
471	pxor	144(%esp,%eax,8),%mm6
472	pxor	%mm3,%mm7
473	pxor	400(%esp,%ebp,8),%mm6
474	xorb	(%esp,%ebp,1),%bl
475	movb	%dl,%al
476	movd	%mm7,%ecx
477	movzbl	%bl,%ebx
478	psrlq	$8,%mm7
479	movq	%mm6,%mm3
480	movl	%eax,%ebp
481	psrlq	$8,%mm6
482	pxor	272(%esp,%edi,8),%mm7
483	andb	$15,%al
484	psllq	$56,%mm3
485	pxor	%mm2,%mm6
486	shrl	$4,%ebp
487	pinsrw	$2,(%esi,%ebx,2),%mm1
488	pxor	16(%esp,%eax,8),%mm7
489	roll	$8,%edx
490	pxor	144(%esp,%eax,8),%mm6
491	pxor	%mm3,%mm7
492	pxor	400(%esp,%edi,8),%mm6
493	xorb	(%esp,%edi,1),%cl
494	movb	%dl,%al
# Continue with the parked bytes 4..7.
495	movl	532(%esp),%edx
496	movd	%mm7,%ebx
497	movzbl	%cl,%ecx
498	psrlq	$8,%mm7
499	movq	%mm6,%mm3
500	movl	%eax,%edi
501	psrlq	$8,%mm6
502	pxor	272(%esp,%ebp,8),%mm7
503	andb	$15,%al
504	psllq	$56,%mm3
505	pxor	%mm1,%mm6
506	shrl	$4,%edi
507	pinsrw	$2,(%esi,%ecx,2),%mm0
508	pxor	16(%esp,%eax,8),%mm7
509	roll	$8,%edx
510	pxor	144(%esp,%eax,8),%mm6
511	pxor	%mm3,%mm7
512	pxor	400(%esp,%ebp,8),%mm6
513	xorb	(%esp,%ebp,1),%bl
514	movb	%dl,%al
515	movd	%mm7,%ecx
516	movzbl	%bl,%ebx
517	psrlq	$8,%mm7
518	movq	%mm6,%mm3
519	movl	%eax,%ebp
520	psrlq	$8,%mm6
521	pxor	272(%esp,%edi,8),%mm7
522	andb	$15,%al
523	psllq	$56,%mm3
524	pxor	%mm0,%mm6
525	shrl	$4,%ebp
526	pinsrw	$2,(%esi,%ebx,2),%mm2
527	pxor	16(%esp,%eax,8),%mm7
528	roll	$8,%edx
529	pxor	144(%esp,%eax,8),%mm6
530	pxor	%mm3,%mm7
531	pxor	400(%esp,%edi,8),%mm6
532	xorb	(%esp,%edi,1),%cl
533	movb	%dl,%al
534	movd	%mm7,%ebx
535	movzbl	%cl,%ecx
536	psrlq	$8,%mm7
537	movq	%mm6,%mm3
538	movl	%eax,%edi
539	psrlq	$8,%mm6
540	pxor	272(%esp,%ebp,8),%mm7
541	andb	$15,%al
542	psllq	$56,%mm3
543	pxor	%mm2,%mm6
544	shrl	$4,%edi
545	pinsrw	$2,(%esi,%ecx,2),%mm1
546	pxor	16(%esp,%eax,8),%mm7
547	roll	$8,%edx
548	pxor	144(%esp,%eax,8),%mm6
549	pxor	%mm3,%mm7
550	pxor	400(%esp,%ebp,8),%mm6
551	xorb	(%esp,%ebp,1),%bl
552	movb	%dl,%al
553	movd	%mm7,%ecx
554	movzbl	%bl,%ebx
555	psrlq	$8,%mm7
556	movq	%mm6,%mm3
557	movl	%eax,%ebp
558	psrlq	$8,%mm6
559	pxor	272(%esp,%edi,8),%mm7
560	andb	$15,%al
561	psllq	$56,%mm3
562	pxor	%mm1,%mm6
563	shrl	$4,%ebp
564	pinsrw	$2,(%esi,%ebx,2),%mm0
565	pxor	16(%esp,%eax,8),%mm7
566	roll	$8,%edx
567	pxor	144(%esp,%eax,8),%mm6
568	pxor	%mm3,%mm7
569	pxor	400(%esp,%edi,8),%mm6
570	xorb	(%esp,%edi,1),%cl
571	movb	%dl,%al
# Continue with the parked bytes 0..3.
572	movl	528(%esp),%edx
573	movd	%mm7,%ebx
574	movzbl	%cl,%ecx
575	psrlq	$8,%mm7
576	movq	%mm6,%mm3
577	movl	%eax,%edi
578	psrlq	$8,%mm6
579	pxor	272(%esp,%ebp,8),%mm7
580	andb	$15,%al
581	psllq	$56,%mm3
582	pxor	%mm0,%mm6
583	shrl	$4,%edi
584	pinsrw	$2,(%esi,%ecx,2),%mm2
585	pxor	16(%esp,%eax,8),%mm7
586	roll	$8,%edx
587	pxor	144(%esp,%eax,8),%mm6
588	pxor	%mm3,%mm7
589	pxor	400(%esp,%ebp,8),%mm6
590	xorb	(%esp,%ebp,1),%bl
591	movb	%dl,%al
592	movd	%mm7,%ecx
593	movzbl	%bl,%ebx
594	psrlq	$8,%mm7
595	movq	%mm6,%mm3
596	movl	%eax,%ebp
597	psrlq	$8,%mm6
598	pxor	272(%esp,%edi,8),%mm7
599	andb	$15,%al
600	psllq	$56,%mm3
601	pxor	%mm2,%mm6
602	shrl	$4,%ebp
603	pinsrw	$2,(%esi,%ebx,2),%mm1
604	pxor	16(%esp,%eax,8),%mm7
605	roll	$8,%edx
606	pxor	144(%esp,%eax,8),%mm6
607	pxor	%mm3,%mm7
608	pxor	400(%esp,%edi,8),%mm6
609	xorb	(%esp,%edi,1),%cl
610	movb	%dl,%al
611	movd	%mm7,%ebx
612	movzbl	%cl,%ecx
613	psrlq	$8,%mm7
614	movq	%mm6,%mm3
615	movl	%eax,%edi
616	psrlq	$8,%mm6
617	pxor	272(%esp,%ebp,8),%mm7
618	andb	$15,%al
619	psllq	$56,%mm3
620	pxor	%mm1,%mm6
621	shrl	$4,%edi
622	pinsrw	$2,(%esi,%ecx,2),%mm0
623	pxor	16(%esp,%eax,8),%mm7
624	roll	$8,%edx
625	pxor	144(%esp,%eax,8),%mm6
626	pxor	%mm3,%mm7
627	pxor	400(%esp,%ebp,8),%mm6
628	xorb	(%esp,%ebp,1),%bl
629	movb	%dl,%al
630	movd	%mm7,%ecx
631	movzbl	%bl,%ebx
632	psrlq	$8,%mm7
633	movq	%mm6,%mm3
634	movl	%eax,%ebp
635	psrlq	$8,%mm6
636	pxor	272(%esp,%edi,8),%mm7
637	andb	$15,%al
638	psllq	$56,%mm3
639	pxor	%mm0,%mm6
640	shrl	$4,%ebp
641	pinsrw	$2,(%esi,%ebx,2),%mm2
642	pxor	16(%esp,%eax,8),%mm7
643	roll	$8,%edx
644	pxor	144(%esp,%eax,8),%mm6
645	pxor	%mm3,%mm7
646	pxor	400(%esp,%edi,8),%mm6
647	xorb	(%esp,%edi,1),%cl
648	movb	%dl,%al
# NOTE(review): the value loaded from 524(%esp) is never consumed; %edx is
# not read again before movd overwrites it near the end of the block. It
# looks like a leftover of the unrolled generator pattern -- confirm.
649	movl	524(%esp),%edx
650	movd	%mm7,%ebx
651	movzbl	%cl,%ecx
652	psrlq	$8,%mm7
653	movq	%mm6,%mm3
654	movl	%eax,%edi
655	psrlq	$8,%mm6
656	pxor	272(%esp,%ebp,8),%mm7
657	andb	$15,%al
658	psllq	$56,%mm3
659	pxor	%mm2,%mm6
660	shrl	$4,%edi
661	pinsrw	$2,(%esi,%ecx,2),%mm1
662	pxor	16(%esp,%eax,8),%mm7
# Tail of the block: the last step shifts by 4 bits instead of 8 and uses the
# unshifted copies for the final high nibble (%edi). The Lrem_8bit words
# still pending in %mm1 and %mm0 are aligned with psllq $4 and psllq $12, the
# last one is inserted into word 3 of a cleared %mm2, and all three are
# xored into %mm6.
663	pxor	144(%esp,%eax,8),%mm6
664	xorb	(%esp,%ebp,1),%bl
665	pxor	%mm3,%mm7
666	pxor	400(%esp,%ebp,8),%mm6
667	movzbl	%bl,%ebx
668	pxor	%mm2,%mm2
669	psllq	$4,%mm1
670	movd	%mm7,%ecx
671	psrlq	$4,%mm7
672	movq	%mm6,%mm3
673	psrlq	$4,%mm6
674	shll	$4,%ecx
675	pxor	16(%esp,%edi,8),%mm7
676	psllq	$60,%mm3
677	movzbl	%cl,%ecx
678	pxor	%mm3,%mm7
679	pxor	144(%esp,%edi,8),%mm6
680	pinsrw	$2,(%esi,%ebx,2),%mm0
681	pxor	%mm1,%mm6
682	movd	%mm7,%edx
683	pinsrw	$3,(%esi,%ecx,2),%mm2
684	psllq	$12,%mm0
685	pxor	%mm0,%mm6
686	psrlq	$32,%mm7
687	pxor	%mm2,%mm6
# Convert the result back to the in-memory byte order of the accumulator:
# %edx and %ebx (low and high dword of %mm7) are byte-swapped with bswap;
# %mm6 is reversed by swapping the bytes inside each 16-bit word (psllw,
# psrlw, por) and then reversing the word order with pshufw $27.
688	movl	548(%esp),%ecx
689	movd	%mm7,%ebx
690	movq	%mm6,%mm3
691	psllw	$8,%mm6
692	psrlw	$8,%mm3
693	por	%mm3,%mm6
694	bswap	%edx
695	pshufw	$27,%mm6,%mm6
696	bswap	%ebx
# Loop until the input pointer reaches the end pointer saved in the frame.
697	cmpl	552(%esp),%ecx
698	jne	L004outer
# Write the accumulator back, drop the frame by restoring the saved %esp,
# and return.
699	movl	544(%esp),%eax
700	movl	%edx,12(%eax)
701	movl	%ebx,8(%eax)
702	movq	%mm6,(%eax)
703	movl	556(%esp),%esp
704	emms
705	popl	%edi
706	popl	%esi
707	popl	%ebx
708	popl	%ebp
709	ret
# gcm_init_clmul -- builds a 48-byte table from a 16-byte input value using
# carry-less multiplication (PCLMULQDQ, emitted below as raw .byte opcodes).
#
# Calling convention as read from the code: two stack arguments, no registers
# are saved (only %eax, %ecx, %edx and %xmm0-%xmm5 are touched).
#   arg1 (4(%esp)) -> %edx : output, 48 bytes are written with movdqu
#   arg2 (8(%esp)) -> %eax : input, 16 bytes are read with movdqu
# NOTE(review): presumably (Htable, H) with H being the hash key -- confirm
# against the C prototype.
#
# Output layout:
#   0..15   the input with its qword halves swapped, shifted left by one bit
#           across 128 bits and conditionally xored with the constant at
#           Lbswap+16 (referred to as V below)
#   16..31  V multiplied by itself and reduced to 128 bits
#   32..47  low qword: xor of the two halves of V
#           high qword: xor of the two halves of the value stored at 16..31
710.globl	_gcm_init_clmul
711.private_extern	_gcm_init_clmul
712.align	4
713_gcm_init_clmul:
714L_gcm_init_clmul_begin:
715	movl	4(%esp),%edx
716	movl	8(%esp),%eax
# Position-independent address of Lbswap via call/pop.
717	call	L005pic
718L005pic:
719	popl	%ecx
720	leal	Lbswap-L005pic(%ecx),%ecx
# Build V in %xmm2. pshufd $78 swaps the qword halves. pshufd $255 broadcasts
# the top dword so that pcmpgtd against zero yields an all-ones mask exactly
# when the top bit is set; that mask selects the constant at 16(%ecx). The
# psllq, psrlq $63, pslldq $8, por sequence is a 128-bit left shift by one
# (the bit leaving the low qword is carried into the high qword).
721	movdqu	(%eax),%xmm2
722	pshufd	$78,%xmm2,%xmm2
723	pshufd	$255,%xmm2,%xmm4
724	movdqa	%xmm2,%xmm3
725	psllq	$1,%xmm2
726	pxor	%xmm5,%xmm5
727	psrlq	$63,%xmm3
728	pcmpgtd	%xmm4,%xmm5
729	pslldq	$8,%xmm3
730	por	%xmm3,%xmm2
731	pand	16(%ecx),%xmm5
732	pxor	%xmm5,%xmm2
# Multiply V by itself using three carry-less multiplications: low*low,
# high*high and (high^low)*(high^low).
733	movdqa	%xmm2,%xmm0
734	movdqa	%xmm0,%xmm1
735	pshufd	$78,%xmm0,%xmm3
736	pshufd	$78,%xmm2,%xmm4
737	pxor	%xmm0,%xmm3
738	pxor	%xmm2,%xmm4
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm0
739.byte	102,15,58,68,194,0
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm1
740.byte	102,15,58,68,202,17
# Encoded instruction: pclmulqdq $0x00,%xmm4,%xmm3
741.byte	102,15,58,68,220,0
# Fold the middle product into the 256-bit result %xmm1 (high) : %xmm0 (low).
742	xorps	%xmm0,%xmm3
743	xorps	%xmm1,%xmm3
744	movdqa	%xmm3,%xmm4
745	psrldq	$8,%xmm3
746	pslldq	$8,%xmm4
747	pxor	%xmm3,%xmm1
748	pxor	%xmm4,%xmm0
# Reduce the 256-bit product to 128 bits in %xmm0 with shifts and xors only:
# a left-shift phase (5, 1, 57) followed by a right-shift phase (1, 5, 1).
# NOTE(review): presumably the reduction modulo the GHASH polynomial --
# confirm against the generator script.
749	movdqa	%xmm0,%xmm4
750	movdqa	%xmm0,%xmm3
751	psllq	$5,%xmm0
752	pxor	%xmm0,%xmm3
753	psllq	$1,%xmm0
754	pxor	%xmm3,%xmm0
755	psllq	$57,%xmm0
756	movdqa	%xmm0,%xmm3
757	pslldq	$8,%xmm0
758	psrldq	$8,%xmm3
759	pxor	%xmm4,%xmm0
760	pxor	%xmm3,%xmm1
761	movdqa	%xmm0,%xmm4
762	psrlq	$1,%xmm0
763	pxor	%xmm4,%xmm1
764	pxor	%xmm0,%xmm4
765	psrlq	$5,%xmm0
766	pxor	%xmm4,%xmm0
767	psrlq	$1,%xmm0
768	pxor	%xmm1,%xmm0
# Store V, the reduced product, and the packed xor-of-halves values.
769	pshufd	$78,%xmm2,%xmm3
770	pshufd	$78,%xmm0,%xmm4
771	pxor	%xmm2,%xmm3
772	movdqu	%xmm2,(%edx)
773	pxor	%xmm0,%xmm4
774	movdqu	%xmm0,16(%edx)
# Encoded instruction: palignr $8,%xmm3,%xmm4
775.byte	102,15,58,15,227,8
776	movdqu	%xmm4,32(%edx)
777	ret
# gcm_gmult_clmul -- multiplies a 16-byte value in place by the first table
# entry using carry-less multiplication (PCLMULQDQ and PSHUFB are emitted
# below as raw .byte opcodes).
#
# Calling convention as read from the code: two stack arguments, no registers
# are saved (only %eax, %ecx, %edx and %xmm0-%xmm5 are touched).
#   arg1 (4(%esp)) -> %eax : 16-byte value, read and then overwritten
#   arg2 (8(%esp)) -> %edx : table; bytes 0..15 and 32..47 are read
# NOTE(review): presumably (Xi, Htable) with the table laid out as written by
# gcm_init_clmul -- confirm against the C prototype.
778.globl	_gcm_gmult_clmul
779.private_extern	_gcm_gmult_clmul
780.align	4
781_gcm_gmult_clmul:
782L_gcm_gmult_clmul_begin:
783	movl	4(%esp),%eax
784	movl	8(%esp),%edx
# Position-independent address of Lbswap via call/pop.
785	call	L006pic
786L006pic:
787	popl	%ecx
788	leal	Lbswap-L006pic(%ecx),%ecx
# %xmm5 = byte-reversal mask, %xmm2 = table bytes 0..15, %xmm4 = table bytes
# 32..47 (its low qword is the multiplier of the middle product).
789	movdqu	(%eax),%xmm0
790	movdqa	(%ecx),%xmm5
791	movups	(%edx),%xmm2
# Encoded instruction: pshufb %xmm5,%xmm0 (reverse the 16 bytes of the value)
792.byte	102,15,56,0,197
793	movups	32(%edx),%xmm4
794	movdqa	%xmm0,%xmm1
795	pshufd	$78,%xmm0,%xmm3
796	pxor	%xmm0,%xmm3
# Three carry-less multiplications: low*low, high*high and the middle term.
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm0
797.byte	102,15,58,68,194,0
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm1
798.byte	102,15,58,68,202,17
# Encoded instruction: pclmulqdq $0x00,%xmm4,%xmm3
799.byte	102,15,58,68,220,0
# Fold the middle product into the 256-bit result %xmm1 (high) : %xmm0 (low).
800	xorps	%xmm0,%xmm3
801	xorps	%xmm1,%xmm3
802	movdqa	%xmm3,%xmm4
803	psrldq	$8,%xmm3
804	pslldq	$8,%xmm4
805	pxor	%xmm3,%xmm1
806	pxor	%xmm4,%xmm0
# Reduce the 256-bit product to 128 bits in %xmm0 with the same two-phase
# shift-and-xor sequence used by the other clmul routines in this file.
807	movdqa	%xmm0,%xmm4
808	movdqa	%xmm0,%xmm3
809	psllq	$5,%xmm0
810	pxor	%xmm0,%xmm3
811	psllq	$1,%xmm0
812	pxor	%xmm3,%xmm0
813	psllq	$57,%xmm0
814	movdqa	%xmm0,%xmm3
815	pslldq	$8,%xmm0
816	psrldq	$8,%xmm3
817	pxor	%xmm4,%xmm0
818	pxor	%xmm3,%xmm1
819	movdqa	%xmm0,%xmm4
820	psrlq	$1,%xmm0
821	pxor	%xmm4,%xmm1
822	pxor	%xmm0,%xmm4
823	psrlq	$5,%xmm0
824	pxor	%xmm4,%xmm0
825	psrlq	$1,%xmm0
826	pxor	%xmm1,%xmm0
# Encoded instruction: pshufb %xmm5,%xmm0 (back to the in-memory byte order)
827.byte	102,15,56,0,197
828	movdqu	%xmm0,(%eax)
829	ret
# gcm_ghash_clmul -- folds a run of 16-byte blocks into a 16-byte accumulator
# using carry-less multiplication, two blocks per loop iteration (PCLMULQDQ
# and PSHUFB are emitted below as raw .byte opcodes).
#
# Calling convention as read from the code: four stack arguments.
#   arg1 (20(%esp) after the pushes) -> %eax : 16-byte accumulator, read at
#        entry and written back at L011done
#   arg2 (24(%esp)) -> %edx : table; bytes 0..15, 16..31 and 32..47 are read
#   arg3 (28(%esp)) -> %esi : input pointer
#   arg4 (32(%esp)) -> %ebx : byte count
# NOTE(review): presumably (Xi, Htable, inp, len) with the table laid out as
# written by gcm_init_clmul -- confirm against the C prototype. A count of 0
# is not handled: subl $16 wraps around and the two-block path is entered,
# so callers are assumed to pass a non-zero multiple of 16.
#
# Register roles:
#   %ecx   address of Lbswap; %xmm5 is reloaded from it whenever a byte
#          reversal is needed because %xmm5 is also used for table bytes 32..47
#   %xmm0  accumulator in reversed byte order
#   %xmm2  table bytes 0..15 or 16..31, depending on the phase
#   %xmm6/%xmm7/%xmm3  partial products of the second block of each pair
# %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
# %xmm0-%xmm7 are clobbered.
830.globl	_gcm_ghash_clmul
831.private_extern	_gcm_ghash_clmul
832.align	4
833_gcm_ghash_clmul:
834L_gcm_ghash_clmul_begin:
835	pushl	%ebp
836	pushl	%ebx
837	pushl	%esi
838	pushl	%edi
839	movl	20(%esp),%eax
840	movl	24(%esp),%edx
841	movl	28(%esp),%esi
842	movl	32(%esp),%ebx
# Position-independent address of Lbswap via call/pop.
843	call	L007pic
844L007pic:
845	popl	%ecx
846	leal	Lbswap-L007pic(%ecx),%ecx
847	movdqu	(%eax),%xmm0
848	movdqa	(%ecx),%xmm5
849	movdqu	(%edx),%xmm2
# Encoded instruction: pshufb %xmm5,%xmm0
850.byte	102,15,56,0,197
# Exactly one block: handle it on the single-block path.
851	subl	$16,%ebx
852	jz	L008odd_tail
# At least two blocks: load a pair, byte-reverse both, xor the first into the
# accumulator and start the three products of the second block with table
# bytes 0..15 (%xmm2) and the low qword of table bytes 32..47 (%xmm5).
853	movdqu	(%esi),%xmm3
854	movdqu	16(%esi),%xmm6
# Encoded instruction: pshufb %xmm5,%xmm3
855.byte	102,15,56,0,221
# Encoded instruction: pshufb %xmm5,%xmm6
856.byte	102,15,56,0,245
857	movdqu	32(%edx),%xmm5
858	pxor	%xmm3,%xmm0
859	pshufd	$78,%xmm6,%xmm3
860	movdqa	%xmm6,%xmm7
861	pxor	%xmm6,%xmm3
862	leal	32(%esi),%esi
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm6
863.byte	102,15,58,68,242,0
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm7
864.byte	102,15,58,68,250,17
# Encoded instruction: pclmulqdq $0x00,%xmm5,%xmm3
865.byte	102,15,58,68,221,0
# From here on %xmm2 holds table bytes 16..31, the multiplier for the
# accumulator half of each pair.
866	movups	16(%edx),%xmm2
867	nop
# Unsigned test: leave for the tail when at most the pair just started is
# left, otherwise enter the steady-state loop.
868	subl	$32,%ebx
869	jbe	L009even_tail
870	jmp	L010mod_loop
871.align	5,0x90
# Steady-state loop. Each pass multiplies the accumulator by table bytes
# 16..31, adds the pending products of the previous second block, reduces the
# sum, and interleaved with that loads the next pair and starts the products
# of its second block.
872L010mod_loop:
873	pshufd	$78,%xmm0,%xmm4
874	movdqa	%xmm0,%xmm1
875	pxor	%xmm0,%xmm4
876	nop
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm0
877.byte	102,15,58,68,194,0
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm1
878.byte	102,15,58,68,202,17
# Encoded instruction: pclmulqdq $0x10,%xmm5,%xmm4
# (low qword of %xmm4 times high qword of table bytes 32..47)
879.byte	102,15,58,68,229,16
880	movups	(%edx),%xmm2
881	xorps	%xmm6,%xmm0
882	movdqa	(%ecx),%xmm5
883	xorps	%xmm7,%xmm1
884	movdqu	(%esi),%xmm7
885	pxor	%xmm0,%xmm3
886	movdqu	16(%esi),%xmm6
887	pxor	%xmm1,%xmm3
# Encoded instruction: pshufb %xmm5,%xmm7
888.byte	102,15,56,0,253
889	pxor	%xmm3,%xmm4
890	movdqa	%xmm4,%xmm3
891	psrldq	$8,%xmm4
892	pslldq	$8,%xmm3
893	pxor	%xmm4,%xmm1
894	pxor	%xmm3,%xmm0
# Encoded instruction: pshufb %xmm5,%xmm6
895.byte	102,15,56,0,245
# The first block of the new pair is xored into the high half of the
# not yet reduced product.
896	pxor	%xmm7,%xmm1
897	movdqa	%xmm6,%xmm7
898	movdqa	%xmm0,%xmm4
899	movdqa	%xmm0,%xmm3
900	psllq	$5,%xmm0
901	pxor	%xmm0,%xmm3
902	psllq	$1,%xmm0
903	pxor	%xmm3,%xmm0
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm6
904.byte	102,15,58,68,242,0
905	movups	32(%edx),%xmm5
906	psllq	$57,%xmm0
907	movdqa	%xmm0,%xmm3
908	pslldq	$8,%xmm0
909	psrldq	$8,%xmm3
910	pxor	%xmm4,%xmm0
911	pxor	%xmm3,%xmm1
912	pshufd	$78,%xmm7,%xmm3
913	movdqa	%xmm0,%xmm4
914	psrlq	$1,%xmm0
915	pxor	%xmm7,%xmm3
916	pxor	%xmm4,%xmm1
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm7
917.byte	102,15,58,68,250,17
918	movups	16(%edx),%xmm2
919	pxor	%xmm0,%xmm4
920	psrlq	$5,%xmm0
921	pxor	%xmm4,%xmm0
922	psrlq	$1,%xmm0
923	pxor	%xmm1,%xmm0
# Encoded instruction: pclmulqdq $0x00,%xmm5,%xmm3
924.byte	102,15,58,68,221,0
925	leal	32(%esi),%esi
926	subl	$32,%ebx
927	ja	L010mod_loop
# Finish the last pair: multiply the accumulator by table bytes 16..31, add
# the pending products of the second block and reduce, without loading any
# further input.
928L009even_tail:
929	pshufd	$78,%xmm0,%xmm4
930	movdqa	%xmm0,%xmm1
931	pxor	%xmm0,%xmm4
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm0
932.byte	102,15,58,68,194,0
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm1
933.byte	102,15,58,68,202,17
# Encoded instruction: pclmulqdq $0x10,%xmm5,%xmm4
934.byte	102,15,58,68,229,16
935	movdqa	(%ecx),%xmm5
936	xorps	%xmm6,%xmm0
937	xorps	%xmm7,%xmm1
938	pxor	%xmm0,%xmm3
939	pxor	%xmm1,%xmm3
940	pxor	%xmm3,%xmm4
941	movdqa	%xmm4,%xmm3
942	psrldq	$8,%xmm4
943	pslldq	$8,%xmm3
944	pxor	%xmm4,%xmm1
945	pxor	%xmm3,%xmm0
946	movdqa	%xmm0,%xmm4
947	movdqa	%xmm0,%xmm3
948	psllq	$5,%xmm0
949	pxor	%xmm0,%xmm3
950	psllq	$1,%xmm0
951	pxor	%xmm3,%xmm0
952	psllq	$57,%xmm0
953	movdqa	%xmm0,%xmm3
954	pslldq	$8,%xmm0
955	psrldq	$8,%xmm3
956	pxor	%xmm4,%xmm0
957	pxor	%xmm3,%xmm1
958	movdqa	%xmm0,%xmm4
959	psrlq	$1,%xmm0
960	pxor	%xmm4,%xmm1
961	pxor	%xmm0,%xmm4
962	psrlq	$5,%xmm0
963	pxor	%xmm4,%xmm0
964	psrlq	$1,%xmm0
965	pxor	%xmm1,%xmm0
# The remaining count is zero when one unpaired block is still left; in that
# case fall through to the single-block path with table bytes 0..15 reloaded.
966	testl	%ebx,%ebx
967	jnz	L011done
968	movups	(%edx),%xmm2
# Single block: xor it into the accumulator and multiply by table bytes
# 0..15, computing the xor of the table halves on the fly (%xmm4).
969L008odd_tail:
970	movdqu	(%esi),%xmm3
# Encoded instruction: pshufb %xmm5,%xmm3
971.byte	102,15,56,0,221
972	pxor	%xmm3,%xmm0
973	movdqa	%xmm0,%xmm1
974	pshufd	$78,%xmm0,%xmm3
975	pshufd	$78,%xmm2,%xmm4
976	pxor	%xmm0,%xmm3
977	pxor	%xmm2,%xmm4
# Encoded instruction: pclmulqdq $0x00,%xmm2,%xmm0
978.byte	102,15,58,68,194,0
# Encoded instruction: pclmulqdq $0x11,%xmm2,%xmm1
979.byte	102,15,58,68,202,17
# Encoded instruction: pclmulqdq $0x00,%xmm4,%xmm3
980.byte	102,15,58,68,220,0
981	xorps	%xmm0,%xmm3
982	xorps	%xmm1,%xmm3
983	movdqa	%xmm3,%xmm4
984	psrldq	$8,%xmm3
985	pslldq	$8,%xmm4
986	pxor	%xmm3,%xmm1
987	pxor	%xmm4,%xmm0
988	movdqa	%xmm0,%xmm4
989	movdqa	%xmm0,%xmm3
990	psllq	$5,%xmm0
991	pxor	%xmm0,%xmm3
992	psllq	$1,%xmm0
993	pxor	%xmm3,%xmm0
994	psllq	$57,%xmm0
995	movdqa	%xmm0,%xmm3
996	pslldq	$8,%xmm0
997	psrldq	$8,%xmm3
998	pxor	%xmm4,%xmm0
999	pxor	%xmm3,%xmm1
1000	movdqa	%xmm0,%xmm4
1001	psrlq	$1,%xmm0
1002	pxor	%xmm4,%xmm1
1003	pxor	%xmm0,%xmm4
1004	psrlq	$5,%xmm0
1005	pxor	%xmm4,%xmm0
1006	psrlq	$1,%xmm0
1007	pxor	%xmm1,%xmm0
# Restore the in-memory byte order and write the accumulator back.
1008L011done:
# Encoded instruction: pshufb %xmm5,%xmm0
1009.byte	102,15,56,0,197
1010	movdqu	%xmm0,(%eax)
1011	popl	%edi
1012	popl	%esi
1013	popl	%ebx
1014	popl	%ebp
1015	ret
# Lbswap -- two 16-byte constants used by the clmul routines, aligned to 64
# bytes so that the movdqa and pand accesses to them are always aligned.
#   bytes 0..15   PSHUFB control mask 15,14,...,0 that reverses the byte order
#                 of an XMM register
#   bytes 16..31  constant xored in by gcm_init_clmul when the top bit of its
#                 input is set; read as a little-endian 128-bit number it is
#                 0xC2 in the top byte and 1 in the bottom byte
1016.align	6,0x90
1017Lbswap:
1018.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1019.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
# Lrem_8bit -- table of 256 16-bit words (32 rows of 8), aligned to 64 bytes.
# gcm_ghash_4bit_mmx indexes it with the byte shifted out of its running
# value (scale 2) and inserts the selected word with pinsrw.
# NOTE(review): presumably the precomputed reduction remainders for an 8-bit
# shift -- confirm against the generator script.
1020.align	6,0x90
1021Lrem_8bit:
1022.value	0,450,900,582,1800,1738,1164,1358
1023.value	3600,4050,3476,3158,2328,2266,2716,2910
1024.value	7200,7650,8100,7782,6952,6890,6316,6510
1025.value	4656,5106,4532,4214,5432,5370,5820,6014
1026.value	14400,14722,15300,14854,16200,16010,15564,15630
1027.value	13904,14226,13780,13334,12632,12442,13020,13086
1028.value	9312,9634,10212,9766,9064,8874,8428,8494
1029.value	10864,11186,10740,10294,11640,11450,12028,12094
1030.value	28800,28994,29444,29382,30600,30282,29708,30158
1031.value	32400,32594,32020,31958,31128,30810,31260,31710
1032.value	27808,28002,28452,28390,27560,27242,26668,27118
1033.value	25264,25458,24884,24822,26040,25722,26172,26622
1034.value	18624,18690,19268,19078,20424,19978,19532,19854
1035.value	18128,18194,17748,17558,16856,16410,16988,17310
1036.value	21728,21794,22372,22182,21480,21034,20588,20910
1037.value	23280,23346,22900,22710,24056,23610,24188,24510
1038.value	57600,57538,57988,58182,58888,59338,58764,58446
1039.value	61200,61138,60564,60758,59416,59866,60316,59998
1040.value	64800,64738,65188,65382,64040,64490,63916,63598
1041.value	62256,62194,61620,61814,62520,62970,63420,63102
1042.value	55616,55426,56004,56070,56904,57226,56780,56334
1043.value	55120,54930,54484,54550,53336,53658,54236,53790
1044.value	50528,50338,50916,50982,49768,50090,49644,49198
1045.value	52080,51890,51444,51510,52344,52666,53244,52798
1046.value	37248,36930,37380,37830,38536,38730,38156,38094
1047.value	40848,40530,39956,40406,39064,39258,39708,39646
1048.value	36256,35938,36388,36838,35496,35690,35116,35054
1049.value	33712,33394,32820,33270,33976,34170,34620,34558
1050.value	43456,43010,43588,43910,44744,44810,44364,44174
1051.value	42960,42514,42068,42390,41176,41242,41820,41630
1052.value	46560,46114,46692,47014,45800,45866,45420,45230
1053.value	48112,47666,47220,47542,48376,48442,49020,48830
# Lrem_4bit -- table of 16 qwords, aligned to 64 bytes. Each entry is written
# as two .long values: a zero low dword followed by the high dword.
# gcm_gmult_4bit_mmx indexes it with the 4 bits shifted out of its running
# value (scale 8) and xors the entry into the upper half of that value.
# NOTE(review): presumably the precomputed reduction remainders for a 4-bit
# shift -- confirm against the generator script.
1054.align	6,0x90
1055Lrem_4bit:
1056.long	0,0,0,471859200,0,943718400,0,610271232
1057.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1058.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1059.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
# NUL-terminated ASCII identification string embedded in the object file. It
# decodes to: GHASH for x86, CRYPTOGAMS by <appro@openssl.org>
1060.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1061.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1062.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
1063.byte	0
1064#endif
1065