1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11#if defined(BORINGSSL_PREFIX)
12#include <boringssl_prefix_symbols_asm.h>
13#endif
14.text
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32.p2align	4
_vpaes_encrypt_core:
# AES encryption of one 16-byte block using constant-time SSSE3
# vector permutes (no secret-indexed table lookups).
# In:  %xmm0 = plaintext block, %rdx = key schedule,
#      240(%rdx) = round count.
#      %xmm9-%xmm15 must hold the constants loaded by _vpaes_preheat:
#      %xmm9 = L$k_s0F, %xmm10/%xmm11 = L$k_inv, %xmm13/%xmm12 = L$k_sb1,
#      %xmm15/%xmm14 = L$k_sb2.
# Out: %xmm0 = ciphertext block.
# Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags.
# ".byte 102,15,56,0,NN" lines are hand-encoded SSSE3 pshufb
# instructions; each is decoded in a comment.
	movq	%rdx,%r9		# r9 = round-key pointer
	movq	$16,%r11		# r11 = MixColumns rotation state (steps by 16 mod 0x40)
	movl	240(%rdx),%eax		# eax = remaining rounds
	movdqa	%xmm9,%xmm1
	movdqa	L$k_ipt(%rip),%xmm2	# input transform, low half
	pandn	%xmm0,%xmm1
	movdqu	(%r9),%xmm5		# round-0 key
	psrld	$4,%xmm1		# xmm1 = high nibbles of state
	pand	%xmm9,%xmm0		# xmm0 = low nibbles of state
.byte	102,15,56,0,208			# pshufb %xmm0,%xmm2
	movdqa	L$k_ipt+16(%rip),%xmm0	# input transform, high half
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
	pxor	%xmm5,%xmm2		# add round-0 key
	addq	$16,%r9
	pxor	%xmm2,%xmm0		# xmm0 = transformed input
	leaq	L$k_mc_backward(%rip),%r10
	jmp	L$enc_entry

.p2align	4
L$enc_loop:
# one middle round; inversion outputs from L$enc_entry are in
# xmm2/xmm3, current round key in xmm5
	movdqa	%xmm13,%xmm4		# xmm4 = L$k_sb1 (sbox, low half)
	movdqa	%xmm12,%xmm0		# xmm0 = L$k_sb1+16 (sbox, high half)
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,15,56,0,195			# pshufb %xmm3,%xmm0
	pxor	%xmm5,%xmm4		# add round key
	movdqa	%xmm15,%xmm5		# xmm5 = L$k_sb2 (low half)
	pxor	%xmm4,%xmm0		# xmm0 = A (SubBytes output + key)
	movdqa	-64(%r11,%r10,1),%xmm1	# xmm1 = L$k_mc_forward row for this round
.byte	102,15,56,0,234			# pshufb %xmm2,%xmm5
	movdqa	(%r11,%r10,1),%xmm4	# xmm4 = L$k_mc_backward row
	movdqa	%xmm14,%xmm2		# xmm2 = L$k_sb2+16 (high half)
.byte	102,15,56,0,211			# pshufb %xmm3,%xmm2
	movdqa	%xmm0,%xmm3
	pxor	%xmm5,%xmm2		# xmm2 = 2*A (doubled sbox output)
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0  -> rotated A
	addq	$16,%r9			# advance to next round key
	pxor	%xmm2,%xmm0		# accumulate MixColumns terms
.byte	102,15,56,0,220			# pshufb %xmm4,%xmm3  -> back-rotated A
	addq	$16,%r11		# step rotation state
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0  -> rotate again
	andq	$0x30,%r11		# keep rotation state in 0x00..0x30
	subq	$1,%rax			# one fewer round (sets ZF for jnz below)
	pxor	%xmm3,%xmm0		# xmm0 = MixColumns result

L$enc_entry:
# top of round: split nibbles and run the GF(2^4) inversion tables
	movdqa	%xmm9,%xmm1
	movdqa	%xmm11,%xmm5		# xmm5 = L$k_inv+16
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1		# xmm1 = i = high nibbles
	pand	%xmm9,%xmm0		# xmm0 = k = low nibbles
.byte	102,15,56,0,232			# pshufb %xmm0,%xmm5  (a/k)
	movdqa	%xmm10,%xmm3		# xmm10 = L$k_inv
	pxor	%xmm1,%xmm0		# xmm0 = j = i ^ k
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3  (1/i)
	movdqa	%xmm10,%xmm4
	pxor	%xmm5,%xmm3		# xmm3 = 1/i ^ a/k
.byte	102,15,56,0,224			# pshufb %xmm0,%xmm4  (1/j)
	movdqa	%xmm10,%xmm2
	pxor	%xmm5,%xmm4		# xmm4 = 1/j ^ a/k
.byte	102,15,56,0,211			# pshufb %xmm3,%xmm2
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2		# xmm2 = "io" inversion output
.byte	102,15,56,0,220			# pshufb %xmm4,%xmm3
	movdqu	(%r9),%xmm5		# preload next round key
	pxor	%xmm1,%xmm3		# xmm3 = "jo" inversion output
	jnz	L$enc_loop		# ZF from "subq $1,%rax" above

# last round: output sbox L$k_sbo, then ShiftRows via L$k_sr row
	movdqa	-96(%r10),%xmm4		# L$k_sbo (low half)
	movdqa	-80(%r10),%xmm0		# L$k_sbo+16 (high half)
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
	pxor	%xmm5,%xmm4		# add last round key
.byte	102,15,56,0,195			# pshufb %xmm3,%xmm0
	movdqa	64(%r11,%r10,1),%xmm1	# L$k_sr row selected by r11
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0  (ShiftRows)
	.byte	0xf3,0xc3		# repz ret
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148.p2align	4
_vpaes_encrypt_core_2x:
# Same round structure as _vpaes_encrypt_core, but encrypts two
# blocks in parallel: lane 1 in %xmm0, lane 2 in %xmm6 (in/out).
# Lane-2 temporaries live in %xmm7/%xmm8/%xmm11/%xmm12/%xmm13, so
# the sbox constants are reloaded from memory each round instead of
# being kept in those registers.  Used by the CTR-mode path.
# In:  %xmm0, %xmm6 = two blocks; %rdx = key schedule;
#      %xmm9/%xmm10 preloaded by _vpaes_preheat.
# Out: %xmm0, %xmm6 = two encrypted blocks.
# Clobbers: %rax, %r9-%r11, %xmm1-%xmm5, %xmm7, %xmm8, %xmm11-%xmm13.
	movq	%rdx,%r9		# r9 = round-key pointer
	movq	$16,%r11		# r11 = MixColumns rotation state
	movl	240(%rdx),%eax		# eax = remaining rounds
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	L$k_ipt(%rip),%xmm2	# input transform, low half
	movdqa	%xmm2,%xmm8
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	movdqu	(%r9),%xmm5		# round-0 key (shared by both lanes)

	psrld	$4,%xmm1		# lane 1 high nibbles
	psrld	$4,%xmm7		# lane 2 high nibbles
	pand	%xmm9,%xmm0		# lane 1 low nibbles
	pand	%xmm9,%xmm6		# lane 2 low nibbles
.byte	102,15,56,0,208			# pshufb %xmm0,%xmm2
.byte	102,68,15,56,0,198		# pshufb %xmm6,%xmm8
	movdqa	L$k_ipt+16(%rip),%xmm0	# input transform, high half
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
.byte	102,15,56,0,247			# pshufb %xmm7,%xmm6
	pxor	%xmm5,%xmm2		# add round-0 key, lane 1
	pxor	%xmm5,%xmm8		# add round-0 key, lane 2
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
	leaq	L$k_mc_backward(%rip),%r10
	jmp	L$enc2x_entry

.p2align	4
L$enc2x_loop:
# middle round for both lanes (cf. L$enc_loop in _vpaes_encrypt_core)
	movdqa	L$k_sb1(%rip),%xmm4
	movdqa	L$k_sb1+16(%rip),%xmm0
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,69,15,56,0,224		# pshufb %xmm8,%xmm12
.byte	102,15,56,0,195			# pshufb %xmm3,%xmm0
.byte	102,65,15,56,0,243		# pshufb %xmm11,%xmm6
	pxor	%xmm5,%xmm4		# add round key, lane 1
	pxor	%xmm5,%xmm12		# add round key, lane 2
	movdqa	L$k_sb2(%rip),%xmm5
	movdqa	%xmm5,%xmm13
	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
	movdqa	-64(%r11,%r10,1),%xmm1	# L$k_mc_forward row

.byte	102,15,56,0,234			# pshufb %xmm2,%xmm5
.byte	102,69,15,56,0,232		# pshufb %xmm8,%xmm13
	movdqa	(%r11,%r10,1),%xmm4	# L$k_mc_backward row

	movdqa	L$k_sb2+16(%rip),%xmm2
	movdqa	%xmm2,%xmm8
.byte	102,15,56,0,211			# pshufb %xmm3,%xmm2
.byte	102,69,15,56,0,195		# pshufb %xmm11,%xmm8
	movdqa	%xmm0,%xmm3
	movdqa	%xmm6,%xmm11
	pxor	%xmm5,%xmm2
	pxor	%xmm13,%xmm8
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
.byte	102,15,56,0,241			# pshufb %xmm1,%xmm6
	addq	$16,%r9			# next round key
	pxor	%xmm2,%xmm0
	pxor	%xmm8,%xmm6
.byte	102,15,56,0,220			# pshufb %xmm4,%xmm3
.byte	102,68,15,56,0,220		# pshufb %xmm4,%xmm11
	addq	$16,%r11		# step rotation state
	pxor	%xmm0,%xmm3
	pxor	%xmm6,%xmm11
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
.byte	102,15,56,0,241			# pshufb %xmm1,%xmm6
	andq	$0x30,%r11		# rotation state mod 0x40
	subq	$1,%rax			# one fewer round (sets ZF)
	pxor	%xmm3,%xmm0		# lane 1 MixColumns result
	pxor	%xmm11,%xmm6		# lane 2 MixColumns result

L$enc2x_entry:
# top of round, both lanes (cf. L$enc_entry): nibble split + inversion
	movdqa	%xmm9,%xmm1
	movdqa	%xmm9,%xmm7
	movdqa	L$k_inv+16(%rip),%xmm5
	movdqa	%xmm5,%xmm13
	pandn	%xmm0,%xmm1
	pandn	%xmm6,%xmm7
	psrld	$4,%xmm1		# lane 1: i
	psrld	$4,%xmm7		# lane 2: i
	pand	%xmm9,%xmm0		# lane 1: k
	pand	%xmm9,%xmm6		# lane 2: k
.byte	102,15,56,0,232			# pshufb %xmm0,%xmm5
.byte	102,68,15,56,0,238		# pshufb %xmm6,%xmm13
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm1,%xmm0		# lane 1: j
	pxor	%xmm7,%xmm6		# lane 2: j
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
.byte	102,68,15,56,0,223		# pshufb %xmm7,%xmm11
	movdqa	%xmm10,%xmm4
	movdqa	%xmm10,%xmm12
	pxor	%xmm5,%xmm3
	pxor	%xmm13,%xmm11
.byte	102,15,56,0,224			# pshufb %xmm0,%xmm4
.byte	102,68,15,56,0,230		# pshufb %xmm6,%xmm12
	movdqa	%xmm10,%xmm2
	movdqa	%xmm10,%xmm8
	pxor	%xmm5,%xmm4
	pxor	%xmm13,%xmm12
.byte	102,15,56,0,211			# pshufb %xmm3,%xmm2
.byte	102,69,15,56,0,195		# pshufb %xmm11,%xmm8
	movdqa	%xmm10,%xmm3
	movdqa	%xmm10,%xmm11
	pxor	%xmm0,%xmm2		# lane 1 "io"
	pxor	%xmm6,%xmm8		# lane 2 "io"
.byte	102,15,56,0,220			# pshufb %xmm4,%xmm3
.byte	102,69,15,56,0,220		# pshufb %xmm12,%xmm11
	movdqu	(%r9),%xmm5		# preload next round key

	pxor	%xmm1,%xmm3		# lane 1 "jo"
	pxor	%xmm7,%xmm11		# lane 2 "jo"
	jnz	L$enc2x_loop		# ZF from "subq $1,%rax"

# last round: output sbox L$k_sbo, then ShiftRows via L$k_sr row
	movdqa	-96(%r10),%xmm4		# L$k_sbo (low half)
	movdqa	-80(%r10),%xmm0		# L$k_sbo+16 (high half)
	movdqa	%xmm4,%xmm12
	movdqa	%xmm0,%xmm6
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,69,15,56,0,224		# pshufb %xmm8,%xmm12
	pxor	%xmm5,%xmm4		# add last round key, lane 1
	pxor	%xmm5,%xmm12		# add last round key, lane 2
.byte	102,15,56,0,195			# pshufb %xmm3,%xmm0
.byte	102,65,15,56,0,243		# pshufb %xmm11,%xmm6
	movdqa	64(%r11,%r10,1),%xmm1	# L$k_sr row

	pxor	%xmm4,%xmm0
	pxor	%xmm12,%xmm6
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0  (ShiftRows)
.byte	102,15,56,0,241			# pshufb %xmm1,%xmm6  (ShiftRows)
	.byte	0xf3,0xc3		# repz ret
290
291
292
293
294
295
296
297
298
299.p2align	4
_vpaes_decrypt_core:
# AES decryption of one 16-byte block (vector-permute construction).
# In:  %xmm0 = ciphertext, %rdx = decryption key schedule,
#      240(%rdx) = round count; %xmm9-%xmm11 preloaded by
#      _vpaes_preheat.
# Out: %xmm0 = plaintext.
# Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags.
	movq	%rdx,%r9		# r9 = round-key pointer
	movl	240(%rdx),%eax		# eax = rounds
	movdqa	%xmm9,%xmm1
	movdqa	L$k_dipt(%rip),%xmm2	# decryption input transform, low half
	pandn	%xmm0,%xmm1
	movq	%rax,%r11
	psrld	$4,%xmm1		# xmm1 = high nibbles
	movdqu	(%r9),%xmm5		# round-0 key
	shlq	$4,%r11
	pand	%xmm9,%xmm0		# xmm0 = low nibbles
.byte	102,15,56,0,208			# pshufb %xmm0,%xmm2
	movdqa	L$k_dipt+16(%rip),%xmm0	# decryption input transform, high half
	xorq	$0x30,%r11
	leaq	L$k_dsbd(%rip),%r10
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
	andq	$0x30,%r11		# r11 = ((16*rounds) ^ 0x30) & 0x30:
					# selects the final L$k_sr row below
	pxor	%xmm5,%xmm2
	movdqa	L$k_mc_forward+48(%rip),%xmm5	# rotation constant, stepped by palignr
	pxor	%xmm2,%xmm0		# transformed input + round-0 key
	addq	$16,%r9
	addq	%r10,%r11		# r11 = L$k_dsbd + row offset
	jmp	L$dec_entry

.p2align	4
L$dec_loop:
# one middle round: accumulate the four inverse-MixColumns sbox
# pairs dsb9, dsbd, dsbb, dsbe, rotating xmm0 between each pair
	movdqa	-32(%r10),%xmm4		# L$k_dsb9, low half
	movdqa	-16(%r10),%xmm1		# L$k_dsb9, high half
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,15,56,0,203			# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	0(%r10),%xmm4		# L$k_dsbd, low half
	pxor	%xmm1,%xmm0		# xmm0 = dsb9 contribution
	movdqa	16(%r10),%xmm1		# L$k_dsbd, high half

.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0  (rotate accumulator)
.byte	102,15,56,0,203			# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	32(%r10),%xmm4		# L$k_dsbb, low half
	pxor	%xmm1,%xmm0		# += dsbd contribution
	movdqa	48(%r10),%xmm1		# L$k_dsbb, high half

.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0  (rotate accumulator)
.byte	102,15,56,0,203			# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	64(%r10),%xmm4		# L$k_dsbe, low half
	pxor	%xmm1,%xmm0		# += dsbb contribution
	movdqa	80(%r10),%xmm1		# L$k_dsbe, high half

.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0  (rotate accumulator)
.byte	102,15,56,0,203			# pshufb %xmm3,%xmm1
	pxor	%xmm4,%xmm0
	addq	$16,%r9			# next round key
.byte	102,15,58,15,237,12		# palignr $12,%xmm5,%xmm5  (step rotation)
	pxor	%xmm1,%xmm0		# += dsbe contribution
	subq	$1,%rax			# one fewer round (sets ZF for jnz below)

L$dec_entry:
# nibble split + GF(2^4) inversion, same as the encrypt core
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	movdqa	%xmm11,%xmm2		# L$k_inv+16
	psrld	$4,%xmm1		# xmm1 = i (high nibbles)
	pand	%xmm9,%xmm0		# xmm0 = k (low nibbles)
.byte	102,15,56,0,208			# pshufb %xmm0,%xmm2  (a/k)
	movdqa	%xmm10,%xmm3		# L$k_inv
	pxor	%xmm1,%xmm0		# xmm0 = j
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3  (1/i)
	movdqa	%xmm10,%xmm4
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,224			# pshufb %xmm0,%xmm4  (1/j)
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211			# pshufb %xmm3,%xmm2
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2		# xmm2 = "io"
.byte	102,15,56,0,220			# pshufb %xmm4,%xmm3
	movdqu	(%r9),%xmm0		# preload next round key
	pxor	%xmm1,%xmm3		# xmm3 = "jo"
	jnz	L$dec_loop		# ZF from "subq $1,%rax"

# last round: output sbox L$k_dsbo, then undo ShiftRows via k_sr row
	movdqa	96(%r10),%xmm4		# L$k_dsbo, low half
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
	pxor	%xmm0,%xmm4		# add last round key
	movdqa	112(%r10),%xmm0		# L$k_dsbo, high half
	movdqa	-352(%r11),%xmm2	# L$k_sr row (r11 computed in prologue)
.byte	102,15,56,0,195			# pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,194			# pshufb %xmm2,%xmm0  (inverse ShiftRows)
	.byte	0xf3,0xc3		# repz ret
398
399
400
401
402
403
404
405
406
407.p2align	4
_vpaes_schedule_core:
# AES key-schedule generation for all three key sizes.
# In:  %rdi = raw key bytes, %esi = key size in bits (128/192/256),
#      %rdx = where to write the schedule, %rcx = direction
#      (0 = encrypting, nonzero = decrypting), %r8 = initial rotation
#      offset into L$k_sr (set by the _vpaes_set_*_key wrappers).
# Uses the schedule helpers below; wipes %xmm0-%xmm7 on exit so no
# key material is left behind in registers.

	call	_vpaes_preheat		# load constants into xmm9-xmm15
	movdqa	L$k_rcon(%rip),%xmm8	# xmm8 = round-constant state
	movdqu	(%rdi),%xmm0		# load raw key (first 16 bytes)

# apply the input transform; xmm7 carries the "previous round key"
	movdqa	%xmm0,%xmm3
	leaq	L$k_ipt(%rip),%r11
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm7

	leaq	L$k_sr(%rip),%r10	# r10 = L$k_sr (used by mangle too)
	testq	%rcx,%rcx
	jnz	L$schedule_am_decrypting

# encrypting: emit the round-0 key as-is
	movdqu	%xmm0,(%rdx)
	jmp	L$schedule_go

L$schedule_am_decrypting:
# decrypting: rotate the round-0 key with L$k_sr[%r8] before storing
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
	movdqu	%xmm3,(%rdx)
	xorq	$0x30,%r8		# flip rotation phase

L$schedule_go:
	cmpl	$192,%esi
	ja	L$schedule_256
	je	L$schedule_192
# fall through: 128-bit schedule

# 128-bit: 10 rounds, one schedule round per output key
L$schedule_128:
	movl	$10,%esi

L$oop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle	# write output key
	jmp	L$oop_schedule_128

# 192-bit: key bytes 8..23 are loaded and smeared; each loop pass
# generates material for three round keys (run 4 times)
.p2align	4
L$schedule_192:
	movdqu	8(%rdi),%xmm0		# load key bytes 8..23
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm6		# xmm6 = saved short ("low") side
	pxor	%xmm4,%xmm4
	movhlps	%xmm4,%xmm6		# zero the high half of xmm6
	movl	$4,%esi

L$oop_schedule_192:
	call	_vpaes_schedule_round
.byte	102,15,58,15,198,8		# palignr $8,%xmm6,%xmm0
	call	_vpaes_schedule_mangle	# save key n
	call	_vpaes_schedule_192_smear
	call	_vpaes_schedule_mangle	# save key n+1
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle	# save key n+2
	call	_vpaes_schedule_192_smear
	jmp	L$oop_schedule_192

# 256-bit: alternate full schedule rounds with "low" rounds, swapping
# the xmm6/xmm7 halves of the 32-byte key between them
.p2align	4
L$schedule_256:
	movdqu	16(%rdi),%xmm0		# load second 16 bytes of the key
	call	_vpaes_schedule_transform
	movl	$7,%esi

L$oop_schedule_256:
	call	_vpaes_schedule_mangle	# output the low result
	movdqa	%xmm0,%xmm6		# save current low half

# high round
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle

# low round: broadcast the last word, then run with halves swapped
	pshufd	$0xFF,%xmm0,%xmm0
	movdqa	%xmm7,%xmm5		# stash prior key
	movdqa	%xmm6,%xmm7
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7		# restore prior key

	jmp	L$oop_schedule_256

# final round key: apply the output transform (L$k_opt when
# encrypting, L$k_deskew when decrypting), then scrub registers
.p2align	4
L$schedule_mangle_last:
	leaq	L$k_deskew(%rip),%r11	# default: deskew (decrypting)
	testq	%rcx,%rcx
	jnz	L$schedule_mangle_last_dec

# encrypting: rotate with L$k_sr[%r8] and use L$k_opt instead
	movdqa	(%r8,%r10,1),%xmm1
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
	leaq	L$k_opt(%rip),%r11
	addq	$32,%rdx

L$schedule_mangle_last_dec:
	addq	$-16,%rdx
	pxor	L$k_s63(%rip),%xmm0
	call	_vpaes_schedule_transform	# output transform via (%r11)
	movdqu	%xmm0,(%rdx)		# store the last round key

# clear key material from the xmm registers before returning
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	.byte	0xf3,0xc3		# repz ret
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594.p2align	4
_vpaes_schedule_192_smear:
# Smear the short, low side of the 192-bit key schedule.
# In:  %xmm7 = high side ("b a x y"), %xmm6 = low side ("d c 0 0").
# Out: %xmm6 = "b+c+d b+c 0 0", %xmm0 = "b+c+d b+c b a".
# Clobbers %xmm1.
	pshufd	$0x80,%xmm6,%xmm1	# d c 0 0 -> c 0 0 0
	pshufd	$0xFE,%xmm7,%xmm0	# b a _ _ -> b b b a
	pxor	%xmm1,%xmm6		# -> c+d c 0 0
	pxor	%xmm1,%xmm1
	pxor	%xmm0,%xmm6		# -> b+c+d b+c b a
	movdqa	%xmm6,%xmm0
	movhlps	%xmm1,%xmm6		# clobber the low side with zeros
	.byte	0xf3,0xc3		# repz ret
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627.p2align	4
_vpaes_schedule_round:
# One "high" round of the key schedule: rotate the round constant
# into the prior key, rotate the new word into position, then fall
# through into _vpaes_schedule_low_round (SubBytes + smear + xor).
# In/out: %xmm0 = new key material, %xmm7 = prior round key,
#         %xmm8 = rcon state.  Clobbers %xmm1-%xmm4.

# extract and rotate the round constant
	pxor	%xmm1,%xmm1
.byte	102,65,15,58,15,200,15		# palignr $15,%xmm8,%xmm1
.byte	102,69,15,58,15,192,15		# palignr $15,%xmm8,%xmm8 (rotate rcon)
	pxor	%xmm1,%xmm7		# mix rcon into the prior key

# rotate the incoming word: broadcast last dword, rotate bytes by 1
	pshufd	$0xFF,%xmm0,%xmm0
.byte	102,15,58,15,192,1		# palignr $1,%xmm0,%xmm0

# fall through into the low round...

# _vpaes_schedule_low_round is also called directly by the 256-bit
# schedule (it skips the rcon/rotate steps above)
_vpaes_schedule_low_round:
# smear the prior key: xmm7 ^= xmm7<<32 ^ xmm7<<64, then xor 0x5B
	movdqa	%xmm7,%xmm1
	pslldq	$4,%xmm7
	pxor	%xmm1,%xmm7
	movdqa	%xmm7,%xmm1
	pslldq	$8,%xmm7
	pxor	%xmm1,%xmm7
	pxor	L$k_s63(%rip),%xmm7

# SubBytes on xmm0 (same nibble-split inversion as the cipher cores)
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1		# xmm1 = i (high nibbles)
	pand	%xmm9,%xmm0		# xmm0 = k (low nibbles)
	movdqa	%xmm11,%xmm2		# L$k_inv+16
.byte	102,15,56,0,208			# pshufb %xmm0,%xmm2  (a/k)
	pxor	%xmm1,%xmm0		# xmm0 = j
	movdqa	%xmm10,%xmm3		# L$k_inv
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3  (1/i)
	pxor	%xmm2,%xmm3
	movdqa	%xmm10,%xmm4
.byte	102,15,56,0,224			# pshufb %xmm0,%xmm4  (1/j)
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
.byte	102,15,56,0,211			# pshufb %xmm3,%xmm2
	pxor	%xmm0,%xmm2		# xmm2 = "io"
	movdqa	%xmm10,%xmm3
.byte	102,15,56,0,220			# pshufb %xmm4,%xmm3
	pxor	%xmm1,%xmm3		# xmm3 = "jo"
	movdqa	%xmm13,%xmm4		# L$k_sb1, low half
.byte	102,15,56,0,226			# pshufb %xmm2,%xmm4
	movdqa	%xmm12,%xmm0		# L$k_sb1+16, high half
.byte	102,15,56,0,195			# pshufb %xmm3,%xmm0
	pxor	%xmm4,%xmm0		# xmm0 = sbox output

# combine with the smeared prior key; result becomes the new prior key
	pxor	%xmm7,%xmm0
	movdqa	%xmm0,%xmm7
	.byte	0xf3,0xc3		# repz ret
683
684
685
686
687
688
689
690
691
692
693
694
695
696.p2align	4
_vpaes_schedule_transform:
# Linear transform of %xmm0 via the 32-byte lookup-table pair at
# (%r11): low nibbles indexed into (%r11), high nibbles into
# 16(%r11), results xored.  Requires %xmm9 = L$k_s0F.
# Clobbers %xmm1, %xmm2.
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1		# xmm1 = high nibbles
	pand	%xmm9,%xmm0		# xmm0 = low nibbles
	movdqa	(%r11),%xmm2		# table, low half
.byte	102,15,56,0,208			# pshufb %xmm0,%xmm2
	movdqa	16(%r11),%xmm0		# table, high half
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0
	pxor	%xmm2,%xmm0		# combined transform result
	.byte	0xf3,0xc3		# repz ret
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736.p2align	4
_vpaes_schedule_mangle:
# Mangle the current round key in %xmm0 (non-destructively) and
# write the result to (%rdx).  %rcx selects the direction:
# 0 = encrypting (forward mangle: xor s63, 3x mc_forward rotate),
# nonzero = decrypting (apply the inverse key transforms
# L$k_dksd/dksb/dkse/dks9).  Requires %r10 = L$k_sr (set by
# _vpaes_schedule_core).  %r8 steps through the L$k_sr rows and is
# updated; %rdx advances (enc) or retreats (dec) by 16.
# Clobbers %xmm1-%xmm5.
	movdqa	%xmm0,%xmm4		# work on a copy; xmm0 is preserved
	movdqa	L$k_mc_forward(%rip),%xmm5
	testq	%rcx,%rcx
	jnz	L$schedule_mangle_dec

# encrypting: xor in s63, then xor together three forward rotations
	addq	$16,%rdx
	pxor	L$k_s63(%rip),%xmm4
.byte	102,15,56,0,229			# pshufb %xmm5,%xmm4
	movdqa	%xmm4,%xmm3
.byte	102,15,56,0,229			# pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3
.byte	102,15,56,0,229			# pshufb %xmm5,%xmm4
	pxor	%xmm4,%xmm3

	jmp	L$schedule_mangle_both
.p2align	4
L$schedule_mangle_dec:
# decrypting: run the key through the four inverse-MixColumns key
# transform table pairs at L$k_dksd/dksb/dkse/dks9, rotating between
	leaq	L$k_dksd(%rip),%r11
	movdqa	%xmm9,%xmm1
	pandn	%xmm4,%xmm1
	psrld	$4,%xmm1		# xmm1 = high nibbles
	pand	%xmm9,%xmm4		# xmm4 = low nibbles

	movdqa	0(%r11),%xmm2		# L$k_dksd, low half
.byte	102,15,56,0,212			# pshufb %xmm4,%xmm2
	movdqa	16(%r11),%xmm3		# L$k_dksd, high half
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221			# pshufb %xmm5,%xmm3  (rotate)

	movdqa	32(%r11),%xmm2		# L$k_dksb, low half
.byte	102,15,56,0,212			# pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	48(%r11),%xmm3		# L$k_dksb, high half
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221			# pshufb %xmm5,%xmm3  (rotate)

	movdqa	64(%r11),%xmm2		# L$k_dkse, low half
.byte	102,15,56,0,212			# pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	80(%r11),%xmm3		# L$k_dkse, high half
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3
.byte	102,15,56,0,221			# pshufb %xmm5,%xmm3  (rotate)

	movdqa	96(%r11),%xmm2		# L$k_dks9, low half
.byte	102,15,56,0,212			# pshufb %xmm4,%xmm2
	pxor	%xmm3,%xmm2
	movdqa	112(%r11),%xmm3		# L$k_dks9, high half
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
	pxor	%xmm2,%xmm3

	addq	$-16,%rdx		# decryption schedule is written backwards

L$schedule_mangle_both:
	movdqa	(%r8,%r10,1),%xmm1	# L$k_sr[%r8]
.byte	102,15,56,0,217			# pshufb %xmm1,%xmm3
	addq	$-16,%r8
	andq	$0x30,%r8		# r8 cycles 0x30,0x20,0x10,0x00,...
	movdqu	%xmm3,(%rdx)		# emit the mangled key
	.byte	0xf3,0xc3		# repz ret
803
804
805
806
807
808
809.globl	_vpaes_set_encrypt_key
810.private_extern _vpaes_set_encrypt_key
811
812.p2align	4
_vpaes_set_encrypt_key:
# Expand a raw AES key into an encryption schedule.
# In:  %rdi = key bytes, %esi = key length in bits,
#      %rdx = output key structure (round count at 240(%rdx)).
# Out: %eax = 0.
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+5(%rip)
#endif

	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax			# rounds = bits/32 + 5
	movl	%eax,240(%rdx)		# store round count

	movl	$0,%ecx			# direction = encrypting
	movl	$0x30,%r8d		# initial L$k_sr rotation offset
	call	_vpaes_schedule_core
	xorl	%eax,%eax		# return 0
	.byte	0xf3,0xc3		# repz ret
830
831
832
833.globl	_vpaes_set_decrypt_key
834.private_extern _vpaes_set_decrypt_key
835
836.p2align	4
_vpaes_set_decrypt_key:
# Expand a raw AES key into a decryption schedule.
# In:  %rdi = key bytes, %esi = key length in bits,
#      %rdx = output key structure (round count at 240(%rdx)).
# Out: %eax = 0.
	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax			# rounds = bits/32 + 5
	movl	%eax,240(%rdx)		# store round count
	shll	$4,%eax
	leaq	16(%rdx,%rax,1),%rdx	# point past the end: schedule is written backwards

	movl	$1,%ecx			# direction = decrypting
	movl	%esi,%r8d
	shrl	$1,%r8d
	andl	$32,%r8d
	xorl	$32,%r8d		# r8d = 0 iff bits == 192, else 32
	call	_vpaes_schedule_core
	xorl	%eax,%eax		# return 0
	.byte	0xf3,0xc3		# repz ret
854
855
856
857.globl	_vpaes_encrypt
858.private_extern _vpaes_encrypt
859
860.p2align	4
_vpaes_encrypt:
# Encrypt one 16-byte block: in = (%rdi), out = (%rsi),
# key schedule = %rdx (passed through to the core).
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+4(%rip)
#endif
	movdqu	(%rdi),%xmm0		# load input block
	call	_vpaes_preheat		# load constants into xmm9-xmm15
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)		# store ciphertext
	.byte	0xf3,0xc3		# repz ret
872
873
874
875.globl	_vpaes_decrypt
876.private_extern _vpaes_decrypt
877
878.p2align	4
_vpaes_decrypt:
# Decrypt one 16-byte block: in = (%rdi), out = (%rsi),
# key schedule = %rdx (passed through to the core).
	movdqu	(%rdi),%xmm0		# load input block
	call	_vpaes_preheat		# load constants into xmm9-xmm15
	call	_vpaes_decrypt_core
	movdqu	%xmm0,(%rsi)		# store plaintext
	.byte	0xf3,0xc3		# repz ret
886
887
888.globl	_vpaes_cbc_encrypt
889.private_extern _vpaes_cbc_encrypt
890
891.p2align	4
_vpaes_cbc_encrypt:
# CBC mode: %rdi = in, %rsi = out, %rdx = length in bytes,
# %rcx = key schedule, %r8 = ivec, %r9d = enc flag (0 = decrypt).
# Partial trailing blocks (< 16 bytes) are not processed.
	xchgq	%rcx,%rdx		# rcx = length, rdx = key (as the cores expect)
	subq	$16,%rcx		# less than one block? nothing to do
	jc	L$cbc_abort
	movdqu	(%r8),%xmm6		# xmm6 = IV / running chain value
	subq	%rdi,%rsi		# rsi = out - in, so out = (%rsi,%rdi)
	call	_vpaes_preheat
	cmpl	$0,%r9d
	je	L$cbc_dec_loop
	jmp	L$cbc_enc_loop
.p2align	4
L$cbc_enc_loop:
	movdqu	(%rdi),%xmm0
	pxor	%xmm6,%xmm0		# xor chain value into plaintext
	call	_vpaes_encrypt_core
	movdqa	%xmm0,%xmm6		# ciphertext becomes the next chain value
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	L$cbc_enc_loop
	jmp	L$cbc_done
.p2align	4
L$cbc_dec_loop:
	movdqu	(%rdi),%xmm0
	movdqa	%xmm0,%xmm7		# keep ciphertext: next chain value
	call	_vpaes_decrypt_core
	pxor	%xmm6,%xmm0		# xor previous chain value
	movdqa	%xmm7,%xmm6
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	L$cbc_dec_loop
L$cbc_done:
	movdqu	%xmm6,(%r8)		# write the final chain value back as IV
L$cbc_abort:
	.byte	0xf3,0xc3		# repz ret
929
930
931.globl	_vpaes_ctr32_encrypt_blocks
932.private_extern _vpaes_ctr32_encrypt_blocks
933
934.p2align	4
_vpaes_ctr32_encrypt_blocks:
# CTR mode with a 32-bit counter: %rdi = in, %rsi = out,
# %rdx = number of 16-byte blocks, %rcx = key schedule, %r8 = ivec.
# Handles one odd block with the 1x core, then processes the rest
# two at a time with _vpaes_encrypt_core_2x.

	xchgq	%rcx,%rdx		# rcx = block count, rdx = key
	testq	%rcx,%rcx
	jz	L$ctr32_abort
	movdqu	(%r8),%xmm0		# xmm0 = counter block
	movdqa	L$ctr_add_one(%rip),%xmm8
	subq	%rdi,%rsi		# out = (%rsi,%rdi)
	call	_vpaes_preheat
	movdqa	%xmm0,%xmm6
	pshufb	L$rev_ctr(%rip),%xmm6	# byte-swap counter word so paddd increments it

	testq	$1,%rcx
	jz	L$ctr32_prep_loop

# odd block count: do one block with the single-block core so the
# main loop can work in pairs
	movdqu	(%rdi),%xmm7
	call	_vpaes_encrypt_core
	pxor	%xmm7,%xmm0		# keystream ^ input
	paddd	%xmm8,%xmm6		# counter += 1
	movdqu	%xmm0,(%rsi,%rdi,1)
	subq	$1,%rcx
	leaq	16(%rdi),%rdi
	jz	L$ctr32_done

L$ctr32_prep_loop:
# xmm14/xmm15 hold the two counter blocks (n and n+1), counter in
# byte-swapped form
	movdqa	%xmm6,%xmm14
	movdqa	%xmm6,%xmm15
	paddd	%xmm8,%xmm15

L$ctr32_loop:
	movdqa	L$rev_ctr(%rip),%xmm1
	movdqa	%xmm14,%xmm0
	movdqa	%xmm15,%xmm6
.byte	102,15,56,0,193			# pshufb %xmm1,%xmm0  (swap back)
.byte	102,15,56,0,241			# pshufb %xmm1,%xmm6  (swap back)
	call	_vpaes_encrypt_core_2x
	movdqu	(%rdi),%xmm1
	movdqu	16(%rdi),%xmm2
	movdqa	L$ctr_add_two(%rip),%xmm3
	pxor	%xmm1,%xmm0		# keystream ^ input, block n
	pxor	%xmm2,%xmm6		# keystream ^ input, block n+1
	paddd	%xmm3,%xmm14		# both counters += 2
	paddd	%xmm3,%xmm15
	movdqu	%xmm0,(%rsi,%rdi,1)
	movdqu	%xmm6,16(%rsi,%rdi,1)
	subq	$2,%rcx
	leaq	32(%rdi),%rdi
	jnz	L$ctr32_loop

L$ctr32_done:
L$ctr32_abort:
	.byte	0xf3,0xc3		# repz ret
992
993
994
995
996
997
998
999
1000
1001.p2align	4
_vpaes_preheat:
# Preload the constants the encrypt/decrypt cores expect in
# registers (all addressed relative to L$k_s0F).  Clobbers %r10.
	leaq	L$k_s0F(%rip),%r10
	movdqa	-32(%r10),%xmm10	# L$k_inv     (inversion table, low)
	movdqa	-16(%r10),%xmm11	# L$k_inv+16  (inversion table, high)
	movdqa	0(%r10),%xmm9		# L$k_s0F     (0x0F nibble mask)
	movdqa	48(%r10),%xmm13		# L$k_sb1     (sbox 1, low half)
	movdqa	64(%r10),%xmm12		# L$k_sb1+16  (sbox 1, high half)
	movdqa	80(%r10),%xmm15		# L$k_sb2     (sbox 2, low half)
	movdqa	96(%r10),%xmm14		# L$k_sb2+16  (sbox 2, high half)
	.byte	0xf3,0xc3		# repz ret
1013
1014
1015
1016
1017
1018
1019
1020
.p2align	6
_vpaes_consts:
# Constant tables for the vector-permute AES implementation.  Most
# entries come in lo/hi pairs: one 16-byte pshufb table indexed by
# the low nibbles of each byte and one indexed by the high nibbles.

# GF(2^4) inversion tables used by the nibble-based SubBytes
L$k_inv:
.quad	0x0E05060F0D080180, 0x040703090A0B0C02
.quad	0x01040A060F0B0780, 0x030D0E0C02050809

# 0x0F in every byte: low-nibble mask
L$k_s0F:
.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

# input transform (maps AES representation into vpaes basis)
L$k_ipt:
.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

# encryption sbox output tables
L$k_sb1:
.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
L$k_sb2:
.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
L$k_sbo:
.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

# MixColumns byte-rotation tables (4 rows each, selected per round)
L$k_mc_forward:
.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
.quad	0x080B0A0904070605, 0x000302010C0F0E0D
.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
.quad	0x000302010C0F0E0D, 0x080B0A0904070605

L$k_mc_backward:
.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
.quad	0x020100030E0D0C0F, 0x0A09080B06050407
.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
.quad	0x0A09080B06050407, 0x020100030E0D0C0F

# ShiftRows permutations (4 rows, first row is the identity)
L$k_sr:
.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad	0x030E09040F0A0500, 0x0B06010C07020D08
.quad	0x0F060D040B020900, 0x070E050C030A0108
.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

# round-constant generator state for the key schedule
L$k_rcon:
.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

# 0x5B in every byte (key-schedule bias constant)
L$k_s63:
.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

# output transform for the last encryption-schedule key
L$k_opt:
.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

# deskew transform for the last decryption-schedule key
L$k_deskew:
.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

# decryption key-schedule transforms, applied in sequence by
# _vpaes_schedule_mangle (dec path)
L$k_dksd:
.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
L$k_dksb:
.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
L$k_dkse:
.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
L$k_dks9:
.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE

# decryption input transform
L$k_dipt:
.quad	0x0F505B040B545F00, 0x154A411E114E451A
.quad	0x86E383E660056500, 0x12771772F491F194

# decryption sbox / inverse-MixColumns tables, used in this order by
# L$dec_loop; dsbo is the final-round output sbox
L$k_dsb9:
.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
L$k_dsbd:
.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
L$k_dsbb:
.quad	0xD022649296B44200, 0x602646F6B0F2D404
.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
L$k_dsbe:
.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
L$k_dsbo:
.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C

# byte-swap permutation for the 32-bit CTR counter word
L$rev_ctr:
.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908

# dword increments for the (byte-swapped) CTR counter
L$ctr_add_one:
.quad	0x0000000000000000, 0x0000000100000000
L$ctr_add_two:
.quad	0x0000000000000000, 0x0000000200000000

# "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)\0"
.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align	6
1129
1130#endif
1131