1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11.text
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# _vpaes_encrypt_core
#
# Runs the cipher rounds over the single 16-byte block held in %xmm0 and
# leaves the result in %xmm0.
#
# Inputs:
#   %xmm0         block to process
#   %rdx          key structure; 16-byte round keys are read in order starting
#                 at offset 0 and a 32-bit loop count is read from offset 240
#   %xmm9-%xmm15  constants, as loaded by _vpaes_preheat
# Output:
#   %xmm0
# Written: %xmm1-%xmm5, %r9, %r10, %r11, %rax, flags.
# Left untouched: %xmm6-%xmm8 (the CTR caller in this file keeps live values
# there across the call).
#
# Table lookups are register-to-register pshufb instructions, emitted as raw
# .byte sequences (decoded in the trailing comments).  Memory operands are
# addressed only through the key cursor and the round index, never through
# block or key contents.
29.p2align	4
30_vpaes_encrypt_core:
31
32	movq	%rdx,%r9	# r9 = cursor over the round keys
33	movq	$16,%r11	# r11 = byte offset of the current entry in the 4-entry tables
34	movl	240(%rdx),%eax	# eax = loop count stored with the key (upper half of rax zeroed)
35	movdqa	%xmm9,%xmm1
36	movdqa	L$k_ipt(%rip),%xmm2
37	pandn	%xmm0,%xmm1	# xmm1 = bits of the block outside the xmm9 mask
38	movdqu	(%r9),%xmm5	# xmm5 = first round key
39	psrld	$4,%xmm1	# ... moved down by 4 bits
40	pand	%xmm9,%xmm0	# xmm0 = bits of the block inside the xmm9 mask
41.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
42	movdqa	L$k_ipt+16(%rip),%xmm0
43.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
44	pxor	%xmm5,%xmm2
45	addq	$16,%r9	# step the key cursor; the first jnz below sees these flags
46	pxor	%xmm2,%xmm0
47	leaq	L$k_mc_backward(%rip),%r10	# r10 = base used for all table addressing below
48	jmp	L$enc_entry
49
# Loop body, run once per unit of the count loaded from 240(%rdx).  On entry
# xmm2 and xmm3 hold the lookup indices produced at L$enc_entry and xmm5 holds
# the current round key.
50.p2align	4
51L$enc_loop:
52
53	movdqa	%xmm13,%xmm4
54	movdqa	%xmm12,%xmm0
55.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
56.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
57	pxor	%xmm5,%xmm4	# fold in the round key
58	movdqa	%xmm15,%xmm5
59	pxor	%xmm4,%xmm0
60	movdqa	-64(%r11,%r10,1),%xmm1	# xmm1 = L$k_mc_forward + r11 (64 bytes below r10)
61.byte	102,15,56,0,234	# pshufb %xmm2,%xmm5
62	movdqa	(%r11,%r10,1),%xmm4	# xmm4 = L$k_mc_backward + r11
63	movdqa	%xmm14,%xmm2
64.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
65	movdqa	%xmm0,%xmm3
66	pxor	%xmm5,%xmm2
67.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
68	addq	$16,%r9	# key cursor -> following round key
69	pxor	%xmm2,%xmm0
70.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
71	addq	$16,%r11
72	pxor	%xmm0,%xmm3
73.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
74	andq	$0x30,%r11	# r11 = (r11 + 16) & 0x30: cycle through the 4 table entries
75	subq	$1,%rax	# count down; the jnz after L$enc_entry tests this result
76	pxor	%xmm3,%xmm0
77
# Shared tail of every pass: split xmm0 into two 4-bit halves per byte and
# push them through the xmm10 / xmm11 tables, leaving lookup indices in xmm2
# and xmm3 and the next round key in xmm5.
78L$enc_entry:
79
80	movdqa	%xmm9,%xmm1
81	movdqa	%xmm11,%xmm5
82	pandn	%xmm0,%xmm1
83	psrld	$4,%xmm1
84	pand	%xmm9,%xmm0
85.byte	102,15,56,0,232	# pshufb %xmm0,%xmm5
86	movdqa	%xmm10,%xmm3
87	pxor	%xmm1,%xmm0
88.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
89	movdqa	%xmm10,%xmm4
90	pxor	%xmm5,%xmm3
91.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
92	movdqa	%xmm10,%xmm2
93	pxor	%xmm5,%xmm4
94.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
95	movdqa	%xmm10,%xmm3
96	pxor	%xmm0,%xmm2
97.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
98	movdqu	(%r9),%xmm5	# xmm5 = next round key (unaligned load)
99	pxor	%xmm1,%xmm3
# ZF comes from the subq that ends L$enc_loop, or from the addq before the
# initial jmp; no instruction in between writes flags.
100	jnz	L$enc_loop
101
102
# Final pass: uses the table pair at -96/-80 from r10 (L$k_sbo) and the
# L$k_sr entry selected by r11; the L$k_mc_* shuffles are not applied.
103	movdqa	-96(%r10),%xmm4	# L$k_sbo
104	movdqa	-80(%r10),%xmm0	# L$k_sbo+16
105.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
106	pxor	%xmm5,%xmm4	# fold in the last round key
107.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
108	movdqa	64(%r11,%r10,1),%xmm1	# L$k_sr + r11
109	pxor	%xmm4,%xmm0
110.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
111	.byte	0xf3,0xc3	# rep ret
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# _vpaes_encrypt_core_2x
#
# Two-block variant of _vpaes_encrypt_core: the same instruction sequence is
# applied to %xmm0 and to %xmm6, interleaved one instruction at a time, with
# both blocks using the same round keys.
#
# Inputs:
#   %xmm0, %xmm6   the two blocks to process
#   %rdx           key structure (round keys from offset 0, 32-bit loop count
#                  at offset 240)
#   %xmm9, %xmm10  constants, as loaded by _vpaes_preheat
# Outputs:
#   %xmm0, %xmm6
# Written: %xmm1-%xmm5, %xmm7, %xmm8, %xmm11-%xmm13, %r9, %r10, %r11, %rax,
# flags.
# Left untouched: %xmm14, %xmm15 (the CTR caller keeps its counters there).
#
# Because %xmm11-%xmm13 serve as scratch for the second block, the tables that
# the single-block core takes from %xmm11-%xmm15 are reloaded from memory here
# (L$k_inv+16, L$k_sb1, L$k_sb2).
#
# Register pairing (first block / second block):
#   xmm0/xmm6  xmm1/xmm7  xmm2/xmm8  xmm3/xmm11  xmm4/xmm12  xmm5/xmm13
145.p2align	4
146_vpaes_encrypt_core_2x:
147
148	movq	%rdx,%r9	# r9 = cursor over the round keys
149	movq	$16,%r11	# r11 = byte offset of the current entry in the 4-entry tables
150	movl	240(%rdx),%eax	# eax = loop count stored with the key
151	movdqa	%xmm9,%xmm1
152	movdqa	%xmm9,%xmm7
153	movdqa	L$k_ipt(%rip),%xmm2
154	movdqa	%xmm2,%xmm8
155	pandn	%xmm0,%xmm1
156	pandn	%xmm6,%xmm7
157	movdqu	(%r9),%xmm5	# xmm5 = first round key, shared by both blocks
158
159	psrld	$4,%xmm1
160	psrld	$4,%xmm7
161	pand	%xmm9,%xmm0
162	pand	%xmm9,%xmm6
163.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
164.byte	102,68,15,56,0,198	# pshufb %xmm6,%xmm8
165	movdqa	L$k_ipt+16(%rip),%xmm0
166	movdqa	%xmm0,%xmm6
167.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
168.byte	102,15,56,0,247	# pshufb %xmm7,%xmm6
169	pxor	%xmm5,%xmm2
170	pxor	%xmm5,%xmm8
171	addq	$16,%r9	# step the key cursor; the first jnz below sees these flags
172	pxor	%xmm2,%xmm0
173	pxor	%xmm8,%xmm6
174	leaq	L$k_mc_backward(%rip),%r10	# r10 = base used for all table addressing below
175	jmp	L$enc2x_entry
176
# Loop body, run once per unit of the count loaded from 240(%rdx).
177.p2align	4
178L$enc2x_loop:
179
180	movdqa	L$k_sb1(%rip),%xmm4
181	movdqa	L$k_sb1+16(%rip),%xmm0
182	movdqa	%xmm4,%xmm12
183	movdqa	%xmm0,%xmm6
184.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
185.byte	102,69,15,56,0,224	# pshufb %xmm8,%xmm12
186.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
187.byte	102,65,15,56,0,243	# pshufb %xmm11,%xmm6
188	pxor	%xmm5,%xmm4	# fold in the round key (both blocks)
189	pxor	%xmm5,%xmm12
190	movdqa	L$k_sb2(%rip),%xmm5
191	movdqa	%xmm5,%xmm13
192	pxor	%xmm4,%xmm0
193	pxor	%xmm12,%xmm6
194	movdqa	-64(%r11,%r10,1),%xmm1	# xmm1 = L$k_mc_forward + r11, shared by both blocks
195
196.byte	102,15,56,0,234	# pshufb %xmm2,%xmm5
197.byte	102,69,15,56,0,232	# pshufb %xmm8,%xmm13
198	movdqa	(%r11,%r10,1),%xmm4	# xmm4 = L$k_mc_backward + r11, shared by both blocks
199
200	movdqa	L$k_sb2+16(%rip),%xmm2
201	movdqa	%xmm2,%xmm8
202.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
203.byte	102,69,15,56,0,195	# pshufb %xmm11,%xmm8
204	movdqa	%xmm0,%xmm3
205	movdqa	%xmm6,%xmm11
206	pxor	%xmm5,%xmm2
207	pxor	%xmm13,%xmm8
208.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
209.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
210	addq	$16,%r9	# key cursor -> following round key
211	pxor	%xmm2,%xmm0
212	pxor	%xmm8,%xmm6
213.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
214.byte	102,68,15,56,0,220	# pshufb %xmm4,%xmm11
215	addq	$16,%r11
216	pxor	%xmm0,%xmm3
217	pxor	%xmm6,%xmm11
218.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
219.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
220	andq	$0x30,%r11	# r11 = (r11 + 16) & 0x30: cycle through the 4 table entries
221	subq	$1,%rax	# count down; the jnz after L$enc2x_entry tests this result
222	pxor	%xmm3,%xmm0
223	pxor	%xmm11,%xmm6
224
# Shared tail of every pass: nibble split and lookups through xmm10 and
# L$k_inv+16, leaving indices in xmm2/xmm8 and xmm3/xmm11 and the next round
# key in xmm5.
225L$enc2x_entry:
226
227	movdqa	%xmm9,%xmm1
228	movdqa	%xmm9,%xmm7
229	movdqa	L$k_inv+16(%rip),%xmm5
230	movdqa	%xmm5,%xmm13
231	pandn	%xmm0,%xmm1
232	pandn	%xmm6,%xmm7
233	psrld	$4,%xmm1
234	psrld	$4,%xmm7
235	pand	%xmm9,%xmm0
236	pand	%xmm9,%xmm6
237.byte	102,15,56,0,232	# pshufb %xmm0,%xmm5
238.byte	102,68,15,56,0,238	# pshufb %xmm6,%xmm13
239	movdqa	%xmm10,%xmm3
240	movdqa	%xmm10,%xmm11
241	pxor	%xmm1,%xmm0
242	pxor	%xmm7,%xmm6
243.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
244.byte	102,68,15,56,0,223	# pshufb %xmm7,%xmm11
245	movdqa	%xmm10,%xmm4
246	movdqa	%xmm10,%xmm12
247	pxor	%xmm5,%xmm3
248	pxor	%xmm13,%xmm11
249.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
250.byte	102,68,15,56,0,230	# pshufb %xmm6,%xmm12
251	movdqa	%xmm10,%xmm2
252	movdqa	%xmm10,%xmm8
253	pxor	%xmm5,%xmm4
254	pxor	%xmm13,%xmm12
255.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
256.byte	102,69,15,56,0,195	# pshufb %xmm11,%xmm8
257	movdqa	%xmm10,%xmm3
258	movdqa	%xmm10,%xmm11
259	pxor	%xmm0,%xmm2
260	pxor	%xmm6,%xmm8
261.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
262.byte	102,69,15,56,0,220	# pshufb %xmm12,%xmm11
263	movdqu	(%r9),%xmm5	# xmm5 = next round key (unaligned load)
264
265	pxor	%xmm1,%xmm3
266	pxor	%xmm7,%xmm11
# ZF comes from the subq that ends L$enc2x_loop, or from the addq before the
# initial jmp; no instruction in between writes flags.
267	jnz	L$enc2x_loop
268
269
# Final pass: L$k_sbo table pair and the L$k_sr entry selected by r11.
270	movdqa	-96(%r10),%xmm4	# L$k_sbo
271	movdqa	-80(%r10),%xmm0	# L$k_sbo+16
272	movdqa	%xmm4,%xmm12
273	movdqa	%xmm0,%xmm6
274.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
275.byte	102,69,15,56,0,224	# pshufb %xmm8,%xmm12
276	pxor	%xmm5,%xmm4	# fold in the last round key (both blocks)
277	pxor	%xmm5,%xmm12
278.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
279.byte	102,65,15,56,0,243	# pshufb %xmm11,%xmm6
280	movdqa	64(%r11,%r10,1),%xmm1	# L$k_sr + r11
281
282	pxor	%xmm4,%xmm0
283	pxor	%xmm12,%xmm6
284.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0
285.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
286	.byte	0xf3,0xc3	# rep ret
287
288
289
290
291
292
293
294
295
# _vpaes_schedule_core
#
# Expands the user key at (%rdi) into a sequence of 16-byte round keys stored
# at (%rdx).
#
# Inputs:
#   %rdi  user key bytes (16 are read; 16 more on the 256-bit path)
#   %esi  key size in bits: values above 192 select the 256-bit path, every
#         other value the 128-bit path
#   %rdx  destination for the round keys
#   %r8   starting byte offset into L$k_sr; the only caller in this file
#         passes 0x30, and _vpaes_schedule_mangle steps it as keys are written
# Result:
#   11 round keys (128-bit path) or 15 round keys (256-bit path) starting at
#   the original %rdx; %rdx is left pointing at the last key stored.
# Written: %xmm0-%xmm15, %rdx, %rsi, %r8, %r10, %r11, flags.  %xmm0-%xmm7 are
# zeroed before returning so that no key material is left in them.
.p2align	4
_vpaes_schedule_core:
	call	_vpaes_preheat			# load the xmm9-xmm15 constants
	movdqa	L$k_rcon(%rip),%xmm8		# xmm8 = round-constant bytes, consumed by _vpaes_schedule_round
	movdqu	(%rdi),%xmm0			# first 16 bytes of the user key

	movdqa	%xmm0,%xmm3			# xmm3 = copy of the raw key bytes
	leaq	L$k_ipt(%rip),%r11
	call	_vpaes_schedule_transform	# xmm0 = key passed through the L$k_ipt tables
	movdqa	%xmm0,%xmm7			# xmm7 = running schedule state

	leaq	L$k_sr(%rip),%r10		# r10 = shuffle table indexed by r8 in the mangle steps
	movdqu	%xmm0,(%rdx)			# round key 0 is stored in transformed form

L$schedule_go:
	cmpl	$192,%esi
	ja	L$schedule_256			# unsigned compare: more than 192 bits

# Path for 128-bit keys: 10 schedule rounds.  The first 9 results are stored
# by _vpaes_schedule_mangle and the tenth by L$schedule_mangle_last.
L$schedule_128:
	movl	$10,%esi			# esi is reused as the round counter

L$oop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle
	jmp	L$oop_schedule_128

# Path for 256-bit keys: xmm7 holds the state derived from the low half and
# xmm0 / xmm6 the state derived from the high half.  Each of the 7 passes
# stores the high-half key, runs a full round, and (except on the last pass)
# stores that result and runs a low round on the saved high half.
.p2align	4
L$schedule_256:
	movdqu	16(%rdi),%xmm0			# second 16 bytes of the user key
	call	_vpaes_schedule_transform	# r11 still points at L$k_ipt
	movl	$7,%esi

L$oop_schedule_256:
	call	_vpaes_schedule_mangle		# store the high-half key
	movdqa	%xmm0,%xmm6			# keep it for the low round below

	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle

	pshufd	$0xFF,%xmm0,%xmm0		# broadcast the top dword of the new key
	movdqa	%xmm7,%xmm5			# park the low-half state in xmm5
	movdqa	%xmm6,%xmm7			# low round operates on the saved high half
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7			# restore the low-half state

	jmp	L$oop_schedule_256

# Store the final round key: apply the L$k_sr shuffle selected by r8, XOR in
# L$k_s63, pass the result through the L$k_opt tables and write it 16 bytes
# after the previously stored key.
.p2align	4
L$schedule_mangle_last:
	movdqa	(%r8,%r10,1),%xmm1		# L$k_sr + r8
.byte	102,15,56,0,193				# pshufb %xmm1,%xmm0
	leaq	L$k_opt(%rip),%r11		# table pair for the transform below
	addq	$32,%rdx

L$schedule_mangle_last_dec:
	addq	$-16,%rdx			# net effect with the addq above: rdx += 16
	pxor	L$k_s63(%rip),%xmm0
	call	_vpaes_schedule_transform
	movdqu	%xmm0,(%rdx)

	# Scrub the registers that held key-derived values.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	.byte	0xf3,0xc3			# rep ret
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# _vpaes_schedule_round / _vpaes_schedule_low_round
#
# One step of the key schedule.  _vpaes_schedule_round does some extra setup
# and then falls through into _vpaes_schedule_low_round, which can also be
# called on its own.
#
# Inputs:
#   %xmm0         schedule input for this step
#   %xmm7         running schedule state
#   %xmm8         round-constant bytes (full round only)
#   %xmm9-%xmm13  constants, as loaded by _vpaes_preheat
# Outputs:
#   %xmm0 = %xmm7 = new schedule state
#   %xmm8 rotated by one byte (full round only)
# Written: %xmm1-%xmm4.  No general-purpose register or flag is written.
438.p2align	4
439_vpaes_schedule_round:
440
441
# Take byte 15 of xmm8 as this round constant, XOR it into byte 0 of xmm7,
# and rotate xmm8 so that a different byte is on top for the next call.
442	pxor	%xmm1,%xmm1
443.byte	102,65,15,58,15,200,15	# palignr $15,%xmm8,%xmm1: xmm1 = byte 15 of xmm8, zero-extended
444.byte	102,69,15,58,15,192,15	# palignr $15,%xmm8,%xmm8: byte 15 moves to byte 0, the rest move up one
445	pxor	%xmm1,%xmm7
446
447
# Broadcast the top dword of xmm0 to all four dwords, then rotate by one byte.
448	pshufd	$0xFF,%xmm0,%xmm0
449.byte	102,15,58,15,192,1	# palignr $1,%xmm0,%xmm0: rotate xmm0 down by one byte
450
451
452
453
454_vpaes_schedule_low_round:
455
# Smear xmm7: XOR it with itself shifted up by 4 bytes, then XOR that with
# itself shifted up by 8 bytes, so dword k becomes the XOR of the original
# dwords 0..k.  L$k_s63 is then XORed in.
456	movdqa	%xmm7,%xmm1
457	pslldq	$4,%xmm7
458	pxor	%xmm1,%xmm7
459	movdqa	%xmm7,%xmm1
460	pslldq	$8,%xmm7
461	pxor	%xmm1,%xmm7
462	pxor	L$k_s63(%rip),%xmm7
463
464
# Byte substitution on xmm0 by the same nibble-split lookup sequence used at
# L$enc_entry, followed by the xmm13 / xmm12 output tables.
465	movdqa	%xmm9,%xmm1
466	pandn	%xmm0,%xmm1
467	psrld	$4,%xmm1
468	pand	%xmm9,%xmm0
469	movdqa	%xmm11,%xmm2
470.byte	102,15,56,0,208	# pshufb %xmm0,%xmm2
471	pxor	%xmm1,%xmm0
472	movdqa	%xmm10,%xmm3
473.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
474	pxor	%xmm2,%xmm3
475	movdqa	%xmm10,%xmm4
476.byte	102,15,56,0,224	# pshufb %xmm0,%xmm4
477	pxor	%xmm2,%xmm4
478	movdqa	%xmm10,%xmm2
479.byte	102,15,56,0,211	# pshufb %xmm3,%xmm2
480	pxor	%xmm0,%xmm2
481	movdqa	%xmm10,%xmm3
482.byte	102,15,56,0,220	# pshufb %xmm4,%xmm3
483	pxor	%xmm1,%xmm3
484	movdqa	%xmm13,%xmm4
485.byte	102,15,56,0,226	# pshufb %xmm2,%xmm4
486	movdqa	%xmm12,%xmm0
487.byte	102,15,56,0,195	# pshufb %xmm3,%xmm0
488	pxor	%xmm4,%xmm0
489
490
# Combine with the smeared state and publish the result in both registers.
491	pxor	%xmm7,%xmm0
492	movdqa	%xmm0,%xmm7
493	.byte	0xf3,0xc3	# rep ret
494
495
496
497
498
499
500
501
502
503
504
505
506
# _vpaes_schedule_transform
#
# Byte-wise table transform of %xmm0 using a pair of 16-byte lookup tables.
# Each byte is split with the %xmm9 mask; the masked part indexes the table
# at (%r11), the remaining part (shifted down 4 bits) indexes the table at
# 16(%r11), and the two lookups are XORed together.
#
# Inputs:   %xmm0 = value to transform
#           %r11  = address of the table pair (32 bytes, 16-byte aligned)
#           %xmm9 = split mask, as loaded by _vpaes_preheat
# Output:   %xmm0
# Written:  %xmm1, %xmm2.  %r11 and flags are not modified.
.p2align	4
_vpaes_schedule_transform:
	movdqa	(%r11),%xmm2		# first table of the pair
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1		# xmm1 = bits of xmm0 outside the mask
	pand	%xmm9,%xmm0		# xmm0 = bits of xmm0 inside the mask
	psrld	$4,%xmm1		# bring the outside bits down to index range
	pshufb	%xmm0,%xmm2		# lookup in the first table
	movdqa	16(%r11),%xmm0		# second table of the pair
	pshufb	%xmm1,%xmm0		# lookup in the second table
	pxor	%xmm2,%xmm0		# combine both halves
	.byte	0xf3,0xc3		# rep ret
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
# _vpaes_schedule_mangle
#
# Converts the schedule state in %xmm0 into its stored round-key form and
# writes it to the next 16-byte slot of the output.
#
# Inputs:
#   %xmm0  schedule state (not modified)
#   %rdx   address of the previously stored round key
#   %r8    byte offset of the L$k_sr entry to apply
#   %r10   address of L$k_sr (set up by _vpaes_schedule_core)
# Outputs:
#   %rdx   advanced by 16; the new round key is stored at the new (%rdx)
#   %r8    (r8 - 16) & 0x30
# Written: %xmm1, %xmm3, %xmm4, %xmm5, flags.
547.p2align	4
548_vpaes_schedule_mangle:
549
550	movdqa	%xmm0,%xmm4
551	movdqa	L$k_mc_forward(%rip),%xmm5	# first entry of the L$k_mc_forward table
552
553
554	addq	$16,%rdx	# move to the next output slot
555	pxor	L$k_s63(%rip),%xmm4
# With t = xmm0 ^ L$k_s63 and P = the xmm5 shuffle, the three steps below
# leave xmm3 = P(t) ^ P(P(t)) ^ P(P(P(t))).
556.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
557	movdqa	%xmm4,%xmm3
558.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
559	pxor	%xmm4,%xmm3
560.byte	102,15,56,0,229	# pshufb %xmm5,%xmm4
561	pxor	%xmm4,%xmm3
562
563L$schedule_mangle_both:
564	movdqa	(%r8,%r10,1),%xmm1	# L$k_sr + r8
565.byte	102,15,56,0,217	# pshufb %xmm1,%xmm3
566	addq	$-16,%r8
567	andq	$0x30,%r8	# step r8 backwards through the 4 L$k_sr entries, wrapping
568	movdqu	%xmm3,(%rdx)	# unaligned store of the round key
569	.byte	0xf3,0xc3	# rep ret
570
571
572
573
574
575
# GFp_vpaes_set_encrypt_key
#
# Exported entry point that builds the key schedule used by the encryption
# routines in this file.
#
# Inputs:
#   %rdi  user key bytes (passed through to _vpaes_schedule_core)
#   %esi  key size in bits
#   %rdx  key structure to fill: round keys are written from offset 0 and
#         the value (bits >> 5) + 5 is written as a 32-bit word at offset 240
#         (9 for 128-bit keys, 13 for 256-bit keys); the encrypt cores read
#         that word as their loop count
# Returns:
#   %eax = 0, unconditionally.
# Written: everything _vpaes_schedule_core writes, plus %eax, %ecx, %r8.
.globl	_GFp_vpaes_set_encrypt_key
.private_extern _GFp_vpaes_set_encrypt_key

.p2align	4
_GFp_vpaes_set_encrypt_key:
#ifdef BORINGSSL_DISPATCH_TEST
	movb	$1,_BORINGSSL_function_hit+5(%rip)	# record that this implementation was selected
#endif

	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax
	movl	%eax,240(%rdx)		# loop count = bits/32 + 5

	xorl	%ecx,%ecx		# ecx = 0; flags are dead here, so the zeroing idiom is safe
	movl	$0x30,%r8d		# initial L$k_sr offset for the schedule
	call	_vpaes_schedule_core
	xorl	%eax,%eax		# return 0
	.byte	0xf3,0xc3		# rep ret
597
598
599
# GFp_vpaes_encrypt
#
# Exported entry point that processes a single 16-byte block.
#
# Inputs:
#   %rdi  source block (16 bytes, read with an unaligned load)
#   %rsi  destination (16 bytes, written with an unaligned store)
#   %rdx  key structure, consumed by _vpaes_encrypt_core
# Effect:
#   stores the %xmm0 result of _vpaes_encrypt_core at (%rsi).
# Written: %xmm0 plus everything _vpaes_preheat and _vpaes_encrypt_core
# write.
.globl	_GFp_vpaes_encrypt
.private_extern _GFp_vpaes_encrypt

.p2align	4
_GFp_vpaes_encrypt:
	call	_vpaes_preheat		# constants -> xmm9-xmm15 (leaves rdi and xmm0 alone)
	movdqu	(%rdi),%xmm0		# fetch the input block
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)		# write the result
	.byte	0xf3,0xc3		# rep ret
611
612
# GFp_vpaes_ctr32_encrypt_blocks
#
# Exported entry point for counter-mode processing of whole 16-byte blocks.
# For each block, a counter block is run through the encrypt core and the
# result is XORed with 16 input bytes to produce 16 output bytes.
#
# Inputs:
#   %rdi  input bytes
#   %rsi  output bytes
#   %rdx  number of 16-byte blocks (swapped into %rcx on entry)
#   %rcx  key structure (swapped into %rdx, where the encrypt cores read it)
#   %r8   16-byte initial counter block
#
# Counter handling: bytes 12..15 of the counter block are byte-reversed with
# L$rev_ctr so that paddd can increment them as one 32-bit word, and reversed
# back before each encryption.  The increment wraps at 32 bits and does not
# carry into bytes 0..11.  The updated counter is not stored back to (%r8).
#
# Returns immediately when the block count is zero.  An odd count is handled
# by processing one block through _vpaes_encrypt_core first; the remainder is
# processed two blocks at a time through _vpaes_encrypt_core_2x.
#
# Written: %xmm0-%xmm15, %rax, %rcx, %rdx, %rdi, %rsi, %r9-%r11, flags.
613.globl	_GFp_vpaes_ctr32_encrypt_blocks
614.private_extern _GFp_vpaes_ctr32_encrypt_blocks
615
616.p2align	4
617_GFp_vpaes_ctr32_encrypt_blocks:
618
619
620	xchgq	%rcx,%rdx	# rcx = block count, rdx = key
621	testq	%rcx,%rcx
622	jz	L$ctr32_abort	# nothing to do
623	movdqu	(%r8),%xmm0	# xmm0 = initial counter block, in memory byte order
624	movdqa	L$ctr_add_one(%rip),%xmm8
625	subq	%rdi,%rsi	# rsi = out - in, so (%rsi,%rdi,1) addresses the output
626	call	_vpaes_preheat
627	movdqa	%xmm0,%xmm6
628	pshufb	L$rev_ctr(%rip),%xmm6	# xmm6 = counter with bytes 12..15 reversed
629
630	testq	$1,%rcx
631	jz	L$ctr32_prep_loop	# even count: straight to the two-block loop
632
633
634
# Odd count: handle one block alone.  _vpaes_encrypt_core leaves xmm6-xmm8
# untouched, so the counter, the input block and the increment survive the call.
635	movdqu	(%rdi),%xmm7
636	call	_vpaes_encrypt_core	# xmm0 still holds the unmodified counter block
637	pxor	%xmm7,%xmm0
638	paddd	%xmm8,%xmm6	# counter += 1
639	movdqu	%xmm0,(%rsi,%rdi,1)
640	subq	$1,%rcx
641	leaq	16(%rdi),%rdi	# lea keeps the flags from subq intact for the jz
642	jz	L$ctr32_done
643
644L$ctr32_prep_loop:
645
646
647	movdqa	%xmm6,%xmm14	# xmm14 = counter n
648	movdqa	%xmm6,%xmm15
649	paddd	%xmm8,%xmm15	# xmm15 = counter n + 1
650
# Two blocks per pass.  xmm14 / xmm15 keep the counters in byte-reversed form
# across the call; _vpaes_encrypt_core_2x does not write them.
651L$ctr32_loop:
652	movdqa	L$rev_ctr(%rip),%xmm1
653	movdqa	%xmm14,%xmm0
654	movdqa	%xmm15,%xmm6
655.byte	102,15,56,0,193	# pshufb %xmm1,%xmm0: back to memory byte order
656.byte	102,15,56,0,241	# pshufb %xmm1,%xmm6
657	call	_vpaes_encrypt_core_2x
658	movdqu	(%rdi),%xmm1
659	movdqu	16(%rdi),%xmm2
660	movdqa	L$ctr_add_two(%rip),%xmm3
661	pxor	%xmm1,%xmm0
662	pxor	%xmm2,%xmm6
663	paddd	%xmm3,%xmm14	# both counters advance by 2
664	paddd	%xmm3,%xmm15
665	movdqu	%xmm0,(%rsi,%rdi,1)
666	movdqu	%xmm6,16(%rsi,%rdi,1)
667	subq	$2,%rcx
668	leaq	32(%rdi),%rdi	# lea keeps the flags from subq intact for the jnz
669	jnz	L$ctr32_loop
670
671L$ctr32_done:
672L$ctr32_abort:
673	.byte	0xf3,0xc3	# rep ret
674
675
676
677
678
679
680
681
682
# _vpaes_preheat
#
# Loads the constant tables that the encrypt core and the key schedule expect
# to find in registers.
#
# Outputs:
#   %r10    address of L$k_s0F
#   %xmm9   L$k_s0F
#   %xmm10  L$k_inv
#   %xmm11  L$k_inv+16
#   %xmm12  L$k_sb1+16
#   %xmm13  L$k_sb1
#   %xmm14  L$k_sb2+16
#   %xmm15  L$k_sb2
# Nothing else is written; flags are not modified.
.p2align	4
_vpaes_preheat:
	leaq	L$k_s0F(%rip),%r10
	movdqa	L$k_s0F(%rip),%xmm9
	movdqa	L$k_inv(%rip),%xmm10
	movdqa	L$k_inv+16(%rip),%xmm11
	movdqa	L$k_sb1+16(%rip),%xmm12
	movdqa	L$k_sb1(%rip),%xmm13
	movdqa	L$k_sb2+16(%rip),%xmm14
	movdqa	L$k_sb2(%rip),%xmm15
	.byte	0xf3,0xc3		# rep ret
695
696
697
698
699
700
701
702
# _vpaes_consts
#
# Read-only lookup tables, aligned to 64 bytes so that every 16-byte entry can
# be fetched with movdqa.  The encrypt cores reach several tables by fixed
# displacement from L$k_mc_backward (for example -96(%r10) for L$k_sbo and
# 64(%r11,%r10,1) for L$k_sr), so the order and size of the entries from
# L$k_sbo through L$k_sr must not change.
703.p2align	6
704_vpaes_consts:
# Two 16-byte tables used by the nibble lookups at L$enc_entry and in the
# schedule round.
705L$k_inv:
706.quad	0x0E05060F0D080180, 0x040703090A0B0C02
707.quad	0x01040A060F0B0780, 0x030D0E0C02050809
708
# Mask with 0x0F in every byte; selects the low 4 bits of each byte.
709L$k_s0F:
710.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
711
# Table pair applied to the input block by the encrypt cores and to the user
# key by _vpaes_schedule_core.
712L$k_ipt:
713.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
714.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
715
# Table pairs used in every pass of the encrypt loops (L$k_sb1, L$k_sb2) and
# in the final pass (L$k_sbo).
716L$k_sb1:
717.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
718.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
719L$k_sb2:
720.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
721.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
722L$k_sbo:
723.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
724.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
725
# Four 16-byte byte-shuffle patterns, selected by the 0x00/0x10/0x20/0x30
# index kept in r11.  The first entry is also used by _vpaes_schedule_mangle.
726L$k_mc_forward:
727.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
728.quad	0x080B0A0904070605, 0x000302010C0F0E0D
729.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
730.quad	0x000302010C0F0E0D, 0x080B0A0904070605
731
# Four 16-byte byte-shuffle patterns, selected like L$k_mc_forward.  The
# encrypt cores keep the address of this table in r10.
732L$k_mc_backward:
733.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
734.quad	0x020100030E0D0C0F, 0x0A09080B06050407
735.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
736.quad	0x0A09080B06050407, 0x020100030E0D0C0F
737
# Four 16-byte byte-shuffle patterns; the first is the identity.  Indexed by
# r11 in the final pass of the encrypt cores and by r8 in the key schedule.
738L$k_sr:
739.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
740.quad	0x030E09040F0A0500, 0x0B06010C07020D08
741.quad	0x0F060D040B020900, 0x070E050C030A0108
742.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
743
# Round-constant bytes; loaded into xmm8 by _vpaes_schedule_core and consumed
# one byte per call by _vpaes_schedule_round, starting from byte 15.
744L$k_rcon:
745.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
746
# Constant XORed into the schedule state (0x5B in every byte).
747L$k_s63:
748.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
749
# Table pair applied to the last round key by _vpaes_schedule_core.
750L$k_opt:
751.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
752.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
753
# Table pair; no load from it is visible in this file.
754L$k_deskew:
755.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
756.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
757
758
# Byte shuffle that keeps bytes 0..11 in place and reverses bytes 12..15.
# Applying it twice restores the original order.
759L$rev_ctr:
760.quad	0x0706050403020100, 0x0c0d0e0f0b0a0908
761
762
# Increments for paddd: 1 or 2 in the top 32-bit lane, zero in the others.
763L$ctr_add_one:
764.quad	0x0000000000000000, 0x0000000100000000
765L$ctr_add_two:
766.quad	0x0000000000000000, 0x0000000200000000
767
# NUL-terminated ASCII credit string:
# Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)
768.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
769.p2align	6
770
771#endif
772