1default	rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section	.text code align=64
6
7EXTERN	OPENSSL_ia32cap_P
; aesni_encrypt -- run one 16-byte block through the AES-NI encrypt rounds.
; Registers as read by the code:
;   rcx -> 16-byte source block
;   rdx -> 16-byte destination
;   r8  -> key schedule; the DWORD at offset 240 is used as the loop count
; NOTE(review): rcx/rdx/r8 are the first three Win64 integer arguments,
;   presumably (in, out, key) -- confirm against the C prototype.
; Clobbers: eax, r8, flags, xmm0-xmm2 (all three are zeroed before returning).
8global	aesni_encrypt
9
10ALIGN	16
11aesni_encrypt:
12	movups	xmm2,XMMWORD[rcx]	; xmm2 = block to process
13	mov	eax,DWORD[240+r8]	; eax = loop count kept at offset 240
14	movups	xmm0,XMMWORD[r8]	; xmm0 = key entry 0
15	movups	xmm1,XMMWORD[16+r8]	; xmm1 = key entry 1
16	lea	r8,[32+r8]	; r8 -> key entry 2
17	xorps	xmm2,xmm0	; initial XOR with key entry 0
18$L$oop_enc1_1:
19DB	102,15,56,220,209	; aesenc xmm2,xmm1
20	dec	eax
21	movups	xmm1,XMMWORD[r8]	; fetch the next key entry
22	lea	r8,[16+r8]
23	jnz	NEAR $L$oop_enc1_1	; tests ZF from dec eax (movups/lea do not touch flags)
24DB	102,15,56,221,209	; aesenclast xmm2,xmm1
25	pxor	xmm0,xmm0	; wipe key material from the registers
26	pxor	xmm1,xmm1
27	movups	XMMWORD[rdx],xmm2	; store the result
28	pxor	xmm2,xmm2	; wipe the result register too
29	DB	0F3h,0C3h		;repret
30
31
; aesni_decrypt -- run one 16-byte block through the AES-NI decrypt rounds.
; Registers as read by the code:
;   rcx -> 16-byte source block
;   rdx -> 16-byte destination
;   r8  -> key schedule; the DWORD at offset 240 is used as the loop count
; NOTE(review): rcx/rdx/r8 are the first three Win64 integer arguments,
;   presumably (in, out, key) -- confirm against the C prototype.
; Clobbers: eax, r8, flags, xmm0-xmm2 (all three are zeroed before returning).
32global	aesni_decrypt
33
34ALIGN	16
35aesni_decrypt:
36	movups	xmm2,XMMWORD[rcx]	; xmm2 = block to process
37	mov	eax,DWORD[240+r8]	; eax = loop count kept at offset 240
38	movups	xmm0,XMMWORD[r8]	; xmm0 = key entry 0
39	movups	xmm1,XMMWORD[16+r8]	; xmm1 = key entry 1
40	lea	r8,[32+r8]	; r8 -> key entry 2
41	xorps	xmm2,xmm0	; initial XOR with key entry 0
42$L$oop_dec1_2:
43DB	102,15,56,222,209	; aesdec xmm2,xmm1
44	dec	eax
45	movups	xmm1,XMMWORD[r8]	; fetch the next key entry
46	lea	r8,[16+r8]
47	jnz	NEAR $L$oop_dec1_2	; tests ZF from dec eax (movups/lea do not touch flags)
48DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
49	pxor	xmm0,xmm0	; wipe key material from the registers
50	pxor	xmm1,xmm1
51	movups	XMMWORD[rdx],xmm2	; store the result
52	pxor	xmm2,xmm2	; wipe the result register too
53	DB	0F3h,0C3h		;repret
54
55
; _aesni_encrypt2 -- internal helper: encrypt two blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2, xmm3 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2, xmm3 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; Key entries are addressed as [rcx + rax] with rcx pointing past the entries
; and rax a negative offset that counts up to zero; that zero ends the loop.
56ALIGN	16
57_aesni_encrypt2:
58	movups	xmm0,XMMWORD[rcx]	; key entry 0
59	shl	eax,4	; eax = 16*count (32-bit write also clears the top of rax)
60	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
61	xorps	xmm2,xmm0	; initial XOR with key entry 0
62	xorps	xmm3,xmm0
63	movups	xmm0,XMMWORD[32+rcx]	; key entry 2
64	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
65	neg	rax
66	add	rax,16	; rax = 16 - 16*count (negative running offset)
67
68$L$enc_loop2:
69DB	102,15,56,220,209	; aesenc xmm2,xmm1
70DB	102,15,56,220,217	; aesenc xmm3,xmm1
71	movups	xmm1,XMMWORD[rax*1+rcx]
72	add	rax,32	; sets ZF when the offset reaches zero
73DB	102,15,56,220,208	; aesenc xmm2,xmm0
74DB	102,15,56,220,216	; aesenc xmm3,xmm0
75	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
76	jnz	NEAR $L$enc_loop2	; ZF still from add rax,32
77
78DB	102,15,56,220,209	; aesenc xmm2,xmm1
79DB	102,15,56,220,217	; aesenc xmm3,xmm1
80DB	102,15,56,221,208	; aesenclast xmm2,xmm0
81DB	102,15,56,221,216	; aesenclast xmm3,xmm0
82	DB	0F3h,0C3h		;repret
83
84
; _aesni_decrypt2 -- internal helper: decrypt two blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2, xmm3 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2, xmm3 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; Key entries are addressed as [rcx + rax] with rcx pointing past the entries
; and rax a negative offset that counts up to zero; that zero ends the loop.
85ALIGN	16
86_aesni_decrypt2:
87	movups	xmm0,XMMWORD[rcx]	; key entry 0
88	shl	eax,4	; eax = 16*count (32-bit write also clears the top of rax)
89	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
90	xorps	xmm2,xmm0	; initial XOR with key entry 0
91	xorps	xmm3,xmm0
92	movups	xmm0,XMMWORD[32+rcx]	; key entry 2
93	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
94	neg	rax
95	add	rax,16	; rax = 16 - 16*count (negative running offset)
96
97$L$dec_loop2:
98DB	102,15,56,222,209	; aesdec xmm2,xmm1
99DB	102,15,56,222,217	; aesdec xmm3,xmm1
100	movups	xmm1,XMMWORD[rax*1+rcx]
101	add	rax,32	; sets ZF when the offset reaches zero
102DB	102,15,56,222,208	; aesdec xmm2,xmm0
103DB	102,15,56,222,216	; aesdec xmm3,xmm0
104	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
105	jnz	NEAR $L$dec_loop2	; ZF still from add rax,32
106
107DB	102,15,56,222,209	; aesdec xmm2,xmm1
108DB	102,15,56,222,217	; aesdec xmm3,xmm1
109DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
110DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
111	DB	0F3h,0C3h		;repret
112
113
; _aesni_encrypt3 -- internal helper: encrypt three blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm4 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm4 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; Key entries are fetched at [rcx + rax], rax being a negative offset that
; climbs to zero; reaching zero terminates the loop.
114ALIGN	16
115_aesni_encrypt3:
116	movups	xmm0,XMMWORD[rcx]	; key entry 0
117	shl	eax,4	; eax = 16*count
118	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
119	xorps	xmm2,xmm0	; initial XOR with key entry 0
120	xorps	xmm3,xmm0
121	xorps	xmm4,xmm0
122	movups	xmm0,XMMWORD[32+rcx]	; key entry 2
123	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
124	neg	rax
125	add	rax,16	; rax = 16 - 16*count
126
127$L$enc_loop3:
128DB	102,15,56,220,209	; aesenc xmm2,xmm1
129DB	102,15,56,220,217	; aesenc xmm3,xmm1
130DB	102,15,56,220,225	; aesenc xmm4,xmm1
131	movups	xmm1,XMMWORD[rax*1+rcx]
132	add	rax,32	; sets ZF when the offset reaches zero
133DB	102,15,56,220,208	; aesenc xmm2,xmm0
134DB	102,15,56,220,216	; aesenc xmm3,xmm0
135DB	102,15,56,220,224	; aesenc xmm4,xmm0
136	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
137	jnz	NEAR $L$enc_loop3	; ZF still from add rax,32
138
139DB	102,15,56,220,209	; aesenc xmm2,xmm1
140DB	102,15,56,220,217	; aesenc xmm3,xmm1
141DB	102,15,56,220,225	; aesenc xmm4,xmm1
142DB	102,15,56,221,208	; aesenclast xmm2,xmm0
143DB	102,15,56,221,216	; aesenclast xmm3,xmm0
144DB	102,15,56,221,224	; aesenclast xmm4,xmm0
145	DB	0F3h,0C3h		;repret
146
147
; _aesni_decrypt3 -- internal helper: decrypt three blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm4 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm4 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; Key entries are fetched at [rcx + rax], rax being a negative offset that
; climbs to zero; reaching zero terminates the loop.
148ALIGN	16
149_aesni_decrypt3:
150	movups	xmm0,XMMWORD[rcx]	; key entry 0
151	shl	eax,4	; eax = 16*count
152	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
153	xorps	xmm2,xmm0	; initial XOR with key entry 0
154	xorps	xmm3,xmm0
155	xorps	xmm4,xmm0
156	movups	xmm0,XMMWORD[32+rcx]	; key entry 2
157	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
158	neg	rax
159	add	rax,16	; rax = 16 - 16*count
160
161$L$dec_loop3:
162DB	102,15,56,222,209	; aesdec xmm2,xmm1
163DB	102,15,56,222,217	; aesdec xmm3,xmm1
164DB	102,15,56,222,225	; aesdec xmm4,xmm1
165	movups	xmm1,XMMWORD[rax*1+rcx]
166	add	rax,32	; sets ZF when the offset reaches zero
167DB	102,15,56,222,208	; aesdec xmm2,xmm0
168DB	102,15,56,222,216	; aesdec xmm3,xmm0
169DB	102,15,56,222,224	; aesdec xmm4,xmm0
170	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
171	jnz	NEAR $L$dec_loop3	; ZF still from add rax,32
172
173DB	102,15,56,222,209	; aesdec xmm2,xmm1
174DB	102,15,56,222,217	; aesdec xmm3,xmm1
175DB	102,15,56,222,225	; aesdec xmm4,xmm1
176DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
177DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
178DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
179	DB	0F3h,0C3h		;repret
180
181
; _aesni_encrypt4 -- internal helper: encrypt four blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm5 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm5 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; Key entries are fetched at [rcx + rax], rax being a negative offset that
; climbs to zero; reaching zero terminates the loop.
182ALIGN	16
183_aesni_encrypt4:
184	movups	xmm0,XMMWORD[rcx]	; key entry 0
185	shl	eax,4	; eax = 16*count
186	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
187	xorps	xmm2,xmm0	; initial XOR with key entry 0
188	xorps	xmm3,xmm0
189	xorps	xmm4,xmm0
190	xorps	xmm5,xmm0
191	movups	xmm0,XMMWORD[32+rcx]	; key entry 2
192	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
193	neg	rax
194DB	0x0f,0x1f,0x00	; 3-byte NOP (nop dword [rax]); padding only
195	add	rax,16	; rax = 16 - 16*count
196
197$L$enc_loop4:
198DB	102,15,56,220,209	; aesenc xmm2,xmm1
199DB	102,15,56,220,217	; aesenc xmm3,xmm1
200DB	102,15,56,220,225	; aesenc xmm4,xmm1
201DB	102,15,56,220,233	; aesenc xmm5,xmm1
202	movups	xmm1,XMMWORD[rax*1+rcx]
203	add	rax,32	; sets ZF when the offset reaches zero
204DB	102,15,56,220,208	; aesenc xmm2,xmm0
205DB	102,15,56,220,216	; aesenc xmm3,xmm0
206DB	102,15,56,220,224	; aesenc xmm4,xmm0
207DB	102,15,56,220,232	; aesenc xmm5,xmm0
208	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
209	jnz	NEAR $L$enc_loop4	; ZF still from add rax,32
210
211DB	102,15,56,220,209	; aesenc xmm2,xmm1
212DB	102,15,56,220,217	; aesenc xmm3,xmm1
213DB	102,15,56,220,225	; aesenc xmm4,xmm1
214DB	102,15,56,220,233	; aesenc xmm5,xmm1
215DB	102,15,56,221,208	; aesenclast xmm2,xmm0
216DB	102,15,56,221,216	; aesenclast xmm3,xmm0
217DB	102,15,56,221,224	; aesenclast xmm4,xmm0
218DB	102,15,56,221,232	; aesenclast xmm5,xmm0
219	DB	0F3h,0C3h		;repret
220
221
; _aesni_decrypt4 -- internal helper: decrypt four blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm5 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm5 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; Key entries are fetched at [rcx + rax], rax being a negative offset that
; climbs to zero; reaching zero terminates the loop.
222ALIGN	16
223_aesni_decrypt4:
224	movups	xmm0,XMMWORD[rcx]	; key entry 0
225	shl	eax,4	; eax = 16*count
226	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
227	xorps	xmm2,xmm0	; initial XOR with key entry 0
228	xorps	xmm3,xmm0
229	xorps	xmm4,xmm0
230	xorps	xmm5,xmm0
231	movups	xmm0,XMMWORD[32+rcx]	; key entry 2
232	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
233	neg	rax
234DB	0x0f,0x1f,0x00	; 3-byte NOP (nop dword [rax]); padding only
235	add	rax,16	; rax = 16 - 16*count
236
237$L$dec_loop4:
238DB	102,15,56,222,209	; aesdec xmm2,xmm1
239DB	102,15,56,222,217	; aesdec xmm3,xmm1
240DB	102,15,56,222,225	; aesdec xmm4,xmm1
241DB	102,15,56,222,233	; aesdec xmm5,xmm1
242	movups	xmm1,XMMWORD[rax*1+rcx]
243	add	rax,32	; sets ZF when the offset reaches zero
244DB	102,15,56,222,208	; aesdec xmm2,xmm0
245DB	102,15,56,222,216	; aesdec xmm3,xmm0
246DB	102,15,56,222,224	; aesdec xmm4,xmm0
247DB	102,15,56,222,232	; aesdec xmm5,xmm0
248	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
249	jnz	NEAR $L$dec_loop4	; ZF still from add rax,32
250
251DB	102,15,56,222,209	; aesdec xmm2,xmm1
252DB	102,15,56,222,217	; aesdec xmm3,xmm1
253DB	102,15,56,222,225	; aesdec xmm4,xmm1
254DB	102,15,56,222,233	; aesdec xmm5,xmm1
255DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
256DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
257DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
258DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
259	DB	0F3h,0C3h		;repret
260
261
; _aesni_encrypt6 -- internal helper: encrypt six blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm7 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm7 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; The first aesenc for xmm2-xmm4 is interleaved with the initial XOR of
; xmm5-xmm7, so the loop is entered part-way at $L$enc_loop6_enter.
; $L$enc_loop6 is additionally a direct `call` target from the ctr32 6x loop
; in this file, so the label placement and the register state expected at it
; (xmm0/xmm1 loaded, rax = running offset) must not change.
262ALIGN	16
263_aesni_encrypt6:
264	movups	xmm0,XMMWORD[rcx]	; key entry 0
265	shl	eax,4	; eax = 16*count
266	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
267	xorps	xmm2,xmm0	; initial XOR with key entry 0 ...
268	pxor	xmm3,xmm0
269	pxor	xmm4,xmm0
270DB	102,15,56,220,209	; aesenc xmm2,xmm1
271	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
272	neg	rax	; rax = -16*count
273DB	102,15,56,220,217	; aesenc xmm3,xmm1
274	pxor	xmm5,xmm0	; ... continued for the remaining blocks
275	pxor	xmm6,xmm0
276DB	102,15,56,220,225	; aesenc xmm4,xmm1
277	pxor	xmm7,xmm0
278	movups	xmm0,XMMWORD[rax*1+rcx]	; = [key+32], key entry 2
279	add	rax,16	; rax = 16 - 16*count
280	jmp	NEAR $L$enc_loop6_enter
281ALIGN	16
282$L$enc_loop6:
283DB	102,15,56,220,209	; aesenc xmm2,xmm1
284DB	102,15,56,220,217	; aesenc xmm3,xmm1
285DB	102,15,56,220,225	; aesenc xmm4,xmm1
286$L$enc_loop6_enter:
287DB	102,15,56,220,233	; aesenc xmm5,xmm1
288DB	102,15,56,220,241	; aesenc xmm6,xmm1
289DB	102,15,56,220,249	; aesenc xmm7,xmm1
290	movups	xmm1,XMMWORD[rax*1+rcx]
291	add	rax,32	; sets ZF when the offset reaches zero
292DB	102,15,56,220,208	; aesenc xmm2,xmm0
293DB	102,15,56,220,216	; aesenc xmm3,xmm0
294DB	102,15,56,220,224	; aesenc xmm4,xmm0
295DB	102,15,56,220,232	; aesenc xmm5,xmm0
296DB	102,15,56,220,240	; aesenc xmm6,xmm0
297DB	102,15,56,220,248	; aesenc xmm7,xmm0
298	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
299	jnz	NEAR $L$enc_loop6	; ZF still from add rax,32
300
301DB	102,15,56,220,209	; aesenc xmm2,xmm1
302DB	102,15,56,220,217	; aesenc xmm3,xmm1
303DB	102,15,56,220,225	; aesenc xmm4,xmm1
304DB	102,15,56,220,233	; aesenc xmm5,xmm1
305DB	102,15,56,220,241	; aesenc xmm6,xmm1
306DB	102,15,56,220,249	; aesenc xmm7,xmm1
307DB	102,15,56,221,208	; aesenclast xmm2,xmm0
308DB	102,15,56,221,216	; aesenclast xmm3,xmm0
309DB	102,15,56,221,224	; aesenclast xmm4,xmm0
310DB	102,15,56,221,232	; aesenclast xmm5,xmm0
311DB	102,15,56,221,240	; aesenclast xmm6,xmm0
312DB	102,15,56,221,248	; aesenclast xmm7,xmm0
313	DB	0F3h,0C3h		;repret
314
315
; _aesni_decrypt6 -- internal helper: decrypt six blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm7 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm7 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; The first aesdec for xmm2-xmm4 is interleaved with the initial XOR of
; xmm5-xmm7, so the loop is entered part-way at $L$dec_loop6_enter.
316ALIGN	16
317_aesni_decrypt6:
318	movups	xmm0,XMMWORD[rcx]	; key entry 0
319	shl	eax,4	; eax = 16*count
320	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
321	xorps	xmm2,xmm0	; initial XOR with key entry 0 ...
322	pxor	xmm3,xmm0
323	pxor	xmm4,xmm0
324DB	102,15,56,222,209	; aesdec xmm2,xmm1
325	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
326	neg	rax	; rax = -16*count
327DB	102,15,56,222,217	; aesdec xmm3,xmm1
328	pxor	xmm5,xmm0	; ... continued for the remaining blocks
329	pxor	xmm6,xmm0
330DB	102,15,56,222,225	; aesdec xmm4,xmm1
331	pxor	xmm7,xmm0
332	movups	xmm0,XMMWORD[rax*1+rcx]	; = [key+32], key entry 2
333	add	rax,16	; rax = 16 - 16*count
334	jmp	NEAR $L$dec_loop6_enter
335ALIGN	16
336$L$dec_loop6:
337DB	102,15,56,222,209	; aesdec xmm2,xmm1
338DB	102,15,56,222,217	; aesdec xmm3,xmm1
339DB	102,15,56,222,225	; aesdec xmm4,xmm1
340$L$dec_loop6_enter:
341DB	102,15,56,222,233	; aesdec xmm5,xmm1
342DB	102,15,56,222,241	; aesdec xmm6,xmm1
343DB	102,15,56,222,249	; aesdec xmm7,xmm1
344	movups	xmm1,XMMWORD[rax*1+rcx]
345	add	rax,32	; sets ZF when the offset reaches zero
346DB	102,15,56,222,208	; aesdec xmm2,xmm0
347DB	102,15,56,222,216	; aesdec xmm3,xmm0
348DB	102,15,56,222,224	; aesdec xmm4,xmm0
349DB	102,15,56,222,232	; aesdec xmm5,xmm0
350DB	102,15,56,222,240	; aesdec xmm6,xmm0
351DB	102,15,56,222,248	; aesdec xmm7,xmm0
352	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
353	jnz	NEAR $L$dec_loop6	; ZF still from add rax,32
354
355DB	102,15,56,222,209	; aesdec xmm2,xmm1
356DB	102,15,56,222,217	; aesdec xmm3,xmm1
357DB	102,15,56,222,225	; aesdec xmm4,xmm1
358DB	102,15,56,222,233	; aesdec xmm5,xmm1
359DB	102,15,56,222,241	; aesdec xmm6,xmm1
360DB	102,15,56,222,249	; aesdec xmm7,xmm1
361DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
362DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
363DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
364DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
365DB	102,15,56,223,240	; aesdeclast xmm6,xmm0
366DB	102,15,56,223,248	; aesdeclast xmm7,xmm0
367	DB	0F3h,0C3h		;repret
368
369
; _aesni_encrypt8 -- internal helper: encrypt eight blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm9 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm9 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; The first aesenc for xmm2/xmm3 is interleaved with the initial XOR of the
; later blocks, so the loop is entered part-way at $L$enc_loop8_inner.
; $L$enc_loop8_enter is not referenced in the portion of the file reviewed here.
370ALIGN	16
371_aesni_encrypt8:
372	movups	xmm0,XMMWORD[rcx]	; key entry 0
373	shl	eax,4	; eax = 16*count
374	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
375	xorps	xmm2,xmm0	; initial XOR with key entry 0 ...
376	xorps	xmm3,xmm0
377	pxor	xmm4,xmm0
378	pxor	xmm5,xmm0
379	pxor	xmm6,xmm0
380	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
381	neg	rax	; rax = -16*count
382DB	102,15,56,220,209	; aesenc xmm2,xmm1
383	pxor	xmm7,xmm0	; ... continued for the remaining blocks
384	pxor	xmm8,xmm0
385DB	102,15,56,220,217	; aesenc xmm3,xmm1
386	pxor	xmm9,xmm0
387	movups	xmm0,XMMWORD[rax*1+rcx]	; = [key+32], key entry 2
388	add	rax,16	; rax = 16 - 16*count
389	jmp	NEAR $L$enc_loop8_inner
390ALIGN	16
391$L$enc_loop8:
392DB	102,15,56,220,209	; aesenc xmm2,xmm1
393DB	102,15,56,220,217	; aesenc xmm3,xmm1
394$L$enc_loop8_inner:
395DB	102,15,56,220,225	; aesenc xmm4,xmm1
396DB	102,15,56,220,233	; aesenc xmm5,xmm1
397DB	102,15,56,220,241	; aesenc xmm6,xmm1
398DB	102,15,56,220,249	; aesenc xmm7,xmm1
399DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
400DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
401$L$enc_loop8_enter:
402	movups	xmm1,XMMWORD[rax*1+rcx]
403	add	rax,32	; sets ZF when the offset reaches zero
404DB	102,15,56,220,208	; aesenc xmm2,xmm0
405DB	102,15,56,220,216	; aesenc xmm3,xmm0
406DB	102,15,56,220,224	; aesenc xmm4,xmm0
407DB	102,15,56,220,232	; aesenc xmm5,xmm0
408DB	102,15,56,220,240	; aesenc xmm6,xmm0
409DB	102,15,56,220,248	; aesenc xmm7,xmm0
410DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
411DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
412	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
413	jnz	NEAR $L$enc_loop8	; ZF still from add rax,32
414
415DB	102,15,56,220,209	; aesenc xmm2,xmm1
416DB	102,15,56,220,217	; aesenc xmm3,xmm1
417DB	102,15,56,220,225	; aesenc xmm4,xmm1
418DB	102,15,56,220,233	; aesenc xmm5,xmm1
419DB	102,15,56,220,241	; aesenc xmm6,xmm1
420DB	102,15,56,220,249	; aesenc xmm7,xmm1
421DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
422DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
423DB	102,15,56,221,208	; aesenclast xmm2,xmm0
424DB	102,15,56,221,216	; aesenclast xmm3,xmm0
425DB	102,15,56,221,224	; aesenclast xmm4,xmm0
426DB	102,15,56,221,232	; aesenclast xmm5,xmm0
427DB	102,15,56,221,240	; aesenclast xmm6,xmm0
428DB	102,15,56,221,248	; aesenclast xmm7,xmm0
429DB	102,68,15,56,221,192	; aesenclast xmm8,xmm0
430DB	102,68,15,56,221,200	; aesenclast xmm9,xmm0
431	DB	0F3h,0C3h		;repret
432
433
; _aesni_decrypt8 -- internal helper: decrypt eight blocks side by side.
; Private register convention (not the Win64 C ABI):
;   In:   xmm2-xmm9 = blocks
;         rcx -> key schedule
;         eax  = count, scaled by 16 below (the ECB caller in this file loads it from [240+key])
;   Out:  xmm2-xmm9 = processed blocks
;   Clobbers: rax, rcx, xmm0, xmm1, flags (xmm0/xmm1 are NOT wiped here).
; The first aesdec for xmm2/xmm3 is interleaved with the initial XOR of the
; later blocks, so the loop is entered part-way at $L$dec_loop8_inner.
; $L$dec_loop8_enter is not referenced in the portion of the file reviewed here.
434ALIGN	16
435_aesni_decrypt8:
436	movups	xmm0,XMMWORD[rcx]	; key entry 0
437	shl	eax,4	; eax = 16*count
438	movups	xmm1,XMMWORD[16+rcx]	; key entry 1
439	xorps	xmm2,xmm0	; initial XOR with key entry 0 ...
440	xorps	xmm3,xmm0
441	pxor	xmm4,xmm0
442	pxor	xmm5,xmm0
443	pxor	xmm6,xmm0
444	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
445	neg	rax	; rax = -16*count
446DB	102,15,56,222,209	; aesdec xmm2,xmm1
447	pxor	xmm7,xmm0	; ... continued for the remaining blocks
448	pxor	xmm8,xmm0
449DB	102,15,56,222,217	; aesdec xmm3,xmm1
450	pxor	xmm9,xmm0
451	movups	xmm0,XMMWORD[rax*1+rcx]	; = [key+32], key entry 2
452	add	rax,16	; rax = 16 - 16*count
453	jmp	NEAR $L$dec_loop8_inner
454ALIGN	16
455$L$dec_loop8:
456DB	102,15,56,222,209	; aesdec xmm2,xmm1
457DB	102,15,56,222,217	; aesdec xmm3,xmm1
458$L$dec_loop8_inner:
459DB	102,15,56,222,225	; aesdec xmm4,xmm1
460DB	102,15,56,222,233	; aesdec xmm5,xmm1
461DB	102,15,56,222,241	; aesdec xmm6,xmm1
462DB	102,15,56,222,249	; aesdec xmm7,xmm1
463DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
464DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
465$L$dec_loop8_enter:
466	movups	xmm1,XMMWORD[rax*1+rcx]
467	add	rax,32	; sets ZF when the offset reaches zero
468DB	102,15,56,222,208	; aesdec xmm2,xmm0
469DB	102,15,56,222,216	; aesdec xmm3,xmm0
470DB	102,15,56,222,224	; aesdec xmm4,xmm0
471DB	102,15,56,222,232	; aesdec xmm5,xmm0
472DB	102,15,56,222,240	; aesdec xmm6,xmm0
473DB	102,15,56,222,248	; aesdec xmm7,xmm0
474DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
475DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
476	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
477	jnz	NEAR $L$dec_loop8	; ZF still from add rax,32
478
479DB	102,15,56,222,209	; aesdec xmm2,xmm1
480DB	102,15,56,222,217	; aesdec xmm3,xmm1
481DB	102,15,56,222,225	; aesdec xmm4,xmm1
482DB	102,15,56,222,233	; aesdec xmm5,xmm1
483DB	102,15,56,222,241	; aesdec xmm6,xmm1
484DB	102,15,56,222,249	; aesdec xmm7,xmm1
485DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
486DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
487DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
488DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
489DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
490DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
491DB	102,15,56,223,240	; aesdeclast xmm6,xmm0
492DB	102,15,56,223,248	; aesdeclast xmm7,xmm0
493DB	102,68,15,56,223,192	; aesdeclast xmm8,xmm0
494DB	102,68,15,56,223,200	; aesdeclast xmm9,xmm0
495	DB	0F3h,0C3h		;repret
496
; aesni_ecb_encrypt -- process a buffer block-by-block (no chaining) using the
; _aesni_encryptN / _aesni_decryptN helpers, eight blocks at a time where possible.
; Win64 entry. The prologue moves the incoming arguments into the registers
; the body works with:
;   rdi <- rcx       source pointer
;   rsi <- rdx       destination pointer
;   rdx <- r8        length in bytes (rounded DOWN to a multiple of 16; 0 => return)
;   rcx <- r9        key schedule (DWORD at offset 240 = count passed to the helpers)
;   r8  <- [rsp+40]  fifth argument; r8d == 0 selects the decrypt path
; Preserved for the caller: rdi, rsi (parked at [rsp+8]/[rsp+16] on entry) and
; xmm6-xmm9 (saved in an 88-byte frame), all callee-saved under the Win64 ABI.
; The decrypt paths zero each data register after storing it; the encrypt
; paths do not. Both exit through $L$ecb_ret, which zeroes xmm0/xmm1 and the
; stack slots that held xmm6-xmm9.
497global	aesni_ecb_encrypt
498
499ALIGN	16
500aesni_ecb_encrypt:
501	mov	QWORD[8+rsp],rdi	;WIN64 prologue
502	mov	QWORD[16+rsp],rsi
503	mov	rax,rsp	; NOTE(review): rax is overwritten before any read in this function; presumably for the unwind handler -- confirm
504$L$SEH_begin_aesni_ecb_encrypt:
505	mov	rdi,rcx
506	mov	rsi,rdx
507	mov	rdx,r8
508	mov	rcx,r9
509	mov	r8,QWORD[40+rsp]	; fifth argument lives above the return address + 32-byte shadow space
510
511
512	lea	rsp,[((-88))+rsp]	; frame for xmm6-xmm9; movaps below requires rsp to be 16-byte aligned here
513	movaps	XMMWORD[rsp],xmm6
514	movaps	XMMWORD[16+rsp],xmm7
515	movaps	XMMWORD[32+rsp],xmm8
516	movaps	XMMWORD[48+rsp],xmm9
517$L$ecb_enc_body:
518	and	rdx,-16	; drop any partial trailing block
519	jz	NEAR $L$ecb_ret	; nothing to do
520
521	mov	eax,DWORD[240+rcx]	; count for the helpers
522	movups	xmm0,XMMWORD[rcx]
523	mov	r11,rcx	; r11/r10d keep key pointer and count: the helpers clobber rcx/eax
524	mov	r10d,eax
525	test	r8d,r8d
526	jz	NEAR $L$ecb_decrypt
527
; ---- encrypt path ----
528	cmp	rdx,0x80
529	jb	NEAR $L$ecb_enc_tail	; fewer than 8 blocks
530
; Load the first 8 blocks and pre-subtract them from the remaining length.
531	movdqu	xmm2,XMMWORD[rdi]
532	movdqu	xmm3,XMMWORD[16+rdi]
533	movdqu	xmm4,XMMWORD[32+rdi]
534	movdqu	xmm5,XMMWORD[48+rdi]
535	movdqu	xmm6,XMMWORD[64+rdi]
536	movdqu	xmm7,XMMWORD[80+rdi]
537	movdqu	xmm8,XMMWORD[96+rdi]
538	movdqu	xmm9,XMMWORD[112+rdi]
539	lea	rdi,[128+rdi]
540	sub	rdx,0x80
541	jmp	NEAR $L$ecb_enc_loop8_enter
542ALIGN	16
; Loop body: store the 8 results just produced while loading the next 8 inputs.
543$L$ecb_enc_loop8:
544	movups	XMMWORD[rsi],xmm2
545	mov	rcx,r11	; restore key pointer for the next helper call
546	movdqu	xmm2,XMMWORD[rdi]
547	mov	eax,r10d	; restore count for the next helper call
548	movups	XMMWORD[16+rsi],xmm3
549	movdqu	xmm3,XMMWORD[16+rdi]
550	movups	XMMWORD[32+rsi],xmm4
551	movdqu	xmm4,XMMWORD[32+rdi]
552	movups	XMMWORD[48+rsi],xmm5
553	movdqu	xmm5,XMMWORD[48+rdi]
554	movups	XMMWORD[64+rsi],xmm6
555	movdqu	xmm6,XMMWORD[64+rdi]
556	movups	XMMWORD[80+rsi],xmm7
557	movdqu	xmm7,XMMWORD[80+rdi]
558	movups	XMMWORD[96+rsi],xmm8
559	movdqu	xmm8,XMMWORD[96+rdi]
560	movups	XMMWORD[112+rsi],xmm9
561	lea	rsi,[128+rsi]
562	movdqu	xmm9,XMMWORD[112+rdi]
563	lea	rdi,[128+rdi]
564$L$ecb_enc_loop8_enter:
565
566	call	_aesni_encrypt8
567
568	sub	rdx,0x80
569	jnc	NEAR $L$ecb_enc_loop8	; no borrow => another full 8 blocks are available
570
; Flush the last 8 results, then recover the true remainder.
571	movups	XMMWORD[rsi],xmm2
572	mov	rcx,r11
573	movups	XMMWORD[16+rsi],xmm3
574	mov	eax,r10d
575	movups	XMMWORD[32+rsi],xmm4
576	movups	XMMWORD[48+rsi],xmm5
577	movups	XMMWORD[64+rsi],xmm6
578	movups	XMMWORD[80+rsi],xmm7
579	movups	XMMWORD[96+rsi],xmm8
580	movups	XMMWORD[112+rsi],xmm9
581	lea	rsi,[128+rsi]
582	add	rdx,0x80	; undo the last subtraction: rdx = bytes still left
583	jz	NEAR $L$ecb_ret
584
; Tail: 1..7 blocks remain. Each cmp is followed by a load that leaves the
; flags untouched, so the jb and the later je both test the same compare.
585$L$ecb_enc_tail:
586	movups	xmm2,XMMWORD[rdi]
587	cmp	rdx,0x20
588	jb	NEAR $L$ecb_enc_one
589	movups	xmm3,XMMWORD[16+rdi]
590	je	NEAR $L$ecb_enc_two
591	movups	xmm4,XMMWORD[32+rdi]
592	cmp	rdx,0x40
593	jb	NEAR $L$ecb_enc_three
594	movups	xmm5,XMMWORD[48+rdi]
595	je	NEAR $L$ecb_enc_four
596	movups	xmm6,XMMWORD[64+rdi]
597	cmp	rdx,0x60
598	jb	NEAR $L$ecb_enc_five
599	movups	xmm7,XMMWORD[80+rdi]
600	je	NEAR $L$ecb_enc_six
; Seven blocks: use the 8-wide helper with a zeroed dummy in xmm9.
601	movdqu	xmm8,XMMWORD[96+rdi]
602	xorps	xmm9,xmm9
603	call	_aesni_encrypt8
604	movups	XMMWORD[rsi],xmm2
605	movups	XMMWORD[16+rsi],xmm3
606	movups	XMMWORD[32+rsi],xmm4
607	movups	XMMWORD[48+rsi],xmm5
608	movups	XMMWORD[64+rsi],xmm6
609	movups	XMMWORD[80+rsi],xmm7
610	movups	XMMWORD[96+rsi],xmm8
611	jmp	NEAR $L$ecb_ret
612ALIGN	16
; One block: inline single-block loop (eax = count, rcx = key pointer).
613$L$ecb_enc_one:
614	movups	xmm0,XMMWORD[rcx]
615	movups	xmm1,XMMWORD[16+rcx]
616	lea	rcx,[32+rcx]
617	xorps	xmm2,xmm0
618$L$oop_enc1_3:
619DB	102,15,56,220,209	; aesenc xmm2,xmm1
620	dec	eax
621	movups	xmm1,XMMWORD[rcx]
622	lea	rcx,[16+rcx]
623	jnz	NEAR $L$oop_enc1_3	; ZF from dec eax
624DB	102,15,56,221,209	; aesenclast xmm2,xmm1
625	movups	XMMWORD[rsi],xmm2
626	jmp	NEAR $L$ecb_ret
627ALIGN	16
628$L$ecb_enc_two:
629	call	_aesni_encrypt2
630	movups	XMMWORD[rsi],xmm2
631	movups	XMMWORD[16+rsi],xmm3
632	jmp	NEAR $L$ecb_ret
633ALIGN	16
634$L$ecb_enc_three:
635	call	_aesni_encrypt3
636	movups	XMMWORD[rsi],xmm2
637	movups	XMMWORD[16+rsi],xmm3
638	movups	XMMWORD[32+rsi],xmm4
639	jmp	NEAR $L$ecb_ret
640ALIGN	16
641$L$ecb_enc_four:
642	call	_aesni_encrypt4
643	movups	XMMWORD[rsi],xmm2
644	movups	XMMWORD[16+rsi],xmm3
645	movups	XMMWORD[32+rsi],xmm4
646	movups	XMMWORD[48+rsi],xmm5
647	jmp	NEAR $L$ecb_ret
648ALIGN	16
; Five blocks: use the 6-wide helper with a zeroed dummy in xmm7.
649$L$ecb_enc_five:
650	xorps	xmm7,xmm7
651	call	_aesni_encrypt6
652	movups	XMMWORD[rsi],xmm2
653	movups	XMMWORD[16+rsi],xmm3
654	movups	XMMWORD[32+rsi],xmm4
655	movups	XMMWORD[48+rsi],xmm5
656	movups	XMMWORD[64+rsi],xmm6
657	jmp	NEAR $L$ecb_ret
658ALIGN	16
659$L$ecb_enc_six:
660	call	_aesni_encrypt6
661	movups	XMMWORD[rsi],xmm2
662	movups	XMMWORD[16+rsi],xmm3
663	movups	XMMWORD[32+rsi],xmm4
664	movups	XMMWORD[48+rsi],xmm5
665	movups	XMMWORD[64+rsi],xmm6
666	movups	XMMWORD[80+rsi],xmm7
667	jmp	NEAR $L$ecb_ret
668
; ---- decrypt path: same shape as the encrypt path, using the decrypt helpers ----
669ALIGN	16
670$L$ecb_decrypt:
671	cmp	rdx,0x80
672	jb	NEAR $L$ecb_dec_tail	; fewer than 8 blocks
673
674	movdqu	xmm2,XMMWORD[rdi]
675	movdqu	xmm3,XMMWORD[16+rdi]
676	movdqu	xmm4,XMMWORD[32+rdi]
677	movdqu	xmm5,XMMWORD[48+rdi]
678	movdqu	xmm6,XMMWORD[64+rdi]
679	movdqu	xmm7,XMMWORD[80+rdi]
680	movdqu	xmm8,XMMWORD[96+rdi]
681	movdqu	xmm9,XMMWORD[112+rdi]
682	lea	rdi,[128+rdi]
683	sub	rdx,0x80
684	jmp	NEAR $L$ecb_dec_loop8_enter
685ALIGN	16
; Loop body: store the 8 results just produced while loading the next 8 inputs.
686$L$ecb_dec_loop8:
687	movups	XMMWORD[rsi],xmm2
688	mov	rcx,r11	; restore key pointer for the next helper call
689	movdqu	xmm2,XMMWORD[rdi]
690	mov	eax,r10d	; restore count for the next helper call
691	movups	XMMWORD[16+rsi],xmm3
692	movdqu	xmm3,XMMWORD[16+rdi]
693	movups	XMMWORD[32+rsi],xmm4
694	movdqu	xmm4,XMMWORD[32+rdi]
695	movups	XMMWORD[48+rsi],xmm5
696	movdqu	xmm5,XMMWORD[48+rdi]
697	movups	XMMWORD[64+rsi],xmm6
698	movdqu	xmm6,XMMWORD[64+rdi]
699	movups	XMMWORD[80+rsi],xmm7
700	movdqu	xmm7,XMMWORD[80+rdi]
701	movups	XMMWORD[96+rsi],xmm8
702	movdqu	xmm8,XMMWORD[96+rdi]
703	movups	XMMWORD[112+rsi],xmm9
704	lea	rsi,[128+rsi]
705	movdqu	xmm9,XMMWORD[112+rdi]
706	lea	rdi,[128+rdi]
707$L$ecb_dec_loop8_enter:
708
709	call	_aesni_decrypt8
710
711	movups	xmm0,XMMWORD[r11]	; reload key entry 0 into xmm0
712	sub	rdx,0x80
713	jnc	NEAR $L$ecb_dec_loop8	; no borrow => another full 8 blocks are available
714
; Flush the last 8 results, zeroing each register once it has been stored.
715	movups	XMMWORD[rsi],xmm2
716	pxor	xmm2,xmm2
717	mov	rcx,r11
718	movups	XMMWORD[16+rsi],xmm3
719	pxor	xmm3,xmm3
720	mov	eax,r10d
721	movups	XMMWORD[32+rsi],xmm4
722	pxor	xmm4,xmm4
723	movups	XMMWORD[48+rsi],xmm5
724	pxor	xmm5,xmm5
725	movups	XMMWORD[64+rsi],xmm6
726	pxor	xmm6,xmm6
727	movups	XMMWORD[80+rsi],xmm7
728	pxor	xmm7,xmm7
729	movups	XMMWORD[96+rsi],xmm8
730	pxor	xmm8,xmm8
731	movups	XMMWORD[112+rsi],xmm9
732	pxor	xmm9,xmm9
733	lea	rsi,[128+rsi]
734	add	rdx,0x80	; undo the last subtraction: rdx = bytes still left
735	jz	NEAR $L$ecb_ret
736
; Tail: 1..7 blocks remain; same compare/branch ladder as the encrypt tail.
737$L$ecb_dec_tail:
738	movups	xmm2,XMMWORD[rdi]
739	cmp	rdx,0x20
740	jb	NEAR $L$ecb_dec_one
741	movups	xmm3,XMMWORD[16+rdi]
742	je	NEAR $L$ecb_dec_two
743	movups	xmm4,XMMWORD[32+rdi]
744	cmp	rdx,0x40
745	jb	NEAR $L$ecb_dec_three
746	movups	xmm5,XMMWORD[48+rdi]
747	je	NEAR $L$ecb_dec_four
748	movups	xmm6,XMMWORD[64+rdi]
749	cmp	rdx,0x60
750	jb	NEAR $L$ecb_dec_five
751	movups	xmm7,XMMWORD[80+rdi]
752	je	NEAR $L$ecb_dec_six
; Seven blocks: use the 8-wide helper with a zeroed dummy in xmm9.
753	movups	xmm8,XMMWORD[96+rdi]
754	movups	xmm0,XMMWORD[rcx]
755	xorps	xmm9,xmm9
756	call	_aesni_decrypt8
757	movups	XMMWORD[rsi],xmm2
758	pxor	xmm2,xmm2
759	movups	XMMWORD[16+rsi],xmm3
760	pxor	xmm3,xmm3
761	movups	XMMWORD[32+rsi],xmm4
762	pxor	xmm4,xmm4
763	movups	XMMWORD[48+rsi],xmm5
764	pxor	xmm5,xmm5
765	movups	XMMWORD[64+rsi],xmm6
766	pxor	xmm6,xmm6
767	movups	XMMWORD[80+rsi],xmm7
768	pxor	xmm7,xmm7
769	movups	XMMWORD[96+rsi],xmm8
770	pxor	xmm8,xmm8
771	pxor	xmm9,xmm9	; the dummy lane is wiped as well
772	jmp	NEAR $L$ecb_ret
773ALIGN	16
; One block: inline single-block loop (eax = count, rcx = key pointer).
774$L$ecb_dec_one:
775	movups	xmm0,XMMWORD[rcx]
776	movups	xmm1,XMMWORD[16+rcx]
777	lea	rcx,[32+rcx]
778	xorps	xmm2,xmm0
779$L$oop_dec1_4:
780DB	102,15,56,222,209	; aesdec xmm2,xmm1
781	dec	eax
782	movups	xmm1,XMMWORD[rcx]
783	lea	rcx,[16+rcx]
784	jnz	NEAR $L$oop_dec1_4	; ZF from dec eax
785DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
786	movups	XMMWORD[rsi],xmm2
787	pxor	xmm2,xmm2
788	jmp	NEAR $L$ecb_ret
789ALIGN	16
790$L$ecb_dec_two:
791	call	_aesni_decrypt2
792	movups	XMMWORD[rsi],xmm2
793	pxor	xmm2,xmm2
794	movups	XMMWORD[16+rsi],xmm3
795	pxor	xmm3,xmm3
796	jmp	NEAR $L$ecb_ret
797ALIGN	16
798$L$ecb_dec_three:
799	call	_aesni_decrypt3
800	movups	XMMWORD[rsi],xmm2
801	pxor	xmm2,xmm2
802	movups	XMMWORD[16+rsi],xmm3
803	pxor	xmm3,xmm3
804	movups	XMMWORD[32+rsi],xmm4
805	pxor	xmm4,xmm4
806	jmp	NEAR $L$ecb_ret
807ALIGN	16
808$L$ecb_dec_four:
809	call	_aesni_decrypt4
810	movups	XMMWORD[rsi],xmm2
811	pxor	xmm2,xmm2
812	movups	XMMWORD[16+rsi],xmm3
813	pxor	xmm3,xmm3
814	movups	XMMWORD[32+rsi],xmm4
815	pxor	xmm4,xmm4
816	movups	XMMWORD[48+rsi],xmm5
817	pxor	xmm5,xmm5
818	jmp	NEAR $L$ecb_ret
819ALIGN	16
; Five blocks: use the 6-wide helper with a zeroed dummy in xmm7.
820$L$ecb_dec_five:
821	xorps	xmm7,xmm7
822	call	_aesni_decrypt6
823	movups	XMMWORD[rsi],xmm2
824	pxor	xmm2,xmm2
825	movups	XMMWORD[16+rsi],xmm3
826	pxor	xmm3,xmm3
827	movups	XMMWORD[32+rsi],xmm4
828	pxor	xmm4,xmm4
829	movups	XMMWORD[48+rsi],xmm5
830	pxor	xmm5,xmm5
831	movups	XMMWORD[64+rsi],xmm6
832	pxor	xmm6,xmm6
833	pxor	xmm7,xmm7	; the dummy lane is wiped as well
834	jmp	NEAR $L$ecb_ret
835ALIGN	16
836$L$ecb_dec_six:
837	call	_aesni_decrypt6
838	movups	XMMWORD[rsi],xmm2
839	pxor	xmm2,xmm2
840	movups	XMMWORD[16+rsi],xmm3
841	pxor	xmm3,xmm3
842	movups	XMMWORD[32+rsi],xmm4
843	pxor	xmm4,xmm4
844	movups	XMMWORD[48+rsi],xmm5
845	pxor	xmm5,xmm5
846	movups	XMMWORD[64+rsi],xmm6
847	pxor	xmm6,xmm6
848	movups	XMMWORD[80+rsi],xmm7
849	pxor	xmm7,xmm7
850
; Common exit: wipe xmm0/xmm1, restore xmm6-xmm9 and overwrite their stack
; slots with zero (xmm0), release the frame, restore rdi/rsi.
851$L$ecb_ret:
852	xorps	xmm0,xmm0
853	pxor	xmm1,xmm1
854	movaps	xmm6,XMMWORD[rsp]
855	movaps	XMMWORD[rsp],xmm0
856	movaps	xmm7,XMMWORD[16+rsp]
857	movaps	XMMWORD[16+rsp],xmm0
858	movaps	xmm8,XMMWORD[32+rsp]
859	movaps	XMMWORD[32+rsp],xmm0
860	movaps	xmm9,XMMWORD[48+rsp]
861	movaps	XMMWORD[48+rsp],xmm0
862	lea	rsp,[88+rsp]
863$L$ecb_enc_ret:
864	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
865	mov	rsi,QWORD[16+rsp]
866	DB	0F3h,0C3h		;repret
867$L$SEH_end_aesni_ecb_encrypt:
; aesni_ccm64_encrypt_blocks -- per block, run two AES encryptions in parallel:
; one lane (xmm2) on a counter block whose result is XORed with the input to
; form the output, and one lane (xmm3) on a running value that is XORed with
; each input block and written back to [r9] at the end.
; Win64 entry. After the prologue:
;   rdi <- rcx       source pointer
;   rsi <- rdx       destination pointer
;   rdx <- r8        number of 16-byte blocks
;   rcx <- r9        key schedule (DWORD at offset 240 = count)
;   r8  <- [rsp+40]  -> 16-byte counter block (read only)
;   r9  <- [rsp+48]  -> 16-byte running value (read, then updated on exit)
; $L$increment64 and $L$bswap_mask are constants defined outside the part of
; the file reviewed here; presumably a 64-bit increment and a byte-reversal
; shuffle mask -- confirm at their definitions.
; NOTE(review): the block count is only tested AFTER the first block has been
; processed (dec rdx / jnz), so a count of 0 is not handled here.
; Preserved: rdi, rsi, xmm6-xmm9. Scrubbed on exit: xmm0-xmm3 and the frame slots.
868global	aesni_ccm64_encrypt_blocks
869
870ALIGN	16
871aesni_ccm64_encrypt_blocks:
872	mov	QWORD[8+rsp],rdi	;WIN64 prologue
873	mov	QWORD[16+rsp],rsi
874	mov	rax,rsp
875$L$SEH_begin_aesni_ccm64_encrypt_blocks:
876	mov	rdi,rcx
877	mov	rsi,rdx
878	mov	rdx,r8
879	mov	rcx,r9
880	mov	r8,QWORD[40+rsp]	; fifth argument
881	mov	r9,QWORD[48+rsp]	; sixth argument
882
883
884	lea	rsp,[((-88))+rsp]	; frame for xmm6-xmm9
885	movaps	XMMWORD[rsp],xmm6
886	movaps	XMMWORD[16+rsp],xmm7
887	movaps	XMMWORD[32+rsp],xmm8
888	movaps	XMMWORD[48+rsp],xmm9
889$L$ccm64_enc_body:
890	mov	eax,DWORD[240+rcx]	; count
891	movdqu	xmm6,XMMWORD[r8]	; xmm6 = counter block
892	movdqa	xmm9,XMMWORD[$L$increment64]
893	movdqa	xmm7,XMMWORD[$L$bswap_mask]
894
895	shl	eax,4	; eax = 16*count
896	mov	r10d,16
897	lea	r11,[rcx]	; r11 = key base (rcx is re-pointed below)
898	movdqu	xmm3,XMMWORD[r9]	; xmm3 = running value
899	movdqa	xmm2,xmm6	; xmm2 = counter block as loaded
900	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + 16*count
901DB	102,15,56,0,247	; pshufb xmm6,xmm7 -- xmm6 kept in shuffled form for paddq
902	sub	r10,rax	; r10 = 16 - 16*count (initial negative key offset)
903	jmp	NEAR $L$ccm64_enc_outer
904ALIGN	16
905$L$ccm64_enc_outer:
906	movups	xmm0,XMMWORD[r11]	; key entry 0
907	mov	rax,r10	; reset the running key offset
908	movups	xmm8,XMMWORD[rdi]	; xmm8 = next input block
909
910	xorps	xmm2,xmm0	; counter lane: initial XOR with key entry 0
911	movups	xmm1,XMMWORD[16+r11]	; key entry 1
912	xorps	xmm0,xmm8	; xmm0 = key entry 0 ^ input
913	xorps	xmm3,xmm0	; running lane ^= input ^ key entry 0
914	movups	xmm0,XMMWORD[32+r11]	; key entry 2
915
916$L$ccm64_enc2_loop:
917DB	102,15,56,220,209	; aesenc xmm2,xmm1
918DB	102,15,56,220,217	; aesenc xmm3,xmm1
919	movups	xmm1,XMMWORD[rax*1+rcx]
920	add	rax,32	; sets ZF when the offset reaches zero
921DB	102,15,56,220,208	; aesenc xmm2,xmm0
922DB	102,15,56,220,216	; aesenc xmm3,xmm0
923	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
924	jnz	NEAR $L$ccm64_enc2_loop
925DB	102,15,56,220,209	; aesenc xmm2,xmm1
926DB	102,15,56,220,217	; aesenc xmm3,xmm1
927	paddq	xmm6,xmm9	; advance the (shuffled) counter
928	dec	rdx	; one block done; ZF is consumed by the jnz at the loop bottom
929DB	102,15,56,221,208	; aesenclast xmm2,xmm0
930DB	102,15,56,221,216	; aesenclast xmm3,xmm0
931
932	lea	rdi,[16+rdi]
933	xorps	xmm8,xmm2	; output = input ^ encrypted counter
934	movdqa	xmm2,xmm6	; next counter ...
935	movups	XMMWORD[rsi],xmm8
936DB	102,15,56,0,215	; pshufb xmm2,xmm7 -- ... shuffled back for the next iteration
937	lea	rsi,[16+rsi]
938	jnz	NEAR $L$ccm64_enc_outer	; ZF from dec rdx (nothing in between modifies flags)
939
; Done: write the running value back, scrub, restore callee-saved state.
940	pxor	xmm0,xmm0
941	pxor	xmm1,xmm1
942	pxor	xmm2,xmm2
943	movups	XMMWORD[r9],xmm3
944	pxor	xmm3,xmm3
945	pxor	xmm8,xmm8
946	pxor	xmm6,xmm6
947	movaps	xmm6,XMMWORD[rsp]
948	movaps	XMMWORD[rsp],xmm0	; overwrite the save slot with zero
949	movaps	xmm7,XMMWORD[16+rsp]
950	movaps	XMMWORD[16+rsp],xmm0
951	movaps	xmm8,XMMWORD[32+rsp]
952	movaps	XMMWORD[32+rsp],xmm0
953	movaps	xmm9,XMMWORD[48+rsp]
954	movaps	XMMWORD[48+rsp],xmm0
955	lea	rsp,[88+rsp]
956$L$ccm64_enc_ret:
957	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
958	mov	rsi,QWORD[16+rsp]
959	DB	0F3h,0C3h		;repret
960$L$SEH_end_aesni_ccm64_encrypt_blocks:
; aesni_ccm64_decrypt_blocks -- decrypt counterpart of the CCM64 block routine.
; The first counter block is encrypted alone. Each later pass encrypts the
; NEXT counter (xmm2) in parallel with the running value (xmm3), which has
; just been XORed with the block recovered on the previous pass. The last
; recovered block is folded in with a final single-lane encryption.
; Only aesenc/aesenclast are used; no aesdec appears in this routine.
; Win64 entry. After the prologue:
;   rdi <- rcx       source pointer
;   rsi <- rdx       destination pointer
;   rdx <- r8        number of 16-byte blocks
;   rcx <- r9        key schedule (DWORD at offset 240 = count)
;   r8  <- [rsp+40]  -> 16-byte counter block (read only)
;   r9  <- [rsp+48]  -> 16-byte running value (read, then updated on exit)
; $L$increment64 and $L$bswap_mask are constants defined outside the part of
; the file reviewed here; presumably a 64-bit increment and a byte-reversal
; shuffle mask -- confirm at their definitions.
; NOTE(review): the block count is first tested by `sub rdx,1` after one block
; has already been written, so a count of 0 is not handled here.
; Preserved: rdi, rsi, xmm6-xmm9. Scrubbed on exit: xmm0-xmm3 and the frame slots.
961global	aesni_ccm64_decrypt_blocks
962
963ALIGN	16
964aesni_ccm64_decrypt_blocks:
965	mov	QWORD[8+rsp],rdi	;WIN64 prologue
966	mov	QWORD[16+rsp],rsi
967	mov	rax,rsp
968$L$SEH_begin_aesni_ccm64_decrypt_blocks:
969	mov	rdi,rcx
970	mov	rsi,rdx
971	mov	rdx,r8
972	mov	rcx,r9
973	mov	r8,QWORD[40+rsp]	; fifth argument
974	mov	r9,QWORD[48+rsp]	; sixth argument
975
976
977	lea	rsp,[((-88))+rsp]	; frame for xmm6-xmm9
978	movaps	XMMWORD[rsp],xmm6
979	movaps	XMMWORD[16+rsp],xmm7
980	movaps	XMMWORD[32+rsp],xmm8
981	movaps	XMMWORD[48+rsp],xmm9
982$L$ccm64_dec_body:
983	mov	eax,DWORD[240+rcx]	; count
984	movups	xmm6,XMMWORD[r8]	; xmm6 = counter block
985	movdqu	xmm3,XMMWORD[r9]	; xmm3 = running value
986	movdqa	xmm9,XMMWORD[$L$increment64]
987	movdqa	xmm7,XMMWORD[$L$bswap_mask]
988
989	movaps	xmm2,xmm6	; xmm2 = counter block as loaded
990	mov	r10d,eax	; keep the count; eax is consumed by the loop below
991	mov	r11,rcx	; r11 = key base (rcx walks the schedule below)
992DB	102,15,56,0,247	; pshufb xmm6,xmm7 -- xmm6 kept in shuffled form for paddq
; Encrypt the first counter block on its own.
993	movups	xmm0,XMMWORD[rcx]
994	movups	xmm1,XMMWORD[16+rcx]
995	lea	rcx,[32+rcx]
996	xorps	xmm2,xmm0
997$L$oop_enc1_5:
998DB	102,15,56,220,209	; aesenc xmm2,xmm1
999	dec	eax
1000	movups	xmm1,XMMWORD[rcx]
1001	lea	rcx,[16+rcx]
1002	jnz	NEAR $L$oop_enc1_5	; ZF from dec eax
1003DB	102,15,56,221,209	; aesenclast xmm2,xmm1
1004	shl	r10d,4	; r10d = 16*count
1005	mov	eax,16
1006	movups	xmm8,XMMWORD[rdi]	; first input block
1007	paddq	xmm6,xmm9	; advance the (shuffled) counter
1008	lea	rdi,[16+rdi]
1009	sub	rax,r10	; rax = 16 - 16*count
1010	lea	rcx,[32+r10*1+r11]	; rcx = key + 32 + 16*count
1011	mov	r10,rax	; r10 = initial negative key offset for every later pass
1012	jmp	NEAR $L$ccm64_dec_outer
1013ALIGN	16
1014$L$ccm64_dec_outer:
1015	xorps	xmm8,xmm2	; recovered block = input ^ encrypted counter
1016	movdqa	xmm2,xmm6	; next counter ...
1017	movups	XMMWORD[rsi],xmm8
1018	lea	rsi,[16+rsi]
1019DB	102,15,56,0,215	; pshufb xmm2,xmm7 -- ... shuffled back
1020
1021	sub	rdx,1
1022	jz	NEAR $L$ccm64_dec_break	; that was the last block
1023
1024	movups	xmm0,XMMWORD[r11]	; key entry 0
1025	mov	rax,r10	; reset the running key offset
1026	movups	xmm1,XMMWORD[16+r11]	; key entry 1
1027	xorps	xmm8,xmm0	; recovered block ^ key entry 0
1028	xorps	xmm2,xmm0	; counter lane: initial XOR with key entry 0
1029	xorps	xmm3,xmm8	; running lane ^= recovered block ^ key entry 0
1030	movups	xmm0,XMMWORD[32+r11]	; key entry 2
1031	jmp	NEAR $L$ccm64_dec2_loop
1032ALIGN	16
1033$L$ccm64_dec2_loop:
1034DB	102,15,56,220,209	; aesenc xmm2,xmm1
1035DB	102,15,56,220,217	; aesenc xmm3,xmm1
1036	movups	xmm1,XMMWORD[rax*1+rcx]
1037	add	rax,32	; sets ZF when the offset reaches zero
1038DB	102,15,56,220,208	; aesenc xmm2,xmm0
1039DB	102,15,56,220,216	; aesenc xmm3,xmm0
1040	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
1041	jnz	NEAR $L$ccm64_dec2_loop
1042	movups	xmm8,XMMWORD[rdi]	; next input block
1043	paddq	xmm6,xmm9	; advance the (shuffled) counter
1044DB	102,15,56,220,209	; aesenc xmm2,xmm1
1045DB	102,15,56,220,217	; aesenc xmm3,xmm1
1046DB	102,15,56,221,208	; aesenclast xmm2,xmm0
1047DB	102,15,56,221,216	; aesenclast xmm3,xmm0
1048	lea	rdi,[16+rdi]
1049	jmp	NEAR $L$ccm64_dec_outer
1050
1051ALIGN	16
; Last block: fold the final recovered block into the running value with a
; single-lane encryption (r11 walks the schedule; count reloaded from offset 240).
1052$L$ccm64_dec_break:
1053
1054	mov	eax,DWORD[240+r11]
1055	movups	xmm0,XMMWORD[r11]
1056	movups	xmm1,XMMWORD[16+r11]
1057	xorps	xmm8,xmm0
1058	lea	r11,[32+r11]
1059	xorps	xmm3,xmm8
1060$L$oop_enc1_6:
1061DB	102,15,56,220,217	; aesenc xmm3,xmm1
1062	dec	eax
1063	movups	xmm1,XMMWORD[r11]
1064	lea	r11,[16+r11]
1065	jnz	NEAR $L$oop_enc1_6	; ZF from dec eax
1066DB	102,15,56,221,217	; aesenclast xmm3,xmm1
; Write the running value back, scrub, restore callee-saved state.
1067	pxor	xmm0,xmm0
1068	pxor	xmm1,xmm1
1069	pxor	xmm2,xmm2
1070	movups	XMMWORD[r9],xmm3
1071	pxor	xmm3,xmm3
1072	pxor	xmm8,xmm8
1073	pxor	xmm6,xmm6
1074	movaps	xmm6,XMMWORD[rsp]
1075	movaps	XMMWORD[rsp],xmm0	; overwrite the save slot with zero
1076	movaps	xmm7,XMMWORD[16+rsp]
1077	movaps	XMMWORD[16+rsp],xmm0
1078	movaps	xmm8,XMMWORD[32+rsp]
1079	movaps	XMMWORD[32+rsp],xmm0
1080	movaps	xmm9,XMMWORD[48+rsp]
1081	movaps	XMMWORD[48+rsp],xmm0
1082	lea	rsp,[88+rsp]
1083$L$ccm64_dec_ret:
1084	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1085	mov	rsi,QWORD[16+rsp]
1086	DB	0F3h,0C3h		;repret
1087$L$SEH_end_aesni_ccm64_decrypt_blocks:
1088global	aesni_ctr32_encrypt_blocks
1089
1090ALIGN	16
1091aesni_ctr32_encrypt_blocks:
1092	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1093	mov	QWORD[16+rsp],rsi
1094	mov	rax,rsp
1095$L$SEH_begin_aesni_ctr32_encrypt_blocks:
1096	mov	rdi,rcx
1097	mov	rsi,rdx
1098	mov	rdx,r8
1099	mov	rcx,r9
1100	mov	r8,QWORD[40+rsp]
1101
1102
1103	cmp	rdx,1
1104	jne	NEAR $L$ctr32_bulk
1105
1106
1107
1108	movups	xmm2,XMMWORD[r8]
1109	movups	xmm3,XMMWORD[rdi]
1110	mov	edx,DWORD[240+rcx]
1111	movups	xmm0,XMMWORD[rcx]
1112	movups	xmm1,XMMWORD[16+rcx]
1113	lea	rcx,[32+rcx]
1114	xorps	xmm2,xmm0
1115$L$oop_enc1_7:
1116DB	102,15,56,220,209
1117	dec	edx
1118	movups	xmm1,XMMWORD[rcx]
1119	lea	rcx,[16+rcx]
1120	jnz	NEAR $L$oop_enc1_7
1121DB	102,15,56,221,209
1122	pxor	xmm0,xmm0
1123	pxor	xmm1,xmm1
1124	xorps	xmm2,xmm3
1125	pxor	xmm3,xmm3
1126	movups	XMMWORD[rsi],xmm2
1127	xorps	xmm2,xmm2
1128	jmp	NEAR $L$ctr32_epilogue
1129
1130ALIGN	16
1131$L$ctr32_bulk:
1132	lea	rax,[rsp]
1133	push	rbp
1134	sub	rsp,288
1135	and	rsp,-16
1136	movaps	XMMWORD[(-168)+rax],xmm6
1137	movaps	XMMWORD[(-152)+rax],xmm7
1138	movaps	XMMWORD[(-136)+rax],xmm8
1139	movaps	XMMWORD[(-120)+rax],xmm9
1140	movaps	XMMWORD[(-104)+rax],xmm10
1141	movaps	XMMWORD[(-88)+rax],xmm11
1142	movaps	XMMWORD[(-72)+rax],xmm12
1143	movaps	XMMWORD[(-56)+rax],xmm13
1144	movaps	XMMWORD[(-40)+rax],xmm14
1145	movaps	XMMWORD[(-24)+rax],xmm15
1146$L$ctr32_body:
1147	lea	rbp,[((-8))+rax]
1148
1149
1150
1151
1152	movdqu	xmm2,XMMWORD[r8]
1153	movdqu	xmm0,XMMWORD[rcx]
1154	mov	r8d,DWORD[12+r8]
1155	pxor	xmm2,xmm0
1156	mov	r11d,DWORD[12+rcx]
1157	movdqa	XMMWORD[rsp],xmm2
1158	bswap	r8d
1159	movdqa	xmm3,xmm2
1160	movdqa	xmm4,xmm2
1161	movdqa	xmm5,xmm2
1162	movdqa	XMMWORD[64+rsp],xmm2
1163	movdqa	XMMWORD[80+rsp],xmm2
1164	movdqa	XMMWORD[96+rsp],xmm2
1165	mov	r10,rdx
1166	movdqa	XMMWORD[112+rsp],xmm2
1167
1168	lea	rax,[1+r8]
1169	lea	rdx,[2+r8]
1170	bswap	eax
1171	bswap	edx
1172	xor	eax,r11d
1173	xor	edx,r11d
1174DB	102,15,58,34,216,3
1175	lea	rax,[3+r8]
1176	movdqa	XMMWORD[16+rsp],xmm3
1177DB	102,15,58,34,226,3
1178	bswap	eax
1179	mov	rdx,r10
1180	lea	r10,[4+r8]
1181	movdqa	XMMWORD[32+rsp],xmm4
1182	xor	eax,r11d
1183	bswap	r10d
1184DB	102,15,58,34,232,3
1185	xor	r10d,r11d
1186	movdqa	XMMWORD[48+rsp],xmm5
1187	lea	r9,[5+r8]
1188	mov	DWORD[((64+12))+rsp],r10d
1189	bswap	r9d
1190	lea	r10,[6+r8]
1191	mov	eax,DWORD[240+rcx]
1192	xor	r9d,r11d
1193	bswap	r10d
1194	mov	DWORD[((80+12))+rsp],r9d
1195	xor	r10d,r11d
1196	lea	r9,[7+r8]
1197	mov	DWORD[((96+12))+rsp],r10d
1198	bswap	r9d
1199	mov	r10d,DWORD[((OPENSSL_ia32cap_P+4))]
1200	xor	r9d,r11d
1201	and	r10d,71303168
1202	mov	DWORD[((112+12))+rsp],r9d
1203
1204	movups	xmm1,XMMWORD[16+rcx]
1205
1206	movdqa	xmm6,XMMWORD[64+rsp]
1207	movdqa	xmm7,XMMWORD[80+rsp]
1208
1209	cmp	rdx,8
1210	jb	NEAR $L$ctr32_tail
1211
1212	sub	rdx,6
1213	cmp	r10d,4194304
1214	je	NEAR $L$ctr32_6x
1215
1216	lea	rcx,[128+rcx]
1217	sub	rdx,2
1218	jmp	NEAR $L$ctr32_loop8
1219
1220ALIGN	16
1221$L$ctr32_6x:
1222	shl	eax,4
1223	mov	r10d,48
1224	bswap	r11d
1225	lea	rcx,[32+rax*1+rcx]
1226	sub	r10,rax
1227	jmp	NEAR $L$ctr32_loop6
1228
1229ALIGN	16
1230$L$ctr32_loop6:
1231	add	r8d,6
1232	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]
1233DB	102,15,56,220,209
1234	mov	eax,r8d
1235	xor	eax,r11d
1236DB	102,15,56,220,217
1237DB	0x0f,0x38,0xf1,0x44,0x24,12
1238	lea	eax,[1+r8]
1239DB	102,15,56,220,225
1240	xor	eax,r11d
1241DB	0x0f,0x38,0xf1,0x44,0x24,28
1242DB	102,15,56,220,233
1243	lea	eax,[2+r8]
1244	xor	eax,r11d
1245DB	102,15,56,220,241
1246DB	0x0f,0x38,0xf1,0x44,0x24,44
1247	lea	eax,[3+r8]
1248DB	102,15,56,220,249
1249	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]
1250	xor	eax,r11d
1251
1252DB	102,15,56,220,208
1253DB	0x0f,0x38,0xf1,0x44,0x24,60
1254	lea	eax,[4+r8]
1255DB	102,15,56,220,216
1256	xor	eax,r11d
1257DB	0x0f,0x38,0xf1,0x44,0x24,76
1258DB	102,15,56,220,224
1259	lea	eax,[5+r8]
1260	xor	eax,r11d
1261DB	102,15,56,220,232
1262DB	0x0f,0x38,0xf1,0x44,0x24,92
1263	mov	rax,r10
1264DB	102,15,56,220,240
1265DB	102,15,56,220,248
1266	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]
1267
1268	call	$L$enc_loop6
1269
1270	movdqu	xmm8,XMMWORD[rdi]
1271	movdqu	xmm9,XMMWORD[16+rdi]
1272	movdqu	xmm10,XMMWORD[32+rdi]
1273	movdqu	xmm11,XMMWORD[48+rdi]
1274	movdqu	xmm12,XMMWORD[64+rdi]
1275	movdqu	xmm13,XMMWORD[80+rdi]
1276	lea	rdi,[96+rdi]
1277	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]
1278	pxor	xmm8,xmm2
1279	movaps	xmm2,XMMWORD[rsp]
1280	pxor	xmm9,xmm3
1281	movaps	xmm3,XMMWORD[16+rsp]
1282	pxor	xmm10,xmm4
1283	movaps	xmm4,XMMWORD[32+rsp]
1284	pxor	xmm11,xmm5
1285	movaps	xmm5,XMMWORD[48+rsp]
1286	pxor	xmm12,xmm6
1287	movaps	xmm6,XMMWORD[64+rsp]
1288	pxor	xmm13,xmm7
1289	movaps	xmm7,XMMWORD[80+rsp]
1290	movdqu	XMMWORD[rsi],xmm8
1291	movdqu	XMMWORD[16+rsi],xmm9
1292	movdqu	XMMWORD[32+rsi],xmm10
1293	movdqu	XMMWORD[48+rsi],xmm11
1294	movdqu	XMMWORD[64+rsi],xmm12
1295	movdqu	XMMWORD[80+rsi],xmm13
1296	lea	rsi,[96+rsi]
1297
1298	sub	rdx,6
1299	jnc	NEAR $L$ctr32_loop6
1300
1301	add	rdx,6
1302	jz	NEAR $L$ctr32_done
1303
1304	lea	eax,[((-48))+r10]
1305	lea	rcx,[((-80))+r10*1+rcx]
1306	neg	eax
1307	shr	eax,4
1308	jmp	NEAR $L$ctr32_tail
1309
1310ALIGN	32
1311$L$ctr32_loop8:
1312	add	r8d,8
1313	movdqa	xmm8,XMMWORD[96+rsp]
1314DB	102,15,56,220,209
1315	mov	r9d,r8d
1316	movdqa	xmm9,XMMWORD[112+rsp]
1317DB	102,15,56,220,217
1318	bswap	r9d
1319	movups	xmm0,XMMWORD[((32-128))+rcx]
1320DB	102,15,56,220,225
1321	xor	r9d,r11d
1322	nop
1323DB	102,15,56,220,233
1324	mov	DWORD[((0+12))+rsp],r9d
1325	lea	r9,[1+r8]
1326DB	102,15,56,220,241
1327DB	102,15,56,220,249
1328DB	102,68,15,56,220,193
1329DB	102,68,15,56,220,201
1330	movups	xmm1,XMMWORD[((48-128))+rcx]
1331	bswap	r9d
1332DB	102,15,56,220,208
1333DB	102,15,56,220,216
1334	xor	r9d,r11d
1335DB	0x66,0x90
1336DB	102,15,56,220,224
1337DB	102,15,56,220,232
1338	mov	DWORD[((16+12))+rsp],r9d
1339	lea	r9,[2+r8]
1340DB	102,15,56,220,240
1341DB	102,15,56,220,248
1342DB	102,68,15,56,220,192
1343DB	102,68,15,56,220,200
1344	movups	xmm0,XMMWORD[((64-128))+rcx]
1345	bswap	r9d
1346DB	102,15,56,220,209
1347DB	102,15,56,220,217
1348	xor	r9d,r11d
1349DB	0x66,0x90
1350DB	102,15,56,220,225
1351DB	102,15,56,220,233
1352	mov	DWORD[((32+12))+rsp],r9d
1353	lea	r9,[3+r8]
1354DB	102,15,56,220,241
1355DB	102,15,56,220,249
1356DB	102,68,15,56,220,193
1357DB	102,68,15,56,220,201
1358	movups	xmm1,XMMWORD[((80-128))+rcx]
1359	bswap	r9d
1360DB	102,15,56,220,208
1361DB	102,15,56,220,216
1362	xor	r9d,r11d
1363DB	0x66,0x90
1364DB	102,15,56,220,224
1365DB	102,15,56,220,232
1366	mov	DWORD[((48+12))+rsp],r9d
1367	lea	r9,[4+r8]
1368DB	102,15,56,220,240
1369DB	102,15,56,220,248
1370DB	102,68,15,56,220,192
1371DB	102,68,15,56,220,200
1372	movups	xmm0,XMMWORD[((96-128))+rcx]
1373	bswap	r9d
1374DB	102,15,56,220,209
1375DB	102,15,56,220,217
1376	xor	r9d,r11d
1377DB	0x66,0x90
1378DB	102,15,56,220,225
1379DB	102,15,56,220,233
1380	mov	DWORD[((64+12))+rsp],r9d
1381	lea	r9,[5+r8]
1382DB	102,15,56,220,241
1383DB	102,15,56,220,249
1384DB	102,68,15,56,220,193
1385DB	102,68,15,56,220,201
1386	movups	xmm1,XMMWORD[((112-128))+rcx]
1387	bswap	r9d
1388DB	102,15,56,220,208
1389DB	102,15,56,220,216
1390	xor	r9d,r11d
1391DB	0x66,0x90
1392DB	102,15,56,220,224
1393DB	102,15,56,220,232
1394	mov	DWORD[((80+12))+rsp],r9d
1395	lea	r9,[6+r8]
1396DB	102,15,56,220,240
1397DB	102,15,56,220,248
1398DB	102,68,15,56,220,192
1399DB	102,68,15,56,220,200
1400	movups	xmm0,XMMWORD[((128-128))+rcx]
1401	bswap	r9d
1402DB	102,15,56,220,209
1403DB	102,15,56,220,217
1404	xor	r9d,r11d
1405DB	0x66,0x90
1406DB	102,15,56,220,225
1407DB	102,15,56,220,233
1408	mov	DWORD[((96+12))+rsp],r9d
1409	lea	r9,[7+r8]
1410DB	102,15,56,220,241
1411DB	102,15,56,220,249
1412DB	102,68,15,56,220,193
1413DB	102,68,15,56,220,201
1414	movups	xmm1,XMMWORD[((144-128))+rcx]
1415	bswap	r9d
1416DB	102,15,56,220,208
1417DB	102,15,56,220,216
1418DB	102,15,56,220,224
1419	xor	r9d,r11d
1420	movdqu	xmm10,XMMWORD[rdi]
1421DB	102,15,56,220,232
1422	mov	DWORD[((112+12))+rsp],r9d
1423	cmp	eax,11
1424DB	102,15,56,220,240
1425DB	102,15,56,220,248
1426DB	102,68,15,56,220,192
1427DB	102,68,15,56,220,200
1428	movups	xmm0,XMMWORD[((160-128))+rcx]
1429
1430	jb	NEAR $L$ctr32_enc_done
1431
1432DB	102,15,56,220,209
1433DB	102,15,56,220,217
1434DB	102,15,56,220,225
1435DB	102,15,56,220,233
1436DB	102,15,56,220,241
1437DB	102,15,56,220,249
1438DB	102,68,15,56,220,193
1439DB	102,68,15,56,220,201
1440	movups	xmm1,XMMWORD[((176-128))+rcx]
1441
1442DB	102,15,56,220,208
1443DB	102,15,56,220,216
1444DB	102,15,56,220,224
1445DB	102,15,56,220,232
1446DB	102,15,56,220,240
1447DB	102,15,56,220,248
1448DB	102,68,15,56,220,192
1449DB	102,68,15,56,220,200
1450	movups	xmm0,XMMWORD[((192-128))+rcx]
1451	je	NEAR $L$ctr32_enc_done
1452
1453DB	102,15,56,220,209
1454DB	102,15,56,220,217
1455DB	102,15,56,220,225
1456DB	102,15,56,220,233
1457DB	102,15,56,220,241
1458DB	102,15,56,220,249
1459DB	102,68,15,56,220,193
1460DB	102,68,15,56,220,201
1461	movups	xmm1,XMMWORD[((208-128))+rcx]
1462
1463DB	102,15,56,220,208
1464DB	102,15,56,220,216
1465DB	102,15,56,220,224
1466DB	102,15,56,220,232
1467DB	102,15,56,220,240
1468DB	102,15,56,220,248
1469DB	102,68,15,56,220,192
1470DB	102,68,15,56,220,200
1471	movups	xmm0,XMMWORD[((224-128))+rcx]
1472	jmp	NEAR $L$ctr32_enc_done
1473
1474ALIGN	16
1475$L$ctr32_enc_done:
1476	movdqu	xmm11,XMMWORD[16+rdi]
1477	pxor	xmm10,xmm0
1478	movdqu	xmm12,XMMWORD[32+rdi]
1479	pxor	xmm11,xmm0
1480	movdqu	xmm13,XMMWORD[48+rdi]
1481	pxor	xmm12,xmm0
1482	movdqu	xmm14,XMMWORD[64+rdi]
1483	pxor	xmm13,xmm0
1484	movdqu	xmm15,XMMWORD[80+rdi]
1485	pxor	xmm14,xmm0
1486	pxor	xmm15,xmm0
1487DB	102,15,56,220,209
1488DB	102,15,56,220,217
1489DB	102,15,56,220,225
1490DB	102,15,56,220,233
1491DB	102,15,56,220,241
1492DB	102,15,56,220,249
1493DB	102,68,15,56,220,193
1494DB	102,68,15,56,220,201
1495	movdqu	xmm1,XMMWORD[96+rdi]
1496	lea	rdi,[128+rdi]
1497
1498DB	102,65,15,56,221,210
1499	pxor	xmm1,xmm0
1500	movdqu	xmm10,XMMWORD[((112-128))+rdi]
1501DB	102,65,15,56,221,219
1502	pxor	xmm10,xmm0
1503	movdqa	xmm11,XMMWORD[rsp]
1504DB	102,65,15,56,221,228
1505DB	102,65,15,56,221,237
1506	movdqa	xmm12,XMMWORD[16+rsp]
1507	movdqa	xmm13,XMMWORD[32+rsp]
1508DB	102,65,15,56,221,246
1509DB	102,65,15,56,221,255
1510	movdqa	xmm14,XMMWORD[48+rsp]
1511	movdqa	xmm15,XMMWORD[64+rsp]
1512DB	102,68,15,56,221,193
1513	movdqa	xmm0,XMMWORD[80+rsp]
1514	movups	xmm1,XMMWORD[((16-128))+rcx]
1515DB	102,69,15,56,221,202
1516
1517	movups	XMMWORD[rsi],xmm2
1518	movdqa	xmm2,xmm11
1519	movups	XMMWORD[16+rsi],xmm3
1520	movdqa	xmm3,xmm12
1521	movups	XMMWORD[32+rsi],xmm4
1522	movdqa	xmm4,xmm13
1523	movups	XMMWORD[48+rsi],xmm5
1524	movdqa	xmm5,xmm14
1525	movups	XMMWORD[64+rsi],xmm6
1526	movdqa	xmm6,xmm15
1527	movups	XMMWORD[80+rsi],xmm7
1528	movdqa	xmm7,xmm0
1529	movups	XMMWORD[96+rsi],xmm8
1530	movups	XMMWORD[112+rsi],xmm9
1531	lea	rsi,[128+rsi]
1532
1533	sub	rdx,8
1534	jnc	NEAR $L$ctr32_loop8
1535
1536	add	rdx,8
1537	jz	NEAR $L$ctr32_done
1538	lea	rcx,[((-128))+rcx]
1539
1540$L$ctr32_tail:
1541
1542
1543	lea	rcx,[16+rcx]
1544	cmp	rdx,4
1545	jb	NEAR $L$ctr32_loop3
1546	je	NEAR $L$ctr32_loop4
1547
1548
1549	shl	eax,4
1550	movdqa	xmm8,XMMWORD[96+rsp]
1551	pxor	xmm9,xmm9
1552
1553	movups	xmm0,XMMWORD[16+rcx]
1554DB	102,15,56,220,209
1555DB	102,15,56,220,217
1556	lea	rcx,[((32-16))+rax*1+rcx]
1557	neg	rax
1558DB	102,15,56,220,225
1559	add	rax,16
1560	movups	xmm10,XMMWORD[rdi]
1561DB	102,15,56,220,233
1562DB	102,15,56,220,241
1563	movups	xmm11,XMMWORD[16+rdi]
1564	movups	xmm12,XMMWORD[32+rdi]
1565DB	102,15,56,220,249
1566DB	102,68,15,56,220,193
1567
1568	call	$L$enc_loop8_enter
1569
1570	movdqu	xmm13,XMMWORD[48+rdi]
1571	pxor	xmm2,xmm10
1572	movdqu	xmm10,XMMWORD[64+rdi]
1573	pxor	xmm3,xmm11
1574	movdqu	XMMWORD[rsi],xmm2
1575	pxor	xmm4,xmm12
1576	movdqu	XMMWORD[16+rsi],xmm3
1577	pxor	xmm5,xmm13
1578	movdqu	XMMWORD[32+rsi],xmm4
1579	pxor	xmm6,xmm10
1580	movdqu	XMMWORD[48+rsi],xmm5
1581	movdqu	XMMWORD[64+rsi],xmm6
1582	cmp	rdx,6
1583	jb	NEAR $L$ctr32_done
1584
1585	movups	xmm11,XMMWORD[80+rdi]
1586	xorps	xmm7,xmm11
1587	movups	XMMWORD[80+rsi],xmm7
1588	je	NEAR $L$ctr32_done
1589
1590	movups	xmm12,XMMWORD[96+rdi]
1591	xorps	xmm8,xmm12
1592	movups	XMMWORD[96+rsi],xmm8
1593	jmp	NEAR $L$ctr32_done
1594
1595ALIGN	32
1596$L$ctr32_loop4:
1597DB	102,15,56,220,209
1598	lea	rcx,[16+rcx]
1599	dec	eax
1600DB	102,15,56,220,217
1601DB	102,15,56,220,225
1602DB	102,15,56,220,233
1603	movups	xmm1,XMMWORD[rcx]
1604	jnz	NEAR $L$ctr32_loop4
1605DB	102,15,56,221,209
1606DB	102,15,56,221,217
1607	movups	xmm10,XMMWORD[rdi]
1608	movups	xmm11,XMMWORD[16+rdi]
1609DB	102,15,56,221,225
1610DB	102,15,56,221,233
1611	movups	xmm12,XMMWORD[32+rdi]
1612	movups	xmm13,XMMWORD[48+rdi]
1613
1614	xorps	xmm2,xmm10
1615	movups	XMMWORD[rsi],xmm2
1616	xorps	xmm3,xmm11
1617	movups	XMMWORD[16+rsi],xmm3
1618	pxor	xmm4,xmm12
1619	movdqu	XMMWORD[32+rsi],xmm4
1620	pxor	xmm5,xmm13
1621	movdqu	XMMWORD[48+rsi],xmm5
1622	jmp	NEAR $L$ctr32_done
1623
1624ALIGN	32
1625$L$ctr32_loop3:
1626DB	102,15,56,220,209
1627	lea	rcx,[16+rcx]
1628	dec	eax
1629DB	102,15,56,220,217
1630DB	102,15,56,220,225
1631	movups	xmm1,XMMWORD[rcx]
1632	jnz	NEAR $L$ctr32_loop3
1633DB	102,15,56,221,209
1634DB	102,15,56,221,217
1635DB	102,15,56,221,225
1636
1637	movups	xmm10,XMMWORD[rdi]
1638	xorps	xmm2,xmm10
1639	movups	XMMWORD[rsi],xmm2
1640	cmp	rdx,2
1641	jb	NEAR $L$ctr32_done
1642
1643	movups	xmm11,XMMWORD[16+rdi]
1644	xorps	xmm3,xmm11
1645	movups	XMMWORD[16+rsi],xmm3
1646	je	NEAR $L$ctr32_done
1647
1648	movups	xmm12,XMMWORD[32+rdi]
1649	xorps	xmm4,xmm12
1650	movups	XMMWORD[32+rsi],xmm4
1651
1652$L$ctr32_done:
1653	xorps	xmm0,xmm0
1654	xor	r11d,r11d
1655	pxor	xmm1,xmm1
1656	pxor	xmm2,xmm2
1657	pxor	xmm3,xmm3
1658	pxor	xmm4,xmm4
1659	pxor	xmm5,xmm5
1660	movaps	xmm6,XMMWORD[((-160))+rbp]
1661	movaps	XMMWORD[(-160)+rbp],xmm0
1662	movaps	xmm7,XMMWORD[((-144))+rbp]
1663	movaps	XMMWORD[(-144)+rbp],xmm0
1664	movaps	xmm8,XMMWORD[((-128))+rbp]
1665	movaps	XMMWORD[(-128)+rbp],xmm0
1666	movaps	xmm9,XMMWORD[((-112))+rbp]
1667	movaps	XMMWORD[(-112)+rbp],xmm0
1668	movaps	xmm10,XMMWORD[((-96))+rbp]
1669	movaps	XMMWORD[(-96)+rbp],xmm0
1670	movaps	xmm11,XMMWORD[((-80))+rbp]
1671	movaps	XMMWORD[(-80)+rbp],xmm0
1672	movaps	xmm12,XMMWORD[((-64))+rbp]
1673	movaps	XMMWORD[(-64)+rbp],xmm0
1674	movaps	xmm13,XMMWORD[((-48))+rbp]
1675	movaps	XMMWORD[(-48)+rbp],xmm0
1676	movaps	xmm14,XMMWORD[((-32))+rbp]
1677	movaps	XMMWORD[(-32)+rbp],xmm0
1678	movaps	xmm15,XMMWORD[((-16))+rbp]
1679	movaps	XMMWORD[(-16)+rbp],xmm0
1680	movaps	XMMWORD[rsp],xmm0
1681	movaps	XMMWORD[16+rsp],xmm0
1682	movaps	XMMWORD[32+rsp],xmm0
1683	movaps	XMMWORD[48+rsp],xmm0
1684	movaps	XMMWORD[64+rsp],xmm0
1685	movaps	XMMWORD[80+rsp],xmm0
1686	movaps	XMMWORD[96+rsp],xmm0
1687	movaps	XMMWORD[112+rsp],xmm0
1688	lea	rsp,[rbp]
1689	pop	rbp
1690$L$ctr32_epilogue:
1691	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1692	mov	rsi,QWORD[16+rsp]
1693	DB	0F3h,0C3h		;repret
1694$L$SEH_end_aesni_ctr32_encrypt_blocks:
; aesni_xts_encrypt: exported entry point, Win64 register convention.
;
; The first four arguments arrive in rcx, rdx, r8 and r9 and two more are
; read from [40+rsp] and [48+rsp]; all six are moved into rdi, rsi, rdx, rcx,
; r8 and r9.  As used by the rest of this routine:
;   rdi = input pointer            rsi = output pointer
;   rdx = length in bytes          rcx = key schedule for the data blocks
;   r8  = key schedule used to encrypt the initial tweak block
;   r9  = pointer to the 16-byte block the initial tweak is derived from
1695global	aesni_xts_encrypt
1696
1697ALIGN	16
1698aesni_xts_encrypt:
; Park the incoming rdi/rsi in the two slots just above the return address;
; the epilogue reloads them from the same slots.
1699	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1700	mov	QWORD[16+rsp],rsi
1701	mov	rax,rsp
1702$L$SEH_begin_aesni_xts_encrypt:
1703	mov	rdi,rcx
1704	mov	rsi,rdx
1705	mov	rdx,r8
1706	mov	rcx,r9
1707	mov	r8,QWORD[40+rsp]
1708	mov	r9,QWORD[48+rsp]
1709
1710
; Frame setup: rax remembers the entry rsp, rbp is pushed, 272 bytes are
; reserved and rsp is rounded down to a 16-byte boundary.  xmm6-xmm15 are
; then saved at fixed offsets below the entry rsp (rax-168 .. rax-24).
1711	lea	rax,[rsp]
1712	push	rbp
1713	sub	rsp,272
1714	and	rsp,-16
1715	movaps	XMMWORD[(-168)+rax],xmm6
1716	movaps	XMMWORD[(-152)+rax],xmm7
1717	movaps	XMMWORD[(-136)+rax],xmm8
1718	movaps	XMMWORD[(-120)+rax],xmm9
1719	movaps	XMMWORD[(-104)+rax],xmm10
1720	movaps	XMMWORD[(-88)+rax],xmm11
1721	movaps	XMMWORD[(-72)+rax],xmm12
1722	movaps	XMMWORD[(-56)+rax],xmm13
1723	movaps	XMMWORD[(-40)+rax],xmm14
1724	movaps	XMMWORD[(-24)+rax],xmm15
; $L$xts_enc_body: derive the initial tweak, then pre-compute the first six
; tweak values before entering the six-block bulk loop.
1725$L$xts_enc_body:
; rbp = address of the slot written by `push rbp` (entry rsp - 8); the exit
; path restores xmm6-xmm15 and rsp relative to it.
1726	lea	rbp,[((-8))+rax]
; Encrypt the 16-byte block at [r9] with the key schedule in r8.  eax is the
; loop count read from offset 240 of that schedule; r10d is the same field of
; the data key schedule in rcx.
1727	movups	xmm2,XMMWORD[r9]
1728	mov	eax,DWORD[240+r8]
1729	mov	r10d,DWORD[240+rcx]
1730	movups	xmm0,XMMWORD[r8]
1731	movups	xmm1,XMMWORD[16+r8]
1732	lea	r8,[32+r8]
1733	xorps	xmm2,xmm0
1734$L$oop_enc1_8:
; DB 102,15,56,220,209 = 66 0F 38 DC D1 = aesenc xmm2,xmm1.
; movups/lea leave the flags alone, so jnz below tests the result of dec eax.
1735DB	102,15,56,220,209
1736	dec	eax
1737	movups	xmm1,XMMWORD[r8]
1738	lea	r8,[16+r8]
1739	jnz	NEAR $L$oop_enc1_8
; DB 102,15,56,221,209 = aesenclast xmm2,xmm1; xmm2 now holds the first tweak.
1740DB	102,15,56,221,209
; xmm0 = first round key of the data schedule, r11 = saved schedule pointer,
; eax = round count, r10d = 16 * round count, r9 = byte length as passed in,
; rdx = length rounded down to a multiple of 16.
1741	movups	xmm0,XMMWORD[rcx]
1742	mov	r11,rcx
1743	mov	eax,r10d
1744	shl	r10d,4
1745	mov	r9,rdx
1746	and	rdx,-16
1747
; xmm1 = round key at offset 16 + 16*rounds, i.e. the key the single-block
; loops in this routine end up feeding to aesenclast.  It is XORed with round
; key 0 a few lines down and parked at [96+rsp] for the bulk loop.
1748	movups	xmm1,XMMWORD[16+r10*1+rcx]
1749
; Tweak chain.  xmm8 = $L$xts_magic (constant defined outside this view).
; xmm15 carries the running tweak; xmm9 is a pshufd 0x5f copy of it that is
; shifted left one bit per step.  Each step copies xmm15 into the next of
; xmm10..xmm14, XORs round key 0 (xmm0) into that copy, doubles xmm15 with
; paddq and XORs in (sign mask of xmm9) AND xmm8.
; NOTE(review): presumably the XTS multiply-by-x step -- confirm against the
; definition of $L$xts_magic.
; Result: xmm10..xmm14 = tweaks 1..5, each XOR round key 0;
;         xmm15       = tweak 6, with no key material mixed in.
1750	movdqa	xmm8,XMMWORD[$L$xts_magic]
1751	movdqa	xmm15,xmm2
1752	pshufd	xmm9,xmm2,0x5f
1753	pxor	xmm1,xmm0
1754	movdqa	xmm14,xmm9
1755	paddd	xmm9,xmm9
1756	movdqa	xmm10,xmm15
1757	psrad	xmm14,31
1758	paddq	xmm15,xmm15
1759	pand	xmm14,xmm8
1760	pxor	xmm10,xmm0
1761	pxor	xmm15,xmm14
1762	movdqa	xmm14,xmm9
1763	paddd	xmm9,xmm9
1764	movdqa	xmm11,xmm15
1765	psrad	xmm14,31
1766	paddq	xmm15,xmm15
1767	pand	xmm14,xmm8
1768	pxor	xmm11,xmm0
1769	pxor	xmm15,xmm14
1770	movdqa	xmm14,xmm9
1771	paddd	xmm9,xmm9
1772	movdqa	xmm12,xmm15
1773	psrad	xmm14,31
1774	paddq	xmm15,xmm15
1775	pand	xmm14,xmm8
1776	pxor	xmm12,xmm0
1777	pxor	xmm15,xmm14
1778	movdqa	xmm14,xmm9
1779	paddd	xmm9,xmm9
1780	movdqa	xmm13,xmm15
1781	psrad	xmm14,31
1782	paddq	xmm15,xmm15
1783	pand	xmm14,xmm8
1784	pxor	xmm13,xmm0
1785	pxor	xmm15,xmm14
1786	movdqa	xmm14,xmm15
1787	psrad	xmm9,31
1788	paddq	xmm15,xmm15
1789	pand	xmm9,xmm8
1790	pxor	xmm14,xmm0
1791	pxor	xmm15,xmm9
; [96+rsp] = round key 0 XOR the aesenclast round key.
1792	movaps	XMMWORD[96+rsp],xmm1
1793
; Borrow here means fewer than six whole blocks are available.
1794	sub	rdx,16*6
1795	jc	NEAR $L$xts_enc_short
1796
; Bulk-loop setup: rcx = schedule + 32 + 16*rounds, rax = r10 = 112 - 16*rounds
; (the inner loop adds 32 to it until it reaches zero), xmm1 = second round
; key, r8 = address of $L$xts_magic.
1797	mov	eax,16+96
1798	lea	rcx,[32+r10*1+r11]
1799	sub	rax,r10
1800	movups	xmm1,XMMWORD[16+r11]
1801	mov	r10,rax
1802	lea	r8,[$L$xts_magic]
1803	jmp	NEAR $L$xts_enc_grandloop
1804
; $L$xts_enc_grandloop: head of the six-blocks-per-iteration loop.
;
; Loads six input blocks into xmm2..xmm7 and XORs each with its tweak.  On
; entry xmm10..xmm14 already contain (tweak XOR round key 0); for the sixth
; block xmm8 = xmm0 XOR xmm15 is formed here.  The first AES rounds are
; interleaved with those loads:
;   DB 102,15,56,220,<209|217|225|233|241|249> = aesenc xmm2..xmm7,xmm1
;   DB 102,15,56,220,<208|216|224|232|240|248> = aesenc xmm2..xmm7,xmm0
;
; xmm9 = [96+rsp] (round key 0 XOR the aesenclast round key).  XORing it into
; xmm10..xmm14 and xmm8 turns each (tweak XOR round key 0) into
; (tweak XOR last round key); those values are stored at [rsp], [16+rsp],
; [32+rsp], [64+rsp] and [80+rsp] here.  The [48+rsp] slot for xmm13 is
; written later, after $L$xts_enc_loop6 finishes.
1805ALIGN	32
1806$L$xts_enc_grandloop:
1807	movdqu	xmm2,XMMWORD[rdi]
1808	movdqa	xmm8,xmm0
1809	movdqu	xmm3,XMMWORD[16+rdi]
1810	pxor	xmm2,xmm10
1811	movdqu	xmm4,XMMWORD[32+rdi]
1812	pxor	xmm3,xmm11
1813DB	102,15,56,220,209
1814	movdqu	xmm5,XMMWORD[48+rdi]
1815	pxor	xmm4,xmm12
1816DB	102,15,56,220,217
1817	movdqu	xmm6,XMMWORD[64+rdi]
1818	pxor	xmm5,xmm13
1819DB	102,15,56,220,225
1820	movdqu	xmm7,XMMWORD[80+rdi]
1821	pxor	xmm8,xmm15
1822	movdqa	xmm9,XMMWORD[96+rsp]
1823	pxor	xmm6,xmm14
1824DB	102,15,56,220,233
; xmm0 is reused from here on as a round-key register (third round key).
1825	movups	xmm0,XMMWORD[32+r11]
1826	lea	rdi,[96+rdi]
1827	pxor	xmm7,xmm8
1828
1829	pxor	xmm10,xmm9
1830DB	102,15,56,220,241
1831	pxor	xmm11,xmm9
1832	movdqa	XMMWORD[rsp],xmm10
1833DB	102,15,56,220,249
1834	movups	xmm1,XMMWORD[48+r11]
1835	pxor	xmm12,xmm9
1836
1837DB	102,15,56,220,208
1838	pxor	xmm13,xmm9
1839	movdqa	XMMWORD[16+rsp],xmm11
1840DB	102,15,56,220,216
1841	pxor	xmm14,xmm9
1842	movdqa	XMMWORD[32+rsp],xmm12
1843DB	102,15,56,220,224
1844DB	102,15,56,220,232
1845	pxor	xmm8,xmm9
1846	movdqa	XMMWORD[64+rsp],xmm14
1847DB	102,15,56,220,240
1848DB	102,15,56,220,248
1849	movups	xmm0,XMMWORD[64+r11]
1850	movdqa	XMMWORD[80+rsp],xmm8
; xmm9 is re-seeded from the current xmm15 for the next round of tweak updates.
1851	pshufd	xmm9,xmm15,0x5f
1852	jmp	NEAR $L$xts_enc_loop6
; $L$xts_enc_loop6: middle AES rounds for the six blocks in xmm2..xmm7, two
; rounds per iteration (aesenc with xmm1, then with xmm0).  Round keys are
; fetched relative to rcx with the negative index rax, which climbs by 32 per
; iteration; jnz tests the flags left by `add rax,32` (the DB-encoded aesenc
; instructions and movups do not modify flags).
1853ALIGN	32
1854$L$xts_enc_loop6:
1855DB	102,15,56,220,209
1856DB	102,15,56,220,217
1857DB	102,15,56,220,225
1858DB	102,15,56,220,233
1859DB	102,15,56,220,241
1860DB	102,15,56,220,249
1861	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
1862	add	rax,32
1863
1864DB	102,15,56,220,208
1865DB	102,15,56,220,216
1866DB	102,15,56,220,224
1867DB	102,15,56,220,232
1868DB	102,15,56,220,240
1869DB	102,15,56,220,248
1870	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
1871	jnz	NEAR $L$xts_enc_loop6
1872
; Remaining rounds, interleaved with computing the tweaks for the NEXT group
; of six blocks.  xmm8 = constant at [r8] ($L$xts_magic); round key 0 is
; reloaded from [r11] into xmm10 and copied down the chain xmm11..xmm14, and
; each register is XORed with the freshly advanced xmm15, so on exit
; xmm10..xmm14 = (next tweaks XOR round key 0) and xmm15 = the tweak after
; those, as the loop head expects.
1873	movdqa	xmm8,XMMWORD[r8]
1874	movdqa	xmm14,xmm9
1875	paddd	xmm9,xmm9
1876DB	102,15,56,220,209
1877	paddq	xmm15,xmm15
1878	psrad	xmm14,31
1879DB	102,15,56,220,217
1880	pand	xmm14,xmm8
1881	movups	xmm10,XMMWORD[r11]
1882DB	102,15,56,220,225
1883DB	102,15,56,220,233
1884DB	102,15,56,220,241
1885	pxor	xmm15,xmm14
1886	movaps	xmm11,xmm10
1887DB	102,15,56,220,249
1888	movups	xmm1,XMMWORD[((-64))+rcx]
1889
1890	movdqa	xmm14,xmm9
1891DB	102,15,56,220,208
1892	paddd	xmm9,xmm9
1893	pxor	xmm10,xmm15
1894DB	102,15,56,220,216
1895	psrad	xmm14,31
1896	paddq	xmm15,xmm15
1897DB	102,15,56,220,224
1898DB	102,15,56,220,232
1899	pand	xmm14,xmm8
1900	movaps	xmm12,xmm11
1901DB	102,15,56,220,240
1902	pxor	xmm15,xmm14
1903	movdqa	xmm14,xmm9
1904DB	102,15,56,220,248
1905	movups	xmm0,XMMWORD[((-48))+rcx]
1906
1907	paddd	xmm9,xmm9
1908DB	102,15,56,220,209
1909	pxor	xmm11,xmm15
1910	psrad	xmm14,31
1911DB	102,15,56,220,217
1912	paddq	xmm15,xmm15
1913	pand	xmm14,xmm8
1914DB	102,15,56,220,225
1915DB	102,15,56,220,233
; Deferred store of the fourth (tweak XOR last round key) value; xmm13 is
; about to be overwritten with the next group's data.
1916	movdqa	XMMWORD[48+rsp],xmm13
1917	pxor	xmm15,xmm14
1918DB	102,15,56,220,241
1919	movaps	xmm13,xmm12
1920	movdqa	xmm14,xmm9
1921DB	102,15,56,220,249
1922	movups	xmm1,XMMWORD[((-32))+rcx]
1923
1924	paddd	xmm9,xmm9
1925DB	102,15,56,220,208
1926	pxor	xmm12,xmm15
1927	psrad	xmm14,31
1928DB	102,15,56,220,216
1929	paddq	xmm15,xmm15
1930	pand	xmm14,xmm8
1931DB	102,15,56,220,224
1932DB	102,15,56,220,232
1933DB	102,15,56,220,240
1934	pxor	xmm15,xmm14
1935	movaps	xmm14,xmm13
1936DB	102,15,56,220,248
1937
; xmm0 doubles as the carry-mask scratch for this step, then is reloaded with
; round key 0; xmm1 is reloaded with the second round key for the next pass.
1938	movdqa	xmm0,xmm9
1939	paddd	xmm9,xmm9
1940DB	102,15,56,220,209
1941	pxor	xmm13,xmm15
1942	psrad	xmm0,31
1943DB	102,15,56,220,217
1944	paddq	xmm15,xmm15
1945	pand	xmm0,xmm8
1946DB	102,15,56,220,225
1947DB	102,15,56,220,233
1948	pxor	xmm15,xmm0
1949	movups	xmm0,XMMWORD[r11]
1950DB	102,15,56,220,241
1951DB	102,15,56,220,249
1952	movups	xmm1,XMMWORD[16+r11]
1953
; Final round with a memory operand:
;   DB 102,15,56,221,84,36,0   = aesenclast xmm2,[rsp+0]
;   DB 102,15,56,221,92,36,16  = aesenclast xmm3,[rsp+16]
;   ... through ...
;   DB 102,15,56,221,124,36,80 = aesenclast xmm7,[rsp+80]
; The stack slots hold (tweak XOR last round key), so the output tweak XOR is
; folded into the last AES round.  rax is reset from r10 for the next pass.
1954	pxor	xmm14,xmm15
1955DB	102,15,56,221,84,36,0
1956	psrad	xmm9,31
1957	paddq	xmm15,xmm15
1958DB	102,15,56,221,92,36,16
1959DB	102,15,56,221,100,36,32
1960	pand	xmm9,xmm8
1961	mov	rax,r10
1962DB	102,15,56,221,108,36,48
1963DB	102,15,56,221,116,36,64
1964DB	102,15,56,221,124,36,80
1965	pxor	xmm15,xmm9
1966
; Store the six output blocks and loop while another 96 bytes remain.
1967	lea	rsi,[96+rsi]
1968	movups	XMMWORD[(-96)+rsi],xmm2
1969	movups	XMMWORD[(-80)+rsi],xmm3
1970	movups	XMMWORD[(-64)+rsi],xmm4
1971	movups	XMMWORD[(-48)+rsi],xmm5
1972	movups	XMMWORD[(-32)+rsi],xmm6
1973	movups	XMMWORD[(-16)+rsi],xmm7
1974	sub	rdx,16*6
1975	jnc	NEAR $L$xts_enc_grandloop
1976
; Undo the bulk-loop register setup: eax = (112 - r10d) >> 4 = round count,
; rcx = start of the key schedule.
1977	mov	eax,16+96
1978	sub	eax,r10d
1979	mov	rcx,r11
1980	shr	eax,4
1981
; $L$xts_enc_short: handle the 0..5 whole blocks left after the bulk loop
; (or all of them if the input never reached six blocks).
;
; r10d keeps the round count.  rdx += 96 restores the remaining whole-block
; byte count (0, 16, ..., 80).  xmm10..xmm14 still carry round key 0 XORed in;
; each `pxor xmmN,xmm0` strips it again, and is only executed for as many
; tweaks as the selected path needs.  SSE pxor does not touch EFLAGS, so every
; `je` below still sees the flags of the preceding cmp.
1982$L$xts_enc_short:
1983
1984	mov	r10d,eax
1985	pxor	xmm10,xmm0
1986	add	rdx,16*6
1987	jz	NEAR $L$xts_enc_done
1988
1989	pxor	xmm11,xmm0
1990	cmp	rdx,0x20
1991	jb	NEAR $L$xts_enc_one
1992	pxor	xmm12,xmm0
1993	je	NEAR $L$xts_enc_two
1994
1995	pxor	xmm13,xmm0
1996	cmp	rdx,0x40
1997	jb	NEAR $L$xts_enc_three
1998	pxor	xmm14,xmm0
1999	je	NEAR $L$xts_enc_four
2000
; Five blocks: XOR each with its tweak, zero xmm7 as the unused sixth input
; of _aesni_encrypt6, then XOR the tweaks into the results again.
2001	movdqu	xmm2,XMMWORD[rdi]
2002	movdqu	xmm3,XMMWORD[16+rdi]
2003	movdqu	xmm4,XMMWORD[32+rdi]
2004	pxor	xmm2,xmm10
2005	movdqu	xmm5,XMMWORD[48+rdi]
2006	pxor	xmm3,xmm11
2007	movdqu	xmm6,XMMWORD[64+rdi]
2008	lea	rdi,[80+rdi]
2009	pxor	xmm4,xmm12
2010	pxor	xmm5,xmm13
2011	pxor	xmm6,xmm14
2012	pxor	xmm7,xmm7
2013
2014	call	_aesni_encrypt6
2015
2016	xorps	xmm2,xmm10
; xmm10 = next unused tweak (xmm15), in case $L$xts_enc_done has to steal.
2017	movdqa	xmm10,xmm15
2018	xorps	xmm3,xmm11
2019	xorps	xmm4,xmm12
2020	movdqu	XMMWORD[rsi],xmm2
2021	xorps	xmm5,xmm13
2022	movdqu	XMMWORD[16+rsi],xmm3
2023	xorps	xmm6,xmm14
2024	movdqu	XMMWORD[32+rsi],xmm4
2025	movdqu	XMMWORD[48+rsi],xmm5
2026	movdqu	XMMWORD[64+rsi],xmm6
2027	lea	rsi,[80+rsi]
2028	jmp	NEAR $L$xts_enc_done
2029
; $L$xts_enc_one: exactly one whole block left.  The block is XORed with the
; tweak in xmm10, run through an inline single-block AES loop (rcx = key
; schedule, eax = round count), and XORed with xmm10 again.
2030ALIGN	16
2031$L$xts_enc_one:
2032	movups	xmm2,XMMWORD[rdi]
2033	lea	rdi,[16+rdi]
2034	xorps	xmm2,xmm10
2035	movups	xmm0,XMMWORD[rcx]
2036	movups	xmm1,XMMWORD[16+rcx]
2037	lea	rcx,[32+rcx]
2038	xorps	xmm2,xmm0
2039$L$oop_enc1_9:
; DB 102,15,56,220,209 = aesenc xmm2,xmm1
2040DB	102,15,56,220,209
2041	dec	eax
2042	movups	xmm1,XMMWORD[rcx]
2043	lea	rcx,[16+rcx]
2044	jnz	NEAR $L$oop_enc1_9
; DB 102,15,56,221,209 = aesenclast xmm2,xmm1
2045DB	102,15,56,221,209
2046	xorps	xmm2,xmm10
; xmm10 = next unused tweak for a possible stealing step.
2047	movdqa	xmm10,xmm11
2048	movups	XMMWORD[rsi],xmm2
2049	lea	rsi,[16+rsi]
2050	jmp	NEAR $L$xts_enc_done
2051
; $L$xts_enc_two: exactly two whole blocks left.  Tweaks xmm10/xmm11 are
; XORed in before and after _aesni_encrypt2 (which takes the key schedule in
; rcx and the round count in eax).
2052ALIGN	16
2053$L$xts_enc_two:
2054	movups	xmm2,XMMWORD[rdi]
2055	movups	xmm3,XMMWORD[16+rdi]
2056	lea	rdi,[32+rdi]
2057	xorps	xmm2,xmm10
2058	xorps	xmm3,xmm11
2059
2060	call	_aesni_encrypt2
2061
2062	xorps	xmm2,xmm10
; xmm10 = next unused tweak for a possible stealing step.
2063	movdqa	xmm10,xmm12
2064	xorps	xmm3,xmm11
2065	movups	XMMWORD[rsi],xmm2
2066	movups	XMMWORD[16+rsi],xmm3
2067	lea	rsi,[32+rsi]
2068	jmp	NEAR $L$xts_enc_done
2069
; $L$xts_enc_three: exactly three whole blocks left.  Tweaks xmm10..xmm12 are
; XORed in before and after the call to _aesni_encrypt3.
2070ALIGN	16
2071$L$xts_enc_three:
2072	movups	xmm2,XMMWORD[rdi]
2073	movups	xmm3,XMMWORD[16+rdi]
2074	movups	xmm4,XMMWORD[32+rdi]
2075	lea	rdi,[48+rdi]
2076	xorps	xmm2,xmm10
2077	xorps	xmm3,xmm11
2078	xorps	xmm4,xmm12
2079
2080	call	_aesni_encrypt3
2081
2082	xorps	xmm2,xmm10
; xmm10 = next unused tweak for a possible stealing step.
2083	movdqa	xmm10,xmm13
2084	xorps	xmm3,xmm11
2085	xorps	xmm4,xmm12
2086	movups	XMMWORD[rsi],xmm2
2087	movups	XMMWORD[16+rsi],xmm3
2088	movups	XMMWORD[32+rsi],xmm4
2089	lea	rsi,[48+rsi]
2090	jmp	NEAR $L$xts_enc_done
2091
; $L$xts_enc_four: exactly four whole blocks left.  Tweaks xmm10..xmm13 are
; XORed in before and after the call to _aesni_encrypt4.
2092ALIGN	16
2093$L$xts_enc_four:
2094	movups	xmm2,XMMWORD[rdi]
2095	movups	xmm3,XMMWORD[16+rdi]
2096	movups	xmm4,XMMWORD[32+rdi]
2097	xorps	xmm2,xmm10
2098	movups	xmm5,XMMWORD[48+rdi]
2099	lea	rdi,[64+rdi]
2100	xorps	xmm3,xmm11
2101	xorps	xmm4,xmm12
2102	xorps	xmm5,xmm13
2103
2104	call	_aesni_encrypt4
2105
2106	pxor	xmm2,xmm10
; xmm10 = next unused tweak for a possible stealing step.
2107	movdqa	xmm10,xmm14
2108	pxor	xmm3,xmm11
2109	pxor	xmm4,xmm12
2110	movdqu	XMMWORD[rsi],xmm2
2111	pxor	xmm5,xmm13
2112	movdqu	XMMWORD[16+rsi],xmm3
2113	movdqu	XMMWORD[32+rsi],xmm4
2114	movdqu	XMMWORD[48+rsi],xmm5
2115	lea	rsi,[64+rsi]
2116	jmp	NEAR $L$xts_enc_done
2117
; $L$xts_enc_done: all whole blocks are written.  r9 still holds the byte
; length as passed in; r9 & 15 is the size of the trailing partial block.
; Zero means nothing is left to do.
2118ALIGN	16
2119$L$xts_enc_done:
2120	and	r9,15
2121	jz	NEAR $L$xts_enc_ret
2122	mov	rdx,r9
2123
; Byte-wise swap for the partial block, r9 iterations:
;   the next input byte [rdi] replaces a byte of the last full output block
;   ([rsi-16]), and the byte it replaced is emitted as partial output at [rsi].
2124$L$xts_enc_steal:
2125	movzx	eax,BYTE[rdi]
2126	movzx	ecx,BYTE[((-16))+rsi]
2127	lea	rdi,[1+rdi]
2128	mov	BYTE[((-16))+rsi],al
2129	mov	BYTE[rsi],cl
2130	lea	rsi,[1+rsi]
2131	sub	rdx,1
2132	jnz	NEAR $L$xts_enc_steal
2133
; Rewind rsi, restore key pointer and round count, and encrypt the patched
; block at [rsi-16] in place with the tweak held in xmm10.
2134	sub	rsi,r9
2135	mov	rcx,r11
2136	mov	eax,r10d
2137
2138	movups	xmm2,XMMWORD[((-16))+rsi]
2139	xorps	xmm2,xmm10
2140	movups	xmm0,XMMWORD[rcx]
2141	movups	xmm1,XMMWORD[16+rcx]
2142	lea	rcx,[32+rcx]
2143	xorps	xmm2,xmm0
2144$L$oop_enc1_10:
; DB 102,15,56,220,209 = aesenc xmm2,xmm1
2145DB	102,15,56,220,209
2146	dec	eax
2147	movups	xmm1,XMMWORD[rcx]
2148	lea	rcx,[16+rcx]
2149	jnz	NEAR $L$oop_enc1_10
; DB 102,15,56,221,209 = aesenclast xmm2,xmm1
2150DB	102,15,56,221,209
2151	xorps	xmm2,xmm10
2152	movups	XMMWORD[(-16)+rsi],xmm2
2153
; $L$xts_enc_ret: common exit.  Zeroes xmm0..xmm5, reloads xmm6..xmm15 from
; the save area below rbp (overwriting each save slot with zero as it goes),
; zeroes the scratch slots [rsp]..[96+rsp], then unwinds the frame.
2154$L$xts_enc_ret:
2155	xorps	xmm0,xmm0
2156	pxor	xmm1,xmm1
2157	pxor	xmm2,xmm2
2158	pxor	xmm3,xmm3
2159	pxor	xmm4,xmm4
2160	pxor	xmm5,xmm5
2161	movaps	xmm6,XMMWORD[((-160))+rbp]
2162	movaps	XMMWORD[(-160)+rbp],xmm0
2163	movaps	xmm7,XMMWORD[((-144))+rbp]
2164	movaps	XMMWORD[(-144)+rbp],xmm0
2165	movaps	xmm8,XMMWORD[((-128))+rbp]
2166	movaps	XMMWORD[(-128)+rbp],xmm0
2167	movaps	xmm9,XMMWORD[((-112))+rbp]
2168	movaps	XMMWORD[(-112)+rbp],xmm0
2169	movaps	xmm10,XMMWORD[((-96))+rbp]
2170	movaps	XMMWORD[(-96)+rbp],xmm0
2171	movaps	xmm11,XMMWORD[((-80))+rbp]
2172	movaps	XMMWORD[(-80)+rbp],xmm0
2173	movaps	xmm12,XMMWORD[((-64))+rbp]
2174	movaps	XMMWORD[(-64)+rbp],xmm0
2175	movaps	xmm13,XMMWORD[((-48))+rbp]
2176	movaps	XMMWORD[(-48)+rbp],xmm0
2177	movaps	xmm14,XMMWORD[((-32))+rbp]
2178	movaps	XMMWORD[(-32)+rbp],xmm0
2179	movaps	xmm15,XMMWORD[((-16))+rbp]
2180	movaps	XMMWORD[(-16)+rbp],xmm0
2181	movaps	XMMWORD[rsp],xmm0
2182	movaps	XMMWORD[16+rsp],xmm0
2183	movaps	XMMWORD[32+rsp],xmm0
2184	movaps	XMMWORD[48+rsp],xmm0
2185	movaps	XMMWORD[64+rsp],xmm0
2186	movaps	XMMWORD[80+rsp],xmm0
2187	movaps	XMMWORD[96+rsp],xmm0
; rbp points at the saved-rbp slot, so this pair restores rsp and rbp to
; their values at function entry.
2188	lea	rsp,[rbp]
2189	pop	rbp
2190$L$xts_enc_epilogue:
; Reload the rdi/rsi values stashed by the prologue and return.
2191	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2192	mov	rsi,QWORD[16+rsp]
2193	DB	0F3h,0C3h		;repret
2194$L$SEH_end_aesni_xts_encrypt:
; aesni_xts_decrypt: exported entry point, Win64 register convention.
;
; The first four arguments arrive in rcx, rdx, r8 and r9 and two more are
; read from [40+rsp] and [48+rsp]; all six are moved into rdi, rsi, rdx, rcx,
; r8 and r9.  As used by the rest of this routine:
;   rdi = input pointer            rsi = output pointer
;   rdx = length in bytes          rcx = key schedule for the data blocks
;   r8  = key schedule used to encrypt the initial tweak block
;   r9  = pointer to the 16-byte block the initial tweak is derived from
2195global	aesni_xts_decrypt
2196
2197ALIGN	16
2198aesni_xts_decrypt:
; Park the incoming rdi/rsi in the two slots just above the return address;
; the epilogue reloads them from the same slots.
2199	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2200	mov	QWORD[16+rsp],rsi
2201	mov	rax,rsp
2202$L$SEH_begin_aesni_xts_decrypt:
2203	mov	rdi,rcx
2204	mov	rsi,rdx
2205	mov	rdx,r8
2206	mov	rcx,r9
2207	mov	r8,QWORD[40+rsp]
2208	mov	r9,QWORD[48+rsp]
2209
2210
; Frame setup: rax remembers the entry rsp, rbp is pushed, 272 bytes are
; reserved and rsp is rounded down to a 16-byte boundary.  xmm6-xmm15 are
; then saved at fixed offsets below the entry rsp (rax-168 .. rax-24).
2211	lea	rax,[rsp]
2212	push	rbp
2213	sub	rsp,272
2214	and	rsp,-16
2215	movaps	XMMWORD[(-168)+rax],xmm6
2216	movaps	XMMWORD[(-152)+rax],xmm7
2217	movaps	XMMWORD[(-136)+rax],xmm8
2218	movaps	XMMWORD[(-120)+rax],xmm9
2219	movaps	XMMWORD[(-104)+rax],xmm10
2220	movaps	XMMWORD[(-88)+rax],xmm11
2221	movaps	XMMWORD[(-72)+rax],xmm12
2222	movaps	XMMWORD[(-56)+rax],xmm13
2223	movaps	XMMWORD[(-40)+rax],xmm14
2224	movaps	XMMWORD[(-24)+rax],xmm15
; $L$xts_dec_body: derive the initial tweak, hold back one block when the
; length is not a multiple of 16, then pre-compute the first six tweaks.
2225$L$xts_dec_body:
; rbp = address of the slot written by `push rbp` (entry rsp - 8); the exit
; path restores xmm6-xmm15 and rsp relative to it.
2226	lea	rbp,[((-8))+rax]
; The tweak block at [r9] is ENCRYPTED (aesenc/aesenclast) with the key
; schedule in r8, even though this is the decrypt routine.  eax is the loop
; count from offset 240 of that schedule; r10d is the same field of the data
; key schedule in rcx.
2227	movups	xmm2,XMMWORD[r9]
2228	mov	eax,DWORD[240+r8]
2229	mov	r10d,DWORD[240+rcx]
2230	movups	xmm0,XMMWORD[r8]
2231	movups	xmm1,XMMWORD[16+r8]
2232	lea	r8,[32+r8]
2233	xorps	xmm2,xmm0
2234$L$oop_enc1_11:
; DB 102,15,56,220,209 = 66 0F 38 DC D1 = aesenc xmm2,xmm1
2235DB	102,15,56,220,209
2236	dec	eax
2237	movups	xmm1,XMMWORD[r8]
2238	lea	r8,[16+r8]
2239	jnz	NEAR $L$oop_enc1_11
; DB 102,15,56,221,209 = aesenclast xmm2,xmm1; xmm2 now holds the first tweak.
2240DB	102,15,56,221,209
; If the length is not a multiple of 16, subtract 16 from rdx so that one
; additional full block is left for the stealing code at $L$xts_dec_done2.
2241	xor	eax,eax
2242	test	rdx,15
2243	setnz	al
2244	shl	rax,4
2245	sub	rdx,rax
2246
; xmm0 = first round key of the data schedule, r11 = saved schedule pointer,
; eax = round count, r10d = 16 * round count, r9 = (adjusted) byte length,
; rdx = that length rounded down to a multiple of 16.
2247	movups	xmm0,XMMWORD[rcx]
2248	mov	r11,rcx
2249	mov	eax,r10d
2250	shl	r10d,4
2251	mov	r9,rdx
2252	and	rdx,-16
2253
; xmm1 = round key at offset 16 + 16*rounds, i.e. the key the single-block
; loops in this routine end up feeding to aesdeclast.  It is XORed with round
; key 0 a few lines down and parked at [96+rsp] for the bulk loop.
2254	movups	xmm1,XMMWORD[16+r10*1+rcx]
2255
; Tweak chain.  xmm8 = $L$xts_magic (constant defined outside this view).
; xmm15 carries the running tweak; xmm9 is a pshufd 0x5f copy of it that is
; shifted left one bit per step.  Each step copies xmm15 into the next of
; xmm10..xmm14, XORs round key 0 (xmm0) into that copy, doubles xmm15 with
; paddq and XORs in (sign mask of xmm9) AND xmm8.
; NOTE(review): presumably the XTS multiply-by-x step -- confirm against the
; definition of $L$xts_magic.
; Result: xmm10..xmm14 = tweaks 1..5, each XOR round key 0;
;         xmm15       = tweak 6, with no key material mixed in.
2256	movdqa	xmm8,XMMWORD[$L$xts_magic]
2257	movdqa	xmm15,xmm2
2258	pshufd	xmm9,xmm2,0x5f
2259	pxor	xmm1,xmm0
2260	movdqa	xmm14,xmm9
2261	paddd	xmm9,xmm9
2262	movdqa	xmm10,xmm15
2263	psrad	xmm14,31
2264	paddq	xmm15,xmm15
2265	pand	xmm14,xmm8
2266	pxor	xmm10,xmm0
2267	pxor	xmm15,xmm14
2268	movdqa	xmm14,xmm9
2269	paddd	xmm9,xmm9
2270	movdqa	xmm11,xmm15
2271	psrad	xmm14,31
2272	paddq	xmm15,xmm15
2273	pand	xmm14,xmm8
2274	pxor	xmm11,xmm0
2275	pxor	xmm15,xmm14
2276	movdqa	xmm14,xmm9
2277	paddd	xmm9,xmm9
2278	movdqa	xmm12,xmm15
2279	psrad	xmm14,31
2280	paddq	xmm15,xmm15
2281	pand	xmm14,xmm8
2282	pxor	xmm12,xmm0
2283	pxor	xmm15,xmm14
2284	movdqa	xmm14,xmm9
2285	paddd	xmm9,xmm9
2286	movdqa	xmm13,xmm15
2287	psrad	xmm14,31
2288	paddq	xmm15,xmm15
2289	pand	xmm14,xmm8
2290	pxor	xmm13,xmm0
2291	pxor	xmm15,xmm14
2292	movdqa	xmm14,xmm15
2293	psrad	xmm9,31
2294	paddq	xmm15,xmm15
2295	pand	xmm9,xmm8
2296	pxor	xmm14,xmm0
2297	pxor	xmm15,xmm9
; [96+rsp] = round key 0 XOR the aesdeclast round key.
2298	movaps	XMMWORD[96+rsp],xmm1
2299
; Borrow here means fewer than six whole blocks are available.
2300	sub	rdx,16*6
2301	jc	NEAR $L$xts_dec_short
2302
; Bulk-loop setup: rcx = schedule + 32 + 16*rounds, rax = r10 = 112 - 16*rounds
; (the inner loop adds 32 to it until it reaches zero), xmm1 = second round
; key, r8 = address of $L$xts_magic.
2303	mov	eax,16+96
2304	lea	rcx,[32+r10*1+r11]
2305	sub	rax,r10
2306	movups	xmm1,XMMWORD[16+r11]
2307	mov	r10,rax
2308	lea	r8,[$L$xts_magic]
2309	jmp	NEAR $L$xts_dec_grandloop
2310
; $L$xts_dec_grandloop: head of the six-blocks-per-iteration loop.
;
; Loads six input blocks into xmm2..xmm7 and XORs each with its tweak.  On
; entry xmm10..xmm14 already contain (tweak XOR round key 0); for the sixth
; block xmm8 = xmm0 XOR xmm15 is formed here.  The first AES rounds are
; interleaved with those loads:
;   DB 102,15,56,222,<209|217|225|233|241|249> = aesdec xmm2..xmm7,xmm1
;   DB 102,15,56,222,<208|216|224|232|240|248> = aesdec xmm2..xmm7,xmm0
;
; xmm9 = [96+rsp] (round key 0 XOR the aesdeclast round key).  XORing it into
; xmm10..xmm14 and xmm8 turns each (tweak XOR round key 0) into
; (tweak XOR last round key); those values are stored at [rsp], [16+rsp],
; [32+rsp], [64+rsp] and [80+rsp] here.  The [48+rsp] slot for xmm13 is
; written later, after $L$xts_dec_loop6 finishes.
2311ALIGN	32
2312$L$xts_dec_grandloop:
2313	movdqu	xmm2,XMMWORD[rdi]
2314	movdqa	xmm8,xmm0
2315	movdqu	xmm3,XMMWORD[16+rdi]
2316	pxor	xmm2,xmm10
2317	movdqu	xmm4,XMMWORD[32+rdi]
2318	pxor	xmm3,xmm11
2319DB	102,15,56,222,209
2320	movdqu	xmm5,XMMWORD[48+rdi]
2321	pxor	xmm4,xmm12
2322DB	102,15,56,222,217
2323	movdqu	xmm6,XMMWORD[64+rdi]
2324	pxor	xmm5,xmm13
2325DB	102,15,56,222,225
2326	movdqu	xmm7,XMMWORD[80+rdi]
2327	pxor	xmm8,xmm15
2328	movdqa	xmm9,XMMWORD[96+rsp]
2329	pxor	xmm6,xmm14
2330DB	102,15,56,222,233
; xmm0 is reused from here on as a round-key register (third round key).
2331	movups	xmm0,XMMWORD[32+r11]
2332	lea	rdi,[96+rdi]
2333	pxor	xmm7,xmm8
2334
2335	pxor	xmm10,xmm9
2336DB	102,15,56,222,241
2337	pxor	xmm11,xmm9
2338	movdqa	XMMWORD[rsp],xmm10
2339DB	102,15,56,222,249
2340	movups	xmm1,XMMWORD[48+r11]
2341	pxor	xmm12,xmm9
2342
2343DB	102,15,56,222,208
2344	pxor	xmm13,xmm9
2345	movdqa	XMMWORD[16+rsp],xmm11
2346DB	102,15,56,222,216
2347	pxor	xmm14,xmm9
2348	movdqa	XMMWORD[32+rsp],xmm12
2349DB	102,15,56,222,224
2350DB	102,15,56,222,232
2351	pxor	xmm8,xmm9
2352	movdqa	XMMWORD[64+rsp],xmm14
2353DB	102,15,56,222,240
2354DB	102,15,56,222,248
2355	movups	xmm0,XMMWORD[64+r11]
2356	movdqa	XMMWORD[80+rsp],xmm8
; xmm9 is re-seeded from the current xmm15 for the next round of tweak updates.
2357	pshufd	xmm9,xmm15,0x5f
2358	jmp	NEAR $L$xts_dec_loop6
; $L$xts_dec_loop6: middle AES rounds for the six blocks in xmm2..xmm7, two
; rounds per iteration (aesdec with xmm1, then with xmm0).  Round keys are
; fetched relative to rcx with the negative index rax, which climbs by 32 per
; iteration; jnz tests the flags left by `add rax,32` (the DB-encoded aesdec
; instructions and movups do not modify flags).
2359ALIGN	32
2360$L$xts_dec_loop6:
2361DB	102,15,56,222,209
2362DB	102,15,56,222,217
2363DB	102,15,56,222,225
2364DB	102,15,56,222,233
2365DB	102,15,56,222,241
2366DB	102,15,56,222,249
2367	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
2368	add	rax,32
2369
2370DB	102,15,56,222,208
2371DB	102,15,56,222,216
2372DB	102,15,56,222,224
2373DB	102,15,56,222,232
2374DB	102,15,56,222,240
2375DB	102,15,56,222,248
2376	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
2377	jnz	NEAR $L$xts_dec_loop6
2378
; Remaining rounds, interleaved with computing the tweaks for the NEXT group
; of six blocks.  xmm8 = constant at [r8] ($L$xts_magic); round key 0 is
; reloaded from [r11] into xmm10 and copied down the chain xmm11..xmm14, and
; each register is XORed with the freshly advanced xmm15, so on exit
; xmm10..xmm14 = (next tweaks XOR round key 0) and xmm15 = the tweak after
; those, as the loop head expects.
2379	movdqa	xmm8,XMMWORD[r8]
2380	movdqa	xmm14,xmm9
2381	paddd	xmm9,xmm9
2382DB	102,15,56,222,209
2383	paddq	xmm15,xmm15
2384	psrad	xmm14,31
2385DB	102,15,56,222,217
2386	pand	xmm14,xmm8
2387	movups	xmm10,XMMWORD[r11]
2388DB	102,15,56,222,225
2389DB	102,15,56,222,233
2390DB	102,15,56,222,241
2391	pxor	xmm15,xmm14
2392	movaps	xmm11,xmm10
2393DB	102,15,56,222,249
2394	movups	xmm1,XMMWORD[((-64))+rcx]
2395
2396	movdqa	xmm14,xmm9
2397DB	102,15,56,222,208
2398	paddd	xmm9,xmm9
2399	pxor	xmm10,xmm15
2400DB	102,15,56,222,216
2401	psrad	xmm14,31
2402	paddq	xmm15,xmm15
2403DB	102,15,56,222,224
2404DB	102,15,56,222,232
2405	pand	xmm14,xmm8
2406	movaps	xmm12,xmm11
2407DB	102,15,56,222,240
2408	pxor	xmm15,xmm14
2409	movdqa	xmm14,xmm9
2410DB	102,15,56,222,248
2411	movups	xmm0,XMMWORD[((-48))+rcx]
2412
2413	paddd	xmm9,xmm9
2414DB	102,15,56,222,209
2415	pxor	xmm11,xmm15
2416	psrad	xmm14,31
2417DB	102,15,56,222,217
2418	paddq	xmm15,xmm15
2419	pand	xmm14,xmm8
2420DB	102,15,56,222,225
2421DB	102,15,56,222,233
; Deferred store of the fourth (tweak XOR last round key) value; xmm13 is
; about to be overwritten with the next group's data.
2422	movdqa	XMMWORD[48+rsp],xmm13
2423	pxor	xmm15,xmm14
2424DB	102,15,56,222,241
2425	movaps	xmm13,xmm12
2426	movdqa	xmm14,xmm9
2427DB	102,15,56,222,249
2428	movups	xmm1,XMMWORD[((-32))+rcx]
2429
2430	paddd	xmm9,xmm9
2431DB	102,15,56,222,208
2432	pxor	xmm12,xmm15
2433	psrad	xmm14,31
2434DB	102,15,56,222,216
2435	paddq	xmm15,xmm15
2436	pand	xmm14,xmm8
2437DB	102,15,56,222,224
2438DB	102,15,56,222,232
2439DB	102,15,56,222,240
2440	pxor	xmm15,xmm14
2441	movaps	xmm14,xmm13
2442DB	102,15,56,222,248
2443
; xmm0 doubles as the carry-mask scratch for this step, then is reloaded with
; round key 0; xmm1 is reloaded with the second round key for the next pass.
2444	movdqa	xmm0,xmm9
2445	paddd	xmm9,xmm9
2446DB	102,15,56,222,209
2447	pxor	xmm13,xmm15
2448	psrad	xmm0,31
2449DB	102,15,56,222,217
2450	paddq	xmm15,xmm15
2451	pand	xmm0,xmm8
2452DB	102,15,56,222,225
2453DB	102,15,56,222,233
2454	pxor	xmm15,xmm0
2455	movups	xmm0,XMMWORD[r11]
2456DB	102,15,56,222,241
2457DB	102,15,56,222,249
2458	movups	xmm1,XMMWORD[16+r11]
2459
; Final round with a memory operand:
;   DB 102,15,56,223,84,36,0   = aesdeclast xmm2,[rsp+0]
;   DB 102,15,56,223,92,36,16  = aesdeclast xmm3,[rsp+16]
;   ... through ...
;   DB 102,15,56,223,124,36,80 = aesdeclast xmm7,[rsp+80]
; The stack slots hold (tweak XOR last round key), so the output tweak XOR is
; folded into the last AES round.  rax is reset from r10 for the next pass.
2460	pxor	xmm14,xmm15
2461DB	102,15,56,223,84,36,0
2462	psrad	xmm9,31
2463	paddq	xmm15,xmm15
2464DB	102,15,56,223,92,36,16
2465DB	102,15,56,223,100,36,32
2466	pand	xmm9,xmm8
2467	mov	rax,r10
2468DB	102,15,56,223,108,36,48
2469DB	102,15,56,223,116,36,64
2470DB	102,15,56,223,124,36,80
2471	pxor	xmm15,xmm9
2472
; Store the six output blocks and loop while another 96 bytes remain.
2473	lea	rsi,[96+rsi]
2474	movups	XMMWORD[(-96)+rsi],xmm2
2475	movups	XMMWORD[(-80)+rsi],xmm3
2476	movups	XMMWORD[(-64)+rsi],xmm4
2477	movups	XMMWORD[(-48)+rsi],xmm5
2478	movups	XMMWORD[(-32)+rsi],xmm6
2479	movups	XMMWORD[(-16)+rsi],xmm7
2480	sub	rdx,16*6
2481	jnc	NEAR $L$xts_dec_grandloop
2482
; Undo the bulk-loop register setup: eax = (112 - r10d) >> 4 = round count,
; rcx = start of the key schedule.
2483	mov	eax,16+96
2484	sub	eax,r10d
2485	mov	rcx,r11
2486	shr	eax,4
2487
; $L$xts_dec_short: handle the 0..5 whole blocks left after the bulk loop
; (or all of them if the input never reached six blocks).
;
; r10d keeps the round count.  rdx += 96 restores the remaining whole-block
; byte count (0, 16, ..., 80).  xmm10..xmm14 still carry round key 0 XORed in;
; each `pxor xmmN,xmm0` strips it again.  Compared with the encrypt routine
; one extra tweak is stripped on every path, because the tails below hand on
; two tweaks (xmm10 and xmm11) to the stealing code.  SSE pxor does not touch
; EFLAGS, so every `je` below still sees the flags of the preceding cmp.
2488$L$xts_dec_short:
2489
2490	mov	r10d,eax
2491	pxor	xmm10,xmm0
2492	pxor	xmm11,xmm0
2493	add	rdx,16*6
2494	jz	NEAR $L$xts_dec_done
2495
2496	pxor	xmm12,xmm0
2497	cmp	rdx,0x20
2498	jb	NEAR $L$xts_dec_one
2499	pxor	xmm13,xmm0
2500	je	NEAR $L$xts_dec_two
2501
2502	pxor	xmm14,xmm0
2503	cmp	rdx,0x40
2504	jb	NEAR $L$xts_dec_three
2505	je	NEAR $L$xts_dec_four
2506
; Five blocks: XOR each with its tweak, run _aesni_decrypt6 (xmm7 is not
; initialised here and its result is not stored), then XOR the tweaks again.
2507	movdqu	xmm2,XMMWORD[rdi]
2508	movdqu	xmm3,XMMWORD[16+rdi]
2509	movdqu	xmm4,XMMWORD[32+rdi]
2510	pxor	xmm2,xmm10
2511	movdqu	xmm5,XMMWORD[48+rdi]
2512	pxor	xmm3,xmm11
2513	movdqu	xmm6,XMMWORD[64+rdi]
2514	lea	rdi,[80+rdi]
2515	pxor	xmm4,xmm12
2516	pxor	xmm5,xmm13
2517	pxor	xmm6,xmm14
2518
2519	call	_aesni_decrypt6
2520
2521	xorps	xmm2,xmm10
2522	xorps	xmm3,xmm11
2523	xorps	xmm4,xmm12
2524	movdqu	XMMWORD[rsi],xmm2
2525	xorps	xmm5,xmm13
2526	movdqu	XMMWORD[16+rsi],xmm3
2527	xorps	xmm6,xmm14
2528	movdqu	XMMWORD[32+rsi],xmm4
; xmm14 = per-dword mask of the sign bits of xmm15 (0 > xmm15, signed),
; shuffled into xmm11 as the carry mask for one more tweak update.
2529	pxor	xmm14,xmm14
2530	movdqu	XMMWORD[48+rsi],xmm5
2531	pcmpgtd	xmm14,xmm15
2532	movdqu	XMMWORD[64+rsi],xmm6
2533	lea	rsi,[80+rsi]
2534	pshufd	xmm11,xmm14,0x13
; No trailing partial block -> finished.
2535	and	r9,15
2536	jz	NEAR $L$xts_dec_ret
2537
; Stealing needed: xmm10 = current tweak (xmm15), xmm11 = the tweak after it.
; NOTE(review): relies on xmm8 still holding $L$xts_magic after the call to
; _aesni_decrypt6 -- confirm that helper leaves xmm8 untouched.
2538	movdqa	xmm10,xmm15
2539	paddq	xmm15,xmm15
2540	pand	xmm11,xmm8
2541	pxor	xmm11,xmm15
2542	jmp	NEAR $L$xts_dec_done2
2543
; $L$xts_dec_one: exactly one whole block left.  The block is XORed with the
; tweak in xmm10, run through an inline single-block AES decrypt loop
; (rcx = key schedule, eax = round count), and XORed with xmm10 again.
2544ALIGN	16
2545$L$xts_dec_one:
2546	movups	xmm2,XMMWORD[rdi]
2547	lea	rdi,[16+rdi]
2548	xorps	xmm2,xmm10
2549	movups	xmm0,XMMWORD[rcx]
2550	movups	xmm1,XMMWORD[16+rcx]
2551	lea	rcx,[32+rcx]
2552	xorps	xmm2,xmm0
2553$L$oop_dec1_12:
; DB 102,15,56,222,209 = aesdec xmm2,xmm1
2554DB	102,15,56,222,209
2555	dec	eax
2556	movups	xmm1,XMMWORD[rcx]
2557	lea	rcx,[16+rcx]
2558	jnz	NEAR $L$oop_dec1_12
; DB 102,15,56,223,209 = aesdeclast xmm2,xmm1
2559DB	102,15,56,223,209
2560	xorps	xmm2,xmm10
; Hand the next two unused tweaks to $L$xts_dec_done in xmm10 and xmm11.
2561	movdqa	xmm10,xmm11
2562	movups	XMMWORD[rsi],xmm2
2563	movdqa	xmm11,xmm12
2564	lea	rsi,[16+rsi]
2565	jmp	NEAR $L$xts_dec_done
2566
; $L$xts_dec_two: exactly two whole blocks left.  Tweaks xmm10/xmm11 are
; XORed in before and after _aesni_decrypt2 (which takes the key schedule in
; rcx and the round count in eax).
2567ALIGN	16
2568$L$xts_dec_two:
2569	movups	xmm2,XMMWORD[rdi]
2570	movups	xmm3,XMMWORD[16+rdi]
2571	lea	rdi,[32+rdi]
2572	xorps	xmm2,xmm10
2573	xorps	xmm3,xmm11
2574
2575	call	_aesni_decrypt2
2576
2577	xorps	xmm2,xmm10
; Hand the next two unused tweaks to $L$xts_dec_done in xmm10 and xmm11.
2578	movdqa	xmm10,xmm12
2579	xorps	xmm3,xmm11
2580	movdqa	xmm11,xmm13
2581	movups	XMMWORD[rsi],xmm2
2582	movups	XMMWORD[16+rsi],xmm3
2583	lea	rsi,[32+rsi]
2584	jmp	NEAR $L$xts_dec_done
2585
; $L$xts_dec_three: exactly three whole blocks left.  Tweaks xmm10..xmm12 are
; XORed in before and after the call to _aesni_decrypt3.
2586ALIGN	16
2587$L$xts_dec_three:
2588	movups	xmm2,XMMWORD[rdi]
2589	movups	xmm3,XMMWORD[16+rdi]
2590	movups	xmm4,XMMWORD[32+rdi]
2591	lea	rdi,[48+rdi]
2592	xorps	xmm2,xmm10
2593	xorps	xmm3,xmm11
2594	xorps	xmm4,xmm12
2595
2596	call	_aesni_decrypt3
2597
2598	xorps	xmm2,xmm10
; Hand the next two unused tweaks to $L$xts_dec_done in xmm10 and xmm11.
2599	movdqa	xmm10,xmm13
2600	xorps	xmm3,xmm11
2601	movdqa	xmm11,xmm14
2602	xorps	xmm4,xmm12
2603	movups	XMMWORD[rsi],xmm2
2604	movups	XMMWORD[16+rsi],xmm3
2605	movups	XMMWORD[32+rsi],xmm4
2606	lea	rsi,[48+rsi]
2607	jmp	NEAR $L$xts_dec_done
2608
; $L$xts_dec_four: exactly four whole blocks left.  Tweaks xmm10..xmm13 are
; XORed in before and after the call to _aesni_decrypt4.
2609ALIGN	16
2610$L$xts_dec_four:
2611	movups	xmm2,XMMWORD[rdi]
2612	movups	xmm3,XMMWORD[16+rdi]
2613	movups	xmm4,XMMWORD[32+rdi]
2614	xorps	xmm2,xmm10
2615	movups	xmm5,XMMWORD[48+rdi]
2616	lea	rdi,[64+rdi]
2617	xorps	xmm3,xmm11
2618	xorps	xmm4,xmm12
2619	xorps	xmm5,xmm13
2620
2621	call	_aesni_decrypt4
2622
2623	pxor	xmm2,xmm10
; Hand the next two unused tweaks to $L$xts_dec_done in xmm10 and xmm11.
2624	movdqa	xmm10,xmm14
2625	pxor	xmm3,xmm11
2626	movdqa	xmm11,xmm15
2627	pxor	xmm4,xmm12
2628	movdqu	XMMWORD[rsi],xmm2
2629	pxor	xmm5,xmm13
2630	movdqu	XMMWORD[16+rsi],xmm3
2631	movdqu	XMMWORD[32+rsi],xmm4
2632	movdqu	XMMWORD[48+rsi],xmm5
2633	lea	rsi,[64+rsi]
2634	jmp	NEAR $L$xts_dec_done
2635
; $L$xts_dec_done: all regular blocks are written.  r9 & 15 is the size of
; the trailing partial block; zero means nothing is left to do.  Otherwise
; xmm10 holds the next tweak in sequence and xmm11 the one after it.
2636ALIGN	16
2637$L$xts_dec_done:
2638	and	r9,15
2639	jz	NEAR $L$xts_dec_ret
2640$L$xts_dec_done2:
2641	mov	rdx,r9
2642	mov	rcx,r11
2643	mov	eax,r10d
2644
; Step 1: decrypt the held-back full block at [rdi] using the LATER tweak
; (xmm11) and write it to [rsi].
2645	movups	xmm2,XMMWORD[rdi]
2646	xorps	xmm2,xmm11
2647	movups	xmm0,XMMWORD[rcx]
2648	movups	xmm1,XMMWORD[16+rcx]
2649	lea	rcx,[32+rcx]
2650	xorps	xmm2,xmm0
2651$L$oop_dec1_13:
; DB 102,15,56,222,209 = aesdec xmm2,xmm1
2652DB	102,15,56,222,209
2653	dec	eax
2654	movups	xmm1,XMMWORD[rcx]
2655	lea	rcx,[16+rcx]
2656	jnz	NEAR $L$oop_dec1_13
; DB 102,15,56,223,209 = aesdeclast xmm2,xmm1
2657DB	102,15,56,223,209
2658	xorps	xmm2,xmm11
2659	movups	XMMWORD[rsi],xmm2
2660
; Step 2: byte-wise swap, r9 iterations: each trailing input byte [rdi+16]
; replaces a byte of the block just written at [rsi], and the byte it
; replaced is emitted as partial output at [rsi+16].
2661$L$xts_dec_steal:
2662	movzx	eax,BYTE[16+rdi]
2663	movzx	ecx,BYTE[rsi]
2664	lea	rdi,[1+rdi]
2665	mov	BYTE[rsi],al
2666	mov	BYTE[16+rsi],cl
2667	lea	rsi,[1+rsi]
2668	sub	rdx,1
2669	jnz	NEAR $L$xts_dec_steal
2670
; Step 3: rewind rsi and decrypt the patched block at [rsi] in place using
; the EARLIER tweak (xmm10).
2671	sub	rsi,r9
2672	mov	rcx,r11
2673	mov	eax,r10d
2674
2675	movups	xmm2,XMMWORD[rsi]
2676	xorps	xmm2,xmm10
2677	movups	xmm0,XMMWORD[rcx]
2678	movups	xmm1,XMMWORD[16+rcx]
2679	lea	rcx,[32+rcx]
2680	xorps	xmm2,xmm0
2681$L$oop_dec1_14:
; DB 102,15,56,222,209 = aesdec xmm2,xmm1
2682DB	102,15,56,222,209
2683	dec	eax
2684	movups	xmm1,XMMWORD[rcx]
2685	lea	rcx,[16+rcx]
2686	jnz	NEAR $L$oop_dec1_14
; DB 102,15,56,223,209 = aesdeclast xmm2,xmm1
2687DB	102,15,56,223,209
2688	xorps	xmm2,xmm10
2689	movups	XMMWORD[rsi],xmm2
2690
; $L$xts_dec_ret: common exit.  Zeroes xmm0..xmm5, reloads xmm6..xmm15 from
; the save area below rbp (overwriting each save slot with zero as it goes),
; zeroes the scratch slots [rsp]..[96+rsp], then unwinds the frame.
2691$L$xts_dec_ret:
2692	xorps	xmm0,xmm0
2693	pxor	xmm1,xmm1
2694	pxor	xmm2,xmm2
2695	pxor	xmm3,xmm3
2696	pxor	xmm4,xmm4
2697	pxor	xmm5,xmm5
2698	movaps	xmm6,XMMWORD[((-160))+rbp]
2699	movaps	XMMWORD[(-160)+rbp],xmm0
2700	movaps	xmm7,XMMWORD[((-144))+rbp]
2701	movaps	XMMWORD[(-144)+rbp],xmm0
2702	movaps	xmm8,XMMWORD[((-128))+rbp]
2703	movaps	XMMWORD[(-128)+rbp],xmm0
2704	movaps	xmm9,XMMWORD[((-112))+rbp]
2705	movaps	XMMWORD[(-112)+rbp],xmm0
2706	movaps	xmm10,XMMWORD[((-96))+rbp]
2707	movaps	XMMWORD[(-96)+rbp],xmm0
2708	movaps	xmm11,XMMWORD[((-80))+rbp]
2709	movaps	XMMWORD[(-80)+rbp],xmm0
2710	movaps	xmm12,XMMWORD[((-64))+rbp]
2711	movaps	XMMWORD[(-64)+rbp],xmm0
2712	movaps	xmm13,XMMWORD[((-48))+rbp]
2713	movaps	XMMWORD[(-48)+rbp],xmm0
2714	movaps	xmm14,XMMWORD[((-32))+rbp]
2715	movaps	XMMWORD[(-32)+rbp],xmm0
2716	movaps	xmm15,XMMWORD[((-16))+rbp]
2717	movaps	XMMWORD[(-16)+rbp],xmm0
2718	movaps	XMMWORD[rsp],xmm0
2719	movaps	XMMWORD[16+rsp],xmm0
2720	movaps	XMMWORD[32+rsp],xmm0
2721	movaps	XMMWORD[48+rsp],xmm0
2722	movaps	XMMWORD[64+rsp],xmm0
2723	movaps	XMMWORD[80+rsp],xmm0
2724	movaps	XMMWORD[96+rsp],xmm0
; rbp points at the saved-rbp slot, so this pair restores rsp and rbp to
; their values at function entry.
2725	lea	rsp,[rbp]
2726	pop	rbp
2727$L$xts_dec_epilogue:
; Reload the rdi/rsi values stashed by the prologue and return.
2728	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2729	mov	rsi,QWORD[16+rsp]
2730	DB	0F3h,0C3h		;repret
2731$L$SEH_end_aesni_xts_decrypt:
; aesni_cbc_encrypt -- CBC-mode AES with AES-NI, Win64 calling convention.
; The prologue moves the Win64 arguments into the register layout used by the body:
;   rdi = input pointer   (arg1, rcx)      rsi = output pointer (arg2, rdx)
;   rdx = length in bytes (arg3, r8)       rcx = key schedule   (arg4, r9)
;   r8  = IV pointer      (arg5, [40+rsp]) r9d = direction flag (arg6, [48+rsp]):
;                                                non-zero encrypts, zero decrypts
; The caller's rdi/rsi are parked at [8+rsp]/[16+rsp] and reloaded at $L$cbc_ret.
; Every non-empty path writes the updated chaining value back through r8.
2732global	aesni_cbc_encrypt
2733
2734ALIGN	16
2735aesni_cbc_encrypt:
2736	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2737	mov	QWORD[16+rsp],rsi
2738	mov	rax,rsp
2739$L$SEH_begin_aesni_cbc_encrypt:
2740	mov	rdi,rcx
2741	mov	rsi,rdx
2742	mov	rdx,r8
2743	mov	rcx,r9
2744	mov	r8,QWORD[40+rsp]
2745	mov	r9,QWORD[48+rsp]
2746
2747
; Zero-length request: nothing to do.
2748	test	rdx,rdx
2749	jz	NEAR $L$cbc_ret
2750
; r10d = round-loop count read from key+240; r11 keeps the key pointer so that
; rcx/eax can be reloaded for every block.
2751	mov	r10d,DWORD[240+rcx]
2752	mov	r11,rcx
2753	test	r9d,r9d
2754	jz	NEAR $L$cbc_decrypt
2755
; ---- encrypt path: xmm2 carries the chaining value (IV, then previous ciphertext) ----
2756	movups	xmm2,XMMWORD[r8]
2757	mov	eax,r10d
; Fewer than 16 bytes: pad the block first. Otherwise pre-subtract one block so
; the loop can simply test the borrow of "sub rdx,16".
2758	cmp	rdx,16
2759	jb	NEAR $L$cbc_enc_tail
2760	sub	rdx,16
2761	jmp	NEAR $L$cbc_enc_loop
2762ALIGN	16
2763$L$cbc_enc_loop:
2764	movups	xmm3,XMMWORD[rdi]
2765	lea	rdi,[16+rdi]
2766
; xmm3 = plaintext ^ round key 0, then xmm2 = chaining value ^ xmm3.
2767	movups	xmm0,XMMWORD[rcx]
2768	movups	xmm1,XMMWORD[16+rcx]
2769	xorps	xmm3,xmm0
2770	lea	rcx,[32+rcx]
2771	xorps	xmm2,xmm3
2772$L$oop_enc1_15:
; aesenc xmm2,xmm1 -- one round per iteration, eax counts down
2773DB	102,15,56,220,209
2774	dec	eax
2775	movups	xmm1,XMMWORD[rcx]
2776	lea	rcx,[16+rcx]
2777	jnz	NEAR $L$oop_enc1_15
; aesenclast xmm2,xmm1
2778DB	102,15,56,221,209
; Reload counter and key pointer, emit the ciphertext block, and keep looping
; while rdx did not borrow.
2779	mov	eax,r10d
2780	mov	rcx,r11
2781	movups	XMMWORD[rsi],xmm2
2782	lea	rsi,[16+rsi]
2783	sub	rdx,16
2784	jnc	NEAR $L$cbc_enc_loop
; Undo the last subtraction: rdx is now the leftover byte count; non-zero means
; a partial final block.
2785	add	rdx,16
2786	jnz	NEAR $L$cbc_enc_tail
; Scrub the working registers and hand the last ciphertext block back as the new IV.
2787	pxor	xmm0,xmm0
2788	pxor	xmm1,xmm1
2789	movups	XMMWORD[r8],xmm2
2790	pxor	xmm2,xmm2
2791	pxor	xmm3,xmm3
2792	jmp	NEAR $L$cbc_ret
2793
; Partial block: copy rdx input bytes into the output buffer, zero-fill it up to
; 16 bytes, then run the loop once more with that block as both source and
; destination.
2794$L$cbc_enc_tail:
2795	mov	rcx,rdx
2796	xchg	rsi,rdi
; bytes F3 A4 66 90: rep movsb followed by a 2-byte nop
2797	DD	0x9066A4F3
2798	mov	ecx,16
2799	sub	rcx,rdx
2800	xor	eax,eax
; bytes F3 AA 66 90: rep stosb followed by a 2-byte nop (stores 16-rdx zero bytes)
2801	DD	0x9066AAF3
; rdi now points just past the padded block; step back to its start. rdx=0 makes
; the loop exit after this one block.
2802	lea	rdi,[((-16))+rdi]
2803	mov	eax,r10d
2804	mov	rsi,rdi
2805	mov	rcx,r11
2806	xor	rdx,rdx
2807	jmp	NEAR $L$cbc_enc_loop
2808
; CBC decrypt, exactly one 16-byte block: handled without building a stack frame.
; Any other length goes to $L$cbc_decrypt_bulk.
; In:  rdi = input, rsi = output, rcx = key schedule, r8 = IV pointer,
;      r10d = round-loop count.
2809ALIGN	16
2810$L$cbc_decrypt:
2811	cmp	rdx,16
2812	jne	NEAR $L$cbc_decrypt_bulk
2813
2814
2815
; xmm2 = ciphertext block, xmm3 = IV, xmm4 = copy of the ciphertext (becomes the next IV).
2816	movdqu	xmm2,XMMWORD[rdi]
2817	movdqu	xmm3,XMMWORD[r8]
2818	movdqa	xmm4,xmm2
2819	movups	xmm0,XMMWORD[rcx]
2820	movups	xmm1,XMMWORD[16+rcx]
2821	lea	rcx,[32+rcx]
2822	xorps	xmm2,xmm0
2823$L$oop_dec1_16:
; aesdec xmm2,xmm1 -- r10d itself is the counter here (no later block needs it)
2824DB	102,15,56,222,209
2825	dec	r10d
2826	movups	xmm1,XMMWORD[rcx]
2827	lea	rcx,[16+rcx]
2828	jnz	NEAR $L$oop_dec1_16
; aesdeclast xmm2,xmm1
2829DB	102,15,56,223,209
; Scrub key material, store the new IV, xor the old IV in to get the plaintext,
; and scrub the remaining registers as they go out of use.
2830	pxor	xmm0,xmm0
2831	pxor	xmm1,xmm1
2832	movdqu	XMMWORD[r8],xmm4
2833	xorps	xmm2,xmm3
2834	pxor	xmm3,xmm3
2835	movups	XMMWORD[rsi],xmm2
2836	pxor	xmm2,xmm2
2837	jmp	NEAR $L$cbc_ret
; CBC decrypt, general case (length != 16).
; Builds a 16-byte-aligned frame and spills xmm6-xmm15 (callee-saved under the
; Win64 ABI) at [16+rsp]..[160+rsp]; [rsp] is left free as a 16-byte scratch slot.
; rax holds the pre-push rsp until $L$cbc_decrypt_body, where rbp is set to
; rax-8, i.e. the address of the pushed rbp, so "lea rsp,[rbp] / pop rbp"
; unwinds the frame at $L$cbc_dec_ret.
2838ALIGN	16
2839$L$cbc_decrypt_bulk:
2840	lea	rax,[rsp]
2841	push	rbp
2842	sub	rsp,176
2843	and	rsp,-16
2844	movaps	XMMWORD[16+rsp],xmm6
2845	movaps	XMMWORD[32+rsp],xmm7
2846	movaps	XMMWORD[48+rsp],xmm8
2847	movaps	XMMWORD[64+rsp],xmm9
2848	movaps	XMMWORD[80+rsp],xmm10
2849	movaps	XMMWORD[96+rsp],xmm11
2850	movaps	XMMWORD[112+rsp],xmm12
2851	movaps	XMMWORD[128+rsp],xmm13
2852	movaps	XMMWORD[144+rsp],xmm14
2853	movaps	XMMWORD[160+rsp],xmm15
2854$L$cbc_decrypt_body:
2855	lea	rbp,[((-8))+rax]
; xmm10 = IV (chaining value), eax = round-loop count.
2856	movups	xmm10,XMMWORD[r8]
2857	mov	eax,r10d
; 0x50 bytes (five blocks) or fewer: straight to the tail dispatcher.
2858	cmp	rdx,0x50
2859	jbe	NEAR $L$cbc_dec_tail
2860
; Load round key 0 and the first six ciphertext blocks into xmm2..xmm7, keeping
; untouched copies of the first five in xmm11..xmm15 for the CBC xor.
2861	movups	xmm0,XMMWORD[rcx]
2862	movdqu	xmm2,XMMWORD[rdi]
2863	movdqu	xmm3,XMMWORD[16+rdi]
2864	movdqa	xmm11,xmm2
2865	movdqu	xmm4,XMMWORD[32+rdi]
2866	movdqa	xmm12,xmm3
2867	movdqu	xmm5,XMMWORD[48+rdi]
2868	movdqa	xmm13,xmm4
2869	movdqu	xmm6,XMMWORD[64+rdi]
2870	movdqa	xmm14,xmm5
2871	movdqu	xmm7,XMMWORD[80+rdi]
2872	movdqa	xmm15,xmm6
2873	mov	r9d,DWORD[((OPENSSL_ia32cap_P+4))]
; 0x70 bytes or fewer (six or seven blocks): no main loop needed.
2874	cmp	rdx,0x70
2875	jbe	NEAR $L$cbc_dec_six_or_seven
2876
; Pick the main loop from the capability word: mask 71303168 = 0x04400000
; (bits 26 and 22); only bit 22 set (4194304) selects the 6-block loop, anything
; else the 8-block loop.
; NOTE(review): presumably these are the CPUID(1).ECX XSAVE/MOVBE bits used to
; spot Atom-class cores -- confirm against the OPENSSL_ia32cap documentation.
2877	and	r9d,71303168
2878	sub	rdx,0x50
2879	cmp	r9d,4194304
2880	je	NEAR $L$cbc_dec_loop6_enter
; 8-block loop: rdx is biased by a total of 0x70 and rcx by +112 (the loop
; addresses round keys as (N-112)+rcx).
2881	sub	rdx,0x20
2882	lea	rcx,[112+rcx]
2883	jmp	NEAR $L$cbc_dec_loop8_enter
; CBC decrypt main loop, eight blocks per iteration (xmm2..xmm9).
; On entry to $L$cbc_dec_loop8_enter: xmm2..xmm7 hold six ciphertext blocks,
; xmm0 = round key 0, xmm10 = chaining value, xmm11..xmm15 = copies of the first
; five ciphertext blocks, rcx = key schedule + 112, eax = round-loop count.
; Round keys alternate between xmm1 and xmm0; each is loaded while the rounds
; using the other one are issued.
2884ALIGN	16
2885$L$cbc_dec_loop8:
; Store the eighth plaintext block held back from the previous iteration.
2886	movups	XMMWORD[rsi],xmm9
2887	lea	rsi,[16+rsi]
2888$L$cbc_dec_loop8_enter:
; Load blocks seven and eight, and xor round key 0 into all eight blocks.
2889	movdqu	xmm8,XMMWORD[96+rdi]
2890	pxor	xmm2,xmm0
2891	movdqu	xmm9,XMMWORD[112+rdi]
2892	pxor	xmm3,xmm0
2893	movups	xmm1,XMMWORD[((16-112))+rcx]
2894	pxor	xmm4,xmm0
; The flags of this cmp are consumed by the setnc below (the instructions in
; between are SSE/AES-NI operations and movs, which leave the flags alone).
2895	xor	r11,r11
2896	cmp	rdx,0x70
2897	pxor	xmm5,xmm0
2898	pxor	xmm6,xmm0
2899	pxor	xmm7,xmm0
2900	pxor	xmm8,xmm0
2901
; aesdec xmm2,xmm1
2902DB	102,15,56,222,209
2903	pxor	xmm9,xmm0
2904	movups	xmm0,XMMWORD[((32-112))+rcx]
; aesdec xmm3..xmm8,xmm1
2905DB	102,15,56,222,217
2906DB	102,15,56,222,225
2907DB	102,15,56,222,233
2908DB	102,15,56,222,241
2909DB	102,15,56,222,249
2910DB	102,68,15,56,222,193
; r11 = rdi + 128 when rdx is at least 0x70 (unsigned), otherwise rdi: the
; address from which $L$cbc_dec_done preloads the next batch of input blocks.
2911	setnc	r11b
2912	shl	r11,7
; aesdec xmm9,xmm1
2913DB	102,68,15,56,222,201
2914	add	r11,rdi
2915	movups	xmm1,XMMWORD[((48-112))+rcx]
; aesdec xmm2..xmm9,xmm0
2916DB	102,15,56,222,208
2917DB	102,15,56,222,216
2918DB	102,15,56,222,224
2919DB	102,15,56,222,232
2920DB	102,15,56,222,240
2921DB	102,15,56,222,248
2922DB	102,68,15,56,222,192
2923DB	102,68,15,56,222,200
2924	movups	xmm0,XMMWORD[((64-112))+rcx]
2925	nop
; aesdec xmm2..xmm9,xmm1
2926DB	102,15,56,222,209
2927DB	102,15,56,222,217
2928DB	102,15,56,222,225
2929DB	102,15,56,222,233
2930DB	102,15,56,222,241
2931DB	102,15,56,222,249
2932DB	102,68,15,56,222,193
2933DB	102,68,15,56,222,201
2934	movups	xmm1,XMMWORD[((80-112))+rcx]
2935	nop
; aesdec xmm2..xmm9,xmm0
2936DB	102,15,56,222,208
2937DB	102,15,56,222,216
2938DB	102,15,56,222,224
2939DB	102,15,56,222,232
2940DB	102,15,56,222,240
2941DB	102,15,56,222,248
2942DB	102,68,15,56,222,192
2943DB	102,68,15,56,222,200
2944	movups	xmm0,XMMWORD[((96-112))+rcx]
2945	nop
; aesdec xmm2..xmm9,xmm1
2946DB	102,15,56,222,209
2947DB	102,15,56,222,217
2948DB	102,15,56,222,225
2949DB	102,15,56,222,233
2950DB	102,15,56,222,241
2951DB	102,15,56,222,249
2952DB	102,68,15,56,222,193
2953DB	102,68,15,56,222,201
2954	movups	xmm1,XMMWORD[((112-112))+rcx]
2955	nop
; aesdec xmm2..xmm9,xmm0
2956DB	102,15,56,222,208
2957DB	102,15,56,222,216
2958DB	102,15,56,222,224
2959DB	102,15,56,222,232
2960DB	102,15,56,222,240
2961DB	102,15,56,222,248
2962DB	102,68,15,56,222,192
2963DB	102,68,15,56,222,200
2964	movups	xmm0,XMMWORD[((128-112))+rcx]
2965	nop
; aesdec xmm2..xmm9,xmm1
2966DB	102,15,56,222,209
2967DB	102,15,56,222,217
2968DB	102,15,56,222,225
2969DB	102,15,56,222,233
2970DB	102,15,56,222,241
2971DB	102,15,56,222,249
2972DB	102,68,15,56,222,193
2973DB	102,68,15,56,222,201
2974	movups	xmm1,XMMWORD[((144-112))+rcx]
; Compare the round-loop count with 11; the jb and je further down both consume
; these flags. Below 11 leaves now, exactly 11 leaves after two more rounds,
; anything larger runs four more rounds.
2975	cmp	eax,11
; aesdec xmm2..xmm9,xmm0
2976DB	102,15,56,222,208
2977DB	102,15,56,222,216
2978DB	102,15,56,222,224
2979DB	102,15,56,222,232
2980DB	102,15,56,222,240
2981DB	102,15,56,222,248
2982DB	102,68,15,56,222,192
2983DB	102,68,15,56,222,200
2984	movups	xmm0,XMMWORD[((160-112))+rcx]
2985	jb	NEAR $L$cbc_dec_done
; aesdec xmm2..xmm9,xmm1
2986DB	102,15,56,222,209
2987DB	102,15,56,222,217
2988DB	102,15,56,222,225
2989DB	102,15,56,222,233
2990DB	102,15,56,222,241
2991DB	102,15,56,222,249
2992DB	102,68,15,56,222,193
2993DB	102,68,15,56,222,201
2994	movups	xmm1,XMMWORD[((176-112))+rcx]
2995	nop
; aesdec xmm2..xmm9,xmm0
2996DB	102,15,56,222,208
2997DB	102,15,56,222,216
2998DB	102,15,56,222,224
2999DB	102,15,56,222,232
3000DB	102,15,56,222,240
3001DB	102,15,56,222,248
3002DB	102,68,15,56,222,192
3003DB	102,68,15,56,222,200
3004	movups	xmm0,XMMWORD[((192-112))+rcx]
3005	je	NEAR $L$cbc_dec_done
; aesdec xmm2..xmm9,xmm1
3006DB	102,15,56,222,209
3007DB	102,15,56,222,217
3008DB	102,15,56,222,225
3009DB	102,15,56,222,233
3010DB	102,15,56,222,241
3011DB	102,15,56,222,249
3012DB	102,68,15,56,222,193
3013DB	102,68,15,56,222,201
3014	movups	xmm1,XMMWORD[((208-112))+rcx]
3015	nop
; aesdec xmm2..xmm9,xmm0
3016DB	102,15,56,222,208
3017DB	102,15,56,222,216
3018DB	102,15,56,222,224
3019DB	102,15,56,222,232
3020DB	102,15,56,222,240
3021DB	102,15,56,222,248
3022DB	102,68,15,56,222,192
3023DB	102,68,15,56,222,200
3024	movups	xmm0,XMMWORD[((224-112))+rcx]
3025	jmp	NEAR $L$cbc_dec_done
; Final rounds of the 8-block CBC decrypt iteration.
; On entry xmm1 = second-to-last round key, xmm0 = last round key. The last key
; is xored into the chaining value and the saved ciphertext blocks
; (xmm10..xmm15, plus blocks reloaded from [80+rdi] and [96+rdi]), so each
; aesdeclast applies the final round key and the CBC xor in a single step.
3026ALIGN	16
3027$L$cbc_dec_done:
; aesdec xmm2,xmm1 / aesdec xmm3,xmm1
3028DB	102,15,56,222,209
3029DB	102,15,56,222,217
3030	pxor	xmm10,xmm0
3031	pxor	xmm11,xmm0
; aesdec xmm4,xmm1 / aesdec xmm5,xmm1
3032DB	102,15,56,222,225
3033DB	102,15,56,222,233
3034	pxor	xmm12,xmm0
3035	pxor	xmm13,xmm0
; aesdec xmm6,xmm1 / aesdec xmm7,xmm1
3036DB	102,15,56,222,241
3037DB	102,15,56,222,249
3038	pxor	xmm14,xmm0
3039	pxor	xmm15,xmm0
; aesdec xmm8,xmm1 / aesdec xmm9,xmm1
3040DB	102,68,15,56,222,193
3041DB	102,68,15,56,222,201
; xmm1 is free now: reuse it for ciphertext block six (the xor mask for block seven).
3042	movdqu	xmm1,XMMWORD[80+rdi]
3043
; aesdeclast xmm2,xmm10
3044DB	102,65,15,56,223,210
; xmm10 has been consumed: reuse it for ciphertext block seven (mask for block eight).
3045	movdqu	xmm10,XMMWORD[96+rdi]
3046	pxor	xmm1,xmm0
; aesdeclast xmm3,xmm11
3047DB	102,65,15,56,223,219
3048	pxor	xmm10,xmm0
; xmm0 = ciphertext block eight, which becomes the next chaining value.
3049	movdqu	xmm0,XMMWORD[112+rdi]
; aesdeclast xmm4,xmm12
3050DB	102,65,15,56,223,228
3051	lea	rdi,[128+rdi]
; From here on xmm11..xmm15 and xmm1 are refilled from r11 with the next batch
; of ciphertext as soon as each has been consumed.
3052	movdqu	xmm11,XMMWORD[r11]
; aesdeclast xmm5,xmm13 / aesdeclast xmm6,xmm14
3053DB	102,65,15,56,223,237
3054DB	102,65,15,56,223,246
3055	movdqu	xmm12,XMMWORD[16+r11]
3056	movdqu	xmm13,XMMWORD[32+r11]
; aesdeclast xmm7,xmm15 / aesdeclast xmm8,xmm1
3057DB	102,65,15,56,223,255
3058DB	102,68,15,56,223,193
3059	movdqu	xmm14,XMMWORD[48+r11]
3060	movdqu	xmm15,XMMWORD[64+r11]
; aesdeclast xmm9,xmm10
3061DB	102,69,15,56,223,202
3062	movdqa	xmm10,xmm0
3063	movdqu	xmm1,XMMWORD[80+r11]
; Reload round key 0 (rcx is still biased by +112).
3064	movups	xmm0,XMMWORD[((-112))+rcx]
3065
; Store seven plaintext blocks while moving the next ciphertext blocks into
; xmm2..xmm7; the eighth plaintext block stays in xmm9.
3066	movups	XMMWORD[rsi],xmm2
3067	movdqa	xmm2,xmm11
3068	movups	XMMWORD[16+rsi],xmm3
3069	movdqa	xmm3,xmm12
3070	movups	XMMWORD[32+rsi],xmm4
3071	movdqa	xmm4,xmm13
3072	movups	XMMWORD[48+rsi],xmm5
3073	movdqa	xmm5,xmm14
3074	movups	XMMWORD[64+rsi],xmm6
3075	movdqa	xmm6,xmm15
3076	movups	XMMWORD[80+rsi],xmm7
3077	movdqa	xmm7,xmm1
3078	movups	XMMWORD[96+rsi],xmm8
3079	lea	rsi,[112+rsi]
3080
3081	sub	rdx,0x80
3082	ja	NEAR $L$cbc_dec_loop8
3083
; Loop finished: xmm2 = held-back eighth plaintext block, undo the +112 bias on
; rcx, and turn rdx back into a remaining-byte count. Zero or negative means the
; held-back block is the final output block.
3084	movaps	xmm2,xmm9
3085	lea	rcx,[((-112))+rcx]
3086	add	rdx,0x70
3087	jle	NEAR $L$cbc_dec_clear_tail_collected
; More data follows: flush the held-back block, then dispatch on what is left.
3088	movups	XMMWORD[rsi],xmm9
3089	lea	rsi,[16+rsi]
3090	cmp	rdx,0x50
3091	jbe	NEAR $L$cbc_dec_tail
3092
; More than five blocks remain: xmm3..xmm7 already hold preloaded ciphertext;
; restore xmm2 to the first ciphertext block (saved in xmm11) and fall through
; to $L$cbc_dec_six_or_seven.
3093	movaps	xmm2,xmm11
; CBC decrypt of a final group of six or seven blocks.
; In: xmm2..xmm7 = six ciphertext blocks, xmm11..xmm15 = copies of the first
; five, xmm10 = chaining value, rdx = remaining bytes.
; Both variants store every block but the last, leave the last plaintext block
; in xmm2 and the next chaining value in xmm10, then go to
; $L$cbc_dec_tail_collected.
; _aesni_decrypt6/_aesni_decrypt8 are defined outside this chunk; presumably
; they decrypt xmm2..xmm7 / xmm2..xmm9 in place with the key at rcx and the
; round count in eax -- confirm against their definitions.
3094$L$cbc_dec_six_or_seven:
3095	cmp	rdx,0x60
3096	ja	NEAR $L$cbc_dec_seven
3097
; Six blocks: keep ciphertext block six in xmm8, it becomes the chaining value.
3098	movaps	xmm8,xmm7
3099	call	_aesni_decrypt6
3100	pxor	xmm2,xmm10
3101	movaps	xmm10,xmm8
3102	pxor	xmm3,xmm11
3103	movdqu	XMMWORD[rsi],xmm2
3104	pxor	xmm4,xmm12
3105	movdqu	XMMWORD[16+rsi],xmm3
3106	pxor	xmm3,xmm3
3107	pxor	xmm5,xmm13
3108	movdqu	XMMWORD[32+rsi],xmm4
3109	pxor	xmm4,xmm4
3110	pxor	xmm6,xmm14
3111	movdqu	XMMWORD[48+rsi],xmm5
3112	pxor	xmm5,xmm5
3113	pxor	xmm7,xmm15
3114	movdqu	XMMWORD[64+rsi],xmm6
3115	pxor	xmm6,xmm6
3116	lea	rsi,[80+rsi]
3117	movdqa	xmm2,xmm7
3118	pxor	xmm7,xmm7
3119	jmp	NEAR $L$cbc_dec_tail_collected
3120
3121ALIGN	16
3122$L$cbc_dec_seven:
; Seven blocks: load the seventh into xmm8, zero the unused eighth lane (xmm9),
; and use the 8-block routine.
3123	movups	xmm8,XMMWORD[96+rdi]
3124	xorps	xmm9,xmm9
3125	call	_aesni_decrypt8
; Ciphertext blocks six and seven are reloaded from the input: six is the xor
; mask for plaintext seven, seven becomes the chaining value (xmm10 is only
; overwritten after its old value has been xored into xmm2).
3126	movups	xmm9,XMMWORD[80+rdi]
3127	pxor	xmm2,xmm10
3128	movups	xmm10,XMMWORD[96+rdi]
3129	pxor	xmm3,xmm11
3130	movdqu	XMMWORD[rsi],xmm2
3131	pxor	xmm4,xmm12
3132	movdqu	XMMWORD[16+rsi],xmm3
3133	pxor	xmm3,xmm3
3134	pxor	xmm5,xmm13
3135	movdqu	XMMWORD[32+rsi],xmm4
3136	pxor	xmm4,xmm4
3137	pxor	xmm6,xmm14
3138	movdqu	XMMWORD[48+rsi],xmm5
3139	pxor	xmm5,xmm5
3140	pxor	xmm7,xmm15
3141	movdqu	XMMWORD[64+rsi],xmm6
3142	pxor	xmm6,xmm6
3143	pxor	xmm8,xmm9
3144	movdqu	XMMWORD[80+rsi],xmm7
3145	pxor	xmm7,xmm7
3146	lea	rsi,[96+rsi]
3147	movdqa	xmm2,xmm8
3148	pxor	xmm8,xmm8
3149	pxor	xmm9,xmm9
3150	jmp	NEAR $L$cbc_dec_tail_collected
3151
; CBC decrypt main loop, six blocks per iteration (alternative to the 8-block loop).
; $L$cbc_dec_loop6_enter expects xmm2..xmm7 = six ciphertext blocks,
; xmm11..xmm15 = copies of the first five, xmm10 = chaining value,
; r11 = key schedule, r10d = round-loop count.
3152ALIGN	16
3153$L$cbc_dec_loop6:
; Store the sixth plaintext block held back from the previous iteration, then
; load the next six ciphertext blocks and their copies.
3154	movups	XMMWORD[rsi],xmm7
3155	lea	rsi,[16+rsi]
3156	movdqu	xmm2,XMMWORD[rdi]
3157	movdqu	xmm3,XMMWORD[16+rdi]
3158	movdqa	xmm11,xmm2
3159	movdqu	xmm4,XMMWORD[32+rdi]
3160	movdqa	xmm12,xmm3
3161	movdqu	xmm5,XMMWORD[48+rdi]
3162	movdqa	xmm13,xmm4
3163	movdqu	xmm6,XMMWORD[64+rdi]
3164	movdqa	xmm14,xmm5
3165	movdqu	xmm7,XMMWORD[80+rdi]
3166	movdqa	xmm15,xmm6
3167$L$cbc_dec_loop6_enter:
3168	lea	rdi,[96+rdi]
; xmm8 keeps ciphertext block six; it becomes the chaining value after the call.
3169	movdqa	xmm8,xmm7
3170
3171	call	_aesni_decrypt6
3172
; CBC xor and store of the first five blocks. rcx and eax are reloaded from
; r11/r10d in between -- presumably because _aesni_decrypt6 (defined outside this
; chunk) modifies them; confirm against its definition.
3173	pxor	xmm2,xmm10
3174	movdqa	xmm10,xmm8
3175	pxor	xmm3,xmm11
3176	movdqu	XMMWORD[rsi],xmm2
3177	pxor	xmm4,xmm12
3178	movdqu	XMMWORD[16+rsi],xmm3
3179	pxor	xmm5,xmm13
3180	movdqu	XMMWORD[32+rsi],xmm4
3181	pxor	xmm6,xmm14
3182	mov	rcx,r11
3183	movdqu	XMMWORD[48+rsi],xmm5
3184	pxor	xmm7,xmm15
3185	mov	eax,r10d
3186	movdqu	XMMWORD[64+rsi],xmm6
3187	lea	rsi,[80+rsi]
3188	sub	rdx,0x60
3189	ja	NEAR $L$cbc_dec_loop6
3190
; Loop finished: xmm2 = held-back sixth plaintext block; turn rdx back into a
; remaining-byte count. Zero or negative means that block is the final output.
3191	movdqa	xmm2,xmm7
3192	add	rdx,0x50
3193	jle	NEAR $L$cbc_dec_clear_tail_collected
; More data follows: flush the held-back block and fall through to $L$cbc_dec_tail.
3194	movups	XMMWORD[rsi],xmm7
3195	lea	rsi,[16+rsi]
3196
; CBC decrypt tail: at most 0x50 bytes (five blocks) remain at rdi.
; Blocks are loaded one at a time; after each "sub rdx,0x10" a result of zero or
; a borrow dispatches to the handler for the number of blocks loaded so far.
; xmm11.. receive copies of the ciphertext for the CBC xor; xmm10 is the
; chaining value.
3197$L$cbc_dec_tail:
3198	movups	xmm2,XMMWORD[rdi]
3199	sub	rdx,0x10
3200	jbe	NEAR $L$cbc_dec_one
3201
3202	movups	xmm3,XMMWORD[16+rdi]
3203	movaps	xmm11,xmm2
3204	sub	rdx,0x10
3205	jbe	NEAR $L$cbc_dec_two
3206
3207	movups	xmm4,XMMWORD[32+rdi]
3208	movaps	xmm12,xmm3
3209	sub	rdx,0x10
3210	jbe	NEAR $L$cbc_dec_three
3211
3212	movups	xmm5,XMMWORD[48+rdi]
3213	movaps	xmm13,xmm4
3214	sub	rdx,0x10
3215	jbe	NEAR $L$cbc_dec_four
3216
; Five blocks: use the 6-block routine with a zeroed sixth lane (xmm7).
; xmm15 keeps ciphertext block five, which becomes the chaining value.
3217	movups	xmm6,XMMWORD[64+rdi]
3218	movaps	xmm14,xmm5
3219	movaps	xmm15,xmm6
3220	xorps	xmm7,xmm7
3221	call	_aesni_decrypt6
3222	pxor	xmm2,xmm10
3223	movaps	xmm10,xmm15
3224	pxor	xmm3,xmm11
3225	movdqu	XMMWORD[rsi],xmm2
3226	pxor	xmm4,xmm12
3227	movdqu	XMMWORD[16+rsi],xmm3
3228	pxor	xmm3,xmm3
3229	pxor	xmm5,xmm13
3230	movdqu	XMMWORD[32+rsi],xmm4
3231	pxor	xmm4,xmm4
3232	pxor	xmm6,xmm14
3233	movdqu	XMMWORD[48+rsi],xmm5
3234	pxor	xmm5,xmm5
3235	lea	rsi,[64+rsi]
; Last plaintext block goes to xmm2 for $L$cbc_dec_tail_collected; scrub the rest.
3236	movdqa	xmm2,xmm6
3237	pxor	xmm6,xmm6
3238	pxor	xmm7,xmm7
; Account for the fifth block so the low four bits of rdx hold the residue
; tested at $L$cbc_dec_tail_collected.
3239	sub	rdx,0x10
3240	jmp	NEAR $L$cbc_dec_tail_collected
3241
; CBC decrypt tail handlers for one to four remaining blocks.
; Each saves the last ciphertext block (it becomes the chaining value in xmm10),
; decrypts, xors with the previous chaining value / ciphertext copies, stores
; every block but the last, and leaves the last plaintext block in xmm2 for
; $L$cbc_dec_tail_collected.
; _aesni_decrypt2/3/4 are defined elsewhere in the file (2 is visible in the file
; head; 3 and 4 presumably follow the same in-place convention -- confirm).
3242ALIGN	16
3243$L$cbc_dec_one:
3244	movaps	xmm11,xmm2
3245	movups	xmm0,XMMWORD[rcx]
3246	movups	xmm1,XMMWORD[16+rcx]
3247	lea	rcx,[32+rcx]
3248	xorps	xmm2,xmm0
3249$L$oop_dec1_17:
; aesdec xmm2,xmm1
3250DB	102,15,56,222,209
3251	dec	eax
3252	movups	xmm1,XMMWORD[rcx]
3253	lea	rcx,[16+rcx]
3254	jnz	NEAR $L$oop_dec1_17
; aesdeclast xmm2,xmm1
3255DB	102,15,56,223,209
3256	xorps	xmm2,xmm10
3257	movaps	xmm10,xmm11
3258	jmp	NEAR $L$cbc_dec_tail_collected
3259ALIGN	16
3260$L$cbc_dec_two:
3261	movaps	xmm12,xmm3
3262	call	_aesni_decrypt2
3263	pxor	xmm2,xmm10
3264	movaps	xmm10,xmm12
3265	pxor	xmm3,xmm11
3266	movdqu	XMMWORD[rsi],xmm2
3267	movdqa	xmm2,xmm3
3268	pxor	xmm3,xmm3
3269	lea	rsi,[16+rsi]
3270	jmp	NEAR $L$cbc_dec_tail_collected
3271ALIGN	16
3272$L$cbc_dec_three:
3273	movaps	xmm13,xmm4
3274	call	_aesni_decrypt3
3275	pxor	xmm2,xmm10
3276	movaps	xmm10,xmm13
3277	pxor	xmm3,xmm11
3278	movdqu	XMMWORD[rsi],xmm2
3279	pxor	xmm4,xmm12
3280	movdqu	XMMWORD[16+rsi],xmm3
3281	pxor	xmm3,xmm3
3282	movdqa	xmm2,xmm4
3283	pxor	xmm4,xmm4
3284	lea	rsi,[32+rsi]
3285	jmp	NEAR $L$cbc_dec_tail_collected
3286ALIGN	16
3287$L$cbc_dec_four:
3288	movaps	xmm14,xmm5
3289	call	_aesni_decrypt4
3290	pxor	xmm2,xmm10
3291	movaps	xmm10,xmm14
3292	pxor	xmm3,xmm11
3293	movdqu	XMMWORD[rsi],xmm2
3294	pxor	xmm4,xmm12
3295	movdqu	XMMWORD[16+rsi],xmm3
3296	pxor	xmm3,xmm3
3297	pxor	xmm5,xmm13
3298	movdqu	XMMWORD[32+rsi],xmm4
3299	pxor	xmm4,xmm4
3300	movdqa	xmm2,xmm5
3301	pxor	xmm5,xmm5
3302	lea	rsi,[48+rsi]
3303	jmp	NEAR $L$cbc_dec_tail_collected
3304
; Common exit of the bulk CBC decrypt path.
; In: xmm2 = last plaintext block (not yet stored), xmm10 = new chaining value,
;     rsi = where the last block goes, rdx = length bookkeeping whose low four
;     bits are the residue of the length modulo 16.
; $L$cbc_dec_clear_tail_collected is the entry used by the main loops; it first
; scrubs xmm3..xmm5.
3305ALIGN	16
3306$L$cbc_dec_clear_tail_collected:
3307	pxor	xmm3,xmm3
3308	pxor	xmm4,xmm4
3309	pxor	xmm5,xmm5
3310$L$cbc_dec_tail_collected:
; Write the chaining value back through the IV pointer.
3311	movups	XMMWORD[r8],xmm10
3312	and	rdx,15
3313	jnz	NEAR $L$cbc_dec_tail_partial
; Whole final block: store it and scrub xmm2.
3314	movups	XMMWORD[rsi],xmm2
3315	pxor	xmm2,xmm2
3316	jmp	NEAR $L$cbc_dec_ret
3317ALIGN	16
3318$L$cbc_dec_tail_partial:
; Partial final block: spill the plaintext to the scratch slot at [rsp] and copy
; part of it to the output with rep movsb.
; NOTE(review): rcx = 16 - (rdx and 15), so the number of bytes copied is 16
; minus the residue rather than the residue itself -- confirm this is intended.
3319	movaps	XMMWORD[rsp],xmm2
3320	pxor	xmm2,xmm2
3321	mov	rcx,16
3322	mov	rdi,rsi
3323	sub	rcx,rdx
3324	lea	rsi,[rsp]
; bytes F3 A4 66 90: rep movsb followed by a 2-byte nop
3325	DD	0x9066A4F3
; Wipe the spilled plaintext (xmm2 is zero here).
3326	movdqa	XMMWORD[rsp],xmm2
3327
3328$L$cbc_dec_ret:
; Restore xmm6..xmm15 from the frame, overwriting each save slot with zeros
; (xmm0) as it is read, then drop the frame: rbp points at the pushed rbp.
3329	xorps	xmm0,xmm0
3330	pxor	xmm1,xmm1
3331	movaps	xmm6,XMMWORD[16+rsp]
3332	movaps	XMMWORD[16+rsp],xmm0
3333	movaps	xmm7,XMMWORD[32+rsp]
3334	movaps	XMMWORD[32+rsp],xmm0
3335	movaps	xmm8,XMMWORD[48+rsp]
3336	movaps	XMMWORD[48+rsp],xmm0
3337	movaps	xmm9,XMMWORD[64+rsp]
3338	movaps	XMMWORD[64+rsp],xmm0
3339	movaps	xmm10,XMMWORD[80+rsp]
3340	movaps	XMMWORD[80+rsp],xmm0
3341	movaps	xmm11,XMMWORD[96+rsp]
3342	movaps	XMMWORD[96+rsp],xmm0
3343	movaps	xmm12,XMMWORD[112+rsp]
3344	movaps	XMMWORD[112+rsp],xmm0
3345	movaps	xmm13,XMMWORD[128+rsp]
3346	movaps	XMMWORD[128+rsp],xmm0
3347	movaps	xmm14,XMMWORD[144+rsp]
3348	movaps	XMMWORD[144+rsp],xmm0
3349	movaps	xmm15,XMMWORD[160+rsp]
3350	movaps	XMMWORD[160+rsp],xmm0
3351	lea	rsp,[rbp]
3352	pop	rbp
3353$L$cbc_ret:
; Shared return of aesni_cbc_encrypt: reload the caller's rdi/rsi saved by the prologue.
3354	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3355	mov	rsi,QWORD[16+rsp]
3356	DB	0F3h,0C3h		;repret
3357$L$SEH_end_aesni_cbc_encrypt:
; aesni_set_decrypt_key -- build a decryption key schedule (Win64 ABI).
; In:  rcx = user key, edx = key length in bits, r8 = key schedule to fill
;      (the registers are passed straight through to __aesni_set_encrypt_key).
; Out: eax = status from __aesni_set_encrypt_key (0 on success, non-zero on error).
; After the encryption schedule has been built, the round keys are reversed in
; place and aesimc is applied to every key except the first and the last.
3358global	aesni_set_decrypt_key
3359
3360ALIGN	16
3361aesni_set_decrypt_key:
; bytes 48 83 EC 08: sub rsp,8 (matches the unwind info at $L$SEH_info_key)
3362DB	0x48,0x83,0xEC,0x08
3363	call	__aesni_set_encrypt_key
; edx comes back holding the round-loop count; times 16 gives a byte offset.
3364	shl	edx,4
3365	test	eax,eax
3366	jnz	NEAR $L$dec_key_ret
; rcx = address of the last round key (r8 + 16*count + 16).
3367	lea	rcx,[16+rdx*1+r8]
3368
; Swap the first and last round keys unchanged.
3369	movups	xmm0,XMMWORD[r8]
3370	movups	xmm1,XMMWORD[rcx]
3371	movups	XMMWORD[rcx],xmm0
3372	movups	XMMWORD[r8],xmm1
3373	lea	r8,[16+r8]
3374	lea	rcx,[((-16))+rcx]
3375
; Walk inwards from both ends: transform a pair of round keys with aesimc and
; store them swapped, until the two pointers meet.
3376$L$dec_key_inverse:
3377	movups	xmm0,XMMWORD[r8]
3378	movups	xmm1,XMMWORD[rcx]
; aesimc xmm0,xmm0 / aesimc xmm1,xmm1
3379DB	102,15,56,219,192
3380DB	102,15,56,219,201
3381	lea	r8,[16+r8]
3382	lea	rcx,[((-16))+rcx]
3383	movups	XMMWORD[16+rcx],xmm0
3384	movups	XMMWORD[(-16)+r8],xmm1
3385	cmp	rcx,r8
3386	ja	NEAR $L$dec_key_inverse
3387
; Middle round key: transform in place (r8 and rcx are equal here for the odd
; loop counts 9/11/13 that $L$10rounds/$L$12rounds/$L$14rounds set), then scrub
; the temporaries.
3388	movups	xmm0,XMMWORD[r8]
; aesimc xmm0,xmm0
3389DB	102,15,56,219,192
3390	pxor	xmm1,xmm1
3391	movups	XMMWORD[rcx],xmm0
3392	pxor	xmm0,xmm0
3393$L$dec_key_ret:
3394	add	rsp,8
3395	DB	0F3h,0C3h		;repret
3396$L$SEH_end_set_decrypt_key:
3397
; aesni_set_encrypt_key -- expand a user key into an AES encryption key schedule
; (Win64 ABI; __aesni_set_encrypt_key is the internal alias called by
; aesni_set_decrypt_key).
; In:  rcx = user key, edx = key length in bits (128, 192 or 256),
;      r8 = key schedule to fill.
; Out: rax = 0 on success, -1 if rcx or r8 is null, -2 for any other bit length.
;      On success edx holds the round-loop count (9, 11 or 13), which is also
;      stored at offset 240 of the schedule; r8 is left unchanged.
; xmm0..xmm5 are zeroed at $L$enc_key_ret.
3398global	aesni_set_encrypt_key
3399
3400ALIGN	16
3401aesni_set_encrypt_key:
3402__aesni_set_encrypt_key:
; bytes 48 83 EC 08: sub rsp,8
3403DB	0x48,0x83,0xEC,0x08
3404	mov	rax,-1
3405	test	rcx,rcx
3406	jz	NEAR $L$enc_key_ret
3407	test	r8,r8
3408	jz	NEAR $L$enc_key_ret
3409
; r10d = capability bits 28 and 11 (mask 268437504 = 0x10000800). Each key size
; takes its *_alt path when only bit 28 is set (268435456 = 0x10000000).
; NOTE(review): presumably these are the AVX and XOP bits of OPENSSL_ia32cap_P
; -- confirm against the OPENSSL_ia32cap documentation.
3410	mov	r10d,268437504
3411	movups	xmm0,XMMWORD[rcx]
; xmm4 is the scratch register of the key_expansion helpers; they rely on its
; low lane starting out as zero.
3412	xorps	xmm4,xmm4
3413	and	r10d,DWORD[((OPENSSL_ia32cap_P+4))]
; rax = output cursor, starting at round key 1.
3414	lea	rax,[16+r8]
3415	cmp	edx,256
3416	je	NEAR $L$14rounds
3417	cmp	edx,192
3418	je	NEAR $L$12rounds
3419	cmp	edx,128
3420	jne	NEAR $L$bad_keybits
3421
; 128-bit key: ten expansion steps. Each DB line below is
; "aeskeygenassist xmm1,xmm0,rcon" with rcon = 1,2,4,...,128,27,54; the helper
; then stores the previous round key (except on the _cold entry) and derives the
; next one in xmm0.
3422$L$10rounds:
3423	mov	edx,9
3424	cmp	r10d,268435456
3425	je	NEAR $L$10rounds_alt
3426
3427	movups	XMMWORD[r8],xmm0
3428DB	102,15,58,223,200,1
3429	call	$L$key_expansion_128_cold
3430DB	102,15,58,223,200,2
3431	call	$L$key_expansion_128
3432DB	102,15,58,223,200,4
3433	call	$L$key_expansion_128
3434DB	102,15,58,223,200,8
3435	call	$L$key_expansion_128
3436DB	102,15,58,223,200,16
3437	call	$L$key_expansion_128
3438DB	102,15,58,223,200,32
3439	call	$L$key_expansion_128
3440DB	102,15,58,223,200,64
3441	call	$L$key_expansion_128
3442DB	102,15,58,223,200,128
3443	call	$L$key_expansion_128
3444DB	102,15,58,223,200,27
3445	call	$L$key_expansion_128
3446DB	102,15,58,223,200,54
3447	call	$L$key_expansion_128
; Store the last round key; rax is r8+160 here, so [80+rax] is schedule offset 240.
3448	movups	XMMWORD[rax],xmm0
3449	mov	DWORD[80+rax],edx
3450	xor	eax,eax
3451	jmp	NEAR $L$enc_key_ret
3452
; 128-bit key expansion without aeskeygenassist.
; In: xmm0 = user key, r8 = schedule, rax = r8+16, edx = 9.
; Per round: pshufb with $L$key_rotate picks the rotated last key word into
; every lane, aesenclast against the rcon vector (xmm4) applies the S-box and
; adds rcon, and the shift/xor ladder on xmm2 builds the running xor of the
; previous round key's words. xmm4 is doubled with pslld after every round.
3453ALIGN	16
3454$L$10rounds_alt:
3455	movdqa	xmm5,XMMWORD[$L$key_rotate]
3456	mov	r10d,8
3457	movdqa	xmm4,XMMWORD[$L$key_rcon1]
3458	movdqa	xmm2,xmm0
3459	movdqu	XMMWORD[r8],xmm0
3460	jmp	NEAR $L$oop_key128
3461
3462ALIGN	16
3463$L$oop_key128:
; pshufb xmm0,xmm5 / aesenclast xmm0,xmm4
3464DB	102,15,56,0,197
3465DB	102,15,56,221,196
3466	pslld	xmm4,1
3467	lea	rax,[16+rax]
3468
3469	movdqa	xmm3,xmm2
3470	pslldq	xmm2,4
3471	pxor	xmm3,xmm2
3472	pslldq	xmm2,4
3473	pxor	xmm3,xmm2
3474	pslldq	xmm2,4
3475	pxor	xmm2,xmm3
3476
3477	pxor	xmm0,xmm2
3478	movdqu	XMMWORD[(-16)+rax],xmm0
3479	movdqa	xmm2,xmm0
3480
3481	dec	r10d
3482	jnz	NEAR $L$oop_key128
3483
; Rounds nine and ten are unrolled: the rcon vector is reloaded as 0x1b and then
; doubled once more by the pslld below.
3484	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
3485
; pshufb xmm0,xmm5 / aesenclast xmm0,xmm4
3486DB	102,15,56,0,197
3487DB	102,15,56,221,196
3488	pslld	xmm4,1
3489
3490	movdqa	xmm3,xmm2
3491	pslldq	xmm2,4
3492	pxor	xmm3,xmm2
3493	pslldq	xmm2,4
3494	pxor	xmm3,xmm2
3495	pslldq	xmm2,4
3496	pxor	xmm2,xmm3
3497
3498	pxor	xmm0,xmm2
3499	movdqu	XMMWORD[rax],xmm0
3500
3501	movdqa	xmm2,xmm0
; pshufb xmm0,xmm5 / aesenclast xmm0,xmm4
3502DB	102,15,56,0,197
3503DB	102,15,56,221,196
3504
3505	movdqa	xmm3,xmm2
3506	pslldq	xmm2,4
3507	pxor	xmm3,xmm2
3508	pslldq	xmm2,4
3509	pxor	xmm3,xmm2
3510	pslldq	xmm2,4
3511	pxor	xmm2,xmm3
3512
3513	pxor	xmm0,xmm2
3514	movdqu	XMMWORD[16+rax],xmm0
3515
; rax is r8+144 here, so [96+rax] is schedule offset 240.
3516	mov	DWORD[96+rax],edx
3517	xor	eax,eax
3518	jmp	NEAR $L$enc_key_ret
3519
; 192-bit key expansion using aeskeygenassist.
; In: xmm0 = first 16 key bytes, rcx = user key, r8 = schedule, rax = r8+16.
; xmm2 receives the remaining 8 key bytes. Each DB line is
; "aeskeygenassist xmm1,xmm2,rcon" with rcon = 1,2,4,...,128; the 192a/192b
; helpers alternate because the 24-byte key material does not line up with the
; 16-byte round keys.
3520ALIGN	16
3521$L$12rounds:
3522	movq	xmm2,QWORD[16+rcx]
3523	mov	edx,11
3524	cmp	r10d,268435456
3525	je	NEAR $L$12rounds_alt
3526
3527	movups	XMMWORD[r8],xmm0
3528DB	102,15,58,223,202,1
3529	call	$L$key_expansion_192a_cold
3530DB	102,15,58,223,202,2
3531	call	$L$key_expansion_192b
3532DB	102,15,58,223,202,4
3533	call	$L$key_expansion_192a
3534DB	102,15,58,223,202,8
3535	call	$L$key_expansion_192b
3536DB	102,15,58,223,202,16
3537	call	$L$key_expansion_192a
3538DB	102,15,58,223,202,32
3539	call	$L$key_expansion_192b
3540DB	102,15,58,223,202,64
3541	call	$L$key_expansion_192a
3542DB	102,15,58,223,202,128
3543	call	$L$key_expansion_192b
; Store the last round key and the round-loop count, then return 0.
3544	movups	XMMWORD[rax],xmm0
3545	mov	DWORD[48+rax],edx
3546	xor	rax,rax
3547	jmp	NEAR $L$enc_key_ret
3548
; 192-bit key expansion without aeskeygenassist.
; In: xmm0 = first 16 key bytes, xmm2 = remaining 8 key bytes, r8 = schedule,
;     rax = r8+16, edx = 11.
; Eight iterations, each writing 24 bytes of schedule: 8 bytes from xmm2 at
; [rax], then 16 bytes from xmm0 at [-16+rax] after rax has advanced by 24.
3549ALIGN	16
3550$L$12rounds_alt:
3551	movdqa	xmm5,XMMWORD[$L$key_rotate192]
3552	movdqa	xmm4,XMMWORD[$L$key_rcon1]
3553	mov	r10d,8
3554	movdqu	XMMWORD[r8],xmm0
3555	jmp	NEAR $L$oop_key192
3556
3557ALIGN	16
3558$L$oop_key192:
3559	movq	QWORD[rax],xmm2
3560	movdqa	xmm1,xmm2
; pshufb xmm2,xmm5 / aesenclast xmm2,xmm4 (xmm4 = rcon vector, doubled below)
3561DB	102,15,56,0,213
3562DB	102,15,56,221,212
3563	pslld	xmm4,1
3564	lea	rax,[24+rax]
3565
; Running xor of the four words of xmm0.
3566	movdqa	xmm3,xmm0
3567	pslldq	xmm0,4
3568	pxor	xmm3,xmm0
3569	pslldq	xmm0,4
3570	pxor	xmm3,xmm0
3571	pslldq	xmm0,4
3572	pxor	xmm0,xmm3
3573
; Extend the chain into the two extra key words kept in xmm1/xmm2.
3574	pshufd	xmm3,xmm0,0xff
3575	pxor	xmm3,xmm1
3576	pslldq	xmm1,4
3577	pxor	xmm3,xmm1
3578
3579	pxor	xmm0,xmm2
3580	pxor	xmm2,xmm3
3581	movdqu	XMMWORD[(-16)+rax],xmm0
3582
3583	dec	r10d
3584	jnz	NEAR $L$oop_key192
3585
; rax is r8+16+8*24 = r8+208 here, so [32+rax] is schedule offset 240.
3586	mov	DWORD[32+rax],edx
3587	xor	eax,eax
3588	jmp	NEAR $L$enc_key_ret
3589
; 256-bit key expansion using aeskeygenassist.
; In: xmm0 = first 16 key bytes, rcx = user key, r8 = schedule, rax = r8+16.
; xmm2 receives the second 16 key bytes and rax is moved on to r8+32.
; The DB lines alternate between "aeskeygenassist xmm1,xmm2,rcon" (followed by
; 256a, which updates xmm0) and "aeskeygenassist xmm1,xmm0,rcon" (followed by
; 256b, which updates xmm2), with rcon = 1,2,4,...,64.
3590ALIGN	16
3591$L$14rounds:
3592	movups	xmm2,XMMWORD[16+rcx]
3593	mov	edx,13
3594	lea	rax,[16+rax]
3595	cmp	r10d,268435456
3596	je	NEAR $L$14rounds_alt
3597
3598	movups	XMMWORD[r8],xmm0
3599	movups	XMMWORD[16+r8],xmm2
3600DB	102,15,58,223,202,1
3601	call	$L$key_expansion_256a_cold
3602DB	102,15,58,223,200,1
3603	call	$L$key_expansion_256b
3604DB	102,15,58,223,202,2
3605	call	$L$key_expansion_256a
3606DB	102,15,58,223,200,2
3607	call	$L$key_expansion_256b
3608DB	102,15,58,223,202,4
3609	call	$L$key_expansion_256a
3610DB	102,15,58,223,200,4
3611	call	$L$key_expansion_256b
3612DB	102,15,58,223,202,8
3613	call	$L$key_expansion_256a
3614DB	102,15,58,223,200,8
3615	call	$L$key_expansion_256b
3616DB	102,15,58,223,202,16
3617	call	$L$key_expansion_256a
3618DB	102,15,58,223,200,16
3619	call	$L$key_expansion_256b
3620DB	102,15,58,223,202,32
3621	call	$L$key_expansion_256a
3622DB	102,15,58,223,200,32
3623	call	$L$key_expansion_256b
3624DB	102,15,58,223,202,64
3625	call	$L$key_expansion_256a
; Store the last round key and the round-loop count, then return 0.
3626	movups	XMMWORD[rax],xmm0
3627	mov	DWORD[16+rax],edx
3628	xor	rax,rax
3629	jmp	NEAR $L$enc_key_ret
3630
; 256-bit key expansion without aeskeygenassist.
; In: xmm0 = first 16 key bytes, xmm2 = second 16 key bytes, r8 = schedule,
;     rax = r8+32, edx = 13.
; Each pass derives one round key from xmm0 (rotated word, S-box and rcon via
; pshufb + aesenclast) and, except on the seventh and last pass, a second one
; from xmm1 (S-box only: aesenclast against an all-zero xmm3).
3631ALIGN	16
3632$L$14rounds_alt:
3633	movdqa	xmm5,XMMWORD[$L$key_rotate]
3634	movdqa	xmm4,XMMWORD[$L$key_rcon1]
3635	mov	r10d,7
3636	movdqu	XMMWORD[r8],xmm0
3637	movdqa	xmm1,xmm2
3638	movdqu	XMMWORD[16+r8],xmm2
3639	jmp	NEAR $L$oop_key256
3640
3641ALIGN	16
3642$L$oop_key256:
; pshufb xmm2,xmm5 / aesenclast xmm2,xmm4
3643DB	102,15,56,0,213
3644DB	102,15,56,221,212
3645
; Running xor of the four words of xmm0; the rcon vector is doubled for the next pass.
3646	movdqa	xmm3,xmm0
3647	pslldq	xmm0,4
3648	pxor	xmm3,xmm0
3649	pslldq	xmm0,4
3650	pxor	xmm3,xmm0
3651	pslldq	xmm0,4
3652	pxor	xmm0,xmm3
3653	pslld	xmm4,1
3654
3655	pxor	xmm0,xmm2
3656	movdqu	XMMWORD[rax],xmm0
3657
3658	dec	r10d
3659	jz	NEAR $L$done_key256
3660
; Second half: broadcast the last word of the new key, then
; aesenclast xmm2,xmm3 with xmm3 = 0.
3661	pshufd	xmm2,xmm0,0xff
3662	pxor	xmm3,xmm3
3663DB	102,15,56,221,211
3664
3665	movdqa	xmm3,xmm1
3666	pslldq	xmm1,4
3667	pxor	xmm3,xmm1
3668	pslldq	xmm1,4
3669	pxor	xmm3,xmm1
3670	pslldq	xmm1,4
3671	pxor	xmm1,xmm3
3672
3673	pxor	xmm2,xmm1
3674	movdqu	XMMWORD[16+rax],xmm2
3675	lea	rax,[32+rax]
3676	movdqa	xmm1,xmm2
3677
3678	jmp	NEAR $L$oop_key256
3679
3680$L$done_key256:
; rax is r8+32+6*32 = r8+224 here, so [16+rax] is schedule offset 240.
3681	mov	DWORD[16+rax],edx
3682	xor	eax,eax
3683	jmp	NEAR $L$enc_key_ret
3684
; Exit paths of aesni_set_encrypt_key.
; $L$bad_keybits: the bit length was none of 128/192/256, return -2.
; $L$enc_key_ret: common return; rax already holds the status. xmm0..xmm5, which
; carried key material, are zeroed and the 8 bytes reserved at entry are released.
3685ALIGN	16
3686$L$bad_keybits:
3687	mov	rax,-2
3688$L$enc_key_ret:
3689	pxor	xmm0,xmm0
3690	pxor	xmm1,xmm1
3691	pxor	xmm2,xmm2
3692	pxor	xmm3,xmm3
3693	pxor	xmm4,xmm4
3694	pxor	xmm5,xmm5
3695	add	rsp,8
3696	DB	0F3h,0C3h		;repret
3697$L$SEH_end_set_encrypt_key:
3698
; Helper for $L$10rounds: one 128-bit key expansion step.
; In:  xmm0 = previous round key, xmm1 = aeskeygenassist result for it,
;      xmm4 = scratch whose low lane must be zero (it stays zero), rax = output cursor.
; Out: xmm0 = next round key. The normal entry first stores the previous key at
;      [rax] and advances rax; the _cold entry skips that store.
3699ALIGN	16
3700$L$key_expansion_128:
3701	movups	XMMWORD[rax],xmm0
3702	lea	rax,[16+rax]
3703$L$key_expansion_128_cold:
; The two shufps/xorps pairs turn xmm0 into the running xor of its four words.
3704	shufps	xmm4,xmm0,16
3705	xorps	xmm0,xmm4
3706	shufps	xmm4,xmm0,140
3707	xorps	xmm0,xmm4
; Broadcast the top dword of the aeskeygenassist result and fold it in.
3708	shufps	xmm1,xmm1,255
3709	xorps	xmm0,xmm1
3710	DB	0F3h,0C3h		;repret
3711
; Helpers for $L$12rounds: 192-bit key expansion steps.
; State: xmm0 = current four key words, xmm2 = the two extra key words,
;        xmm1 = aeskeygenassist result, xmm4 = scratch (low lane zero),
;        xmm5 = copy of xmm2 saved by 192a for 192b to pack, rax = output cursor.
; 192a stores xmm0 as one round key (not on the _cold entry) and saves xmm2 in xmm5.
; 192b packs two round keys out of xmm5/xmm0/xmm2, stores both, and then jumps
; into the shared update code at $L$key_expansion_192b_warm.
3712ALIGN	16
3713$L$key_expansion_192a:
3714	movups	XMMWORD[rax],xmm0
3715	lea	rax,[16+rax]
3716$L$key_expansion_192a_cold:
3717	movaps	xmm5,xmm2
3718$L$key_expansion_192b_warm:
; Running xor over xmm0 (shufps/xorps pairs) interleaved with the update of the
; two extra words in xmm2; pshufd 85 broadcasts dword 1 of the aeskeygenassist
; result, pshufd 255 broadcasts the top word of the new xmm0.
3719	shufps	xmm4,xmm0,16
3720	movdqa	xmm3,xmm2
3721	xorps	xmm0,xmm4
3722	shufps	xmm4,xmm0,140
3723	pslldq	xmm3,4
3724	xorps	xmm0,xmm4
3725	pshufd	xmm1,xmm1,85
3726	pxor	xmm2,xmm3
3727	pxor	xmm0,xmm1
3728	pshufd	xmm3,xmm0,255
3729	pxor	xmm2,xmm3
3730	DB	0F3h,0C3h		;repret
3731
3732ALIGN	16
3733$L$key_expansion_192b:
3734	movaps	xmm3,xmm0
; xmm5 = low half of the saved extra words followed by the low half of xmm0.
3735	shufps	xmm5,xmm0,68
3736	movups	XMMWORD[rax],xmm5
; xmm3 = high half of xmm0 followed by the low half of xmm2.
3737	shufps	xmm3,xmm2,78
3738	movups	XMMWORD[16+rax],xmm3
3739	lea	rax,[32+rax]
3740	jmp	NEAR $L$key_expansion_192b_warm
3741
; Helpers for $L$14rounds: 256-bit key expansion steps.
; State: xmm0 / xmm2 = the two most recent round keys, xmm1 = aeskeygenassist
;        result, xmm4 = scratch (low lane zero), rax = output cursor.
; 256a stores xmm2 (not on the _cold entry) and derives the next key in xmm0
;      using the top dword of xmm1 (shufps 255).
; 256b stores xmm0 and derives the next key in xmm2 using dword 2 of xmm1
;      (shufps 170).
3742ALIGN	16
3743$L$key_expansion_256a:
3744	movups	XMMWORD[rax],xmm2
3745	lea	rax,[16+rax]
3746$L$key_expansion_256a_cold:
; Running xor of the four words of xmm0, then fold in the broadcast dword.
3747	shufps	xmm4,xmm0,16
3748	xorps	xmm0,xmm4
3749	shufps	xmm4,xmm0,140
3750	xorps	xmm0,xmm4
3751	shufps	xmm1,xmm1,255
3752	xorps	xmm0,xmm1
3753	DB	0F3h,0C3h		;repret
3754
3755ALIGN	16
3756$L$key_expansion_256b:
3757	movups	XMMWORD[rax],xmm0
3758	lea	rax,[16+rax]
3759
; Same ladder applied to xmm2.
3760	shufps	xmm4,xmm2,16
3761	xorps	xmm2,xmm4
3762	shufps	xmm4,xmm2,140
3763	xorps	xmm2,xmm4
3764	shufps	xmm1,xmm1,170
3765	xorps	xmm2,xmm1
3766	DB	0F3h,0C3h		;repret
3767
3768
; Constant tables, 64-byte aligned. They live in the code section, so they are
; read-only data addressed by label.
; Only $L$key_rotate, $L$key_rotate192, $L$key_rcon1 and $L$key_rcon1b are
; referenced in this part of the file (by the *_alt key expansion paths); the
; other tables are presumably used by the CTR/XTS/CCM routines earlier in the
; file -- confirm there.
3769ALIGN	64
; Byte indices 15..0: a shuffle mask that reverses the 16 bytes of a vector.
3770$L$bswap_mask:
3771DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
3772$L$increment32:
3773	DD	6,6,6,0
3774$L$increment64:
3775	DD	1,0,0,0
3776$L$xts_magic:
3777	DD	0x87,0,1,0
3778$L$increment1:
3779DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
; pshufb mask: every dword selects source bytes 13,14,15,12 (last word, rotated).
3780$L$key_rotate:
3781	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
; pshufb mask: every dword selects source bytes 5,6,7,4 (second word, rotated).
3782$L$key_rotate192:
3783	DD	0x04070605,0x04070605,0x04070605,0x04070605
; Initial round constant, and the 0x1b constant loaded for the ninth AES-128 round.
3784$L$key_rcon1:
3785	DD	1,1,1,1
3786$L$key_rcon1b:
3787	DD	0x1b,0x1b,0x1b,0x1b
3788
; NUL-terminated ASCII identification string:
; "AES for Intel AES-NI, CRYPTOGAMS by " followed by the author's e-mail address
; in angle brackets.
3789DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
3790DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
3791DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
3792DB	115,108,46,111,114,103,62,0
3793ALIGN	64
3794EXTERN	__imp_RtlVirtualUnwind
3795
; ecb_ccm64_se_handler -- Win64 language-specific exception handler referenced
; by the ECB and CCM64 unwind info.
; In (Win64 handler convention): r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; The numeric offsets are assumed to follow the Win64 CONTEXT layout
; (120 = Rax, 152 = Rsp, 248 = Rip, 512 = Xmm6) and DISPATCHER_CONTEXT layout
; (8 = ImageBase, 56 = HandlerData) -- verify against winnt.h.
; HandlerData holds two image-relative addresses (body label, return label; see
; the .xdata entries). When the faulting Rip lies between them, the four
; 16-byte XMM values saved at the frame's rsp are copied into the context and
; rax is advanced by 88 to the address the shared tail treats as the caller's rsp.
3796ALIGN	16
3797ecb_ccm64_se_handler:
3798	push	rsi
3799	push	rdi
3800	push	rbx
3801	push	rbp
3802	push	r12
3803	push	r13
3804	push	r14
3805	push	r15
3806	pushfq
3807	sub	rsp,64
3808
3809	mov	rax,QWORD[120+r8]
3810	mov	rbx,QWORD[248+r8]
3811
3812	mov	rsi,QWORD[8+r9]
3813	mov	r11,QWORD[56+r9]
3814
; Rip below the body label: the frame is not set up yet; use the context's Rax value.
3815	mov	r10d,DWORD[r11]
3816	lea	r10,[r10*1+rsi]
3817	cmp	rbx,r10
3818	jb	NEAR $L$common_seh_tail
3819
3820	mov	rax,QWORD[152+r8]
3821
; Rip at or past the return label: the frame is already torn down; use the context's Rsp.
3822	mov	r10d,DWORD[4+r11]
3823	lea	r10,[r10*1+rsi]
3824	cmp	rbx,r10
3825	jae	NEAR $L$common_seh_tail
3826
3827	lea	rsi,[rax]
3828	lea	rdi,[512+r8]
3829	mov	ecx,8
; bytes FC F3 48 A5: cld / rep movsq (copies ecx = 8 qwords)
3830	DD	0xa548f3fc
3831	lea	rax,[88+rax]
3832
3833	jmp	NEAR $L$common_seh_tail
3834
3835
3836
; ctr_xts_se_handler -- Win64 language-specific exception handler referenced by
; the CTR32 and XTS unwind info.
; In: r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (offsets assumed as in the
; Win64 layouts: 120 = Rax, 152 = Rsp, 160 = Rbp, 248 = Rip, 512 = Xmm6;
; 8 = ImageBase, 56 = HandlerData -- verify against winnt.h).
; When the faulting Rip lies between the two labels in HandlerData, the ten XMM
; values saved at [Rbp-160] are copied into the context and control continues
; at $L$common_rbp_tail, which recovers rbp and the caller's rsp from the frame.
3837ALIGN	16
3838ctr_xts_se_handler:
3839	push	rsi
3840	push	rdi
3841	push	rbx
3842	push	rbp
3843	push	r12
3844	push	r13
3845	push	r14
3846	push	r15
3847	pushfq
3848	sub	rsp,64
3849
3850	mov	rax,QWORD[120+r8]
3851	mov	rbx,QWORD[248+r8]
3852
3853	mov	rsi,QWORD[8+r9]
3854	mov	r11,QWORD[56+r9]
3855
; Rip below the body label: frame not set up yet.
3856	mov	r10d,DWORD[r11]
3857	lea	r10,[r10*1+rsi]
3858	cmp	rbx,r10
3859	jb	NEAR $L$common_seh_tail
3860
3861	mov	rax,QWORD[152+r8]
3862
; Rip at or past the epilogue label: frame already torn down.
3863	mov	r10d,DWORD[4+r11]
3864	lea	r10,[r10*1+rsi]
3865	cmp	rbx,r10
3866	jae	NEAR $L$common_seh_tail
3867
3868	mov	rax,QWORD[160+r8]
3869	lea	rsi,[((-160))+rax]
3870	lea	rdi,[512+r8]
3871	mov	ecx,20
; bytes FC F3 48 A5: cld / rep movsq (copies ecx = 20 qwords, i.e. ten XMM registers)
3872	DD	0xa548f3fc
3873
3874	jmp	NEAR $L$common_rbp_tail
3875
3876
; cbc_se_handler -- Win64 language-specific exception handler for
; aesni_cbc_encrypt, followed by the tails shared with the other handlers.
; In: r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (offsets assumed as in the
; Win64 layouts: CONTEXT 120 = Rax, 152 = Rsp, 160 = Rbp, 168 = Rsi, 176 = Rdi,
; 248 = Rip, 512 = Xmm6; DISPATCHER_CONTEXT 0 = ControlPc, 8 = ImageBase,
; 16 = FunctionEntry, 24 = EstablisherFrame, 40 = ContextRecord,
; 56 = HandlerData -- verify against winnt.h).
; Where the faulting Rip lies decides how the caller's state is recovered:
;   before $L$cbc_decrypt_bulk          : no frame, Rsp is still the entry rsp
;   bulk prologue (before ..._body)     : the entry rsp is held in Rax
;   $L$cbc_decrypt_body up to $L$cbc_ret: full frame, XMM saves at [Rsp+16], rbp chain
;   at or past $L$cbc_ret               : frame already released
3877ALIGN	16
3878cbc_se_handler:
3879	push	rsi
3880	push	rdi
3881	push	rbx
3882	push	rbp
3883	push	r12
3884	push	r13
3885	push	r14
3886	push	r15
3887	pushfq
3888	sub	rsp,64
3889
3890	mov	rax,QWORD[152+r8]
3891	mov	rbx,QWORD[248+r8]
3892
3893	lea	r10,[$L$cbc_decrypt_bulk]
3894	cmp	rbx,r10
3895	jb	NEAR $L$common_seh_tail
3896
3897	lea	r10,[$L$cbc_decrypt_body]
3898	cmp	rbx,r10
3899	jb	NEAR $L$restore_cbc_rax
3900
3901	lea	r10,[$L$cbc_ret]
3902	cmp	rbx,r10
3903	jae	NEAR $L$common_seh_tail
3904
3905	lea	rsi,[16+rax]
3906	lea	rdi,[512+r8]
3907	mov	ecx,20
; bytes FC F3 48 A5: cld / rep movsq (copies ecx = 20 qwords, i.e. ten XMM registers)
3908	DD	0xa548f3fc
3909
; Frames that keep the address of the pushed rbp in rbp: reload the saved rbp
; into the context; rax = that address + 8 is the rsp at function entry.
3910$L$common_rbp_tail:
3911	mov	rax,QWORD[160+r8]
3912	mov	rbp,QWORD[rax]
3913	lea	rax,[8+rax]
3914	mov	QWORD[160+r8],rbp
3915	jmp	NEAR $L$common_seh_tail
3916
3917$L$restore_cbc_rax:
3918	mov	rax,QWORD[120+r8]
3919
; rax = rsp at function entry. The prologues park rdi/rsi at [8+rsp]/[16+rsp];
; put them and the recovered rsp back into the context.
3920$L$common_seh_tail:
3921	mov	rdi,QWORD[8+rax]
3922	mov	rsi,QWORD[16+rax]
3923	mov	QWORD[152+r8],rax
3924	mov	QWORD[168+r8],rsi
3925	mov	QWORD[176+r8],rdi
3926
; Copy the patched context (154 qwords) into the dispatcher's context record.
3927	mov	rdi,QWORD[40+r9]
3928	mov	rsi,r8
3929	mov	ecx,154
; bytes FC F3 48 A5: cld / rep movsq
3930	DD	0xa548f3fc
3931
; Call RtlVirtualUnwind with rcx = 0 as the handler type; the register and stack
; arguments are taken from the dispatcher context and the last argument is null.
3932	mov	rsi,r9
3933	xor	rcx,rcx
3934	mov	rdx,QWORD[8+rsi]
3935	mov	r8,QWORD[rsi]
3936	mov	r9,QWORD[16+rsi]
3937	mov	r10,QWORD[40+rsi]
3938	lea	r11,[56+rsi]
3939	lea	r12,[24+rsi]
3940	mov	QWORD[32+rsp],r10
3941	mov	QWORD[40+rsp],r11
3942	mov	QWORD[48+rsp],r12
3943	mov	QWORD[56+rsp],rcx
3944	call	QWORD[__imp_RtlVirtualUnwind]
3945
; Return 1 (presumably ExceptionContinueSearch -- confirm) and restore everything
; pushed at handler entry.
3946	mov	eax,1
3947	add	rsp,64
3948	popfq
3949	pop	r15
3950	pop	r14
3951	pop	r13
3952	pop	r12
3953	pop	rbp
3954	pop	rbx
3955	pop	rdi
3956	pop	rsi
3957	DB	0F3h,0C3h		;repret
3958
3959
; Win64 function table (.pdata): one three-dword entry per function -- begin
; address, end address, and the address of its unwind info in .xdata, all
; image-relative.
; Both key-setup functions share $L$SEH_info_key. The $L$SEH_begin_*/end_*
; labels of the ECB/CCM64/CTR32/XTS functions are defined earlier in the file.
3960section	.pdata rdata align=4
3961ALIGN	4
3962	DD	$L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
3963	DD	$L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
3964	DD	$L$SEH_info_ecb wrt ..imagebase
3965
3966	DD	$L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
3967	DD	$L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
3968	DD	$L$SEH_info_ccm64_enc wrt ..imagebase
3969
3970	DD	$L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
3971	DD	$L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
3972	DD	$L$SEH_info_ccm64_dec wrt ..imagebase
3973
3974	DD	$L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
3975	DD	$L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
3976	DD	$L$SEH_info_ctr32 wrt ..imagebase
3977
3978	DD	$L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
3979	DD	$L$SEH_end_aesni_xts_encrypt wrt ..imagebase
3980	DD	$L$SEH_info_xts_enc wrt ..imagebase
3981
3982	DD	$L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
3983	DD	$L$SEH_end_aesni_xts_decrypt wrt ..imagebase
3984	DD	$L$SEH_info_xts_dec wrt ..imagebase
3985	DD	$L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
3986	DD	$L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
3987	DD	$L$SEH_info_cbc wrt ..imagebase
3988
3989	DD	aesni_set_decrypt_key wrt ..imagebase
3990	DD	$L$SEH_end_set_decrypt_key wrt ..imagebase
3991	DD	$L$SEH_info_key wrt ..imagebase
3992
3993	DD	aesni_set_encrypt_key wrt ..imagebase
3994	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
3995	DD	$L$SEH_info_key wrt ..imagebase
; Win64 unwind information (.xdata).
; Header "9,0,0,0" is assumed to decode as UNWIND_INFO version 1 with the
; exception-handler flag, zero prolog size, no unwind codes and no frame
; register -- verify against the UNWIND_INFO definition. It is followed by the
; image-relative address of the handler and then the handler data: the two
; labels the handler reads through HandlerData (body start, return/epilogue).
; $L$SEH_info_cbc carries no handler data; cbc_se_handler uses labels directly.
3996section	.xdata rdata align=8
3997ALIGN	8
3998$L$SEH_info_ecb:
3999DB	9,0,0,0
4000	DD	ecb_ccm64_se_handler wrt ..imagebase
4001	DD	$L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
4002$L$SEH_info_ccm64_enc:
4003DB	9,0,0,0
4004	DD	ecb_ccm64_se_handler wrt ..imagebase
4005	DD	$L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
4006$L$SEH_info_ccm64_dec:
4007DB	9,0,0,0
4008	DD	ecb_ccm64_se_handler wrt ..imagebase
4009	DD	$L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
4010$L$SEH_info_ctr32:
4011DB	9,0,0,0
4012	DD	ctr_xts_se_handler wrt ..imagebase
4013	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
4014$L$SEH_info_xts_enc:
4015DB	9,0,0,0
4016	DD	ctr_xts_se_handler wrt ..imagebase
4017	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
4018$L$SEH_info_xts_dec:
4019DB	9,0,0,0
4020	DD	ctr_xts_se_handler wrt ..imagebase
4021	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
4022$L$SEH_info_cbc:
4023DB	9,0,0,0
4024	DD	cbc_se_handler wrt ..imagebase
; Key-setup functions: plain unwind data, no handler. Assumed decoding: version 1,
; no flags, 4-byte prolog, one unwind code; the code at prolog offset 4 is a
; small stack allocation of 8 bytes, matching the "sub rsp,8" emitted as raw
; bytes at the entry of both functions -- verify against the UNWIND_CODE definition.
4025$L$SEH_info_key:
4026DB	0x01,0x04,0x01,0x00
4027DB	0x04,0x02,0x00,0x00
4028