1default	rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section	.text code align=64
6
7
8EXTERN	asm_AES_encrypt
9EXTERN	asm_AES_decrypt
10
11
;-----------------------------------------------------------------------
; _bsaes_encrypt8 / _bsaes_encrypt8_bitslice
;
; Internal (non-global) routine that processes eight 128-bit values held
; in registers in one pass.
;
; In:    xmm15, xmm0..xmm6 = the eight 128-bit inputs
;        rax  = pointer to key material laid out as: 16 bytes, then 128
;               bytes per loop pass, then a final 16 bytes
;        r10d = pass counter (presumably the AES round count -- confirm
;               against the callers)
; Out:   eight results in xmm15, xmm0..xmm6. The CTR caller in this file
;        pairs them with consecutive blocks in the order
;        xmm15, xmm0, xmm3, xmm5, xmm2, xmm6, xmm1, xmm4.
; Uses:  xmm7..xmm14 as scratch, r11, rax, r10d, flags.
;
; Entry at _bsaes_encrypt8_bitslice skips the first key XOR and byte
; shuffle; that entry requires r11 to already hold the address of $L$BS0
; because the code reads its masks through r11.
;
; The DB sequences are hand-encoded SSSE3 "pshufb xmmN,xmm7":
;   102,68,15,56,0,255 = pshufb xmm15,xmm7
;   102,15,56,0,199/207/215/223/231/239/247 = pshufb xmm0..xmm6,xmm7
;-----------------------------------------------------------------------
12ALIGN	64
13_bsaes_encrypt8:
14	lea	r11,[$L$BS0]
15
; XOR the first 16 bytes of key material into all eight inputs and
; permute the bytes of each with the mask at $L$BS0+80.
16	movdqa	xmm8,XMMWORD[rax]
17	lea	rax,[16+rax]
18	movdqa	xmm7,XMMWORD[80+r11]
19	pxor	xmm15,xmm8
20	pxor	xmm0,xmm8
21	pxor	xmm1,xmm8
22	pxor	xmm2,xmm8
23DB	102,68,15,56,0,255
24DB	102,15,56,0,199
25	pxor	xmm3,xmm8
26	pxor	xmm4,xmm8
27DB	102,15,56,0,207
28DB	102,15,56,0,215
29	pxor	xmm5,xmm8
30	pxor	xmm6,xmm8
31DB	102,15,56,0,223
32DB	102,15,56,0,231
33DB	102,15,56,0,239
34DB	102,15,56,0,247
; Secondary entry point. What follows is three stages of masked bit
; exchange between register pairs (shift right, xor, and-with-mask, xor,
; shift left, xor) using shift distances 1, 2 and 4 and the masks at
; [r11], [16+r11] and [32+r11]. Presumably this is the conversion into
; bit-sliced form, as the label name suggests.
35_bsaes_encrypt8_bitslice:
36	movdqa	xmm7,XMMWORD[r11]
37	movdqa	xmm8,XMMWORD[16+r11]
; distance 1, mask xmm7: pairs (xmm5,xmm6) and (xmm3,xmm4)
38	movdqa	xmm9,xmm5
39	psrlq	xmm5,1
40	movdqa	xmm10,xmm3
41	psrlq	xmm3,1
42	pxor	xmm5,xmm6
43	pxor	xmm3,xmm4
44	pand	xmm5,xmm7
45	pand	xmm3,xmm7
46	pxor	xmm6,xmm5
47	psllq	xmm5,1
48	pxor	xmm4,xmm3
49	psllq	xmm3,1
50	pxor	xmm5,xmm9
51	pxor	xmm3,xmm10
; distance 1, mask xmm7: pairs (xmm1,xmm2) and (xmm15,xmm0)
52	movdqa	xmm9,xmm1
53	psrlq	xmm1,1
54	movdqa	xmm10,xmm15
55	psrlq	xmm15,1
56	pxor	xmm1,xmm2
57	pxor	xmm15,xmm0
58	pand	xmm1,xmm7
59	pand	xmm15,xmm7
60	pxor	xmm2,xmm1
61	psllq	xmm1,1
62	pxor	xmm0,xmm15
63	psllq	xmm15,1
64	pxor	xmm1,xmm9
65	pxor	xmm15,xmm10
; distance 2, mask xmm8; xmm7 is reloaded with the distance-4 mask
66	movdqa	xmm7,XMMWORD[32+r11]
67	movdqa	xmm9,xmm4
68	psrlq	xmm4,2
69	movdqa	xmm10,xmm3
70	psrlq	xmm3,2
71	pxor	xmm4,xmm6
72	pxor	xmm3,xmm5
73	pand	xmm4,xmm8
74	pand	xmm3,xmm8
75	pxor	xmm6,xmm4
76	psllq	xmm4,2
77	pxor	xmm5,xmm3
78	psllq	xmm3,2
79	pxor	xmm4,xmm9
80	pxor	xmm3,xmm10
81	movdqa	xmm9,xmm0
82	psrlq	xmm0,2
83	movdqa	xmm10,xmm15
84	psrlq	xmm15,2
85	pxor	xmm0,xmm2
86	pxor	xmm15,xmm1
87	pand	xmm0,xmm8
88	pand	xmm15,xmm8
89	pxor	xmm2,xmm0
90	psllq	xmm0,2
91	pxor	xmm1,xmm15
92	psllq	xmm15,2
93	pxor	xmm0,xmm9
94	pxor	xmm15,xmm10
; distance 4, mask xmm7
95	movdqa	xmm9,xmm2
96	psrlq	xmm2,4
97	movdqa	xmm10,xmm1
98	psrlq	xmm1,4
99	pxor	xmm2,xmm6
100	pxor	xmm1,xmm5
101	pand	xmm2,xmm7
102	pand	xmm1,xmm7
103	pxor	xmm6,xmm2
104	psllq	xmm2,4
105	pxor	xmm5,xmm1
106	psllq	xmm1,4
107	pxor	xmm2,xmm9
108	pxor	xmm1,xmm10
109	movdqa	xmm9,xmm0
110	psrlq	xmm0,4
111	movdqa	xmm10,xmm15
112	psrlq	xmm15,4
113	pxor	xmm0,xmm4
114	pxor	xmm15,xmm3
115	pand	xmm0,xmm7
116	pand	xmm15,xmm7
117	pxor	xmm4,xmm0
118	psllq	xmm0,4
119	pxor	xmm3,xmm15
120	psllq	xmm15,4
121	pxor	xmm0,xmm9
122	pxor	xmm15,xmm10
; The first pass enters at $L$enc_sbox, skipping the key XOR at the top
; of the loop; the counter is pre-decremented to account for it.
123	dec	r10d
124	jmp	NEAR $L$enc_sbox
125ALIGN	16
; Top of a pass: XOR 128 bytes of key material (one 16-byte slot per
; register) and permute each register's bytes with the mask in xmm7.
126$L$enc_loop:
127	pxor	xmm15,XMMWORD[rax]
128	pxor	xmm0,XMMWORD[16+rax]
129	pxor	xmm1,XMMWORD[32+rax]
130	pxor	xmm2,XMMWORD[48+rax]
131DB	102,68,15,56,0,255
132DB	102,15,56,0,199
133	pxor	xmm3,XMMWORD[64+rax]
134	pxor	xmm4,XMMWORD[80+rax]
135DB	102,15,56,0,207
136DB	102,15,56,0,215
137	pxor	xmm5,XMMWORD[96+rax]
138	pxor	xmm6,XMMWORD[112+rax]
139DB	102,15,56,0,223
140DB	102,15,56,0,231
141DB	102,15,56,0,239
142DB	102,15,56,0,247
143	lea	rax,[128+rax]
; Pure boolean network (pxor/pand/por/movdqa only) over the eight
; registers, with xmm7..xmm14 as temporaries. Judging by the label this
; is the S-box step; the statement order is load-bearing.
144$L$enc_sbox:
145	pxor	xmm4,xmm5
146	pxor	xmm1,xmm0
147	pxor	xmm2,xmm15
148	pxor	xmm5,xmm1
149	pxor	xmm4,xmm15
150
151	pxor	xmm5,xmm2
152	pxor	xmm2,xmm6
153	pxor	xmm6,xmm4
154	pxor	xmm2,xmm3
155	pxor	xmm3,xmm4
156	pxor	xmm2,xmm0
157
158	pxor	xmm1,xmm6
159	pxor	xmm0,xmm4
160	movdqa	xmm10,xmm6
161	movdqa	xmm9,xmm0
162	movdqa	xmm8,xmm4
163	movdqa	xmm12,xmm1
164	movdqa	xmm11,xmm5
165
166	pxor	xmm10,xmm3
167	pxor	xmm9,xmm1
168	pxor	xmm8,xmm2
169	movdqa	xmm13,xmm10
170	pxor	xmm12,xmm3
171	movdqa	xmm7,xmm9
172	pxor	xmm11,xmm15
173	movdqa	xmm14,xmm10
174
175	por	xmm9,xmm8
176	por	xmm10,xmm11
177	pxor	xmm14,xmm7
178	pand	xmm13,xmm11
179	pxor	xmm11,xmm8
180	pand	xmm7,xmm8
181	pand	xmm14,xmm11
182	movdqa	xmm11,xmm2
183	pxor	xmm11,xmm15
184	pand	xmm12,xmm11
185	pxor	xmm10,xmm12
186	pxor	xmm9,xmm12
187	movdqa	xmm12,xmm6
188	movdqa	xmm11,xmm4
189	pxor	xmm12,xmm0
190	pxor	xmm11,xmm5
191	movdqa	xmm8,xmm12
192	pand	xmm12,xmm11
193	por	xmm8,xmm11
194	pxor	xmm7,xmm12
195	pxor	xmm10,xmm14
196	pxor	xmm9,xmm13
197	pxor	xmm8,xmm14
198	movdqa	xmm11,xmm1
199	pxor	xmm7,xmm13
200	movdqa	xmm12,xmm3
201	pxor	xmm8,xmm13
202	movdqa	xmm13,xmm0
203	pand	xmm11,xmm2
204	movdqa	xmm14,xmm6
205	pand	xmm12,xmm15
206	pand	xmm13,xmm4
207	por	xmm14,xmm5
208	pxor	xmm10,xmm11
209	pxor	xmm9,xmm12
210	pxor	xmm8,xmm13
211	pxor	xmm7,xmm14
212
213
214
215
216
217	movdqa	xmm11,xmm10
218	pand	xmm10,xmm8
219	pxor	xmm11,xmm9
220
221	movdqa	xmm13,xmm7
222	movdqa	xmm14,xmm11
223	pxor	xmm13,xmm10
224	pand	xmm14,xmm13
225
226	movdqa	xmm12,xmm8
227	pxor	xmm14,xmm9
228	pxor	xmm12,xmm7
229
230	pxor	xmm10,xmm9
231
232	pand	xmm12,xmm10
233
234	movdqa	xmm9,xmm13
235	pxor	xmm12,xmm7
236
237	pxor	xmm9,xmm12
238	pxor	xmm8,xmm12
239
240	pand	xmm9,xmm7
241
242	pxor	xmm13,xmm9
243	pxor	xmm8,xmm9
244
245	pand	xmm13,xmm14
246
247	pxor	xmm13,xmm11
248	movdqa	xmm11,xmm5
249	movdqa	xmm7,xmm4
250	movdqa	xmm9,xmm14
251	pxor	xmm9,xmm13
252	pand	xmm9,xmm5
253	pxor	xmm5,xmm4
254	pand	xmm4,xmm14
255	pand	xmm5,xmm13
256	pxor	xmm5,xmm4
257	pxor	xmm4,xmm9
258	pxor	xmm11,xmm15
259	pxor	xmm7,xmm2
260	pxor	xmm14,xmm12
261	pxor	xmm13,xmm8
262	movdqa	xmm10,xmm14
263	movdqa	xmm9,xmm12
264	pxor	xmm10,xmm13
265	pxor	xmm9,xmm8
266	pand	xmm10,xmm11
267	pand	xmm9,xmm15
268	pxor	xmm11,xmm7
269	pxor	xmm15,xmm2
270	pand	xmm7,xmm14
271	pand	xmm2,xmm12
272	pand	xmm11,xmm13
273	pand	xmm15,xmm8
274	pxor	xmm7,xmm11
275	pxor	xmm15,xmm2
276	pxor	xmm11,xmm10
277	pxor	xmm2,xmm9
278	pxor	xmm5,xmm11
279	pxor	xmm15,xmm11
280	pxor	xmm4,xmm7
281	pxor	xmm2,xmm7
282
283	movdqa	xmm11,xmm6
284	movdqa	xmm7,xmm0
285	pxor	xmm11,xmm3
286	pxor	xmm7,xmm1
287	movdqa	xmm10,xmm14
288	movdqa	xmm9,xmm12
289	pxor	xmm10,xmm13
290	pxor	xmm9,xmm8
291	pand	xmm10,xmm11
292	pand	xmm9,xmm3
293	pxor	xmm11,xmm7
294	pxor	xmm3,xmm1
295	pand	xmm7,xmm14
296	pand	xmm1,xmm12
297	pand	xmm11,xmm13
298	pand	xmm3,xmm8
299	pxor	xmm7,xmm11
300	pxor	xmm3,xmm1
301	pxor	xmm11,xmm10
302	pxor	xmm1,xmm9
303	pxor	xmm14,xmm12
304	pxor	xmm13,xmm8
305	movdqa	xmm10,xmm14
306	pxor	xmm10,xmm13
307	pand	xmm10,xmm6
308	pxor	xmm6,xmm0
309	pand	xmm0,xmm14
310	pand	xmm6,xmm13
311	pxor	xmm6,xmm0
312	pxor	xmm0,xmm10
313	pxor	xmm6,xmm11
314	pxor	xmm3,xmm11
315	pxor	xmm0,xmm7
316	pxor	xmm1,xmm7
317	pxor	xmm6,xmm15
318	pxor	xmm0,xmm5
319	pxor	xmm3,xmm6
320	pxor	xmm5,xmm15
321	pxor	xmm15,xmm0
322
323	pxor	xmm0,xmm4
324	pxor	xmm4,xmm1
325	pxor	xmm1,xmm2
326	pxor	xmm2,xmm4
327	pxor	xmm3,xmm4
328
329	pxor	xmm5,xmm2
; Counter goes negative after the final pass: leave the loop without the
; dword-rotation step below.
330	dec	r10d
331	jl	NEAR $L$enc_done
; Dword rotations (pshufd 0x93 / 0x4E) combined with XORs across the
; registers -- presumably the MixColumns step (not stated in the file).
; None of these instructions modify EFLAGS, so the jnz at the end still
; tests the result of the "dec r10d" above.
332	pshufd	xmm7,xmm15,0x93
333	pshufd	xmm8,xmm0,0x93
334	pxor	xmm15,xmm7
335	pshufd	xmm9,xmm3,0x93
336	pxor	xmm0,xmm8
337	pshufd	xmm10,xmm5,0x93
338	pxor	xmm3,xmm9
339	pshufd	xmm11,xmm2,0x93
340	pxor	xmm5,xmm10
341	pshufd	xmm12,xmm6,0x93
342	pxor	xmm2,xmm11
343	pshufd	xmm13,xmm1,0x93
344	pxor	xmm6,xmm12
345	pshufd	xmm14,xmm4,0x93
346	pxor	xmm1,xmm13
347	pxor	xmm4,xmm14
348
349	pxor	xmm8,xmm15
350	pxor	xmm7,xmm4
351	pxor	xmm8,xmm4
352	pshufd	xmm15,xmm15,0x4E
353	pxor	xmm9,xmm0
354	pshufd	xmm0,xmm0,0x4E
355	pxor	xmm12,xmm2
356	pxor	xmm15,xmm7
357	pxor	xmm13,xmm6
358	pxor	xmm0,xmm8
359	pxor	xmm11,xmm5
360	pshufd	xmm7,xmm2,0x4E
361	pxor	xmm14,xmm1
362	pshufd	xmm8,xmm6,0x4E
363	pxor	xmm10,xmm3
364	pshufd	xmm2,xmm5,0x4E
365	pxor	xmm10,xmm4
366	pshufd	xmm6,xmm4,0x4E
367	pxor	xmm11,xmm4
368	pshufd	xmm5,xmm1,0x4E
369	pxor	xmm7,xmm11
370	pshufd	xmm1,xmm3,0x4E
371	pxor	xmm8,xmm12
372	pxor	xmm2,xmm10
373	pxor	xmm6,xmm14
374	pxor	xmm5,xmm13
375	movdqa	xmm3,xmm7
376	pxor	xmm1,xmm9
377	movdqa	xmm4,xmm8
; Select the byte-shuffle mask for the next pass: $L$BS0+48 while the
; counter is still non-zero, $L$BS0+64 for the pass where it hit zero.
378	movdqa	xmm7,XMMWORD[48+r11]
379	jnz	NEAR $L$enc_loop
380	movdqa	xmm7,XMMWORD[64+r11]
381	jmp	NEAR $L$enc_loop
382ALIGN	16
; Same three-stage masked bit exchange as at entry (distances 1, 2, 4),
; applied to the post-loop register assignment; presumably it converts
; the state back out of bit-sliced form.
383$L$enc_done:
384	movdqa	xmm7,XMMWORD[r11]
385	movdqa	xmm8,XMMWORD[16+r11]
386	movdqa	xmm9,xmm1
387	psrlq	xmm1,1
388	movdqa	xmm10,xmm2
389	psrlq	xmm2,1
390	pxor	xmm1,xmm4
391	pxor	xmm2,xmm6
392	pand	xmm1,xmm7
393	pand	xmm2,xmm7
394	pxor	xmm4,xmm1
395	psllq	xmm1,1
396	pxor	xmm6,xmm2
397	psllq	xmm2,1
398	pxor	xmm1,xmm9
399	pxor	xmm2,xmm10
400	movdqa	xmm9,xmm3
401	psrlq	xmm3,1
402	movdqa	xmm10,xmm15
403	psrlq	xmm15,1
404	pxor	xmm3,xmm5
405	pxor	xmm15,xmm0
406	pand	xmm3,xmm7
407	pand	xmm15,xmm7
408	pxor	xmm5,xmm3
409	psllq	xmm3,1
410	pxor	xmm0,xmm15
411	psllq	xmm15,1
412	pxor	xmm3,xmm9
413	pxor	xmm15,xmm10
414	movdqa	xmm7,XMMWORD[32+r11]
415	movdqa	xmm9,xmm6
416	psrlq	xmm6,2
417	movdqa	xmm10,xmm2
418	psrlq	xmm2,2
419	pxor	xmm6,xmm4
420	pxor	xmm2,xmm1
421	pand	xmm6,xmm8
422	pand	xmm2,xmm8
423	pxor	xmm4,xmm6
424	psllq	xmm6,2
425	pxor	xmm1,xmm2
426	psllq	xmm2,2
427	pxor	xmm6,xmm9
428	pxor	xmm2,xmm10
429	movdqa	xmm9,xmm0
430	psrlq	xmm0,2
431	movdqa	xmm10,xmm15
432	psrlq	xmm15,2
433	pxor	xmm0,xmm5
434	pxor	xmm15,xmm3
435	pand	xmm0,xmm8
436	pand	xmm15,xmm8
437	pxor	xmm5,xmm0
438	psllq	xmm0,2
439	pxor	xmm3,xmm15
440	psllq	xmm15,2
441	pxor	xmm0,xmm9
442	pxor	xmm15,xmm10
443	movdqa	xmm9,xmm5
444	psrlq	xmm5,4
445	movdqa	xmm10,xmm3
446	psrlq	xmm3,4
447	pxor	xmm5,xmm4
448	pxor	xmm3,xmm1
449	pand	xmm5,xmm7
450	pand	xmm3,xmm7
451	pxor	xmm4,xmm5
452	psllq	xmm5,4
453	pxor	xmm1,xmm3
454	psllq	xmm3,4
455	pxor	xmm5,xmm9
456	pxor	xmm3,xmm10
457	movdqa	xmm9,xmm0
458	psrlq	xmm0,4
459	movdqa	xmm10,xmm15
460	psrlq	xmm15,4
461	pxor	xmm0,xmm6
462	pxor	xmm15,xmm2
463	pand	xmm0,xmm7
464	pand	xmm15,xmm7
465	pxor	xmm6,xmm0
466	psllq	xmm0,4
467	pxor	xmm2,xmm15
468	psllq	xmm15,4
469	pxor	xmm0,xmm9
470	pxor	xmm15,xmm10
; XOR the final 16 bytes of key material into all eight results.
471	movdqa	xmm7,XMMWORD[rax]
472	pxor	xmm3,xmm7
473	pxor	xmm5,xmm7
474	pxor	xmm2,xmm7
475	pxor	xmm6,xmm7
476	pxor	xmm1,xmm7
477	pxor	xmm4,xmm7
478	pxor	xmm15,xmm7
479	pxor	xmm0,xmm7
480	DB	0F3h,0C3h		;repret
481
482
483
;-----------------------------------------------------------------------
; _bsaes_decrypt8
;
; Internal (non-global) counterpart of _bsaes_encrypt8: processes eight
; 128-bit values held in registers in one pass.
;
; In:    xmm15, xmm0..xmm6 = the eight 128-bit inputs
;        rax  = pointer to key material laid out as: 16 bytes, then 128
;               bytes per loop pass, then a final 16 bytes
;        r10d = pass counter (presumably the AES round count -- confirm
;               against the callers)
; Out:   eight results in xmm15, xmm0..xmm6. bsaes_cbc_encrypt in this
;        file pairs them with consecutive blocks in the order
;        xmm15, xmm0, xmm5, xmm3, xmm1, xmm6, xmm2, xmm4.
; Uses:  xmm7..xmm14 as scratch, r11 (set to $L$BS0 here), rax, r10d,
;        flags.
;
; The DB sequences are hand-encoded SSSE3 "pshufb xmmN,xmm7":
;   102,68,15,56,0,255 = pshufb xmm15,xmm7
;   102,15,56,0,199/207/215/223/231/239/247 = pshufb xmm0..xmm6,xmm7
; The shuffle masks used here live below $L$BS0 (offsets -48, -32, -16).
;-----------------------------------------------------------------------
484ALIGN	64
485_bsaes_decrypt8:
486	lea	r11,[$L$BS0]
487
; XOR the first 16 bytes of key material into all eight inputs and
; permute the bytes of each with the mask at $L$BS0-48.
488	movdqa	xmm8,XMMWORD[rax]
489	lea	rax,[16+rax]
490	movdqa	xmm7,XMMWORD[((-48))+r11]
491	pxor	xmm15,xmm8
492	pxor	xmm0,xmm8
493	pxor	xmm1,xmm8
494	pxor	xmm2,xmm8
495DB	102,68,15,56,0,255
496DB	102,15,56,0,199
497	pxor	xmm3,xmm8
498	pxor	xmm4,xmm8
499DB	102,15,56,0,207
500DB	102,15,56,0,215
501	pxor	xmm5,xmm8
502	pxor	xmm6,xmm8
503DB	102,15,56,0,223
504DB	102,15,56,0,231
505DB	102,15,56,0,239
506DB	102,15,56,0,247
; Three stages of masked bit exchange between register pairs (shift
; right, xor, and-with-mask, xor, shift left, xor) with distances 1, 2, 4
; and the masks at [r11], [16+r11], [32+r11]. Presumably the conversion
; into bit-sliced form.
507	movdqa	xmm7,XMMWORD[r11]
508	movdqa	xmm8,XMMWORD[16+r11]
509	movdqa	xmm9,xmm5
510	psrlq	xmm5,1
511	movdqa	xmm10,xmm3
512	psrlq	xmm3,1
513	pxor	xmm5,xmm6
514	pxor	xmm3,xmm4
515	pand	xmm5,xmm7
516	pand	xmm3,xmm7
517	pxor	xmm6,xmm5
518	psllq	xmm5,1
519	pxor	xmm4,xmm3
520	psllq	xmm3,1
521	pxor	xmm5,xmm9
522	pxor	xmm3,xmm10
523	movdqa	xmm9,xmm1
524	psrlq	xmm1,1
525	movdqa	xmm10,xmm15
526	psrlq	xmm15,1
527	pxor	xmm1,xmm2
528	pxor	xmm15,xmm0
529	pand	xmm1,xmm7
530	pand	xmm15,xmm7
531	pxor	xmm2,xmm1
532	psllq	xmm1,1
533	pxor	xmm0,xmm15
534	psllq	xmm15,1
535	pxor	xmm1,xmm9
536	pxor	xmm15,xmm10
537	movdqa	xmm7,XMMWORD[32+r11]
538	movdqa	xmm9,xmm4
539	psrlq	xmm4,2
540	movdqa	xmm10,xmm3
541	psrlq	xmm3,2
542	pxor	xmm4,xmm6
543	pxor	xmm3,xmm5
544	pand	xmm4,xmm8
545	pand	xmm3,xmm8
546	pxor	xmm6,xmm4
547	psllq	xmm4,2
548	pxor	xmm5,xmm3
549	psllq	xmm3,2
550	pxor	xmm4,xmm9
551	pxor	xmm3,xmm10
552	movdqa	xmm9,xmm0
553	psrlq	xmm0,2
554	movdqa	xmm10,xmm15
555	psrlq	xmm15,2
556	pxor	xmm0,xmm2
557	pxor	xmm15,xmm1
558	pand	xmm0,xmm8
559	pand	xmm15,xmm8
560	pxor	xmm2,xmm0
561	psllq	xmm0,2
562	pxor	xmm1,xmm15
563	psllq	xmm15,2
564	pxor	xmm0,xmm9
565	pxor	xmm15,xmm10
566	movdqa	xmm9,xmm2
567	psrlq	xmm2,4
568	movdqa	xmm10,xmm1
569	psrlq	xmm1,4
570	pxor	xmm2,xmm6
571	pxor	xmm1,xmm5
572	pand	xmm2,xmm7
573	pand	xmm1,xmm7
574	pxor	xmm6,xmm2
575	psllq	xmm2,4
576	pxor	xmm5,xmm1
577	psllq	xmm1,4
578	pxor	xmm2,xmm9
579	pxor	xmm1,xmm10
580	movdqa	xmm9,xmm0
581	psrlq	xmm0,4
582	movdqa	xmm10,xmm15
583	psrlq	xmm15,4
584	pxor	xmm0,xmm4
585	pxor	xmm15,xmm3
586	pand	xmm0,xmm7
587	pand	xmm15,xmm7
588	pxor	xmm4,xmm0
589	psllq	xmm0,4
590	pxor	xmm3,xmm15
591	psllq	xmm15,4
592	pxor	xmm0,xmm9
593	pxor	xmm15,xmm10
; The first pass enters at $L$dec_sbox, skipping the key XOR at the top
; of the loop; the counter is pre-decremented to account for it.
594	dec	r10d
595	jmp	NEAR $L$dec_sbox
596ALIGN	16
; Top of a pass: XOR 128 bytes of key material (one 16-byte slot per
; register) and permute each register's bytes with the mask in xmm7.
597$L$dec_loop:
598	pxor	xmm15,XMMWORD[rax]
599	pxor	xmm0,XMMWORD[16+rax]
600	pxor	xmm1,XMMWORD[32+rax]
601	pxor	xmm2,XMMWORD[48+rax]
602DB	102,68,15,56,0,255
603DB	102,15,56,0,199
604	pxor	xmm3,XMMWORD[64+rax]
605	pxor	xmm4,XMMWORD[80+rax]
606DB	102,15,56,0,207
607DB	102,15,56,0,215
608	pxor	xmm5,XMMWORD[96+rax]
609	pxor	xmm6,XMMWORD[112+rax]
610DB	102,15,56,0,223
611DB	102,15,56,0,231
612DB	102,15,56,0,239
613DB	102,15,56,0,247
614	lea	rax,[128+rax]
; Pure boolean network (pxor/pand/por/movdqa only) over the eight
; registers, with xmm7..xmm14 as temporaries. Judging by the label this
; is the inverse S-box step; the statement order is load-bearing.
615$L$dec_sbox:
616	pxor	xmm2,xmm3
617
618	pxor	xmm3,xmm6
619	pxor	xmm1,xmm6
620	pxor	xmm5,xmm3
621	pxor	xmm6,xmm5
622	pxor	xmm0,xmm6
623
624	pxor	xmm15,xmm0
625	pxor	xmm1,xmm4
626	pxor	xmm2,xmm15
627	pxor	xmm4,xmm15
628	pxor	xmm0,xmm2
629	movdqa	xmm10,xmm2
630	movdqa	xmm9,xmm6
631	movdqa	xmm8,xmm0
632	movdqa	xmm12,xmm3
633	movdqa	xmm11,xmm4
634
635	pxor	xmm10,xmm15
636	pxor	xmm9,xmm3
637	pxor	xmm8,xmm5
638	movdqa	xmm13,xmm10
639	pxor	xmm12,xmm15
640	movdqa	xmm7,xmm9
641	pxor	xmm11,xmm1
642	movdqa	xmm14,xmm10
643
644	por	xmm9,xmm8
645	por	xmm10,xmm11
646	pxor	xmm14,xmm7
647	pand	xmm13,xmm11
648	pxor	xmm11,xmm8
649	pand	xmm7,xmm8
650	pand	xmm14,xmm11
651	movdqa	xmm11,xmm5
652	pxor	xmm11,xmm1
653	pand	xmm12,xmm11
654	pxor	xmm10,xmm12
655	pxor	xmm9,xmm12
656	movdqa	xmm12,xmm2
657	movdqa	xmm11,xmm0
658	pxor	xmm12,xmm6
659	pxor	xmm11,xmm4
660	movdqa	xmm8,xmm12
661	pand	xmm12,xmm11
662	por	xmm8,xmm11
663	pxor	xmm7,xmm12
664	pxor	xmm10,xmm14
665	pxor	xmm9,xmm13
666	pxor	xmm8,xmm14
667	movdqa	xmm11,xmm3
668	pxor	xmm7,xmm13
669	movdqa	xmm12,xmm15
670	pxor	xmm8,xmm13
671	movdqa	xmm13,xmm6
672	pand	xmm11,xmm5
673	movdqa	xmm14,xmm2
674	pand	xmm12,xmm1
675	pand	xmm13,xmm0
676	por	xmm14,xmm4
677	pxor	xmm10,xmm11
678	pxor	xmm9,xmm12
679	pxor	xmm8,xmm13
680	pxor	xmm7,xmm14
681
682
683
684
685
686	movdqa	xmm11,xmm10
687	pand	xmm10,xmm8
688	pxor	xmm11,xmm9
689
690	movdqa	xmm13,xmm7
691	movdqa	xmm14,xmm11
692	pxor	xmm13,xmm10
693	pand	xmm14,xmm13
694
695	movdqa	xmm12,xmm8
696	pxor	xmm14,xmm9
697	pxor	xmm12,xmm7
698
699	pxor	xmm10,xmm9
700
701	pand	xmm12,xmm10
702
703	movdqa	xmm9,xmm13
704	pxor	xmm12,xmm7
705
706	pxor	xmm9,xmm12
707	pxor	xmm8,xmm12
708
709	pand	xmm9,xmm7
710
711	pxor	xmm13,xmm9
712	pxor	xmm8,xmm9
713
714	pand	xmm13,xmm14
715
716	pxor	xmm13,xmm11
717	movdqa	xmm11,xmm4
718	movdqa	xmm7,xmm0
719	movdqa	xmm9,xmm14
720	pxor	xmm9,xmm13
721	pand	xmm9,xmm4
722	pxor	xmm4,xmm0
723	pand	xmm0,xmm14
724	pand	xmm4,xmm13
725	pxor	xmm4,xmm0
726	pxor	xmm0,xmm9
727	pxor	xmm11,xmm1
728	pxor	xmm7,xmm5
729	pxor	xmm14,xmm12
730	pxor	xmm13,xmm8
731	movdqa	xmm10,xmm14
732	movdqa	xmm9,xmm12
733	pxor	xmm10,xmm13
734	pxor	xmm9,xmm8
735	pand	xmm10,xmm11
736	pand	xmm9,xmm1
737	pxor	xmm11,xmm7
738	pxor	xmm1,xmm5
739	pand	xmm7,xmm14
740	pand	xmm5,xmm12
741	pand	xmm11,xmm13
742	pand	xmm1,xmm8
743	pxor	xmm7,xmm11
744	pxor	xmm1,xmm5
745	pxor	xmm11,xmm10
746	pxor	xmm5,xmm9
747	pxor	xmm4,xmm11
748	pxor	xmm1,xmm11
749	pxor	xmm0,xmm7
750	pxor	xmm5,xmm7
751
752	movdqa	xmm11,xmm2
753	movdqa	xmm7,xmm6
754	pxor	xmm11,xmm15
755	pxor	xmm7,xmm3
756	movdqa	xmm10,xmm14
757	movdqa	xmm9,xmm12
758	pxor	xmm10,xmm13
759	pxor	xmm9,xmm8
760	pand	xmm10,xmm11
761	pand	xmm9,xmm15
762	pxor	xmm11,xmm7
763	pxor	xmm15,xmm3
764	pand	xmm7,xmm14
765	pand	xmm3,xmm12
766	pand	xmm11,xmm13
767	pand	xmm15,xmm8
768	pxor	xmm7,xmm11
769	pxor	xmm15,xmm3
770	pxor	xmm11,xmm10
771	pxor	xmm3,xmm9
772	pxor	xmm14,xmm12
773	pxor	xmm13,xmm8
774	movdqa	xmm10,xmm14
775	pxor	xmm10,xmm13
776	pand	xmm10,xmm2
777	pxor	xmm2,xmm6
778	pand	xmm6,xmm14
779	pand	xmm2,xmm13
780	pxor	xmm2,xmm6
781	pxor	xmm6,xmm10
782	pxor	xmm2,xmm11
783	pxor	xmm15,xmm11
784	pxor	xmm6,xmm7
785	pxor	xmm3,xmm7
786	pxor	xmm0,xmm6
787	pxor	xmm5,xmm4
788
789	pxor	xmm3,xmm0
790	pxor	xmm1,xmm6
791	pxor	xmm4,xmm6
792	pxor	xmm3,xmm1
793	pxor	xmm6,xmm15
794	pxor	xmm3,xmm4
795	pxor	xmm2,xmm5
796	pxor	xmm5,xmm0
797	pxor	xmm2,xmm3
798
799	pxor	xmm3,xmm15
800	pxor	xmm6,xmm2
; Counter goes negative after the final pass: leave the loop without the
; dword-rotation step below.
801	dec	r10d
802	jl	NEAR $L$dec_done
803
; Dword rotations (pshufd 0x4E, then 0x93 / 0x4E) combined with XORs
; across the registers -- presumably the inverse MixColumns step (not
; stated in the file). None of these instructions modify EFLAGS, so the
; jnz at the end still tests the result of the "dec r10d" above.
804	pshufd	xmm7,xmm15,0x4E
805	pshufd	xmm13,xmm2,0x4E
806	pxor	xmm7,xmm15
807	pshufd	xmm14,xmm4,0x4E
808	pxor	xmm13,xmm2
809	pshufd	xmm8,xmm0,0x4E
810	pxor	xmm14,xmm4
811	pshufd	xmm9,xmm5,0x4E
812	pxor	xmm8,xmm0
813	pshufd	xmm10,xmm3,0x4E
814	pxor	xmm9,xmm5
815	pxor	xmm15,xmm13
816	pxor	xmm0,xmm13
817	pshufd	xmm11,xmm1,0x4E
818	pxor	xmm10,xmm3
819	pxor	xmm5,xmm7
820	pxor	xmm3,xmm8
821	pshufd	xmm12,xmm6,0x4E
822	pxor	xmm11,xmm1
823	pxor	xmm0,xmm14
824	pxor	xmm1,xmm9
825	pxor	xmm12,xmm6
826
827	pxor	xmm5,xmm14
828	pxor	xmm3,xmm13
829	pxor	xmm1,xmm13
830	pxor	xmm6,xmm10
831	pxor	xmm2,xmm11
832	pxor	xmm1,xmm14
833	pxor	xmm6,xmm14
834	pxor	xmm4,xmm12
835	pshufd	xmm7,xmm15,0x93
836	pshufd	xmm8,xmm0,0x93
837	pxor	xmm15,xmm7
838	pshufd	xmm9,xmm5,0x93
839	pxor	xmm0,xmm8
840	pshufd	xmm10,xmm3,0x93
841	pxor	xmm5,xmm9
842	pshufd	xmm11,xmm1,0x93
843	pxor	xmm3,xmm10
844	pshufd	xmm12,xmm6,0x93
845	pxor	xmm1,xmm11
846	pshufd	xmm13,xmm2,0x93
847	pxor	xmm6,xmm12
848	pshufd	xmm14,xmm4,0x93
849	pxor	xmm2,xmm13
850	pxor	xmm4,xmm14
851
852	pxor	xmm8,xmm15
853	pxor	xmm7,xmm4
854	pxor	xmm8,xmm4
855	pshufd	xmm15,xmm15,0x4E
856	pxor	xmm9,xmm0
857	pshufd	xmm0,xmm0,0x4E
858	pxor	xmm12,xmm1
859	pxor	xmm15,xmm7
860	pxor	xmm13,xmm6
861	pxor	xmm0,xmm8
862	pxor	xmm11,xmm3
863	pshufd	xmm7,xmm1,0x4E
864	pxor	xmm14,xmm2
865	pshufd	xmm8,xmm6,0x4E
866	pxor	xmm10,xmm5
867	pshufd	xmm1,xmm3,0x4E
868	pxor	xmm10,xmm4
869	pshufd	xmm6,xmm4,0x4E
870	pxor	xmm11,xmm4
871	pshufd	xmm3,xmm2,0x4E
872	pxor	xmm7,xmm11
873	pshufd	xmm2,xmm5,0x4E
874	pxor	xmm8,xmm12
875	pxor	xmm10,xmm1
876	pxor	xmm6,xmm14
877	pxor	xmm13,xmm3
878	movdqa	xmm3,xmm7
879	pxor	xmm2,xmm9
880	movdqa	xmm5,xmm13
881	movdqa	xmm4,xmm8
882	movdqa	xmm1,xmm2
883	movdqa	xmm2,xmm10
; Select the byte-shuffle mask for the next pass: $L$BS0-16 while the
; counter is still non-zero, $L$BS0-32 for the pass where it hit zero.
884	movdqa	xmm7,XMMWORD[((-16))+r11]
885	jnz	NEAR $L$dec_loop
886	movdqa	xmm7,XMMWORD[((-32))+r11]
887	jmp	NEAR $L$dec_loop
888ALIGN	16
; Same three-stage masked bit exchange as at entry (distances 1, 2, 4),
; applied to the post-loop register assignment; presumably it converts
; the state back out of bit-sliced form.
889$L$dec_done:
890	movdqa	xmm7,XMMWORD[r11]
891	movdqa	xmm8,XMMWORD[16+r11]
892	movdqa	xmm9,xmm2
893	psrlq	xmm2,1
894	movdqa	xmm10,xmm1
895	psrlq	xmm1,1
896	pxor	xmm2,xmm4
897	pxor	xmm1,xmm6
898	pand	xmm2,xmm7
899	pand	xmm1,xmm7
900	pxor	xmm4,xmm2
901	psllq	xmm2,1
902	pxor	xmm6,xmm1
903	psllq	xmm1,1
904	pxor	xmm2,xmm9
905	pxor	xmm1,xmm10
906	movdqa	xmm9,xmm5
907	psrlq	xmm5,1
908	movdqa	xmm10,xmm15
909	psrlq	xmm15,1
910	pxor	xmm5,xmm3
911	pxor	xmm15,xmm0
912	pand	xmm5,xmm7
913	pand	xmm15,xmm7
914	pxor	xmm3,xmm5
915	psllq	xmm5,1
916	pxor	xmm0,xmm15
917	psllq	xmm15,1
918	pxor	xmm5,xmm9
919	pxor	xmm15,xmm10
920	movdqa	xmm7,XMMWORD[32+r11]
921	movdqa	xmm9,xmm6
922	psrlq	xmm6,2
923	movdqa	xmm10,xmm1
924	psrlq	xmm1,2
925	pxor	xmm6,xmm4
926	pxor	xmm1,xmm2
927	pand	xmm6,xmm8
928	pand	xmm1,xmm8
929	pxor	xmm4,xmm6
930	psllq	xmm6,2
931	pxor	xmm2,xmm1
932	psllq	xmm1,2
933	pxor	xmm6,xmm9
934	pxor	xmm1,xmm10
935	movdqa	xmm9,xmm0
936	psrlq	xmm0,2
937	movdqa	xmm10,xmm15
938	psrlq	xmm15,2
939	pxor	xmm0,xmm3
940	pxor	xmm15,xmm5
941	pand	xmm0,xmm8
942	pand	xmm15,xmm8
943	pxor	xmm3,xmm0
944	psllq	xmm0,2
945	pxor	xmm5,xmm15
946	psllq	xmm15,2
947	pxor	xmm0,xmm9
948	pxor	xmm15,xmm10
949	movdqa	xmm9,xmm3
950	psrlq	xmm3,4
951	movdqa	xmm10,xmm5
952	psrlq	xmm5,4
953	pxor	xmm3,xmm4
954	pxor	xmm5,xmm2
955	pand	xmm3,xmm7
956	pand	xmm5,xmm7
957	pxor	xmm4,xmm3
958	psllq	xmm3,4
959	pxor	xmm2,xmm5
960	psllq	xmm5,4
961	pxor	xmm3,xmm9
962	pxor	xmm5,xmm10
963	movdqa	xmm9,xmm0
964	psrlq	xmm0,4
965	movdqa	xmm10,xmm15
966	psrlq	xmm15,4
967	pxor	xmm0,xmm6
968	pxor	xmm15,xmm1
969	pand	xmm0,xmm7
970	pand	xmm15,xmm7
971	pxor	xmm6,xmm0
972	psllq	xmm0,4
973	pxor	xmm1,xmm15
974	psllq	xmm15,4
975	pxor	xmm0,xmm9
976	pxor	xmm15,xmm10
; XOR the final 16 bytes of key material into all eight results.
977	movdqa	xmm7,XMMWORD[rax]
978	pxor	xmm5,xmm7
979	pxor	xmm3,xmm7
980	pxor	xmm1,xmm7
981	pxor	xmm6,xmm7
982	pxor	xmm2,xmm7
983	pxor	xmm4,xmm7
984	pxor	xmm15,xmm7
985	pxor	xmm0,xmm7
986	DB	0F3h,0C3h		;repret
987
988
;-----------------------------------------------------------------------
; _bsaes_key_convert
;
; Internal routine that expands 16-byte key chunks read from [rcx] into
; 128-byte groups of byte masks written to [rax].
;
; In:    rcx  = source pointer (read with unaligned loads)
;        rax  = destination pointer (written with aligned stores)
;        r10d = count; the loop body runs r10d-1 times
; Out:   [rax_in]            = first 16 source bytes, copied unchanged
;        [rax_in+16 ...]     = 128 bytes per loop pass
;        rax  = one past the last byte written
;        xmm6 = the next 16 source bytes, loaded but not yet converted
;               (both callers in this file store it at [rax] themselves)
;        xmm7 = the 16-byte constant at $L$masks+80
; Uses:  xmm0..xmm15, r11, rcx, r10d, flags.
;
; "DB 102,15,56,0,244" is a hand-encoded SSSE3 "pshufb xmm6,xmm4".
;-----------------------------------------------------------------------
989ALIGN	16
990_bsaes_key_convert:
991	lea	r11,[$L$masks]
992	movdqu	xmm7,XMMWORD[rcx]
993	lea	rcx,[16+rcx]
; xmm0..xmm3 = four bit-selection masks, xmm4 = byte-shuffle mask,
; xmm5 = all ones (used below to invert selected outputs).
994	movdqa	xmm0,XMMWORD[r11]
995	movdqa	xmm1,XMMWORD[16+r11]
996	movdqa	xmm2,XMMWORD[32+r11]
997	movdqa	xmm3,XMMWORD[48+r11]
998	movdqa	xmm4,XMMWORD[64+r11]
999	pcmpeqd	xmm5,xmm5
1000
1001	movdqu	xmm6,XMMWORD[rcx]
1002	movdqa	XMMWORD[rax],xmm7
1003	lea	rax,[16+rax]
1004	dec	r10d
1005	jmp	NEAR $L$key_loop
1006ALIGN	16
; One pass: shuffle the current 16-byte chunk (xmm6), then build eight
; 16-byte outputs. Each output byte is 0xFF where (chunk AND mask) equals
; the mask and 0x00 otherwise (pand + pcmpeqb). Outputs 0..3 use masks
; xmm0..xmm3; outputs 4..7 use the same masks shifted left by 4, after
; which the masks are shifted back for the next pass. Outputs 0, 1, 5
; and 6 are inverted by XOR with xmm5 before being stored.
1007$L$key_loop:
1008DB	102,15,56,0,244
1009
1010	movdqa	xmm8,xmm0
1011	movdqa	xmm9,xmm1
1012
1013	pand	xmm8,xmm6
1014	pand	xmm9,xmm6
1015	movdqa	xmm10,xmm2
1016	pcmpeqb	xmm8,xmm0
1017	psllq	xmm0,4
1018	movdqa	xmm11,xmm3
1019	pcmpeqb	xmm9,xmm1
1020	psllq	xmm1,4
1021
1022	pand	xmm10,xmm6
1023	pand	xmm11,xmm6
1024	movdqa	xmm12,xmm0
1025	pcmpeqb	xmm10,xmm2
1026	psllq	xmm2,4
1027	movdqa	xmm13,xmm1
1028	pcmpeqb	xmm11,xmm3
1029	psllq	xmm3,4
1030
1031	movdqa	xmm14,xmm2
1032	movdqa	xmm15,xmm3
1033	pxor	xmm8,xmm5
1034	pxor	xmm9,xmm5
1035
1036	pand	xmm12,xmm6
1037	pand	xmm13,xmm6
1038	movdqa	XMMWORD[rax],xmm8
1039	pcmpeqb	xmm12,xmm0
1040	psrlq	xmm0,4
1041	movdqa	XMMWORD[16+rax],xmm9
1042	pcmpeqb	xmm13,xmm1
1043	psrlq	xmm1,4
1044	lea	rcx,[16+rcx]
1045
1046	pand	xmm14,xmm6
1047	pand	xmm15,xmm6
1048	movdqa	XMMWORD[32+rax],xmm10
1049	pcmpeqb	xmm14,xmm2
1050	psrlq	xmm2,4
1051	movdqa	XMMWORD[48+rax],xmm11
1052	pcmpeqb	xmm15,xmm3
1053	psrlq	xmm3,4
; Fetch the next source chunk now that the last use of the current one
; (the two pand instructions above) is done.
1054	movdqu	xmm6,XMMWORD[rcx]
1055
1056	pxor	xmm13,xmm5
1057	pxor	xmm14,xmm5
1058	movdqa	XMMWORD[64+rax],xmm12
1059	movdqa	XMMWORD[80+rax],xmm13
1060	movdqa	XMMWORD[96+rax],xmm14
1061	movdqa	XMMWORD[112+rax],xmm15
1062	lea	rax,[128+rax]
1063	dec	r10d
1064	jnz	NEAR $L$key_loop
1065
; Hand the constant at $L$masks+80 back to the caller in xmm7.
1066	movdqa	xmm7,XMMWORD[80+r11]
1067
1068	DB	0F3h,0C3h		;repret
1069
1070EXTERN	asm_AES_cbc_encrypt
1071global	bsaes_cbc_encrypt
1072
;-----------------------------------------------------------------------
; bsaes_cbc_encrypt  (exported)
;
; ABI:   Microsoft x64 (arguments in rcx, rdx, r8, r9, then stack;
;        xmm6..xmm15 are callee-saved and are spilled to the frame).
; In:    rcx      = input pointer
;        rdx      = output pointer
;        r8       = length in bytes (shifted right by 4 to get a count of
;                   16-byte blocks)
;        r9       = key structure; a 32-bit count is read at offset 240
;                   (presumably the AES round count -- confirm)
;        [rsp+40] = pointer to a 16-byte chaining value, read at entry
;                   and written back before returning (presumably the IV)
;        [rsp+48] = 32-bit flag (presumably "encrypt")
;
; If the flag is non-zero, or the length is below 128 bytes, control is
; transferred with a plain jump to asm_AES_cbc_encrypt with all
; arguments untouched. Otherwise this routine performs CBC decryption,
; eight blocks at a time through _bsaes_decrypt8.
;
; Register roles in the body: r12 = input, r13 = output, r14 = blocks
; left, r15 = key structure, rbx = chaining-value pointer, edx = count
; from [240+r9], rbp = frame base, xmm14 = current chaining value.
;-----------------------------------------------------------------------
1073ALIGN	16
1074bsaes_cbc_encrypt:
1075	mov	r11d,DWORD[48+rsp]
1076	cmp	r11d,0
1077	jne	NEAR asm_AES_cbc_encrypt
1078	cmp	r8,128
1079	jb	NEAR asm_AES_cbc_encrypt
1080
1081	mov	rax,rsp
1082$L$cbc_dec_prologue:
1083	push	rbp
1084	push	rbx
1085	push	r12
1086	push	r13
1087	push	r14
1088	push	r15
1089	lea	rsp,[((-72))+rsp]
; Six pushes plus 72 bytes put rsp 120 bytes below its entry value, so
; [160+rsp] is the entry-time [rsp+40], i.e. the fifth argument.
1090	mov	r10,QWORD[160+rsp]
1091	lea	rsp,[((-160))+rsp]
1092	movaps	XMMWORD[64+rsp],xmm6
1093	movaps	XMMWORD[80+rsp],xmm7
1094	movaps	XMMWORD[96+rsp],xmm8
1095	movaps	XMMWORD[112+rsp],xmm9
1096	movaps	XMMWORD[128+rsp],xmm10
1097	movaps	XMMWORD[144+rsp],xmm11
1098	movaps	XMMWORD[160+rsp],xmm12
1099	movaps	XMMWORD[176+rsp],xmm13
1100	movaps	XMMWORD[192+rsp],xmm14
1101	movaps	XMMWORD[208+rsp],xmm15
1102$L$cbc_dec_body:
1103	mov	rbp,rsp
1104	mov	eax,DWORD[240+r9]
1105	mov	r12,rcx
1106	mov	r13,rdx
1107	mov	r14,r8
1108	mov	r15,r9
1109	mov	rbx,r10
1110	shr	r14,4
1111
; Reserve count*128-96 bytes below the frame for the converted key.
; Writing eax above zero-extended it, so the 64-bit shift is safe.
1112	mov	edx,eax
1113	shl	rax,7
1114	sub	rax,96
1115	sub	rsp,rax
1116
1117	mov	rax,rsp
1118	mov	rcx,r15
1119	mov	r10d,edx
1120	call	_bsaes_key_convert
; Post-process the converted key: the constant returned in xmm7 is XORed
; into the first 16-byte slot, and the last source chunk (xmm6) is
; stored unconverted at the end.
1121	pxor	xmm7,XMMWORD[rsp]
1122	movdqa	XMMWORD[rax],xmm6
1123	movdqa	XMMWORD[rsp],xmm7
1124
1125	movdqu	xmm14,XMMWORD[rbx]
1126	sub	r14,8
; Main loop: eight blocks per pass. r14 is kept biased by -8 so that the
; borrow from "sub r14,8" signals fewer than eight blocks remaining.
1127$L$cbc_dec_loop:
1128	movdqu	xmm15,XMMWORD[r12]
1129	movdqu	xmm0,XMMWORD[16+r12]
1130	movdqu	xmm1,XMMWORD[32+r12]
1131	movdqu	xmm2,XMMWORD[48+r12]
1132	movdqu	xmm3,XMMWORD[64+r12]
1133	movdqu	xmm4,XMMWORD[80+r12]
1134	mov	rax,rsp
1135	movdqu	xmm5,XMMWORD[96+r12]
1136	mov	r10d,edx
1137	movdqu	xmm6,XMMWORD[112+r12]
; xmm14 is scratch inside _bsaes_decrypt8, so park the chaining value.
1138	movdqa	XMMWORD[32+rbp],xmm14
1139
1140	call	_bsaes_decrypt8
1141
; CBC chaining: the first result is XORed with the saved chaining value,
; each later result with the preceding input block (reloaded from the
; input). The eighth input block becomes the next chaining value.
1142	pxor	xmm15,XMMWORD[32+rbp]
1143	movdqu	xmm7,XMMWORD[r12]
1144	movdqu	xmm8,XMMWORD[16+r12]
1145	pxor	xmm0,xmm7
1146	movdqu	xmm9,XMMWORD[32+r12]
1147	pxor	xmm5,xmm8
1148	movdqu	xmm10,XMMWORD[48+r12]
1149	pxor	xmm3,xmm9
1150	movdqu	xmm11,XMMWORD[64+r12]
1151	pxor	xmm1,xmm10
1152	movdqu	xmm12,XMMWORD[80+r12]
1153	pxor	xmm6,xmm11
1154	movdqu	xmm13,XMMWORD[96+r12]
1155	pxor	xmm2,xmm12
1156	movdqu	xmm14,XMMWORD[112+r12]
1157	pxor	xmm4,xmm13
1158	movdqu	XMMWORD[r13],xmm15
1159	lea	r12,[128+r12]
1160	movdqu	XMMWORD[16+r13],xmm0
1161	movdqu	XMMWORD[32+r13],xmm5
1162	movdqu	XMMWORD[48+r13],xmm3
1163	movdqu	XMMWORD[64+r13],xmm1
1164	movdqu	XMMWORD[80+r13],xmm6
1165	movdqu	XMMWORD[96+r13],xmm2
1166	movdqu	XMMWORD[112+r13],xmm4
1167	lea	r13,[128+r13]
1168	sub	r14,8
1169	jnc	NEAR $L$cbc_dec_loop
1170
; Tail: undo the bias; r14 is now 0..7 remaining blocks.
1171	add	r14,8
1172	jz	NEAR $L$cbc_dec_done
1173
; Load exactly r14 blocks, branching out as soon as the count is
; reached. movdqu does not alter flags, so each "je" reuses the flags of
; the "cmp" before it. Registers that are not loaded still go through
; _bsaes_decrypt8 with stale contents; their results are never stored.
; Falling through all branches is the seven-block case.
1174	movdqu	xmm15,XMMWORD[r12]
1175	mov	rax,rsp
1176	mov	r10d,edx
1177	cmp	r14,2
1178	jb	NEAR $L$cbc_dec_one
1179	movdqu	xmm0,XMMWORD[16+r12]
1180	je	NEAR $L$cbc_dec_two
1181	movdqu	xmm1,XMMWORD[32+r12]
1182	cmp	r14,4
1183	jb	NEAR $L$cbc_dec_three
1184	movdqu	xmm2,XMMWORD[48+r12]
1185	je	NEAR $L$cbc_dec_four
1186	movdqu	xmm3,XMMWORD[64+r12]
1187	cmp	r14,6
1188	jb	NEAR $L$cbc_dec_five
1189	movdqu	xmm4,XMMWORD[80+r12]
1190	je	NEAR $L$cbc_dec_six
1191	movdqu	xmm5,XMMWORD[96+r12]
1192	movdqa	XMMWORD[32+rbp],xmm14
1193	call	_bsaes_decrypt8
1194	pxor	xmm15,XMMWORD[32+rbp]
1195	movdqu	xmm7,XMMWORD[r12]
1196	movdqu	xmm8,XMMWORD[16+r12]
1197	pxor	xmm0,xmm7
1198	movdqu	xmm9,XMMWORD[32+r12]
1199	pxor	xmm5,xmm8
1200	movdqu	xmm10,XMMWORD[48+r12]
1201	pxor	xmm3,xmm9
1202	movdqu	xmm11,XMMWORD[64+r12]
1203	pxor	xmm1,xmm10
1204	movdqu	xmm12,XMMWORD[80+r12]
1205	pxor	xmm6,xmm11
1206	movdqu	xmm14,XMMWORD[96+r12]
1207	pxor	xmm2,xmm12
1208	movdqu	XMMWORD[r13],xmm15
1209	movdqu	XMMWORD[16+r13],xmm0
1210	movdqu	XMMWORD[32+r13],xmm5
1211	movdqu	XMMWORD[48+r13],xmm3
1212	movdqu	XMMWORD[64+r13],xmm1
1213	movdqu	XMMWORD[80+r13],xmm6
1214	movdqu	XMMWORD[96+r13],xmm2
1215	jmp	NEAR $L$cbc_dec_done
1216ALIGN	16
; Six remaining blocks; xmm14 ends up holding the sixth input block.
1217$L$cbc_dec_six:
1218	movdqa	XMMWORD[32+rbp],xmm14
1219	call	_bsaes_decrypt8
1220	pxor	xmm15,XMMWORD[32+rbp]
1221	movdqu	xmm7,XMMWORD[r12]
1222	movdqu	xmm8,XMMWORD[16+r12]
1223	pxor	xmm0,xmm7
1224	movdqu	xmm9,XMMWORD[32+r12]
1225	pxor	xmm5,xmm8
1226	movdqu	xmm10,XMMWORD[48+r12]
1227	pxor	xmm3,xmm9
1228	movdqu	xmm11,XMMWORD[64+r12]
1229	pxor	xmm1,xmm10
1230	movdqu	xmm14,XMMWORD[80+r12]
1231	pxor	xmm6,xmm11
1232	movdqu	XMMWORD[r13],xmm15
1233	movdqu	XMMWORD[16+r13],xmm0
1234	movdqu	XMMWORD[32+r13],xmm5
1235	movdqu	XMMWORD[48+r13],xmm3
1236	movdqu	XMMWORD[64+r13],xmm1
1237	movdqu	XMMWORD[80+r13],xmm6
1238	jmp	NEAR $L$cbc_dec_done
1239ALIGN	16
; Five remaining blocks.
1240$L$cbc_dec_five:
1241	movdqa	XMMWORD[32+rbp],xmm14
1242	call	_bsaes_decrypt8
1243	pxor	xmm15,XMMWORD[32+rbp]
1244	movdqu	xmm7,XMMWORD[r12]
1245	movdqu	xmm8,XMMWORD[16+r12]
1246	pxor	xmm0,xmm7
1247	movdqu	xmm9,XMMWORD[32+r12]
1248	pxor	xmm5,xmm8
1249	movdqu	xmm10,XMMWORD[48+r12]
1250	pxor	xmm3,xmm9
1251	movdqu	xmm14,XMMWORD[64+r12]
1252	pxor	xmm1,xmm10
1253	movdqu	XMMWORD[r13],xmm15
1254	movdqu	XMMWORD[16+r13],xmm0
1255	movdqu	XMMWORD[32+r13],xmm5
1256	movdqu	XMMWORD[48+r13],xmm3
1257	movdqu	XMMWORD[64+r13],xmm1
1258	jmp	NEAR $L$cbc_dec_done
1259ALIGN	16
; Four remaining blocks.
1260$L$cbc_dec_four:
1261	movdqa	XMMWORD[32+rbp],xmm14
1262	call	_bsaes_decrypt8
1263	pxor	xmm15,XMMWORD[32+rbp]
1264	movdqu	xmm7,XMMWORD[r12]
1265	movdqu	xmm8,XMMWORD[16+r12]
1266	pxor	xmm0,xmm7
1267	movdqu	xmm9,XMMWORD[32+r12]
1268	pxor	xmm5,xmm8
1269	movdqu	xmm14,XMMWORD[48+r12]
1270	pxor	xmm3,xmm9
1271	movdqu	XMMWORD[r13],xmm15
1272	movdqu	XMMWORD[16+r13],xmm0
1273	movdqu	XMMWORD[32+r13],xmm5
1274	movdqu	XMMWORD[48+r13],xmm3
1275	jmp	NEAR $L$cbc_dec_done
1276ALIGN	16
; Three remaining blocks.
1277$L$cbc_dec_three:
1278	movdqa	XMMWORD[32+rbp],xmm14
1279	call	_bsaes_decrypt8
1280	pxor	xmm15,XMMWORD[32+rbp]
1281	movdqu	xmm7,XMMWORD[r12]
1282	movdqu	xmm8,XMMWORD[16+r12]
1283	pxor	xmm0,xmm7
1284	movdqu	xmm14,XMMWORD[32+r12]
1285	pxor	xmm5,xmm8
1286	movdqu	XMMWORD[r13],xmm15
1287	movdqu	XMMWORD[16+r13],xmm0
1288	movdqu	XMMWORD[32+r13],xmm5
1289	jmp	NEAR $L$cbc_dec_done
1290ALIGN	16
; Two remaining blocks.
1291$L$cbc_dec_two:
1292	movdqa	XMMWORD[32+rbp],xmm14
1293	call	_bsaes_decrypt8
1294	pxor	xmm15,XMMWORD[32+rbp]
1295	movdqu	xmm7,XMMWORD[r12]
1296	movdqu	xmm14,XMMWORD[16+r12]
1297	pxor	xmm0,xmm7
1298	movdqu	XMMWORD[r13],xmm15
1299	movdqu	XMMWORD[16+r13],xmm0
1300	jmp	NEAR $L$cbc_dec_done
1301ALIGN	16
; One remaining block: call the external single-block routine with
; rcx = input block, rdx = scratch output at [32+rbp], r8 = key
; structure. The code after the call relies on xmm14 (chaining value)
; and xmm15 (input block loaded above) surviving it -- NOTE(review):
; asm_AES_decrypt is not visible here; confirm it preserves them.
1302$L$cbc_dec_one:
1303	lea	rcx,[r12]
1304	lea	rdx,[32+rbp]
1305	lea	r8,[r15]
1306	call	asm_AES_decrypt
1307	pxor	xmm14,XMMWORD[32+rbp]
1308	movdqu	XMMWORD[r13],xmm14
1309	movdqa	xmm14,xmm15
1310
; Common exit: write the final chaining value back through rbx, then
; overwrite everything between rsp and rbp (the converted key) with
; zeros, 32 bytes per iteration.
1311$L$cbc_dec_done:
1312	movdqu	XMMWORD[rbx],xmm14
1313	lea	rax,[rsp]
1314	pxor	xmm0,xmm0
1315$L$cbc_dec_bzero:
1316	movdqa	XMMWORD[rax],xmm0
1317	movdqa	XMMWORD[16+rax],xmm0
1318	lea	rax,[32+rax]
1319	cmp	rbp,rax
1320	ja	NEAR $L$cbc_dec_bzero
1321
; Epilogue: restore xmm6..xmm15 from the frame, then the six pushed
; registers by offset (r15 was pushed last, so it sits lowest). The
; saved rbp travels through rax so rsp can be fixed up first.
1322	lea	rsp,[rbp]
1323	movaps	xmm6,XMMWORD[64+rbp]
1324	movaps	xmm7,XMMWORD[80+rbp]
1325	movaps	xmm8,XMMWORD[96+rbp]
1326	movaps	xmm9,XMMWORD[112+rbp]
1327	movaps	xmm10,XMMWORD[128+rbp]
1328	movaps	xmm11,XMMWORD[144+rbp]
1329	movaps	xmm12,XMMWORD[160+rbp]
1330	movaps	xmm13,XMMWORD[176+rbp]
1331	movaps	xmm14,XMMWORD[192+rbp]
1332	movaps	xmm15,XMMWORD[208+rbp]
1333	lea	rsp,[160+rbp]
1334	mov	r15,QWORD[72+rsp]
1335	mov	r14,QWORD[80+rsp]
1336	mov	r13,QWORD[88+rsp]
1337	mov	r12,QWORD[96+rsp]
1338	mov	rbx,QWORD[104+rsp]
1339	mov	rax,QWORD[112+rsp]
1340	lea	rsp,[120+rsp]
1341	mov	rbp,rax
1342$L$cbc_dec_epilogue:
1343	DB	0F3h,0C3h		;repret
1344
1345
1346global	bsaes_ctr32_encrypt_blocks
1347
;-----------------------------------------------------------------------
; bsaes_ctr32_encrypt_blocks  (exported)
;
; ABI:   Microsoft x64 (arguments in rcx, rdx, r8, r9, then stack;
;        xmm6..xmm15 are callee-saved and are spilled to the frame).
; In:    rcx      = input pointer
;        rdx      = output pointer
;        r8       = number of 16-byte blocks
;        r9       = key structure; a 32-bit count is read at offset 240
;                   (presumably the AES round count -- confirm)
;        [rsp+40] = pointer to a 16-byte counter block; it is read once
;                   and never written back
;
; With fewer than eight blocks, each block is handled on its own via
; asm_AES_encrypt ($L$ctr_enc_short), incrementing the last 32-bit word
; of the counter block as a big-endian integer. Otherwise the key is
; converted once and eight counter values are encrypted per pass through
; _bsaes_encrypt8_bitslice and XORed with the input.
;
; Register roles in the body: r12 = input, r13 = output, r14 = blocks
; left, r15 = key structure, ebx = count from [240+r9], rbp = frame base,
; xmm15 = current counter, [32+rbp] = saved counter.
;
; Hand-encoded SSSE3 instructions used here:
;   DB 102,68,15,56,0,199 = pshufb xmm8,xmm7
;   DB 102,68,15,56,0,255 = pshufb xmm15,xmm7
;   DB 102,15,56,0,199/207/215/223/231/239/247 = pshufb xmm0..xmm6,xmm7
;-----------------------------------------------------------------------
1348ALIGN	16
1349bsaes_ctr32_encrypt_blocks:
1350	mov	rax,rsp
1351$L$ctr_enc_prologue:
1352	push	rbp
1353	push	rbx
1354	push	r12
1355	push	r13
1356	push	r14
1357	push	r15
1358	lea	rsp,[((-72))+rsp]
; Six pushes plus 72 bytes put rsp 120 bytes below its entry value, so
; [160+rsp] is the entry-time [rsp+40], i.e. the fifth argument.
1359	mov	r10,QWORD[160+rsp]
1360	lea	rsp,[((-160))+rsp]
1361	movaps	XMMWORD[64+rsp],xmm6
1362	movaps	XMMWORD[80+rsp],xmm7
1363	movaps	XMMWORD[96+rsp],xmm8
1364	movaps	XMMWORD[112+rsp],xmm9
1365	movaps	XMMWORD[128+rsp],xmm10
1366	movaps	XMMWORD[144+rsp],xmm11
1367	movaps	XMMWORD[160+rsp],xmm12
1368	movaps	XMMWORD[176+rsp],xmm13
1369	movaps	XMMWORD[192+rsp],xmm14
1370	movaps	XMMWORD[208+rsp],xmm15
1371$L$ctr_enc_body:
1372	mov	rbp,rsp
1373	movdqu	xmm0,XMMWORD[r10]
1374	mov	eax,DWORD[240+r9]
1375	mov	r12,rcx
1376	mov	r13,rdx
1377	mov	r14,r8
1378	mov	r15,r9
1379	movdqa	XMMWORD[32+rbp],xmm0
1380	cmp	r8,8
1381	jb	NEAR $L$ctr_enc_short
1382
; Reserve count*128-96 bytes below the frame for the converted key.
; Writing eax above zero-extended it, so the 64-bit shift is safe.
1383	mov	ebx,eax
1384	shl	rax,7
1385	sub	rax,96
1386	sub	rsp,rax
1387
1388	mov	rax,rsp
1389	mov	rcx,r15
1390	mov	r10d,ebx
1391	call	_bsaes_key_convert
; Store the last source chunk (xmm6) at the end of the converted key,
; XORed with the constant returned in xmm7.
1392	pxor	xmm7,xmm6
1393	movdqa	XMMWORD[rax],xmm7
1394
; Apply the byte shuffle at $L$ADD1-32 to both the first key slot and
; the counter, so the per-pass paddd below operates on them directly.
1395	movdqa	xmm8,XMMWORD[rsp]
1396	lea	r11,[$L$ADD1]
1397	movdqa	xmm15,XMMWORD[32+rbp]
1398	movdqa	xmm7,XMMWORD[((-32))+r11]
1399DB	102,68,15,56,0,199
1400DB	102,68,15,56,0,255
1401	movdqa	XMMWORD[rsp],xmm8
1402	jmp	NEAR $L$ctr_enc_loop
1403ALIGN	16
; Main loop: derive seven more counters from xmm15 by adding the seven
; 16-byte constants at $L$ADD1+0..96 (32-bit lane adds).
1404$L$ctr_enc_loop:
1405	movdqa	XMMWORD[32+rbp],xmm15
1406	movdqa	xmm0,xmm15
1407	movdqa	xmm1,xmm15
1408	paddd	xmm0,XMMWORD[r11]
1409	movdqa	xmm2,xmm15
1410	paddd	xmm1,XMMWORD[16+r11]
1411	movdqa	xmm3,xmm15
1412	paddd	xmm2,XMMWORD[32+r11]
1413	movdqa	xmm4,xmm15
1414	paddd	xmm3,XMMWORD[48+r11]
1415	movdqa	xmm5,xmm15
1416	paddd	xmm4,XMMWORD[64+r11]
1417	movdqa	xmm6,xmm15
1418	paddd	xmm5,XMMWORD[80+r11]
1419	paddd	xmm6,XMMWORD[96+r11]
1420
1421
1422
; Inlined equivalent of the _bsaes_encrypt8 preamble (first-slot key XOR
; plus byte shuffle), here with the shuffle mask at $L$ADD1-16. The
; routine is then entered at _bsaes_encrypt8_bitslice with rax past the
; first key slot, r11 = $L$BS0 and r10d = count.
1423	movdqa	xmm8,XMMWORD[rsp]
1424	lea	rax,[16+rsp]
1425	movdqa	xmm7,XMMWORD[((-16))+r11]
1426	pxor	xmm15,xmm8
1427	pxor	xmm0,xmm8
1428	pxor	xmm1,xmm8
1429	pxor	xmm2,xmm8
1430DB	102,68,15,56,0,255
1431DB	102,15,56,0,199
1432	pxor	xmm3,xmm8
1433	pxor	xmm4,xmm8
1434DB	102,15,56,0,207
1435DB	102,15,56,0,215
1436	pxor	xmm5,xmm8
1437	pxor	xmm6,xmm8
1438DB	102,15,56,0,223
1439DB	102,15,56,0,231
1440DB	102,15,56,0,239
1441DB	102,15,56,0,247
1442	lea	r11,[$L$BS0]
1443	mov	r10d,ebx
1444
1445	call	_bsaes_encrypt8_bitslice
1446
; Borrow means fewer than eight blocks were left: finish in the partial
; path. Otherwise the zero flag from this subtraction is consumed by the
; jnz at the bottom of the loop (nothing in between modifies flags).
1447	sub	r14,8
1448	jc	NEAR $L$ctr_enc_loop_done
1449
; XOR the eight results with eight input blocks and store them. Results
; pair with consecutive blocks in the order
; xmm15, xmm0, xmm3, xmm5, xmm2, xmm6, xmm1, xmm4.
1450	movdqu	xmm7,XMMWORD[r12]
1451	movdqu	xmm8,XMMWORD[16+r12]
1452	movdqu	xmm9,XMMWORD[32+r12]
1453	movdqu	xmm10,XMMWORD[48+r12]
1454	movdqu	xmm11,XMMWORD[64+r12]
1455	movdqu	xmm12,XMMWORD[80+r12]
1456	movdqu	xmm13,XMMWORD[96+r12]
1457	movdqu	xmm14,XMMWORD[112+r12]
1458	lea	r12,[128+r12]
1459	pxor	xmm7,xmm15
1460	movdqa	xmm15,XMMWORD[32+rbp]
1461	pxor	xmm0,xmm8
1462	movdqu	XMMWORD[r13],xmm7
1463	pxor	xmm3,xmm9
1464	movdqu	XMMWORD[16+r13],xmm0
1465	pxor	xmm5,xmm10
1466	movdqu	XMMWORD[32+r13],xmm3
1467	pxor	xmm2,xmm11
1468	movdqu	XMMWORD[48+r13],xmm5
1469	pxor	xmm6,xmm12
1470	movdqu	XMMWORD[64+r13],xmm2
1471	pxor	xmm1,xmm13
1472	movdqu	XMMWORD[80+r13],xmm6
1473	pxor	xmm4,xmm14
1474	movdqu	XMMWORD[96+r13],xmm1
1475	lea	r11,[$L$ADD1]
1476	movdqu	XMMWORD[112+r13],xmm4
1477	lea	r13,[128+r13]
; Advance the saved counter by the constant at $L$ADD1+112 for the next
; group of eight.
1478	paddd	xmm15,XMMWORD[112+r11]
1479	jnz	NEAR $L$ctr_enc_loop
1480
1481	jmp	NEAR $L$ctr_enc_done
1482ALIGN	16
; Partial group: r14 becomes 1..7 after the add. Results are XORed and
; stored one block at a time, leaving as soon as the count is reached.
; movdqu/pxor do not alter flags, so each "je" reuses the preceding cmp.
1483$L$ctr_enc_loop_done:
1484	add	r14,8
1485	movdqu	xmm7,XMMWORD[r12]
1486	pxor	xmm15,xmm7
1487	movdqu	XMMWORD[r13],xmm15
1488	cmp	r14,2
1489	jb	NEAR $L$ctr_enc_done
1490	movdqu	xmm8,XMMWORD[16+r12]
1491	pxor	xmm0,xmm8
1492	movdqu	XMMWORD[16+r13],xmm0
1493	je	NEAR $L$ctr_enc_done
1494	movdqu	xmm9,XMMWORD[32+r12]
1495	pxor	xmm3,xmm9
1496	movdqu	XMMWORD[32+r13],xmm3
1497	cmp	r14,4
1498	jb	NEAR $L$ctr_enc_done
1499	movdqu	xmm10,XMMWORD[48+r12]
1500	pxor	xmm5,xmm10
1501	movdqu	XMMWORD[48+r13],xmm5
1502	je	NEAR $L$ctr_enc_done
1503	movdqu	xmm11,XMMWORD[64+r12]
1504	pxor	xmm2,xmm11
1505	movdqu	XMMWORD[64+r13],xmm2
1506	cmp	r14,6
1507	jb	NEAR $L$ctr_enc_done
1508	movdqu	xmm12,XMMWORD[80+r12]
1509	pxor	xmm6,xmm12
1510	movdqu	XMMWORD[80+r13],xmm6
1511	je	NEAR $L$ctr_enc_done
1512	movdqu	xmm13,XMMWORD[96+r12]
1513	pxor	xmm1,xmm13
1514	movdqu	XMMWORD[96+r13],xmm1
1515	jmp	NEAR $L$ctr_enc_done
1516
1517ALIGN	16
; Short path (fewer than eight blocks at entry): one block per pass via
; the external routine, with rcx = counter block at [32+rbp], rdx =
; scratch output at [48+rbp], r8 = key structure. rsp still equals rbp
; on this path (the key area was never reserved), so the store through
; [44+rsp] updates the same word that is read through [44+rbp].
; The loop is "dec r14 / jnz", so it assumes r14 is at least 1 on entry.
1518$L$ctr_enc_short:
1519	lea	rcx,[32+rbp]
1520	lea	rdx,[48+rbp]
1521	lea	r8,[r15]
1522	call	asm_AES_encrypt
1523	movdqu	xmm0,XMMWORD[r12]
1524	lea	r12,[16+r12]
1525	mov	eax,DWORD[44+rbp]
1526	bswap	eax
1527	pxor	xmm0,XMMWORD[48+rbp]
1528	inc	eax
1529	movdqu	XMMWORD[r13],xmm0
1530	bswap	eax
1531	lea	r13,[16+r13]
1532	mov	DWORD[44+rsp],eax
1533	dec	r14
1534	jnz	NEAR $L$ctr_enc_short
1535
; Common exit: overwrite everything between rsp and rbp with zeros, 32
; bytes per iteration. The body runs at least once, so on the short path
; (rsp == rbp) it clears [rbp..rbp+31] only.
1536$L$ctr_enc_done:
1537	lea	rax,[rsp]
1538	pxor	xmm0,xmm0
1539$L$ctr_enc_bzero:
1540	movdqa	XMMWORD[rax],xmm0
1541	movdqa	XMMWORD[16+rax],xmm0
1542	lea	rax,[32+rax]
1543	cmp	rbp,rax
1544	ja	NEAR $L$ctr_enc_bzero
1545
; Epilogue: restore xmm6..xmm15 from the frame, then the six pushed
; registers by offset (r15 was pushed last, so it sits lowest). The
; saved rbp travels through rax so rsp can be fixed up first.
1546	lea	rsp,[rbp]
1547	movaps	xmm6,XMMWORD[64+rbp]
1548	movaps	xmm7,XMMWORD[80+rbp]
1549	movaps	xmm8,XMMWORD[96+rbp]
1550	movaps	xmm9,XMMWORD[112+rbp]
1551	movaps	xmm10,XMMWORD[128+rbp]
1552	movaps	xmm11,XMMWORD[144+rbp]
1553	movaps	xmm12,XMMWORD[160+rbp]
1554	movaps	xmm13,XMMWORD[176+rbp]
1555	movaps	xmm14,XMMWORD[192+rbp]
1556	movaps	xmm15,XMMWORD[208+rbp]
1557	lea	rsp,[160+rbp]
1558	mov	r15,QWORD[72+rsp]
1559	mov	r14,QWORD[80+rsp]
1560	mov	r13,QWORD[88+rsp]
1561	mov	r12,QWORD[96+rsp]
1562	mov	rbx,QWORD[104+rsp]
1563	mov	rax,QWORD[112+rsp]
1564	lea	rsp,[120+rsp]
1565	mov	rbp,rax
1566$L$ctr_enc_epilogue:
1567	DB	0F3h,0C3h		;repret
1568
;-----------------------------------------------------------------------
; bsaes_xts_encrypt -- XTS-mode encryption on top of the 8-block
; bit-sliced core (_bsaes_encrypt8). asm_AES_encrypt is used for the
; initial tweak, for a lone single block and for the ciphertext-stealing
; tail.
;
; ABI:   Microsoft x64 (rbx, rbp, r12-r15 and xmm6-xmm15 are saved and
;        restored here).
; In:    rcx = input pointer              (kept in r12)
;        rdx = output pointer             (kept in r13)
;        r8  = length in bytes            (kept in r14, full copy in rbx)
;        r9  = key structure for the data blocks (kept in r15);
;              DWORD[240+r9] is read and passed on in r10d -- presumably
;              the round count, TODO confirm against the key layout
;        5th argument (stack) -> r10, handed to asm_AES_encrypt as its
;              3rd argument -- presumably the tweak key ("key2"), confirm
;        6th argument (stack) -> r11, handed to asm_AES_encrypt as its
;              1st argument -- presumably the 16-byte IV, confirm
; Out:   output buffer filled; rax carries no result (it only shuttles
;        the saved rbp in the epilogue).
;
; Frame (rbp = rsp at $L$xts_enc_body):
;        [32+rbp]             16-byte scratch block (initial tweak, then
;                             in/out buffer for asm_AES_encrypt calls)
;        [64+rbp]..[223+rbp]  saved xmm6..xmm15
;        [232+rbp]..          saved r15,r14,r13,r12,rbx,rbp (pushes)
;        below rbp            converted key schedule, then 128 bytes for
;                             eight tweaks at [rsp]..[112+rsp]
;
; Register roles in the body:
;        xmm6  = running tweak      xmm12 = $L$xts_magic mask
;        xmm13 = carry/residue      xmm14 = sign-broadcast of the tweak
;        edx   = value from DWORD[240+r15], passed in r10d on each
;                _bsaes_encrypt8 call
;
; Tweak update (repeated below): pcmpgtd of zero against the tweak gives
; an all-ones dword wherever the tweak dword has its top bit set.
; pshufd 0x13 moves the dword-3 mask to dword 0 and the dword-1 mask to
; dword 2; ANDed with $L$xts_magic (0x87,0,1,0) that yields the 0x87
; feedback for a carry out of bit 127 and the carry from bit 63 into
; bit 64. paddq doubles both 64-bit halves and pxor merges the carries.
;-----------------------------------------------------------------------
1569global	bsaes_xts_encrypt
1570
1571ALIGN	16
1572bsaes_xts_encrypt:
	; rax = entry rsp; se_handler takes context->Rax as the stack pointer
	; while RIP is still before $L$xts_enc_body.
1573	mov	rax,rsp
1574$L$xts_enc_prologue:
1575	push	rbp
1576	push	rbx
1577	push	r12
1578	push	r13
1579	push	r14
1580	push	r15
1581	lea	rsp,[((-72))+rsp]
	; 6 pushes + 72 bytes = 120 below the entry rsp, so the 5th argument
	; slot (entry rsp + 40) is at [160+rsp] and the 6th at [168+rsp].
1582	mov	r10,QWORD[160+rsp]
1583	mov	r11,QWORD[168+rsp]
	; 160 more bytes; xmm6-xmm15 are stored at [64+rsp]..[208+rsp].
1584	lea	rsp,[((-160))+rsp]
1585	movaps	XMMWORD[64+rsp],xmm6
1586	movaps	XMMWORD[80+rsp],xmm7
1587	movaps	XMMWORD[96+rsp],xmm8
1588	movaps	XMMWORD[112+rsp],xmm9
1589	movaps	XMMWORD[128+rsp],xmm10
1590	movaps	XMMWORD[144+rsp],xmm11
1591	movaps	XMMWORD[160+rsp],xmm12
1592	movaps	XMMWORD[176+rsp],xmm13
1593	movaps	XMMWORD[192+rsp],xmm14
1594	movaps	XMMWORD[208+rsp],xmm15
1595$L$xts_enc_body:
	; rbp = frame base for the rest of the function; arguments move to
	; callee-saved registers so they survive the calls below.
1596	mov	rbp,rsp
1597	mov	r12,rcx
1598	mov	r13,rdx
1599	mov	r14,r8
1600	mov	r15,r9
1601
	; Initial tweak: asm_AES_encrypt(in = 6th arg, out = [32+rbp],
	; key = 5th arg).
1602	lea	rcx,[r11]
1603	lea	rdx,[32+rbp]
1604	lea	r8,[r10]
1605	call	asm_AES_encrypt
1606
	; eax = DWORD[240+key]; rbx keeps the unrounded length so the tail
	; size (len & 15) can be recovered at $L$xts_enc_done.
1607	mov	eax,DWORD[240+r15]
1608	mov	rbx,r14
1609
	; Reserve (value<<7) - 96 bytes of stack for the converted key
	; schedule; edx keeps the value for later r10d hand-offs.
1610	mov	edx,eax
1611	shl	rax,7
1612	sub	rax,96
1613	sub	rsp,rax
1614
	; _bsaes_key_convert(rax = destination, rcx = key, r10d = value from
	; [240+key]).
	; NOTE(review): _bsaes_key_convert is outside this view; xmm6, xmm7
	; and rax are consumed here as its outputs and the xor result is
	; stored at [rax] -- confirm their meaning against its definition.
1615	mov	rax,rsp
1616	mov	rcx,r15
1617	mov	r10d,edx
1618	call	_bsaes_key_convert
1619	pxor	xmm7,xmm6
1620	movdqa	XMMWORD[rax],xmm7
1621
	; r14 = length rounded down to whole 16-byte blocks; reserve 128 bytes
	; for eight tweaks and fetch the initial tweak into xmm6.
1622	and	r14,-16
1623	sub	rsp,0x80
1624	movdqa	xmm6,XMMWORD[32+rbp]
1625
	; xmm12 = magic mask, xmm14 = per-dword sign mask of the tweak.
1626	pxor	xmm14,xmm14
1627	movdqa	xmm12,XMMWORD[$L$xts_magic]
1628	pcmpgtd	xmm14,xmm6
1629
	; Fewer than 128 bytes of whole blocks -> go straight to the short path.
1630	sub	r14,0x80
1631	jc	NEAR $L$xts_enc_short
1632	jmp	NEAR $L$xts_enc_loop
1633
1634ALIGN	16
	; Main loop: 8 blocks (128 bytes) per iteration. Each group below
	; copies the current tweak into a data register, stores it in its
	; stack slot, advances the tweak, and interleaves the input loads and
	; input^tweak xors of earlier blocks.
1635$L$xts_enc_loop:
	; tweak[0] -> xmm15, [rsp]
1636	pshufd	xmm13,xmm14,0x13
1637	pxor	xmm14,xmm14
1638	movdqa	xmm15,xmm6
1639	movdqa	XMMWORD[rsp],xmm6
1640	paddq	xmm6,xmm6
1641	pand	xmm13,xmm12
1642	pcmpgtd	xmm14,xmm6
1643	pxor	xmm6,xmm13
	; tweak[1] -> xmm0, [16+rsp]; load input block 0
1644	pshufd	xmm13,xmm14,0x13
1645	pxor	xmm14,xmm14
1646	movdqa	xmm0,xmm6
1647	movdqa	XMMWORD[16+rsp],xmm6
1648	paddq	xmm6,xmm6
1649	pand	xmm13,xmm12
1650	pcmpgtd	xmm14,xmm6
1651	pxor	xmm6,xmm13
1652	movdqu	xmm7,XMMWORD[r12]
	; tweak[2] -> xmm1, [32+rsp]; load block 1, block 0 ^= tweak[0]
1653	pshufd	xmm13,xmm14,0x13
1654	pxor	xmm14,xmm14
1655	movdqa	xmm1,xmm6
1656	movdqa	XMMWORD[32+rsp],xmm6
1657	paddq	xmm6,xmm6
1658	pand	xmm13,xmm12
1659	pcmpgtd	xmm14,xmm6
1660	pxor	xmm6,xmm13
1661	movdqu	xmm8,XMMWORD[16+r12]
1662	pxor	xmm15,xmm7
	; tweak[3] -> xmm2, [48+rsp]
1663	pshufd	xmm13,xmm14,0x13
1664	pxor	xmm14,xmm14
1665	movdqa	xmm2,xmm6
1666	movdqa	XMMWORD[48+rsp],xmm6
1667	paddq	xmm6,xmm6
1668	pand	xmm13,xmm12
1669	pcmpgtd	xmm14,xmm6
1670	pxor	xmm6,xmm13
1671	movdqu	xmm9,XMMWORD[32+r12]
1672	pxor	xmm0,xmm8
	; tweak[4] -> xmm3, [64+rsp]
1673	pshufd	xmm13,xmm14,0x13
1674	pxor	xmm14,xmm14
1675	movdqa	xmm3,xmm6
1676	movdqa	XMMWORD[64+rsp],xmm6
1677	paddq	xmm6,xmm6
1678	pand	xmm13,xmm12
1679	pcmpgtd	xmm14,xmm6
1680	pxor	xmm6,xmm13
1681	movdqu	xmm10,XMMWORD[48+r12]
1682	pxor	xmm1,xmm9
	; tweak[5] -> xmm4, [80+rsp]
1683	pshufd	xmm13,xmm14,0x13
1684	pxor	xmm14,xmm14
1685	movdqa	xmm4,xmm6
1686	movdqa	XMMWORD[80+rsp],xmm6
1687	paddq	xmm6,xmm6
1688	pand	xmm13,xmm12
1689	pcmpgtd	xmm14,xmm6
1690	pxor	xmm6,xmm13
1691	movdqu	xmm11,XMMWORD[64+r12]
1692	pxor	xmm2,xmm10
	; tweak[6] -> xmm5, [96+rsp]
1693	pshufd	xmm13,xmm14,0x13
1694	pxor	xmm14,xmm14
1695	movdqa	xmm5,xmm6
1696	movdqa	XMMWORD[96+rsp],xmm6
1697	paddq	xmm6,xmm6
1698	pand	xmm13,xmm12
1699	pcmpgtd	xmm14,xmm6
1700	pxor	xmm6,xmm13
	; From here xmm12-xmm14 are reused as input registers, which is why
	; the magic mask is reloaded after the call. tweak[7] stays in xmm6
	; and is stored at [112+rsp] before block 7 is xored into it.
1701	movdqu	xmm12,XMMWORD[80+r12]
1702	pxor	xmm3,xmm11
1703	movdqu	xmm13,XMMWORD[96+r12]
1704	pxor	xmm4,xmm12
1705	movdqu	xmm14,XMMWORD[112+r12]
1706	lea	r12,[128+r12]
1707	movdqa	XMMWORD[112+rsp],xmm6
1708	pxor	xmm5,xmm13
	; rax = key schedule (just above the 128-byte tweak area),
	; r10d = value from [240+key].
1709	lea	rax,[128+rsp]
1710	pxor	xmm6,xmm14
1711	mov	r10d,edx
1712
1713	call	_bsaes_encrypt8
1714
	; Results are consumed in the register order xmm15, xmm0, xmm3, xmm5,
	; xmm2, xmm6, xmm1, xmm4 for blocks 0..7; each is xored with its saved
	; tweak and stored to the output.
1715	pxor	xmm15,XMMWORD[rsp]
1716	pxor	xmm0,XMMWORD[16+rsp]
1717	movdqu	XMMWORD[r13],xmm15
1718	pxor	xmm3,XMMWORD[32+rsp]
1719	movdqu	XMMWORD[16+r13],xmm0
1720	pxor	xmm5,XMMWORD[48+rsp]
1721	movdqu	XMMWORD[32+r13],xmm3
1722	pxor	xmm2,XMMWORD[64+rsp]
1723	movdqu	XMMWORD[48+r13],xmm5
1724	pxor	xmm6,XMMWORD[80+rsp]
1725	movdqu	XMMWORD[64+r13],xmm2
1726	pxor	xmm1,XMMWORD[96+rsp]
1727	movdqu	XMMWORD[80+r13],xmm6
1728	pxor	xmm4,XMMWORD[112+rsp]
1729	movdqu	XMMWORD[96+r13],xmm1
1730	movdqu	XMMWORD[112+r13],xmm4
1731	lea	r13,[128+r13]
1732
	; Rebuild the tweak state for the next pass: reload tweak[7] and the
	; magic mask, then advance the tweak once more.
1733	movdqa	xmm6,XMMWORD[112+rsp]
1734	pxor	xmm14,xmm14
1735	movdqa	xmm12,XMMWORD[$L$xts_magic]
1736	pcmpgtd	xmm14,xmm6
1737	pshufd	xmm13,xmm14,0x13
1738	pxor	xmm14,xmm14
1739	paddq	xmm6,xmm6
1740	pand	xmm13,xmm12
1741	pcmpgtd	xmm14,xmm6
1742	pxor	xmm6,xmm13
1743
1744	sub	r14,0x80
1745	jnc	NEAR $L$xts_enc_loop
1746
	; Short path: 0..7 whole blocks remain. The same tweak groups run
	; again, but after each input load the remaining length is compared
	; and control leaves to $L$xts_enc_N once N blocks have been loaded.
1747$L$xts_enc_short:
1748	add	r14,0x80
1749	jz	NEAR $L$xts_enc_done
1750	pshufd	xmm13,xmm14,0x13
1751	pxor	xmm14,xmm14
1752	movdqa	xmm15,xmm6
1753	movdqa	XMMWORD[rsp],xmm6
1754	paddq	xmm6,xmm6
1755	pand	xmm13,xmm12
1756	pcmpgtd	xmm14,xmm6
1757	pxor	xmm6,xmm13
1758	pshufd	xmm13,xmm14,0x13
1759	pxor	xmm14,xmm14
1760	movdqa	xmm0,xmm6
1761	movdqa	XMMWORD[16+rsp],xmm6
1762	paddq	xmm6,xmm6
1763	pand	xmm13,xmm12
1764	pcmpgtd	xmm14,xmm6
1765	pxor	xmm6,xmm13
1766	movdqu	xmm7,XMMWORD[r12]
1767	cmp	r14,16
1768	je	NEAR $L$xts_enc_1
1769	pshufd	xmm13,xmm14,0x13
1770	pxor	xmm14,xmm14
1771	movdqa	xmm1,xmm6
1772	movdqa	XMMWORD[32+rsp],xmm6
1773	paddq	xmm6,xmm6
1774	pand	xmm13,xmm12
1775	pcmpgtd	xmm14,xmm6
1776	pxor	xmm6,xmm13
1777	movdqu	xmm8,XMMWORD[16+r12]
1778	cmp	r14,32
1779	je	NEAR $L$xts_enc_2
1780	pxor	xmm15,xmm7
1781	pshufd	xmm13,xmm14,0x13
1782	pxor	xmm14,xmm14
1783	movdqa	xmm2,xmm6
1784	movdqa	XMMWORD[48+rsp],xmm6
1785	paddq	xmm6,xmm6
1786	pand	xmm13,xmm12
1787	pcmpgtd	xmm14,xmm6
1788	pxor	xmm6,xmm13
1789	movdqu	xmm9,XMMWORD[32+r12]
1790	cmp	r14,48
1791	je	NEAR $L$xts_enc_3
1792	pxor	xmm0,xmm8
1793	pshufd	xmm13,xmm14,0x13
1794	pxor	xmm14,xmm14
1795	movdqa	xmm3,xmm6
1796	movdqa	XMMWORD[64+rsp],xmm6
1797	paddq	xmm6,xmm6
1798	pand	xmm13,xmm12
1799	pcmpgtd	xmm14,xmm6
1800	pxor	xmm6,xmm13
1801	movdqu	xmm10,XMMWORD[48+r12]
1802	cmp	r14,64
1803	je	NEAR $L$xts_enc_4
1804	pxor	xmm1,xmm9
1805	pshufd	xmm13,xmm14,0x13
1806	pxor	xmm14,xmm14
1807	movdqa	xmm4,xmm6
1808	movdqa	XMMWORD[80+rsp],xmm6
1809	paddq	xmm6,xmm6
1810	pand	xmm13,xmm12
1811	pcmpgtd	xmm14,xmm6
1812	pxor	xmm6,xmm13
1813	movdqu	xmm11,XMMWORD[64+r12]
1814	cmp	r14,80
1815	je	NEAR $L$xts_enc_5
1816	pxor	xmm2,xmm10
1817	pshufd	xmm13,xmm14,0x13
1818	pxor	xmm14,xmm14
1819	movdqa	xmm5,xmm6
1820	movdqa	XMMWORD[96+rsp],xmm6
1821	paddq	xmm6,xmm6
1822	pand	xmm13,xmm12
1823	pcmpgtd	xmm14,xmm6
1824	pxor	xmm6,xmm13
1825	movdqu	xmm12,XMMWORD[80+r12]
1826	cmp	r14,96
1827	je	NEAR $L$xts_enc_6
	; Seven blocks remain: finish the xors, park the following tweak at
	; [112+rsp], and run the 8-wide core (only 7 results are stored).
1828	pxor	xmm3,xmm11
1829	movdqu	xmm13,XMMWORD[96+r12]
1830	pxor	xmm4,xmm12
1831	movdqa	XMMWORD[112+rsp],xmm6
1832	lea	r12,[112+r12]
1833	pxor	xmm5,xmm13
1834	lea	rax,[128+rsp]
1835	mov	r10d,edx
1836
1837	call	_bsaes_encrypt8
1838
1839	pxor	xmm15,XMMWORD[rsp]
1840	pxor	xmm0,XMMWORD[16+rsp]
1841	movdqu	XMMWORD[r13],xmm15
1842	pxor	xmm3,XMMWORD[32+rsp]
1843	movdqu	XMMWORD[16+r13],xmm0
1844	pxor	xmm5,XMMWORD[48+rsp]
1845	movdqu	XMMWORD[32+r13],xmm3
1846	pxor	xmm2,XMMWORD[64+rsp]
1847	movdqu	XMMWORD[48+r13],xmm5
1848	pxor	xmm6,XMMWORD[80+rsp]
1849	movdqu	XMMWORD[64+r13],xmm2
1850	pxor	xmm1,XMMWORD[96+rsp]
1851	movdqu	XMMWORD[80+r13],xmm6
1852	movdqu	XMMWORD[96+r13],xmm1
1853	lea	r13,[112+r13]
1854
	; xmm6 = tweak that follows the last block written (needed by the
	; stealing tail).
1855	movdqa	xmm6,XMMWORD[112+rsp]
1856	jmp	NEAR $L$xts_enc_done
1857ALIGN	16
	; Six blocks: finish the two pending input^tweak xors, encrypt, store
	; six results, reload the next tweak from [96+rsp].
1858$L$xts_enc_6:
1859	pxor	xmm3,xmm11
1860	lea	r12,[96+r12]
1861	pxor	xmm4,xmm12
1862	lea	rax,[128+rsp]
1863	mov	r10d,edx
1864
1865	call	_bsaes_encrypt8
1866
1867	pxor	xmm15,XMMWORD[rsp]
1868	pxor	xmm0,XMMWORD[16+rsp]
1869	movdqu	XMMWORD[r13],xmm15
1870	pxor	xmm3,XMMWORD[32+rsp]
1871	movdqu	XMMWORD[16+r13],xmm0
1872	pxor	xmm5,XMMWORD[48+rsp]
1873	movdqu	XMMWORD[32+r13],xmm3
1874	pxor	xmm2,XMMWORD[64+rsp]
1875	movdqu	XMMWORD[48+r13],xmm5
1876	pxor	xmm6,XMMWORD[80+rsp]
1877	movdqu	XMMWORD[64+r13],xmm2
1878	movdqu	XMMWORD[80+r13],xmm6
1879	lea	r13,[96+r13]
1880
1881	movdqa	xmm6,XMMWORD[96+rsp]
1882	jmp	NEAR $L$xts_enc_done
1883ALIGN	16
	; Five blocks; next tweak is at [80+rsp].
1884$L$xts_enc_5:
1885	pxor	xmm2,xmm10
1886	lea	r12,[80+r12]
1887	pxor	xmm3,xmm11
1888	lea	rax,[128+rsp]
1889	mov	r10d,edx
1890
1891	call	_bsaes_encrypt8
1892
1893	pxor	xmm15,XMMWORD[rsp]
1894	pxor	xmm0,XMMWORD[16+rsp]
1895	movdqu	XMMWORD[r13],xmm15
1896	pxor	xmm3,XMMWORD[32+rsp]
1897	movdqu	XMMWORD[16+r13],xmm0
1898	pxor	xmm5,XMMWORD[48+rsp]
1899	movdqu	XMMWORD[32+r13],xmm3
1900	pxor	xmm2,XMMWORD[64+rsp]
1901	movdqu	XMMWORD[48+r13],xmm5
1902	movdqu	XMMWORD[64+r13],xmm2
1903	lea	r13,[80+r13]
1904
1905	movdqa	xmm6,XMMWORD[80+rsp]
1906	jmp	NEAR $L$xts_enc_done
1907ALIGN	16
	; Four blocks; next tweak is at [64+rsp].
1908$L$xts_enc_4:
1909	pxor	xmm1,xmm9
1910	lea	r12,[64+r12]
1911	pxor	xmm2,xmm10
1912	lea	rax,[128+rsp]
1913	mov	r10d,edx
1914
1915	call	_bsaes_encrypt8
1916
1917	pxor	xmm15,XMMWORD[rsp]
1918	pxor	xmm0,XMMWORD[16+rsp]
1919	movdqu	XMMWORD[r13],xmm15
1920	pxor	xmm3,XMMWORD[32+rsp]
1921	movdqu	XMMWORD[16+r13],xmm0
1922	pxor	xmm5,XMMWORD[48+rsp]
1923	movdqu	XMMWORD[32+r13],xmm3
1924	movdqu	XMMWORD[48+r13],xmm5
1925	lea	r13,[64+r13]
1926
1927	movdqa	xmm6,XMMWORD[64+rsp]
1928	jmp	NEAR $L$xts_enc_done
1929ALIGN	16
	; Three blocks; next tweak is at [48+rsp].
1930$L$xts_enc_3:
1931	pxor	xmm0,xmm8
1932	lea	r12,[48+r12]
1933	pxor	xmm1,xmm9
1934	lea	rax,[128+rsp]
1935	mov	r10d,edx
1936
1937	call	_bsaes_encrypt8
1938
1939	pxor	xmm15,XMMWORD[rsp]
1940	pxor	xmm0,XMMWORD[16+rsp]
1941	movdqu	XMMWORD[r13],xmm15
1942	pxor	xmm3,XMMWORD[32+rsp]
1943	movdqu	XMMWORD[16+r13],xmm0
1944	movdqu	XMMWORD[32+r13],xmm3
1945	lea	r13,[48+r13]
1946
1947	movdqa	xmm6,XMMWORD[48+rsp]
1948	jmp	NEAR $L$xts_enc_done
1949ALIGN	16
	; Two blocks; next tweak is at [32+rsp].
1950$L$xts_enc_2:
1951	pxor	xmm15,xmm7
1952	lea	r12,[32+r12]
1953	pxor	xmm0,xmm8
1954	lea	rax,[128+rsp]
1955	mov	r10d,edx
1956
1957	call	_bsaes_encrypt8
1958
1959	pxor	xmm15,XMMWORD[rsp]
1960	pxor	xmm0,XMMWORD[16+rsp]
1961	movdqu	XMMWORD[r13],xmm15
1962	movdqu	XMMWORD[16+r13],xmm0
1963	lea	r13,[32+r13]
1964
1965	movdqa	xmm6,XMMWORD[32+rsp]
1966	jmp	NEAR $L$xts_enc_done
1967ALIGN	16
	; One block: skip the bit-sliced core. input^tweak goes through the
	; scratch block at [32+rbp] and asm_AES_encrypt (in place, key = r15).
	; xmm15 still holds tweak[0] after the call, so the code relies on
	; asm_AES_encrypt leaving xmm15 intact.
1968$L$xts_enc_1:
1969	pxor	xmm7,xmm15
1970	lea	r12,[16+r12]
1971	movdqa	XMMWORD[32+rbp],xmm7
1972	lea	rcx,[32+rbp]
1973	lea	rdx,[32+rbp]
1974	lea	r8,[r15]
1975	call	asm_AES_encrypt
1976	pxor	xmm15,XMMWORD[32+rbp]
1977
1978
1979
1980
1981
1982	movdqu	XMMWORD[r13],xmm15
1983	lea	r13,[16+r13]
1984
	; Next tweak (tweak[1]) was stored at [16+rsp] by the short path.
1985	movdqa	xmm6,XMMWORD[16+rsp]
1986
	; All whole blocks are written. ebx = len & 15 = size of the partial
	; final block; zero means there is nothing to steal.
1987$L$xts_enc_done:
1988	and	ebx,15
1989	jz	NEAR $L$xts_enc_ret
1990	mov	rdx,r13
1991
	; Ciphertext stealing, byte by byte: the leading bytes of the last
	; full ciphertext block (at rdx-16) move to the partial output slot
	; and are replaced by the trailing input bytes.
1992$L$xts_enc_steal:
1993	movzx	eax,BYTE[r12]
1994	movzx	ecx,BYTE[((-16))+rdx]
1995	lea	r12,[1+r12]
1996	mov	BYTE[((-16))+rdx],al
1997	mov	BYTE[rdx],cl
1998	lea	rdx,[1+rdx]
1999	sub	ebx,1
2000	jnz	NEAR $L$xts_enc_steal
2001
	; Encrypt the patched block at [-16+r13] with the next tweak (xmm6)
	; through the scratch block, and write it back in place.
2002	movdqu	xmm15,XMMWORD[((-16))+r13]
2003	lea	rcx,[32+rbp]
2004	pxor	xmm15,xmm6
2005	lea	rdx,[32+rbp]
2006	movdqa	XMMWORD[32+rbp],xmm15
2007	lea	r8,[r15]
2008	call	asm_AES_encrypt
2009	pxor	xmm6,XMMWORD[32+rbp]
2010	movdqu	XMMWORD[(-16)+r13],xmm6
2011
	; Zero everything between rsp and rbp (tweak slots and converted key
	; schedule), 32 bytes per step, before releasing the stack.
2012$L$xts_enc_ret:
2013	lea	rax,[rsp]
2014	pxor	xmm0,xmm0
2015$L$xts_enc_bzero:
2016	movdqa	XMMWORD[rax],xmm0
2017	movdqa	XMMWORD[16+rax],xmm0
2018	lea	rax,[32+rax]
2019	cmp	rbp,rax
2020	ja	NEAR $L$xts_enc_bzero
2021
	; Epilogue: restore xmm6-xmm15, then the pushed registers. The saved
	; rbp is loaded through rax because rsp-relative addressing is still
	; in use while rbp is the frame base.
2022	lea	rsp,[rbp]
2023	movaps	xmm6,XMMWORD[64+rbp]
2024	movaps	xmm7,XMMWORD[80+rbp]
2025	movaps	xmm8,XMMWORD[96+rbp]
2026	movaps	xmm9,XMMWORD[112+rbp]
2027	movaps	xmm10,XMMWORD[128+rbp]
2028	movaps	xmm11,XMMWORD[144+rbp]
2029	movaps	xmm12,XMMWORD[160+rbp]
2030	movaps	xmm13,XMMWORD[176+rbp]
2031	movaps	xmm14,XMMWORD[192+rbp]
2032	movaps	xmm15,XMMWORD[208+rbp]
2033	lea	rsp,[160+rbp]
2034	mov	r15,QWORD[72+rsp]
2035	mov	r14,QWORD[80+rsp]
2036	mov	r13,QWORD[88+rsp]
2037	mov	r12,QWORD[96+rsp]
2038	mov	rbx,QWORD[104+rsp]
2039	mov	rax,QWORD[112+rsp]
2040	lea	rsp,[120+rsp]
2041	mov	rbp,rax
2042$L$xts_enc_epilogue:
2043	DB	0F3h,0C3h		;repret
2044
2045
;-----------------------------------------------------------------------
; bsaes_xts_decrypt -- XTS-mode decryption on top of the 8-block
; bit-sliced core (_bsaes_decrypt8). The initial tweak is produced with
; asm_AES_encrypt; a lone single block and the ciphertext-stealing tail
; go through asm_AES_decrypt.
;
; ABI:   Microsoft x64 (rbx, rbp, r12-r15 and xmm6-xmm15 are saved and
;        restored here).
; In:    rcx = input pointer              (kept in r12)
;        rdx = output pointer             (kept in r13)
;        r8  = length in bytes            (kept in r14, full copy in rbx)
;        r9  = key structure for the data blocks (kept in r15);
;              DWORD[240+r9] is read and passed on in r10d -- presumably
;              the round count, TODO confirm against the key layout
;        5th argument (stack) -> r10, handed to asm_AES_encrypt as its
;              3rd argument -- presumably the tweak key ("key2"), confirm
;        6th argument (stack) -> r11, handed to asm_AES_encrypt as its
;              1st argument -- presumably the 16-byte IV, confirm
; Out:   output buffer filled; rax carries no result.
;
; Frame layout, register roles and the tweak-update sequence are the
; same as in bsaes_xts_encrypt: xmm6 = running tweak, xmm12 = magic
; mask, xmm13/xmm14 = carry helpers, tweak slots at [rsp]..[112+rsp],
; scratch block at [32+rbp], saved xmm6-xmm15 at [64+rbp]..[223+rbp].
;
; Difference from encryption: when the length is not a multiple of 16,
; one full block is held back from bulk processing, because the stealing
; tail must decrypt it with the tweak *after* the one used for the final
; combined block.
;-----------------------------------------------------------------------
2046global	bsaes_xts_decrypt
2047
2048ALIGN	16
2049bsaes_xts_decrypt:
	; rax = entry rsp; se_handler takes context->Rax as the stack pointer
	; while RIP is still before $L$xts_dec_body.
2050	mov	rax,rsp
2051$L$xts_dec_prologue:
2052	push	rbp
2053	push	rbx
2054	push	r12
2055	push	r13
2056	push	r14
2057	push	r15
2058	lea	rsp,[((-72))+rsp]
	; 6 pushes + 72 bytes = 120 below the entry rsp, so the 5th argument
	; slot (entry rsp + 40) is at [160+rsp] and the 6th at [168+rsp].
2059	mov	r10,QWORD[160+rsp]
2060	mov	r11,QWORD[168+rsp]
	; 160 more bytes; xmm6-xmm15 are stored at [64+rsp]..[208+rsp].
2061	lea	rsp,[((-160))+rsp]
2062	movaps	XMMWORD[64+rsp],xmm6
2063	movaps	XMMWORD[80+rsp],xmm7
2064	movaps	XMMWORD[96+rsp],xmm8
2065	movaps	XMMWORD[112+rsp],xmm9
2066	movaps	XMMWORD[128+rsp],xmm10
2067	movaps	XMMWORD[144+rsp],xmm11
2068	movaps	XMMWORD[160+rsp],xmm12
2069	movaps	XMMWORD[176+rsp],xmm13
2070	movaps	XMMWORD[192+rsp],xmm14
2071	movaps	XMMWORD[208+rsp],xmm15
2072$L$xts_dec_body:
	; rbp = frame base; arguments move to callee-saved registers.
2073	mov	rbp,rsp
2074	mov	r12,rcx
2075	mov	r13,rdx
2076	mov	r14,r8
2077	mov	r15,r9
2078
	; Initial tweak: asm_AES_encrypt(in = 6th arg, out = [32+rbp],
	; key = 5th arg). The tweak is encrypted even when decrypting data.
2079	lea	rcx,[r11]
2080	lea	rdx,[32+rbp]
2081	lea	r8,[r10]
2082	call	asm_AES_encrypt
2083
	; eax = DWORD[240+key]; rbx keeps the unrounded length.
2084	mov	eax,DWORD[240+r15]
2085	mov	rbx,r14
2086
	; Reserve (value<<7) - 96 bytes of stack for the converted key
	; schedule; edx keeps the value for later r10d hand-offs.
2087	mov	edx,eax
2088	shl	rax,7
2089	sub	rax,96
2090	sub	rsp,rax
2091
	; _bsaes_key_convert(rax = destination, rcx = key, r10d = value from
	; [240+key]).
	; NOTE(review): _bsaes_key_convert is outside this view. Here xmm7 is
	; xored with the first schedule entry at [rsp] and written back there,
	; while xmm6 is stored at the returned rax -- a different fix-up from
	; the encrypt routine; confirm against its definition.
2092	mov	rax,rsp
2093	mov	rcx,r15
2094	mov	r10d,edx
2095	call	_bsaes_key_convert
2096	pxor	xmm7,XMMWORD[rsp]
2097	movdqa	XMMWORD[rax],xmm6
2098	movdqa	XMMWORD[rsp],xmm7
2099
	; r14 = whole-block byte count; if (len & 15) != 0 subtract one more
	; block (rax = 16) so it is left for the stealing tail.
2100	xor	eax,eax
2101	and	r14,-16
2102	test	ebx,15
2103	setnz	al
2104	shl	rax,4
2105	sub	r14,rax
2106
	; Reserve 128 bytes for eight tweaks; xmm6 = initial tweak.
2107	sub	rsp,0x80
2108	movdqa	xmm6,XMMWORD[32+rbp]
2109
	; xmm12 = magic mask, xmm14 = per-dword sign mask of the tweak.
2110	pxor	xmm14,xmm14
2111	movdqa	xmm12,XMMWORD[$L$xts_magic]
2112	pcmpgtd	xmm14,xmm6
2113
	; Fewer than 128 bytes to process in bulk -> short path.
2114	sub	r14,0x80
2115	jc	NEAR $L$xts_dec_short
2116	jmp	NEAR $L$xts_dec_loop
2117
2118ALIGN	16
	; Main loop: 8 blocks (128 bytes) per iteration. Each group copies the
	; current tweak into a data register, stores it in its stack slot and
	; advances the tweak; input loads and input^tweak xors of earlier
	; blocks are interleaved.
2119$L$xts_dec_loop:
	; tweak[0] -> xmm15, [rsp]
2120	pshufd	xmm13,xmm14,0x13
2121	pxor	xmm14,xmm14
2122	movdqa	xmm15,xmm6
2123	movdqa	XMMWORD[rsp],xmm6
2124	paddq	xmm6,xmm6
2125	pand	xmm13,xmm12
2126	pcmpgtd	xmm14,xmm6
2127	pxor	xmm6,xmm13
	; tweak[1] -> xmm0, [16+rsp]; load input block 0
2128	pshufd	xmm13,xmm14,0x13
2129	pxor	xmm14,xmm14
2130	movdqa	xmm0,xmm6
2131	movdqa	XMMWORD[16+rsp],xmm6
2132	paddq	xmm6,xmm6
2133	pand	xmm13,xmm12
2134	pcmpgtd	xmm14,xmm6
2135	pxor	xmm6,xmm13
2136	movdqu	xmm7,XMMWORD[r12]
	; tweak[2] -> xmm1, [32+rsp]; load block 1, block 0 ^= tweak[0]
2137	pshufd	xmm13,xmm14,0x13
2138	pxor	xmm14,xmm14
2139	movdqa	xmm1,xmm6
2140	movdqa	XMMWORD[32+rsp],xmm6
2141	paddq	xmm6,xmm6
2142	pand	xmm13,xmm12
2143	pcmpgtd	xmm14,xmm6
2144	pxor	xmm6,xmm13
2145	movdqu	xmm8,XMMWORD[16+r12]
2146	pxor	xmm15,xmm7
	; tweak[3] -> xmm2, [48+rsp]
2147	pshufd	xmm13,xmm14,0x13
2148	pxor	xmm14,xmm14
2149	movdqa	xmm2,xmm6
2150	movdqa	XMMWORD[48+rsp],xmm6
2151	paddq	xmm6,xmm6
2152	pand	xmm13,xmm12
2153	pcmpgtd	xmm14,xmm6
2154	pxor	xmm6,xmm13
2155	movdqu	xmm9,XMMWORD[32+r12]
2156	pxor	xmm0,xmm8
	; tweak[4] -> xmm3, [64+rsp]
2157	pshufd	xmm13,xmm14,0x13
2158	pxor	xmm14,xmm14
2159	movdqa	xmm3,xmm6
2160	movdqa	XMMWORD[64+rsp],xmm6
2161	paddq	xmm6,xmm6
2162	pand	xmm13,xmm12
2163	pcmpgtd	xmm14,xmm6
2164	pxor	xmm6,xmm13
2165	movdqu	xmm10,XMMWORD[48+r12]
2166	pxor	xmm1,xmm9
	; tweak[5] -> xmm4, [80+rsp]
2167	pshufd	xmm13,xmm14,0x13
2168	pxor	xmm14,xmm14
2169	movdqa	xmm4,xmm6
2170	movdqa	XMMWORD[80+rsp],xmm6
2171	paddq	xmm6,xmm6
2172	pand	xmm13,xmm12
2173	pcmpgtd	xmm14,xmm6
2174	pxor	xmm6,xmm13
2175	movdqu	xmm11,XMMWORD[64+r12]
2176	pxor	xmm2,xmm10
	; tweak[6] -> xmm5, [96+rsp]
2177	pshufd	xmm13,xmm14,0x13
2178	pxor	xmm14,xmm14
2179	movdqa	xmm5,xmm6
2180	movdqa	XMMWORD[96+rsp],xmm6
2181	paddq	xmm6,xmm6
2182	pand	xmm13,xmm12
2183	pcmpgtd	xmm14,xmm6
2184	pxor	xmm6,xmm13
	; xmm12-xmm14 are reused as input registers from here (the magic mask
	; is reloaded after the call). tweak[7] stays in xmm6 and is stored at
	; [112+rsp] before block 7 is xored into it.
2185	movdqu	xmm12,XMMWORD[80+r12]
2186	pxor	xmm3,xmm11
2187	movdqu	xmm13,XMMWORD[96+r12]
2188	pxor	xmm4,xmm12
2189	movdqu	xmm14,XMMWORD[112+r12]
2190	lea	r12,[128+r12]
2191	movdqa	XMMWORD[112+rsp],xmm6
2192	pxor	xmm5,xmm13
	; rax = key schedule (just above the 128-byte tweak area),
	; r10d = value from [240+key].
2193	lea	rax,[128+rsp]
2194	pxor	xmm6,xmm14
2195	mov	r10d,edx
2196
2197	call	_bsaes_decrypt8
2198
	; Results are consumed in the register order xmm15, xmm0, xmm5, xmm3,
	; xmm1, xmm6, xmm2, xmm4 for blocks 0..7 (note: not the same order as
	; after _bsaes_encrypt8); each is xored with its tweak and stored.
2199	pxor	xmm15,XMMWORD[rsp]
2200	pxor	xmm0,XMMWORD[16+rsp]
2201	movdqu	XMMWORD[r13],xmm15
2202	pxor	xmm5,XMMWORD[32+rsp]
2203	movdqu	XMMWORD[16+r13],xmm0
2204	pxor	xmm3,XMMWORD[48+rsp]
2205	movdqu	XMMWORD[32+r13],xmm5
2206	pxor	xmm1,XMMWORD[64+rsp]
2207	movdqu	XMMWORD[48+r13],xmm3
2208	pxor	xmm6,XMMWORD[80+rsp]
2209	movdqu	XMMWORD[64+r13],xmm1
2210	pxor	xmm2,XMMWORD[96+rsp]
2211	movdqu	XMMWORD[80+r13],xmm6
2212	pxor	xmm4,XMMWORD[112+rsp]
2213	movdqu	XMMWORD[96+r13],xmm2
2214	movdqu	XMMWORD[112+r13],xmm4
2215	lea	r13,[128+r13]
2216
	; Rebuild the tweak state for the next pass: reload tweak[7] and the
	; magic mask, then advance the tweak once more.
2217	movdqa	xmm6,XMMWORD[112+rsp]
2218	pxor	xmm14,xmm14
2219	movdqa	xmm12,XMMWORD[$L$xts_magic]
2220	pcmpgtd	xmm14,xmm6
2221	pshufd	xmm13,xmm14,0x13
2222	pxor	xmm14,xmm14
2223	paddq	xmm6,xmm6
2224	pand	xmm13,xmm12
2225	pcmpgtd	xmm14,xmm6
2226	pxor	xmm6,xmm13
2227
2228	sub	r14,0x80
2229	jnc	NEAR $L$xts_dec_loop
2230
	; Short path: 0..7 whole blocks remain. After each input load the
	; remaining length is compared and control leaves to $L$xts_dec_N once
	; N blocks have been loaded.
2231$L$xts_dec_short:
2232	add	r14,0x80
2233	jz	NEAR $L$xts_dec_done
2234	pshufd	xmm13,xmm14,0x13
2235	pxor	xmm14,xmm14
2236	movdqa	xmm15,xmm6
2237	movdqa	XMMWORD[rsp],xmm6
2238	paddq	xmm6,xmm6
2239	pand	xmm13,xmm12
2240	pcmpgtd	xmm14,xmm6
2241	pxor	xmm6,xmm13
2242	pshufd	xmm13,xmm14,0x13
2243	pxor	xmm14,xmm14
2244	movdqa	xmm0,xmm6
2245	movdqa	XMMWORD[16+rsp],xmm6
2246	paddq	xmm6,xmm6
2247	pand	xmm13,xmm12
2248	pcmpgtd	xmm14,xmm6
2249	pxor	xmm6,xmm13
2250	movdqu	xmm7,XMMWORD[r12]
2251	cmp	r14,16
2252	je	NEAR $L$xts_dec_1
2253	pshufd	xmm13,xmm14,0x13
2254	pxor	xmm14,xmm14
2255	movdqa	xmm1,xmm6
2256	movdqa	XMMWORD[32+rsp],xmm6
2257	paddq	xmm6,xmm6
2258	pand	xmm13,xmm12
2259	pcmpgtd	xmm14,xmm6
2260	pxor	xmm6,xmm13
2261	movdqu	xmm8,XMMWORD[16+r12]
2262	cmp	r14,32
2263	je	NEAR $L$xts_dec_2
2264	pxor	xmm15,xmm7
2265	pshufd	xmm13,xmm14,0x13
2266	pxor	xmm14,xmm14
2267	movdqa	xmm2,xmm6
2268	movdqa	XMMWORD[48+rsp],xmm6
2269	paddq	xmm6,xmm6
2270	pand	xmm13,xmm12
2271	pcmpgtd	xmm14,xmm6
2272	pxor	xmm6,xmm13
2273	movdqu	xmm9,XMMWORD[32+r12]
2274	cmp	r14,48
2275	je	NEAR $L$xts_dec_3
2276	pxor	xmm0,xmm8
2277	pshufd	xmm13,xmm14,0x13
2278	pxor	xmm14,xmm14
2279	movdqa	xmm3,xmm6
2280	movdqa	XMMWORD[64+rsp],xmm6
2281	paddq	xmm6,xmm6
2282	pand	xmm13,xmm12
2283	pcmpgtd	xmm14,xmm6
2284	pxor	xmm6,xmm13
2285	movdqu	xmm10,XMMWORD[48+r12]
2286	cmp	r14,64
2287	je	NEAR $L$xts_dec_4
2288	pxor	xmm1,xmm9
2289	pshufd	xmm13,xmm14,0x13
2290	pxor	xmm14,xmm14
2291	movdqa	xmm4,xmm6
2292	movdqa	XMMWORD[80+rsp],xmm6
2293	paddq	xmm6,xmm6
2294	pand	xmm13,xmm12
2295	pcmpgtd	xmm14,xmm6
2296	pxor	xmm6,xmm13
2297	movdqu	xmm11,XMMWORD[64+r12]
2298	cmp	r14,80
2299	je	NEAR $L$xts_dec_5
2300	pxor	xmm2,xmm10
2301	pshufd	xmm13,xmm14,0x13
2302	pxor	xmm14,xmm14
2303	movdqa	xmm5,xmm6
2304	movdqa	XMMWORD[96+rsp],xmm6
2305	paddq	xmm6,xmm6
2306	pand	xmm13,xmm12
2307	pcmpgtd	xmm14,xmm6
2308	pxor	xmm6,xmm13
2309	movdqu	xmm12,XMMWORD[80+r12]
2310	cmp	r14,96
2311	je	NEAR $L$xts_dec_6
	; Seven blocks remain: finish the xors, park the following tweak at
	; [112+rsp], and run the 8-wide core (only 7 results are stored).
2312	pxor	xmm3,xmm11
2313	movdqu	xmm13,XMMWORD[96+r12]
2314	pxor	xmm4,xmm12
2315	movdqa	XMMWORD[112+rsp],xmm6
2316	lea	r12,[112+r12]
2317	pxor	xmm5,xmm13
2318	lea	rax,[128+rsp]
2319	mov	r10d,edx
2320
2321	call	_bsaes_decrypt8
2322
2323	pxor	xmm15,XMMWORD[rsp]
2324	pxor	xmm0,XMMWORD[16+rsp]
2325	movdqu	XMMWORD[r13],xmm15
2326	pxor	xmm5,XMMWORD[32+rsp]
2327	movdqu	XMMWORD[16+r13],xmm0
2328	pxor	xmm3,XMMWORD[48+rsp]
2329	movdqu	XMMWORD[32+r13],xmm5
2330	pxor	xmm1,XMMWORD[64+rsp]
2331	movdqu	XMMWORD[48+r13],xmm3
2332	pxor	xmm6,XMMWORD[80+rsp]
2333	movdqu	XMMWORD[64+r13],xmm1
2334	pxor	xmm2,XMMWORD[96+rsp]
2335	movdqu	XMMWORD[80+r13],xmm6
2336	movdqu	XMMWORD[96+r13],xmm2
2337	lea	r13,[112+r13]
2338
	; xmm6 = tweak that follows the last block written.
2339	movdqa	xmm6,XMMWORD[112+rsp]
2340	jmp	NEAR $L$xts_dec_done
2341ALIGN	16
	; Six blocks; next tweak is at [96+rsp].
2342$L$xts_dec_6:
2343	pxor	xmm3,xmm11
2344	lea	r12,[96+r12]
2345	pxor	xmm4,xmm12
2346	lea	rax,[128+rsp]
2347	mov	r10d,edx
2348
2349	call	_bsaes_decrypt8
2350
2351	pxor	xmm15,XMMWORD[rsp]
2352	pxor	xmm0,XMMWORD[16+rsp]
2353	movdqu	XMMWORD[r13],xmm15
2354	pxor	xmm5,XMMWORD[32+rsp]
2355	movdqu	XMMWORD[16+r13],xmm0
2356	pxor	xmm3,XMMWORD[48+rsp]
2357	movdqu	XMMWORD[32+r13],xmm5
2358	pxor	xmm1,XMMWORD[64+rsp]
2359	movdqu	XMMWORD[48+r13],xmm3
2360	pxor	xmm6,XMMWORD[80+rsp]
2361	movdqu	XMMWORD[64+r13],xmm1
2362	movdqu	XMMWORD[80+r13],xmm6
2363	lea	r13,[96+r13]
2364
2365	movdqa	xmm6,XMMWORD[96+rsp]
2366	jmp	NEAR $L$xts_dec_done
2367ALIGN	16
	; Five blocks; next tweak is at [80+rsp].
2368$L$xts_dec_5:
2369	pxor	xmm2,xmm10
2370	lea	r12,[80+r12]
2371	pxor	xmm3,xmm11
2372	lea	rax,[128+rsp]
2373	mov	r10d,edx
2374
2375	call	_bsaes_decrypt8
2376
2377	pxor	xmm15,XMMWORD[rsp]
2378	pxor	xmm0,XMMWORD[16+rsp]
2379	movdqu	XMMWORD[r13],xmm15
2380	pxor	xmm5,XMMWORD[32+rsp]
2381	movdqu	XMMWORD[16+r13],xmm0
2382	pxor	xmm3,XMMWORD[48+rsp]
2383	movdqu	XMMWORD[32+r13],xmm5
2384	pxor	xmm1,XMMWORD[64+rsp]
2385	movdqu	XMMWORD[48+r13],xmm3
2386	movdqu	XMMWORD[64+r13],xmm1
2387	lea	r13,[80+r13]
2388
2389	movdqa	xmm6,XMMWORD[80+rsp]
2390	jmp	NEAR $L$xts_dec_done
2391ALIGN	16
	; Four blocks; next tweak is at [64+rsp].
2392$L$xts_dec_4:
2393	pxor	xmm1,xmm9
2394	lea	r12,[64+r12]
2395	pxor	xmm2,xmm10
2396	lea	rax,[128+rsp]
2397	mov	r10d,edx
2398
2399	call	_bsaes_decrypt8
2400
2401	pxor	xmm15,XMMWORD[rsp]
2402	pxor	xmm0,XMMWORD[16+rsp]
2403	movdqu	XMMWORD[r13],xmm15
2404	pxor	xmm5,XMMWORD[32+rsp]
2405	movdqu	XMMWORD[16+r13],xmm0
2406	pxor	xmm3,XMMWORD[48+rsp]
2407	movdqu	XMMWORD[32+r13],xmm5
2408	movdqu	XMMWORD[48+r13],xmm3
2409	lea	r13,[64+r13]
2410
2411	movdqa	xmm6,XMMWORD[64+rsp]
2412	jmp	NEAR $L$xts_dec_done
2413ALIGN	16
	; Three blocks; next tweak is at [48+rsp].
2414$L$xts_dec_3:
2415	pxor	xmm0,xmm8
2416	lea	r12,[48+r12]
2417	pxor	xmm1,xmm9
2418	lea	rax,[128+rsp]
2419	mov	r10d,edx
2420
2421	call	_bsaes_decrypt8
2422
2423	pxor	xmm15,XMMWORD[rsp]
2424	pxor	xmm0,XMMWORD[16+rsp]
2425	movdqu	XMMWORD[r13],xmm15
2426	pxor	xmm5,XMMWORD[32+rsp]
2427	movdqu	XMMWORD[16+r13],xmm0
2428	movdqu	XMMWORD[32+r13],xmm5
2429	lea	r13,[48+r13]
2430
2431	movdqa	xmm6,XMMWORD[48+rsp]
2432	jmp	NEAR $L$xts_dec_done
2433ALIGN	16
	; Two blocks; next tweak is at [32+rsp].
2434$L$xts_dec_2:
2435	pxor	xmm15,xmm7
2436	lea	r12,[32+r12]
2437	pxor	xmm0,xmm8
2438	lea	rax,[128+rsp]
2439	mov	r10d,edx
2440
2441	call	_bsaes_decrypt8
2442
2443	pxor	xmm15,XMMWORD[rsp]
2444	pxor	xmm0,XMMWORD[16+rsp]
2445	movdqu	XMMWORD[r13],xmm15
2446	movdqu	XMMWORD[16+r13],xmm0
2447	lea	r13,[32+r13]
2448
2449	movdqa	xmm6,XMMWORD[32+rsp]
2450	jmp	NEAR $L$xts_dec_done
2451ALIGN	16
	; One block: skip the bit-sliced core. input^tweak goes through the
	; scratch block at [32+rbp] and asm_AES_decrypt (in place, key = r15).
	; xmm15 still holds tweak[0] after the call, so the code relies on
	; asm_AES_decrypt leaving xmm15 intact.
2452$L$xts_dec_1:
2453	pxor	xmm7,xmm15
2454	lea	r12,[16+r12]
2455	movdqa	XMMWORD[32+rbp],xmm7
2456	lea	rcx,[32+rbp]
2457	lea	rdx,[32+rbp]
2458	lea	r8,[r15]
2459	call	asm_AES_decrypt
2460	pxor	xmm15,XMMWORD[32+rbp]
2461
2462
2463
2464
2465
2466	movdqu	XMMWORD[r13],xmm15
2467	lea	r13,[16+r13]
2468
	; Next tweak (tweak[1]) was stored at [16+rsp] by the short path.
2469	movdqa	xmm6,XMMWORD[16+rsp]
2470
	; Bulk blocks are written. ebx = len & 15 = size of the partial final
	; block; zero means there is nothing to steal.
2471$L$xts_dec_done:
2472	and	ebx,15
2473	jz	NEAR $L$xts_dec_ret
2474
	; Stealing tail, step 1: keep the current tweak in xmm5 and advance
	; xmm6 by one more step. The held-back full block at [r12] is
	; decrypted with the *advanced* tweak and stored at [r13].
2475	pxor	xmm14,xmm14
2476	movdqa	xmm12,XMMWORD[$L$xts_magic]
2477	pcmpgtd	xmm14,xmm6
2478	pshufd	xmm13,xmm14,0x13
2479	movdqa	xmm5,xmm6
2480	paddq	xmm6,xmm6
2481	pand	xmm13,xmm12
2482	movdqu	xmm15,XMMWORD[r12]
2483	pxor	xmm6,xmm13
2484
2485	lea	rcx,[32+rbp]
2486	pxor	xmm15,xmm6
2487	lea	rdx,[32+rbp]
2488	movdqa	XMMWORD[32+rbp],xmm15
2489	lea	r8,[r15]
2490	call	asm_AES_decrypt
2491	pxor	xmm6,XMMWORD[32+rbp]
2492	mov	rdx,r13
2493	movdqu	XMMWORD[r13],xmm6
2494
	; Step 2, byte by byte: the leading bytes of the block just written
	; move to the partial output slot 16 bytes further on and are replaced
	; by the trailing input bytes (read from 16 past the held-back block).
2495$L$xts_dec_steal:
2496	movzx	eax,BYTE[16+r12]
2497	movzx	ecx,BYTE[rdx]
2498	lea	r12,[1+r12]
2499	mov	BYTE[rdx],al
2500	mov	BYTE[16+rdx],cl
2501	lea	rdx,[1+rdx]
2502	sub	ebx,1
2503	jnz	NEAR $L$xts_dec_steal
2504
	; Step 3: decrypt the patched block at [r13] with the earlier tweak
	; saved in xmm5 and write it back in place.
2505	movdqu	xmm15,XMMWORD[r13]
2506	lea	rcx,[32+rbp]
2507	pxor	xmm15,xmm5
2508	lea	rdx,[32+rbp]
2509	movdqa	XMMWORD[32+rbp],xmm15
2510	lea	r8,[r15]
2511	call	asm_AES_decrypt
2512	pxor	xmm5,XMMWORD[32+rbp]
2513	movdqu	XMMWORD[r13],xmm5
2514
	; Zero everything between rsp and rbp (tweak slots and converted key
	; schedule), 32 bytes per step, before releasing the stack.
2515$L$xts_dec_ret:
2516	lea	rax,[rsp]
2517	pxor	xmm0,xmm0
2518$L$xts_dec_bzero:
2519	movdqa	XMMWORD[rax],xmm0
2520	movdqa	XMMWORD[16+rax],xmm0
2521	lea	rax,[32+rax]
2522	cmp	rbp,rax
2523	ja	NEAR $L$xts_dec_bzero
2524
	; Epilogue: restore xmm6-xmm15, then the pushed registers; the saved
	; rbp is loaded through rax.
2525	lea	rsp,[rbp]
2526	movaps	xmm6,XMMWORD[64+rbp]
2527	movaps	xmm7,XMMWORD[80+rbp]
2528	movaps	xmm8,XMMWORD[96+rbp]
2529	movaps	xmm9,XMMWORD[112+rbp]
2530	movaps	xmm10,XMMWORD[128+rbp]
2531	movaps	xmm11,XMMWORD[144+rbp]
2532	movaps	xmm12,XMMWORD[160+rbp]
2533	movaps	xmm13,XMMWORD[176+rbp]
2534	movaps	xmm14,XMMWORD[192+rbp]
2535	movaps	xmm15,XMMWORD[208+rbp]
2536	lea	rsp,[160+rbp]
2537	mov	r15,QWORD[72+rsp]
2538	mov	r14,QWORD[80+rsp]
2539	mov	r13,QWORD[88+rsp]
2540	mov	r12,QWORD[96+rsp]
2541	mov	rbx,QWORD[104+rsp]
2542	mov	rax,QWORD[112+rsp]
2543	lea	rsp,[120+rsp]
2544	mov	rbp,rax
2545$L$xts_dec_epilogue:
2546	DB	0F3h,0C3h		;repret
2547
2548
;-----------------------------------------------------------------------
; _bsaes_const -- read-only 16-byte constants, 64-byte aligned.
;
; LAYOUT IS LOAD-BEARING: code addresses several entries by a fixed byte
; displacement from a neighbouring label rather than by their own name,
; so entries must not be reordered, resized or removed. Visible uses:
;   * _bsaes_encrypt8 loads [$L$BS0], [16+$L$BS0], [32+$L$BS0] (that is
;     $L$BS0/$L$BS1/$L$BS2) and [80+$L$BS0] (that is $L$M0SR).
;   * the CTR loop adds [112+$L$ADD1], which is $L$ADD8.
;   * the XTS routines load $L$xts_magic by name.
; The remaining labels are referenced by code outside this view.
; NOTE(review): the label names suggest (Inv)ShiftRows byte-shuffle
; masks (ISR/SR variants), bit-slice masks (BS0-2) and counter
; increments (ADD1-8); confirm against the routines that use them.
;-----------------------------------------------------------------------
2549ALIGN	64
2550_bsaes_const:
2551$L$M0ISR:
2552	DQ	0x0a0e0206070b0f03,0x0004080c0d010509
2553$L$ISRM0:
2554	DQ	0x01040b0e0205080f,0x0306090c00070a0d
2555$L$ISR:
2556	DQ	0x0504070602010003,0x0f0e0d0c080b0a09
	; Alternating-bit masks at 1-, 2- and 4-bit granularity.
2557$L$BS0:
2558	DQ	0x5555555555555555,0x5555555555555555
2559$L$BS1:
2560	DQ	0x3333333333333333,0x3333333333333333
2561$L$BS2:
2562	DQ	0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
2563$L$SR:
2564	DQ	0x0504070600030201,0x0f0e0d0c0a09080b
2565$L$SRM0:
2566	DQ	0x0304090e00050a0f,0x01060b0c0207080d
2567$L$M0SR:
2568	DQ	0x0a0e02060f03070b,0x0004080c05090d01
2569$L$SWPUP:
2570	DQ	0x0706050403020100,0x0c0d0e0f0b0a0908
2571$L$SWPUPM0SR:
2572	DQ	0x0a0d02060c03070b,0x0004080f05090e01
	; $L$ADDn holds the value n in the highest 32-bit lane, zero elsewhere.
2573$L$ADD1:
2574	DQ	0x0000000000000000,0x0000000100000000
2575$L$ADD2:
2576	DQ	0x0000000000000000,0x0000000200000000
2577$L$ADD3:
2578	DQ	0x0000000000000000,0x0000000300000000
2579$L$ADD4:
2580	DQ	0x0000000000000000,0x0000000400000000
2581$L$ADD5:
2582	DQ	0x0000000000000000,0x0000000500000000
2583$L$ADD6:
2584	DQ	0x0000000000000000,0x0000000600000000
2585$L$ADD7:
2586	DQ	0x0000000000000000,0x0000000700000000
2587$L$ADD8:
2588	DQ	0x0000000000000000,0x0000000800000000
	; XTS tweak-update mask: dword 0 = 0x87 (feedback applied when the
	; 128-bit tweak overflows), dword 2 = 1 (carry from the low into the
	; high 64-bit half).
2589$L$xts_magic:
2590	DD	0x87,0,1,0
	; Four 16-byte rows with a single bit (0x01, 0x02, 0x04, 0x08)
	; replicated into every byte.
2591$L$masks:
2592	DQ	0x0101010101010101,0x0101010101010101
2593	DQ	0x0202020202020202,0x0202020202020202
2594	DQ	0x0404040404040404,0x0404040404040404
2595	DQ	0x0808080808080808,0x0808080808080808
2596$L$M0:
2597	DQ	0x02060a0e03070b0f,0x0004080c0105090d
	; 0x63 replicated into every byte.
2598$L$63:
2599	DQ	0x6363636363636363,0x6363636363636363
	; NUL-terminated UTF-8 identification string:
	; "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe,
	; Andy Polyakov"
2600DB	66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
2601DB	111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
2602DB	32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
2603DB	32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
2604DB	65,110,100,121,32,80,111,108,121,97,107,111,118,0
2605ALIGN	64
2606
2607EXTERN	__imp_RtlVirtualUnwind
2608
2609ALIGN	16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
; functions listed in the .xdata section of this file.
;
; ABI:   Microsoft x64 handler signature; only r8 and r9 are used here.
; In:    r8 = CONTEXT record of the faulting frame
;        r9 = DISPATCHER_CONTEXT
;        HandlerData (reached through [56+r9]) = two image-relative
;        DWORDs taken from .xdata: the function's body label and its
;        epilogue label.
; Out:   eax = 1.
;        NOTE(review): 1 is ExceptionContinueSearch in the Windows SDK
;        enumeration -- confirm.
;
; It reconstructs the caller's rsp and non-volatile registers in the
; CONTEXT (undoing the custom frame that has no unwind codes), copies
; the CONTEXT to disp->ContextRecord and lets RtlVirtualUnwind continue.
;
; Offsets used (x64 CONTEXT / DISPATCHER_CONTEXT layouts from the
; Windows SDK -- confirm against winnt.h): CONTEXT.Rax=120, Rbx=144,
; Rsp=152, Rbp=160, R12..R15=216..240, Rip=248, Xmm6=512, size=154*8;
; DISPATCHER_CONTEXT.ControlPc=0, ImageBase=8, FunctionEntry=16,
; EstablisherFrame=24, ContextRecord=40, HandlerData=56.
;-----------------------------------------------------------------------
2610se_handler:
2611	push	rsi
2612	push	rdi
2613	push	rbx
2614	push	rbp
2615	push	r12
2616	push	r13
2617	push	r14
2618	push	r15
2619	pushfq
	; 64 bytes: 32 of shadow space + 4 stack-argument slots for the
	; 8-argument RtlVirtualUnwind call below.
2620	sub	rsp,64
2621
	; rax = context->Rax (the covered functions start with "mov rax,rsp",
	; so this is the entry rsp while still in the prologue),
	; rbx = context->Rip.
2622	mov	rax,QWORD[120+r8]
2623	mov	rbx,QWORD[248+r8]
2624
	; rsi = disp->ImageBase, r11 = disp->HandlerData.
2625	mov	rsi,QWORD[8+r9]
2626	mov	r11,QWORD[56+r9]
2627
	; Rip below the body label: frame not fully built, rax (entry rsp) is
	; used as the recovered stack pointer.
2628	mov	r10d,DWORD[r11]
2629	lea	r10,[r10*1+rsi]
2630	cmp	rbx,r10
2631	jb	NEAR $L$in_prologue
2632
	; rax = context->Rsp, used unchanged when Rip is at or past the
	; epilogue label (registers already restored by the function).
2633	mov	rax,QWORD[152+r8]
2634
2635	mov	r10d,DWORD[4+r11]
2636	lea	r10,[r10*1+rsi]
2637	cmp	rbx,r10
2638	jae	NEAR $L$in_prologue
2639
	; Inside the body: context->Rbp is the function's frame base.
2640	mov	rax,QWORD[160+r8]
2641
	; Copy the 10 saved xmm registers (20 qwords) from [64+frame] into
	; context->Xmm6.. ; DD 0xa548f3fc encodes "cld; rep movsq".
2642	lea	rsi,[64+rax]
2643	lea	rdi,[512+r8]
2644	mov	ecx,20
2645	DD	0xa548f3fc
2646	lea	rax,[160+rax]
2647
	; Mirror the function epilogue: reload the pushed GPRs and step rax
	; past them so it equals the caller's rsp at function entry.
2648	mov	rbp,QWORD[112+rax]
2649	mov	rbx,QWORD[104+rax]
2650	mov	r12,QWORD[96+rax]
2651	mov	r13,QWORD[88+rax]
2652	mov	r14,QWORD[80+rax]
2653	mov	r15,QWORD[72+rax]
2654	lea	rax,[120+rax]
	; Publish the recovered values into the CONTEXT (Rbx, Rbp, R12-R15).
2655	mov	QWORD[144+r8],rbx
2656	mov	QWORD[160+r8],rbp
2657	mov	QWORD[216+r8],r12
2658	mov	QWORD[224+r8],r13
2659	mov	QWORD[232+r8],r14
2660	mov	QWORD[240+r8],r15
2661
2662$L$in_prologue:
	; context->Rsp = recovered stack pointer.
2663	mov	QWORD[152+r8],rax
2664
	; Copy the whole CONTEXT (154 qwords) to disp->ContextRecord
	; ("cld; rep movsq" again).
2665	mov	rdi,QWORD[40+r9]
2666	mov	rsi,r8
2667	mov	ecx,154
2668	DD	0xa548f3fc
2669
	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame, 0)
	; -- arguments 5..8 go to the stack slots above the shadow space.
2670	mov	rsi,r9
2671	xor	rcx,rcx
2672	mov	rdx,QWORD[8+rsi]
2673	mov	r8,QWORD[rsi]
2674	mov	r9,QWORD[16+rsi]
2675	mov	r10,QWORD[40+rsi]
2676	lea	r11,[56+rsi]
2677	lea	r12,[24+rsi]
2678	mov	QWORD[32+rsp],r10
2679	mov	QWORD[40+rsp],r11
2680	mov	QWORD[48+rsp],r12
2681	mov	QWORD[56+rsp],rcx
2682	call	QWORD[__imp_RtlVirtualUnwind]
2683
	; Return 1 and restore everything pushed on entry.
2684	mov	eax,1
2685	add	rsp,64
2686	popfq
2687	pop	r15
2688	pop	r14
2689	pop	r13
2690	pop	r12
2691	pop	rbp
2692	pop	rbx
2693	pop	rdi
2694	pop	rsi
2695	DB	0F3h,0C3h		;repret
2696
2697
; .pdata -- Win64 function table. Each entry is three image-relative
; DWORDs: start of the covered range, end of the covered range, and the
; address of the matching unwind-info record in .xdata. One entry per
; exported routine (CBC decrypt, CTR encrypt, XTS encrypt, XTS decrypt);
; each range runs from the routine's prologue label to its epilogue
; label.
2698section	.pdata rdata align=4
2699ALIGN	4
2700	DD	$L$cbc_dec_prologue wrt ..imagebase
2701	DD	$L$cbc_dec_epilogue wrt ..imagebase
2702	DD	$L$cbc_dec_info wrt ..imagebase
2703
2704	DD	$L$ctr_enc_prologue wrt ..imagebase
2705	DD	$L$ctr_enc_epilogue wrt ..imagebase
2706	DD	$L$ctr_enc_info wrt ..imagebase
2707
2708	DD	$L$xts_enc_prologue wrt ..imagebase
2709	DD	$L$xts_enc_epilogue wrt ..imagebase
2710	DD	$L$xts_enc_info wrt ..imagebase
2711
2712	DD	$L$xts_dec_prologue wrt ..imagebase
2713	DD	$L$xts_dec_epilogue wrt ..imagebase
2714	DD	$L$xts_dec_info wrt ..imagebase
2715
; .xdata -- unwind-info records referenced from .pdata. Every record has
; the same shape:
;   DB 9,0,0,0   header byte 9 with a zero prolog size, zero unwind-code
;                count and no frame register. NOTE(review): 9 should
;                decode as version 1 with the exception-handler flag set
;                -- confirm against the UNWIND_INFO documentation.
;   DD handler   image-relative address of se_handler
;   DD body,epilogue
;                handler data read by se_handler as HandlerData[0] and
;                HandlerData[1]: the labels that bound the region in
;                which the routine's full stack frame exists.
2716section	.xdata rdata align=8
2717ALIGN	8
2718$L$cbc_dec_info:
2719DB	9,0,0,0
2720	DD	se_handler wrt ..imagebase
2721	DD	$L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase
2722$L$ctr_enc_info:
2723DB	9,0,0,0
2724	DD	se_handler wrt ..imagebase
2725	DD	$L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase
2726$L$xts_enc_info:
2727DB	9,0,0,0
2728	DD	se_handler wrt ..imagebase
2729	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
2730$L$xts_dec_info:
2731DB	9,0,0,0
2732	DD	se_handler wrt ..imagebase
2733	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
2734