// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

// Compilers that lack __has_feature see every feature as absent.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
// When built with MemorySanitizer, OPENSSL_NO_ASM is forced on, which
// compiles out everything guarded by it below.
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// Lrcon: constants read by the key-expansion code, three 16-byte rows.
//   row 0  0x01 in every 32-bit lane: the first round constant. The key
//          expansion loops double it with shl after each use.
//   row 1  tbl index mask. Each lane selects source bytes 13,14,15,12,
//          i.e. the last word rotated by one byte and copied to all lanes.
//   row 2  0x1b in every 32-bit lane: reloaded by the AES-128 path once
//          the doubled constant from row 0 has shifted out of its byte.
.section	__TEXT,__const
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

// aes_hw_set_encrypt_key: expand a raw AES key into the encryption key
// schedule.
//
// Register roles, as read off the code below:
//   x0  in   pointer to the raw key bytes (16, 24 or 32 bytes are read)
//   w1  in   key size in bits; only 128, 192 and 256 are accepted
//   x2  in   pointer to the key schedule to fill
//   x0  out  0 on success, -1 if x0 or x2 is null, -2 if w1 is rejected
//
// Schedule layout: round keys are written back to back starting at x2, and
// the round count (10, 12 or 14) is stored as a 32-bit word at byte offset
// 240 from the start.
//
// On a successful return x2 points at that round-count word and w12 holds
// the round count. _aes_hw_set_decrypt_key depends on both after it calls
// the local entry point Lenc_key.
//
// Scratch: x3, w1, w12, v0-v6 and the flags.
//   v0 = all zeroes            v1 = current round constant
//   v2 = tbl mask              v3 = most recent 16 bytes of the schedule
//   v4 = trailing key words (192/256-bit keys only)
//   v5, v6 = temporaries
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key

.align	5
_aes_hw_set_encrypt_key:
Lenc_key:				// also entered via bl from _aes_hw_set_decrypt_key
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// x3 carries the pending return value: null pointer
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// pending return value: unsupported key size
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// within 128..256 only multiples of 64 pass
	b.ne	Lenc_key_abort

	adrp	x3,Lrcon@PAGE
	add	x3,x3,Lrcon@PAGEOFF
	cmp	w1,#192			// flags survive until the three-way branch below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8		// reuse w1 as the loop counter
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constant, v2 = rotate-n-splat mask

	b.lt	Loop128
	b.eq	L192
	b	L256

// AES-128. Each pass stores the current round key and derives the next one.
// The eight passes use round constants 0x01 up to 0x80.
.align	4
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// last word, rotated, copied to every lane
	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 moved up one word, zero filled
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b		// zero key, identical columns: byte substitution
	subs	w1,w1,#1		// flags consumed by b.ne at the end of the pass

	// Three shifted XORs turn v3 into the running XOR of its four words.
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// double the round constant for the next pass
	eor	v3.16b,v3.16b,v6.16b	// v3 = next round key
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// third Lrcon row: constant restarts at 0x1b

	// Same step as the loop body, unrolled for the ninth derivation.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// 0x1b becomes 0x36
	eor	v3.16b,v3.16b,v6.16b

	// Tenth derivation; the result is the final round key.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, no post-increment
	add	x2,x2,#0x50		// x2 = schedule start + 240

	mov	w12,#10
	b	Ldone

// AES-192. v3 holds the first four key words, the low half of v4 the other
// two. Each of the eight passes emits 24 bytes of schedule.
.align	4
L192:
	ld1	{v4.8b},[x0],#8	// remaining 8 key bytes
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
	tbl	v6.16b,{v4.16b},v2.16b	// adjusted mask picks bytes 5,6,7,4 of v4
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags consumed by b.ne at the end of the pass

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]		// copy the top word of v3 to every lane
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// x2 = schedule start + 240
	b	Ldone

// AES-256. v3 and v4 hold the two halves of the key. Each pass stores v4
// and a freshly derived v3; the loop exits from the middle of the seventh
// pass.
.align	4
L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags consumed by b.eq Ldone below

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone			// x2 = schedule start + 240 on exit

	// Derive the next v4: substitution without rotation or round constant.
	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// round count at schedule offset 240
	mov	x3,#0

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// only x29 is reloaded; no call in this routine changes x30
	ret


// aes_hw_set_decrypt_key: build the decryption key schedule.
//
// x0, w1 and x2 are passed untouched to Lenc_key, the local entry of
// _aes_hw_set_encrypt_key. If that returns non-zero in x0, the value is
// returned as is. Otherwise the freshly written schedule is converted in
// place and 0 is returned.
//
// Conversion: the order of the round keys is reversed, and every round key
// except the first and the last is passed through aesimc.
//
// Depends on Lenc_key leaving x2 = schedule start + 240 and x12 = round
// count. Scratch beyond what Lenc_key uses: x4, v0, v1, flags.
.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key

.align	5
_aes_hw_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	Lenc_key

	cmp	x0,#0
	b.ne	Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// post-index step for the descending pointer
	add	x0,x2,x12,lsl#4	// end of key schedule

	// Swap the outermost pair of round keys; these two skip aesimc.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

	// Walk inwards from both ends, swapping and transforming each pair.
Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	Loop_imc

	// The round-key count is odd, so one middle key remains; it is
	// transformed without moving.
	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	ret

// aes_hw_encrypt: encrypt one 16-byte block.
//
// Register roles, as read off the code below:
//   x0  in  pointer to the 16-byte input block
//   x1  in  pointer to the 16-byte output block
//   x2  in  pointer to the key schedule; the round count is read from byte
//           offset 240
//
// Leaf routine with no stack use. Scratch: w3, x2 (advanced through the
// schedule), v0-v2, flags.
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt

.align	5
_aes_hw_encrypt:
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16	// first round key
	ld1	{v2.16b},[x0]		// v2 = block being encrypted
	sub	w3,w3,#2		// the last two rounds are done after the loop
	ld1	{v1.4s},[x2],#16	// second round key

	// Two rounds per pass; the round key for each following round is
	// loaded while the current one executes.
Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b	// closing round-key XOR

	st1	{v2.16b},[x1]
	ret

// aes_hw_decrypt: decrypt one 16-byte block.
//
// Register roles, as read off the code below:
//   x0  in  pointer to the 16-byte input block
//   x1  in  pointer to the 16-byte output block
//   x2  in  pointer to the key schedule; the round count is read from byte
//           offset 240. Presumably this is a schedule produced by
//           _aes_hw_set_decrypt_key - confirm against callers.
//
// Leaf routine with no stack use. Scratch: w3, x2 (advanced through the
// schedule), v0-v2, flags.
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt

.align	5
_aes_hw_decrypt:
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16	// first round key
	ld1	{v2.16b},[x0]		// v2 = block being decrypted
	sub	w3,w3,#2		// the last two rounds are done after the loop
	ld1	{v1.4s},[x2],#16	// second round key

	// Two rounds per pass; the round key for each following round is
	// loaded while the current one executes.
Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round has no aesimc
	eor	v2.16b,v2.16b,v0.16b	// closing round-key XOR

	st1	{v2.16b},[x1]
	ret

// aes_hw_cbc_encrypt: AES-CBC encryption or decryption of whole blocks.
//
// Register roles, as read off the code below:
//   x0  in      input pointer
//   x1  in      output pointer
//   x2  in      length in bytes. Below 16 the routine returns at once
//               without touching the output or the IV. The low four bits
//               of (length - 16) are masked off; callers presumably pass a
//               multiple of 16 - confirm.
//   x3  in      key schedule; round count read from byte offset 240
//   x4  in/out  16-byte IV; rewritten on exit with the last ciphertext
//               block (the output when encrypting, the input when
//               decrypting)
//   w5  in      zero selects decryption, non-zero encryption
//
// Register plan shared by all paths:
//   v16, v17      leading round keys, reloaded as the rounds progress
//   v18-v23       the six round keys before the last one
//   v7            last round key
//   v6            IV / previous ciphertext block
//   x8            input post-index step: 16, or 0 once the final block has
//                 been fetched, so the look-ahead load re-reads that block
//                 rather than running past it
//
// Scratch: x0-x3, w5-x8, x12, x14, v0-v7, v16-v23, flags.
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt

.align	5
_aes_hw_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	Lcbc_abort		// fewer than 16 bytes: nothing to do
	csel	x8,xzr,x8,eq		// exactly one block: do not advance x0

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]		// w5 now holds the round count
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]		// v6 = IV
	ld1	{v0.16b},[x0],x8	// first input block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2		// w5 = rounds - 8: 2, 4 or 6
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32		// x7 = third round key
	mov	w6,w5
	b.eq	Lcbc_dec		// flags still come from cmp w5,#0 above

	// ---- encryption ----
	// v5 = first round key XOR last round key. XORing it into the next
	// plaintext block lets the first aese of the following pass apply the
	// final round-key XOR of this block, the CBC chaining XOR and the first
	// round key of the next block all at once.
	cmp	w5,#2			// equal means a 10-round key
	eor	v0.16b,v0.16b,v6.16b	// plaintext XOR IV
	eor	v5.16b,v16.16b,v7.16b
	b.eq	Lcbc_enc128

	// 12- and 14-round keys: the middle round keys do not fit in the
	// register plan, so pointers to them are kept and they are reloaded on
	// every pass.
	ld1	{v2.4s,v3.4s},[x7]	// third and fourth round keys
	add	x7,x3,#16		// x7 = second round key, for the re-pre-load
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
	aese	v0.16b,v16.16b		// v16 = next plaintext XOR v5 (see above)
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// equal means a 12-round key
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	Lcbc_enc192

	// Two extra rounds for 14-round keys.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16		// flags consumed by csel and by b.hs below
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// the block fetched next is the last one
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// look-ahead load of the next plaintext
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

	// 10-round keys: every round key stays in a register.
.align	5
Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]	// third and fourth round keys
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
	aese	v0.16b,v16.16b		// v16 = next plaintext XOR v5 (see above)
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16		// flags consumed by csel and by b.hs below
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// the block fetched next is the last one
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// look-ahead load of the next plaintext
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
	b.hs	Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

	// ---- decryption ----
	// Three blocks are decrypted in parallel in v0, v1 and v18. v2, v3 and
	// v19 keep untouched copies of the ciphertext, because each plaintext
	// is the decrypted block XOR the previous ciphertext block (or the IV
	// in v6 for the first one).
.align	5
Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2		// w6 = round counter for the inner loops
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	Lcbc_dec_tail		// fewer than three blocks in total

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// chaining value XOR last round key, block 0
	subs	x2,x2,#0x30		// flags consumed by csel and by b.hs below
	eor	v5.16b,v2.16b,v7.16b	// same for block 1
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b	// same for block 2
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// last ciphertext becomes the chaining value
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	Loop3x_cbc_dec

	cmn	x2,#0x30		// nothing left over after the last triple?
	b.eq	Lcbc_done
	nop

	// One or two blocks remain; they are in v1 and v18. With a single
	// block both registers hold it and only the v18 result is used.
Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// equal means exactly one block; used by b.eq below
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	Lcbc_done

Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

Lcbc_done:
	st1	{v6.16b},[x4]		// hand the chaining value back through the IV
Lcbc_abort:
	ldr	x29,[sp],#16		// only x29 is reloaded; no call in this routine changes x30
	ret

// aes_hw_ctr32_encrypt_blocks: AES-CTR over whole blocks with a 32-bit
// counter.
//
// Register roles, as read off the code below:
//   x0  in  input pointer
//   x1  in  output pointer
//   x2  in  number of 16-byte blocks; zero returns immediately
//   x3  in  key schedule; round count read from byte offset 240
//   x4  in  16-byte counter block. Only read, never written back. Its last
//           32-bit word is the counter: on little-endian builds it is byte
//           swapped before being incremented and swapped back before it is
//           inserted into lane 3 of each counter block.
//
// Register plan:
//   v6            copy of the caller counter block, used as a template
//   v0, v1, v18   counter blocks in flight (three at a time in the main
//                 loop, v0 and v1 only in the tail)
//   w8            counter value in native byte order
//   v16, v17      leading round keys, reloaded as the rounds progress
//   v20-v23       the four round keys before the last one
//   v7            last round key, XORed into the input ahead of time
//
// Scratch: x0-x2, w5-w10, x12, v0-v7, v16-v23, flags.
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks

.align	5
_aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// A zero block count would otherwise reach Lctr32_tail, fail its
	// single-block test and store two blocks of output.
	cbz	x2,Lctr32_done
	ldr	w5,[x3,#240]		// w5 = round count

	ldr	w8, [x4, #12]		// counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2		// w5 = rounds - 6: inner loop counter reload value
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32		// x7 = third round key
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: tail must not advance x0
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v1.s[3],w10		// v1 = counter + 1
	b.ls	Lctr32_tail		// one or two blocks in total
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12		// v18 = counter + 2
	b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Last six rounds. The three states move to v4, v5 and v17, so v0, v1
	// and v18 can be rebuilt from the template for the next triple while
	// these rounds are still running.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// input XOR last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3		// flags consumed by b.hs at the end of the pass
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: x2 = blocks left, 0 to 2
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// single block: tail must not advance x0

	// One or two blocks remain; their counter blocks are already in v0
	// and v1.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// step is 0 when only one block remains
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]		// second block, or the first one re-read
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// single block: skip the second store
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16		// only x29 is reloaded; no call in this routine changes x30
	ret

#endif
#endif  // !OPENSSL_NO_ASM
