1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16#include <openssl/arm_arch.h>
17
18#if __ARM_MAX_ARCH__>=7
19.text
20.arch	armv8-a+crypto
// Key-expansion constants; aes_hw_set_encrypt_key addresses this table via x3.
//   row 0: 0x01 in every 32-bit lane -- first round constant (loaded into v1)
//   row 1: byte-index mask for tbl   -- loaded into v2
//   row 2: 0x1b in every 32-bit lane -- reloaded into v1 by the 128-bit path
//          once its eight-iteration loop has finished
.section	.rodata
.align	5
.Lrcon:
.long	0x00000001,0x00000001,0x00000001,0x00000001
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x0000001b,0x0000001b,0x0000001b,0x0000001b
27
28.text
29
// aes_hw_set_encrypt_key -- expand a 128/192/256-bit user key into round keys.
//
//   In:   x0 = user key bytes
//         w1 = key length in bits
//         x2 = key schedule buffer
//   Out:  x0 =  0  round keys written from x2 upward and the round count
//                  (10, 12 or 14) stored as a 32-bit word at offset 240
//              -1  x0 or x2 was NULL
//              -2  w1 below 128, above 256, or not a multiple of 64
//   Uses: x3, w12, v0-v6, flags; x0, w1 and x2 are consumed.
//
// .Lenc_key is the entry that aes_hw_set_decrypt_key reaches with bl.  On the
// success path x2 is left at schedule+240 and w12 holds the round count.
// x30 is pushed but only x29 is reloaded; nothing in this routine writes x30.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp
	mov	x3, #-1			// status returned for a NULL pointer
	cbz	x0, .Lenc_key_ret
	cbz	x2, .Lenc_key_ret
	mov	x3, #-2			// status returned for a rejected bit length
	cmp	w1, #128
	b.lt	.Lenc_key_ret
	cmp	w1, #256
	b.gt	.Lenc_key_ret
	tst	w1, #0x3f
	b.ne	.Lenc_key_ret

	adrp	x3, .Lrcon
	add	x3, x3, :lo12:.Lrcon
	cmp	w1, #192		// flags feed the three-way branch below

	movi	v0.16b, #0		// v0 stays all-zero for the whole routine
	ld1	{v3.16b}, [x0], #16	// first 16 key bytes
	mov	w1, #8			// reuse w1 as the loop counter
	ld1	{v1.4s, v2.4s}, [x3], #32	// v1 = rcon row, v2 = tbl mask row

	b.lt	.Lexpand128
	b.eq	.Lsetup192
	b	.Lsetup256

// ---- 128-bit key: eight looped rounds, then two unrolled with rcon 0x1b ----
.align	4
.Lexpand128:
	tbl	v6.16b, {v3.16b}, v2.16b
	ext	v5.16b, v0.16b, v3.16b, #12
	st1	{v3.4s}, [x2], #16
	aese	v6.16b, v0.16b
	subs	w1, w1, #1

	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v6.16b, v6.16b, v1.16b
	eor	v3.16b, v3.16b, v5.16b
	shl	v1.16b, v1.16b, #1	// double the round constant
	eor	v3.16b, v3.16b, v6.16b
	b.ne	.Lexpand128

	ld1	{v1.4s}, [x3]		// x3 now points at the 0x1b row

	tbl	v6.16b, {v3.16b}, v2.16b
	ext	v5.16b, v0.16b, v3.16b, #12
	st1	{v3.4s}, [x2], #16
	aese	v6.16b, v0.16b

	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v6.16b, v6.16b, v1.16b
	eor	v3.16b, v3.16b, v5.16b
	shl	v1.16b, v1.16b, #1
	eor	v3.16b, v3.16b, v6.16b

	tbl	v6.16b, {v3.16b}, v2.16b
	ext	v5.16b, v0.16b, v3.16b, #12
	st1	{v3.4s}, [x2], #16
	aese	v6.16b, v0.16b

	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v6.16b, v6.16b, v1.16b
	eor	v3.16b, v3.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
	st1	{v3.4s}, [x2]		// last round key, no post-increment
	add	x2, x2, #0x50		// x2 = schedule + 240

	mov	w12, #10
	b	.Lenc_key_done

// ---- 192-bit key: eight iterations, each storing 8 + 16 bytes ----
.align	4
.Lsetup192:
	ld1	{v4.8b}, [x0], #8	// remaining 8 key bytes
	movi	v6.16b, #8		// borrow v6.16b
	st1	{v3.4s}, [x2], #16
	sub	v2.16b, v2.16b, v6.16b	// adjust the mask

.Lexpand192:
	tbl	v6.16b, {v4.16b}, v2.16b
	ext	v5.16b, v0.16b, v3.16b, #12
	st1	{v4.8b}, [x2], #8
	aese	v6.16b, v0.16b
	subs	w1, w1, #1

	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v3.16b, v3.16b, v5.16b

	dup	v5.4s, v3.s[3]
	eor	v5.16b, v5.16b, v4.16b
	eor	v6.16b, v6.16b, v1.16b
	ext	v4.16b, v0.16b, v4.16b, #12
	shl	v1.16b, v1.16b, #1
	eor	v4.16b, v4.16b, v5.16b
	eor	v3.16b, v3.16b, v6.16b
	eor	v4.16b, v4.16b, v6.16b
	st1	{v3.4s}, [x2], #16
	b.ne	.Lexpand192

	mov	w12, #12
	add	x2, x2, #0x20		// x2 = schedule + 240
	b	.Lenc_key_done

// ---- 256-bit key: seven iterations; the last one exits after its first half ----
.align	4
.Lsetup256:
	ld1	{v4.16b}, [x0]		// second 16 key bytes
	mov	w1, #7
	mov	w12, #14
	st1	{v3.4s}, [x2], #16

.Lexpand256:
	tbl	v6.16b, {v4.16b}, v2.16b
	ext	v5.16b, v0.16b, v3.16b, #12
	st1	{v4.4s}, [x2], #16
	aese	v6.16b, v0.16b
	subs	w1, w1, #1

	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v3.16b, v3.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v6.16b, v6.16b, v1.16b
	eor	v3.16b, v3.16b, v5.16b
	shl	v1.16b, v1.16b, #1
	eor	v3.16b, v3.16b, v6.16b
	st1	{v3.4s}, [x2], #16
	b.eq	.Lenc_key_done		// flags still from the subs above

	dup	v6.4s, v3.s[3]		// just splat
	ext	v5.16b, v0.16b, v4.16b, #12
	aese	v6.16b, v0.16b

	eor	v4.16b, v4.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v4.16b, v4.16b, v5.16b
	ext	v5.16b, v0.16b, v5.16b, #12
	eor	v4.16b, v4.16b, v5.16b

	eor	v4.16b, v4.16b, v6.16b
	b	.Lexpand256

.Lenc_key_done:
	str	w12, [x2]		// round count at schedule + 240
	mov	x3, #0

.Lenc_key_ret:
	mov	x0, x3			// return value
	ldr	x29, [sp], #16
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
198
// aes_hw_set_decrypt_key -- build the key schedule used by the aesd routines.
//
//   In/Out: same registers and status codes as aes_hw_set_encrypt_key, which
//           is entered at .Lenc_key to write the encryption schedule first.
//   On success the round keys are then reversed in place: the first and last
//   keys are swapped as they are, every key in between is passed through
//   aesimc while being swapped with its mirror, and the middle key is
//   transformed where it sits.
//   Uses (in addition to the callee's): x4, v0, v1, flags.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp
	bl	.Lenc_key

	cbnz	x0, .Ldec_key_ret	// hand the error status straight back

	sub	x2, x2, #240		// restore original x2
	mov	x4, #-16		// step for the descending pointer
	add	x0, x2, x12, lsl #4	// end of key schedule

	// Outermost pair: plain swap.
	ld1	{v0.4s}, [x2]
	ld1	{v1.4s}, [x0]
	st1	{v0.4s}, [x0], x4
	st1	{v1.4s}, [x2], #16

.Limc_swap:
	ld1	{v0.4s}, [x2]
	ld1	{v1.4s}, [x0]
	aesimc	v0.16b, v0.16b
	aesimc	v1.16b, v1.16b
	st1	{v0.4s}, [x0], x4
	st1	{v1.4s}, [x2], #16
	cmp	x0, x2
	b.hi	.Limc_swap		// until the two pointers meet

	// Middle key: x0 == x2 here.
	ld1	{v0.4s}, [x2]
	aesimc	v0.16b, v0.16b
	st1	{v0.4s}, [x0]

	mov	x0, xzr			// return value
.Ldec_key_ret:
	ldp	x29, x30, [sp], #16
	ret
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
// aes_hw_encrypt -- encrypt one 16-byte block.
//   In:   x0 = input block, x1 = output block,
//         x2 = key schedule (round count read from offset 240)
//   Uses: w3, v0-v2, flags; x2 is advanced through the schedule.
//   Leaf routine: no stack frame.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	w3, [x2, #240]		// round count
	ld1	{v2.16b}, [x0]		// block to encrypt
	ld1	{v0.4s}, [x2], #16	// round key 0
	sub	w3, w3, #2
	ld1	{v1.4s}, [x2], #16	// round key 1

	// Two rounds per pass; v0/v1 are refilled for the following pass.
.Lenc_rounds:
	aese	v2.16b, v0.16b
	aesmc	v2.16b, v2.16b
	ld1	{v0.4s}, [x2], #16
	subs	w3, w3, #2
	aese	v2.16b, v1.16b
	aesmc	v2.16b, v2.16b
	ld1	{v1.4s}, [x2], #16
	b.gt	.Lenc_rounds

	aese	v2.16b, v0.16b
	aesmc	v2.16b, v2.16b
	ld1	{v0.4s}, [x2]		// final round key
	aese	v2.16b, v1.16b		// last round: no aesmc
	eor	v2.16b, v2.16b, v0.16b

	st1	{v2.16b}, [x1]
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt
// aes_hw_decrypt -- decrypt one 16-byte block.
//   In:   x0 = input block, x1 = output block,
//         x2 = key schedule (round count read from offset 240)
//   Uses: w3, v0-v2, flags; x2 is advanced through the schedule.
//   Leaf routine: no stack frame.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	w3, [x2, #240]		// round count
	ld1	{v2.16b}, [x0]		// block to decrypt
	ld1	{v0.4s}, [x2], #16	// round key 0
	sub	w3, w3, #2
	ld1	{v1.4s}, [x2], #16	// round key 1

	// Two rounds per pass; v0/v1 are refilled for the following pass.
.Ldec_rounds:
	aesd	v2.16b, v0.16b
	aesimc	v2.16b, v2.16b
	ld1	{v0.4s}, [x2], #16
	subs	w3, w3, #2
	aesd	v2.16b, v1.16b
	aesimc	v2.16b, v2.16b
	ld1	{v1.4s}, [x2], #16
	b.gt	.Ldec_rounds

	aesd	v2.16b, v0.16b
	aesimc	v2.16b, v2.16b
	ld1	{v0.4s}, [x2]		// final round key
	aesd	v2.16b, v1.16b		// last round: no aesimc
	eor	v2.16b, v2.16b, v0.16b

	st1	{v2.16b}, [x1]
	ret
.size	aes_hw_decrypt,.-aes_hw_decrypt
// aes_hw_cbc_encrypt -- CBC-mode encryption or decryption of whole blocks.
//
//   In:   x0 = input, x1 = output, x2 = length in bytes,
//         x3 = key schedule (round count read from offset 240),
//         x4 = 16-byte IV buffer,
//         w5 = non-zero to encrypt, zero to decrypt
//   Out:  v6 is stored back to the IV buffer at x4 on completion.
//         If x2 < 16 the routine returns at once and touches no buffer.
//   The length is rounded down to a multiple of 16.
//   Uses: x5-x8, x12, x14, v0-v7, v16-v23, flags; x0-x3 are modified.
//
//   x8 is the input post-increment: 16 normally, 0 once the remaining length
//   reaches zero, so the look-ahead load re-reads the current block instead
//   of stepping past it.
//   x30 is pushed but only x29 is reloaded; nothing in this routine writes x30.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp
	subs	x2, x2, #16
	mov	x8, #16
	b.lo	.Lcbc_abort		// fewer than 16 bytes
	csel	x8, xzr, x8, eq		// exactly one block: do not advance x0

	cmp	w5, #0			// en- or decrypting?
	ldr	w5, [x3, #240]		// w5 now holds the round count
	and	x2, x2, #-16
	ld1	{v6.16b}, [x4]		// IV
	ld1	{v0.16b}, [x0], x8

	ld1	{v16.4s, v17.4s}, [x3]	// load key schedule...
	sub	w5, w5, #6
	add	x7, x3, x5, lsl #4	// pointer to last 7 round keys
	sub	w5, w5, #2
	ld1	{v18.4s, v19.4s}, [x7], #32
	ld1	{v20.4s, v21.4s}, [x7], #32
	ld1	{v22.4s, v23.4s}, [x7], #32
	ld1	{v7.4s}, [x7]		// last round key

	add	x7, x3, #32
	mov	w6, w5
	b.eq	.Lcbc_dec		// flags still from "cmp w5, #0"

	cmp	w5, #2			// rounds - 8 == 2, i.e. ten rounds
	eor	v0.16b, v0.16b, v6.16b
	eor	v5.16b, v16.16b, v7.16b	// rndkey[0] ^ rndkey[last]
	b.eq	.Lcbc_enc128

	ld1	{v2.4s, v3.4s}, [x7]
	add	x7, x3, #16
	add	x6, x3, #16*4
	add	x12, x3, #16*5
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	add	x14, x3, #16*6
	add	x3, x3, #16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	st1	{v6.16b}, [x1], #16	// previous ciphertext block
.Lenter_cbc_enc:
	aese	v0.16b, v17.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v2.16b
	aesmc	v0.16b, v0.16b
	ld1	{v16.4s}, [x6]
	cmp	w5, #4			// rounds - 8 == 4, i.e. twelve rounds
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v17.4s}, [x12]
	b.eq	.Lcbc_enc192

	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	ld1	{v16.4s}, [x14]
	aese	v0.16b, v17.16b
	aesmc	v0.16b, v0.16b
	ld1	{v17.4s}, [x3]
	nop

.Lcbc_enc192:
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	subs	x2, x2, #16
	aese	v0.16b, v17.16b
	aesmc	v0.16b, v0.16b
	csel	x8, xzr, x8, eq
	aese	v0.16b, v18.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v19.16b
	aesmc	v0.16b, v0.16b
	ld1	{v16.16b}, [x0], x8	// next plaintext block
	aese	v0.16b, v20.16b
	aesmc	v0.16b, v0.16b
	eor	v16.16b, v16.16b, v5.16b
	aese	v0.16b, v21.16b
	aesmc	v0.16b, v0.16b
	ld1	{v17.4s}, [x7]		// re-pre-load rndkey[1]
	aese	v0.16b, v22.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v23.16b
	eor	v6.16b, v0.16b, v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc		// flags from "subs x2" above

	st1	{v6.16b}, [x1], #16
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	ld1	{v2.4s, v3.4s}, [x7]
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	st1	{v6.16b}, [x1], #16	// previous ciphertext block
.Lenter_cbc_enc128:
	aese	v0.16b, v17.16b
	aesmc	v0.16b, v0.16b
	subs	x2, x2, #16
	aese	v0.16b, v2.16b
	aesmc	v0.16b, v0.16b
	csel	x8, xzr, x8, eq
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v18.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v19.16b
	aesmc	v0.16b, v0.16b
	ld1	{v16.16b}, [x0], x8	// next plaintext block
	aese	v0.16b, v20.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v21.16b
	aesmc	v0.16b, v0.16b
	aese	v0.16b, v22.16b
	aesmc	v0.16b, v0.16b
	eor	v16.16b, v16.16b, v5.16b
	aese	v0.16b, v23.16b
	eor	v6.16b, v0.16b, v7.16b	// v6 = ciphertext block
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b}, [x1], #16
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	ld1	{v18.16b}, [x0], #16
	subs	x2, x2, #32		// bias
	add	w6, w5, #2
	mov	v3.16b, v0.16b
	mov	v1.16b, v0.16b
	mov	v19.16b, v18.16b
	b.lo	.Lcbc_dec_tail		// one or two blocks in total

	mov	v1.16b, v18.16b
	ld1	{v18.16b}, [x0], #16
	mov	v2.16b, v0.16b
	mov	v3.16b, v1.16b
	mov	v19.16b, v18.16b

	// Three blocks per pass in v0, v1 and v18; v2, v3 and v19 keep the
	// matching input blocks for the chaining XOR.
.Loop3x_cbc_dec:
	aesd	v0.16b, v16.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v16.16b
	aesimc	v18.16b, v18.16b
	ld1	{v16.4s}, [x7], #16
	subs	w6, w6, #2
	aesd	v0.16b, v17.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v17.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v17.16b
	aesimc	v18.16b, v18.16b
	ld1	{v17.4s}, [x7], #16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b, v16.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v16.16b
	aesimc	v18.16b, v18.16b
	eor	v4.16b, v6.16b, v7.16b
	subs	x2, x2, #0x30
	eor	v5.16b, v2.16b, v7.16b
	csel	x6, x2, x6, lo		// x6, w6, is zero at this point
	aesd	v0.16b, v17.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v17.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v17.16b
	aesimc	v18.16b, v18.16b
	eor	v17.16b, v3.16b, v7.16b
	add	x0, x0, x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	mov	v6.16b, v19.16b
	mov	x7, x3
	aesd	v0.16b, v20.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v20.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v20.16b
	aesimc	v18.16b, v18.16b
	ld1	{v2.16b}, [x0], #16
	aesd	v0.16b, v21.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v21.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v21.16b
	aesimc	v18.16b, v18.16b
	ld1	{v3.16b}, [x0], #16
	aesd	v0.16b, v22.16b
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v22.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v22.16b
	aesimc	v18.16b, v18.16b
	ld1	{v19.16b}, [x0], #16
	aesd	v0.16b, v23.16b
	aesd	v1.16b, v23.16b
	aesd	v18.16b, v23.16b
	ld1	{v16.4s}, [x7], #16	// re-pre-load rndkey[0]
	add	w6, w5, #2
	eor	v4.16b, v4.16b, v0.16b
	eor	v5.16b, v5.16b, v1.16b
	eor	v18.16b, v18.16b, v17.16b
	ld1	{v17.4s}, [x7], #16	// re-pre-load rndkey[1]
	st1	{v4.16b}, [x1], #16
	mov	v0.16b, v2.16b
	st1	{v5.16b}, [x1], #16
	mov	v1.16b, v3.16b
	st1	{v18.16b}, [x1], #16
	mov	v18.16b, v19.16b
	b.hs	.Loop3x_cbc_dec		// flags from "subs x2, x2, #0x30"

	cmn	x2, #0x30
	b.eq	.Lcbc_done		// nothing left over
	nop

	// One or two trailing blocks, carried in v1 and v18.
.Lcbc_dec_tail:
	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v16.16b
	aesimc	v18.16b, v18.16b
	ld1	{v16.4s}, [x7], #16
	subs	w6, w6, #2
	aesd	v1.16b, v17.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v17.16b
	aesimc	v18.16b, v18.16b
	ld1	{v17.4s}, [x7], #16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v16.16b
	aesimc	v18.16b, v18.16b
	aesd	v1.16b, v17.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v17.16b
	aesimc	v18.16b, v18.16b
	aesd	v1.16b, v20.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v20.16b
	aesimc	v18.16b, v18.16b
	cmn	x2, #0x20		// flags consumed by b.eq below
	aesd	v1.16b, v21.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v21.16b
	aesimc	v18.16b, v18.16b
	eor	v5.16b, v6.16b, v7.16b
	aesd	v1.16b, v22.16b
	aesimc	v1.16b, v1.16b
	aesd	v18.16b, v22.16b
	aesimc	v18.16b, v18.16b
	eor	v17.16b, v3.16b, v7.16b
	aesd	v1.16b, v23.16b
	aesd	v18.16b, v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b, v5.16b, v1.16b
	eor	v17.16b, v17.16b, v18.16b
	mov	v6.16b, v19.16b
	st1	{v5.16b}, [x1], #16
	st1	{v17.16b}, [x1], #16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b, v5.16b, v18.16b
	mov	v6.16b, v19.16b
	st1	{v5.16b}, [x1], #16

.Lcbc_done:
	st1	{v6.16b}, [x4]		// write the IV buffer back
.Lcbc_abort:
	ldr	x29, [sp], #16
	ret
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
// aes_hw_ctr32_encrypt_blocks -- XOR input with an AES-CTR keystream.
//
//   In:   x0 = input, x1 = output,
//         x2 = number of 16-byte blocks,
//         x3 = key schedule (round count read from offset 240),
//         x4 = 16-byte counter block; the 32-bit word at offset 12 is the
//              counter, byte-reversed after loading unless __ARMEB__
//   Out:  x2 blocks written at x1.  The counter block at x4 is only read,
//         never written back.
//   Uses: x5-x10, x12, v0-v7, v16-v23, flags; x0-x2 are advanced/consumed.
//
//   A block count of zero returns immediately.  Without that guard a zero
//   count falls into .Lctr32_tail, which loads from x0 and stores 32 bytes
//   to x1.
//   x30 is pushed but only x29 is reloaded; nothing in this routine writes x30.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	cbz	x2,.Lctr32_done		// no blocks: leave in/out untouched
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]		// counter word
	ld1	{v0.4s},[x4]		// whole counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags feed csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// last round key
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo	// one block: second tail load must not advance
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b	// v6 keeps the counter block as template
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	.Lctr32_tail		// one or two blocks in total
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	.Loop3x_ctr32

	// Three counter blocks per pass in v0, v1 and v18.
.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Last five rounds, interleaved with loading the input and building
	// the next three counter blocks (w9, w10, w12 -> lane 3 of v0, v1, v18).
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32		// flags from "subs x2,x2,#3"

	adds	x2,x2,#3		// remove the bias: blocks still to do
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq	// one block left: do not advance x0

	// One or two trailing blocks, carried in v0 and v1.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// single block: skip the second store
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
772#endif
773#endif
774#endif  // !OPENSSL_NO_ASM
775.section	.note.GNU-stack,"",%progbits
776