#if defined(__arm__)
#include <openssl/arm_arch.h>

/* The routines below use NEON registers and AES instructions (emitted as raw
 * .byte encodings), so they are only assembled when the maximum targeted
 * architecture is ARMv7 or later. */
#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a
.fpu	neon
.code	32

@ Constant table for aes_hw_set_encrypt_key, which reads it through r3
@ with vld1.32 (rows 0 and 1 together, row 2 later on its own).
@   row 0: round constant 0x01 replicated into all four 32-bit lanes
@   row 1: vtbl byte indices; on a little-endian build each word is the
@          byte sequence 13,14,15,12, so applying it to a round key copies
@          that key's last word, rotated by one byte, into every lane
@   row 2: round constant 0x1b replicated, loaded once the 8-bit doubling
@          of row 0 has run through 0x80
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
@-----------------------------------------------------------------------
@ aes_hw_set_encrypt_key: expand a user key into an AES encryption key
@ schedule.
@
@ In:    r0 = pointer to the user key bytes
@        r1 = key length in bits (128, 192 or 256 pass the checks)
@        r2 = pointer to the key schedule buffer; round keys are written
@             from offset 0 and the round count as a word at offset 240
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 is NULL
@             -2 if r1 is below 128, above 256 or not a multiple of 64
@ Exit state on success (aes_hw_set_decrypt_key enters at .Lenc_key and
@ depends on it): r12 = round count (10, 12 or 14), r2 = schedule + 240.
@ Clobbers: r1, r2, r3, r12, q0-q3, q8-q10, flags. r0 is advanced past
@ the key bytes before being overwritten with the result.
@
@ Technique shared by all three key sizes: q0 is kept at zero, so
@   vext.8 qX,q0,qY,#12  shifts qY up by one 32-bit word with zero fill
@                        (three shift/xor steps build the running xor of
@                        the previous round key words), and
@   aese q10,q0          adds a zero key, leaving only the AES byte
@                        substitution of the splatted word in q10.
@-----------------------------------------------------------------------
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ provisional result: NULL argument
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2			@ provisional result: bad key length
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ length must be a multiple of 64 bits
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags stay live until the blt/beq below;
					@ nothing in between writes them

	veor	q0,q0,q0		@ q0 = 0 for the rest of the routine
	vld1.8	{q3},[r0]!		@ q3 = key bytes 0..15
	mov	r1,#8			@ reuse r1 as the loop counter
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
@ 128-bit key: each pass stores the current round key and derives the
@ next one. Eight passes use round constants 0x01..0x80.
.Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = rotated last word of q3 in every lane
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ fold in the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant = current * 2
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ switch to the 0x1b row of .Lrcon

	@ Ninth derivation, round constant 0x1b.
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ 0x1b -> 0x36
	veor	q3,q3,q10

	@ Tenth derivation, round constant 0x36.
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no write-back: r2 = schedule + 160
	add	r2,r2,#0x50		@ r2 = schedule + 240

	mov	r12,#10			@ round count
	b	.Ldone

.align	4
@ 192-bit key: q3 holds key words 0-3 and d16 words 4-5. Each of the
@ eight passes stores 8 + 16 bytes.
.L192:
	vld1.8	{d16},[r0]!		@ d16 = key bytes 16..23
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10		@ adjust the mask: indices drop by 8 so the
					@ vtbl lookups address the last word in d16

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]		@ splat the last word of q3
	veor	q9,q9,q8
	veor	q10,q10,q1		@ fold in the round constant
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192		@ flags from the subs above

	mov	r12,#12			@ round count
	add	r2,r2,#0x20		@ r2 = schedule + 240
	b	.Ldone

.align	4
@ 256-bit key: q3 and q8 hold the two key halves. Seven passes; the
@ last one leaves through the beq after storing the final round key.
.L256:
	vld1.8	{q8},[r0]		@ q8 = key bytes 16..31 (r0 already advanced)
	mov	r1,#7
	mov	r12,#14			@ round count
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ fold in the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags from the subs above; r2 = schedule + 240

	@ Second half-step: substituted last word of q3, splatted without
	@ rotation and without a round constant.
	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule + 240
	mov	r3,#0			@ success

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
@-----------------------------------------------------------------------
@ aes_hw_set_decrypt_key: build an AES decryption key schedule.
@
@ Takes the same r0/r1/r2 arguments as aes_hw_set_encrypt_key. It runs
@ the encryption key expansion through the .Lenc_key entry, then
@ reverses the order of the round keys in place, applying aesimc to
@ every round key except the first and the last.
@
@ Out:   r0 = 0 on success, otherwise the non-zero code returned by the
@             key expansion (the schedule is then left untouched here)
@ Saves and restores r4. Clobbers q0, q1 and everything the key
@ expansion clobbers.
@-----------------------------------------------------------------------
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort		@ propagate the error code in r0

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ post-index step for the descending pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and the last round key unchanged.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Swap the remaining pairs, walking inward from both ends and
	@ transforming each key with aesimc on the way.
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc		@ unsigned: continue while the pointers have not met

	@ The number of round keys is odd, so one key in the middle is
	@ left; transform it in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
@-----------------------------------------------------------------------
@ aes_hw_encrypt: encrypt a single 16-byte block.
@
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule; the round count is read from
@             offset 240
@ Out:   16 bytes stored at [r1]; r0 is not set to a result value.
@ Clobbers: r2, r3, q0-q2, flags.
@ The loop consumes two round keys per pass, so it assumes an even round
@ count such as the 10/12/14 written by the key setup routine.
@-----------------------------------------------------------------------
aes_hw_encrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!		@ q0 = round key 0
	vld1.8	{q2},[r0]		@ q2 = state
	sub	r3,r3,#2		@ the last two rounds are done after the loop
	vld1.32	{q1},[r2]!		@ q1 = round key 1

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	@ Final two rounds: the last one has no aesmc and ends with an
	@ xor of the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ q0 = last round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
@-----------------------------------------------------------------------
@ aes_hw_decrypt: decrypt a single 16-byte block.
@
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule; the round count is read from
@             offset 240 (presumably a schedule prepared by
@             aes_hw_set_decrypt_key - confirm against callers)
@ Out:   16 bytes stored at [r1]; r0 is not set to a result value.
@ Clobbers: r2, r3, q0-q2, flags.
@ The loop consumes two round keys per pass, so it assumes an even round
@ count.
@-----------------------------------------------------------------------
aes_hw_decrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!		@ q0 = first round key in the schedule
	vld1.8	{q2},[r0]		@ q2 = state
	sub	r3,r3,#2		@ the last two rounds are done after the loop
	vld1.32	{q1},[r2]!		@ q1 = second round key

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	@ Final two rounds: the last one has no aesimc and ends with an
	@ xor of the last round key.
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ q0 = last round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
@-----------------------------------------------------------------------
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole
@ 16-byte blocks.
@
@ In:    r0      = input pointer
@        r1      = output pointer
@        r2      = length in bytes; rounded down to a multiple of 16,
@                  and the routine returns without any output when it is
@                  below 16
@        r3      = key schedule pointer, round count read at offset 240
@        [sp]    = pointer to the 16-byte IV; read on entry and
@                  overwritten with the new chaining value on exit
@        [sp,#4] = zero selects decryption, non-zero encryption
@ Saves and restores r4-r8 and d8-d15. lr is saved as well because r14
@ is used as a plain pointer register in the encryption loop.
@
@ Register roles after the common setup:
@        q8,q9   = round keys 0 and 1 (reloaded as the rounds advance)
@        q10-q15 = the six round keys before the last one
@        q7      = last round key
@        q6      = current IV / chaining value
@        r5      = round count - 8
@-----------------------------------------------------------------------
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16			@ r8 = input step
	blo	.Lcbc_abort		@ less than one block
	moveq	r8,#0			@ exactly one block: do not step past it

	cmp	r5,#0			@ en- or decrypting? (tested by beq .Lcbc_dec)
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags from cmp r5,#0 above

	@ ---- encryption ----
	cmp	r5,#2			@ round count 10 takes the dedicated loop
	veor	q0,q0,q6		@ first plaintext block ^ IV
	veor	q5,q8,q7		@ q5 = round key 0 ^ last round key
	beq	.Lcbc_enc128

	@ 12- and 14-round path. Pointers to the round keys that do not fit
	@ in registers: r7 -> key 1, r6 -> key 4, r12 -> key 5,
	@ r14 -> key 6, r3 -> key 7.
	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
@ On loop re-entry q0 is the previous state before the last round key
@ was xored in, and q8 is next plaintext ^ key 0 ^ last key, so this
@ first aese both finishes the chaining xor and starts the next block.
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ round count 12 skips two rounds
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags consumed by moveq and the final bhs
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block coming up: stop stepping the input
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next plaintext block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5		@ q8 = plaintext ^ key 0 ^ last key
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
@ 10-round encryption: all round keys stay in registers
@ (q8/q9 = keys 0/1, q2/q3 = keys 2/3, q10-q15 and q7 = keys 4-10).
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16		@ flags consumed by moveq and the final bhs
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next plaintext block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5		@ q8 = plaintext ^ key 0 ^ last key
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
@ ---- decryption ----
@ Three blocks per pass in q0, q1, q10. Copies of the matching
@ ciphertext blocks are kept in q2, q3, q11 because each one is the
@ chaining value for the block after it.
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2		@ r6 = round counter for the key loop
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail		@ fewer than three blocks in total

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ IV ^ last round key
	subs	r2,r2,#0x30		@ flags consumed by movlo and the final bhs
	veor	q5,q2,q7		@ ciphertext 0 ^ last round key
	movlo	r6,r2			@ r6 is zero at this point; when fewer than
					@ three blocks remain it becomes the
					@ negative remainder instead
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7		@ ciphertext 1 ^ last round key
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1 and q10
					@ are loaded with the last blocks
	vorr	q6,q11,q11		@ ciphertext 2 becomes the next IV
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0		@ plaintext 0
	veor	q5,q5,q1		@ plaintext 1
	veor	q10,q10,q9		@ plaintext 2
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec		@ flags from subs r2,r2,#0x30

	cmn	r2,#0x30
	beq	.Lcbc_done		@ nothing left over
	nop

@ One or two remaining blocks, processed in q1 and q10.
.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ Z set when exactly one block is left;
					@ tested by beq .Lcbc_dec_one below
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV buffer
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
@-----------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks: CTR-mode processing of whole blocks with
@ a 32-bit counter.
@
@ In:    r0   = input pointer
@        r1   = output pointer
@        r2   = number of 16-byte blocks; zero returns immediately
@        r3   = key schedule pointer, round count read at offset 240
@        [sp] = pointer to the 16-byte counter block
@ Only the last 32-bit word of the counter block is incremented, once
@ per block, wrapping modulo 2^32. The counter buffer itself is only
@ read, never written back.
@ Saves and restores r4-r10 and d8-d15.
@
@ Register roles after setup:
@        q8,q9   = round keys 0 and 1 (reloaded as the rounds advance)
@        q12-q15 = the four round keys before the last one
@        q7      = last round key
@        q6      = copy of the counter block used as a template
@        r8      = next counter value in native byte order
@        r5      = round count - 6
@-----------------------------------------------------------------------
aes_hw_ctr32_encrypt_blocks:
	cmp	r2,#0			@ without this check a zero block count would
	bxeq	lr			@ fall into the tail and store two blocks
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]		@ r8 = counter word as stored
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16			@ r12 = input step used by the tail
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ single block: the tail must not step past it
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10		@ q1 = counter + 1
	bls	.Lctr32_tail		@ one or two blocks only
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12		@ q10 = counter + 2
	b	.Loop3x_ctr32

.align	4
@ Three counter blocks per pass in q0, q1, q10.
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ Remaining rounds. The three states move to q4, q5, q9 so that
	@ q0, q1, q10 can be refilled from the template in q6 with the
	@ next three counter values while these rounds complete.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ input 0 ^ last round key
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7		@ input 1 ^ last round key
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7		@ input 2 ^ last round key
	rev	r9,r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3		@ flags consumed by the bhs below
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ single block: do not step past it

@ One or two remaining blocks, processed in q0 and q1. Both lanes are
@ always computed; the second result is stored only when r2 is not 1.
.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ r12 is 0 when only one block is left
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ only one block was left
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif	/* __ARM_MAX_ARCH__>=7 */
#endif	/* defined(__arm__) */
763