1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__arm__)
13#include <GFp/arm_arch.h>
14
15#if __ARM_MAX_ARCH__>=7
@ Assembler setup for the ARMv7/NEON build of this file, followed by the
@ constant table read by GFp_aes_hw_set_encrypt_key.
16.text
17.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
18.fpu	neon
19.code	32
20#undef	__thumb2__
21.align	5
@ .Lrcon holds three 16-byte rows, each a 32-bit value repeated four times:
@   row 0: 0x01, the initial round constant; the key-expansion code shifts
@          it left by one bit after every round.
@   row 1: vtbl byte indices 0x0d,0x0e,0x0f,0x0c per lane (little-endian
@          byte order), which pick the last word of a round key rotated
@          by one byte and copy it into every lane.
@   row 2: 0x1b, the round constant that the 128-bit path reloads after
@          its eight-iteration loop.
22.Lrcon:
23.long	0x01,0x01,0x01,0x01
24.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
25.long	0x1b,0x1b,0x1b,0x1b
26
27.text
28
@ GFp_aes_hw_set_encrypt_key: expand a 128-bit or 256-bit AES key into an
@ encryption key schedule using the ARMv8 AES instructions.
@
@ In:   r0 = pointer to the user key bytes
@       r1 = key length in bits (128 or 256)
@       r2 = pointer to the output schedule. Round keys are stored back to
@            back from offset 0 and the round count (10 or 14) is stored
@            as a 32-bit word at offset 240.
@ Out:  r0 = 0 on success
@            -1 if r0 or r2 is zero
@            -2 if the key length is not 128 or 256 bits
@ Uses: r1, r2 (advanced), r3, r12, q0-q3, q8-q10, flags. No stack use.
29.globl	GFp_aes_hw_set_encrypt_key
30.hidden	GFp_aes_hw_set_encrypt_key
31.type	GFp_aes_hw_set_encrypt_key,%function
32.align	5
33GFp_aes_hw_set_encrypt_key:
34.Lenc_key:
	@ Argument checks. r3 holds the value that .Lenc_key_abort returns.
35	mov	r3,#-1
36	cmp	r0,#0
37	beq	.Lenc_key_abort
38	cmp	r2,#0
39	beq	.Lenc_key_abort
40	mov	r3,#-2
41	cmp	r1,#128
42	blt	.Lenc_key_abort
43	cmp	r1,#256
44	bgt	.Lenc_key_abort
45	tst	r1,#0x3f
46	bne	.Lenc_key_abort
	@ The checks above still admit 192, but the 192-bit expansion path
	@ no longer exists. Reject it here, while r3 is still -2; otherwise
	@ it would reach .L256, load 32 bytes from a 24-byte key and report
	@ success.
	cmp	r1,#192
	beq	.Lenc_key_abort
47
48	adr	r3,.Lrcon
49	cmp	r1,#192		@ flags stay live until the blt below
50
51	veor	q0,q0,q0		@ q0 = 0: zero key for aese and shift-in source for vext
52	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes; r0 now points at byte 16
53	mov	r1,#8		@ reuse r1
54	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = rotate-n-splat mask; r3 -> 0x1b row
55
56	blt	.Loop128
57	@ 192-bit key support was removed.
58	b	.L256
59
	@ 128-bit key: each iteration stores the current round key (q3) and
	@ derives the next one. Eight iterations, then two unrolled rounds.
60.align	4
61.Loop128:
62	vtbl.8	d20,{q3},d4		@ q10 = last word of q3, rotated one byte,
63	vtbl.8	d21,{q3},d5		@       copied into all four lanes
64	vext.8	q9,q0,q3,#12		@ q9 = q3 moved up one word, zero shifted in
65	vst1.32	{q3},[r2]!
66.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
67	subs	r1,r1,#1		@ loop counter; flags are consumed by the bne below
68
	@ Three shift-and-xor steps leave word i of q3 holding the xor of
	@ the old words 0..i.
69	veor	q3,q3,q9
70	vext.8	q9,q0,q9,#12
71	veor	q3,q3,q9
72	vext.8	q9,q0,q9,#12
73	veor	q10,q10,q1		@ fold in the round constant
74	veor	q3,q3,q9
75	vshl.u8	q1,q1,#1		@ next round constant
76	veor	q3,q3,q10
77	bne	.Loop128
78
	@ The byte-wise shift above has pushed the constant out of range
	@ after eight rounds; continue with the 0x1b row.
79	vld1.32	{q1},[r3]
80
81	vtbl.8	d20,{q3},d4
82	vtbl.8	d21,{q3},d5
83	vext.8	q9,q0,q3,#12
84	vst1.32	{q3},[r2]!
85.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
86
87	veor	q3,q3,q9
88	vext.8	q9,q0,q9,#12
89	veor	q3,q3,q9
90	vext.8	q9,q0,q9,#12
91	veor	q10,q10,q1
92	veor	q3,q3,q9
93	vshl.u8	q1,q1,#1
94	veor	q3,q3,q10
95
96	vtbl.8	d20,{q3},d4
97	vtbl.8	d21,{q3},d5
98	vext.8	q9,q0,q3,#12
99	vst1.32	{q3},[r2]!
100.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
101
102	veor	q3,q3,q9
103	vext.8	q9,q0,q9,#12
104	veor	q3,q3,q9
105	vext.8	q9,q0,q9,#12
106	veor	q10,q10,q1
107	veor	q3,q3,q9
108	veor	q3,q3,q10
109	vst1.32	{q3},[r2]		@ last round key, at schedule offset 160
110	add	r2,r2,#0x50		@ r2 = schedule + 240, where .Ldone stores the round count
111
112	mov	r12,#10
113	b	.Ldone
114
115@ 192-bit key support was removed.
116
	@ 256-bit key: q3 and q8 hold the two most recent round keys. Each
	@ iteration stores q8 and a newly derived q3, then derives the next
	@ q8 unless the counter has reached zero.
117.align	4
118.L256:
119	vld1.8	{q8},[r0]		@ q8 = key bytes 16..31
120	mov	r1,#7
121	mov	r12,#14
122	vst1.32	{q3},[r2]!
123
124.Loop256:
125	vtbl.8	d20,{q8},d4		@ q10 = last word of q8, rotated one byte,
126	vtbl.8	d21,{q8},d5		@       copied into all four lanes
127	vext.8	q9,q0,q3,#12
128	vst1.32	{q8},[r2]!
129.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
130	subs	r1,r1,#1		@ flags are consumed by the beq below
131
132	veor	q3,q3,q9
133	vext.8	q9,q0,q9,#12
134	veor	q3,q3,q9
135	vext.8	q9,q0,q9,#12
136	veor	q10,q10,q1
137	veor	q3,q3,q9
138	vshl.u8	q1,q1,#1
139	veor	q3,q3,q10
140	vst1.32	{q3},[r2]!
141	beq	.Ldone			@ after the seventh pass r2 = schedule + 240
142
	@ Second half of the step: the last word of the new q3 goes through
	@ aese with a zero key, without rotation or round constant.
143	vdup.32	q10,d7[1]
144	vext.8	q9,q0,q8,#12
145.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
146
147	veor	q8,q8,q9
148	vext.8	q9,q0,q9,#12
149	veor	q8,q8,q9
150	vext.8	q9,q0,q9,#12
151	veor	q8,q8,q9
152
153	veor	q8,q8,q10
154	b	.Loop256
155
156.Ldone:
157	str	r12,[r2]		@ round count at schedule offset 240
158	mov	r3,#0
159
160.Lenc_key_abort:
161	mov	r0,r3			@ return value
162
163	bx	lr
164.size	GFp_aes_hw_set_encrypt_key,.-GFp_aes_hw_set_encrypt_key
@ GFp_aes_hw_encrypt: encrypt one 16-byte block with the ARMv8 AES
@ instructions.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = key schedule: round keys from offset 0 and the round count as
@            a 32-bit word at offset 240 (the layout written by
@            GFp_aes_hw_set_encrypt_key)
@ Uses: r2 (advanced), r3, q0-q2, flags. No stack use; r0 is not set to a
@       return value.
165.globl	GFp_aes_hw_encrypt
166.hidden	GFp_aes_hw_encrypt
167.type	GFp_aes_hw_encrypt,%function
168.align	5
169GFp_aes_hw_encrypt:
	@ NOTE(review): this macro is not defined in this file; presumably it
	@ comes from GFp/arm_arch.h and expands to nothing on 32-bit ARM.
	@ Confirm against the header.
170	AARCH64_VALID_CALL_TARGET
171	ldr	r3,[r2,#240]		@ r3 = round count
172	vld1.32	{q0},[r2]!		@ q0 = round key 0
173	vld1.8	{q2},[r0]		@ q2 = block being encrypted
174	sub	r3,r3,#2		@ the last two rounds are done after the loop
175	vld1.32	{q1},[r2]!		@ q1 = round key 1
176
	@ Two rounds per iteration. q0 and q1 alternate as the current round
	@ key and each is reloaded with the key two positions further on.
177.Loop_enc:
178.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
179.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
180	vld1.32	{q0},[r2]!
181	subs	r3,r3,#2
182.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
183.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
184	vld1.32	{q1},[r2]!
185	bgt	.Loop_enc
186
	@ Final two rounds: the last aese is not followed by aesmc, and the
	@ last round key is applied with a plain xor.
187.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
188.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
189	vld1.32	{q0},[r2]		@ q0 = last round key
190.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
191	veor	q2,q2,q0
192
193	vst1.8	{q2},[r1]
194	bx	lr
195.size	GFp_aes_hw_encrypt,.-GFp_aes_hw_encrypt
@ GFp_aes_hw_decrypt: decrypt one 16-byte block with the ARMv8 AES
@ instructions. Same structure as GFp_aes_hw_encrypt, using aesd/aesimc
@ in place of aese/aesmc.
@
@ In:   r0 = pointer to the input block
@       r1 = pointer to the output block
@       r2 = key schedule: round keys from offset 0 and the round count as
@            a 32-bit word at offset 240
@ Uses: r2 (advanced), r3, q0-q2, flags. No stack use; r0 is not set to a
@       return value.
@
@ NOTE(review): the round keys are consumed in ascending address order, so
@ the schedule presumably has to be prepared for decryption by the caller.
@ No routine in this file produces such a schedule; verify against callers.
196.globl	GFp_aes_hw_decrypt
197.hidden	GFp_aes_hw_decrypt
198.type	GFp_aes_hw_decrypt,%function
199.align	5
200GFp_aes_hw_decrypt:
	@ NOTE(review): this macro is not defined in this file; presumably it
	@ comes from GFp/arm_arch.h and expands to nothing on 32-bit ARM.
	@ Confirm against the header.
201	AARCH64_VALID_CALL_TARGET
202	ldr	r3,[r2,#240]		@ r3 = round count
203	vld1.32	{q0},[r2]!		@ q0 = first round key in the schedule
204	vld1.8	{q2},[r0]		@ q2 = block being decrypted
205	sub	r3,r3,#2		@ the last two rounds are done after the loop
206	vld1.32	{q1},[r2]!		@ q1 = second round key in the schedule
207
	@ Two rounds per iteration. q0 and q1 alternate as the current round
	@ key and each is reloaded with the key two positions further on.
208.Loop_dec:
209.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
210.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
211	vld1.32	{q0},[r2]!
212	subs	r3,r3,#2
213.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
214.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
215	vld1.32	{q1},[r2]!
216	bgt	.Loop_dec
217
	@ Final two rounds: the last aesd is not followed by aesimc, and the
	@ last round key is applied with a plain xor.
218.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
219.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
220	vld1.32	{q0},[r2]		@ q0 = last round key in the schedule
221.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
222	veor	q2,q2,q0
223
224	vst1.8	{q2},[r1]
225	bx	lr
226.size	GFp_aes_hw_decrypt,.-GFp_aes_hw_decrypt
@ GFp_aes_hw_ctr32_encrypt_blocks: AES in counter mode over whole 16-byte
@ blocks. The counter is the last 32-bit word of the counter block, kept
@ big-endian in memory and incremented with 32-bit arithmetic.
@
@ In:   r0   = input pointer
@       r1   = output pointer
@       r2   = number of 16-byte blocks (0 returns immediately)
@       r3   = key schedule: round keys from offset 0 and the round count
@              as a 32-bit word at offset 240
@       [sp] = pointer to the 16-byte counter block (fifth argument)
@ Out:  nothing is returned in r0. The counter block in memory is only
@       read, never written back.
@ Saves and restores r4-r10, lr and d8-d15.
@
@ Register roles in the main body:
@   q0, q1, q10   counter blocks being encrypted (three at a time)
@   q6            staging copy of the counter block (see the errata note)
@   q8, q9        rolling pair of early round keys, reloaded through r7
@   q12-q15, q7   the last five round keys, loaded once
@   r8            host-order counter value of the block held in q10
@   r5, r6        round count minus 6, and its working copy for the loops
227.globl	GFp_aes_hw_ctr32_encrypt_blocks
228.hidden	GFp_aes_hw_ctr32_encrypt_blocks
229.type	GFp_aes_hw_ctr32_encrypt_blocks,%function
230.align	5
231GFp_aes_hw_ctr32_encrypt_blocks:
	@ Nothing to do for a zero block count. Without this check a count
	@ of 0 would take the .Lctr32_tail path, which stores two blocks
	@ unless the count is exactly 1. No stack has been touched yet, so
	@ returning here is safe.
	cmp	r2,#0
	bxeq	lr
232	mov	ip,sp
233	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
234	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
235	ldr	r4, [ip]		@ load remaining arg
236	ldr	r5,[r3,#240]		@ r5 = round count
237
238	ldr	r8, [r4, #12]		@ r8 = counter word as stored in memory
239	vld1.32	{q0},[r4]		@ q0 = counter block for the first output block
240
241	vld1.32	{q8,q9},[r3]		@ load key schedule...
242	sub	r5,r5,#4
243	mov	r12,#16
244	cmp	r2,#2			@ flags stay live for movlo and bls below
245	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
246	sub	r5,r5,#2
247	vld1.32	{q12,q13},[r7]!
248	vld1.32	{q14,q15},[r7]!
249	vld1.32	{q7},[r7]		@ q7 = last round key
250	add	r7,r3,#32		@ r7 -> round key 2
251	mov	r6,r5
252	movlo	r12,#0			@ single block: the tail must not step r0 past it
253
254	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
255	@ affected by silicon errata #1742098 [0] and #1655431 [1],
256	@ respectively, where the second instruction of an aese/aesmc
257	@ instruction pair may execute twice if an interrupt is taken right
258	@ after the first instruction consumes an input register of which a
259	@ single 32-bit lane has been updated the last time it was modified.
260	@
261	@ This function uses a counter in one 32-bit lane. The vmov.32
262	@ instructions could write to q1 and q10 directly, but that trips this bug.
263	@ We write to q6 and copy to the final register as a workaround.
264	@
265	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
266	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
267#ifndef __ARMEB__
268	rev	r8, r8
269#endif
270	add	r10, r8, #1
271	vorr	q6,q0,q0		@ q6 = staging copy of the counter block
272	rev	r10, r10
273	vmov.32	d13[1],r10		@ d13[1] is the last word of q6
274	add	r8, r8, #2
275	vorr	q1,q6,q6		@ q1 = counter block + 1
276	bls	.Lctr32_tail		@ one or two blocks: skip the three-block loop
277	rev	r12, r8
278	vmov.32	d13[1],r12
279	sub	r2,r2,#3		@ bias
280	vorr	q10,q6,q6		@ q10 = counter block + 2
281	b	.Loop3x_ctr32
282
	@ Main loop: three counter blocks per pass. The inner loop applies
	@ two rounds per iteration from q8/q9 and reloads them through r7.
283.align	4
284.Loop3x_ctr32:
285.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
286.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
287.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
288.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
289.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
290.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
291	vld1.32	{q8},[r7]!
292	subs	r6,r6,#2
293.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
294.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
295.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
296.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
297.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
298.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
299	vld1.32	{q9},[r7]!
300	bgt	.Loop3x_ctr32
301
	@ Remaining rounds. The three blocks move to q4, q5 and q9 so that
	@ q0, q1 and q10 can be refilled with the next three counter values
	@ while these rounds complete. The three input blocks are loaded
	@ into q2, q3 and q11 and pre-xored with the last round key (q7).
302.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
303.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
304.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
305.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
306	vld1.8	{q2},[r0]!
307	add	r9,r8,#1
308.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
309.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
310	vld1.8	{q3},[r0]!
311	rev	r9,r9
312.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
313.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
314.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
315.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
316	vld1.8	{q11},[r0]!
317	mov	r7,r3			@ rewind r7 for the round-key reloads below
318.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
319.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
320.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
321.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
322.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
323.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
324	veor	q2,q2,q7
325	add	r10,r8,#2
326.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
327.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
328	veor	q3,q3,q7
329	add	r8,r8,#3
330.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
331.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
332.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
333.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
334	 @ Note the logic to update q0, q1, and q10 is written to work
335	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
336	 @ 32-bit mode. See the comment above.
337	veor	q11,q11,q7
338	vmov.32	d13[1], r9
339.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
340.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
341	vorr	q0,q6,q6
342	rev	r10,r10
343.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
344.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
345	vmov.32	d13[1], r10
346	rev	r12,r8
347.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
348.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
349	vorr	q1,q6,q6
350	vmov.32	d13[1], r12
351.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
352.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
353	vorr	q10,q6,q6
354	subs	r2,r2,#3		@ flags are consumed by the bhs below
355.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
356.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
357.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
358
359	veor	q2,q2,q4
360	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
361	vst1.8	{q2},[r1]!
362	veor	q3,q3,q5
363	mov	r6,r5
364	vst1.8	{q3},[r1]!
365	veor	q11,q11,q9
366	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
367	vst1.8	{q11},[r1]!
368	bhs	.Loop3x_ctr32
369
370	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
371	beq	.Lctr32_done
372	cmp	r2,#1
373	mov	r12,#16			@ r12 was reused for the counter above
374	moveq	r12,#0			@ single block: the tail must not step r0 past it
375
	@ Tail: one or two blocks, held in q0 and q1. Both are always
	@ encrypted; the second result is stored only when r2 is not 1.
376.Lctr32_tail:
377.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
378.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
379.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
380.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
381	vld1.32	{q8},[r7]!
382	subs	r6,r6,#2
383.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
384.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
385.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
386.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
387	vld1.32	{q9},[r7]!
388	bgt	.Lctr32_tail
389
390.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
391.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
392.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
393.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
394.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
395.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
396.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
397.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
398	vld1.8	{q2},[r0],r12		@ r12 is 0 for a single block, so the next load rereads it
399.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
400.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
401.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
402.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
403	vld1.8	{q3},[r0]
404.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
405.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
406.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
407.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
408	veor	q2,q2,q7
409.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
410.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
411.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
412.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
413	veor	q3,q3,q7
414.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
415.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
416
417	cmp	r2,#1
418	veor	q2,q2,q0
419	veor	q3,q3,q1
420	vst1.8	{q2},[r1]!
421	beq	.Lctr32_done
422	vst1.8	{q3},[r1]
423
424.Lctr32_done:
425	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
426	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
427.size	GFp_aes_hw_ctr32_encrypt_blocks,.-GFp_aes_hw_ctr32_encrypt_blocks
428#endif
429#endif
430#endif  // !OPENSSL_NO_ASM
431.section	.note.GNU-stack,"",%progbits
432