// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

// Let __has_feature() be used below on compilers that do not provide it.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
// When building with MemorySanitizer, define OPENSSL_NO_ASM so that the
// assembly guarded by it below is compiled out.
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#include <GFp/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// Constant table used by the key expansion in _GFp_aes_hw_set_encrypt_key.
// The first two rows are loaded together into v1/v2; the third row is loaded
// into v1 once the 8-iteration AES-128 loop has finished.
.section	__TEXT,__const
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01	// initial v1; shifted left by one bit after each use
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b	// v1 value for the rounds after the AES-128 loop

.text

// _GFp_aes_hw_set_encrypt_key: expand a 128- or 256-bit AES key into an
// encryption key schedule using the AES instructions.
//
// In:   x0 = user key bytes (16 bytes are read for 128-bit, 32 for 256-bit)
//       w1 = key length in bits
//       x2 = output buffer: round keys are stored from offset 0, and the
//            round count (10 or 14) is stored as a 32-bit word at offset 240
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is NULL
//            -2 if the key length is not 128 or 256 bits
// Clobbers: w1, x2, x3, w12, v0-v6, flags.
.globl	_GFp_aes_hw_set_encrypt_key
.private_extern	_GFp_aes_hw_set_encrypt_key

.align	5
_GFp_aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// pending return code: NULL argument
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// pending return code: bad key length
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	Lenc_key_abort
	// 192 passes every check above, but there is no 192-bit schedule in
	// this file. Reject it here, while x3 still holds -2, rather than
	// letting it fall into the 256-bit path and read 32 bytes of key.
	cmp	w1,#192
	b.eq	Lenc_key_abort

	adrp	x3,Lrcon@PAGE
	add	x3,x3,Lrcon@PAGEOFF
	cmp	w1,#192			// flags consumed by b.lt below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	Loop128
	// 192-bit key support was removed.
	b	L256

.align	4
	// Each pass stores the current round key (v3) and derives the next one.
	// w1 counts the 8 passes that use the doubling rcon in v1.
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// v1 = third Lrcon row (0x1b)

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at offset 160
	add	x2,x2,#0x50		// x2 = start of schedule + 240

	mov	w12,#10			// round count for a 128-bit key
	b	Ldone

// 192-bit key support was removed.

.align	4
L256:
	ld1	{v4.16b},[x0]		// v4 = second 16 key bytes
	mov	w1,#7
	mov	w12,#14			// round count for a 256-bit key
	st1	{v3.4s},[x2],#16

	// Each pass stores v4 and a newly derived v3. The loop exits after the
	// 7th pass with x2 = start of schedule + 240.
Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// round count at offset 240
	mov	x3,#0

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret

// _GFp_aes_hw_encrypt: encrypt a single 16-byte block.
//
// In:   x0 = input block (16 bytes read)
//       x1 = output block (16 bytes written)
//       x2 = key schedule; the round count is read from offset 240
// Clobbers: x2, w3, v0-v2, flags.
.globl	_GFp_aes_hw_encrypt
.private_extern	_GFp_aes_hw_encrypt

.align	5
_GFp_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = number of rounds
	ld1	{v0.4s},[x2],#16	// v0 = first round key
	ld1	{v2.16b},[x0]		// v2 = block being processed
	sub	w3,w3,#2		// the last two rounds run after the loop
	ld1	{v1.4s},[x2],#16	// v1 = second round key

	// Two rounds per pass. The round keys for the next pass are loaded
	// while the current ones are in use.
Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// v0 = last round key
	aese	v2.16b,v1.16b		// final round: no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

// _GFp_aes_hw_decrypt: decrypt a single 16-byte block.
//
// In:   x0 = input block (16 bytes read)
//       x1 = output block (16 bytes written)
//       x2 = key schedule; the round count is read from offset 240
// Clobbers: x2, w3, v0-v2, flags.
// NOTE(review): no routine in this part of the file builds a schedule for
// aesd/aesimc; presumably the caller supplies a decryption schedule. Confirm.
.globl	_GFp_aes_hw_decrypt
.private_extern	_GFp_aes_hw_decrypt

.align	5
_GFp_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = number of rounds
	ld1	{v0.4s},[x2],#16	// v0 = first round key
	ld1	{v2.16b},[x0]		// v2 = block being processed
	sub	w3,w3,#2		// the last two rounds run after the loop
	ld1	{v1.4s},[x2],#16	// v1 = second round key

	// Two rounds per pass. The round keys for the next pass are loaded
	// while the current ones are in use.
Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// v0 = last round key
	aesd	v2.16b,v1.16b		// final round: no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

// _GFp_aes_hw_ctr32_encrypt_blocks: CTR-mode encryption of whole 16-byte
// blocks with a 32-bit counter held in the last word of the counter block.
//
// In:   x0 = input  (x2 * 16 bytes read)
//       x1 = output (x2 * 16 bytes written)
//       x2 = number of 16-byte blocks; 0 is a no-op
//       x3 = key schedule; the round count is read from offset 240
//       x4 = 16-byte counter block; the word at offset 12 is the counter,
//            byte-reversed (rev) before and after arithmetic on
//            little-endian builds
// The counter block at x4 is only read, never written back.
// Clobbers: x0-x2, w5-w10, x12, v0-v7, v16-v23, flags.
.globl	_GFp_aes_hw_ctr32_encrypt_blocks
.private_extern	_GFp_aes_hw_ctr32_encrypt_blocks

.align	5
_GFp_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// With zero blocks the tail path below would still load one block from
	// the input and store two blocks to the output, so return early.
	cbz	x2,Lctr32_done
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo	// one block only: tail input stride becomes 0

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips this bug.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	w8, w8
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	Lctr32_tail		// flags from "cmp x2,#2": 1 or 2 blocks
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	Loop3x_ctr32

.align	4
	// Three counter blocks (v0, v1, v18) are encrypted in parallel, two
	// rounds per pass, with w6 as the pass counter.
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Remaining rounds. The results move to v4/v5/v17 so that v0/v1/v18
	// can be refilled with the next three counter blocks in the meantime.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold the last round key into the input
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	 // 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: x2 = blocks left (0..2)
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: tail input stride becomes 0

	// Tail: one or two blocks. Both v0 and v1 are always encrypted; the
	// second result is stored only when x2 != 1.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 is 0 when only one block remains,
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]		// so this load re-reads the same block
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret

#endif  // __ARM_MAX_ARCH__>=7
#endif  // !OPENSSL_NO_ASM