1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if !defined(OPENSSL_NO_ASM)
11#if defined(__aarch64__)
12#if defined(BORINGSSL_PREFIX)
13#include <boringssl_prefix_symbols_asm.h>
14#endif
15// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
16//
17// Licensed under the OpenSSL license (the "License").  You may not use
18// this file except in compliance with the License.  You can obtain a copy
19// in the file LICENSE in the source distribution or at
20// https://www.openssl.org/source/license.html
21
22// ====================================================================
23// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
24// project. The module is, however, dual licensed under OpenSSL and
25// CRYPTOGAMS licenses depending on where you obtain it. For further
26// details see http://www.openssl.org/~appro/cryptogams/.
27//
28// Permission to use under GPLv2 terms is granted.
29// ====================================================================
30//
31// SHA256/512 for ARMv8.
32//
33// Performance in cycles per processed byte and improvement coefficient
34// over code generated with "default" compiler:
35//
36//		SHA256-hw	SHA256(*)	SHA512
37// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
38// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
39// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
40// Denver	2.01		10.5 (+26%)	6.70 (+8%)
41// X-Gene			20.0 (+100%)	12.8 (+300%(***))
42// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
43//
44// (*)	Software SHA256 results are of lesser relevance, presented
45//	mostly for informational purposes.
46// (**)	The result is a trade-off: it's possible to improve it by
47//	10% (or by 1 cycle per round), but at the cost of 20% loss
48//	on Cortex-A53 (or by 4 cycles per round).
49// (***)	Super-impressive coefficients over gcc-generated code are
50//	indication of some compiler "pathology", most notably code
51//	generated with -mgeneral-regs-only is significantly faster
52//	and the gap is only 40-90%.
53
54#ifndef	__KERNEL__
55# include <openssl/arm_arch.h>
56#endif
57
58.text
59
60
// sha512_block_data_order
//
// SHA-512 compression function applied to one or more 128-byte input blocks.
//
// Inputs, as consumed by the code below:
//   x0 = pointer to eight 64-bit state words (loaded into x20..x27)
//   x1 = pointer to the input data
//   x2 = number of 128-byte blocks (shifted left by 7 to form the end pointer)
// NOTE(review): presumably called from C as
//   void sha512_block_data_order(uint64_t state[8], const uint8_t *in, size_t num);
// confirm against the caller's prototype.
//
// The block loop is bottom-tested with an equality compare (cmp / b.ne at the
// end of the function), so a block count of zero is not handled here; callers
// must pass at least one block.
//
// Register roles inside the loops:
//   x20..x27  working variables a..h (roles rotate by register renaming)
//   x30       pointer into .LK512 (the link register is saved in the frame first)
//   x19,x28   alternately hold K[i] and the a^b / b^c value used for Maj
//   x16,x17   scratch for Sigma1, Ch and Sigma0
//   x0..x15   message-schedule words and per-round temporaries
//
// Stack frame (128 bytes addressed via x29, plus 32 bytes below it at sp):
//   [x29,#0]        saved x29,x30
//   [x29,#16..#95]  saved x19..x28
//   [x29,#96]       state pointer (x0)
//   [x29,#104]      end-of-input pointer
//   [x29,#112]      input pointer (written at the top of .Loop)
//   [sp,#0..#31]    four 8-byte message-schedule spill slots
61.globl	sha512_block_data_order
62.hidden	sha512_block_data_order
63.type	sha512_block_data_order,%function
64.align	6
65sha512_block_data_order:
66	stp	x29,x30,[sp,#-128]!
67	add	x29,sp,#0
68
	// save callee-saved x19..x28 in the frame
69	stp	x19,x20,[sp,#16]
70	stp	x21,x22,[sp,#32]
71	stp	x23,x24,[sp,#48]
72	stp	x25,x26,[sp,#64]
73	stp	x27,x28,[sp,#80]
	// reserve four 8-byte spill slots for message-schedule words
74	sub	sp,sp,#4*8
75
76	ldp	x20,x21,[x0]				// load context
77	ldp	x22,x23,[x0,#2*8]
78	ldp	x24,x25,[x0,#4*8]
79	add	x2,x1,x2,lsl#7	// end of input
80	ldp	x26,x27,[x0,#6*8]
	// x30 = address of .LK512 (page address + low 12 bits); the return
	// address was already saved by the stp at function entry
81	adrp	x30,.LK512
82	add	x30,x30,:lo12:.LK512
	// park the state pointer and end pointer at [x29,#96] and [x29,#104]
83	stp	x0,x2,[x29,#96]
84
// .Loop: per-block entry, rounds 0..15.
//
// The sixteen message words are fetched pairwise with ldp and, when
// __ARMEB__ is not defined, byte-swapped with rev. They land in
//   X[0..12] = x3..x15,  X[13] = x0,  X[14] = x1,  X[15] = x2.
// Each round adds K[i], X[i], Ch(e,f,g) and Sigma1(e) into the register that
// currently plays "h", adds that sum into "d", then adds Maj(a,b,c). The
// Sigma0(a) term is added at the top of the following round; the
// commented-out add at the end of each round marks where it logically belongs.
// The roles a..h rotate by renaming registers from round to round rather than
// by moving values.
85.Loop:
86	ldp	x3,x4,[x1],#2*8
87	ldr	x19,[x30],#8			// *K++
88	eor	x28,x21,x22				// magic seed
	// x1 is overwritten with X[14] later in this block, so the (already
	// advanced by 16) input pointer is kept in the frame
89	str	x1,[x29,#112]
90#ifndef	__ARMEB__
91	rev	x3,x3			// 0
92#endif
93	ror	x16,x24,#14
94	add	x27,x27,x19			// h+=K[i]
95	eor	x6,x24,x24,ror#23
96	and	x17,x25,x24
97	bic	x19,x26,x24
98	add	x27,x27,x3			// h+=X[i]
99	orr	x17,x17,x19			// Ch(e,f,g)
100	eor	x19,x20,x21			// a^b, b^c in next round
101	eor	x16,x16,x6,ror#18	// Sigma1(e)
102	ror	x6,x20,#28
103	add	x27,x27,x17			// h+=Ch(e,f,g)
104	eor	x17,x20,x20,ror#5
105	add	x27,x27,x16			// h+=Sigma1(e)
106	and	x28,x28,x19			// (b^c)&=(a^b)
107	add	x23,x23,x27			// d+=h
108	eor	x28,x28,x21			// Maj(a,b,c)
109	eor	x17,x6,x17,ror#34	// Sigma0(a)
110	add	x27,x27,x28			// h+=Maj(a,b,c)
111	ldr	x28,[x30],#8		// *K++, x19 in next round
112	//add	x27,x27,x17			// h+=Sigma0(a)
113#ifndef	__ARMEB__
114	rev	x4,x4			// 1
115#endif
116	ldp	x5,x6,[x1],#2*8
117	add	x27,x27,x17			// h+=Sigma0(a)
118	ror	x16,x23,#14
119	add	x26,x26,x28			// h+=K[i]
120	eor	x7,x23,x23,ror#23
121	and	x17,x24,x23
122	bic	x28,x25,x23
123	add	x26,x26,x4			// h+=X[i]
124	orr	x17,x17,x28			// Ch(e,f,g)
125	eor	x28,x27,x20			// a^b, b^c in next round
126	eor	x16,x16,x7,ror#18	// Sigma1(e)
127	ror	x7,x27,#28
128	add	x26,x26,x17			// h+=Ch(e,f,g)
129	eor	x17,x27,x27,ror#5
130	add	x26,x26,x16			// h+=Sigma1(e)
131	and	x19,x19,x28			// (b^c)&=(a^b)
132	add	x22,x22,x26			// d+=h
133	eor	x19,x19,x20			// Maj(a,b,c)
134	eor	x17,x7,x17,ror#34	// Sigma0(a)
135	add	x26,x26,x19			// h+=Maj(a,b,c)
136	ldr	x19,[x30],#8		// *K++, x28 in next round
137	//add	x26,x26,x17			// h+=Sigma0(a)
138#ifndef	__ARMEB__
139	rev	x5,x5			// 2
140#endif
141	add	x26,x26,x17			// h+=Sigma0(a)
142	ror	x16,x22,#14
143	add	x25,x25,x19			// h+=K[i]
144	eor	x8,x22,x22,ror#23
145	and	x17,x23,x22
146	bic	x19,x24,x22
147	add	x25,x25,x5			// h+=X[i]
148	orr	x17,x17,x19			// Ch(e,f,g)
149	eor	x19,x26,x27			// a^b, b^c in next round
150	eor	x16,x16,x8,ror#18	// Sigma1(e)
151	ror	x8,x26,#28
152	add	x25,x25,x17			// h+=Ch(e,f,g)
153	eor	x17,x26,x26,ror#5
154	add	x25,x25,x16			// h+=Sigma1(e)
155	and	x28,x28,x19			// (b^c)&=(a^b)
156	add	x21,x21,x25			// d+=h
157	eor	x28,x28,x27			// Maj(a,b,c)
158	eor	x17,x8,x17,ror#34	// Sigma0(a)
159	add	x25,x25,x28			// h+=Maj(a,b,c)
160	ldr	x28,[x30],#8		// *K++, x19 in next round
161	//add	x25,x25,x17			// h+=Sigma0(a)
162#ifndef	__ARMEB__
163	rev	x6,x6			// 3
164#endif
165	ldp	x7,x8,[x1],#2*8
166	add	x25,x25,x17			// h+=Sigma0(a)
167	ror	x16,x21,#14
168	add	x24,x24,x28			// h+=K[i]
169	eor	x9,x21,x21,ror#23
170	and	x17,x22,x21
171	bic	x28,x23,x21
172	add	x24,x24,x6			// h+=X[i]
173	orr	x17,x17,x28			// Ch(e,f,g)
174	eor	x28,x25,x26			// a^b, b^c in next round
175	eor	x16,x16,x9,ror#18	// Sigma1(e)
176	ror	x9,x25,#28
177	add	x24,x24,x17			// h+=Ch(e,f,g)
178	eor	x17,x25,x25,ror#5
179	add	x24,x24,x16			// h+=Sigma1(e)
180	and	x19,x19,x28			// (b^c)&=(a^b)
181	add	x20,x20,x24			// d+=h
182	eor	x19,x19,x26			// Maj(a,b,c)
183	eor	x17,x9,x17,ror#34	// Sigma0(a)
184	add	x24,x24,x19			// h+=Maj(a,b,c)
185	ldr	x19,[x30],#8		// *K++, x28 in next round
186	//add	x24,x24,x17			// h+=Sigma0(a)
187#ifndef	__ARMEB__
188	rev	x7,x7			// 4
189#endif
190	add	x24,x24,x17			// h+=Sigma0(a)
191	ror	x16,x20,#14
192	add	x23,x23,x19			// h+=K[i]
193	eor	x10,x20,x20,ror#23
194	and	x17,x21,x20
195	bic	x19,x22,x20
196	add	x23,x23,x7			// h+=X[i]
197	orr	x17,x17,x19			// Ch(e,f,g)
198	eor	x19,x24,x25			// a^b, b^c in next round
199	eor	x16,x16,x10,ror#18	// Sigma1(e)
200	ror	x10,x24,#28
201	add	x23,x23,x17			// h+=Ch(e,f,g)
202	eor	x17,x24,x24,ror#5
203	add	x23,x23,x16			// h+=Sigma1(e)
204	and	x28,x28,x19			// (b^c)&=(a^b)
205	add	x27,x27,x23			// d+=h
206	eor	x28,x28,x25			// Maj(a,b,c)
207	eor	x17,x10,x17,ror#34	// Sigma0(a)
208	add	x23,x23,x28			// h+=Maj(a,b,c)
209	ldr	x28,[x30],#8		// *K++, x19 in next round
210	//add	x23,x23,x17			// h+=Sigma0(a)
211#ifndef	__ARMEB__
212	rev	x8,x8			// 5
213#endif
214	ldp	x9,x10,[x1],#2*8
215	add	x23,x23,x17			// h+=Sigma0(a)
216	ror	x16,x27,#14
217	add	x22,x22,x28			// h+=K[i]
218	eor	x11,x27,x27,ror#23
219	and	x17,x20,x27
220	bic	x28,x21,x27
221	add	x22,x22,x8			// h+=X[i]
222	orr	x17,x17,x28			// Ch(e,f,g)
223	eor	x28,x23,x24			// a^b, b^c in next round
224	eor	x16,x16,x11,ror#18	// Sigma1(e)
225	ror	x11,x23,#28
226	add	x22,x22,x17			// h+=Ch(e,f,g)
227	eor	x17,x23,x23,ror#5
228	add	x22,x22,x16			// h+=Sigma1(e)
229	and	x19,x19,x28			// (b^c)&=(a^b)
230	add	x26,x26,x22			// d+=h
231	eor	x19,x19,x24			// Maj(a,b,c)
232	eor	x17,x11,x17,ror#34	// Sigma0(a)
233	add	x22,x22,x19			// h+=Maj(a,b,c)
234	ldr	x19,[x30],#8		// *K++, x28 in next round
235	//add	x22,x22,x17			// h+=Sigma0(a)
236#ifndef	__ARMEB__
237	rev	x9,x9			// 6
238#endif
239	add	x22,x22,x17			// h+=Sigma0(a)
240	ror	x16,x26,#14
241	add	x21,x21,x19			// h+=K[i]
242	eor	x12,x26,x26,ror#23
243	and	x17,x27,x26
244	bic	x19,x20,x26
245	add	x21,x21,x9			// h+=X[i]
246	orr	x17,x17,x19			// Ch(e,f,g)
247	eor	x19,x22,x23			// a^b, b^c in next round
248	eor	x16,x16,x12,ror#18	// Sigma1(e)
249	ror	x12,x22,#28
250	add	x21,x21,x17			// h+=Ch(e,f,g)
251	eor	x17,x22,x22,ror#5
252	add	x21,x21,x16			// h+=Sigma1(e)
253	and	x28,x28,x19			// (b^c)&=(a^b)
254	add	x25,x25,x21			// d+=h
255	eor	x28,x28,x23			// Maj(a,b,c)
256	eor	x17,x12,x17,ror#34	// Sigma0(a)
257	add	x21,x21,x28			// h+=Maj(a,b,c)
258	ldr	x28,[x30],#8		// *K++, x19 in next round
259	//add	x21,x21,x17			// h+=Sigma0(a)
260#ifndef	__ARMEB__
261	rev	x10,x10			// 7
262#endif
263	ldp	x11,x12,[x1],#2*8
264	add	x21,x21,x17			// h+=Sigma0(a)
265	ror	x16,x25,#14
266	add	x20,x20,x28			// h+=K[i]
267	eor	x13,x25,x25,ror#23
268	and	x17,x26,x25
269	bic	x28,x27,x25
270	add	x20,x20,x10			// h+=X[i]
271	orr	x17,x17,x28			// Ch(e,f,g)
272	eor	x28,x21,x22			// a^b, b^c in next round
273	eor	x16,x16,x13,ror#18	// Sigma1(e)
274	ror	x13,x21,#28
275	add	x20,x20,x17			// h+=Ch(e,f,g)
276	eor	x17,x21,x21,ror#5
277	add	x20,x20,x16			// h+=Sigma1(e)
278	and	x19,x19,x28			// (b^c)&=(a^b)
279	add	x24,x24,x20			// d+=h
280	eor	x19,x19,x22			// Maj(a,b,c)
281	eor	x17,x13,x17,ror#34	// Sigma0(a)
282	add	x20,x20,x19			// h+=Maj(a,b,c)
283	ldr	x19,[x30],#8		// *K++, x28 in next round
284	//add	x20,x20,x17			// h+=Sigma0(a)
285#ifndef	__ARMEB__
286	rev	x11,x11			// 8
287#endif
288	add	x20,x20,x17			// h+=Sigma0(a)
289	ror	x16,x24,#14
290	add	x27,x27,x19			// h+=K[i]
291	eor	x14,x24,x24,ror#23
292	and	x17,x25,x24
293	bic	x19,x26,x24
294	add	x27,x27,x11			// h+=X[i]
295	orr	x17,x17,x19			// Ch(e,f,g)
296	eor	x19,x20,x21			// a^b, b^c in next round
297	eor	x16,x16,x14,ror#18	// Sigma1(e)
298	ror	x14,x20,#28
299	add	x27,x27,x17			// h+=Ch(e,f,g)
300	eor	x17,x20,x20,ror#5
301	add	x27,x27,x16			// h+=Sigma1(e)
302	and	x28,x28,x19			// (b^c)&=(a^b)
303	add	x23,x23,x27			// d+=h
304	eor	x28,x28,x21			// Maj(a,b,c)
305	eor	x17,x14,x17,ror#34	// Sigma0(a)
306	add	x27,x27,x28			// h+=Maj(a,b,c)
307	ldr	x28,[x30],#8		// *K++, x19 in next round
308	//add	x27,x27,x17			// h+=Sigma0(a)
309#ifndef	__ARMEB__
310	rev	x12,x12			// 9
311#endif
312	ldp	x13,x14,[x1],#2*8
313	add	x27,x27,x17			// h+=Sigma0(a)
314	ror	x16,x23,#14
315	add	x26,x26,x28			// h+=K[i]
316	eor	x15,x23,x23,ror#23
317	and	x17,x24,x23
318	bic	x28,x25,x23
319	add	x26,x26,x12			// h+=X[i]
320	orr	x17,x17,x28			// Ch(e,f,g)
321	eor	x28,x27,x20			// a^b, b^c in next round
322	eor	x16,x16,x15,ror#18	// Sigma1(e)
323	ror	x15,x27,#28
324	add	x26,x26,x17			// h+=Ch(e,f,g)
325	eor	x17,x27,x27,ror#5
326	add	x26,x26,x16			// h+=Sigma1(e)
327	and	x19,x19,x28			// (b^c)&=(a^b)
328	add	x22,x22,x26			// d+=h
329	eor	x19,x19,x20			// Maj(a,b,c)
330	eor	x17,x15,x17,ror#34	// Sigma0(a)
331	add	x26,x26,x19			// h+=Maj(a,b,c)
332	ldr	x19,[x30],#8		// *K++, x28 in next round
333	//add	x26,x26,x17			// h+=Sigma0(a)
334#ifndef	__ARMEB__
335	rev	x13,x13			// 10
336#endif
337	add	x26,x26,x17			// h+=Sigma0(a)
338	ror	x16,x22,#14
339	add	x25,x25,x19			// h+=K[i]
340	eor	x0,x22,x22,ror#23
341	and	x17,x23,x22
342	bic	x19,x24,x22
343	add	x25,x25,x13			// h+=X[i]
344	orr	x17,x17,x19			// Ch(e,f,g)
345	eor	x19,x26,x27			// a^b, b^c in next round
346	eor	x16,x16,x0,ror#18	// Sigma1(e)
347	ror	x0,x26,#28
348	add	x25,x25,x17			// h+=Ch(e,f,g)
349	eor	x17,x26,x26,ror#5
350	add	x25,x25,x16			// h+=Sigma1(e)
351	and	x28,x28,x19			// (b^c)&=(a^b)
352	add	x21,x21,x25			// d+=h
353	eor	x28,x28,x27			// Maj(a,b,c)
354	eor	x17,x0,x17,ror#34	// Sigma0(a)
355	add	x25,x25,x28			// h+=Maj(a,b,c)
356	ldr	x28,[x30],#8		// *K++, x19 in next round
357	//add	x25,x25,x17			// h+=Sigma0(a)
358#ifndef	__ARMEB__
359	rev	x14,x14			// 11
360#endif
361	ldp	x15,x0,[x1],#2*8
362	add	x25,x25,x17			// h+=Sigma0(a)
	// From here on the round temporaries are taken from registers that
	// still hold message words, so X[3]..X[6] (x6..x9) are spilled to the
	// four stack slots over rounds 11..14 before being reused as scratch.
363	str	x6,[sp,#24]
364	ror	x16,x21,#14
365	add	x24,x24,x28			// h+=K[i]
366	eor	x6,x21,x21,ror#23
367	and	x17,x22,x21
368	bic	x28,x23,x21
369	add	x24,x24,x14			// h+=X[i]
370	orr	x17,x17,x28			// Ch(e,f,g)
371	eor	x28,x25,x26			// a^b, b^c in next round
372	eor	x16,x16,x6,ror#18	// Sigma1(e)
373	ror	x6,x25,#28
374	add	x24,x24,x17			// h+=Ch(e,f,g)
375	eor	x17,x25,x25,ror#5
376	add	x24,x24,x16			// h+=Sigma1(e)
377	and	x19,x19,x28			// (b^c)&=(a^b)
378	add	x20,x20,x24			// d+=h
379	eor	x19,x19,x26			// Maj(a,b,c)
380	eor	x17,x6,x17,ror#34	// Sigma0(a)
381	add	x24,x24,x19			// h+=Maj(a,b,c)
382	ldr	x19,[x30],#8		// *K++, x28 in next round
383	//add	x24,x24,x17			// h+=Sigma0(a)
384#ifndef	__ARMEB__
385	rev	x15,x15			// 12
386#endif
387	add	x24,x24,x17			// h+=Sigma0(a)
388	str	x7,[sp,#0]
389	ror	x16,x20,#14
390	add	x23,x23,x19			// h+=K[i]
391	eor	x7,x20,x20,ror#23
392	and	x17,x21,x20
393	bic	x19,x22,x20
394	add	x23,x23,x15			// h+=X[i]
395	orr	x17,x17,x19			// Ch(e,f,g)
396	eor	x19,x24,x25			// a^b, b^c in next round
397	eor	x16,x16,x7,ror#18	// Sigma1(e)
398	ror	x7,x24,#28
399	add	x23,x23,x17			// h+=Ch(e,f,g)
400	eor	x17,x24,x24,ror#5
401	add	x23,x23,x16			// h+=Sigma1(e)
402	and	x28,x28,x19			// (b^c)&=(a^b)
403	add	x27,x27,x23			// d+=h
404	eor	x28,x28,x25			// Maj(a,b,c)
405	eor	x17,x7,x17,ror#34	// Sigma0(a)
406	add	x23,x23,x28			// h+=Maj(a,b,c)
407	ldr	x28,[x30],#8		// *K++, x19 in next round
408	//add	x23,x23,x17			// h+=Sigma0(a)
409#ifndef	__ARMEB__
410	rev	x0,x0			// 13
411#endif
	// last pair of the block, loaded without writeback: x1 itself becomes
	// X[14] and x2 becomes X[15] (both pointers are saved in the frame)
412	ldp	x1,x2,[x1]
413	add	x23,x23,x17			// h+=Sigma0(a)
414	str	x8,[sp,#8]
415	ror	x16,x27,#14
416	add	x22,x22,x28			// h+=K[i]
417	eor	x8,x27,x27,ror#23
418	and	x17,x20,x27
419	bic	x28,x21,x27
420	add	x22,x22,x0			// h+=X[i]
421	orr	x17,x17,x28			// Ch(e,f,g)
422	eor	x28,x23,x24			// a^b, b^c in next round
423	eor	x16,x16,x8,ror#18	// Sigma1(e)
424	ror	x8,x23,#28
425	add	x22,x22,x17			// h+=Ch(e,f,g)
426	eor	x17,x23,x23,ror#5
427	add	x22,x22,x16			// h+=Sigma1(e)
428	and	x19,x19,x28			// (b^c)&=(a^b)
429	add	x26,x26,x22			// d+=h
430	eor	x19,x19,x24			// Maj(a,b,c)
431	eor	x17,x8,x17,ror#34	// Sigma0(a)
432	add	x22,x22,x19			// h+=Maj(a,b,c)
433	ldr	x19,[x30],#8		// *K++, x28 in next round
434	//add	x22,x22,x17			// h+=Sigma0(a)
435#ifndef	__ARMEB__
436	rev	x1,x1			// 14
437#endif
	// reload X[3], which was spilled in round 11
438	ldr	x6,[sp,#24]
439	add	x22,x22,x17			// h+=Sigma0(a)
440	str	x9,[sp,#16]
441	ror	x16,x26,#14
442	add	x21,x21,x19			// h+=K[i]
443	eor	x9,x26,x26,ror#23
444	and	x17,x27,x26
445	bic	x19,x20,x26
446	add	x21,x21,x1			// h+=X[i]
447	orr	x17,x17,x19			// Ch(e,f,g)
448	eor	x19,x22,x23			// a^b, b^c in next round
449	eor	x16,x16,x9,ror#18	// Sigma1(e)
450	ror	x9,x22,#28
451	add	x21,x21,x17			// h+=Ch(e,f,g)
452	eor	x17,x22,x22,ror#5
453	add	x21,x21,x16			// h+=Sigma1(e)
454	and	x28,x28,x19			// (b^c)&=(a^b)
455	add	x25,x25,x21			// d+=h
456	eor	x28,x28,x23			// Maj(a,b,c)
457	eor	x17,x9,x17,ror#34	// Sigma0(a)
458	add	x21,x21,x28			// h+=Maj(a,b,c)
459	ldr	x28,[x30],#8		// *K++, x19 in next round
460	//add	x21,x21,x17			// h+=Sigma0(a)
461#ifndef	__ARMEB__
462	rev	x2,x2			// 15
463#endif
	// Round 15 is the first round that also runs the message schedule:
	// besides the usual round work it turns x3 into
	//   X[16] = X[0] + X[9] + sigma0(X[1]) + sigma1(X[14])
	// using x9 and x8 as temporaries (X[5] and X[6] live in the spill slots).
464	ldr	x7,[sp,#0]
465	add	x21,x21,x17			// h+=Sigma0(a)
466	str	x10,[sp,#24]
467	ror	x16,x25,#14
468	add	x20,x20,x28			// h+=K[i]
469	ror	x9,x4,#1
470	and	x17,x26,x25
471	ror	x8,x1,#19
472	bic	x28,x27,x25
473	ror	x10,x21,#28
474	add	x20,x20,x2			// h+=X[i]
475	eor	x16,x16,x25,ror#18
476	eor	x9,x9,x4,ror#8
477	orr	x17,x17,x28			// Ch(e,f,g)
478	eor	x28,x21,x22			// a^b, b^c in next round
479	eor	x16,x16,x25,ror#41	// Sigma1(e)
480	eor	x10,x10,x21,ror#34
481	add	x20,x20,x17			// h+=Ch(e,f,g)
482	and	x19,x19,x28			// (b^c)&=(a^b)
483	eor	x8,x8,x1,ror#61
484	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
485	add	x20,x20,x16			// h+=Sigma1(e)
486	eor	x19,x19,x22			// Maj(a,b,c)
487	eor	x17,x10,x21,ror#39	// Sigma0(a)
488	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
489	add	x3,x3,x12
490	add	x24,x24,x20			// d+=h
491	add	x20,x20,x19			// h+=Maj(a,b,c)
492	ldr	x19,[x30],#8		// *K++, x28 in next round
493	add	x3,x3,x9
494	add	x20,x20,x17			// h+=Sigma0(a)
495	add	x3,x3,x8
// .Loop_16_xx: rounds 16..79, sixteen unrolled rounds per pass.
//
// Every round consumes the schedule word finished by the previous round and,
// interleaved with the round function, computes the next one in place as
//   X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])      (indices mod 16)
// In each round one schedule word is reloaded from, and another spilled to,
// the four stack slots at [sp,#0..#24].
// Sigma1(e) and Sigma0(a) are built here from three explicit rotations each
// (14/18/41 and 28/34/39).
// The last round of a pass loads the next table word into x19; the loop
// repeats while that word is non-zero, i.e. until the zero terminator that
// follows the 80 constants in .LK512 has been fetched. The schedule word
// produced by the final round of the final pass is not used.
496.Loop_16_xx:
497	ldr	x8,[sp,#8]
498	str	x11,[sp,#0]
499	ror	x16,x24,#14
500	add	x27,x27,x19			// h+=K[i]
501	ror	x10,x5,#1
502	and	x17,x25,x24
503	ror	x9,x2,#19
504	bic	x19,x26,x24
505	ror	x11,x20,#28
506	add	x27,x27,x3			// h+=X[i]
507	eor	x16,x16,x24,ror#18
508	eor	x10,x10,x5,ror#8
509	orr	x17,x17,x19			// Ch(e,f,g)
510	eor	x19,x20,x21			// a^b, b^c in next round
511	eor	x16,x16,x24,ror#41	// Sigma1(e)
512	eor	x11,x11,x20,ror#34
513	add	x27,x27,x17			// h+=Ch(e,f,g)
514	and	x28,x28,x19			// (b^c)&=(a^b)
515	eor	x9,x9,x2,ror#61
516	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
517	add	x27,x27,x16			// h+=Sigma1(e)
518	eor	x28,x28,x21			// Maj(a,b,c)
519	eor	x17,x11,x20,ror#39	// Sigma0(a)
520	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
521	add	x4,x4,x13
522	add	x23,x23,x27			// d+=h
523	add	x27,x27,x28			// h+=Maj(a,b,c)
524	ldr	x28,[x30],#8		// *K++, x19 in next round
525	add	x4,x4,x10
526	add	x27,x27,x17			// h+=Sigma0(a)
527	add	x4,x4,x9
528	ldr	x9,[sp,#16]
529	str	x12,[sp,#8]
530	ror	x16,x23,#14
531	add	x26,x26,x28			// h+=K[i]
532	ror	x11,x6,#1
533	and	x17,x24,x23
534	ror	x10,x3,#19
535	bic	x28,x25,x23
536	ror	x12,x27,#28
537	add	x26,x26,x4			// h+=X[i]
538	eor	x16,x16,x23,ror#18
539	eor	x11,x11,x6,ror#8
540	orr	x17,x17,x28			// Ch(e,f,g)
541	eor	x28,x27,x20			// a^b, b^c in next round
542	eor	x16,x16,x23,ror#41	// Sigma1(e)
543	eor	x12,x12,x27,ror#34
544	add	x26,x26,x17			// h+=Ch(e,f,g)
545	and	x19,x19,x28			// (b^c)&=(a^b)
546	eor	x10,x10,x3,ror#61
547	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
548	add	x26,x26,x16			// h+=Sigma1(e)
549	eor	x19,x19,x20			// Maj(a,b,c)
550	eor	x17,x12,x27,ror#39	// Sigma0(a)
551	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
552	add	x5,x5,x14
553	add	x22,x22,x26			// d+=h
554	add	x26,x26,x19			// h+=Maj(a,b,c)
555	ldr	x19,[x30],#8		// *K++, x28 in next round
556	add	x5,x5,x11
557	add	x26,x26,x17			// h+=Sigma0(a)
558	add	x5,x5,x10
559	ldr	x10,[sp,#24]
560	str	x13,[sp,#16]
561	ror	x16,x22,#14
562	add	x25,x25,x19			// h+=K[i]
563	ror	x12,x7,#1
564	and	x17,x23,x22
565	ror	x11,x4,#19
566	bic	x19,x24,x22
567	ror	x13,x26,#28
568	add	x25,x25,x5			// h+=X[i]
569	eor	x16,x16,x22,ror#18
570	eor	x12,x12,x7,ror#8
571	orr	x17,x17,x19			// Ch(e,f,g)
572	eor	x19,x26,x27			// a^b, b^c in next round
573	eor	x16,x16,x22,ror#41	// Sigma1(e)
574	eor	x13,x13,x26,ror#34
575	add	x25,x25,x17			// h+=Ch(e,f,g)
576	and	x28,x28,x19			// (b^c)&=(a^b)
577	eor	x11,x11,x4,ror#61
578	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
579	add	x25,x25,x16			// h+=Sigma1(e)
580	eor	x28,x28,x27			// Maj(a,b,c)
581	eor	x17,x13,x26,ror#39	// Sigma0(a)
582	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
583	add	x6,x6,x15
584	add	x21,x21,x25			// d+=h
585	add	x25,x25,x28			// h+=Maj(a,b,c)
586	ldr	x28,[x30],#8		// *K++, x19 in next round
587	add	x6,x6,x12
588	add	x25,x25,x17			// h+=Sigma0(a)
589	add	x6,x6,x11
590	ldr	x11,[sp,#0]
591	str	x14,[sp,#24]
592	ror	x16,x21,#14
593	add	x24,x24,x28			// h+=K[i]
594	ror	x13,x8,#1
595	and	x17,x22,x21
596	ror	x12,x5,#19
597	bic	x28,x23,x21
598	ror	x14,x25,#28
599	add	x24,x24,x6			// h+=X[i]
600	eor	x16,x16,x21,ror#18
601	eor	x13,x13,x8,ror#8
602	orr	x17,x17,x28			// Ch(e,f,g)
603	eor	x28,x25,x26			// a^b, b^c in next round
604	eor	x16,x16,x21,ror#41	// Sigma1(e)
605	eor	x14,x14,x25,ror#34
606	add	x24,x24,x17			// h+=Ch(e,f,g)
607	and	x19,x19,x28			// (b^c)&=(a^b)
608	eor	x12,x12,x5,ror#61
609	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
610	add	x24,x24,x16			// h+=Sigma1(e)
611	eor	x19,x19,x26			// Maj(a,b,c)
612	eor	x17,x14,x25,ror#39	// Sigma0(a)
613	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
614	add	x7,x7,x0
615	add	x20,x20,x24			// d+=h
616	add	x24,x24,x19			// h+=Maj(a,b,c)
617	ldr	x19,[x30],#8		// *K++, x28 in next round
618	add	x7,x7,x13
619	add	x24,x24,x17			// h+=Sigma0(a)
620	add	x7,x7,x12
621	ldr	x12,[sp,#8]
622	str	x15,[sp,#0]
623	ror	x16,x20,#14
624	add	x23,x23,x19			// h+=K[i]
625	ror	x14,x9,#1
626	and	x17,x21,x20
627	ror	x13,x6,#19
628	bic	x19,x22,x20
629	ror	x15,x24,#28
630	add	x23,x23,x7			// h+=X[i]
631	eor	x16,x16,x20,ror#18
632	eor	x14,x14,x9,ror#8
633	orr	x17,x17,x19			// Ch(e,f,g)
634	eor	x19,x24,x25			// a^b, b^c in next round
635	eor	x16,x16,x20,ror#41	// Sigma1(e)
636	eor	x15,x15,x24,ror#34
637	add	x23,x23,x17			// h+=Ch(e,f,g)
638	and	x28,x28,x19			// (b^c)&=(a^b)
639	eor	x13,x13,x6,ror#61
640	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
641	add	x23,x23,x16			// h+=Sigma1(e)
642	eor	x28,x28,x25			// Maj(a,b,c)
643	eor	x17,x15,x24,ror#39	// Sigma0(a)
644	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
645	add	x8,x8,x1
646	add	x27,x27,x23			// d+=h
647	add	x23,x23,x28			// h+=Maj(a,b,c)
648	ldr	x28,[x30],#8		// *K++, x19 in next round
649	add	x8,x8,x14
650	add	x23,x23,x17			// h+=Sigma0(a)
651	add	x8,x8,x13
652	ldr	x13,[sp,#16]
653	str	x0,[sp,#8]
654	ror	x16,x27,#14
655	add	x22,x22,x28			// h+=K[i]
656	ror	x15,x10,#1
657	and	x17,x20,x27
658	ror	x14,x7,#19
659	bic	x28,x21,x27
660	ror	x0,x23,#28
661	add	x22,x22,x8			// h+=X[i]
662	eor	x16,x16,x27,ror#18
663	eor	x15,x15,x10,ror#8
664	orr	x17,x17,x28			// Ch(e,f,g)
665	eor	x28,x23,x24			// a^b, b^c in next round
666	eor	x16,x16,x27,ror#41	// Sigma1(e)
667	eor	x0,x0,x23,ror#34
668	add	x22,x22,x17			// h+=Ch(e,f,g)
669	and	x19,x19,x28			// (b^c)&=(a^b)
670	eor	x14,x14,x7,ror#61
671	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
672	add	x22,x22,x16			// h+=Sigma1(e)
673	eor	x19,x19,x24			// Maj(a,b,c)
674	eor	x17,x0,x23,ror#39	// Sigma0(a)
675	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
676	add	x9,x9,x2
677	add	x26,x26,x22			// d+=h
678	add	x22,x22,x19			// h+=Maj(a,b,c)
679	ldr	x19,[x30],#8		// *K++, x28 in next round
680	add	x9,x9,x15
681	add	x22,x22,x17			// h+=Sigma0(a)
682	add	x9,x9,x14
683	ldr	x14,[sp,#24]
684	str	x1,[sp,#16]
685	ror	x16,x26,#14
686	add	x21,x21,x19			// h+=K[i]
687	ror	x0,x11,#1
688	and	x17,x27,x26
689	ror	x15,x8,#19
690	bic	x19,x20,x26
691	ror	x1,x22,#28
692	add	x21,x21,x9			// h+=X[i]
693	eor	x16,x16,x26,ror#18
694	eor	x0,x0,x11,ror#8
695	orr	x17,x17,x19			// Ch(e,f,g)
696	eor	x19,x22,x23			// a^b, b^c in next round
697	eor	x16,x16,x26,ror#41	// Sigma1(e)
698	eor	x1,x1,x22,ror#34
699	add	x21,x21,x17			// h+=Ch(e,f,g)
700	and	x28,x28,x19			// (b^c)&=(a^b)
701	eor	x15,x15,x8,ror#61
702	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
703	add	x21,x21,x16			// h+=Sigma1(e)
704	eor	x28,x28,x23			// Maj(a,b,c)
705	eor	x17,x1,x22,ror#39	// Sigma0(a)
706	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
707	add	x10,x10,x3
708	add	x25,x25,x21			// d+=h
709	add	x21,x21,x28			// h+=Maj(a,b,c)
710	ldr	x28,[x30],#8		// *K++, x19 in next round
711	add	x10,x10,x0
712	add	x21,x21,x17			// h+=Sigma0(a)
713	add	x10,x10,x15
714	ldr	x15,[sp,#0]
715	str	x2,[sp,#24]
716	ror	x16,x25,#14
717	add	x20,x20,x28			// h+=K[i]
718	ror	x1,x12,#1
719	and	x17,x26,x25
720	ror	x0,x9,#19
721	bic	x28,x27,x25
722	ror	x2,x21,#28
723	add	x20,x20,x10			// h+=X[i]
724	eor	x16,x16,x25,ror#18
725	eor	x1,x1,x12,ror#8
726	orr	x17,x17,x28			// Ch(e,f,g)
727	eor	x28,x21,x22			// a^b, b^c in next round
728	eor	x16,x16,x25,ror#41	// Sigma1(e)
729	eor	x2,x2,x21,ror#34
730	add	x20,x20,x17			// h+=Ch(e,f,g)
731	and	x19,x19,x28			// (b^c)&=(a^b)
732	eor	x0,x0,x9,ror#61
733	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
734	add	x20,x20,x16			// h+=Sigma1(e)
735	eor	x19,x19,x22			// Maj(a,b,c)
736	eor	x17,x2,x21,ror#39	// Sigma0(a)
737	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
738	add	x11,x11,x4
739	add	x24,x24,x20			// d+=h
740	add	x20,x20,x19			// h+=Maj(a,b,c)
741	ldr	x19,[x30],#8		// *K++, x28 in next round
742	add	x11,x11,x1
743	add	x20,x20,x17			// h+=Sigma0(a)
744	add	x11,x11,x0
745	ldr	x0,[sp,#8]
746	str	x3,[sp,#0]
747	ror	x16,x24,#14
748	add	x27,x27,x19			// h+=K[i]
749	ror	x2,x13,#1
750	and	x17,x25,x24
751	ror	x1,x10,#19
752	bic	x19,x26,x24
753	ror	x3,x20,#28
754	add	x27,x27,x11			// h+=X[i]
755	eor	x16,x16,x24,ror#18
756	eor	x2,x2,x13,ror#8
757	orr	x17,x17,x19			// Ch(e,f,g)
758	eor	x19,x20,x21			// a^b, b^c in next round
759	eor	x16,x16,x24,ror#41	// Sigma1(e)
760	eor	x3,x3,x20,ror#34
761	add	x27,x27,x17			// h+=Ch(e,f,g)
762	and	x28,x28,x19			// (b^c)&=(a^b)
763	eor	x1,x1,x10,ror#61
764	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
765	add	x27,x27,x16			// h+=Sigma1(e)
766	eor	x28,x28,x21			// Maj(a,b,c)
767	eor	x17,x3,x20,ror#39	// Sigma0(a)
768	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
769	add	x12,x12,x5
770	add	x23,x23,x27			// d+=h
771	add	x27,x27,x28			// h+=Maj(a,b,c)
772	ldr	x28,[x30],#8		// *K++, x19 in next round
773	add	x12,x12,x2
774	add	x27,x27,x17			// h+=Sigma0(a)
775	add	x12,x12,x1
776	ldr	x1,[sp,#16]
777	str	x4,[sp,#8]
778	ror	x16,x23,#14
779	add	x26,x26,x28			// h+=K[i]
780	ror	x3,x14,#1
781	and	x17,x24,x23
782	ror	x2,x11,#19
783	bic	x28,x25,x23
784	ror	x4,x27,#28
785	add	x26,x26,x12			// h+=X[i]
786	eor	x16,x16,x23,ror#18
787	eor	x3,x3,x14,ror#8
788	orr	x17,x17,x28			// Ch(e,f,g)
789	eor	x28,x27,x20			// a^b, b^c in next round
790	eor	x16,x16,x23,ror#41	// Sigma1(e)
791	eor	x4,x4,x27,ror#34
792	add	x26,x26,x17			// h+=Ch(e,f,g)
793	and	x19,x19,x28			// (b^c)&=(a^b)
794	eor	x2,x2,x11,ror#61
795	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
796	add	x26,x26,x16			// h+=Sigma1(e)
797	eor	x19,x19,x20			// Maj(a,b,c)
798	eor	x17,x4,x27,ror#39	// Sigma0(a)
799	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
800	add	x13,x13,x6
801	add	x22,x22,x26			// d+=h
802	add	x26,x26,x19			// h+=Maj(a,b,c)
803	ldr	x19,[x30],#8		// *K++, x28 in next round
804	add	x13,x13,x3
805	add	x26,x26,x17			// h+=Sigma0(a)
806	add	x13,x13,x2
807	ldr	x2,[sp,#24]
808	str	x5,[sp,#16]
809	ror	x16,x22,#14
810	add	x25,x25,x19			// h+=K[i]
811	ror	x4,x15,#1
812	and	x17,x23,x22
813	ror	x3,x12,#19
814	bic	x19,x24,x22
815	ror	x5,x26,#28
816	add	x25,x25,x13			// h+=X[i]
817	eor	x16,x16,x22,ror#18
818	eor	x4,x4,x15,ror#8
819	orr	x17,x17,x19			// Ch(e,f,g)
820	eor	x19,x26,x27			// a^b, b^c in next round
821	eor	x16,x16,x22,ror#41	// Sigma1(e)
822	eor	x5,x5,x26,ror#34
823	add	x25,x25,x17			// h+=Ch(e,f,g)
824	and	x28,x28,x19			// (b^c)&=(a^b)
825	eor	x3,x3,x12,ror#61
826	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
827	add	x25,x25,x16			// h+=Sigma1(e)
828	eor	x28,x28,x27			// Maj(a,b,c)
829	eor	x17,x5,x26,ror#39	// Sigma0(a)
830	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
831	add	x14,x14,x7
832	add	x21,x21,x25			// d+=h
833	add	x25,x25,x28			// h+=Maj(a,b,c)
834	ldr	x28,[x30],#8		// *K++, x19 in next round
835	add	x14,x14,x4
836	add	x25,x25,x17			// h+=Sigma0(a)
837	add	x14,x14,x3
838	ldr	x3,[sp,#0]
839	str	x6,[sp,#24]
840	ror	x16,x21,#14
841	add	x24,x24,x28			// h+=K[i]
842	ror	x5,x0,#1
843	and	x17,x22,x21
844	ror	x4,x13,#19
845	bic	x28,x23,x21
846	ror	x6,x25,#28
847	add	x24,x24,x14			// h+=X[i]
848	eor	x16,x16,x21,ror#18
849	eor	x5,x5,x0,ror#8
850	orr	x17,x17,x28			// Ch(e,f,g)
851	eor	x28,x25,x26			// a^b, b^c in next round
852	eor	x16,x16,x21,ror#41	// Sigma1(e)
853	eor	x6,x6,x25,ror#34
854	add	x24,x24,x17			// h+=Ch(e,f,g)
855	and	x19,x19,x28			// (b^c)&=(a^b)
856	eor	x4,x4,x13,ror#61
857	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
858	add	x24,x24,x16			// h+=Sigma1(e)
859	eor	x19,x19,x26			// Maj(a,b,c)
860	eor	x17,x6,x25,ror#39	// Sigma0(a)
861	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
862	add	x15,x15,x8
863	add	x20,x20,x24			// d+=h
864	add	x24,x24,x19			// h+=Maj(a,b,c)
865	ldr	x19,[x30],#8		// *K++, x28 in next round
866	add	x15,x15,x5
867	add	x24,x24,x17			// h+=Sigma0(a)
868	add	x15,x15,x4
869	ldr	x4,[sp,#8]
870	str	x7,[sp,#0]
871	ror	x16,x20,#14
872	add	x23,x23,x19			// h+=K[i]
873	ror	x6,x1,#1
874	and	x17,x21,x20
875	ror	x5,x14,#19
876	bic	x19,x22,x20
877	ror	x7,x24,#28
878	add	x23,x23,x15			// h+=X[i]
879	eor	x16,x16,x20,ror#18
880	eor	x6,x6,x1,ror#8
881	orr	x17,x17,x19			// Ch(e,f,g)
882	eor	x19,x24,x25			// a^b, b^c in next round
883	eor	x16,x16,x20,ror#41	// Sigma1(e)
884	eor	x7,x7,x24,ror#34
885	add	x23,x23,x17			// h+=Ch(e,f,g)
886	and	x28,x28,x19			// (b^c)&=(a^b)
887	eor	x5,x5,x14,ror#61
888	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
889	add	x23,x23,x16			// h+=Sigma1(e)
890	eor	x28,x28,x25			// Maj(a,b,c)
891	eor	x17,x7,x24,ror#39	// Sigma0(a)
892	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
893	add	x0,x0,x9
894	add	x27,x27,x23			// d+=h
895	add	x23,x23,x28			// h+=Maj(a,b,c)
896	ldr	x28,[x30],#8		// *K++, x19 in next round
897	add	x0,x0,x6
898	add	x23,x23,x17			// h+=Sigma0(a)
899	add	x0,x0,x5
900	ldr	x5,[sp,#16]
901	str	x8,[sp,#8]
902	ror	x16,x27,#14
903	add	x22,x22,x28			// h+=K[i]
904	ror	x7,x2,#1
905	and	x17,x20,x27
906	ror	x6,x15,#19
907	bic	x28,x21,x27
908	ror	x8,x23,#28
909	add	x22,x22,x0			// h+=X[i]
910	eor	x16,x16,x27,ror#18
911	eor	x7,x7,x2,ror#8
912	orr	x17,x17,x28			// Ch(e,f,g)
913	eor	x28,x23,x24			// a^b, b^c in next round
914	eor	x16,x16,x27,ror#41	// Sigma1(e)
915	eor	x8,x8,x23,ror#34
916	add	x22,x22,x17			// h+=Ch(e,f,g)
917	and	x19,x19,x28			// (b^c)&=(a^b)
918	eor	x6,x6,x15,ror#61
919	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
920	add	x22,x22,x16			// h+=Sigma1(e)
921	eor	x19,x19,x24			// Maj(a,b,c)
922	eor	x17,x8,x23,ror#39	// Sigma0(a)
923	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
924	add	x1,x1,x10
925	add	x26,x26,x22			// d+=h
926	add	x22,x22,x19			// h+=Maj(a,b,c)
927	ldr	x19,[x30],#8		// *K++, x28 in next round
928	add	x1,x1,x7
929	add	x22,x22,x17			// h+=Sigma0(a)
930	add	x1,x1,x6
931	ldr	x6,[sp,#24]
932	str	x9,[sp,#16]
933	ror	x16,x26,#14
934	add	x21,x21,x19			// h+=K[i]
935	ror	x8,x3,#1
936	and	x17,x27,x26
937	ror	x7,x0,#19
938	bic	x19,x20,x26
939	ror	x9,x22,#28
940	add	x21,x21,x1			// h+=X[i]
941	eor	x16,x16,x26,ror#18
942	eor	x8,x8,x3,ror#8
943	orr	x17,x17,x19			// Ch(e,f,g)
944	eor	x19,x22,x23			// a^b, b^c in next round
945	eor	x16,x16,x26,ror#41	// Sigma1(e)
946	eor	x9,x9,x22,ror#34
947	add	x21,x21,x17			// h+=Ch(e,f,g)
948	and	x28,x28,x19			// (b^c)&=(a^b)
949	eor	x7,x7,x0,ror#61
950	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
951	add	x21,x21,x16			// h+=Sigma1(e)
952	eor	x28,x28,x23			// Maj(a,b,c)
953	eor	x17,x9,x22,ror#39	// Sigma0(a)
954	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
955	add	x2,x2,x11
956	add	x25,x25,x21			// d+=h
957	add	x21,x21,x28			// h+=Maj(a,b,c)
958	ldr	x28,[x30],#8		// *K++, x19 in next round
959	add	x2,x2,x8
960	add	x21,x21,x17			// h+=Sigma0(a)
961	add	x2,x2,x7
962	ldr	x7,[sp,#0]
963	str	x10,[sp,#24]
964	ror	x16,x25,#14
965	add	x20,x20,x28			// h+=K[i]
966	ror	x9,x4,#1
967	and	x17,x26,x25
968	ror	x8,x1,#19
969	bic	x28,x27,x25
970	ror	x10,x21,#28
971	add	x20,x20,x2			// h+=X[i]
972	eor	x16,x16,x25,ror#18
973	eor	x9,x9,x4,ror#8
974	orr	x17,x17,x28			// Ch(e,f,g)
975	eor	x28,x21,x22			// a^b, b^c in next round
976	eor	x16,x16,x25,ror#41	// Sigma1(e)
977	eor	x10,x10,x21,ror#34
978	add	x20,x20,x17			// h+=Ch(e,f,g)
979	and	x19,x19,x28			// (b^c)&=(a^b)
980	eor	x8,x8,x1,ror#61
981	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
982	add	x20,x20,x16			// h+=Sigma1(e)
983	eor	x19,x19,x22			// Maj(a,b,c)
984	eor	x17,x10,x21,ror#39	// Sigma0(a)
985	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
986	add	x3,x3,x12
987	add	x24,x24,x20			// d+=h
988	add	x20,x20,x19			// h+=Maj(a,b,c)
989	ldr	x19,[x30],#8		// *K++, x28 in next round
990	add	x3,x3,x9
991	add	x20,x20,x17			// h+=Sigma0(a)
992	add	x3,x3,x8
	// x19 holds the table word just fetched; zero means the terminator
	// after the last constant was reached and all rounds are done
993	cbnz	x19,.Loop_16_xx
994
	// Block finished: recover the state pointer (x0), end pointer (x2) and
	// input pointer (x1) that were parked in the frame.
995	ldp	x0,x2,[x29,#96]
996	ldr	x1,[x29,#112]
	// 648 = 81*8: the post-incrementing loads consumed 80 constants plus
	// the terminator, so this puts x30 back at the start of .LK512
997	sub	x30,x30,#648		// rewind
998
	// Feed-forward: add the previous state words (x3..x10) to the working
	// variables and store the result back as the new state.
999	ldp	x3,x4,[x0]
1000	ldp	x5,x6,[x0,#2*8]
	// the saved pointer was already 16 bytes into the block, so adding
	// 14*8 lands on the start of the next 128-byte block
1001	add	x1,x1,#14*8			// advance input pointer
1002	ldp	x7,x8,[x0,#4*8]
1003	add	x20,x20,x3
1004	ldp	x9,x10,[x0,#6*8]
1005	add	x21,x21,x4
1006	add	x22,x22,x5
1007	add	x23,x23,x6
1008	stp	x20,x21,[x0]
1009	add	x24,x24,x7
1010	add	x25,x25,x8
1011	stp	x22,x23,[x0,#2*8]
1012	add	x26,x26,x9
1013	add	x27,x27,x10
	// more input? (equality test against the end pointer; the stp
	// instructions in between do not alter the flags)
1014	cmp	x1,x2
1015	stp	x24,x25,[x0,#4*8]
1016	stp	x26,x27,[x0,#6*8]
1017	b.ne	.Loop
1018
	// Epilogue: restore callee-saved x19..x28 via the frame pointer, drop
	// the 32-byte spill area, then pop the 128-byte frame and return.
1019	ldp	x19,x20,[x29,#16]
1020	add	sp,sp,#4*8
1021	ldp	x21,x22,[x29,#32]
1022	ldp	x23,x24,[x29,#48]
1023	ldp	x25,x26,[x29,#64]
1024	ldp	x27,x28,[x29,#80]
1025	ldp	x29,x30,[sp],#128
1026	ret
1027.size	sha512_block_data_order,.-sha512_block_data_order
1028
// .LK512: the 80 SHA-512 round constants, one 64-bit word each, followed by
// a single zero word.
//
// sha512_block_data_order walks this table with post-incrementing 8-byte
// loads. The zero word is what ends its round loop (cbnz in .Loop_16_xx), and
// the function rewinds its table pointer by a hard-coded 648 bytes
// (81 words) per block, so the table must keep exactly 80 constants followed
// directly by the terminator.
1029.section	.rodata
1030.align	6
1031.type	.LK512,%object
1032.LK512:
1033.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1034.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1035.quad	0x3956c25bf348b538,0x59f111f1b605d019
1036.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1037.quad	0xd807aa98a3030242,0x12835b0145706fbe
1038.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1039.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1040.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1041.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1042.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1043.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1044.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1045.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1046.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1047.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1048.quad	0x06ca6351e003826f,0x142929670a0e6e70
1049.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1050.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1051.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1052.quad	0x81c2c92e47edaee6,0x92722c851482353b
1053.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1054.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1055.quad	0xd192e819d6ef5218,0xd69906245565a910
1056.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1057.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1058.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1059.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1060.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1061.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1062.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1063.quad	0x90befffa23631e28,0xa4506cebde82bde9
1064.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1065.quad	0xca273eceea26619c,0xd186b8c721c0c207
1066.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1067.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1068.quad	0x113f9804bef90dae,0x1b710b35131c471b
1069.quad	0x28db77f523047d84,0x32caab7b40c72493
1070.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1071.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1072.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1073.quad	0	// terminator
1074.size	.LK512,.-.LK512
// Identification string, stored as NUL-terminated ASCII bytes:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1075.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1076.align	2
1077.align	2
// Unless __KERNEL__ is defined, declare OPENSSL_armcap_P as a 4-byte common
// symbol with 4-byte alignment and hidden visibility. The SHA-512 routine
// in this file does not read it.
1078#ifndef	__KERNEL__
1079.comm	OPENSSL_armcap_P,4,4
1080.hidden	OPENSSL_armcap_P
1081#endif
1082#endif
1083#endif  // !OPENSSL_NO_ASM
1084