// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "absolute"
@ terms is ~2250 cycles per 64-byte block or ~35 cycles per byte
@ [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

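@ Calling convention note (an editorial sketch, not generated output): the
@ scalar entry point below is the usual OpenSSL block procedure. In C the
@ prototype is approximately
@
@	void sha256_block_data_order(uint32_t state[8], const uint8_t *inp,
@	                             size_t num);	/* num = 64-byte blocks */
@
@ with r0 = state, r1 = inp, r2 = num on entry, matching the prologue at
@ .Lsha256_block_data_order; the exact declaration lives in the C sources.
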
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
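@ Note: K256 holds the standard SHA-256 round constants -- the first 32 bits
@ of the fractional parts of the cube roots of the first 64 primes. A minimal
@ C sketch that reproduces an entry (illustration only; the assembled table
@ above is authoritative):
@
@	#include <math.h>
@	uint32_t k256_entry(unsigned prime) {	/* prime = 2, 3, 5, ... */
@		double r = cbrt((double)prime);
@		return (uint32_t)((r - floor(r)) * 4294967296.0);
@	}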
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

.globl	sha256_block_data_order
.hidden	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
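@ In C terms the capability dispatch above is roughly (a sketch; the flag
@ bits and OPENSSL_armcap_P come from <openssl/arm_arch.h>):
@
@	if (OPENSSL_armcap_P & ARMV8_SHA256)
@		goto armv8_path;	/* .LARMv8: SHA-256 instructions */
@	if (OPENSSL_armcap_P & ARMV7_NEON)
@		goto neon_path; 	/* .LNEON: vectorized schedule */
@	/* otherwise fall through to the integer-only code */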
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
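@ The "magic" above seeds a cross-round Maj() trick: r3 starts as b^c, each
@ round ANDs it with a^b and XORs in b, and the resulting Maj(a,b,c) is only
@ added to h at the top of the *next* round ("from the past"). The bitwise
@ identity being used, as a C sketch:
@
@	Maj(a,b,c) = b ^ ((b ^ c) & (a ^ b));	/* == (a&b)^(a&c)^(b&c) */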
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
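@ The sixteen unrolled blocks above are BODY_00_15; one round in C is
@ approximately the following sketch (note how the code folds the rotate
@ constants: e^ror5(e)^ror19(e) rotated by 6 equals Sigma1(e), and
@ a^ror11(a)^ror20(a) rotated by 2 equals Sigma0(a)):
@
@	#define ROR(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
@	T1 = h + (ROR(e,6) ^ ROR(e,11) ^ ROR(e,25))	/* Sigma1(e) */
@	       + (g ^ (e & (f ^ g)))			/* Ch(e,f,g) */
@	       + K256[i] + X[i];
@	d += T1;
@	h  = T1 + (ROR(a,2) ^ ROR(a,13) ^ ROR(a,22))	/* Sigma0(a) */
@	        + Maj(a,b,c);	/* then a..h rotate by one */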
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
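@ From here on each round also updates the 16-word schedule window in place;
@ in C, with all indices taken mod 16 (a sketch matching the comments above):
@
@	s0 = ROR(X[i+1],7)   ^ ROR(X[i+1],18)  ^ (X[i+1]  >> 3);  /* sigma0 */
@	s1 = ROR(X[i+14],17) ^ ROR(X[i+14],19) ^ (X[i+14] >> 10); /* sigma1 */
@	X[i] += s0 + s1 + X[i+9];	/* then the BODY_00_15 round as before */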
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx
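@ Loop control note: instead of a round counter, each 16-round pass checks
@ the low byte of the K256 word it just fetched against 0xf2, which is
@ unique to the final constant 0xc67178f2; the "done?" compare therefore
@ doubles as the 64-round terminator. As a C sketch:
@
@	if ((k & 0xff) == 0xf2)	/* k == 0xc67178f2, the last constant */
@		/* fall out of .Lrounds_16_xx into the epilogue */;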

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
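@ The epilogue above is the Davies-Meyer feed-forward plus the outer block
@ loop; roughly, in C (a sketch; "work" names the a..h registers r4-r11):
@
@	do {
@		/* 64 rounds on work[0..7] */
@		for (i = 0; i < 8; i++)
@			state[i] += work[i];	/* ldr/add pairs + stmia */
@		inp += 64;
@	} while (inp != end);			/* cmp r1,r12 ; bne .Loop */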
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha256_block_data_order_neon
.hidden	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48
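@ NEON strategy note: the rounds themselves stay in scalar registers; the
@ vector unit only computes the message schedule, four words per iteration,
@ interleaved with the scalar round code. NEON has no rotate, so ROR(x,n)
@ is synthesized from a shift pair -- vshr.u32 #n followed by vsli.32
@ #(32-n) -- which is how the #7/#25 and #18/#14 pairs below build sigma0
@ (together with the plain #3 shift). As an intrinsics-flavoured C sketch
@ (illustration only):
@
@	r = vsliq_n_u32(vshrq_n_u32(x, 7), x, 25);	/* lane-wise ROR(x,7) */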
1946
.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

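@ Rounds 48..63: the schedule is complete, so while the scalar code
@ finishes this block the NEON unit byte-swaps the next input block
@ (loaded just below) and pre-computes its W[i]+K256[i] values. If
@ this was the last block, r1 is stepped back first so the loads
@ stay within the input buffer and their results go unused.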
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

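@ More input to process? The flags still hold the result of the
@ pointer comparison above: if so, reset the schedule pointer and
@ round-helper registers and loop; otherwise restore the caller's
@ stack pointer saved at [sp,#76] and return.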
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

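@ The SHA-256 extension instructions used below are emitted as raw
@ bytes so that older assemblers can still build this file. The
@ Thumb-2 variant of INST swaps the two halfwords of the 32-bit ARM
@ encoding and ORs in the bits that turn its leading 0xf3 byte into
@ the 0xff used by the equivalent Thumb-2 NEON encodings.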
# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

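@ One 64-byte block per iteration: the input is byte-swapped, then
@ sha256su0/sha256su1 extend the message schedule while
@ sha256h/sha256h2 advance the two halves of the hash state, with
@ q14/q15 keeping a copy of the incoming state for the final
@ accumulation.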
.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
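@ Rounds 48..63: only the remaining round constants are added; no
@ further schedule extension is needed.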
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif
#endif  // !OPENSSL_NO_ASM
